finbourne_sdk_utils-0.0.24-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. features/__init__.py +0 -0
  2. features/main.py +11 -0
  3. finbourne_sdk_utils/__init__.py +8 -0
  4. finbourne_sdk_utils/cocoon/__init__.py +34 -0
  5. finbourne_sdk_utils/cocoon/async_tools.py +94 -0
  6. finbourne_sdk_utils/cocoon/cocoon.py +1862 -0
  7. finbourne_sdk_utils/cocoon/cocoon_printer.py +455 -0
  8. finbourne_sdk_utils/cocoon/config/domain_settings.json +125 -0
  9. finbourne_sdk_utils/cocoon/config/seed_sample_data.json +36 -0
  10. finbourne_sdk_utils/cocoon/dateorcutlabel.py +198 -0
  11. finbourne_sdk_utils/cocoon/instruments.py +482 -0
  12. finbourne_sdk_utils/cocoon/properties.py +442 -0
  13. finbourne_sdk_utils/cocoon/seed_sample_data.py +137 -0
  14. finbourne_sdk_utils/cocoon/systemConfiguration.py +92 -0
  15. finbourne_sdk_utils/cocoon/transaction_type_upload.py +136 -0
  16. finbourne_sdk_utils/cocoon/utilities.py +1877 -0
  17. finbourne_sdk_utils/cocoon/validator.py +243 -0
  18. finbourne_sdk_utils/extract/__init__.py +1 -0
  19. finbourne_sdk_utils/extract/group_holdings.py +400 -0
  20. finbourne_sdk_utils/iam/__init__.py +1 -0
  21. finbourne_sdk_utils/iam/roles.py +74 -0
  22. finbourne_sdk_utils/jupyter_tools/__init__.py +2 -0
  23. finbourne_sdk_utils/jupyter_tools/hide_code_button.py +23 -0
  24. finbourne_sdk_utils/jupyter_tools/stop_execution.py +14 -0
  25. finbourne_sdk_utils/logger/LusidLogger.py +41 -0
  26. finbourne_sdk_utils/logger/__init__.py +1 -0
  27. finbourne_sdk_utils/lpt/__init__.py +0 -0
  28. finbourne_sdk_utils/lpt/back_compat.py +20 -0
  29. finbourne_sdk_utils/lpt/cash_ladder.py +191 -0
  30. finbourne_sdk_utils/lpt/connect_lusid.py +64 -0
  31. finbourne_sdk_utils/lpt/connect_none.py +5 -0
  32. finbourne_sdk_utils/lpt/connect_token.py +9 -0
  33. finbourne_sdk_utils/lpt/dfq.py +321 -0
  34. finbourne_sdk_utils/lpt/either.py +65 -0
  35. finbourne_sdk_utils/lpt/get_instruments.py +101 -0
  36. finbourne_sdk_utils/lpt/lpt.py +374 -0
  37. finbourne_sdk_utils/lpt/lse.py +188 -0
  38. finbourne_sdk_utils/lpt/map_instruments.py +164 -0
  39. finbourne_sdk_utils/lpt/pager.py +32 -0
  40. finbourne_sdk_utils/lpt/record.py +13 -0
  41. finbourne_sdk_utils/lpt/refreshing_token.py +43 -0
  42. finbourne_sdk_utils/lpt/search_instruments.py +48 -0
  43. finbourne_sdk_utils/lpt/stdargs.py +154 -0
  44. finbourne_sdk_utils/lpt/txn_config.py +128 -0
  45. finbourne_sdk_utils/lpt/txn_config_yaml.py +493 -0
  46. finbourne_sdk_utils/pandas_utils/__init__.py +0 -0
  47. finbourne_sdk_utils/pandas_utils/lusid_pandas.py +128 -0
  48. finbourne_sdk_utils-0.0.24.dist-info/LICENSE +21 -0
  49. finbourne_sdk_utils-0.0.24.dist-info/METADATA +25 -0
  50. finbourne_sdk_utils-0.0.24.dist-info/RECORD +52 -0
  51. finbourne_sdk_utils-0.0.24.dist-info/WHEEL +5 -0
  52. finbourne_sdk_utils-0.0.24.dist-info/top_level.txt +2 -0
finbourne_sdk_utils/cocoon/utilities.py
@@ -0,0 +1,1877 @@
+ import argparse
+ import copy
+ import csv
+ import os
+ import uuid
+ import re
+ import numpy as np
+ import lusid
+ from collections.abc import Mapping
+ import pandas as pd
+ from detect_delimiter import detect
+ import requests
+ import json
+ import inspect
+ import functools
+ from pathlib import Path
+
+ from finbourne_sdk_utils.cocoon.dateorcutlabel import DateOrCutLabel
+ import lusid.models as models
+ import logging
+ import time as default_time
+ from finbourne_sdk_utils.cocoon.validator import Validator
+ import types
+ import typing
+ import pydantic.v1
+
+
+ def checkargs(function: typing.Callable) -> typing.Callable:
+     """
+     This can be used as a decorator to check that the types of the provided arguments are correct. It checks that
+     the provided arguments match any type annotations and/or the default value for the parameter.
+
+     Parameters
+     ----------
+     function : typing.Callable
+         The function to wrap with annotated types; all parameters must be annotated with a type
+
+     Returns
+     -------
+     _f : typing.Callable
+         The wrapped function
+     """
+
+     @functools.wraps(function)
+     def _f(*args, **kwargs):
+
+         # Get all the function arguments in order
+         function_arguments = inspect.signature(function).parameters
+
+         # Collect each non keyword argument value and key it by the argument name
+         keyed_arguments = {
+             list(function_arguments.keys())[i]: args[i] for i in range(0, len(args))
+         }
+
+         # Update this with the keyword argument values
+         keyed_arguments.update(kwargs)
+
+         # For each argument raise an error if it is of the incorrect type or has an invalid default value
+         for argument_name, argument_value in keyed_arguments.items():
+
+             if argument_name not in list(function_arguments.keys()):
+                 raise ValueError(
+                     f"The argument {argument_name} is not a valid keyword argument for this function, valid arguments"
+                     + f" are {str(list(function_arguments.keys()))}"
+                 )
+
+             # Get the argument's details
+             argument_details = function_arguments[argument_name]
+             # Assume that there is no default value for this parameter
+             is_default_value = False
+
+             # If there is a default value
+             if argument_details.default is not argument_details.empty:
+                 # Check to see if the argument value matches the default
+                 if argument_details.default is None:
+                     is_default_value = argument_value is argument_details.default
+                 else:
+                     is_default_value = argument_value == argument_details.default
+
+             # If the argument value is of the wrong type e.g. list instead of dict then throw an error
+             if (
+                 not isinstance(argument_value, argument_details.annotation)
+                 and argument_details.annotation is not argument_details.empty
+             ):
+                 # The only exception is if it matches the default value, which may be of a different type e.g. None
+                 if not is_default_value:
+                     raise TypeError(
+                         f"""The value provided for {argument_name} is of type {type(argument_value)} not of
+                         type {argument_details.annotation}. Please update the provided value to be of type
+                         {argument_details.annotation}"""
+                     )
+
+         return function(*args, **kwargs)
+
+     return _f
+
+
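As a quick illustration of the decorator in use (the decorated function below is hypothetical, not part of the package):

    @checkargs
    def scale(value: int, factor: int = 2) -> int:
        return value * factor

    scale(3)      # returns 6
    scale("3")    # raises TypeError: value is of type <class 'str'>, not int

Note that the check relies on isinstance, so parameters need concrete type annotations; subscripted typing generics such as typing.List[str] cannot be used with isinstance checks.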
+ def make_code_lusid_friendly(raw_code) -> str:
+     """
+     This function takes a column name and converts it to a LUSID friendly code for creating LUSID objects. LUSID allows
+     for up to 64 characters which can be lowercase and uppercase letters, numbers, a dash ("-") or an underscore ("_").
+     The complete restrictions are here: https://support.lusid.com/what-is-a-code
+
+     Parameters
+     ----------
+     raw_code : any
+         A raw column header which needs special characters stripped out
+
+     Returns
+     -------
+     friendly_code : str
+         A LUSID friendly code with special characters removed
+     """
+
+     # Convert any type to a string
+     try:
+         raw_code = str(raw_code)
+     except Exception as exception:
+         raise ValueError(
+             f"Could not convert value of {raw_code} with type {type(raw_code)} to a string. "
+             + "Please convert to a format which can be cast to a string and try again"
+         ) from exception
+
+     # Check that it does not exceed the max length
+     max_length = 64
+
+     if len(raw_code) > max_length:
+         raise ValueError(
+             f"""The name {raw_code} is {len(raw_code)} characters long and exceeds the limit of {max_length}
+             for a code. Please shorten it by {len(raw_code) - max_length} characters."""
+         )
+
+     # Convert known unfriendly characters to a specific replacement string and remove the rest completely
+     friendly_code = re.sub(
+         r"[^-\w]",
+         "",
+         raw_code.replace("%", "Percentage")
+         .replace("&", "and")
+         .replace(".", "_")
+         .strip(),
+     )
+
+     return friendly_code
+
+
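For example, the substitutions and the final regex combine as follows:

    make_code_lusid_friendly("P&L %")      # returns "PandLPercentage"
    make_code_lusid_friendly("bid.price")  # returns "bid_price"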
+ @checkargs
+ def populate_model(
+     model_object_name: str,
+     required_mapping: dict,
+     optional_mapping: dict,
+     row: pd.Series,
+     properties,
+     identifiers: dict = None,
+     sub_holding_keys=None,
+ ):
+     """
+     This function populates the provided LUSID model object in lusid.models with values from a Pandas Series
+
+     Parameters
+     ----------
+     model_object_name : str
+         The name of the model object to populate
+     required_mapping : dict
+         The required mapping between the row columns and the model attributes
+     optional_mapping : dict
+         The optional mapping between the row columns and the model attributes
+     row : pd.Series
+         The row from the provided pd.DataFrame to use to populate the model
+     properties
+         The properties for this model
+     identifiers : dict
+         The identifiers for this model
+     sub_holding_keys
+         The sub holding keys to use
+
+     Returns
+     -------
+     lusid.models
+         The populated model object
+     """
+
+     # Check that the provided model name actually exists
+     model_object = getattr(lusid.models, model_object_name, None)
+
+     if model_object is None:
+         raise TypeError("The provided model_object is not a lusid.model object")
+
+     # Expand the mapping out from being a dot separated flat dictionary e.g. transaction_price.price to being nested
+     update_dict(required_mapping, optional_mapping)
+
+     mapping_expanded = expand_dictionary(required_mapping)
+
+     # Set the attributes on the model
+     return set_attributes_recursive(
+         model_object=model_object,
+         mapping=mapping_expanded,
+         row=row,
+         properties=properties,
+         identifiers=identifiers,
+         sub_holding_keys=sub_holding_keys,
+     )
+
+
+ @checkargs
+ def set_attributes_recursive(
+     model_object,
+     mapping: dict,
+     row: pd.Series,
+     properties=None,
+     identifiers: dict = None,
+     sub_holding_keys=None,
+ ):
+     """
+     This function takes a lusid.models object and an expanded mapping between its attributes and the provided
+     row of data, and constructs a populated model
+
+     Parameters
+     ----------
+     model_object : lusid.models
+         The object from lusid.models to populate
+     mapping : dict
+         The expanded dictionary mapping the Series columns to the LUSID model attributes
+     row : pd.Series
+         The current row of the DataFrame being worked on
+     properties : any
+         The properties to use on this model
+     identifiers : any
+         The instrument identifiers to use on this model
+     sub_holding_keys
+         The sub holding keys to use on this model
+
+     Returns
+     -------
+     model_object : lusid.models
+         An instance of the model object with populated attributes
+     """
+
+     # Get the object attributes
+     obj_attr = get_attributes_and_types(model_object)
+     obj_attr_required_map = get_required_attributes_from_model(model_object)
+     obj_init_values = {}
+
+     # Additional attributes which are used on most models but will be populated outside the provided mapping
+     additional_attributes = {
+         "instrument_identifiers": identifiers,
+         "properties": properties,
+         "sub_holding_keys": sub_holding_keys,
+         "identifiers": identifiers,
+     }
+
+     # Generate the intersection between the available attributes and the provided attributes
+     provided_attributes = set(list(mapping.keys()) + list(additional_attributes.keys()))
+     available_attributes = set(list(obj_attr.keys()))
+     populate_attributes = provided_attributes.intersection(available_attributes)
+
+     # Used to check if all attributes are none
+     total_count = 0
+     none_count = 0
+     missing_value = False
+
+     # For each of the attributes to populate
+     for key in list(populate_attributes):
+
+         # Get the attribute type
+         attribute_type = obj_attr[key]
+
+         # If it is an additional attribute, populate it with the provided values and move to the next attribute
+         if key in list(additional_attributes.keys()):
+             # Handle identifiers provided within an instrument definition (e.g. 'Bond', 'Future', etc.)
+             if (key, attribute_type) == ("identifiers", "Mapping[str, dont_match"):
+                 obj_init_values[key] = {
+                     str_key: row[str_value]
+                     for str_key, str_value in mapping[key].items()
+                     if not pd.isna(row[str_value])
+                 }
+             else:
+                 obj_init_values[key] = additional_attributes[key]
+
+             continue
+
+         # This block keeps track of the number of missing (non-additional) attributes
+         else:
+             total_count += 1
+             if mapping[key] is None:
+                 none_count += 1
+
+         # If this is the last object and there is no more nesting, set the value from the row
+         if not isinstance(mapping[key], dict):
+             # If this exists in the mapping with a value and there is a value in the row for it
+             if mapping[key] is not None and not pd.isna(row[mapping[key]]):
+                 # Converts to a date if it is a date field
+                 if "date" in key or "created" in key or "effective_at" in key:
+                     obj_init_values[key] = str(DateOrCutLabel(row[mapping[key]]))
+                 # Converts to a list element if it is a list field
+                 elif "list" in attribute_type and not isinstance(
+                     row[mapping[key]], list
+                 ):
+                     obj_init_values[key] = [row[mapping[key]]]
+                 else:
+                     obj_init_values[key] = row[mapping[key]]
+             elif key in obj_attr_required_map:
+                 missing_value = True
+             elif mapping[key]:
+                 none_count += 1
+
+         # If there is more nesting, call the function recursively
+         else:
+             # Ensure that if there is a complex attribute type e.g. dict(str, InstrumentIdValue) it is extracted
+             attribute_type, nested_type, optional = extract_lusid_model_from_attribute_type(
+                 attribute_type
+             )
+
+             # Call the function recursively
+             value = set_attributes_recursive(
+                 model_object=getattr(lusid.models, attribute_type),
+                 mapping=mapping[key],
+                 row=row,
+             )
+
+             obj_init_values[key] = [value] if nested_type == "list" else value
+
+     """
+     If all attributes are None, propagate None rather than a model filled with Nones. For example, if a corporate
+     action source id has no scope or code, build the parent model with corporate_action_source_id = None rather
+     than corporate_action_source_id = lusid.models.ResourceId(scope=None, code=None).
+     """
+     if total_count == none_count or missing_value:
+         return None
+
+     # Create an instance of and populate the model object
+     instance = model_object(**obj_init_values)
+
+     # Support for polymorphism; we can identify these `abstract` classes by the existence of the below
+     """ if getattr(instance, "discriminator"):
+         discriminator = getattr(instance, getattr(instance, "discriminator"))
+
+         actual_class = model_object.discriminator_value_class_map[discriminator]
+
+         return set_attributes_recursive(
+             model_object=getattr(lusid.models, actual_class), mapping=mapping, row=row,
+         )
+     """
+     return instance
+
+
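A minimal sketch of how these two functions fit together, using ResourceId (one of the simplest lusid.models objects) and hypothetical column names:

    row = pd.Series({"Scope": "finbourne", "Code": "uk-equity"})
    resource_id = populate_model(
        model_object_name="ResourceId",
        required_mapping={"scope": "Scope", "code": "Code"},
        optional_mapping={},
        row=row,
        properties=None,
    )
    # resource_id == lusid.models.ResourceId(scope="finbourne", code="uk-equity")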
+ @checkargs
+ def update_dict(orig_dict: dict, new_dict) -> dict:
+     """
+     This is used to update a dictionary with another dictionary. The default Python update method does not merge
+     nested dictionaries; this method does. The original dictionary is modified in place.
+
+     Parameters
+     ----------
+     orig_dict : dict
+         The original dictionary to update
+     new_dict : dict
+         The new dictionary to merge with the original
+
+     Returns
+     -------
+     orig_dict : dict
+         The updated original dictionary
+     """
+
+     # Iterate over key value pairs in the new dictionary to merge into the original
+     for key, val in new_dict.items():
+         # If a mapping object (e.g. dictionary), call the function recursively
+         if isinstance(val, Mapping):
+             tmp = update_dict(orig_dict.get(key, {}), val)
+             orig_dict[key] = tmp
+         # If a list, then merge it into the original dictionary
+         elif isinstance(val, list):
+             orig_dict[key] = orig_dict.get(key, []) + val
+         # Do the same for any other type
+         else:
+             orig_dict[key] = new_dict[key]
+
+     return orig_dict
+
+
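For example, merging a nested dictionary and a list:

    base = {"a": {"x": 1}, "tags": [1]}
    update_dict(base, {"a": {"y": 2}, "tags": [2]})
    # base == {"a": {"x": 1, "y": 2}, "tags": [1, 2]}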
+ @checkargs
+ def expand_dictionary(dictionary: dict, key_separator: str = ".") -> dict:
+     """
+     Takes a flat dictionary (no nesting) with keys separated by a separator and converts it into a nested
+     dictionary
+
+     Parameters
+     ----------
+     dictionary : dict
+         The input dictionary with separated keys
+     key_separator : str
+         The separator to use
+
+     Returns
+     -------
+     dict_expanded : dict
+         The expanded nested dictionary
+     """
+
+     dict_expanded = {}
+
+     # Loop over each composite key and final value
+     for key, value in dictionary.items():
+         # Split the key on the separator
+         components = key.split(key_separator)
+         # Get the expanded dictionary for this key and update the master dictionary
+         update_dict(
+             dict_expanded, expand_dictionary_single_recursive(0, components, value)
+         )
+
+     return dict_expanded
+
+
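For example, expanding a dot-separated mapping:

    expand_dictionary({"transaction_price.price": "Price", "units": "Units"})
    # returns {"transaction_price": {"price": "Price"}, "units": "Units"}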
+ @checkargs
+ def expand_dictionary_single_recursive(index: int, key_list: list, value) -> dict:
+     """
+     Takes a list of keys and a value and turns it into a nested dictionary. This is a recursive function.
+
+     Parameters
+     ----------
+     index : int
+         The current index of the key in the list of keys
+     key_list : list[str]
+         The list of keys to turn into a nested dictionary
+     value : any
+         The final value to match against the last (deepest) key
+
+     Returns
+     -------
+     dict
+         The nested dictionary built from the keys, with the value against the last (deepest) key
+     """
+
+     # Gets the current key in the list
+     key = key_list[index]
+
+     # If it is the last key in the list, return a dictionary with it keyed against the value
+     if index == len(key_list) - 1:
+         return {key: value}
+
+     # Otherwise key it against calling this function recursively with the next key
+     return {key: expand_dictionary_single_recursive(index + 1, key_list, value)}
+
+
+ @checkargs
+ def get_swagger_dict(api_url: str) -> dict:
+     """
+     Gets the lusid.json swagger file
+
+     Parameters
+     ----------
+     api_url : str
+         The base api url for the LUSID instance
+
+     Returns
+     -------
+     dict
+         The swagger file as a dictionary
+     """
+
+     swagger_path = "/swagger/v0/swagger.json"
+     swagger_url = api_url + swagger_path
+     swagger_file = requests.get(swagger_url)
+
+     if swagger_file.status_code == 200:
+         swagger = json.loads(swagger_file.text)
+
+         app_name = swagger.get("info", {}).get("title")
+         if app_name != "LUSID API":
+             raise ValueError(f"Invalid LUSID OpenAPI file: {swagger_url}")
+
+         return swagger
+     else:
+         raise ValueError(
+             f"""Received a {swagger_file.status_code} response from the provided url, please double check
+             the base api url and try again"""
+         )
+
+
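For example (the domain below is hypothetical):

    swagger = get_swagger_dict("https://myfirm.lusid.com/api")
    # swagger["info"]["title"] == "LUSID API"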
+ def generate_required_attributes_list():
+     pass
+
+
+ @checkargs
+ def verify_all_required_attributes_mapped(
+     mapping: dict,
+     model_object_name: str,
+     exempt_attributes: list = None,
+     key_separator: str = ".",
+ ) -> None:
+     """
+     Verifies that all required attributes are included in the mapping. Passes silently if they are and raises an
+     exception otherwise.
+
+     Parameters
+     ----------
+     mapping : dict
+         The required mapping
+     model_object_name : str
+         The name of the lusid.models object that the mapping is for
+     exempt_attributes : list[str]
+         The attributes that are exempt from needing to be in the required mapping
+     key_separator : str
+         The separator to use to join the required attributes together
+
+     Returns
+     -------
+     None
+     """
+
+     # Check that the provided model name actually exists
+     model_object = getattr(lusid.models, model_object_name, None)
+
+     if model_object is None:
+         raise TypeError("The provided model_object is not a lusid.model object")
+
+     # Convert a None to an empty list
+     exempt_attributes = (
+         Validator(exempt_attributes, "exempt_attributes")
+         .set_default_value_if_none([])
+         .value
+     )
+
+     # Gets the required attributes for this model
+     required_attributes = get_required_attributes_model_recursive(
+         model_object=model_object, key_separator=key_separator
+     )
+
+     # Removes the exempt attributes; iterate over a copy so that removing items is safe
+     for attribute in list(required_attributes):
+         # Removes all nested attributes, for example if "identifiers" is exempt then "identifiers.value" is removed
+         if attribute.split(key_separator)[0] in exempt_attributes:
+             required_attributes.remove(attribute)
+
+     missing_attributes = set(required_attributes) - set(list(mapping.keys()))
+
+     if len(missing_attributes) > 0:
+         raise ValueError(
+             f"""The required attributes {str(missing_attributes)} are missing from the mapping. Please
+             add them."""
+         )
+
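A minimal sketch against ResourceId, whose only attributes (scope and code) are both required:

    verify_all_required_attributes_mapped(
        {"scope": "Scope Column", "code": "Code Column"},
        model_object_name="ResourceId",
    )
    # passes silently; omitting "code" from the mapping would raise a ValueError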
+ def get_attributes_and_types(model_object):
+     """
+     Gets the attribute names and types for a LUSID model by inspecting its pydantic fields
+     """
+
+     attributes = {}
+
+     # __fields__ is a pydantic.v1 property
+     for key, value in model_object.__fields__.items():
+
+         nested_type = None
+
+         attribute_type = str(value)
+
+         match = re.search(r"type=([A-Za-z\[\], ]+)\s", attribute_type)
+
+         if match:
+             attribute_type = match.group(1)
+
+         optional = False
+         optionalStr = "Optional["
+
+         if attribute_type.startswith(optionalStr):
+             attribute_type = attribute_type.split(optionalStr)[1]
+             attribute_type = attribute_type[0:len(attribute_type) - 1]
+             optional = True
+
+         """ # If the attribute type is a mapping e.g. Mapping[str, InstrumentIdValue], extract the type
+         if "Mapping" in attribute_type:
+             attribute_type = attribute_type.split(", ")[1].rstrip("]")
+             nested_type = "Mapping"
+
+         # If the attribute type is a dictionary e.g. dict(str, InstrumentIdValue), extract the type
+         if "dict" in attribute_type:
+             attribute_type = attribute_type.split(", ")[1].rstrip(")")
+             nested_type = "dict"
+
+         # If it is a list e.g. list[ModelProperty] extract the type
+         if "list" in attribute_type:
+             attribute_type = attribute_type.split("list[")[1].rstrip("]")
+             nested_type = "list"
+
+         if "List" in attribute_type:
+             attribute_type = attribute_type.split("List[")[1].rstrip("]")
+             nested_type = "list"
+         """
+
+         attributes[key] = attribute_type
+
+     """ # Get the members of the object
+     for attr_name, attr_value in inspect.getmembers(model_object):
+         # Check if it's a data attribute (not a method or function)
+         if not callable(attr_value) and not attr_name.startswith('__'):
+             # Get the type of the attribute
+             attr_type = type(attr_value).__name__
+             attributes[attr_name] = attr_type """
+     return attributes
+
+ @checkargs
+ def get_required_attributes_model_recursive(model_object, key_separator: str = "."):
+     """
+     This is a recursive function which gets all of the required attributes on a LUSID model. If the model is nested
+     then it separates the attributes by a '.' until the bottom level where no more models are required and a primitive
+     type is supplied e.g. string, int etc.
+
+     Parameters
+     ----------
+     model_object : lusid.model
+         The model to get required attributes for
+     key_separator : str
+         The separator to use to join the required attributes together
+
+     Returns
+     -------
+     list[str]
+         The required attributes of the model
+     """
+
+     attributes = []
+
+     # Get the required attributes for the current model
+     required_attributes = get_required_attributes_from_model(model_object)
+
+     # Get the types of the attributes for the current model
+     open_api_types = get_attributes_and_types(model_object)
+
+     for required_attribute in required_attributes:
+
+         required_attribute_type = open_api_types[required_attribute]
+
+         # Check whether there is a LUSID model for this required attribute; if there is no further nesting then add it
+         if not check_nested_model(str(required_attribute_type)):
+             attributes.append(camel_case_to_pep_8(required_attribute))
+
+         # Otherwise call the function recursively
+         else:
+             # Ensure that if there is a complex attribute type e.g. dict(str, InstrumentIdValue) it is extracted
+             (
+                 required_attribute_type,
+                 nested_type,
+                 optional,
+             ) = extract_lusid_model_from_attribute_type(str(required_attribute_type))
+
+             nested_required_attributes = get_required_attributes_model_recursive(
+                 model_object=getattr(lusid.models, required_attribute_type),
+             )
+
+             for nested_required_attribute in nested_required_attributes:
+                 attributes.append(
+                     key_separator.join(
+                         [
+                             camel_case_to_pep_8(required_attribute),
+                             nested_required_attribute,
+                         ]
+                     )
+                 )
+
+     return attributes
+
+
+ def get_required_attributes_from_model(model_object) -> list:
+     """
+     Gets the required attributes for a LUSID model using reflection
+
+     Parameters
+     ----------
+     model_object : lusid.models
+         A LUSID model object
+
+     Returns
+     -------
+     list[str]
+         The required attributes
+     """
+
+     # Get the source code for the model
+     model_details = inspect.getsource(model_object)
+
+     # A bit of cleansing to aid the regex
+     model_details = model_details.replace('"""', '')
+     model_details = model_details.replace(r"\n", "\n")
+
+     # Required attributes are declared as pydantic fields of the form `name: ... = Field(...,`
+     required_attributes = re.findall(r'(\w+):.*?= Field\(\.\.\.,', model_details)
+     all_attributes = re.findall(r'^\s*(\w+):', model_details, re.MULTILINE)
+
+     '''
+     In earlier SDK generations the required/optional status was instead determined by whether "is None:" existed in
+     the attribute's setter function. Here are two examples:
+
+     A) A None value is not allowed and hence this is required. Notice the "if identifiers is None:" condition.
+
+     @identifiers.setter
+     def identifiers(self, identifiers):
+         """Sets the identifiers of this InstrumentDefinition.
+         A set of identifiers that can be used to identify the instrument. At least one of these must be configured to be a unique identifier.  # noqa: E501
+         :param identifiers: The identifiers of this InstrumentDefinition.  # noqa: E501
+         :type: dict(str, InstrumentIdValue)
+         """
+         if identifiers is None:
+             raise ValueError("Invalid value for `identifiers`, must not be `None`")  # noqa: E501
+
+         self._identifiers = identifiers
+
+     B) A None value is allowed and hence this is optional
+
+     @look_through_portfolio_id.setter
+     def look_through_portfolio_id(self, look_through_portfolio_id):
+         """Sets the look_through_portfolio_id of this InstrumentDefinition.
+         :param look_through_portfolio_id: The look_through_portfolio_id of this InstrumentDefinition.  # noqa: E501
+         :type: ResourceId
+         """
+
+         self._look_through_portfolio_id = look_through_portfolio_id
+     '''
+     return required_attributes
+
+
+ def extract_lusid_model_from_attribute_type(attribute_type: str):
+     """
+     Extracts a LUSID model from a complex attribute type e.g. dict(str, InstrumentIdValue) if it exists. If there
+     is no LUSID model the attribute type is still returned
+
+     Parameters
+     ----------
+     attribute_type : str
+         The attribute type to extract the model from
+
+     Returns
+     -------
+     attribute_type : str
+         The returned attribute type with the LUSID model extracted if possible
+     nested_type : str
+         The type of nesting used e.g. list or Mapping
+     optional : bool
+         Whether the attribute type was wrapped in Optional[...]
+     """
+
+     nested_type = None
+     # For "name='identifiers' type=Mapping[str, InstrumentIdValue] required=True"
+     # this will give Mapping[str, InstrumentIdValue]
+     match = re.search(r"type=([A-Za-z\[\], ]+)\s", attribute_type)
+
+     if match:
+         attribute_type = match.group(1)
+
+     # If the attribute type is a mapping e.g. Mapping[str, InstrumentIdValue], extract the type
+     if "Mapping" in attribute_type:
+         attribute_type = attribute_type.split(", ")[1].rstrip("]")
+         nested_type = "Mapping"
+
+     # If the attribute type is a dictionary e.g. dict(str, InstrumentIdValue), extract the type
+     if "dict" in attribute_type:
+         attribute_type = attribute_type.split(", ")[1].rstrip(")")
+         nested_type = "dict"
+
+     # If it is a list e.g. list[ModelProperty], extract the type
+     if "list" in attribute_type:
+         attribute_type = attribute_type.split("list[")[1].rstrip("]")
+         nested_type = "list"
+
+     if "List" in attribute_type:
+         attribute_type = attribute_type.split("List[")[1].rstrip("]")
+         nested_type = "list"
+
+     optional = False
+     optionalStr = "Optional["
+
+     if attribute_type.startswith(optionalStr):
+         attribute_type = attribute_type.split(optionalStr)[1].rstrip("]")
+         optional = True
+
+     return attribute_type, nested_type, optional
+
+
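For example:

    extract_lusid_model_from_attribute_type("Mapping[str, InstrumentIdValue]")
    # returns ("InstrumentIdValue", "Mapping", False)

    extract_lusid_model_from_attribute_type("Optional[ResourceId]")
    # returns ("ResourceId", None, True)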
+ @checkargs
+ def check_nested_model(required_attribute_type: str) -> bool:
+     """
+     Takes the type of a required attribute on a model and checks whether this attribute requires a model of its own
+
+     Parameters
+     ----------
+     required_attribute_type : str
+         The type of the required attribute
+
+     Returns
+     -------
+     bool
+         Whether or not the attribute requires a LUSID model of its own
+     """
+
+     required_attribute_type, nested_type, optional = extract_lusid_model_from_attribute_type(
+         required_attribute_type
+     )
+
+     top_level_model = getattr(lusid.models, required_attribute_type, None)
+
+     if top_level_model is None:
+         return False
+
+     return True
+
+
+ @checkargs
+ def gen_dict_extract(key, var: dict):
+     """
+     Searches a nested dictionary for a key, yielding any values it finds against that key
+
+     Parameters
+     ----------
+     key : str
+         The key to search for
+     var : dict
+         The dictionary to search
+
+     Returns
+     -------
+     generator(result)
+         A generator with the results
+     """
+
+     if hasattr(var, "items"):
+         for k, v in var.items():
+             if k == key:
+                 yield v
+             if isinstance(v, dict):
+                 for result in gen_dict_extract(key, v):
+                     yield result
+             elif isinstance(v, list):
+                 for d in v:
+                     for result in gen_dict_extract(key, d):
+                         yield result
+
+
+ @checkargs
+ def camel_case_to_pep_8(attribute_name: str) -> str:
+     """
+     Converts a camel case name to the PEP 8 standard
+
+     Parameters
+     ----------
+     attribute_name : str
+         The camel case attribute name
+
+     Returns
+     -------
+     str
+         The PEP 8 formatted attribute name
+     """
+
+     matches = re.finditer(
+         ".+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)", attribute_name
+     )
+     return "_".join([m.group(0)[0].lower() + m.group(0)[1:] for m in matches])
+
+
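For example:

    camel_case_to_pep_8("transactionPrice")    # returns "transaction_price"
    camel_case_to_pep_8("totalConsideration")  # returns "total_consideration"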
+ def convert_cell_value_to_string(data):
+     """
+     Converts the value of a cell to a string if it is a list or a dictionary
+
+     Parameters
+     ----------
+     data
+         The value of the cell in the dataframe
+
+     Returns
+     -------
+     str
+         The original data if it is not a list or a dictionary, otherwise the string representation of these
+     """
+
+     if isinstance(data, list):
+         return ", ".join(data)
+
+     elif isinstance(data, dict):
+         return str(data)
+
+     else:
+         return data
+
+
+ def handle_nested_default_and_column_mapping(
+     data_frame: pd.DataFrame, mapping: dict, constant_prefix: str = "$"
+ ):
+     """
+     This function handles when a mapping is provided which contains as a value a dictionary with a column and/or default
+     key rather than just a string with the column name. It populates the DataFrame with the default value as appropriate
+     and removes the nesting so that the model can be populated later.
+
+     Parameters
+     ----------
+     data_frame : pd.DataFrame
+         The DataFrame to update
+     mapping : dict
+         The original mapping (can be required or optional)
+     constant_prefix : str
+         The prefix that can be used to specify a constant
+
+     Returns
+     -------
+     data_frame : pd.DataFrame
+         The updated DataFrame
+     mapping_updated : dict
+         The updated mapping
+     """
+
+     # Copy the data frame to ensure that it is a copy and not a view (which could make changes to the original
+     # dataframe). This also fixes the SettingWithCopyWarning that pandas will throw due to the difference between copy
+     # and view.
+     data_frame = data_frame.copy()
+
+     mapping_updated = {}
+
+     for key, value in mapping.items():
+
+         # If the value of the mapping is a dictionary
+         if isinstance(value, dict):
+
+             # If the dictionary contains a column and a default, fill nulls with the default in that column
+             if ("column" in list(value.keys())) and ("default" in list(value.keys())):
+                 mapping_updated[key] = value["column"]
+                 data_frame[mapping_updated[key]] = data_frame[
+                     mapping_updated[key]
+                 ].fillna(value["default"])
+
+             # If there is only a default specified, create a new column filled with the default
+             elif not ("column" in list(value.keys())) and (
+                 "default" in list(value.keys())
+             ):
+                 mapping_updated[key] = f"LUSID.{key}"
+                 data_frame[mapping_updated[key]] = value["default"]
+
+             # If there is only a column specified, unnest it
+             elif ("column" in list(value.keys())) and not (
+                 "default" in list(value.keys())
+             ):
+                 mapping_updated[key] = value["column"]
+
+             else:
+                 raise KeyError(
+                     f"""You have passed in a dictionary as the value for the mapping for {key}, however
+                     it does not contain a key for "column" or "default". Please provide a key, value
+                     pair for one or both of these keys, with the column being the column name and the default
+                     value being the default value to use. Alternatively just provide a string which
+                     is the column name to use."""
+                 )
+
+         elif isinstance(value, str):
+
+             if len(value) == 0:
+                 raise IndexError(
+                     f"Unspecified mapping field: {key}. Please assign a value or remove this from the "
+                     f"mapping"
+                 )
+
+             if value[0] != constant_prefix:
+                 mapping_updated[key] = value
+             else:
+                 mapping_updated[key] = f"LUSID.{key}"
+                 data_frame[mapping_updated[key]] = value[1:]
+
+         elif isinstance(value, int):
+             mapping_updated[key] = f"LUSID.{key}"
+             data_frame[mapping_updated[key]] = value
+
+         else:
+             raise ValueError(
+                 f"""You have passed in a value with type {type(value)} for the mapping for {key}, this is
+                 not a supported type. Please provide a string with the column name to use, a constant
+                 value prefixed by {constant_prefix}, an integer value, or a dictionary
+                 with the keys "column" and "default" where column is the column name and default
+                 is the default value to use."""
+             )
+
+     return data_frame, mapping_updated
+
+
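A sketch with hypothetical column names, showing both the column/default and constant forms:

    df = pd.DataFrame({"Txn Type": [None, "Sell"]})
    df, updated = handle_nested_default_and_column_mapping(
        df,
        {"type": {"column": "Txn Type", "default": "Buy"},
         "transaction_currency": "$GBP"},
    )
    # updated == {"type": "Txn Type", "transaction_currency": "LUSID.transaction_currency"}
    # df["Txn Type"] is ["Buy", "Sell"]; df["LUSID.transaction_currency"] is "GBP" throughout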
+ def load_json_file(file_path: str) -> dict:
+     """
+     Loads and parses a JSON file
+
+     Parameters
+     ----------
+     file_path : str
+         The absolute path to the JSON file, or a path relative to this module
+
+     Returns
+     -------
+     data : dict
+         The parsed data from the JSON file
+     """
+
+     if not os.path.isabs(file_path):
+         file_path = Path(__file__).parent.joinpath(file_path)
+     if not os.path.exists(file_path):
+         raise OSError(f"Json file not found at {file_path}")
+     with open(file_path) as json_file:
+         data = json.load(json_file)
+     return data
+
+
+ @checkargs
+ def load_data_to_df_and_detect_delimiter(args: dict) -> pd.DataFrame:
+     """
+     This function loads data from a given file path and converts it into a pandas DataFrame
+
+     Parameters
+     ----------
+     args : dict
+         Arguments parsed in from the command line, containing args["file_path"]
+
+     Returns
+     -------
+     pd.DataFrame
+         DataFrame containing the data
+     """
+     if not os.path.exists(args["file_path"]):
+         raise OSError(f"file path {args['file_path']} does not exist")
+
+     with open(args["file_path"], "r") as read_file:
+         logging.info(f"loading data from {args['file_path']}")
+         data = csv.reader(read_file, lineterminator=args["line_terminator"])
+
+         # Skip over the unrelated preamble lines to get to the first line of data that we are interested in
+         for pre_amble in range(args["num_header"]):
+             read_file.readline()
+
+         # Now that we are at the first line of data, get the header row that will contain the formatting we are
+         # interested in
+         header_line = read_file.readline()
+
+         if not args["delimiter"]:
+             args["delimiter"] = get_delimiter(header_line)
+
+         if args["delimiter"] == header_line:
+             err = (
+                 f"Unable to detect delimiter from first line of data at line number {args['num_header']}: "
+                 f"\n\t>> "
+                 f"{header_line}"
+             )
+             raise ValueError(err)
+
+     with open(args["file_path"], "r") as read_file:
+         # Read data from the lines specified at the command line by num_header and num_footer
+         return pd.read_csv(
+             args["file_path"],
+             delimiter=args["delimiter"],
+             header=args["num_header"],
+             skipfooter=args["num_footer"],
+             engine="python",
+         )
+
+
+ def get_delimiter(sample_string: str):
+     return detect(sample_string).replace("\\", "\\\\")
+
+
+ def check_mapping_fields_exist(
+     required_list: list, search_list: list, file_type: str
+ ) -> list:
+     """
+     This function checks that items in one list exist in another list
+
+     Parameters
+     ----------
+     required_list : list[str]
+         The list of items to search for
+     search_list : list[str]
+         The list to search in
+     file_type : str
+         The file type of the data e.g. instruments, holdings, transactions
+
+     Returns
+     -------
+     missing_fields : list[str]
+         The items in required_list missing from search_list; a ValueError is raised if any are missing, so an
+         empty list is returned on success
+     """
+
+     missing_fields = [
+         item
+         for item in required_list
+         if item not in search_list and (len(item) > 0 and item[0] != "$")
+     ]
+     if missing_fields:
+         raise ValueError(
+             f"{file_type} fields not found in data columns: {missing_fields}"
+         )
+     return missing_fields
+
+
+ def parse_args(args: list):
+     """
+     Argument parser for command line apps
+
+     Parameters
+     ----------
+     args : list[str]
+         The command line tokens to parse
+
+     Returns
+     -------
+     tuple (dict, argparse.ArgumentParser)
+         The parsed arguments and the parser itself
+     """
+     ap = argparse.ArgumentParser()
+     ap.add_argument(
+         "-f",
+         "--file_path",
+         required=True,
+         help=r"full path for data (eg. c:\Users\Joe\data\instruments1.csv)",
+     )
+     ap.add_argument(
+         "-c",
+         "--secrets_file",
+         help=r"full path for credential secrets (eg. c:\Users\Joe\secrets.json). Not required if set as "
+         r"environment variables",
+     )
+     ap.add_argument(
+         "-m",
+         "--mapping",
+         required=True,
+         help=r"full path to mappings.json (see mappings_template.json)",
+     )
+     ap.add_argument("-s", "--scope", help=r"LUSID scope to act in")
+     ap.add_argument(
+         "-ps", "--property_scope", help=r"LUSID scope to load properties into"
+     )
+     ap.add_argument(
+         "-dl",
+         "--delimiter",
+         help=r"explicitly specify delimiter for data file and disable automatic delimiter detection",
+     )
+     ap.add_argument(
+         "-nh",
+         "--num_header",
+         type=int,
+         default=0,
+         help="number of header lines before column titles",
+     )
+     ap.add_argument(
+         "-nf",
+         "--num_footer",
+         type=int,
+         default=0,
+         help="number of footer lines after end of data",
+     )
+     ap.add_argument(
+         "-lt",
+         "--line_terminator",
+         default=r"\n",
+         help="character that specifies the end of a line, default value is {}".format(
+             r"\n"
+         ),
+     )
+     ap.add_argument(
+         "-b",
+         "--batch_size",
+         default=2000,
+         type=int,
+         help="specifies the batch size for async requests",
+     )
+     ap.add_argument(
+         "-disp",
+         "--display_response_head",
+         help="displays the first 40 successful and unsuccessful items",
+         action="store_true",
+     )
+     ap.add_argument(
+         "-dr",
+         "--dryrun",
+         help="runs the app without calling LUSID",
+         action="store_true",
+     )
+     ap.add_argument(
+         "-d", "--debug", help=r"print debug messages, expected input: 'debug'"
+     )
+
+     ap.add_argument(
+         "-l",
+         "--logging_file",
+         required=False,
+         help=r"full path for logging file (eg. c:\Users\Joe\data\debug.log)",
+     )
+
+     return vars(ap.parse_args(args=args)), ap
+
+
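For example, parsing a hypothetical invocation:

    args, ap = parse_args(["-f", "instruments.csv", "-m", "mappings.json", "-s", "my-scope"])
    # args["file_path"] == "instruments.csv", args["scope"] == "my-scope", args["batch_size"] == 2000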
+ def scale_quote_of_type(
+     df: pd.DataFrame, mapping: dict, file_type: str = "quotes"
+ ) -> tuple[pd.DataFrame, dict]:
+     """
+     Scales quote values of quotes of a specified type
+
+     This function appends an extra column (__adjusted_quote) to a dataframe, containing quotes that have been scaled
+     by a scale factor specified in the mapping, if they can be identified using another field. An example usage of
+     this is processing a quotes file containing a mixture of equity prices in GBP and GBp.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         DataFrame containing quotes
+     mapping : dict
+         mapping containing mapping[file_type]["quote_scalar"]
+     file_type : str
+         File type of the data, default = "quotes"
+
+     Returns
+     -------
+     df : pd.DataFrame
+         dataframe containing the "__adjusted_quote" column
+     mapping : dict
+         mapping updated with "metric_value.value" set to "__adjusted_quote"
+     """
+
+     price_col = mapping[file_type]["quote_scalar"]["price"]
+     type_col = mapping[file_type]["quote_scalar"]["type"]
+     type_code = mapping[file_type]["quote_scalar"]["type_code"]
+     scale_factor = mapping[file_type]["quote_scalar"]["scale_factor"]
+
+     for col in [price_col, type_col]:
+         if col not in df.columns:
+             logging.error(f"column {col} does not exist in quotes DataFrame.")
+             raise KeyError(f"column {col} does not exist in quotes DataFrame.")
+
+     df["__adjusted_quote"] = None
+
+     for index, row in df.iterrows():
+         if np.isnan(row[price_col]) and row[type_col] == type_code:
+             logging.warning(
+                 f"Could not adjust price at row {index} because it contains no price value"
+             )
+             continue
+         elif np.isnan(row[price_col]):
+             continue
+
+         __adjusted_quote = (
+             row[price_col] * scale_factor
+             if row[type_col] == type_code
+             else row[price_col]
+         )
+
+         df.at[index, "__adjusted_quote"] = __adjusted_quote
+     mapping[file_type]["required"]["metric_value.value"] = "__adjusted_quote"
+     return df, mapping
+
+
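A sketch with hypothetical columns, scaling pence (GBp) quotes into pounds:

    df = pd.DataFrame({"price": [101.5, 10150.0], "ccy": ["GBP", "GBp"]})
    mapping = {"quotes": {"required": {},
                          "quote_scalar": {"price": "price", "type": "ccy",
                                           "type_code": "GBp", "scale_factor": 0.01}}}
    df, mapping = scale_quote_of_type(df, mapping)
    # df["__adjusted_quote"] is [101.5, 101.5]
    # mapping["quotes"]["required"]["metric_value.value"] == "__adjusted_quote"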
+ def identify_cash_items(
+     dataframe, mappings, file_type: str, remove_cash_items: bool = False
+ ) -> tuple[pd.DataFrame, dict]:
+     """
+     This function identifies cash items in a dataframe and either creates a currency identifier in a new
+     __currency_identifier_for_LUSID column and amends the mapping dictionary accordingly, or deletes cash items
+     from the dataframe.
+
+     Parameters
+     ----------
+     dataframe : pd.DataFrame
+         The dataframe to look for cash items in
+     mappings : dict
+         The full mapping structure
+     file_type : str
+         The type of data in the dataframe e.g. "instruments", "quotes", "transactions", "portfolios"
+     remove_cash_items : bool
+         Whether to remove cash items from the dataframe
+
+     Returns
+     -------
+     dataframe : pd.DataFrame
+         The dataframe with cash items identified (or removed)
+     mappings : dict
+         The mapping with the currency identifier mapping included
+     """
+
+     cash_flag_specification = mappings["cash_flag"]
+     if not remove_cash_items:
+         dataframe["__currency_identifier_for_LUSID"] = None
+         mappings[file_type]["identifier_mapping"][
+             "Currency"
+         ] = "__currency_identifier_for_LUSID"
+
+     rm_index = []
+     for index, row in dataframe.iterrows():
+         for column in cash_flag_specification["cash_identifiers"].keys():
+             if row[column] in cash_flag_specification["cash_identifiers"][column]:
+                 if remove_cash_items:
+                     rm_index.append(index)
+                 else:
+                     dataframe.at[
+                         index, "__currency_identifier_for_LUSID"
+                     ] = populate_currency_identifier_for_LUSID(
+                         row, column, cash_flag_specification
+                     )
+                 break
+     if remove_cash_items:
+         dataframe = dataframe.drop(rm_index)
+
+     return dataframe, mappings
+
+
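A sketch of the expected cash_flag structure (the column names and values here are hypothetical):

    df = pd.DataFrame({"instrument_name": ["GBP Cash", "BP Plc"], "currency": ["GBP", "GBP"]})
    mappings = {
        "cash_flag": {
            "cash_identifiers": {"instrument_name": {"GBP Cash": "GBP"}},
            "implicit": "currency",
        },
        "transactions": {"identifier_mapping": {}},
    }
    df, mappings = identify_cash_items(df, mappings, "transactions")
    # the "GBP Cash" row gets "GBP" in __currency_identifier_for_LUSID; "BP Plc" is left as None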
+ def populate_currency_identifier_for_LUSID(
+     row: dict, column, cash_flag_specification: dict
+ ) -> str:
+     """
+     This function takes a cash transaction or holding in the form of a row from a dataframe and returns its currency
+     code, given the data's column containing a cash identifier and a dictionary that specifies how to set the currency
+     code.
+
+     Parameters
+     ----------
+     row : dict
+         The current data row
+     column : str
+         The current dataframe column that contains values that can be used to identify a cash transaction or
+         holding
+     cash_flag_specification : dict
+         A dictionary containing cash identifier columns and values, with either explicit currency codes or the
+         column from which the currency code can be inferred
+
+     Returns
+     -------
+     currency_code : str
+         The currency code for the current transaction or holding
+     """
+
+     if isinstance(cash_flag_specification["cash_identifiers"][column], dict):
+         if row[column] in cash_flag_specification["cash_identifiers"][column]:
+             logging.debug("Getting currency code from explicit definition in mapping")
+             currency_code = cash_flag_specification["cash_identifiers"][column][
+                 row[column]
+             ]
+             if not currency_code and "implicit" in cash_flag_specification.keys():
+                 logging.debug("couldn't find currency code in explicit definition")
+                 currency_code = row[cash_flag_specification["implicit"]]
+         else:
+             ex = (
+                 f"failed to find currency code either explicitly in cash_flag or implicitly from currency column "
+                 f"specified in cash_flag for {row}"
+             )
+             logging.error(ex)
+             raise ValueError(ex)
+
+     elif isinstance(cash_flag_specification["cash_identifiers"][column], list):
+         if "implicit" in cash_flag_specification.keys():
+             logging.info(
+                 "No currency codes explicitly specified, attempting to get implicitly from currency code "
+                 "column"
+             )
+             currency_code = row[cash_flag_specification["implicit"]]
+         else:
+             err = (
+                 "The cash identifiers were specified as a list without any explicit currency codes, and no 'implicit'"
+                 " field containing the name of a column containing currency codes exists. Please reformat cash_flag "
+                 "inside the mapping file correctly"
+             )
+             raise ValueError(err)
+     else:
+         logging.error(
+             f"cash_flag not configured correctly. 'cash_identifiers' must be dictionary (explicit) or list "
+             f"(for implicit), but got {type(cash_flag_specification['cash_identifiers'])}"
+         )
+         raise ValueError(
+             f"cash_flag not configured correctly. 'cash_identifiers' must be dictionary (explicit) or "
+             f"list (for implicit), but got {type(cash_flag_specification['cash_identifiers'])}"
+         )
+     return currency_code
+
+
+ def validate_mapping_file_structure(mapping: dict, columns: list, file_type: str):
+     """
+     This function takes a mapping structure and checks that each of the required fields is present
+
+     Parameters
+     ----------
+     mapping : dict
+         The full mapping structure
+     columns : list
+         The columns from the source data to search in
+     file_type : str
+         The type of file being upserted e.g. "instruments", "holdings", etc.
+
+     Returns
+     -------
+     None
+     """
+
+     # file_type
+     domain_lookup = load_json_file("config/domain_settings.json")
+     file_type_check = (
+         Validator(file_type, "file_type")
+         .make_singular()
+         .make_lower()
+         .check_allowed_value(list(domain_lookup.keys()))
+         .value
+     )
+
+     # required
+     if "required" in mapping[file_type].keys():
+         for field in mapping[file_type]["required"]:
+             if isinstance(mapping[file_type]["required"][field], dict):
+                 check_mapping_fields_exist(
+                     [mapping[file_type]["required"][field]["column"]],
+                     columns,
+                     "required",
+                 )
+             else:
+                 check_mapping_fields_exist(
+                     mapping[file_type]["required"].values(), columns, "required"
+                 )
+     else:
+         raise ValueError("'required' mapping field not provided in mapping")
+
+     # optional
+     if "optional" in mapping[file_type].keys():
+         check_mapping_fields_exist(
+             mapping[file_type]["optional"].values(), columns, "optional"
+         )
+
+     # identifier_mapping
+     if "identifier_mapping" in mapping[file_type].keys():
+         check_mapping_fields_exist(
+             mapping[file_type]["identifier_mapping"].values(),
+             columns,
+             "identifier_mapping",
+         )
+
+
+ def strip_whitespace(df: pd.DataFrame, columns: list) -> pd.DataFrame:
+     """
+     This function removes leading and trailing whitespace from string values in a Pandas DataFrame
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The DataFrame containing the data to remove whitespace from
+     columns : list[str]
+         The columns to strip whitespace from
+
+     Returns
+     -------
+     stripped_df : pd.DataFrame
+         The DataFrame with whitespace removed
+     """
+
+     stripped_df = pd.DataFrame.copy(df)
+
+     for col in columns:
+         stripped_df[col] = stripped_df[col].apply(
+             lambda x: x.strip() if isinstance(x, str) else x
+         )
+
+     return stripped_df
+
+
+ def generate_time_based_unique_id(time_generator=None):
+     """
+     Generates a unique ID based on the time since epoch.
+
+     Parameters
+     ----------
+     time_generator
+         Any class that has a .time() method on it which produces the time since 1970 in seconds
+
+     Returns
+     -------
+     uid : str
+         A unique, time based ID
+     """
+
+     if time_generator is None or isinstance(time_generator, types.ModuleType):
+         time_generator = default_time
+
+     elif getattr(time_generator, "time", None) is None or not isinstance(
+         getattr(time_generator, "time"), types.MethodType
+     ):
+         raise AttributeError(
+             "The provided time_generator does not have a method called time"
+         )
+
+     # Get the current time since epoch
+     current_time = time_generator.time()
+
+     if not isinstance(current_time, int) and not isinstance(current_time, float):
+         raise ValueError(
+             f"The provided response of time_generator.time() is not an int or a float, it is a {type(current_time)}"
+         )
+
+     # Multiply by 10,000,000 to get the value in 100s of nanoseconds
+     timestamp = hex(int(current_time * 10000000.0))
+     # Create the scope id by joining the hex representation with dashes every 4 characters
+     uid = "-".join(timestamp[i : i + 4] for i in range(2, len(timestamp), 4))
+     return uid
+
+
+ def generate_uuid():
+     return str(uuid.uuid4())
+
+
+ def create_scope_id(time_generator=None, use_uuid=False):
+     """
+     This function creates a unique ID based on the time since epoch for use
+     as a scope id.
+
+     Parameters
+     ----------
+     time_generator
+         Any class that has a .time() method on it which produces the time since 1970 in seconds
+     use_uuid : bool
+         If True, generate the scope id from a random UUID instead of the time
+
+     Returns
+     -------
+     scope_id : str
+         The scope identifier
+     """
+     if use_uuid:
+         return generate_uuid()
+     else:
+         return generate_time_based_unique_id(time_generator)
+
+
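For example (the generated values below are illustrative only):

    create_scope_id()               # e.g. "3c5f-29a8-1b3d-40", the hex timestamp dashed every 4 characters
    create_scope_id(use_uuid=True)  # e.g. "0b1f2c3d-4e5f-4a6b-8c7d-9e0f1a2b3c4d"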
+ def default_fx_forward_model(
+     df: pd.DataFrame,
+     fx_code: str,
+     func_transaction_units: typing.Callable[[], bool],
+     func_total_consideration: typing.Callable[[], bool],
+     mapping: dict,
+ ) -> tuple[pd.DataFrame, dict]:
+     """
+     Function that takes the two rows representing a single FX forward and merges them into a single transaction
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         DataFrame containing the transactions data
+     fx_code : str
+         The transaction type that identifies a forward
+     func_transaction_units : typing.Callable[[], bool]
+         A function that evaluates to true where the dataframe row contains the transaction units
+     func_total_consideration : typing.Callable[[], bool]
+         A function that evaluates to true where the dataframe row contains the total consideration
+     mapping : dict
+         The mapping for FX transactions
+
+     Returns
+     -------
+     fwds_txn_df : pd.DataFrame
+         DataFrame containing the FX transactions merged into single rows
+     mapping_cash_txn : dict
+         The updated mapping dictionary for fwds_txn_df
+     """
+
+     logging.info(
+         f"combining transactions of type {fx_code} into a single line using the {default_fx_forward_model.__name__}"
+         f" utility function"
+     )
+
+     t_type = mapping["transactions"]["required"]["type"]
+
+     if fx_code not in df[t_type].values:
+         raise ValueError(
+             f"Input transactions have no fx transaction types {fx_code} in the transaction type column {t_type}"
+         )
+
+     fwds_df = pd.DataFrame(df[df[t_type] == fx_code])
+
+     transaction_units_df = fwds_df[func_transaction_units]
+     total_consideration_df = fwds_df[func_total_consideration]
+
+     t_id = mapping["transactions"]["required"]["transaction_id"]
+
+     transaction_units_suffix = "_txn"
+     total_consideration_suffix = "_tc"
+     logging.info(
+         f"merging buy and sell legs of FX trades and suffixing with {[transaction_units_suffix, total_consideration_suffix]}"
+     )
+     fwds_txn_df = pd.merge(
+         transaction_units_df,
+         total_consideration_df,
+         how="outer",
+         on=[t_id, t_type],
+         suffixes=[transaction_units_suffix, total_consideration_suffix],
+     )
+
+     mapping_cash_txn = remap_after_merge(
+         mapping,
+         transaction_units_suffix=transaction_units_suffix,
+         total_consideration_suffix=total_consideration_suffix,
+     )
+
+     return fwds_txn_df, mapping_cash_txn
+
+
1601
+ def remap_after_merge(
1602
+ mapping: dict, transaction_units_suffix: str, total_consideration_suffix: str
1603
+ ) -> dict:
1604
+ """
1605
+ Remaps buy and sell fields in a mapping dictionary to the suffixed column names after a dataframe merge
1606
+
1607
+ Parameters
1608
+ ----------
1609
+ mapping : dict
1610
+ mapping dictionary that needs updating
1611
+ transaction_units_suffix : str
1612
+ Suffix appended to transaction units fields (e.g. "_txn")
1613
+ total_consideration_suffix : str
1614
+ Suffix appended to total consideration fields (e.g. "_tc")
1615
+
1616
+ Returns
1617
+ -------
1618
+ mapping : dict
1619
+ updated mapping dictionary
1620
+ """
1621
+ new_mapping = copy.deepcopy(mapping)
1622
+ file_type = "transactions"
1623
+ logging.info(f"updating mapping to new Total Consideration and transaction fields ")
1624
+ # currencies and amounts coming into the portfolio i.e. buy
1625
+
1626
+ total_consideration_fields = [
1627
+ "total_consideration.amount",
1628
+ "total_consideration.currency",
1629
+ "settlement_currency",
1630
+ ]
1631
+
1632
+ # currencies and amounts leaving the portfolio i.e. sell
1633
+
1634
+ transaction_units_fields = ["units", "transaction_currency"]
1635
+
1636
+ for key in new_mapping[file_type]["required"].keys():
1637
+ if key in transaction_units_fields:
1638
+ update_dict_value(
1639
+ new_mapping,
1640
+ key,
1641
+ new_mapping[file_type]["required"][key] + transaction_units_suffix,
1642
+ [file_type],
1643
+ )
1644
+ elif key in total_consideration_fields:
1645
+ update_dict_value(
1646
+ new_mapping,
1647
+ key,
1648
+ new_mapping[file_type]["required"][key] + total_consideration_suffix,
1649
+ [file_type],
1650
+ )
1651
+ return new_mapping
1652
+
1653
+
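+ # --- Editorial example (illustrative sketch, not part of the module) -------
+ # Hypothetical column names, showing how the suffixes are applied:
+ # >>> m = {"transactions": {"required": {
+ # ...     "units": "Quantity", "settlement_currency": "SettleCcy"}}}
+ # >>> remap_after_merge(m, "_txn", "_tc")["transactions"]["required"]
+ # {'units': 'Quantity_txn', 'settlement_currency': 'SettleCcy_tc'}
+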
1654
+ def update_dict_value(
1655
+ d: dict, s_key: str, val: typing.Union[str, float], top_level_values_to_search=None
1656
+ ):
1657
+ """
1658
+ Recursively searches a dictionary for a key and updates the value
1659
+
1660
+ This function searches a dictionary for a key and updates the value belonging to any matching keys. The top level
1661
+ values in which to search can be restricted via top_level_values_to_search.
1662
+
1663
+ Parameters
1664
+ ----------
1665
+ d : dict
1666
+ Dictionary to update
1667
+ s_key : str
1668
+ Key to search for that belongs to the value to be updated
1669
+ val : typing.Union[str, float]
1670
+ Updated value belonging to search key
1671
+ top_level_values_to_search : list
1672
+ (optional) top-level keys (e.g. file types) in the mapping to restrict the search to. If not specified, all matches are replaced
1673
+
1674
+ Returns
1675
+ -------
1676
+ d : dict
1677
+ updated dictionary
1678
+
1679
+ """
1680
+ # if top-level keys have been specified, only search the values belonging to those keys
1681
+ if top_level_values_to_search:
1682
+ for f_type in top_level_values_to_search:
1683
+ if f_type in d.keys():
1684
+ d[f_type] = update_dict_value(d.get(f_type, {}), s_key, val)
1685
+ else:
1686
+ err = (
1687
+ f"file_type {top_level_values_to_search} not found in top level of mapping. If passing full mapping structure,"
1688
+ f"ensure file type had been corrctly specified. If passing in a partial mapping structure,"
1689
+ f"remove this parameter."
1690
+ )
1691
+ logging.error(err)
1692
+ raise KeyError(err)
1693
+
1694
+ for k, v in d.items():
1695
+ # if the search key matches this key, update its value
1696
+ if s_key in k:
1697
+ d[k] = update_value(d[k], val)
1698
+ # otherwise recurse into any nested dictionaries
1699
+ elif isinstance(v, dict) and not top_level_values_to_search:
1700
+ d[k] = update_dict_value(d.get(k, {}), s_key, val)
1701
+ return d
1702
+
1703
+
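+ # --- Editorial example (illustrative sketch, not part of the module) -------
+ # Restricting the search to one top-level key leaves the others untouched:
+ # >>> m = {"transactions": {"required": {"units": "Quantity"}},
+ # ...      "holdings": {"required": {"units": "Units"}}}
+ # >>> update_dict_value(m, "units", "Quantity_txn", ["transactions"])["transactions"]
+ # {'required': {'units': 'Quantity_txn'}}
+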
1704
+ def update_value(d: typing.Union[dict, str], val: typing.Union[str, float]):
1705
+ """
1706
+ Updates value in dictionary and handles default and constant ($) specification
1707
+
1708
+ Parameters
1709
+ ----------
1710
+ d : typing.Union[dict, str]
1711
+ Existing value to update: a plain value, a "$"-prefixed constant, or a {"column", "default"} dict
1712
+ val : typing.Union[str, float]
1713
+ new value to set
1714
+
1715
+ Returns
1716
+ -------
1717
+ d : typing.Union[dict, str]
+ the updated value
1718
+
1719
+ """
1720
+
1721
+ # update values provided in "column" "default" format
1722
+ if isinstance(d, dict):
1723
+ if set(d.keys()) != {"column", "default"}:
1724
+ err = f"Failed to update dictionary. Expected ['column', 'default'] in {d}, but found {list(d.keys())}"
1725
+ raise ValueError(err)
1726
+
1727
+ if type(val) != type(d["column"]):
1728
+ warn = f"new data type is not same as original value"
1729
+ # logging.warning(warn)
1730
+ d["column"] = val
1731
+ return d
1732
+
1733
+ if type(val) != type(d):
1734
+ warn = f"new data type is not same as original value"
1735
+ # logging.warning(warn)
1736
+
1737
+ # update value provided with constant format using "$"
1738
+ if isinstance(d, str) and d.startswith("$"):
1739
+ return {"default": d[1:], "column": val}
1740
+ # for any other data types, simply update the value
1741
+ d = val
1742
+
1743
+ return d
1744
+
1745
+
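+ # --- Editorial example (illustrative sketch, not part of the module) -------
+ # The three value shapes this helper handles:
+ # >>> update_value({"column": "Qty", "default": 0}, "Qty_txn")
+ # {'column': 'Qty_txn', 'default': 0}
+ # >>> update_value("$GBP", "Ccy_tc")   # "$" constant gains a column mapping
+ # {'default': 'GBP', 'column': 'Ccy_tc'}
+ # >>> update_value("Qty", "Qty_txn")   # any other value is simply replaced
+ # 'Qty_txn'
+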
1746
+ def group_request_into_one(
1747
+ model_type: str, request_list: list, attribute_for_grouping: list, batch_index=0
1748
+ ):
1749
+ """
1750
+ This function takes a list of requests and collates an attribute from each request, adding the collated attributes
1751
+ back onto the first request in the list. The function returns the modified first request.
1752
+ For example, the function can take a list of CreatePortfolioGroupRequests, extract the "values" or portfolios from
1753
+ each request, and then add all portfolios back onto the first request in the list.
1754
+
1755
+ Parameters
1756
+ ----------
1757
+ model_type : str
1758
+ the model type which we will modify (e.g. "CreatePortfolioGroupRequest").
1759
+ request_list : list
1760
+ a list of requests.
1761
+ attribute_for_grouping : list
1762
+ the attributes on these requests which will be grouped.
1763
+ batch_index : int
1764
+ The index of the request in request_list to use as the base request (defaults to 0)
1765
+
1766
+ Returns
1767
+ -------
1768
+ request
1769
+ a single LUSID request
1770
+ """
1771
+
1772
+ # Define a base request for modifying - this is the first request in the list by default
1773
+
1774
+ if model_type not in dir(models):
1775
+ raise ValueError(f"The model {model_type} is not a valid LUSID model.")
1776
+
1777
+ model_class = getattr(models, model_type)
1778
+
1779
+ if batch_index >= len(request_list):
1780
+ raise IndexError(
1781
+ f"The batch_index ({batch_index}) is out of range for the request_list of length {len(request_list)}."
1782
+ )
1783
+
1784
+ if isinstance(attribute_for_grouping, list) and len(attribute_for_grouping) == 0:
1785
+ raise ValueError("The provided list of attribute_for_grouping is empty.")
1786
+
1787
+ base_request = request_list[batch_index]
1788
+
1789
+ attribs = get_attributes_and_types(model_class)
1790
+
1791
+
1792
+ for attrib in attribute_for_grouping:
1793
+ #for attrib, attrib_type in attribs.items():
1794
+
1795
+ if "List[" in attribs[attrib]:
1796
+
1797
+ # Collect the attributes from each request onto a list
1798
+
1799
+ batch_attrib = [
1800
+ lusid_model
1801
+ for nested_list in [
1802
+ getattr(request, attrib)
1803
+ for request in request_list
1804
+ if getattr(request, attrib) is not None
1805
+ ]
1806
+ for lusid_model in nested_list
1807
+ ]
1808
+
1809
+ # Assign collated values onto the base request
1810
+
1811
+ setattr(base_request, attrib, batch_attrib)
1812
+
1814
+ elif "Mapping[" in attribs[attrib]:
1815
+ # Collect the attributes from each request onto a dictionary
1816
+
1817
+ batch_attrib = {
1818
+ lusid_model: nested_list[lusid_model]
1819
+ for nested_list in [
1820
+ getattr(request, attrib)
1821
+ for request in request_list
1822
+ if getattr(request, attrib) is not None
1823
+ ]
1824
+ for lusid_model in nested_list
1825
+ }
1828
+
1829
+ # Assign collated values onto the base request
1830
+
1831
+ setattr(base_request, attrib, batch_attrib)
1832
+
1833
+ # Return base request with collated attributes
1834
+
1835
+ return base_request
1836
+
1837
+
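+ # --- Editorial example (illustrative sketch, not part of the module) -------
+ # Collating the "values" (portfolios) from several CreatePortfolioGroupRequests
+ # onto one request; group_requests is assumed to have been built elsewhere.
+ # >>> grouped = group_request_into_one(
+ # ...     "CreatePortfolioGroupRequest",
+ # ...     group_requests,
+ # ...     attribute_for_grouping=["values"])
+ # grouped.values now holds every portfolio from every request in the list.
+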
1838
+ def extract_unique_portfolio_codes(sync_batches: list):
1839
+ """
1840
+ Extract a unique list of portfolio codes from the sync_batches
1841
+
1842
+ Parameters
1843
+ ----------
1844
+ sync_batches : list
1845
+ A list of the batches used to upload the data into LUSID.
1846
+
1847
+ Returns
1848
+ -------
1849
+ A list of all the unique portfolio codes in the sync batches
1850
+ """
1851
+ codes_list = []
1852
+ for sync_batch in sync_batches:
1853
+ codes_list.extend(sync_batch.get("codes", []))
1856
+ return list(set(codes_list))
1857
+
1858
+
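+ # --- Editorial example (illustrative sketch, not part of the module) -------
+ # >>> batches = [{"codes": ["UK_EQUITY", "US_EQUITY"]}, {"codes": ["US_EQUITY"]}]
+ # >>> sorted(extract_unique_portfolio_codes(batches))
+ # ['UK_EQUITY', 'US_EQUITY']
+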
1859
+ def extract_unique_portfolio_codes_effective_at_tuples(sync_batches: list):
1860
+ """
1861
+ Extract a unique list of tuples containing portfolio codes and effective_at times
1862
+
1863
+ Parameters
1864
+ ----------
1865
+ sync_batches : list
1866
+ A list of the batches used to upload the data into LUSID.
1867
+
1868
+ Returns
1869
+ -------
1870
+ A list of all the unique tuples of portfolio codes and effective at times in the sync batches
1871
+ """
1872
+ code_tuples = []
1873
+ for sync_batch in sync_batches:
1874
+ for code, effective_at in zip(sync_batch["codes"], sync_batch["effective_at"]):
1875
+ # Append a tuple of (code, effective_at) to the code_tuples list
1876
+ code_tuples.append((code, effective_at))
1877
+ return list(set(code_tuples))
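+
+ # --- Editorial example (illustrative sketch, not part of the module) -------
+ # Codes are paired with effective_at timestamps position by position:
+ # >>> batches = [{"codes": ["UK_EQUITY", "US_EQUITY"],
+ # ...             "effective_at": ["2020-01-01", "2020-01-01"]},
+ # ...            {"codes": ["US_EQUITY"], "effective_at": ["2020-02-01"]}]
+ # >>> sorted(extract_unique_portfolio_codes_effective_at_tuples(batches))
+ # [('UK_EQUITY', '2020-01-01'), ('US_EQUITY', '2020-01-01'), ('US_EQUITY', '2020-02-01')]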