finbourne-sdk-utils 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- features/__init__.py +0 -0
- features/main.py +11 -0
- finbourne_sdk_utils/__init__.py +8 -0
- finbourne_sdk_utils/cocoon/__init__.py +34 -0
- finbourne_sdk_utils/cocoon/async_tools.py +94 -0
- finbourne_sdk_utils/cocoon/cocoon.py +1862 -0
- finbourne_sdk_utils/cocoon/cocoon_printer.py +455 -0
- finbourne_sdk_utils/cocoon/config/domain_settings.json +125 -0
- finbourne_sdk_utils/cocoon/config/seed_sample_data.json +36 -0
- finbourne_sdk_utils/cocoon/dateorcutlabel.py +198 -0
- finbourne_sdk_utils/cocoon/instruments.py +482 -0
- finbourne_sdk_utils/cocoon/properties.py +442 -0
- finbourne_sdk_utils/cocoon/seed_sample_data.py +137 -0
- finbourne_sdk_utils/cocoon/systemConfiguration.py +92 -0
- finbourne_sdk_utils/cocoon/transaction_type_upload.py +136 -0
- finbourne_sdk_utils/cocoon/utilities.py +1877 -0
- finbourne_sdk_utils/cocoon/validator.py +243 -0
- finbourne_sdk_utils/extract/__init__.py +1 -0
- finbourne_sdk_utils/extract/group_holdings.py +400 -0
- finbourne_sdk_utils/iam/__init__.py +1 -0
- finbourne_sdk_utils/iam/roles.py +74 -0
- finbourne_sdk_utils/jupyter_tools/__init__.py +2 -0
- finbourne_sdk_utils/jupyter_tools/hide_code_button.py +23 -0
- finbourne_sdk_utils/jupyter_tools/stop_execution.py +14 -0
- finbourne_sdk_utils/logger/LusidLogger.py +41 -0
- finbourne_sdk_utils/logger/__init__.py +1 -0
- finbourne_sdk_utils/lpt/__init__.py +0 -0
- finbourne_sdk_utils/lpt/back_compat.py +20 -0
- finbourne_sdk_utils/lpt/cash_ladder.py +191 -0
- finbourne_sdk_utils/lpt/connect_lusid.py +64 -0
- finbourne_sdk_utils/lpt/connect_none.py +5 -0
- finbourne_sdk_utils/lpt/connect_token.py +9 -0
- finbourne_sdk_utils/lpt/dfq.py +321 -0
- finbourne_sdk_utils/lpt/either.py +65 -0
- finbourne_sdk_utils/lpt/get_instruments.py +101 -0
- finbourne_sdk_utils/lpt/lpt.py +374 -0
- finbourne_sdk_utils/lpt/lse.py +188 -0
- finbourne_sdk_utils/lpt/map_instruments.py +164 -0
- finbourne_sdk_utils/lpt/pager.py +32 -0
- finbourne_sdk_utils/lpt/record.py +13 -0
- finbourne_sdk_utils/lpt/refreshing_token.py +43 -0
- finbourne_sdk_utils/lpt/search_instruments.py +48 -0
- finbourne_sdk_utils/lpt/stdargs.py +154 -0
- finbourne_sdk_utils/lpt/txn_config.py +128 -0
- finbourne_sdk_utils/lpt/txn_config_yaml.py +493 -0
- finbourne_sdk_utils/pandas_utils/__init__.py +0 -0
- finbourne_sdk_utils/pandas_utils/lusid_pandas.py +128 -0
- finbourne_sdk_utils-0.0.24.dist-info/LICENSE +21 -0
- finbourne_sdk_utils-0.0.24.dist-info/METADATA +25 -0
- finbourne_sdk_utils-0.0.24.dist-info/RECORD +52 -0
- finbourne_sdk_utils-0.0.24.dist-info/WHEEL +5 -0
- finbourne_sdk_utils-0.0.24.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1877 @@
import argparse
import copy
import csv
import os
import uuid
import re
import numpy as np
import lusid
from collections.abc import Mapping
import pandas as pd
from detect_delimiter import detect
import requests
import json
import inspect
import functools
from pathlib import Path

from finbourne_sdk_utils.cocoon.dateorcutlabel import DateOrCutLabel
import lusid.models as models
import logging
import time as default_time
from finbourne_sdk_utils.cocoon.validator import Validator
import types
import typing
import pydantic.v1


def checkargs(function: typing.Callable) -> typing.Callable:
    """
    This can be used as a decorator to check that the types of the provided arguments are correct. It checks that
    each provided argument matches any type annotation and/or the default value for the parameter.

    Parameters
    ----------
    function : typing.Callable
        The function to wrap with annotated types, all parameters must be annotated with a type

    Returns
    -------
    _f : typing.Callable
        The wrapped function
    """

    @functools.wraps(function)
    def _f(*args, **kwargs):

        # Get all the function arguments in order
        function_arguments = inspect.signature(function).parameters

        # Collect each non keyword argument value and key it by the argument name
        keyed_arguments = {
            list(function_arguments.keys())[i]: args[i] for i in range(0, len(args))
        }

        # Update this with the keyword argument values
        keyed_arguments.update(kwargs)

        # For each argument raise an error if it is of the incorrect type and if it has an invalid default value
        for argument_name, argument_value in keyed_arguments.items():

            if argument_name not in list(function_arguments.keys()):
                raise ValueError(
                    f"The argument {argument_name} is not a valid keyword argument for this function, valid arguments"
                    + f" are {str(list(function_arguments.keys()))}"
                )

            # Get the arguments details
            argument_details = function_arguments[argument_name]
            # Assume that there is no default value for this parameter
            is_default_value = False

            # If there is a default value
            if argument_details.default is not argument_details.empty:
                # Check to see if the argument value matches the default
                if argument_details.default is None:
                    is_default_value = argument_value is argument_details.default
                else:
                    is_default_value = argument_value == argument_details.default

            # If the argument value is of the wrong type e.g. list instead of dict then throw an error
            if (
                not isinstance(argument_value, argument_details.annotation)
                and argument_details.annotation is not argument_details.empty
            ):
                # Only exception to this is if it matches the default value which may be of a different type e.g. None
                if not is_default_value:
                    raise TypeError(
                        f"""The value provided for {argument_name} is of type {type(argument_value)} not of
                        type {argument_details.annotation}. Please update the provided value to be of type
                        {argument_details.annotation}"""
                    )

        return function(*args, **kwargs)

    return _f
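
# Illustrative usage of the decorator above (hypothetical function, not from this package):
# an argument whose type does not match its annotation raises a TypeError at call time.
#
#   >>> @checkargs
#   ... def scale(value: int, factor: int = 2):
#   ...     return value * factor
#   >>> scale(3)        # annotations satisfied, returns 6
#   >>> scale("3")      # raises TypeError: str provided where int is annotated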


def make_code_lusid_friendly(raw_code) -> str:
    """
    This function takes a column name and converts it to a LUSID-friendly code for creating LUSID objects. LUSID allows
    for up to 64 characters which can be lowercase and uppercase letters, numbers, a dash ("-") or an underscore ("_").
    The complete restrictions are here: https://support.lusid.com/what-is-a-code

    Parameters
    ----------
    raw_code : any
        A raw column header which needs special characters stripped out

    Returns
    -------
    friendly_code : str
        A LUSID friendly code with special characters removed
    """

    # Convert any type to a string
    try:
        raw_code = str(raw_code)
    except Exception as exception:
        raise ValueError(
            f"Could not convert value of {raw_code} with type {type(raw_code)} to a string. "
            + "Please convert to a format which can be cast to a string and try again"
        ) from exception

    # Check that it does not exceed the max length
    max_length = 64

    if len(raw_code) > max_length:
        raise ValueError(
            f"""The name {raw_code} is {len(raw_code)} characters long and exceeds the limit of {max_length}
            for a code. Please shorten it by {len(raw_code) - 64} characters."""
        )

    # Specifically convert known unfriendly characters with a specific string and remove the rest completely
    friendly_code = re.sub(
        r"[^-\w]",
        "",
        raw_code.replace("%", "Percentage")
        .replace("&", "and")
        .replace(".", "_")
        .strip(),
    )

    return friendly_code
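
# Illustrative behaviour of the helper above (hypothetical input): "%" becomes "Percentage",
# "&" becomes "and", "." becomes "_", and any remaining character outside [-A-Za-z0-9_] is dropped.
#
#   >>> make_code_lusid_friendly("Total Return (%)")
#   'TotalReturnPercentage'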


@checkargs
def populate_model(
    model_object_name: str,
    required_mapping: dict,
    optional_mapping: dict,
    row: pd.Series,
    properties,
    identifiers: dict = None,
    sub_holding_keys=None,
) -> typing.Callable:
    """
    This function populates the provided LUSID model object in lusid.models with values from a Pandas Series

    Parameters
    ----------
    model_object_name : str
        The name of the model object to populate
    required_mapping : dict
        The required mapping between the row columns and the model attributes
    optional_mapping : dict
        The optional mapping between the row columns and the model attributes
    row : pd.Series
        The row from the provided pd.DataFrame to use to populate the model
    properties
        The properties for this model
    identifiers : dict
        The identifiers for this model
    sub_holding_keys
        The sub holding keys to use

    Returns
    -------
    set_attributes : typing.Callable
        The function to set the attributes for the model
    """

    # Check that the provided model name actually exists
    model_object = getattr(lusid.models, model_object_name, None)

    if model_object is None:
        raise TypeError("The provided model_object is not a lusid.model object")

    # Expand the mapping out from being a dot separated flat dictionary e.g. transaction_price.price to being nested
    update_dict(required_mapping, optional_mapping)

    mapping_expanded = expand_dictionary(required_mapping)

    # Set the attributes on the model
    return set_attributes_recursive(
        model_object=model_object,
        mapping=mapping_expanded,
        row=row,
        properties=properties,
        identifiers=identifiers,
        sub_holding_keys=sub_holding_keys,
    )


@checkargs
def set_attributes_recursive(
    model_object,
    mapping: dict,
    row: pd.Series,
    properties=None,
    identifiers: dict = None,
    sub_holding_keys=None,
):
    """
    This function takes a lusid.model object name and an expanded mapping between its attributes and the provided
    row of data and constructs a populated model

    Parameters
    ----------
    model_object : lusid.models
        The object from lusid.models to populate
    mapping : dict
        The expanded dictionary mapping the Series columns to the LUSID model attributes
    row : pd.Series
        The current row of the DataFrame being worked on
    properties : any
        The properties to use on this model
    identifiers : any
        The instrument identifiers to use on this model
    sub_holding_keys
        The sub holding keys to use on this model

    Returns
    -------
    new model_object : lusid.models
        An instance of the model object with populated attributes
    """

    # Get the object attributes

    obj_attr = get_attributes_and_types(model_object)
    obj_attr_required_map = get_required_attributes_from_model(model_object)
    obj_init_values = {}

    # Additional attributes which are used on most models but will be populated outside the provided mapping
    additional_attributes = {
        "instrument_identifiers": identifiers,
        "properties": properties,
        "sub_holding_keys": sub_holding_keys,
        "identifiers": identifiers,
    }

    # Generate the intersection between the available attributes and the provided attributes
    provided_attributes = set(list(mapping.keys()) + list(additional_attributes.keys()))
    available_attributes = set(list(obj_attr.keys()))
    populate_attributes = provided_attributes.intersection(available_attributes)

    # Used to check if all attributes are none
    total_count = 0
    none_count = 0
    missing_value = False

    # For each of the attributes to populate
    for key in list(populate_attributes):

        # Get the attribute type
        attribute_type = obj_attr[key]

        # If it is an additional attribute, populate it with the provided values and move to the next attribute
        if key in list(additional_attributes.keys()):
            # Handle identifiers provided within instrument definition (e.g. 'Bond', 'Future', etc.)
            if (key, attribute_type) == ("identifiers", "Mapping[str, dont_match"):
                obj_init_values[key] = {
                    str_key: row[str_value]
                    for str_key, str_value in mapping[key].items()
                    if not pd.isna(row[str_value])
                }
            else:
                obj_init_values[key] = additional_attributes[key]

            continue

        # This block keeps track of the number of missing (non-additional) attributes
        else:
            total_count += 1
            if mapping[key] is None:
                none_count += 1

        # If this is the last object and there is no more nesting set the value from the row
        if not isinstance(mapping[key], dict):
            # If this exists in the mapping with a value and there is a value in the row for it
            if mapping[key] is not None and not pd.isna(row[mapping[key]]):
                # Converts to a date if it is a date field
                if "date" in key or "created" in key or "effective_at" in key:
                    obj_init_values[key] = str(DateOrCutLabel(row[mapping[key]]))
                # Converts to a list element if it is a list field
                elif "list" in attribute_type and not isinstance(
                    row[mapping[key]], list
                ):
                    obj_init_values[key] = [row[mapping[key]]]
                else:
                    obj_init_values[key] = row[mapping[key]]
            elif key in obj_attr_required_map:
                missing_value = True
            elif mapping[key]:
                none_count += 1

        # if there is more nesting call the function recursively
        else:
            # Ensure that if there is a complex attribute type e.g. dict(str, InstrumentIdValue) it is extracted
            attribute_type, nested_type, optional = extract_lusid_model_from_attribute_type(
                attribute_type
            )

            # Call the function recursively
            value = set_attributes_recursive(
                model_object=getattr(lusid.models, attribute_type),
                mapping=mapping[key],
                row=row,
            )

            obj_init_values[key] = [value] if nested_type == "list" else value

    """
    If all attributes are None propagate None rather than a model filled with Nones. For example if a CorporateActionSourceId
    has no scope or code return a model with CorporateActionSourceId = None rather than CorporateActionSourceId =
    lusid.models.ResourceId(scope=None, code=None)
    """
    if total_count == none_count or missing_value:
        return None

    # Create an instance of and populate the model object
    instance = model_object(**obj_init_values)

    # Support for polymorphism, we can identify these `abstract` classes by the existence of the below

    """ if getattr(instance, "discriminator"):
        discriminator = getattr(instance, getattr(instance, "discriminator"))

        actual_class = model_object.discriminator_value_class_map[discriminator]

        return set_attributes_recursive(
            model_object=getattr(lusid.models, actual_class), mapping=mapping, row=row,
        )
    """
    return instance


@checkargs
def update_dict(orig_dict: dict, new_dict) -> None:
    """
    This is used to update a dictionary with another dictionary. Using the default Python update method does not merge
    nested dictionaries. This method allows for this. This modifies the original dictionary in place.

    Parameters
    ----------
    orig_dict : dict
        The original dictionary to update
    new_dict : dict
        The new dictionary to merge with the original

    Returns
    -------
    orig_dict : dict
        The updated original dictionary
    """

    # Iterate over key value pairs in the new dictionary to merge into the original
    for key, val in new_dict.items():
        # If a mapping object (e.g. dictionary) call the function recursively
        if isinstance(val, Mapping):
            tmp = update_dict(orig_dict.get(key, {}), val)
            orig_dict[key] = tmp
        # If a list then merge it into the original dictionary
        elif isinstance(val, list):
            orig_dict[key] = orig_dict.get(key, []) + val
        # Do the same for any other type
        else:
            orig_dict[key] = new_dict[key]

    return orig_dict
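
# Illustrative deep-merge behaviour (hypothetical values): nested mappings are merged
# key by key and lists are concatenated, unlike dict.update which overwrites wholesale.
#
#   >>> base = {"a": {"x": 1}, "tags": ["t1"]}
#   >>> update_dict(base, {"a": {"y": 2}, "tags": ["t2"]})
#   {'a': {'x': 1, 'y': 2}, 'tags': ['t1', 't2']}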


@checkargs
def expand_dictionary(dictionary: dict, key_separator: str = ".") -> dict:
    """
    Takes a flat dictionary (no nesting) with keys separated by a separator and converts it into a nested
    dictionary

    Parameters
    ----------
    dictionary : dict
        The input dictionary with separated keys
    key_separator : str
        The separator to use

    Returns
    -------
    dict_expanded : dict
        The expanded nested dictionary
    """

    dict_expanded = {}

    # Loop over each composite key and final value
    for key, value in dictionary.items():
        # Split the key on the separator
        components = key.split(key_separator)
        # Get the expanded dictionary for this key and update the master dictionary
        update_dict(
            dict_expanded, expand_dictionary_single_recursive(0, components, value)
        )

    return dict_expanded
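
# Illustrative expansion of a dot-separated mapping (hypothetical column names):
#
#   >>> expand_dictionary({"transaction_price.price": "Price", "units": "Quantity"})
#   {'transaction_price': {'price': 'Price'}, 'units': 'Quantity'}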


@checkargs
def expand_dictionary_single_recursive(index: int, key_list: list, value) -> dict:
    """
    Takes a list of keys and a value and turns it into a nested dictionary. This is a recursive function.

    Parameters
    ----------
    index : int
        The current index of the key in the list of keys
    key_list : list[str]
        The list of keys to turn into a nested dictionary
    value : any
        The final value to match against the last (deepest) key

    Returns
    -------
    dict
        The nested dictionary with the value keyed against the last (deepest) key
    """

    # Gets the current key in the list
    key = key_list[index]

    # If it is the last key in the list return a dictionary with it keyed against the value
    if key == key_list[-1]:
        return {key: value}

    # Otherwise if it is not the last key, key it against calling this function recursively with the next key
    return {key: expand_dictionary_single_recursive(index + 1, key_list, value)}
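
# Illustrative single-key expansion (hypothetical values):
#
#   >>> expand_dictionary_single_recursive(0, ["total", "amount"], "Cost")
#   {'total': {'amount': 'Cost'}}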


@checkargs
def get_swagger_dict(api_url: str) -> dict:
    """
    Gets the lusid.json swagger file

    Parameters
    ----------
    api_url : str
        The base api url for the LUSID instance

    Returns
    -------
    dict
        The swagger file as a dictionary
    """

    swagger_path = "/swagger/v0/swagger.json"
    swagger_url = api_url + swagger_path
    swagger_file = requests.get(swagger_url)

    if swagger_file.status_code == 200:
        swagger = json.loads(swagger_file.text)

        app_name = swagger.get("info", {}).get("title", {})
        if app_name is None or app_name != "LUSID API":
            raise ValueError(f"Invalid LUSID OpenAPI file: {swagger_url}")

        return swagger
    else:
        raise ValueError(
            f"""Received a {swagger_file.status_code} response from the provided url, please double check
            the base api url and try again"""
        )


def generate_required_attributes_list():
    pass


@checkargs
def verify_all_required_attributes_mapped(
    mapping: dict,
    model_object_name: str,
    exempt_attributes: list = None,
    key_separator: str = ".",
) -> None:
    """
    Verifies that all required attributes are included in the mapping, passes silently if they are and raises an exception
    otherwise

    Parameters
    ----------
    mapping : dict
        The required mapping
    model_object_name : str
        The name of the lusid.models object that the mapping is for
    exempt_attributes : list[str]
        The attributes that are exempt from needing to be in the required mapping
    key_separator : str
        The separator to use to join the required attributes together

    Returns
    -------
    None
    """

    # Check that the provided model name actually exists
    model_object = getattr(lusid.models, model_object_name, None)

    if model_object is None:
        raise TypeError("The provided model_object is not a lusid.model object")

    # Convert a None to an empty list
    exempt_attributes = (
        Validator(exempt_attributes, "exempt_attributes")
        .set_default_value_if_none([])
        .value
    )

    # Gets the required attributes for this model
    required_attributes = get_required_attributes_model_recursive(
        model_object=model_object, key_separator=key_separator
    )

    # Removes the exempt attributes
    for attribute in required_attributes:
        # Removes all nested attributes for example if "identifiers" is exempt "identifiers.value" will be removed
        if attribute.split(key_separator)[0] in exempt_attributes:
            required_attributes.remove(attribute)

    missing_attributes = set(required_attributes) - set(list(mapping.keys()))

    if len(missing_attributes) > 0:
        raise ValueError(
            f"""The required attributes {str(missing_attributes)} are missing from the mapping. Please
            add them."""
        )


def get_attributes_and_types(model_object):

    attributes = {}

    # __fields__ is a pydantic.v1 property
    for index, (key, value) in enumerate(model_object.__fields__.items()):

        nested_type = None

        attribute_type = str(value)

        match = re.search(r"type=([A-Za-z\[\], ]+)\s", attribute_type)

        if match:
            attribute_type = match.group(1)

        optional = False
        optionalStr = "Optional["

        if attribute_type.startswith(optionalStr):
            attribute_type = attribute_type.split(optionalStr)[1]
            attribute_type = attribute_type[0:len(attribute_type) - 1]
            optional = True

        """ # If the attribute type is a mapping e.g. Mapping[str, InstrumentIdValue], extract the type
        if "Mapping" in attribute_type:
            attribute_type = attribute_type.split(", ")[1].rstrip("]")
            nested_type = "Mapping"

        # If the attribute type is a dictionary e.g. dict(str, InstrumentIdValue), extract the type

        if "dict" in attribute_type:
            attribute_type = attribute_type.split(", ")[1].rstrip(")")
            nested_type = "dict"
        # If it is a list e.g. list[ModelProperty] extract the type
        if "list" in attribute_type:
            attribute_type = attribute_type.split("list[")[1].rstrip("]")
            nested_type = "list"

        if "List" in attribute_type:
            attribute_type = attribute_type.split("List[")[1].rstrip("]")
            nested_type = "list"
        """

        attributes[key] = attribute_type

    """ # Get the members of the object
    for attr_name, attr_value in inspect.getmembers(model_object):
        # Check if it's a data attribute (not a method or function)
        if not callable(attr_value) and not attr_name.startswith('__'):
            # Get the type of the attribute
            attr_type = type(attr_value).__name__
            attributes[attr_name] = attr_type """
    return attributes


@checkargs
def get_required_attributes_model_recursive(model_object, key_separator: str = "."):
    """
    This is a recursive function which gets all of the required attributes on a LUSID model. If the model is nested
    then it separates the attributes by a '.' until the bottom level where no more models are required and a primitive
    type is supplied e.g. string, int etc.

    Parameters
    ----------
    model_object : lusid.model
        The model to get required attributes for
    key_separator : str
        The separator to use to join the required attributes together

    Returns
    -------
    list[str]
        The required attributes of the model
    """

    attributes = []

    # Get the required attributes for the current model
    required_attributes = get_required_attributes_from_model(model_object)

    # Get the types of the attributes for the current model
    open_api_types = get_attributes_and_types(model_object)

    for required_attribute in required_attributes:

        required_attribute_type = open_api_types[required_attribute]

        # Check to see if there is a LUSID model for this required attribute, if no further nesting then add this attribute
        if not check_nested_model(str(required_attribute_type)):
            attributes.append(camel_case_to_pep_8(required_attribute))

        # Otherwise call the function recursively
        else:
            # Ensure that if there is a complex attribute type e.g. dict(str, InstrumentIdValue) it is extracted
            (
                required_attribute_type,
                nested_type,
                optional,
            ) = extract_lusid_model_from_attribute_type(str(required_attribute_type))

            nested_required_attributes = get_required_attributes_model_recursive(
                model_object=getattr(lusid.models, required_attribute_type),
            )

            for nested_required_attribute in nested_required_attributes:
                attributes.append(
                    key_separator.join(
                        [
                            camel_case_to_pep_8(required_attribute),
                            nested_required_attribute,
                        ]
                    )
                )

    return attributes


def get_required_attributes_from_model(model_object) -> list:
    """
    Gets the required attributes for a LUSID model using reflection

    Parameters
    ----------
    model_object : lusid.models
        A LUSID model object

    Returns
    -------
    list[str]
        The required attributes
    """

    # Get the source code for the model
    model_details = inspect.getsource(model_object)

    # bit of cleansing to aid the regex
    model_details = model_details.replace('"""', '')
    model_details = model_details.replace(r"\n", "\n")

    required_attributes = re.findall(r'(\w+):.*?= Field\(\.\.\.,', model_details)
    all_attributes = re.findall(r'^\s*(\w+):', model_details, re.MULTILINE)

    # Set the status (required or optional) for each attribute based on whether "is None:" exists in the setter function
    '''
    Here are two examples

    A) A None value is not allowed and hence this is required. Notice the "if identifiers is None:" condition.

    @identifiers.setter
    def identifiers(self, identifiers):
        """Sets the identifiers of this InstrumentDefinition.
        A set of identifiers that can be used to identify the instrument. At least one of these must be configured to be a unique identifier.  # noqa: E501
        :param identifiers: The identifiers of this InstrumentDefinition.  # noqa: E501
        :type: dict(str, InstrumentIdValue)
        """
        if identifiers is None:
            raise ValueError("Invalid value for `identifiers`, must not be `None`")  # noqa: E501

        self._identifiers = identifiers

    B) A None value is allowed and hence this is optional

    @look_through_portfolio_id.setter
    def look_through_portfolio_id(self, look_through_portfolio_id):
        """Sets the look_through_portfolio_id of this InstrumentDefinition.
        :param look_through_portfolio_id: The look_through_portfolio_id of this InstrumentDefinition.  # noqa: E501
        :type: ResourceId
        """

        self._look_through_portfolio_id = look_through_portfolio_id

    '''
    return required_attributes



def extract_lusid_model_from_attribute_type(attribute_type: str):
    """
    Extracts a LUSID model from a complex attribute type e.g. dict(str, InstrumentIdValue) if it exists. If there
    is no LUSID model the attribute type is still returned

    Parameters
    ----------
    attribute_type : str
        The attribute type to extract the model from

    Returns
    -------
    attribute_type : str
        The returned attribute type with the LUSID model extracted if possible
    nested_type : str
        The type of nesting used e.g. List or Dict
    optional : bool
        Whether the attribute type was wrapped in Optional[...]
    """

    nested_type = None
    # for "name='identifiers' type=Mapping[str, InstrumentIdValue] required=True"
    # will give Mapping[str, InstrumentIdValue]

    match = re.search(r"type=([A-Za-z\[\], ]+)\s", attribute_type)

    if match:
        attribute_type = match.group(1)

    # If the attribute type is a mapping e.g. Mapping[str, InstrumentIdValue], extract the type
    if "Mapping" in attribute_type:
        attribute_type = attribute_type.split(", ")[1].rstrip("]")
        nested_type = "Mapping"

    # If the attribute type is a dictionary e.g. dict(str, InstrumentIdValue), extract the type

    if "dict" in attribute_type:
        attribute_type = attribute_type.split(", ")[1].rstrip(")")
        nested_type = "dict"
    # If it is a list e.g. list[ModelProperty] extract the type
    if "list" in attribute_type:
        attribute_type = attribute_type.split("list[")[1].rstrip("]")
        nested_type = "list"

    if "List" in attribute_type:
        attribute_type = attribute_type.split("List[")[1].rstrip("]")
        nested_type = "list"

    optional = False
    optionalStr = "Optional["

    if attribute_type.startswith(optionalStr):
        attribute_type = attribute_type.split(optionalStr)[1].rstrip("]")
        optional = True

    return attribute_type, nested_type, optional
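
# Illustrative extraction, using the pydantic field string quoted in the comment above:
#
#   >>> extract_lusid_model_from_attribute_type(
#   ...     "name='identifiers' type=Mapping[str, InstrumentIdValue] required=True"
#   ... )
#   ('InstrumentIdValue', 'Mapping', False)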


@checkargs
def check_nested_model(required_attribute_type: str) -> bool:

    """
    Takes the properties of a required attribute on a model and searches as to whether or not this attribute
    requires a model of its own

    Parameters
    ----------
    required_attribute_type : str
        The type of the required attribute

    Returns
    -------
    bool
        Whether the required attribute is itself a LUSID model
    """

    required_attribute_type, nested_type, optional = extract_lusid_model_from_attribute_type(
        required_attribute_type
    )

    top_level_model = getattr(lusid.models, required_attribute_type, None)

    if top_level_model is None:
        return False

    return True


@checkargs
def gen_dict_extract(key, var: dict):
    """
    Searches a nested dictionary for a key, yielding any values it finds against that key

    Parameters
    ----------
    key : str
        The key to search for
    var : dict
        The dictionary to search

    Returns
    -------
    generator(result)
        A generator with the results
    """

    if hasattr(var, "items"):
        for k, v in var.items():
            if k == key:
                yield v
            if isinstance(v, dict):
                for result in gen_dict_extract(key, v):
                    yield result
            elif isinstance(v, list):
                for d in v:
                    for result in gen_dict_extract(key, d):
                        yield result
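
# Illustrative search of a nested dictionary (hypothetical data):
#
#   >>> nested = {"portfolio": {"code": "EQUITY-UK", "links": [{"code": "GROWTH"}]}}
#   >>> list(gen_dict_extract("code", nested))
#   ['EQUITY-UK', 'GROWTH']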


@checkargs
def camel_case_to_pep_8(attribute_name: str) -> str:
    """
    Converts a camel case name to PEP 8 standard

    Parameters
    ----------
    attribute_name : str
        The camel case attribute name

    Returns
    -------
    str
        The PEP 8 formatted attribute name
    """

    matches = re.finditer(
        ".+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)", attribute_name
    )
    return "_".join([m.group(0)[0].lower() + m.group(0)[1:] for m in matches])


def convert_cell_value_to_string(data):
    """
    Converts the value of a cell to a string if it is a list or a dictionary

    Parameters
    ----------
    data
        The value of the cell in the dataframe

    Returns
    -------
    str
        The original data if it is not a list or a dictionary, otherwise the string representation of these

    """

    if isinstance(data, list):
        return ", ".join(data)

    elif isinstance(data, dict):
        return str(data)

    else:
        return data
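
# Illustrative conversions (hypothetical cell values):
#
#   >>> convert_cell_value_to_string(["GBP", "USD"])
#   'GBP, USD'
#   >>> convert_cell_value_to_string({"ccy": "GBP"})
#   "{'ccy': 'GBP'}"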


def handle_nested_default_and_column_mapping(
    data_frame: pd.DataFrame, mapping: dict, constant_prefix: str = "$"
):
    """
    This function handles when a mapping is provided which contains as a value a dictionary with a column and/or default
    key rather than just a string with the column name. It populates the DataFrame with the default value as appropriate
    and removes the nesting so that the model can be populated later.

    Parameters
    ----------
    data_frame : pd.DataFrame
        The updated dataframe
    mapping : dict
        The original mapping (can be required or optional)
    constant_prefix : str
        The prefix that can be used to specify a constant

    Returns
    -------
    dataframe : pd.DataFrame
        The updated DataFrame
    mapping_updated : dict
        The updated mapping
    """

    # Copy the data frame to ensure that it is a copy and not a view (which could make changes to the original
    # dataframe). This also fixes the SettingWithCopyWarning that pandas will throw due to the difference between copy
    # and view.
    data_frame = data_frame.copy()

    mapping_updated = {}

    for key, value in mapping.items():

        # If the value of the mapping is a dictionary
        if isinstance(value, dict):

            # If the dictionary contains a column and a default, fill nulls with the default in that column
            if ("column" in list(value.keys())) and ("default" in list(value.keys())):
                mapping_updated[key] = value["column"]
                data_frame[mapping_updated[key]] = data_frame[
                    mapping_updated[key]
                ].fillna(value["default"])

            # If there is only a default specified, create a new column filled with the default
            elif not ("column" in list(value.keys())) and (
                "default" in list(value.keys())
            ):
                mapping_updated[key] = f"LUSID.{key}"
                data_frame[mapping_updated[key]] = value["default"]

            # If there is only a column specified unnest it
            elif ("column" in list(value.keys())) and not (
                "default" in list(value.keys())
            ):
                mapping_updated[key] = value["column"]

            else:
                raise KeyError(
                    f"""You have passed in a dictionary as the value for the mapping for {key}, however
                    it does not contain a key for "column" or "default". Please provide a key, value
                    pair for one or both of these keys with the column being the column name and default
                    value being the default value to use. Alternatively just provide a string which
                    is the column name to use."""
                )

        elif isinstance(value, str):

            if len(value) == 0:
                raise IndexError(
                    f"Unspecified mapping field: {key}. Please assign a value or remove this from the "
                    f"mapping"
                )

            if value[0] != constant_prefix:
                mapping_updated[key] = value
            else:
                mapping_updated[key] = f"LUSID.{key}"
                data_frame[mapping_updated[key]] = value[1:]

        elif isinstance(value, int):
            mapping_updated[key] = f"LUSID.{key}"
            data_frame[mapping_updated[key]] = value

        else:
            raise ValueError(
                f"""You have passed in a value with type {type(value)} for the mapping for {key}, this is
                not a supported type. Please provide a string with the column name to use, a constant
                value prefixed by {constant_prefix}, an integer value or a dictionary
                with the keys "column" and "default" where column is the column name and default
                being the default value to use."""
            )

    return data_frame, mapping_updated
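
# Illustrative un-nesting of a column/default mapping (hypothetical column names): a nested
# value is flattened to its column, nulls are filled with the default, and a "$"-prefixed
# constant becomes a generated "LUSID.<key>" column holding that constant.
#
#   >>> df = pd.DataFrame({"ccy": ["GBP", None]})
#   >>> df, updated = handle_nested_default_and_column_mapping(
#   ...     df, {"currency": {"column": "ccy", "default": "USD"}, "scope": "$demo"}
#   ... )
#   >>> updated
#   {'currency': 'ccy', 'scope': 'LUSID.scope'}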


def load_json_file(file_path: str) -> dict:
    """
    Loads a JSON file into a dictionary, resolving a relative path against this module's directory

    Parameters
    ----------
    file_path : str
        An absolute path, or a path relative to this module's directory

    Returns
    -------
    data : dict
        parsed data from json file
    """

    if not os.path.isabs(file_path):
        file_path = Path(__file__).parent.joinpath(file_path)
    if not os.path.exists(file_path):
        raise OSError(f"Json file not found at {file_path}")
    with open(file_path) as json_file:
        data = json.load(json_file)
    return data
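
# Illustrative call: relative paths resolve against this module's directory, which is how
# validate_mapping_file_structure below loads the packaged domain settings.
#
#   >>> domain_settings = load_json_file("config/domain_settings.json")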


@checkargs
def load_data_to_df_and_detect_delimiter(args: dict) -> pd.DataFrame:
    """
    This function loads data from a given file path and converts it into a pandas DataFrame

    Parameters
    ----------
    args : dict
        Arguments parsed in from command line, containing args["file_path"]

    Returns
    -------
    pd.DataFrame
        DataFrame containing data
    """
    if not os.path.exists(args["file_path"]):
        raise OSError(f"file path {args['file_path']} does not exist")

    with open(args["file_path"], "r") as read_file:
        logging.info(f"loading data from {args['file_path']}")
        data = csv.reader(read_file, lineterminator=args["line_terminator"])

        # iterate over unrelated preamble lines to get to the first line of data that we are interested in
        for pre_amble in range(args["num_header"]):
            read_file.readline()

        # now that we are at the first line of data, get the header row, that will contain the formatting we are
        # interested in
        header_line = read_file.readline()

        if not args["delimiter"]:
            args["delimiter"] = get_delimiter(header_line)

        if args["delimiter"] == header_line:
            err = (
                f"Unable to detect delimiter from first line of data at line number: {args['num_header']}: "
                f"\n\t>> "
                f"{header_line}"
            )
            raise ValueError(err)

    with open(args["file_path"], "r") as read_file:
        # read data from lines specified at command line by num_header and num_footer
        return pd.read_csv(
            args["file_path"],
            delimiter=args["delimiter"],
            header=args["num_header"],
            skipfooter=args["num_footer"],
            engine="python",
        )


def get_delimiter(sample_string: str):
    return detect(sample_string).replace("\\", "\\\\")


def check_mapping_fields_exist(
    required_list: list, search_list: list, file_type: str
) -> list:
    """
    This function checks that items in one list exist in another list

    Parameters
    ----------
    required_list : list[str]
        list of items to search for
    search_list : list[str]
        list to search in
    file_type : str
        the file type of the data e.g. instruments, holdings, transactions

    Returns
    -------
    missing_fields : list[str]
        list of items in required_list missing from search_list
    """

    missing_fields = [
        item
        for item in required_list
        if item not in search_list and (len(item) > 0 and item[0] != "$")
    ]
    if missing_fields:
        raise ValueError(
            f"{file_type} fields not found in data columns: {missing_fields}"
        )
    return missing_fields


def parse_args(args: dict):
    """
    Argument parser for command line apps

    Parameters
    ----------
    args : dict

    Returns
    -------
    vars(ap.parse_args(args=args))
        parsed arguments
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-f",
        "--file_path",
        required=True,
        help=r"full path for data (eg. c:\Users\Joe\data\instruments1.csv)",
    )
    ap.add_argument(
        "-c",
        "--secrets_file",
        help=r"full path for credential secrets (eg. c:\Users\Joe\secrets.json). Not required if set as "
        r"environment variables",
    )
    ap.add_argument(
        "-m",
        "--mapping",
        required=True,
        help=r"full path to mappings.json (see mappings_template.json)",
    )
    ap.add_argument("-s", "--scope", help=r"LUSID scope to act in")
    ap.add_argument(
        "-ps", "--property_scope", help=r"LUSID scope to load properties into"
    )
    ap.add_argument(
        "-dl",
        "--delimiter",
        help=r"explicitly specify delimiter for data file and disable automatic delimiter detection",
    )
    ap.add_argument(
        "-nh",
        "--num_header",
        type=int,
        default=0,
        help="number of header lines before column titles",
    )
    ap.add_argument(
        "-nf",
        "--num_footer",
        type=int,
        default=0,
        help="number of footer lines after end of data",
    )
    ap.add_argument(
        "-lt",
        "--line_terminator",
        default=r"\n",
        help="character that specifies the end of a line, default value is {}".format(
            r"\n"
        ),
    )
    ap.add_argument(
        "-b",
        "--batch_size",
        default=2000,
        type=int,
        help="specifies the batch size for async requests",
    )
    ap.add_argument(
        "-disp",
        "--display_response_head",
        help="Displays the first 40 successful and unsuccessful items",
        action="store_true",
    )
    ap.add_argument(
        "-dr",
        "--dryrun",
        help="runs the app without calling LUSID",
        action="store_true",
    )
    ap.add_argument(
        "-d", "--debug", help=r"print debug messages, expected input: 'debug'"
    )

    ap.add_argument(
        "-l",
        "--logging_file",
        required=False,
        help=r"full path for logging file (eg. c:\Users\Joe\data\debug.log )",
    )

    return vars(ap.parse_args(args=args)), ap
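
# Illustrative invocation (hypothetical file names): argparse expects a sequence of
# command-line tokens, and the unset batch size falls back to its default of 2000.
#
#   >>> args, parser = parse_args(["-f", "holdings.csv", "-m", "mapping.json", "-s", "demo"])
#   >>> args["file_path"], args["scope"], args["batch_size"]
#   ('holdings.csv', 'demo', 2000)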


def scale_quote_of_type(
    df: pd.DataFrame, mapping: dict, file_type: str = "quotes"
) -> tuple[pd.DataFrame, dict]:
    """
    Scales quote values of quotes of a specified type

    This function appends an extra column (__adjusted_quote) to a dataframe that contains quotes that have been scaled by
    a scale factor specified in the mapping, if they can be identified using another field. An example usage of this
    is processing of a quotes file containing a mixture of equities prices as GBP and GBp.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing quotes
    mapping : dict
        mapping containing mapping[file_type]["quote_scalar"]
    file_type : str
        File type of data default = "quotes"

    Returns
    -------
    df : pd.DataFrame
        dataframe containing "__adjusted_quote" column
    mapping : dict
        mapping updated with "metric_value.value" updated to be "__adjusted_quote"
    """

    price_col = mapping[file_type]["quote_scalar"]["price"]
    type_col = mapping[file_type]["quote_scalar"]["type"]
    type_code = mapping[file_type]["quote_scalar"]["type_code"]
    scale_factor = mapping[file_type]["quote_scalar"]["scale_factor"]

    for col in [price_col, type_col]:
        if col not in df.columns:
            logging.error(f"column {col} does not exist in quotes DataFrame.")
            raise KeyError(f"column {col} does not exist in quotes DataFrame.")

    df["__adjusted_quote"] = None

    for index, row in df.iterrows():
        if np.isnan(row[price_col]) and row[type_col] == type_code:
            logging.warning(
                f"Could not adjust price at row {index} because it contains no price value"
            )
            continue
        elif np.isnan(row[price_col]):
            continue

        __adjusted_quote = (
            row[price_col] * scale_factor
            if row[type_col] == type_code
            else row[price_col]
        )

        df.at[index, "__adjusted_quote"] = __adjusted_quote
    mapping[file_type]["required"]["metric_value.value"] = "__adjusted_quote"
    return df, mapping
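
# Illustrative quote_scalar block (hypothetical column names): rows whose "ccy" column
# equals "GBp" have their price multiplied by 0.01 into the "__adjusted_quote" column;
# other rows are copied through unchanged.
#
#   mapping["quotes"]["quote_scalar"] = {
#       "price": "price", "type": "ccy", "type_code": "GBp", "scale_factor": 0.01
#   }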


def identify_cash_items(
    dataframe, mappings, file_type: str, remove_cash_items: bool = False
) -> tuple[pd.DataFrame, dict]:
    """
    This function identifies cash items in a dataframe and either creates a currency identifier in a new
    __currency_identifier_for_LUSID column and amends the mapping dictionary accordingly, or deletes cash items from
    the dataframe.

    Parameters
    ----------
    dataframe : pd.DataFrame
        The dataframe to look for cash items in
    mappings : dict
        Full mapping structure
    file_type : str
        type of data in dataframe eg. "instruments", "quotes", "transactions", "portfolios"
    remove_cash_items : bool
        indication to remove cash items from dataframe

    Returns
    -------
    dataframe : pd.DataFrame
        dataframe with cash items identified or removed
    mappings : dict
        mapping with currency identifier mapping included
    """

    cash_flag_specification = mappings["cash_flag"]
    if not remove_cash_items:
        dataframe["__currency_identifier_for_LUSID"] = None
        mappings[file_type]["identifier_mapping"][
            "Currency"
        ] = "__currency_identifier_for_LUSID"

    rm_index = []
    for index, row in dataframe.iterrows():
        for column in cash_flag_specification["cash_identifiers"].keys():
            if row[column] in cash_flag_specification["cash_identifiers"][column]:
                if remove_cash_items:
                    rm_index.append(index)
                else:
                    dataframe.at[
                        index, "__currency_identifier_for_LUSID"
                    ] = populate_currency_identifier_for_LUSID(
                        row, column, cash_flag_specification
                    )
                break
    if remove_cash_items:
        dataframe = dataframe.drop(rm_index)

    return dataframe, mappings


def populate_currency_identifier_for_LUSID(
    row: dict, column, cash_flag_specification: dict
) -> str:
    """
    This function takes a cash transaction or holding in the form of a row from a dataframe and returns its currency
    code, given the data's column containing a cash identifier and a dictionary that specifies how to set the currency
    code.

    Parameters
    ----------
    row : dict
        current data row
    column : str
        current dataframe column that contains values that can be used to identify a cash transaction or
        holding
    cash_flag_specification : dict
        dictionary containing cash identifier columns and values with either explicit currency codes or the column from
        which the currency code can be inferred

    Returns
    -------
    currency_code : str
        The currency code for the current transaction or holding
    """

    if isinstance(cash_flag_specification["cash_identifiers"][column], dict):
        if row[column] in cash_flag_specification["cash_identifiers"][column]:
            logging.debug("Getting currency code from explicit definition in mapping")
            currency_code = cash_flag_specification["cash_identifiers"][column][
                row[column]
            ]
            if not currency_code and "implicit" in cash_flag_specification.keys():
                logging.debug("couldn't find currency code in explicit definition. ")
                currency_code = row[cash_flag_specification["implicit"]]
        else:
            ex = (
                f"failed to find currency code either explicitly in cash_flag or implicitly from currency column "
                f"specified in cash_flag for {row}"
            )
            logging.error(ex)
            raise ValueError(ex)

    elif isinstance(cash_flag_specification["cash_identifiers"][column], list):
        if "implicit" in cash_flag_specification.keys():
            logging.info(
                "No currency codes explicitly specified, attempting to get implicitly from currency code "
                "column"
            )
            currency_code = row[cash_flag_specification["implicit"]]
        else:
            err = (
                f"No cash identifiers were specified as a list, without any explicit currency codes and no 'implicit'"
                f" field containing the name of a column containing currency codes exists. Please reformat cash_flag "
                f"inside mapping file correctly"
            )
            raise ValueError(err)
    else:
        logging.error(
            f"cash_flag not configured correctly. 'cash_identifiers' must be dictionary (explicit) or list "
            f"(for implicit), but got {type(cash_flag_specification['cash_identifiers'])}"
        )
        raise ValueError(
            f"cash_flag not configured correctly. 'cash_identifiers' must be dictionary (explicit) or "
            f"list (for implicit), but got {type(cash_flag_specification['cash_identifiers'])}"
        )
    return currency_code
|
|
1374
|
+
|
|
1375
|
+
|
|
1376
|
+
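# Illustrative usage sketch (not part of the library): the column name and values
# below are assumed example data, showing how an explicit cash_flag specification
# resolves a row to a currency code.
#
#   cash_flag = {
#       "cash_identifiers": {"instrument_name": {"GBP Cash": "GBP", "USD Cash": "USD"}},
#       "implicit": "local_currency",
#   }
#   row = {"instrument_name": "GBP Cash", "local_currency": "GBP"}
#   populate_currency_identifier_for_LUSID(row, "instrument_name", cash_flag)  # -> "GBP"
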
def validate_mapping_file_structure(mapping: dict, columns: list, file_type: str):
    """
    This function takes a mapping structure and checks that each of the required fields is present

    Parameters
    ----------
    mapping : dict
        mapping containing full mapping structure
    columns : list
        columns from source data to search in
    file_type : str
        type of file being upserted e.g. "instruments", "holdings", etc.

    Returns
    -------
    None

    """

    # file_type
    domain_lookup = load_json_file("config/domain_settings.json")
    file_type_check = (
        Validator(file_type, "file_type")
        .make_singular()
        .make_lower()
        .check_allowed_value(list(domain_lookup.keys()))
        .value
    )

    # required
    if "required" in mapping[file_type].keys():
        for field in mapping[file_type]["required"]:
            if isinstance(mapping[file_type]["required"][field], dict):
                check_mapping_fields_exist(
                    mapping[file_type]["required"][field]["column"].values(),
                    columns,
                    "required",
                )
            else:
                check_mapping_fields_exist(
                    mapping[file_type]["required"].values(), columns, "required"
                )
    else:
        raise ValueError("'required' mapping field not provided in mapping")

    # optional
    if "optional" in mapping.keys():
        check_mapping_fields_exist(
            mapping[file_type]["optional"].values(), columns, "optional"
        )

    # identifier_mapping
    if "identifier_mapping" in mapping[file_type].keys():
        check_mapping_fields_exist(
            mapping[file_type]["identifier_mapping"].values(),
            columns,
            "identifier_mapping",
        )


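# Illustrative usage sketch (assumed mapping and column names, not fixtures from this
# package): validation passes when every mapped source column exists in the data, and
# raises a ValueError if the "required" section is missing from the mapping.
#
#   mapping = {"instruments": {"required": {"name": "instrument_name"}}}
#   validate_mapping_file_structure(mapping, ["instrument_name", "figi"], "instruments")
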
def strip_whitespace(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    """
    This function removes leading and trailing whitespace from string values in a Pandas DataFrame

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing data to remove whitespace from
    columns : list
        list of columns to strip whitespace from

    Returns
    -------
    stripped_df : pd.DataFrame
        DataFrame with whitespace removed
    """

    stripped_df = pd.DataFrame.copy(df)

    for col in columns:
        stripped_df[col] = stripped_df[col].apply(
            lambda x: x.strip() if isinstance(x, str) else x
        )

    return stripped_df


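# Illustrative usage sketch (assumed example data): only the named columns are
# stripped, and non-string values pass through unchanged.
#
#   df = pd.DataFrame({"figi": [" BBG000BLNNH6 "], "units": [100]})
#   strip_whitespace(df, ["figi"])["figi"].iloc[0]  # -> "BBG000BLNNH6"
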
def generate_time_based_unique_id(time_generator: None):
    """
    Generates a unique ID based on the time since epoch.

    Parameters
    ----------
    time_generator
        Any class that has a .time() method on it which produces time since 1970 in seconds

    Returns
    -------
    uid : str
        Unique, time based ID

    """

    if time_generator is None or isinstance(time_generator, types.ModuleType):
        time_generator = default_time

    elif getattr(time_generator, "time", None) is None or not isinstance(
        getattr(time_generator, "time"), types.MethodType
    ):
        raise AttributeError(
            "The provided time_generator does not have a method called time"
        )

    # Get the current time since epoch
    current_time = time_generator.time()

    if not isinstance(current_time, int) and not isinstance(current_time, float):
        raise ValueError(
            f"The provided time_generator.time() did not return a number, it returned a {type(current_time)}"
        )

    # Multiply by 10^7 to convert seconds to units of 100 nanoseconds
    timestamp = hex(int(current_time * 10000000.0))
    # Create the scope id by joining the hex representation with dashes every 4 characters
    uid = "-".join(timestamp[i : i + 4] for i in range(2, len(timestamp), 4))
    return uid


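# Illustrative usage sketch: any object exposing a .time() method can be supplied;
# the FixedClock stub below is an assumed example. The ID is the hex encoding of
# the time in 100-nanosecond units, split into dash-separated groups of four.
#
#   class FixedClock:
#       def time(self):
#           return 1600000000  # seconds since epoch
#
#   generate_time_based_unique_id(FixedClock())  # -> "38d7-ea4c-6800-00"
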
def generate_uuid():
    return str(uuid.uuid4())


def create_scope_id(time_generator=None, use_uuid=False):
    """
    This function creates a unique ID based on the time since epoch for use
    as a scope id.

    Parameters
    ----------
    time_generator
        Any class that has a .time() method on it which produces time since 1970 in seconds
    use_uuid : bool
        If True, a random UUID is returned instead of a time based ID

    Returns
    -------
    scope_id : str
        Scope identifier
    """
    if use_uuid:
        return generate_uuid()
    else:
        return generate_time_based_unique_id(time_generator)


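# Illustrative usage sketch: the default is a time based scope id; use_uuid=True
# returns a random UUID4 string instead.
#
#   create_scope_id()               # e.g. "38d7-ea4c-6800-00" (varies with the clock)
#   create_scope_id(use_uuid=True)  # e.g. "0f8fad5b-d9cb-469f-a165-70867728950e"
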
def default_fx_forward_model(
    df: pd.DataFrame,
    fx_code: str,
    func_transaction_units: typing.Callable[[], bool],
    func_total_consideration: typing.Callable[[], bool],
    mapping: dict,
) -> tuple[pd.DataFrame, dict]:
    """
    Function that takes two rows representing a single FX forward and merges them into a single transaction

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing transactions data
    fx_code : str
        The transaction type that identifies a forward
    func_transaction_units : typing.Callable[[], bool]
        function that evaluates to true where the dataframe row contains transaction units
    func_total_consideration : typing.Callable[[], bool]
        function that evaluates to true where the dataframe row contains total consideration
    mapping : dict
        mapping for FX transactions

    Returns
    -------
    fwds_txn_df : pd.DataFrame
        DataFrame containing FX transactions merged into a single row
    mapping_cash_txn : dict
        updated mapping dictionary for fwds_txn_df
    """

    logging.info(
        f"combining transactions of type {fx_code} into a single line using {default_fx_forward_model.__name__}"
        f" utility function"
    )

    t_type = mapping["transactions"]["required"]["type"]

    if fx_code not in df[t_type].values:
        raise ValueError(
            f"Input transactions have no FX transaction type {fx_code} in transaction type column {t_type}"
        )

    fwds_df = pd.DataFrame(df[df[t_type] == fx_code])

    transaction_units_df = fwds_df[func_transaction_units]
    total_consideration_df = fwds_df[func_total_consideration]

    t_id = mapping["transactions"]["required"]["transaction_id"]

    transaction_units_suffix = "_txn"
    total_consideration_suffix = "_tc"
    logging.info(
        f"merging buy and sell legs of FX trades and suffixing with {[transaction_units_suffix, total_consideration_suffix]}"
    )
    fwds_txn_df = pd.merge(
        transaction_units_df,
        total_consideration_df,
        how="outer",
        on=[t_id, t_type],
        suffixes=[transaction_units_suffix, total_consideration_suffix],
    )

    mapping_cash_txn = remap_after_merge(
        mapping,
        transaction_units_suffix=transaction_units_suffix,
        total_consideration_suffix=total_consideration_suffix,
    )

    return fwds_txn_df, mapping_cash_txn


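# Illustrative usage sketch (assumed column names, transaction type code and mapping,
# not fixtures from this package): the two legs of each forward share a transaction id;
# the callables select the transaction-units leg and the total-consideration leg, and
# the merged row carries both legs' columns with the "_txn" / "_tc" suffixes.
#
#   merged_df, merged_mapping = default_fx_forward_model(
#       df,
#       "FwdFX",
#       func_transaction_units=lambda d: d["units"] > 0,
#       func_total_consideration=lambda d: d["units"] < 0,
#       mapping=mapping,
#   )
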
def remap_after_merge(
    mapping: dict, transaction_units_suffix: str, total_consideration_suffix: str
) -> dict:
    """
    Remaps buy and sell fields in a mapping dictionary to the suffixed column names after a dataframe merge

    Parameters
    ----------
    mapping : dict
        mapping dictionary that needs updating
    transaction_units_suffix : str
        Suffix appended to transaction units transaction fields (e.g. "_txn")
    total_consideration_suffix : str
        Suffix appended to total consideration transaction fields (e.g. "_tc")

    Returns
    -------
    mapping : dict
        updated mapping dictionary
    """
    new_mapping = copy.deepcopy(mapping)
    file_type = "transactions"
    logging.info("updating mapping to new Total Consideration and transaction fields")
    # currencies and amounts coming into the portfolio i.e. buy

    total_consideration_fields = [
        "total_consideration.amount",
        "total_consideration.currency",
        "settlement_currency",
    ]

    # currencies and amounts leaving the portfolio i.e. sell

    transaction_units_fields = ["units", "transaction_currency"]

    for key in new_mapping[file_type]["required"].keys():
        if key in transaction_units_fields:
            update_dict_value(
                new_mapping,
                key,
                new_mapping[file_type]["required"][key] + transaction_units_suffix,
                [file_type],
            )
        elif key in total_consideration_fields:
            update_dict_value(
                new_mapping,
                key,
                new_mapping[file_type]["required"][key] + total_consideration_suffix,
                [file_type],
            )
    return new_mapping


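# Illustrative usage sketch (assumed column names): only the transaction fields listed
# in the two groups above are re-pointed at the suffixed columns; other fields are left
# untouched.
#
#   mapping = {"transactions": {"required": {"units": "quantity", "type": "txn_type"}}}
#   remap_after_merge(mapping, "_txn", "_tc")
#   # -> {"transactions": {"required": {"units": "quantity_txn", "type": "txn_type"}}}
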
def update_dict_value(
    d: dict, s_key: str, val: typing.Union[str, float], top_level_values_to_search=[]
):
    """
    Recursively searches a dictionary for a key and updates the value

    This function searches for a key in a dictionary and updates the value belonging to any matching keys. The top
    level values in which to search can be restricted with top_level_values_to_search.

    Parameters
    ----------
    d : dict
        Dictionary to update
    s_key : str
        Key to search for that belongs to the value to be updated
    val : typing.Union[str, float]
        Updated value belonging to search key
    top_level_values_to_search : list
        (optional) specific top level keys (e.g. file types) in the mapping to update. If not specified, all matches
        are replaced

    Returns
    -------
    d : dict
        updated dictionary

    """
    # if a file type has been specified, only search the values belonging to that key
    if top_level_values_to_search:
        for f_type in top_level_values_to_search:
            if f_type in d.keys():
                d[f_type] = update_dict_value(d.get(f_type, {}), s_key, val)
            else:
                err = (
                    f"file_type {top_level_values_to_search} not found in top level of mapping. If passing the full "
                    f"mapping structure, ensure the file type has been correctly specified. If passing in a partial "
                    f"mapping structure, remove this parameter."
                )
                logging.error(err)
                raise KeyError(err)

    for k, v in d.items():
        # if no type specified and search key in keys
        if s_key in k:
            d[k] = update_value(d[k], val)
        # if no search key found, make recursive call for each key
        elif isinstance(v, dict) and not top_level_values_to_search:
            d[k] = update_dict_value(d.get(k, {}), s_key, val)
    return d


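# Illustrative usage sketch (assumed example dictionary): keys matching the search key
# are updated at any depth, optionally restricted to named top level keys.
#
#   d = {"transactions": {"required": {"units": "quantity"}}}
#   update_dict_value(d, "units", "quantity_txn", ["transactions"])
#   # -> {"transactions": {"required": {"units": "quantity_txn"}}}
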
def update_value(d: typing.Union[dict, str], val: typing.Union[str, float]):
    """
    Updates a value and handles the default and constant ($) specification formats

    Parameters
    ----------
    d : typing.Union[dict, str]
        Data value to update
    val : typing.Union[str, float]
        New value to set

    Returns
    -------
    d : typing.Union[dict, str]
        The updated value

    """

    # update values provided in "column"/"default" format
    if isinstance(d, dict):
        if set(d.keys()) != {"column", "default"}:
            err = f"Failed to update dictionary. Expected ['column', 'default'] in {d}, but found {list(d.keys())}"
            raise ValueError(err)

        if type(val) != type(d["column"]):
            warn = "new data type is not same as original value"
            # logging.warning(warn)
        d["column"] = val
        return d

    if type(val) != type(d):
        warn = "new data type is not same as original value"
        # logging.warning(warn)

    # update value provided with constant format using "$"
    if isinstance(d, str) and d[0] == "$":
        return {"default": d[1:], "column": val}
    # for any other data types, simply update the value
    d = val

    return d


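# Illustrative usage sketch (assumed example values) of the three shapes handled:
#
#   update_value("quantity", "quantity_txn")
#   # -> "quantity_txn"                            plain column name is replaced
#   update_value({"column": "ccy", "default": "GBP"}, "ccy_tc")
#   # -> {"column": "ccy_tc", "default": "GBP"}    only the column is re-pointed
#   update_value("$GBP", "ccy_tc")
#   # -> {"default": "GBP", "column": "ccy_tc"}    a "$" constant gains a column
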
def group_request_into_one(
    model_type: str, request_list: list, attribute_for_grouping: list, batch_index=0
):
    """
    This function takes a list of requests and collates an attribute from each request, adding the collated attributes
    back onto the first request in the list. The function returns the modified first request.
    For example, the function can take a list of CreatePortfolioGroupRequests, extract the "values" or portfolios from
    each request, and then add all portfolios back onto the first request in the list.

    Parameters
    ----------
    model_type : str
        the model type which we will modify (e.g. "CreatePortfolioGroupRequest").
    request_list : list
        a list of requests.
    attribute_for_grouping : list
        the attributes on these requests which will be grouped.
    batch_index
        The index of the batch

    Returns
    -------
    request
        a single LUSID request
    """

    # Define a base request for modifying - this is the first request in the list by default

    if model_type not in dir(models):
        raise ValueError(f"The model {model_type} is not a valid LUSID model.")

    model_class = getattr(models, model_type)

    if batch_index > len(request_list):
        raise IndexError(
            f"The length of the batch_index ({batch_index}) is greater than the request_list ({len(request_list)}) provided."
        )

    if type(attribute_for_grouping) == list and len(attribute_for_grouping) == 0:
        raise ValueError("The provided list of attribute_for_grouping is empty.")

    base_request = request_list[batch_index]

    attribs = get_attributes_and_types(model_class)

    for attrib in attribute_for_grouping:
        # for attrib, attrib_type in attribs.items():

        if "List[" in attribs[attrib]:

            # Collect the attributes from each request onto a list

            batch_attrib = [
                lusid_model
                for nested_list in [
                    getattr(request, attrib)
                    for request in request_list
                    if getattr(request, attrib) is not None
                ]
                for lusid_model in nested_list
            ]

            # Assign collated values onto the base request

            setattr(base_request, attrib, batch_attrib)

        # elif "dict" in attrib_type:
        elif "Mapping[" in attribs[attrib]:
            # Collect the attributes from each request onto a dictionary

            batch_attrib = dict(
                [
                    (lusid_model, nested_list[lusid_model])
                    for nested_list in [
                        getattr(request, attrib)
                        for request in request_list
                        if getattr(request, attrib) is not None
                    ]
                    for lusid_model in nested_list
                ]
            )

            # Assign collated values onto the base request

            setattr(base_request, attrib, batch_attrib)

    # Return base request with collated attributes

    return base_request


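# Illustrative usage sketch (assumed scope and code values): collating the "values"
# lists of several CreatePortfolioGroupRequest objects onto the first request, so one
# request carries every portfolio.
#
#   requests = [
#       models.CreatePortfolioGroupRequest(
#           code="group-1",
#           display_name="group-1",
#           values=[models.ResourceId(scope="prod", code=f"portfolio-{i}")],
#       )
#       for i in range(3)
#   ]
#   combined = group_request_into_one("CreatePortfolioGroupRequest", requests, ["values"])
#   # combined.values now holds all three ResourceIds
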
def extract_unique_portfolio_codes(sync_batches: list):
    """
    Extract a unique list of portfolio codes from the sync_batches

    Parameters
    ----------
    sync_batches : list
        A list of the batches used to upload the data into LUSID.

    Returns
    -------
    A list of all the unique portfolio codes in the sync batches
    """
    codes_list = []
    for sync_batch in sync_batches:
        for key, value in sync_batch.items():
            if key == "codes":
                codes_list.extend(value)
    return list(set(codes_list))


def extract_unique_portfolio_codes_effective_at_tuples(sync_batches: list):
    """
    Extract a unique list of tuples containing portfolio codes and effective_at times

    Parameters
    ----------
    sync_batches : list
        A list of the batches used to upload the data into LUSID.

    Returns
    -------
    A list of all the unique tuples of portfolio codes and effective at times in the sync batches
    """
    code_tuples = []
    for sync_batch in sync_batches:
        for code, effective_at in zip(sync_batch["codes"], sync_batch["effective_at"]):
            # Append a tuple of (code, effective_at) to the code_tuples list
            code_tuples.append((code, effective_at))
    return list(set(code_tuples))
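# Illustrative usage sketch (assumed batch shape): each sync batch pairs the uploaded
# portfolio codes with the effective_at dates they were loaded for.
#
#   batches = [
#       {"codes": ["PF-1", "PF-2"], "effective_at": ["2020-01-01", "2020-01-01"]},
#       {"codes": ["PF-1"], "effective_at": ["2020-01-02"]},
#   ]
#   extract_unique_portfolio_codes_effective_at_tuples(batches)
#   # -> the three unique (code, effective_at) tuples, in arbitrary order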