finbourne-sdk-utils 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- features/__init__.py +0 -0
- features/main.py +11 -0
- finbourne_sdk_utils/__init__.py +8 -0
- finbourne_sdk_utils/cocoon/__init__.py +34 -0
- finbourne_sdk_utils/cocoon/async_tools.py +94 -0
- finbourne_sdk_utils/cocoon/cocoon.py +1862 -0
- finbourne_sdk_utils/cocoon/cocoon_printer.py +455 -0
- finbourne_sdk_utils/cocoon/config/domain_settings.json +125 -0
- finbourne_sdk_utils/cocoon/config/seed_sample_data.json +36 -0
- finbourne_sdk_utils/cocoon/dateorcutlabel.py +198 -0
- finbourne_sdk_utils/cocoon/instruments.py +482 -0
- finbourne_sdk_utils/cocoon/properties.py +442 -0
- finbourne_sdk_utils/cocoon/seed_sample_data.py +137 -0
- finbourne_sdk_utils/cocoon/systemConfiguration.py +92 -0
- finbourne_sdk_utils/cocoon/transaction_type_upload.py +136 -0
- finbourne_sdk_utils/cocoon/utilities.py +1877 -0
- finbourne_sdk_utils/cocoon/validator.py +243 -0
- finbourne_sdk_utils/extract/__init__.py +1 -0
- finbourne_sdk_utils/extract/group_holdings.py +400 -0
- finbourne_sdk_utils/iam/__init__.py +1 -0
- finbourne_sdk_utils/iam/roles.py +74 -0
- finbourne_sdk_utils/jupyter_tools/__init__.py +2 -0
- finbourne_sdk_utils/jupyter_tools/hide_code_button.py +23 -0
- finbourne_sdk_utils/jupyter_tools/stop_execution.py +14 -0
- finbourne_sdk_utils/logger/LusidLogger.py +41 -0
- finbourne_sdk_utils/logger/__init__.py +1 -0
- finbourne_sdk_utils/lpt/__init__.py +0 -0
- finbourne_sdk_utils/lpt/back_compat.py +20 -0
- finbourne_sdk_utils/lpt/cash_ladder.py +191 -0
- finbourne_sdk_utils/lpt/connect_lusid.py +64 -0
- finbourne_sdk_utils/lpt/connect_none.py +5 -0
- finbourne_sdk_utils/lpt/connect_token.py +9 -0
- finbourne_sdk_utils/lpt/dfq.py +321 -0
- finbourne_sdk_utils/lpt/either.py +65 -0
- finbourne_sdk_utils/lpt/get_instruments.py +101 -0
- finbourne_sdk_utils/lpt/lpt.py +374 -0
- finbourne_sdk_utils/lpt/lse.py +188 -0
- finbourne_sdk_utils/lpt/map_instruments.py +164 -0
- finbourne_sdk_utils/lpt/pager.py +32 -0
- finbourne_sdk_utils/lpt/record.py +13 -0
- finbourne_sdk_utils/lpt/refreshing_token.py +43 -0
- finbourne_sdk_utils/lpt/search_instruments.py +48 -0
- finbourne_sdk_utils/lpt/stdargs.py +154 -0
- finbourne_sdk_utils/lpt/txn_config.py +128 -0
- finbourne_sdk_utils/lpt/txn_config_yaml.py +493 -0
- finbourne_sdk_utils/pandas_utils/__init__.py +0 -0
- finbourne_sdk_utils/pandas_utils/lusid_pandas.py +128 -0
- finbourne_sdk_utils-0.0.24.dist-info/LICENSE +21 -0
- finbourne_sdk_utils-0.0.24.dist-info/METADATA +25 -0
- finbourne_sdk_utils-0.0.24.dist-info/RECORD +52 -0
- finbourne_sdk_utils-0.0.24.dist-info/WHEEL +5 -0
- finbourne_sdk_utils-0.0.24.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from dateutil import parser
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
import pytz
|
|
6
|
+
import re
|
|
7
|
+
from collections import UserString
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _process_timestamp(datetime_value: pd.Timestamp):
|
|
11
|
+
"""
|
|
12
|
+
Processes pandas timestamp convert it to ISO format and parse to string
|
|
13
|
+
|
|
14
|
+
Parameters
|
|
15
|
+
----------
|
|
16
|
+
datetime : pd.Timestamp
|
|
17
|
+
Datetime value
|
|
18
|
+
|
|
19
|
+
Returns
|
|
20
|
+
-------
|
|
21
|
+
datetime : pd.Timestamp
|
|
22
|
+
Datetime value in ISO format
|
|
23
|
+
"""
|
|
24
|
+
return pd.to_datetime(datetime_value, utc=True, unit="us").isoformat()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _process_custom_date(datetime_value: str, date_format: str) -> str:
|
|
28
|
+
"""
|
|
29
|
+
Processes a datetime provided in custom format
|
|
30
|
+
|
|
31
|
+
Parameters
|
|
32
|
+
----------
|
|
33
|
+
datetime_value : str
|
|
34
|
+
Custom Datetime value
|
|
35
|
+
date_format : str
|
|
36
|
+
Format of custom Datetime
|
|
37
|
+
|
|
38
|
+
Returns
|
|
39
|
+
-------
|
|
40
|
+
datetime_value : str
|
|
41
|
+
Datetime value as str
|
|
42
|
+
"""
|
|
43
|
+
if not isinstance(datetime_value, str):
|
|
44
|
+
raise TypeError(
|
|
45
|
+
f"Date {datetime_value} is of type {type(datetime_value)} must be of type 'str' "
|
|
46
|
+
f"when specifying a custom date format. "
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
try:
|
|
50
|
+
datetime_value = datetime.strptime(datetime_value, date_format).isoformat()
|
|
51
|
+
except ValueError:
|
|
52
|
+
raise ValueError(
|
|
53
|
+
f"The date format provided {date_format} was not recognised in the"
|
|
54
|
+
f" datetime provided: {datetime_value}"
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
return datetime_value
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _process_date_as_string(datetime_value: str):
|
|
61
|
+
"""
|
|
62
|
+
Adds timezone to partially ISO valid datetimes as strings
|
|
63
|
+
|
|
64
|
+
Parameters
|
|
65
|
+
----------
|
|
66
|
+
datetime_value : str
|
|
67
|
+
Datetime value provided as a string
|
|
68
|
+
|
|
69
|
+
Returns
|
|
70
|
+
-------
|
|
71
|
+
datetime_value : str
|
|
72
|
+
Datetime value provided as a string in valid ISO format
|
|
73
|
+
|
|
74
|
+
"""
|
|
75
|
+
# Cut label regular expression, no modification required
|
|
76
|
+
if re.findall("\d{4}-\d{2}-\d{2}N\w+", datetime_value):
|
|
77
|
+
pass
|
|
78
|
+
|
|
79
|
+
# Already in isoformat and UTC timezone
|
|
80
|
+
elif re.findall(
|
|
81
|
+
"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z", datetime_value
|
|
82
|
+
) or re.findall("\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d+Z", datetime_value):
|
|
83
|
+
pass
|
|
84
|
+
|
|
85
|
+
# Already in isoformat but not necessarily UTC timezone
|
|
86
|
+
elif re.findall(
|
|
87
|
+
"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[\+-]\d{2}:\d+", datetime_value
|
|
88
|
+
) or re.findall(
|
|
89
|
+
"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d+[\+-]\d{2}:\d{2}", datetime_value
|
|
90
|
+
):
|
|
91
|
+
# Convert to UTC
|
|
92
|
+
datetime_value = (
|
|
93
|
+
parser.isoparse(datetime_value).astimezone(pytz.utc).isoformat()
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# ISO format with no timezone
|
|
97
|
+
elif re.findall("\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", datetime_value):
|
|
98
|
+
datetime_value = datetime_value + "+00:00"
|
|
99
|
+
elif re.findall("\d{4}-\d{2}-\d{2}", datetime_value):
|
|
100
|
+
datetime_value = datetime_value + "T00:00:00+00:00"
|
|
101
|
+
else:
|
|
102
|
+
datetime_value = _process_datetime(
|
|
103
|
+
parser.parse(timestr=datetime_value, dayfirst=True)
|
|
104
|
+
)
|
|
105
|
+
return datetime_value
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _process_numpy_datetime64(datetime_value: np.datetime64) -> str:
|
|
109
|
+
"""
|
|
110
|
+
Converts numpy.datetime64 to UTC date to a string
|
|
111
|
+
|
|
112
|
+
Parameters
|
|
113
|
+
----------
|
|
114
|
+
datetime_value : np.datetime64
|
|
115
|
+
Datetime value
|
|
116
|
+
|
|
117
|
+
Returns
|
|
118
|
+
-------
|
|
119
|
+
datetime_value : str
|
|
120
|
+
timezone aware UTC date
|
|
121
|
+
|
|
122
|
+
"""
|
|
123
|
+
return str(np.datetime_as_string(arr=datetime_value, timezone="UTC", unit="us"))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _process_ndarray(datetime_value: np.ndarray) -> str:
|
|
127
|
+
"""
|
|
128
|
+
Converts numpy.ndarray to UTC date to a string
|
|
129
|
+
|
|
130
|
+
Parameters
|
|
131
|
+
----------
|
|
132
|
+
datetime_value : np.ndarray
|
|
133
|
+
Datetime value
|
|
134
|
+
|
|
135
|
+
Returns
|
|
136
|
+
-------
|
|
137
|
+
datetime_value : str
|
|
138
|
+
timezone aware UTC date
|
|
139
|
+
|
|
140
|
+
"""
|
|
141
|
+
return str(np.datetime_as_string(arr=datetime_value, timezone="UTC", unit="us")[0])
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _process_datetime(datetime_value):
|
|
145
|
+
# If there is no timezone assume that it is in UTC
|
|
146
|
+
if (
|
|
147
|
+
datetime_value.tzinfo is None
|
|
148
|
+
or datetime_value.tzinfo.utcoffset(datetime_value) is None
|
|
149
|
+
):
|
|
150
|
+
return datetime_value.replace(tzinfo=pytz.UTC).isoformat()
|
|
151
|
+
# If there is a timezone convert to UTC
|
|
152
|
+
else:
|
|
153
|
+
return datetime_value.astimezone(pytz.UTC).isoformat()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class DateOrCutLabel(UserString):
    """
    A string-like wrapper that normalises dates, datetimes and cut labels into
    a timezone-aware UTC ISO-8601 string (cut labels pass through unchanged).
    """

    def __init__(self, datetime_value, date_format=None):
        """
        Normalise the supplied value and store it as the underlying string.

        Parameters
        ----------
        datetime_value : any
            Datetime variable
        date_format : str
            (optional) The format of a custom date as a string eg "%Y-%m-%d %H:%M:%S.%f". see https://strftime.org/
        """
        # A custom-format date arrives as a string; parse it into ISO form
        # first so the string branch below attaches the UTC timezone.
        if date_format:
            datetime_value = _process_custom_date(datetime_value, date_format)

        # Dispatch on the input type; unrecognised types pass through as-is.
        if isinstance(datetime_value, str):
            converted = _process_date_as_string(datetime_value)
        elif isinstance(datetime_value, datetime):
            converted = _process_datetime(datetime_value)
        elif isinstance(datetime_value, np.datetime64):
            converted = _process_numpy_datetime64(datetime_value)
        elif isinstance(datetime_value, np.ndarray):
            converted = _process_ndarray(datetime_value)
        else:
            converted = datetime_value

        self.data = converted
|
|
@@ -0,0 +1,482 @@
|
|
|
1
|
+
import lusid
|
|
2
|
+
import lusid.models as models
|
|
3
|
+
from lusid.api import InstrumentsApi, SearchApi
|
|
4
|
+
import numpy as np
|
|
5
|
+
import time
|
|
6
|
+
from finbourne_sdk_utils.cocoon.utilities import checkargs
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import logging
|
|
9
|
+
import re
|
|
10
|
+
from finbourne_sdk_utils.cocoon.async_tools import run_in_executor
|
|
11
|
+
import asyncio
|
|
12
|
+
from typing import Callable
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@checkargs
def prepare_key(identifier_lusid: str, full_key_format: bool) -> str:
    """
    This function prepares the key for the identifier based on whether the full key or just the code is required

    Parameters
    ----------
    identifier_lusid : str
        The LUSID identifier in either full key format or code only
    full_key_format : bool
        Whether the key needs to be in the full key format

    Returns
    -------
    str
        The LUSID identifier in the correct format
    """
    # Regex patterns are raw strings — '\S' in a plain string literal is an
    # invalid escape sequence (SyntaxWarning on Python 3.12+).
    if full_key_format:
        # Already a full 'Instrument/<scope>/<code>' key; otherwise assume the
        # default scope and wrap the bare code
        return (
            identifier_lusid
            if re.findall(r"Instrument/\S+/\S+", identifier_lusid)
            else f"Instrument/default/{identifier_lusid}"
        )
    else:
        # Strip a 'Instrument/default/<code>' key down to the bare code
        return (
            identifier_lusid.split("/")[2]
            if re.findall(r"Instrument/default/\S+", identifier_lusid)
            else identifier_lusid
        )
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@checkargs
def create_identifiers(
    index,
    row: pd.Series,
    file_type: str,
    instrument_identifier_mapping: dict = None,
    unique_identifiers: list = None,
    full_key_format: bool = True,
    prepare_key: Callable = prepare_key,
) -> dict:
    """
    Builds the identifier dictionary for a single row of the DataFrame.

    Parameters
    ----------
    index
        The index of the row in the DataFrame
    row : pd.Series
        The row of the DataFrame to create identifiers for
    file_type : str
        The file type to create identifiers for
    instrument_identifier_mapping : dict
        The instrument identifier mapping to use
    unique_identifiers : list
        The list of allowable unique instrument identifiers
    full_key_format : bool
        Whether the full key format i.e. 'Instrument/default/Figi' is required
    prepare_key : callable
        The function to use to prepare the key

    Returns
    -------
    identifiers : dict
        The identifiers to use on the request

    Raises
    ------
    ValueError
        If the row has no populated identifiers, or (for instrument files)
        none of the populated identifiers is a unique identifier
    """

    # Populate the identifiers for this instrument; columns without a value
    # are skipped entirely
    identifiers = {}
    for identifier_lusid, identifier_column in instrument_identifier_mapping.items():
        # Only use the identifier if it has a value
        if pd.isna(row[identifier_column]):
            continue
        # Handles specifying the entire property key e.g. Instrument/default/Figi
        # or just the code e.g. Figi
        key = prepare_key(identifier_lusid, full_key_format)
        if file_type == "instrument":
            identifiers[key] = models.InstrumentIdValue(
                value=str(row[identifier_column])
            )
        else:
            # type is instrument_identifiers: Dict[str, StrictStr]
            identifiers[key] = str(row[identifier_column])

    # If there are no identifiers raise an error
    if not identifiers:
        raise ValueError(
            f"""The row at index {str(index)} has no value for every single one of the provided
            identifiers. Please ensure that each row has at least one identifier and try again"""
        )

    # Check that at least one unique identifier exists if it is an instrument file (need to move this out of here)
    if file_type == "instrument":

        # Get the unique list of unique identifiers which have been populated
        unique_identifiers_populated = list(
            set(unique_identifiers).intersection(set(list(identifiers.keys())))
        )

        # If there are no unique identifiers raise an Exception as you need at least one to make a successful call
        if not unique_identifiers_populated:
            raise ValueError(
                f"""The instrument at index {str(index)} has no value for at least one unique
                identifier. Please ensure that each instrument has at least one unique identifier and try again. The
                allowed unique identifiers are {str(unique_identifiers)}"""
            )

    else:
        # If the transaction/holding is cash remove all other identifiers and just use this one
        if "Instrument/default/Currency" in list(identifiers.keys()):
            currency_value = identifiers["Instrument/default/Currency"]
            identifiers.clear()

            if currency_value == 'nan':
                currency_value = 'GBP'  # default to GBP if not supplied

            identifiers["Instrument/default/Currency"] = currency_value

    return identifiers
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@checkargs
def resolve_instruments(
    api_factory: lusid.SyncApiClientFactory,
    data_frame: pd.DataFrame,
    identifier_mapping: dict,
):
    """
    This function attempts to resolve each row of the file to an instrument in LUSID

    Parameters
    ----------
    api_factory : lusid.SyncApiClientFactory
        An instance of the Lusid Api Factory
    data_frame : pd.DataFrame
        The DataFrame containing the transactions or holdings to resolve to unique instruments
    identifier_mapping : dict
        The column mapping between allowable identifiers in LUSID and identifier columns in the dataframe

    Returns
    -------
    _data_frame : pd.DataFrame
        The input DataFrame with resolution columns added

    Raises
    ------
    KeyError
        If no cash identifier ("Currency"/"Instrument/default/Currency") is present in
        identifier_mapping, or a mapped column is missing from the DataFrame
    """

    # A cash identifier mapping is mandatory (its value may be None when no rows are cash)
    if "Currency" not in list(
        identifier_mapping.keys()
    ) and "Instrument/default/Currency" not in list(identifier_mapping.keys()):
        raise KeyError(
            """There is no column specified in the identifier_mapping to identify whether or not an instrument is cash.
            Please specify add the key "Currency" or "Instrument/default/Currency" to your mapping. If no instruments
            are cash you can set the value to be None"""
        )

    # Normalise the short-hand "Currency" key to the full LUSID property key.
    # NOTE(review): this mutates the caller's identifier_mapping dict in place.
    if "Currency" in list(identifier_mapping.keys()):
        identifier_mapping["Instrument/default/Currency"] = identifier_mapping[
            "Currency"
        ]
        del identifier_mapping["Currency"]

    # Check that the values of the mapping exist in the DataFrame
    if not (set(identifier_mapping.values()) <= set(data_frame.columns)):
        raise KeyError(
            "there are values in identifier_mapping that are not columns in the dataframe"
        )

    # Get the allowable instrument identifiers from LUSID
    response = api_factory.build(InstrumentsApi).get_instrument_identifier_types()
    """
    # Collect the names and property keys for the identifiers and concatenate them
    allowable_identifier_names = [identifier.identifier_type for identifier in response.values]
    allowable_identifier_keys = [identifier.property_key for identifier in response.values]
    allowable_identifiers = allowable_identifier_names + allowable_identifier_keys

    # Check that the identifiers in the mapping are all allowed to be used in LUSID
    if not (set(identifier_mapping['identifier_mapping'].keys()) <= set(allowable_identifiers)):
        raise Exception(
            'there are LUSID identifiers in the identifier_mapping which are not configured in LUSID')
    """
    # Copy the data_frame to ensure the original isn't modified
    _data_frame = data_frame.copy(deep=True)

    # Set up the result Pandas Series to track resolution
    found_with = pd.Series(index=_data_frame.index, dtype=np.dtype(object))
    resolvable = pd.Series(index=_data_frame.index, dtype=np.dtype(bool))
    luid = pd.Series(index=_data_frame.index, dtype=np.dtype(object))
    comment = pd.Series(index=_data_frame.index, dtype=np.dtype(object))
    logging.info("Beginning instrument resolution process")
    # Iterate over each row in the DataFrame
    for index, row in _data_frame.iterrows():

        # Progress logging every 10 rows.
        # NOTE(review): `index % 10` and the `.iloc[index]` calls below assume the
        # DataFrame has a default integer RangeIndex — verify for custom indexes.
        if index % 10 == 0:
            logging.info(f"Up to row {index}")
        # Initialise list to hold the identifiers used to resolve
        found_with_current = []
        # Initialise a value of False for the row's resolvability to an instrument in LUSID
        resolvable_current = False
        # Initialise the LUID value
        luid_current = None
        # Initialise the comment value
        comment_current = "No instruments found for the given identifiers"
        # Takes the currency resolution function and applies it
        currency = row[identifier_mapping["Instrument/default/Currency"]]

        # A populated currency column means the row is cash — the currency code
        # itself is used as the LUID
        if not pd.isna(currency):
            resolvable_current = True
            found_with_current.append(currency)
            luid_current = currency
            comment_current = "Resolved as cash with a currency"

        # Build one search request per populated identifier column, defaulting
        # bare identifier codes to the 'Instrument/default/' scope
        search_requests = [
            models.InstrumentSearchProperty(
                key=f"Instrument/default/{identifier_lusid}"
                if "Instrument/" not in identifier_lusid
                else identifier_lusid,
                value=str(row[identifier_dataframe]),
            )
            for identifier_lusid, identifier_dataframe in identifier_mapping.items()
            if not pd.isnull(row[identifier_dataframe])
        ]

        # Call LUSID to search for instruments
        attempts = 0

        if len(search_requests) > 0:
            # Retry the search up to 3 times, pausing 5s after each API failure
            while attempts < 3:
                try:
                    response = api_factory.build(SearchApi).instruments_search(
                        instrument_search_property=search_requests, mastered_only=True
                    )
                    break
                except lusid.exceptions.ApiException as error_message:
                    attempts += 1
                    comment_current = f"Failed to find instrument due to LUSID error during search due to status {error_message.status} with reason {error_message.reason}"
                    time.sleep(5)

            # All retries exhausted — record the failure and move to the next row
            if attempts == 3:
                # Update the luid series
                luid.iloc[index] = luid_current
                # Update the found with series
                found_with.iloc[index] = found_with_current
                # Update the resolvable series
                resolvable.iloc[index] = resolvable_current
                # Update the comment series
                comment.iloc[index] = comment_current
                continue

            # Track which search request produced each result
            search_request_number = -1

            for result in response:

                search_request_number += 1
                # If there are matches
                if len(result.mastered_instruments) == 1:
                    # Add the identifier responsible for the successful search request to the list
                    found_with_current.append(
                        search_requests[search_request_number].key.split("/")[2]
                    )
                    comment_current = (
                        "Uniquely resolved to an instrument in the securities master"
                    )
                    resolvable_current = True
                    luid_current = (
                        result.mastered_instruments[0]
                        .identifiers["LusidInstrumentId"]
                        .value
                    )
                    break

                # Ambiguous match — mark the row unresolvable
                elif len(result.mastered_instruments) > 1:
                    comment_current = f'Multiple instruments found for the instrument using identifier {search_requests[search_request_number].key.split("/")[2]}'
                    resolvable_current = False
                    luid_current = np.nan

        # Update the luid series
        luid.iloc[index] = luid_current
        # Update the found with series
        found_with.iloc[index] = found_with_current
        # Update the resolvable series
        resolvable.iloc[index] = resolvable_current
        # Update the comment series
        comment.iloc[index] = comment_current

    # Add the series to the dataframe
    _data_frame["resolvable"] = resolvable
    _data_frame["foundWith"] = found_with
    _data_frame["LusidInstrumentId"] = luid
    _data_frame["comment"] = comment

    return _data_frame
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
@checkargs
def get_unique_identifiers(api_factory: lusid.SyncApiClientFactory):
    """
    Tests getting the unique instrument identifiers

    Parameters
    ----------
    api_factory : lusid.SyncApiClientFactory
        The LUSID api factory to use

    Returns
    -------
    list[str]
        The property keys of the available identifiers
    """
    # Get the allowed instrument identifiers from LUSID
    instruments_api = api_factory.build(lusid.api.InstrumentsApi)
    identifier_types = instruments_api.get_instrument_identifier_types()

    # Keep only the identifiers that are configured to be unique
    unique_identifier_types = []
    for identifier in identifier_types.values:
        if identifier.is_unique_identifier_type:
            unique_identifier_types.append(identifier.identifier_type)

    return unique_identifier_types
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
async def enrich_instruments(
    api_factory: lusid.SyncApiClientFactory,
    data_frame: pd.DataFrame,
    instrument_identifier_mapping: dict,
    mapping_required: dict,
    constant_prefix: str = "$",
    **kwargs,
):
    """
    Enriches instrument names in a DataFrame by searching LUSID.

    For each row, a search request is built per populated identifier column;
    the first search that returns an external instrument supplies the name.
    The names are stored in a new "LUSID.Name.Enriched" column and the
    required mapping's "name" entry is updated to fall back to that column.

    Parameters
    ----------
    api_factory : lusid.SyncApiClientFactory
        The api factory to use
    data_frame : pd.DataFrame
        The instruments to enrich; a "LUSID.Name.Enriched" column is added in place
    instrument_identifier_mapping : dict
        Mapping from LUSID identifier (full key or code) to DataFrame column
    mapping_required : dict
        The required mapping; its "name" entry is updated in place
    constant_prefix : str
        The prefix marking a mapping value as a constant rather than a column
    kwargs
        Passed through to the instrument search

    Returns
    -------
    tuple[pd.DataFrame, dict]
        The DataFrame with the enriched name column and the updated mapping
    """
    # Build one list of search requests per row — one request per populated identifier
    search_requests_all = []

    for index, row in data_frame.iterrows():
        search_requests_instrument = [
            lusid.models.InstrumentSearchProperty(
                # Accept either a full property key or a bare identifier code.
                # Raw string used for the regex — '\S' in a plain string is an
                # invalid escape sequence (SyntaxWarning on Python 3.12+).
                key=identifier_lusid
                if re.findall(r"Instrument/default/\S+", identifier_lusid)
                else f"Instrument/default/{identifier_lusid}",
                value=row[identifier_column],
            )
            for identifier_lusid, identifier_column in instrument_identifier_mapping.items()
            if not pd.isna(row[identifier_column])
        ]

        search_requests_all.append(search_requests_instrument)

    # Run the per-row searches concurrently
    responses = await asyncio.gather(
        *[
            instrument_search(
                api_factory=api_factory, search_requests=search_requests, **kwargs
            )
            for search_requests in search_requests_all
        ],
        return_exceptions=False,
    )

    names = []

    for response in responses:
        name = np.nan
        # Take the name from the first search that found an external instrument;
        # failed searches (ApiException entries) are logged and skipped
        for single_search in response:
            if isinstance(single_search, Exception):
                logging.warning(single_search)
                continue
            elif len(single_search[0].external_instruments) > 0:
                name = single_search[0].external_instruments[0].name
                break
        names.append(name)

    enriched_column_name = "LUSID.Name.Enriched"

    data_frame[enriched_column_name] = names

    # Missing mapping for name altogether
    if "name" not in list(mapping_required.keys()):
        mapping_required["name"] = enriched_column_name

    # A column for name already exists and needs to be enriched
    elif (
        isinstance(mapping_required["name"], str)
        and mapping_required["name"][0] != constant_prefix
    ):
        data_frame[mapping_required["name"]] = data_frame[
            mapping_required["name"]
        ].fillna(value=data_frame[enriched_column_name])

    # A nested mapping with an explicit column to enrich
    elif isinstance(mapping_required["name"], dict) and "column" in list(
        mapping_required["name"].keys()
    ):
        data_frame[mapping_required["name"]["column"]] = data_frame[
            mapping_required["name"]["column"]
        ].fillna(value=data_frame[enriched_column_name])

    # Is a constant specified by the constant prefix
    elif (
        isinstance(mapping_required["name"], str)
        and mapping_required["name"][0] == constant_prefix
    ):
        mapping_required["name"] = {"default": mapping_required["name"][1:]}
        mapping_required["name"]["column"] = enriched_column_name

    # Is a constant specified by the default nested dictionary specification
    elif (
        isinstance(mapping_required["name"], dict)
        and "default" in list(mapping_required["name"].keys())
        and "column" not in list(mapping_required["name"].keys())
    ):
        mapping_required["name"]["column"] = enriched_column_name

    return data_frame, mapping_required
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
async def instrument_search(
    api_factory: lusid.SyncApiClientFactory, search_requests: list, **kwargs
) -> list:
    """
    Conducts an instrument search for a single instrument

    Parameters
    ----------
    api_factory : lusid.SyncApiClientFactory
        The api factory to use
    search_requests: list[lusid.models.InstrumentSearchProperty]
        The search requests for this instrument
    kwargs

    Returns
    -------
    list
        One entry per search request: either the search result, or the
        ApiException raised while attempting it
    """
    results = []

    for search_request in search_requests:
        try:
            single_result = await instrument_search_single(
                api_factory, search_request, **kwargs
            )
        except lusid.exceptions.ApiException as api_error:
            # Keep the exception in place of the result so the caller can
            # inspect failures per request
            results.append(api_error)
        else:
            results.append(single_result)

    return results
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
@run_in_executor
def instrument_search_single(
    api_factory: lusid.SyncApiClientFactory,
    search_request: lusid.models.InstrumentSearchProperty,
    **kwargs,
) -> list:
    """
    Conducts an instrument search with a single search request

    Parameters
    ----------
    api_factory : lusid.SyncApiClientFactory
        The Api factory to use
    search_request : lusid.models.InstrumentSearchProperty
        The search request
    kwargs

    Returns
    -------
    list[lusid.models.InstrumentMatch]
        The results of the search
    """
    search_api = api_factory.build(lusid.api.SearchApi)
    return search_api.instruments_search(instrument_search_property=[search_request])
|
|
482
|
+
|