mainsequence-2.0.0-py3-none-any.whl
- mainsequence/__init__.py +0 -0
- mainsequence/__main__.py +9 -0
- mainsequence/cli/__init__.py +1 -0
- mainsequence/cli/api.py +157 -0
- mainsequence/cli/cli.py +442 -0
- mainsequence/cli/config.py +78 -0
- mainsequence/cli/ssh_utils.py +126 -0
- mainsequence/client/__init__.py +17 -0
- mainsequence/client/base.py +431 -0
- mainsequence/client/data_sources_interfaces/__init__.py +0 -0
- mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
- mainsequence/client/data_sources_interfaces/timescale.py +479 -0
- mainsequence/client/models_helpers.py +113 -0
- mainsequence/client/models_report_studio.py +412 -0
- mainsequence/client/models_tdag.py +2276 -0
- mainsequence/client/models_vam.py +1983 -0
- mainsequence/client/utils.py +387 -0
- mainsequence/dashboards/__init__.py +0 -0
- mainsequence/dashboards/streamlit/__init__.py +0 -0
- mainsequence/dashboards/streamlit/assets/config.toml +12 -0
- mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
- mainsequence/dashboards/streamlit/assets/logo.png +0 -0
- mainsequence/dashboards/streamlit/core/__init__.py +0 -0
- mainsequence/dashboards/streamlit/core/theme.py +212 -0
- mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
- mainsequence/dashboards/streamlit/scaffold.py +220 -0
- mainsequence/instrumentation/__init__.py +7 -0
- mainsequence/instrumentation/utils.py +101 -0
- mainsequence/instruments/__init__.py +1 -0
- mainsequence/instruments/data_interface/__init__.py +10 -0
- mainsequence/instruments/data_interface/data_interface.py +361 -0
- mainsequence/instruments/instruments/__init__.py +3 -0
- mainsequence/instruments/instruments/base_instrument.py +85 -0
- mainsequence/instruments/instruments/bond.py +447 -0
- mainsequence/instruments/instruments/european_option.py +74 -0
- mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
- mainsequence/instruments/instruments/json_codec.py +585 -0
- mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
- mainsequence/instruments/instruments/position.py +475 -0
- mainsequence/instruments/instruments/ql_fields.py +239 -0
- mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
- mainsequence/instruments/pricing_models/__init__.py +0 -0
- mainsequence/instruments/pricing_models/black_scholes.py +49 -0
- mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
- mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
- mainsequence/instruments/pricing_models/indices.py +350 -0
- mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
- mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
- mainsequence/instruments/settings.py +175 -0
- mainsequence/instruments/utils.py +29 -0
- mainsequence/logconf.py +284 -0
- mainsequence/reportbuilder/__init__.py +0 -0
- mainsequence/reportbuilder/__main__.py +0 -0
- mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
- mainsequence/reportbuilder/model.py +713 -0
- mainsequence/reportbuilder/slide_templates.py +532 -0
- mainsequence/tdag/__init__.py +8 -0
- mainsequence/tdag/__main__.py +0 -0
- mainsequence/tdag/config.py +129 -0
- mainsequence/tdag/data_nodes/__init__.py +12 -0
- mainsequence/tdag/data_nodes/build_operations.py +751 -0
- mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
- mainsequence/tdag/data_nodes/persist_managers.py +812 -0
- mainsequence/tdag/data_nodes/run_operations.py +543 -0
- mainsequence/tdag/data_nodes/utils.py +24 -0
- mainsequence/tdag/future_registry.py +25 -0
- mainsequence/tdag/utils.py +40 -0
- mainsequence/virtualfundbuilder/__init__.py +45 -0
- mainsequence/virtualfundbuilder/__main__.py +235 -0
- mainsequence/virtualfundbuilder/agent_interface.py +77 -0
- mainsequence/virtualfundbuilder/config_handling.py +86 -0
- mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
- mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
- mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
- mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
- mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
- mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
- mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
- mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
- mainsequence/virtualfundbuilder/data_nodes.py +637 -0
- mainsequence/virtualfundbuilder/enums.py +23 -0
- mainsequence/virtualfundbuilder/models.py +282 -0
- mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
- mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
- mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
- mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
- mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
- mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
- mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
- mainsequence/virtualfundbuilder/utils.py +381 -0
- mainsequence-2.0.0.dist-info/METADATA +105 -0
- mainsequence-2.0.0.dist-info/RECORD +110 -0
- mainsequence-2.0.0.dist-info/WHEEL +5 -0
- mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
- mainsequence-2.0.0.dist-info/top_level.txt +1 -0
--- /dev/null
+++ mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py
@@ -0,0 +1,810 @@
import copy

import pytz
from typing import Union, Dict, List, Literal, Optional, Tuple, Any
import pandas as pd
import numpy as np
import datetime
import pandas_market_calendars as mcal

from mainsequence.tdag.data_nodes import DataNode, WrapperDataNode, APIDataNode
from mainsequence.client import (
    CONSTANTS, LocalTimeSeriesDoesNotExist, LocalTimeSerie, DynamicTableDataSource,
    UpdateStatistics, AssetCategory, AssetTranslationTable, AssetTranslationRule, AssetFilter,
)
from mainsequence.client import MARKETS_CONSTANTS, ExecutionVenue
from mainsequence.client import DoesNotExist, Asset
from mainsequence.tdag.data_nodes.utils import (
    string_frequency_to_minutes,
    string_freq_to_time_delta,
)
import os

from tqdm import tqdm
from joblib import Parallel, delayed

from mainsequence.virtualfundbuilder.models import AssetsConfiguration
from mainsequence.virtualfundbuilder.utils import logger, TIMEDELTA
from functools import lru_cache


FULL_CALENDAR = "24/7"

@lru_cache(maxsize=256)
def _get_calendar_by_name(calendar_name: str):
    if calendar_name == FULL_CALENDAR:
        return None
    return mcal.get_calendar(calendar_name)


@lru_cache(maxsize=1024)
def _get_schedule_cached(calendar_name: str, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Returns a schedule DataFrame indexed by normalized UTC session date.
    start_date / end_date should be 'YYYY-MM-DD' strings for cacheability.
    """
    cal = _get_calendar_by_name(calendar_name)
    if cal is None:
        # 24/7 synthetic schedule (open at 00:00, close at 23:59 UTC)
        sessions = pd.date_range(start=start_date, end=end_date, freq="D", tz="UTC")
        df = pd.DataFrame({
            "market_open": sessions,
            "market_close": sessions + pd.Timedelta(days=1) - pd.Timedelta(minutes=1),
        }, index=sessions)
        df.index.name = "session"
        return df

    sched = cal.schedule(start_date=start_date, end_date=end_date).reset_index().rename(columns={"index": "session"})
    sched["session"] = pd.to_datetime(sched["session"], utc=True).dt.normalize()
    sched["market_open"] = pd.to_datetime(sched["market_open"], utc=True)
    sched["market_close"] = pd.to_datetime(sched["market_close"], utc=True)
    return sched.set_index("session")
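

# Hedged, illustrative sketch (not part of the original module): how the cached
# schedule helpers compose. "NYSE" is an example calendar name assumed to exist
# in pandas_market_calendars; the 24/7 branch returns a synthetic daily schedule.
def _example_schedule_usage() -> pd.DataFrame:
    nyse = _get_schedule_cached("NYSE", "2024-01-02", "2024-01-05")
    always_open = _get_schedule_cached(FULL_CALENDAR, "2024-01-02", "2024-01-05")
    # Both frames share the same shape: a normalized UTC "session" index with
    # tz-aware market_open / market_close columns.
    return pd.concat([nyse, always_open], keys=["NYSE", FULL_CALENDAR])
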

def get_interpolated_prices_timeseries(
    assets_configuration: Optional[AssetsConfiguration] = None,
    asset_list: Optional[List] = None,
):
    """
    Creates a wrapper time series for an asset configuration.
    """
    if assets_configuration is None:
        assert asset_list is not None, "asset_list and assets_configuration cannot both be None"
    if assets_configuration is not None:
        prices_configuration = copy.deepcopy(assets_configuration).prices_configuration
        prices_configuration_kwargs = prices_configuration.model_dump()
        prices_configuration_kwargs.pop("is_live", None)
        prices_configuration_kwargs.pop("markets_time_series", None)

        if asset_list is None:
            return InterpolatedPrices(
                asset_category_unique_id=assets_configuration.assets_category_unique_id,
                **prices_configuration_kwargs
            )
        else:
            return InterpolatedPrices(
                asset_list=asset_list,
                **prices_configuration_kwargs
            )

    # Reached only when asset_list is given without an assets_configuration.
    raise NotImplementedError("Building from an asset_list without a prices_configuration is not implemented")
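

# Hedged usage sketch: building the interpolated series from a configuration.
# The AssetsConfiguration instance and its fields are assumed to come from the
# caller's portfolio config; nothing here is a new API.
#
#   assets_config = AssetsConfiguration(...)  # hypothetical instance
#   prices_ts = get_interpolated_prices_timeseries(assets_configuration=assets_config)
#   # prices_ts is an InterpolatedPrices DataNode wired to the category's assets.
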

class UpsampleAndInterpolation:
    """
    Handles upsampling and interpolation of bar data.
    """
    TIMESTAMP_COLS = ["first_trade_time", "last_trade_time", "open_time"]

    def __init__(
        self,
        bar_frequency_id: str,
        upsample_frequency_id: str,
        intraday_bar_interpolation_rule: str,
    ):
        self.bar_frequency_id = bar_frequency_id
        self.upsample_frequency_id = upsample_frequency_id
        self.intraday_bar_interpolation_rule = intraday_bar_interpolation_rule

        # The upsample frequency must be an integer multiple of the bar frequency.
        rows = string_frequency_to_minutes(self.upsample_frequency_id) / string_frequency_to_minutes(
            self.bar_frequency_id)
        assert rows.is_integer()

        if "d" in self.bar_frequency_id:
            assert bar_frequency_id == self.upsample_frequency_id  # Upsampling for daily bars is not implemented

        self.upsample_frequency_td = string_freq_to_time_delta(self.upsample_frequency_id)

    @staticmethod
    def upsample_bars(
        bars_df: pd.DataFrame,
        upsample_frequency_obs: int,
        upsample_frequency_td: object,
        calendar: str,
        open_to_close_time_delta: datetime.timedelta,
        is_portfolio: bool = False
    ) -> pd.DataFrame:
        """
        Upsamples the bars dataframe based on the given parameters.
        For example, it can aggregate 1-minute bars into 5-minute bars.
        Note that this method works on iloc positions: the underlying data
        should already be interpolated, and therefore complete.

        Args:
            bars_df (pd.DataFrame): The bars data to be upsampled.
            upsample_frequency_obs (int): Number of base bars per upsampled bar.
            upsample_frequency_td (object): Time delta of the upsample frequency.
            calendar (str): Trading calendar used to account for trading hours.
            open_to_close_time_delta (datetime.timedelta): Time delta between a bar's open and close.
            is_portfolio (bool): Whether the data is for a portfolio or a single asset.

        Returns:
            pd.DataFrame: The upsampled bars dataframe.
        """
        obs = bars_df.shape[0] / upsample_frequency_obs
        assert obs > 1.0

        trading_halts = calendar != FULL_CALENDAR

        calendar = mcal.get_calendar(calendar)

        full_schedule = calendar.schedule(bars_df["trade_day"].min(), bars_df["trade_day"].max()).reset_index()
        full_schedule["index"] = full_schedule["index"].apply(lambda x: x.timestamp())
        full_schedule = full_schedule.set_index("index").to_dict("index")

        all_dfs = []
        for i in tqdm(range(bars_df.shape[0] - upsample_frequency_obs + 1),
                      desc=f"Upsampling from {bars_df['trade_day'].iloc[0]} to {bars_df['trade_day'].iloc[-1]} for assets {bars_df['unique_identifier'].dropna().unique()}"):
            start = i
            end = i + upsample_frequency_obs
            tmp_df = bars_df.iloc[start:end]

            day_schedule = full_schedule[tmp_df["trade_day"].iloc[0].timestamp()]
            first_available_bar = day_schedule["market_open"] + upsample_frequency_td
            last_available_bar = day_schedule["market_close"]

            if trading_halts and tmp_df.index[-1] < first_available_bar:
                # Edge case 1: the market is closed; do not upsample into the next day.
                continue
            elif trading_halts and tmp_df.index[-1] > last_available_bar:
                continue
            else:
                dollar = tmp_df.vwap * tmp_df.volume
                volume = np.nansum(tmp_df.volume.values)
                vwap = np.nansum(dollar.values) / volume
                close = tmp_df.close.iloc[-1]
                vwap = vwap if not np.isnan(vwap) else close
                new_bar = {
                    "open_time": tmp_df.index[0] - open_to_close_time_delta,
                    "time": tmp_df.index[-1],
                    "volume": volume,
                    "vwap": vwap,
                    "open": tmp_df.open.iloc[0],
                    "close": close,
                }
                if not is_portfolio:
                    new_bar.update({
                        "high": np.nanmax(tmp_df.high.values),
                        "low": np.nanmin(tmp_df.low.values),
                    })

                all_dfs.append(new_bar)

        all_dfs = pd.DataFrame(all_dfs)
        all_dfs["unique_identifier"] = bars_df["unique_identifier"].iloc[0]
        all_dfs = all_dfs.set_index("time")

        return all_dfs
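
    # Worked sketch of the aggregation above (hedged, illustrative numbers):
    # with upsample_frequency_obs = 5 on 1-minute bars, the window i..i+4 yields
    #   volume = nansum(volume[i:i+5])
    #   vwap   = nansum(vwap[i:i+5] * volume[i:i+5]) / volume   (dollar-weighted,
    #            falling back to the window close when the ratio is NaN)
    #   open   = open[i], close = close[i+4],
    #   high   = nanmax(high[i:i+5]), low = nanmin(low[i:i+5]) for single assets.
    # The loop advances one base bar at a time, so each output row is a rolling
    # window ending at that bar's close time.
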
    def get_interpolated_upsampled_bars(
        self,
        calendar: str,
        tmp_df: pd.DataFrame,
        last_observation: Optional[pd.DataFrame] = None
    ) -> pd.DataFrame:
        """
        Gets interpolated and upsampled bars based on the given parameters.
        First interpolates the data to fill any gaps, then upsamples it to the desired frequency.

        Args:
            calendar (str): Trading calendar for interpolation and upsampling.
            tmp_df (pd.DataFrame): Dataframe containing the bars to be processed.
            last_observation (Optional[pd.DataFrame]): Last observed data to fill gaps.

        Returns:
            pd.DataFrame: Interpolated and upsampled bars dataframe.
        """
        for col in self.TIMESTAMP_COLS:
            if col in tmp_df.columns:
                # Infer the epoch unit (ns/us/ms/s) from the digit count of the
                # numeric timestamps, then parse them as UTC datetimes.
                s = pd.to_numeric(tmp_df[col], errors="coerce")
                digits = s.dropna().abs().astype("int64").astype(str).str.len()
                UNIT_BY_MIN_DIGITS = {18: "ns", 15: "us", 12: "ms", 10: "s", 0: "m"}
                d = int(digits.mode().iat[0])

                unit = UNIT_BY_MIN_DIGITS[max(k for k in UNIT_BY_MIN_DIGITS if d >= k)]
                tmp_df[col] = pd.to_datetime(s, unit=unit, utc=True)

        if "d" in self.bar_frequency_id:
            tmp_df = interpolate_daily_bars(
                bars_df=tmp_df.copy(),
                interpolation_rule=self.intraday_bar_interpolation_rule,
                calendar=calendar,
                last_observation=last_observation,
            )
        elif "m" in self.bar_frequency_id:
            bars_frequency_min = string_frequency_to_minutes(self.bar_frequency_id)

            # Interpolation to fill gaps
            tmp_df = interpolate_intraday_bars(
                bars_df=tmp_df.copy(),
                interpolation_rule=self.intraday_bar_interpolation_rule,
                calendar=calendar,
                bars_frequency_min=bars_frequency_min,
                last_observation=last_observation,
            )

        if len(tmp_df) == 0:
            return tmp_df

        assert tmp_df.isnull().sum()[["close", "open"]].sum() == 0

        # Upsample to the correct frequency
        if "d" in self.bar_frequency_id:
            all_columns = self.TIMESTAMP_COLS
            upsampled_df = tmp_df
        else:
            upsample_freq_obs = string_frequency_to_minutes(self.upsample_frequency_id) // bars_frequency_min

            if upsample_freq_obs > 1:  # more than one base bar per upsampled bar
                upsampled_df = UpsampleAndInterpolation.upsample_bars(
                    bars_df=tmp_df,
                    upsample_frequency_obs=upsample_freq_obs,
                    upsample_frequency_td=self.upsample_frequency_td,
                    calendar=calendar,
                    is_portfolio=False,
                    open_to_close_time_delta=datetime.timedelta(minutes=bars_frequency_min),
                )
            else:
                upsampled_df = tmp_df
            all_columns = self.TIMESTAMP_COLS + ["trade_day"]
        # Persist timestamp columns as int64 epoch nanoseconds.
        for col in all_columns:
            if col in upsampled_df.columns:
                upsampled_df[col] = pd.to_datetime(upsampled_df[col]).astype(np.int64).values

        return upsampled_df
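

# Hedged sketch of the epoch-unit inference used above: digit counts map raw
# integer timestamps to their resolution (18+ digits -> nanoseconds, 10+ ->
# seconds). Illustrative only; the values below are made-up epochs.
def _example_epoch_unit_inference() -> pd.Series:
    raw = pd.Series([1_700_000_000, 1_700_000_000_000])  # seconds vs milliseconds
    UNIT_BY_MIN_DIGITS = {18: "ns", 15: "us", 12: "ms", 10: "s", 0: "m"}
    units = raw.abs().astype(str).str.len().map(
        lambda d: UNIT_BY_MIN_DIGITS[max(k for k in UNIT_BY_MIN_DIGITS if d >= k)]
    )
    # -> ["s", "ms"]; each value can then be parsed with pd.to_datetime(..., unit=unit)
    return units
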

def interpolate_daily_bars(
    bars_df: pd.DataFrame,
    interpolation_rule: str,
    calendar: str,
    last_observation: Optional[pd.DataFrame] = None,
):
    calendar_name = getattr(calendar, "name", calendar)

    def rebase_with_forward_fill(bars_df, last_observation):
        if last_observation is not None:
            if "interpolated" in last_observation.columns:
                last_observation = last_observation.drop(columns="interpolated")

            bars_df = pd.concat([last_observation, bars_df], axis=0).sort_index()
        if "unique_identifier" in bars_df.columns:
            bars_df.loc[:, ['unique_identifier']] = bars_df[
                ['unique_identifier']
            ].bfill().ffill()

        null_index = bars_df.isnull().any(axis=1)
        bars_df["close"] = bars_df["close"].ffill()
        bars_df["open"] = bars_df["open"].where(~null_index, bars_df["close"])
        bars_df.volume = bars_df.volume.fillna(0)
        if "vwap" in bars_df.columns:
            bars_df.vwap = bars_df.vwap.ffill()
        if "trade_count" in bars_df.columns:
            bars_df.trade_count = bars_df.trade_count.fillna(0)

        if null_index.any():
            if "high" in bars_df.columns:
                bars_df["high"] = bars_df["high"].where(~null_index, bars_df["close"])
                bars_df["low"] = bars_df["low"].where(~null_index, bars_df["close"])

            bars_df["interpolated"] = False
            bars_df.loc[null_index, "interpolated"] = True
        else:
            bars_df["interpolated"] = False

        if last_observation is not None:
            bars_df = bars_df.iloc[1:]

        return bars_df

    # Extend the schedule window so midnight stamps on non-trading days can fill to the next session close
    start_date = bars_df.index.min().date().isoformat()
    end_date = (pd.Timestamp(bars_df.index.max()).normalize()).date().isoformat()
    restricted_schedule = _get_schedule_cached(calendar_name, start_date, end_date)

    restricted_schedule = restricted_schedule.reset_index()
    market_type = "market_close"

    restricted_schedule = restricted_schedule.set_index(market_type)
    full_index = bars_df.index.union(restricted_schedule.index)

    bars_df = bars_df.reindex(full_index)

    if interpolation_rule == "None":
        pass
    elif interpolation_rule == "ffill":
        bars_df = rebase_with_forward_fill(bars_df, last_observation=last_observation)
        if last_observation is None:
            bars_df = bars_df.bfill()
    else:
        raise Exception(f"Unknown interpolation_rule: {interpolation_rule}")

    if len(bars_df):
        last_observation = bars_df.iloc[[-1]]

    if len(bars_df) == 0:
        return pd.DataFrame()

    bars_df = bars_df[bars_df.index.isin(restricted_schedule.index)]

    null_index = bars_df[bars_df["open_time"].isnull()].index
    if len(null_index) > 0:
        # Use the market_open that corresponds to each market_close index
        bars_df.loc[null_index, "open_time"] = restricted_schedule.loc[null_index, "market_open"]

    return bars_df
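

# Hedged toy example of the daily "ffill" rule: gap rows inherit the previous
# close, get zero volume, and are flagged interpolated=True; surviving rows are
# the ones aligned with the calendar's market_close stamps. Values are made up.
def _example_daily_ffill() -> pd.DataFrame:
    idx = pd.to_datetime(["2024-01-02", "2024-01-03", "2024-01-04"], utc=True)
    bars = pd.DataFrame(
        {
            "open": [10.0, None, 10.4],
            "close": [10.2, None, 10.5],
            "volume": [100.0, None, 90.0],
            "open_time": [idx[0], pd.NaT, idx[2]],
        },
        index=idx,
    )
    return interpolate_daily_bars(
        bars_df=bars, interpolation_rule="ffill", calendar=FULL_CALENDAR
    )
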

def interpolate_intraday_bars(
    bars_df: pd.DataFrame,
    interpolation_rule: str,
    bars_frequency_min: int,
    calendar: str,
    last_observation: Optional[pd.DataFrame] = None,
) -> pd.DataFrame:
    """
    Interpolates intraday bars based on the given parameters.
    Fills in missing data points in intraday bar data in case of gaps.
    """
    calendar_instance = mcal.get_calendar(getattr(calendar, "name", calendar))

    def build_daily_range_from_schedule(start, end):
        return pd.date_range(start=start, end=end, freq=f"{bars_frequency_min}min")

    def sanitize_today_update(x: pd.DataFrame, date_range, day):
        # Normalize to UTC for a consistent "today" comparison.
        today_utc = datetime.datetime.now(datetime.timezone.utc)

        if day.date() == today_utc.date():
            x.index.name = None
            if len(x.index) > 0:
                last = x.index.max()
                date_range = [i for i in date_range if i <= last]
        return date_range

    def rebase_withoutnan_fill(x, trade_starts, trade_ends):
        date_range = build_daily_range_from_schedule(trade_starts, trade_ends)
        date_range = sanitize_today_update(x=x, date_range=date_range, day=trade_starts)
        x = x.reindex(date_range)
        return x

    def rebase_with_forward_fill(x, trade_starts, trade_ends, last_observation):
        is_start_of_day = False
        if (x.shape[0] == 1) and x.index[0].hour == 0:
            is_start_of_day = True
            x["interpolated"] = False
        if not is_start_of_day:
            date_range = build_daily_range_from_schedule(trade_starts, trade_ends)
            date_range = sanitize_today_update(x=x, date_range=date_range, day=trade_starts)
            x = x.reindex(date_range)

            if last_observation is not None:
                if "interpolated" in last_observation.columns:
                    last_observation = last_observation.drop(columns="interpolated")
                x = pd.concat([last_observation, x], axis=0)

            null_index = x[x["close"].isnull()].index
            x.close = x.close.ffill()
            x.loc[null_index, "open"] = x.loc[null_index, "close"]
            x.volume = x.volume.fillna(0)
            x.vwap = x.vwap.ffill()
            if "trade_count" in x.columns:
                x.trade_count = x.trade_count.fillna(0)
            x["interpolated"] = False
            if len(null_index) > 0:
                if "high" in x.columns:
                    x.loc[null_index, "high"] = x.loc[null_index, "close"]
                    x.loc[null_index, "low"] = x.loc[null_index, "close"]

                x.loc[null_index, "interpolated"] = True

            if last_observation is not None:
                x = x.iloc[1:]

        # interpolate any other columns with 0

        return x

    # Because index values are close times, the greatest schedule value should be
    # the open time of the last close, to avoid interpolating past the data.
    restricted_schedule = calendar_instance.schedule(bars_df.index.min(),
                                                     bars_df.iloc[-1]["open_time"])  # This needs to be faster

    bars_df = bars_df[~bars_df.index.duplicated(keep='first')]  # TODO: remove; unnecessary with proper indices

    full_index = bars_df.index.union(restricted_schedule.set_index("market_open").index).union(
        restricted_schedule.set_index("market_close").index)

    bars_df = bars_df.reindex(full_index)

    bars_df["trade_day"] = pd.to_datetime(bars_df.index, utc=True).normalize()

    groups = bars_df.groupby("trade_day")
    interpolated_data = []

    for day, group_df in groups:
        schedule = calendar_instance.schedule(start_date=day, end_date=day)
        if schedule.shape[0] == 0:
            continue
        trade_starts = schedule["market_open"].iloc[0]
        trade_ends = schedule["market_close"].iloc[0]

        group_df = group_df[group_df.index >= schedule["market_open"].iloc[0]]
        group_df = group_df[group_df.index <= schedule["market_close"].iloc[0]]

        if group_df.dropna().shape[0] == 0:
            continue

        # Clip the session window to the calendar day.
        if trade_starts < day:
            trade_starts = day
        next_day = day + datetime.timedelta(days=1)
        if trade_ends >= next_day:
            trade_ends = next_day - datetime.timedelta(minutes=1)

        if interpolation_rule == "None":
            tmp_df = rebase_withoutnan_fill(group_df, trade_starts=trade_starts, trade_ends=trade_ends)
        elif interpolation_rule == "ffill":
            tmp_df = rebase_with_forward_fill(
                group_df,
                trade_starts=trade_starts,
                last_observation=last_observation,
                trade_ends=trade_ends
            )
            if last_observation is None:
                tmp_df = tmp_df.bfill()
        else:
            raise Exception(f"Unknown interpolation_rule: {interpolation_rule}")

        if len(tmp_df):
            last_observation = tmp_df.iloc[[-1]]
        interpolated_data.append(tmp_df)

    if len(interpolated_data) == 0:
        return pd.DataFrame()

    interpolated_data = pd.concat(interpolated_data, axis=0)
    interpolated_data["trade_day"] = interpolated_data.index
    interpolated_data["trade_day"] = interpolated_data["trade_day"].apply(
        lambda x: x.replace(hour=0, minute=0, second=0)
    )

    return interpolated_data
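

# Hedged sketch of the two intraday rules: "None" only reindexes each session to
# the bar grid (gaps stay NaN), while "ffill" carries the last close forward,
# zeroes volume, and marks the synthetic rows with interpolated=True.
#
#   grid = pd.date_range("2024-01-02 14:31", "2024-01-02 21:00", freq="1min", tz="UTC")
#   # A bar missing at 14:35 becomes: open=close=previous close, volume=0, interpolated=True.
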

class InterpolatedPrices(DataNode):
    """
    Handles interpolated prices for assets.
    """
    OFFSET_START = datetime.datetime(2017, 7, 20).replace(tzinfo=pytz.utc)
    _ARGS_IGNORE_IN_STORAGE_HASH = ["asset_category_unique_id", "asset_list"]

    def __init__(
        self,
        bar_frequency_id: str,
        intraday_bar_interpolation_rule: str,
        asset_category_unique_id: Optional[str] = None,
        upsample_frequency_id: Optional[str] = None,
        asset_list: Optional[List] = None,
        translation_table_unique_id: Optional[str] = None,
        source_bars_data_node: Optional[DataNode] = None,
        *args,
        **kwargs
    ):
        """
        Initializes the InterpolatedPrices object.
        """
        assert "d" in bar_frequency_id or "m" in bar_frequency_id, \
            f"bar_frequency_id={bar_frequency_id} should contain 'd' (days) or 'm' (minutes)"
        if source_bars_data_node is None:
            if asset_category_unique_id is None:
                assert asset_list is not None, \
                    "either asset_category_unique_id or asset_list must be provided"

        self.asset_category_unique_id = asset_category_unique_id
        self.interpolator = UpsampleAndInterpolation(
            bar_frequency_id=bar_frequency_id,
            upsample_frequency_id=upsample_frequency_id,
            intraday_bar_interpolation_rule=intraday_bar_interpolation_rule
        )
        self.constructor_asset_list = asset_list
        bars_frequency_min = string_frequency_to_minutes(bar_frequency_id)
        self.maximum_forward_fill = datetime.timedelta(minutes=bars_frequency_min) - TIMEDELTA

        self.intraday_bar_interpolation_rule = intraday_bar_interpolation_rule
        self.bar_frequency_id = bar_frequency_id
        self.upsample_frequency_id = upsample_frequency_id
        self.source_bars_data_node = source_bars_data_node
        # Get the translation rules.
        if source_bars_data_node is None:
            if translation_table_unique_id is None:
                raise Exception("Translation table needs to be set")
            translation_table = AssetTranslationTable.get(unique_identifier=translation_table_unique_id)

            self.bars_ts = WrapperDataNode(translation_table=translation_table)
        else:
            self.bars_ts = source_bars_data_node

        super().__init__(*args, **kwargs)

    def dependencies(self):
        return {"bars_ts": self.bars_ts}

    def get_string_frequency_to_minutes(self):
        return string_frequency_to_minutes(self.bar_frequency_id)

    def _get_required_cores(self, last_observation_map) -> int:
        """
        Determines the required number of cores for processing.
        """
        if len(last_observation_map) == 0:
            required = 1
        else:
            required = min(len(last_observation_map), 20)

        return required

    def run_post_update_routines(self, error_on_last_update):
        if not self.local_persist_manager.metadata.protect_from_deletion:
            self.local_persist_manager.protect_from_deletion()

    def _transform_raw_data_to_upsampled_df(
        self,
        raw_data_df: pd.DataFrame,
    ) -> pd.DataFrame:
        """
        Transforms raw data into an upsampled dataframe.
        """
        upsampled_df = []

        # TODO: this should be a helper function
        unique_identifier_range_map = self.update_statistics.get_update_range_map_great_or_equal()
        full_last_observation = self.get_ranged_data_per_asset(range_descriptor=unique_identifier_range_map)
        last_observation_map = {}

        for unique_identifier in raw_data_df["unique_identifier"].unique():
            if full_last_observation is None or full_last_observation.empty:
                last_observation_map[unique_identifier] = None
                continue

            if unique_identifier in full_last_observation.index.get_level_values("unique_identifier").to_list():
                last_obs = full_last_observation.loc[(slice(None), unique_identifier), :].reset_index(
                    ["unique_identifier"], drop=True
                )
                last_obs.index.name = None
                if "open_time" in last_obs.columns:
                    last_obs["open_time"] = pd.to_datetime(last_obs["open_time"], utc=True)
                last_observation_map[unique_identifier] = last_obs
            else:
                last_observation_map[unique_identifier] = None

        def multiproc_upsample(calendar, tmp_df, unique_identifier, last_observation, interpolator_kwargs):
            interpolator = UpsampleAndInterpolation(**interpolator_kwargs)
            df = interpolator.get_interpolated_upsampled_bars(
                calendar=calendar,
                tmp_df=tmp_df,
                last_observation=last_observation
            )
            df["unique_identifier"] = unique_identifier
            return df

        required_cores = self._get_required_cores(last_observation_map=last_observation_map)
        required_cores = 1  # force single-core processing; the joblib path below is kept for reference
        if required_cores == 1:
            # Single-core processing
            for unique_identifier, df in raw_data_df.groupby("unique_identifier"):
                if df.shape[0] > 1:
                    df = self.interpolator.get_interpolated_upsampled_bars(
                        calendar=self.asset_calendar_map[unique_identifier],
                        tmp_df=df,
                        last_observation=last_observation_map[unique_identifier],
                    )
                    df["unique_identifier"] = unique_identifier
                    upsampled_df.append(df)
        else:
            upsampled_df = Parallel(n_jobs=required_cores)(
                delayed(multiproc_upsample)(
                    calendar=self.asset_calendar_map[unique_identifier],
                    tmp_df=tmp_df,
                    unique_identifier=unique_identifier,
                    last_observation=last_observation_map[unique_identifier],
                    interpolator_kwargs=dict(
                        bar_frequency_id=self.bar_frequency_id,
                        upsample_frequency_id=self.upsample_frequency_id,
                        intraday_bar_interpolation_rule=self.intraday_bar_interpolation_rule,
                    )
                )
                for unique_identifier, tmp_df in raw_data_df.groupby("unique_identifier") if tmp_df.shape[0] > 0
            )

        upsampled_df = [d for d in upsampled_df if len(d) > 0]  # Remove empty dataframes
        if len(upsampled_df) == 0:
            return pd.DataFrame()

        max_value_per_asset = {d.index.max(): d.unique_identifier.iloc[0] for d in upsampled_df}

        min_end_time = min(max_value_per_asset.keys())
        max_end_time = max(max_value_per_asset.keys())
        self.logger.info(
            "Upsampled window aligned: min_end=%s (asset=%s), max_end=%s",
            min_end_time.isoformat(), max_value_per_asset[min_end_time], max_end_time.isoformat()
        )
        upsampled_df = pd.concat(upsampled_df, axis=0)
        # upsampled_df = upsampled_df[upsampled_df.index <= min_max]
        upsampled_df.volume = upsampled_df.volume.fillna(0)

        upsampled_df.index.name = "time_index"
        upsampled_df = upsampled_df.set_index("unique_identifier", append=True)
        upsampled_df = upsampled_df.sort_index(level=0)

        if upsampled_df.shape[0] == 0:
            upsampled_df = pd.DataFrame()

        return upsampled_df

    def get_upsampled_data(self) -> pd.DataFrame:
        """
        Main method to get upsampled data for prices.
        """
        unique_identifier_range_map = self.update_statistics.get_update_range_map_great_or_equal()

        raw_data_df = self.bars_ts.get_ranged_data_per_asset(range_descriptor=unique_identifier_range_map)
        if raw_data_df.empty:
            self.logger.info("No new data to interpolate")
            return pd.DataFrame()

        upsampled_df = self._transform_raw_data_to_upsampled_df(raw_data_df.reset_index(["unique_identifier"]))
        return upsampled_df

    def get_asset_list(self):
        """
        Creates mappings from symbols to IDs.
        """
        if self.source_bars_data_node is not None:
            return self.bars_ts.get_asset_list()

        if self.constructor_asset_list is not None:
            asset_list = self.constructor_asset_list
        else:
            asset_category = AssetCategory.get(unique_identifier=self.asset_category_unique_id)
            asset_list = Asset.filter(id__in=asset_category.assets)

        return asset_list

    def update(self) -> pd.DataFrame:
        """
        Updates the series from the source based on the latest value.
        """
        self.asset_calendar_map = {a.unique_identifier: a.get_calendar() for a in self.update_statistics.asset_list}
        prices = self.get_upsampled_data()
        if prices.shape[0] == 0:
            return pd.DataFrame()

        if not self.update_statistics.is_empty():
            TARGET_COLS = ['open', 'close', 'high', 'low', 'volume', 'open_time']
            assert prices[[c for c in prices.columns if c in TARGET_COLS]].isnull().sum().sum() == 0

        prices = self.update_statistics.filter_df_by_latest_value(prices)

        duplicates_exist = prices.reset_index().duplicated(subset=["time_index", "unique_identifier"]).any()
        if duplicates_exist:
            raise Exception("Duplicate (time_index, unique_identifier) rows found in interpolated prices")

        # Adapt to the InterpolatedPrices data schema.
        if "vwap" not in prices.columns:
            self.logger.warning("vwap not calculated in prices, set to NaN")
            prices["vwap"] = np.nan
        if "trade_count" not in prices.columns:
            self.logger.warning("trade_count not calculated in prices, set to NaN")
            prices["trade_count"] = np.nan

        prices = prices[['open_time', 'open', 'high', 'low', 'close', 'volume', 'trade_count', 'vwap', 'interpolated']]
        return prices
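

# Hedged construction sketch for InterpolatedPrices; the identifier strings are
# placeholders, not values shipped with the package.
#
#   node = InterpolatedPrices(
#       bar_frequency_id="1m",
#       upsample_frequency_id="5m",
#       intraday_bar_interpolation_rule="ffill",
#       asset_category_unique_id="my_asset_category",        # hypothetical
#       translation_table_unique_id="my_translation_table",  # hypothetical
#   )
#   # node.update() pulls raw bars from bars_ts, interpolates per asset calendar,
#   # upsamples 1m -> 5m, and returns the schema-aligned OHLCV frame.
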

class ExternalPrices(DataNode):
    """
    Loads externally supplied prices from an artifact CSV keyed by FIGI.
    """

    def __init__(
        self,
        artifact_name: str,
        bucket_name: str,
        asset_category_unique_id,
        *args,
        **kwargs
    ):
        self.artifact_name = artifact_name
        self.bucket_name = bucket_name
        self.asset_category_unique_id = asset_category_unique_id
        super().__init__(*args, **kwargs)

    def get_asset_list(self):
        """
        Creates mappings from symbols to IDs.
        """
        asset_category = AssetCategory.get(unique_identifier=self.asset_category_unique_id)
        asset_list = Asset.filter(id__in=asset_category.assets)
        return asset_list

    def update(self) -> pd.DataFrame:
        from mainsequence.client.models_tdag import Artifact
        source_artifact = Artifact.get(bucket__name=self.bucket_name, name=self.artifact_name)
        prices_source = pd.read_csv(source_artifact.content)

        expected_cols = [
            "time_index",
            "figi",
            "price",
        ]
        prices_source = prices_source[expected_cols].copy()
        prices_source["time_index"] = pd.to_datetime(
            prices_source["time_index"], utc=True
        )

        # Map FIGIs in the source data to unique identifiers.
        for asset in self.update_statistics.asset_list:
            prices_source.loc[prices_source["figi"] == asset.figi, "unique_identifier"] = asset.unique_identifier

        prices_source.set_index(["time_index", "unique_identifier"], inplace=True)
        prices = self.update_statistics.filter_df_by_latest_value(prices_source)

        prices = prices.rename(columns={"price": "open"})[["open"]]
        return prices
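
# Hedged usage sketch for ExternalPrices; bucket and artifact names are
# placeholders for whatever the deployment actually stores.
#
#   external = ExternalPrices(
#       artifact_name="portfolio_prices.csv",   # hypothetical artifact
#       bucket_name="external-data",            # hypothetical bucket
#       asset_category_unique_id="my_asset_category",
#   )
#   # update() expects columns time_index, figi, price and emits a single
#   # "open" column indexed by (time_index, unique_identifier).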