mainsequence 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. mainsequence/__init__.py +0 -0
  2. mainsequence/__main__.py +9 -0
  3. mainsequence/cli/__init__.py +1 -0
  4. mainsequence/cli/api.py +157 -0
  5. mainsequence/cli/cli.py +442 -0
  6. mainsequence/cli/config.py +78 -0
  7. mainsequence/cli/ssh_utils.py +126 -0
  8. mainsequence/client/__init__.py +17 -0
  9. mainsequence/client/base.py +431 -0
  10. mainsequence/client/data_sources_interfaces/__init__.py +0 -0
  11. mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
  12. mainsequence/client/data_sources_interfaces/timescale.py +479 -0
  13. mainsequence/client/models_helpers.py +113 -0
  14. mainsequence/client/models_report_studio.py +412 -0
  15. mainsequence/client/models_tdag.py +2276 -0
  16. mainsequence/client/models_vam.py +1983 -0
  17. mainsequence/client/utils.py +387 -0
  18. mainsequence/dashboards/__init__.py +0 -0
  19. mainsequence/dashboards/streamlit/__init__.py +0 -0
  20. mainsequence/dashboards/streamlit/assets/config.toml +12 -0
  21. mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
  22. mainsequence/dashboards/streamlit/assets/logo.png +0 -0
  23. mainsequence/dashboards/streamlit/core/__init__.py +0 -0
  24. mainsequence/dashboards/streamlit/core/theme.py +212 -0
  25. mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
  26. mainsequence/dashboards/streamlit/scaffold.py +220 -0
  27. mainsequence/instrumentation/__init__.py +7 -0
  28. mainsequence/instrumentation/utils.py +101 -0
  29. mainsequence/instruments/__init__.py +1 -0
  30. mainsequence/instruments/data_interface/__init__.py +10 -0
  31. mainsequence/instruments/data_interface/data_interface.py +361 -0
  32. mainsequence/instruments/instruments/__init__.py +3 -0
  33. mainsequence/instruments/instruments/base_instrument.py +85 -0
  34. mainsequence/instruments/instruments/bond.py +447 -0
  35. mainsequence/instruments/instruments/european_option.py +74 -0
  36. mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
  37. mainsequence/instruments/instruments/json_codec.py +585 -0
  38. mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
  39. mainsequence/instruments/instruments/position.py +475 -0
  40. mainsequence/instruments/instruments/ql_fields.py +239 -0
  41. mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
  42. mainsequence/instruments/pricing_models/__init__.py +0 -0
  43. mainsequence/instruments/pricing_models/black_scholes.py +49 -0
  44. mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
  45. mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
  46. mainsequence/instruments/pricing_models/indices.py +350 -0
  47. mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
  48. mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
  49. mainsequence/instruments/settings.py +175 -0
  50. mainsequence/instruments/utils.py +29 -0
  51. mainsequence/logconf.py +284 -0
  52. mainsequence/reportbuilder/__init__.py +0 -0
  53. mainsequence/reportbuilder/__main__.py +0 -0
  54. mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
  55. mainsequence/reportbuilder/model.py +713 -0
  56. mainsequence/reportbuilder/slide_templates.py +532 -0
  57. mainsequence/tdag/__init__.py +8 -0
  58. mainsequence/tdag/__main__.py +0 -0
  59. mainsequence/tdag/config.py +129 -0
  60. mainsequence/tdag/data_nodes/__init__.py +12 -0
  61. mainsequence/tdag/data_nodes/build_operations.py +751 -0
  62. mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
  63. mainsequence/tdag/data_nodes/persist_managers.py +812 -0
  64. mainsequence/tdag/data_nodes/run_operations.py +543 -0
  65. mainsequence/tdag/data_nodes/utils.py +24 -0
  66. mainsequence/tdag/future_registry.py +25 -0
  67. mainsequence/tdag/utils.py +40 -0
  68. mainsequence/virtualfundbuilder/__init__.py +45 -0
  69. mainsequence/virtualfundbuilder/__main__.py +235 -0
  70. mainsequence/virtualfundbuilder/agent_interface.py +77 -0
  71. mainsequence/virtualfundbuilder/config_handling.py +86 -0
  72. mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
  73. mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
  74. mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
  75. mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
  76. mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
  77. mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
  78. mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
  79. mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
  80. mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
  81. mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
  82. mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
  83. mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
  84. mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
  85. mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
  86. mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
  87. mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
  88. mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
  89. mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
  90. mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
  91. mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
  92. mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
  93. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
  94. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
  95. mainsequence/virtualfundbuilder/data_nodes.py +637 -0
  96. mainsequence/virtualfundbuilder/enums.py +23 -0
  97. mainsequence/virtualfundbuilder/models.py +282 -0
  98. mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
  99. mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
  100. mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
  101. mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
  102. mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
  103. mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
  104. mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
  105. mainsequence/virtualfundbuilder/utils.py +381 -0
  106. mainsequence-2.0.0.dist-info/METADATA +105 -0
  107. mainsequence-2.0.0.dist-info/RECORD +110 -0
  108. mainsequence-2.0.0.dist-info/WHEEL +5 -0
  109. mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
  110. mainsequence-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,810 @@
1
+ import copy
2
+
3
+ import pytz
4
+ from typing import Union, Dict, List, Literal, Optional,Tuple, Any
5
+ import pandas as pd
6
+ import numpy as np
7
+ import datetime
8
+ import pandas_market_calendars as mcal
9
+
10
+ from mainsequence.tdag.data_nodes import DataNode, WrapperDataNode, APIDataNode
11
+ from mainsequence.client import (CONSTANTS, LocalTimeSeriesDoesNotExist, LocalTimeSerie, DynamicTableDataSource,
12
+ UpdateStatistics, AssetCategory, AssetTranslationTable, AssetTranslationRule, AssetFilter
13
+ )
14
+ from mainsequence.client import MARKETS_CONSTANTS, ExecutionVenue
15
+ from mainsequence.client import DoesNotExist, Asset
16
+ from mainsequence.tdag.data_nodes.utils import (
17
+ string_frequency_to_minutes,
18
+ string_freq_to_time_delta,
19
+ )
20
+ import os
21
+
22
+ from tqdm import tqdm
23
+ from joblib import Parallel, delayed
24
+
25
+ from mainsequence.virtualfundbuilder.models import AssetsConfiguration
26
+ from mainsequence.virtualfundbuilder.utils import logger, TIMEDELTA
27
+ from typing import Optional
28
+ from functools import lru_cache
29
+
30
+
31
# Sentinel calendar name meaning "always open" — routes code to a synthetic
# 24/7 schedule instead of a pandas_market_calendars exchange calendar.
FULL_CALENDAR = "24/7"
32
+
33
@lru_cache(maxsize=256)
def _get_calendar_by_name(calendar_name: str):
    """Resolve a calendar name to a market-calendar object.

    Returns None for the 24/7 sentinel so callers can build a synthetic
    schedule instead; results are memoized since calendar construction
    is comparatively expensive.
    """
    return None if calendar_name == FULL_CALENDAR else mcal.get_calendar(calendar_name)
38
+
39
@lru_cache(maxsize=1024)
def _get_schedule_cached(calendar_name: str, start_date: str, end_date: str) -> pd.DataFrame:
    """Return a trading schedule indexed by normalized UTC session date.

    Args:
        calendar_name: Market calendar name, or the 24/7 sentinel.
        start_date: Inclusive start as a 'YYYY-MM-DD' string (hashable for the cache).
        end_date: Inclusive end as a 'YYYY-MM-DD' string.

    Returns:
        DataFrame with tz-aware UTC 'market_open' / 'market_close' columns,
        indexed by session date (UTC midnight), index named 'session'.
    """
    cal = _get_calendar_by_name(calendar_name)

    if cal is None:
        # Synthetic 24/7 schedule: every day is a session that opens at
        # 00:00 UTC and closes one minute before the following midnight.
        sessions = pd.date_range(start=start_date, end=end_date, freq="D", tz="UTC")
        synthetic = pd.DataFrame(
            {
                "market_open": sessions,
                "market_close": sessions + pd.Timedelta(days=1) - pd.Timedelta(minutes=1),
            },
            index=sessions,
        )
        synthetic.index.name = "session"
        return synthetic

    # Real exchange calendar: normalize the session index to UTC midnight and
    # force the open/close columns to tz-aware UTC timestamps.
    schedule = cal.schedule(start_date=start_date, end_date=end_date)
    schedule = schedule.reset_index().rename(columns={"index": "session"})
    schedule["session"] = pd.to_datetime(schedule["session"], utc=True).dt.normalize()
    for column in ("market_open", "market_close"):
        schedule[column] = pd.to_datetime(schedule[column], utc=True)
    return schedule.set_index("session")
61
+
62
+
63
+
64
+
65
+
66
def get_interpolated_prices_timeseries(assets_configuration: Optional[AssetsConfiguration]=None, asset_list=None,
                                       ):
    """
    Creates a Wrapper Timeseries (an InterpolatedPrices node) for an asset configuration.

    Args:
        assets_configuration: Configuration whose ``prices_configuration`` supplies the
            InterpolatedPrices constructor kwargs.
        asset_list: Explicit list of assets; when given it takes precedence over the
            configured asset category.

    Returns:
        InterpolatedPrices: the constructed prices node.

    Raises:
        AssertionError: if both arguments are None.
        Exception: whenever assets_configuration is None — see NOTE at the bottom.
    """
    if assets_configuration is None:
        assert asset_list is not None, "asset_list and assets_configuration both cant be None"
    if assets_configuration is not None:
        # Deep-copy so popping keys below does not mutate the caller's configuration.
        prices_configuration = copy.deepcopy(assets_configuration).prices_configuration
        prices_configuration_kwargs = prices_configuration.model_dump()
        # These configuration fields are not InterpolatedPrices constructor arguments.
        prices_configuration_kwargs.pop("is_live", None)
        prices_configuration_kwargs.pop("markets_time_series", None)

        if asset_list is None:
            # Resolve the asset universe through the configured category.
            return InterpolatedPrices(
                asset_category_unique_id=assets_configuration.assets_category_unique_id,
                **prices_configuration_kwargs
            )
        else:
            # Explicit asset list takes precedence over the category.
            return InterpolatedPrices(
                asset_list=asset_list,
                **prices_configuration_kwargs
            )

    # NOTE(review): this is reached whenever assets_configuration is None, even though
    # the assert above permits (assets_configuration=None, asset_list=[...]) — that
    # call pattern always raises here. Confirm whether it should be supported.
    raise Exception("Not implemented prices_configuration Kwargs")
95
+
96
class UpsampleAndInterpolation:
    """
    Handles upsampling and interpolation of bar data.

    Interpolation fills calendar-aligned gaps in a bar series; upsampling then
    aggregates runs of consecutive fine bars into coarser bars (e.g. five
    1-minute bars into one 5-minute bar).
    """
    # Columns that may arrive as raw epoch integers and are converted to/from datetimes.
    TIMESTAMP_COLS = ["first_trade_time", "last_trade_time", "open_time"]

    def __init__(
            self,
            bar_frequency_id: str,
            upsample_frequency_id: str,
            intraday_bar_interpolation_rule: str,
    ):
        # Frequency ids look like "1m"/"5m"/"1d" (see string_frequency_to_minutes);
        # the upsample frequency must be an integer multiple of the bar frequency.
        self.bar_frequency_id = bar_frequency_id
        self.upsample_frequency_id = upsample_frequency_id
        self.intraday_bar_interpolation_rule = intraday_bar_interpolation_rule

        rows = string_frequency_to_minutes(self.upsample_frequency_id) / string_frequency_to_minutes(
            self.bar_frequency_id)
        assert rows.is_integer()

        if "d" in self.bar_frequency_id:
            assert bar_frequency_id == self.upsample_frequency_id  # Upsampling for daily bars not implemented

        self.upsample_frequency_td = string_freq_to_time_delta(self.upsample_frequency_id)

    @staticmethod
    def upsample_bars(
            bars_df: pd.DataFrame,
            upsample_frequency_obs: int,
            upsample_frequency_td: object,
            calendar: str,
            open_to_close_time_delta: datetime.timedelta,
            is_portfolio: bool = False
    ) -> pd.DataFrame:
        """
        Aggregates sliding windows of `upsample_frequency_obs` consecutive bars
        into coarser bars (e.g. combines 1-minute bars into 5-minute bars).
        Note that this method works on iloc as the underlying data should be
        already interpolated, so the series is assumed gap-free.

        Args:
            bars_df (pd.DataFrame): The bars data to be upsampled; indexed by
                bar-close timestamp with a "trade_day" column.
            upsample_frequency_obs (int): Number of source bars per output bar.
            upsample_frequency_td (object): Time delta of one output bar.
            calendar (str): Trading calendar name used to bound trading hours.
            open_to_close_time_delta (datetime.timedelta): Duration of one source bar,
                used to back out each output bar's open_time.
            is_portfolio (bool): When True, high/low are omitted from the output.

        Returns:
            pd.DataFrame: The upsampled bars dataframe indexed by "time".
        """
        obs = bars_df.shape[0] / upsample_frequency_obs
        assert obs > 1.0

        # With a 24/7 calendar there are no session boundaries to respect.
        trading_halts = calendar != FULL_CALENDAR

        # NOTE(review): mcal.get_calendar is called even when calendar == "24/7";
        # confirm the library resolves that name.
        calendar = mcal.get_calendar(calendar)

        # Map each trade day's epoch timestamp to its session open/close.
        full_schedule = calendar.schedule(bars_df["trade_day"].min(), bars_df["trade_day"].max()).reset_index()
        full_schedule["index"] = full_schedule["index"].apply(lambda x: x.timestamp())
        full_schedule = full_schedule.set_index("index").to_dict("index")

        all_dfs = []
        for i in tqdm(range(bars_df.shape[0] - upsample_frequency_obs + 1),
                      desc=f"Upsampling from {bars_df['trade_day'].iloc[0]} to {bars_df['trade_day'].iloc[-1]} for assets {bars_df['unique_identifier'].dropna().unique()}"):
            start = i
            end = i + upsample_frequency_obs
            tmp_df = bars_df.iloc[start:end]

            day_schedule = full_schedule[tmp_df["trade_day"].iloc[0].timestamp()]
            first_available_bar = day_schedule["market_open"] + upsample_frequency_td
            last_available_bar = day_schedule["market_close"]

            if trading_halts and tmp_df.index[-1] < first_available_bar:
                # Edge case: market is closed — the window would end before the first
                # full output bar of the session; do not upsample into the next day.
                continue
            elif trading_halts and tmp_df.index[-1] > last_available_bar:
                # Window ends after the close — skip.
                continue
            else:
                # Volume-weighted price over the window; fall back to close when
                # there was no volume (vwap would be NaN).
                dollar = tmp_df.vwap * tmp_df.volume
                volume = np.nansum(tmp_df.volume.values)
                vwap = np.nansum(dollar.values) / volume
                close = tmp_df.close.iloc[-1]
                vwap = vwap if not np.isnan(vwap) else close
                new_bar = {
                    "open_time": tmp_df.index[0] - open_to_close_time_delta,
                    "time": tmp_df.index[-1],
                    "volume": volume,
                    "vwap": vwap,
                    "open": tmp_df.open.iloc[0],
                    "close": close,
                }
                if not is_portfolio:
                    new_bar.update({
                        "high": np.nanmax(tmp_df.high.values),
                        "low": np.nanmin(tmp_df.low.values),
                    })

                all_dfs.append(new_bar)

        all_dfs = pd.DataFrame(all_dfs)
        # Single-asset assumption: the whole frame carries one identifier.
        all_dfs["unique_identifier"] = bars_df["unique_identifier"].iloc[0]
        all_dfs = all_dfs.set_index("time")

        return all_dfs

    def get_interpolated_upsampled_bars(
            self,
            calendar: str,
            tmp_df: pd.DataFrame,
            last_observation: Union[None, pd.Series] = None
    ) -> pd.DataFrame:
        """
        Gets interpolated and upsampled bars based on the given parameters.
        First interpolates the data to fill any gaps, then upsamples it to the
        desired frequency.

        Args:
            calendar (str): Trading calendar for interpolation and upsampling.
            tmp_df (pd.DataFrame): Dataframe containing the bars to be processed.
            last_observation (Union[None, pd.Series], optional): Last observed data
                used to seed the forward fill across chunk boundaries.

        Returns:
            pd.DataFrame: Interpolated and upsampled bars dataframe, with timestamp
            columns serialized to int64 epoch values.
        """
        # Timestamp columns may arrive as epoch integers in unknown units; infer
        # the unit from the typical digit count (ns=19+, us=16+, ms=13+, s=10+).
        for col in self.TIMESTAMP_COLS:

            try:
                if col in tmp_df.columns:
                    s = pd.to_numeric(tmp_df[col], errors="coerce")
                    digits = s.dropna().abs().astype("int64").astype(str).str.len()
                    UNIT_BY_MIN_DIGITS = {18: "ns", 15: "us", 12: "ms", 10: "s", 0: "m"}
                    d = int(digits.mode().iat[0])

                    unit = UNIT_BY_MIN_DIGITS[max(k for k in UNIT_BY_MIN_DIGITS if d >= k)]
                    tmp_df[col] = pd.to_datetime(s, unit=unit, utc=True)

            except Exception as e:
                raise e

        if "d" in self.bar_frequency_id:
            tmp_df = interpolate_daily_bars(
                bars_df=tmp_df.copy(),
                interpolation_rule=self.intraday_bar_interpolation_rule,
                calendar=calendar,
                last_observation=last_observation,
            )
        elif "m" in self.bar_frequency_id:
            bars_frequency_min = string_frequency_to_minutes(self.bar_frequency_id)

            # Interpolation to fill gaps
            tmp_df = interpolate_intraday_bars(
                bars_df=tmp_df.copy(),
                interpolation_rule=self.intraday_bar_interpolation_rule,
                calendar=calendar,
                bars_frequency_min=bars_frequency_min,
                last_observation=last_observation,
            )

        if len(tmp_df) == 0:
            return tmp_df

        # Interpolation must leave no holes in the price path.
        assert tmp_df.isnull().sum()[["close", "open"]].sum() == 0

        # Upsample to the correct frequency
        if "d" in self.bar_frequency_id:
            all_columns = self.TIMESTAMP_COLS
            upsampled_df = tmp_df
        else:
            upsample_freq_obs = string_frequency_to_minutes(self.upsample_frequency_id) // bars_frequency_min

            # NOTE(review): comparing a bar *count* against *minutes* looks odd;
            # confirm this shouldn't be `upsample_freq_obs > 1`.
            if upsample_freq_obs > bars_frequency_min:
                upsampled_df = UpsampleAndInterpolation.upsample_bars(
                    bars_df=tmp_df,
                    upsample_frequency_obs=upsample_freq_obs,
                    upsample_frequency_td=self.upsample_frequency_td,
                    calendar=calendar,
                    is_portfolio=False,
                    open_to_close_time_delta=datetime.timedelta(minutes=bars_frequency_min),
                )
            else:
                upsampled_df = tmp_df
            all_columns = self.TIMESTAMP_COLS + ["trade_day"]
        # Serialize timestamp-like columns to int64 epoch values (datetime64 -> ns).
        for col in all_columns:
            if col in upsampled_df.columns:
                upsampled_df[col] = pd.to_datetime(upsampled_df[col]).astype(np.int64).values

        return upsampled_df
289
+
290
+
291
def interpolate_daily_bars(
        bars_df: pd.DataFrame,
        interpolation_rule: str,
        calendar: str,
        last_observation: Union[None, pd.Series] = None,  #fix annotation
):
    """
    Fill gaps in daily bars against a trading calendar's session closes.

    The frame is reindexed onto the union of its own index and the calendar's
    market_close timestamps, gaps are filled per ``interpolation_rule``, and
    rows not on a session close are dropped again.

    Args:
        bars_df: Daily bars indexed by timestamp; expects "open", "close",
            "open_time" and optionally volume/vwap/high/low/trade_count columns.
        interpolation_rule: "ffill" to forward-fill gaps (adding an
            "interpolated" flag) or "None" to only reindex.
        calendar: Calendar name, or an object exposing ``.name``.
        last_observation: Optional single-row frame seeding the forward fill.

    Returns:
        pd.DataFrame: Bars restricted to session closes; empty DataFrame when
        nothing remains.

    Raises:
        Exception: If ``interpolation_rule`` is not "None"/"ffill".
    """
    try:
        # Accept either a calendar object (use .name) or a plain name string.
        calendar_name = getattr(calendar, "name", calendar)
    except Exception as e:
        raise e

    def rebase_with_forward_fill(bars_df, last_observation):
        # Forward-fill gaps, optionally seeded by the previous chunk's last row;
        # filled rows get interpolated=True and open/high/low pinned to close.
        try:
            if last_observation is not None:
                if "interpolated" in last_observation.columns:
                    last_observation = last_observation.drop(columns="interpolated")

                bars_df = pd.concat([last_observation, bars_df], axis=0).sort_index()
            if "unique_identifier" in bars_df.columns:
                bars_df.loc[:, ['unique_identifier']] = bars_df[
                    ['unique_identifier']
                ].bfill().ffill()

            # Rows with any NaN are considered interpolated rows.
            null_index = bars_df.isnull().any(axis=1)
            bars_df["close"] = bars_df["close"].ffill()
            bars_df["open"] = bars_df["open"].where(~null_index, bars_df["close"])
            try:
                bars_df.volume = bars_df.volume.fillna(0)
            except Exception as e:
                raise e
            if "vwap" in bars_df.columns:
                bars_df.vwap = bars_df.vwap.ffill()
            if "trade_count" in bars_df.columns:
                bars_df.trade_count = bars_df.trade_count.fillna(0)

            if len(null_index) > 0:
                if "high" in bars_df.columns:
                    # Flat synthetic bar: high/low collapse to the filled close.
                    bars_df["high"] = bars_df["high"].where(~null_index, bars_df["close"])
                    bars_df["low"] = bars_df["low"].where(~null_index, bars_df["close"])

                bars_df["interpolated"] = False
                bars_df.loc[null_index, "interpolated"] = True

            else:
                bars_df["interpolated"] = False

            if last_observation is not None:
                # Drop the seed row that was prepended above.
                bars_df = bars_df.iloc[1:]
        except Exception as e:
            raise e

        return bars_df


    # Extend the schedule window so midnight stamps on non-trading days can fill to the next session close
    start_date = bars_df.index.min().date().isoformat()
    end_date = (pd.Timestamp(bars_df.index.max()).normalize()).date().isoformat()
    restricted_schedule = _get_schedule_cached(calendar_name, start_date, end_date)

    restricted_schedule = restricted_schedule.reset_index()
    market_type = "market_close"

    # Target grid: one row per session close.
    restricted_schedule = restricted_schedule.set_index(market_type)
    full_index = bars_df.index.union(restricted_schedule.index)

    bars_df = bars_df.reindex(full_index)

    if interpolation_rule == "None":
        pass
    elif interpolation_rule == "ffill":
        bars_df = rebase_with_forward_fill(bars_df, last_observation=last_observation)
        if last_observation is None:
            # No seed available: back-fill the leading gap instead.
            bars_df = bars_df.bfill()
    else:
        raise Exception

    # NOTE(review): this local rebinding has no effect outside the function.
    if len(bars_df):
        last_observation = bars_df.iloc[[-1]]

    if len(bars_df) == 0:
        return pd.DataFrame()

    # Keep only rows that fall exactly on a session close.
    bars_df = bars_df[bars_df.index.isin(restricted_schedule.index)]

    null_index = bars_df[bars_df["open_time"].isnull()].index
    if len(null_index) > 0:

        # Use the market_open that corresponds to each market_close index
        bars_df.loc[null_index, "open_time"] = restricted_schedule.loc[null_index, "market_open"]

    return bars_df
382
+
383
+
384
def interpolate_intraday_bars(
        bars_df: pd.DataFrame,
        interpolation_rule: str,
        bars_frequency_min: int,
        calendar: str,
        last_observation: Union[None, pd.Series] = None,
) -> pd.DataFrame:
    """
    Fill gaps in intraday bar data against a trading calendar.

    Each session's bars are reindexed onto a regular grid (every
    ``bars_frequency_min`` minutes between market open and close) and the gaps
    are filled according to ``interpolation_rule``.

    Args:
        bars_df: Bars indexed by bar-close timestamp; must contain "open_time"
            and OHLCV-style columns.
        interpolation_rule: "ffill" to forward-fill gaps (marking filled rows
            with an "interpolated" flag) or "None" to reindex without filling.
        bars_frequency_min: Bar size in minutes.
        calendar: Calendar object exposing ``.name`` (resolved through
            pandas_market_calendars).
        last_observation: Optional single-row frame with the last persisted bar,
            used to seed the forward fill across chunk boundaries.

    Returns:
        pd.DataFrame: Interpolated bars with a normalized "trade_day" column;
        empty DataFrame when no session produced data.

    Raises:
        Exception: If ``interpolation_rule`` is not one of "None"/"ffill".
    """
    calendar_instance = mcal.get_calendar(calendar.name)

    def build_daily_range_from_schedule(start, end):
        # Regular grid of bar-close stamps for one session.
        return pd.date_range(start=start, end=end, freq=f"{bars_frequency_min}min")

    def sanitize_today_update(x: pd.DataFrame, date_range, day):
        # For the current (still-open) session, do not extend the grid past the
        # last bar actually observed, so no future bars are fabricated.
        # Fix: use tz-aware now() instead of deprecated naive utcnow(); only the
        # calendar date is compared, so behavior is unchanged.
        today_utc = datetime.datetime.now(datetime.timezone.utc)

        if day.date() == today_utc.date():
            x.index.name = None
            if len(x.index) > 0:
                last = x.index.max()
                date_range = [i for i in date_range if i <= last]
        return date_range

    def rebase_withoutnan_fill(x, trade_starts, trade_ends):
        # "None" rule: align onto the session grid but leave NaNs untouched.
        date_range = build_daily_range_from_schedule(trade_starts, trade_ends)
        date_range = sanitize_today_update(x=x, date_range=date_range, day=trade_starts)
        x = x.reindex(date_range)
        return x

    def rebase_with_forward_fill(x, trade_starts, trade_ends, last_observation):
        # "ffill" rule: align onto the session grid and forward-fill; filled rows
        # get interpolated=True with open/high/low pinned to the filled close.
        is_start_of_day = False
        if (x.shape[0] == 1) and x.index[0].hour == 0:
            # A lone midnight stamp marks the session start; nothing to fill yet.
            is_start_of_day = True
            x["interpolated"] = False
        if not is_start_of_day:
            date_range = build_daily_range_from_schedule(trade_starts, trade_ends)
            date_range = sanitize_today_update(x=x, date_range=date_range, day=trade_starts)
            try:
                x = x.reindex(date_range)

                if last_observation is not None:
                    if "interpolated" in x.columns:
                        last_observation = last_observation.drop(columns="interpolated")
                    x = pd.concat([last_observation, x], axis=0)

                null_index = x[x["close"].isnull()].index
                x.close = x.close.ffill()
                x.loc[null_index, "open"] = x.loc[null_index, "close"]
                x.volume = x.volume.fillna(0)
                x.vwap = x.vwap.ffill()
                if "trade_count" in x.columns:
                    x.trade_count = x.trade_count.fillna(0)
                x["interpolated"] = False
                if len(null_index) > 0:
                    if "high" in x.columns:
                        x.loc[null_index, "high"] = x.loc[null_index, "close"]
                        x.loc[null_index, "low"] = x.loc[null_index, "close"]

                    x.loc[null_index, "interpolated"] = True

                if last_observation is not None:
                    # Drop the seed row that was prepended above.
                    x = x.iloc[1:]
            except Exception as e:
                raise e

        return x

    try:
        # Because the index stamps are bar closes, the greatest value to schedule
        # up to is the open time of the last close, to avoid extra interpolation.
        restricted_schedule = calendar_instance.schedule(bars_df.index.min(),
                                                         bars_df.iloc[-1]["open_time"])  # This needs to be faster
    except Exception as e:
        raise e

    bars_df = bars_df[~bars_df.index.duplicated(keep='first')]  # todo: remove uncessary with indices.

    # Union with the session boundaries so every open/close exists as a row.
    full_index = bars_df.index.union(restricted_schedule.set_index("market_open").index).union(
        restricted_schedule.set_index("market_close").index)

    bars_df = bars_df.reindex(full_index)

    bars_df["trade_day"] = bars_df.index
    bars_df["trade_day"] = pd.to_datetime(bars_df.index, utc=True).normalize()

    groups = bars_df.groupby("trade_day")
    interpolated_data = []

    for day, group_df in groups:
        schedule = calendar_instance.schedule(start_date=day, end_date=day)
        if schedule.shape[0] == 0:
            # Non-trading day.
            continue
        try:
            trade_starts = schedule["market_open"].iloc[0]
            trade_ends = schedule["market_close"].iloc[0]
        except Exception as e:
            raise e

        # Keep only bars inside the session.
        group_df = group_df[group_df.index >= schedule["market_open"].iloc[0]]
        group_df = group_df[group_df.index <= schedule["market_close"].iloc[0]]

        if group_df.dropna().shape[0] == 0:
            continue

        # Clamp the grid to the calendar day so sessions spanning midnight do
        # not leak into neighboring trade days.
        if trade_starts < day:
            trade_starts = day
        next_day = day + datetime.timedelta(days=1)
        if trade_ends >= next_day:
            trade_ends = next_day - datetime.timedelta(minutes=1)

        if interpolation_rule == "None":
            tmp_df = rebase_withoutnan_fill(group_df, trade_starts=trade_starts, trade_ends=trade_ends)
        elif interpolation_rule == "ffill":
            tmp_df = rebase_with_forward_fill(
                group_df,
                trade_starts=trade_starts,
                last_observation=last_observation,
                trade_ends=trade_ends
            )
            if last_observation is None:
                # No seed available: back-fill the leading gap instead.
                tmp_df = tmp_df.bfill()
        else:
            raise Exception(f"Unknown interpolation_rule={interpolation_rule!r}")

        if len(tmp_df):
            # Carry the last bar forward as the seed for the next session.
            last_observation = tmp_df.iloc[[-1]]
        interpolated_data.append(tmp_df)

    if len(interpolated_data) == 0:
        return pd.DataFrame()

    interpolated_data = pd.concat(interpolated_data, axis=0)
    interpolated_data["trade_day"] = interpolated_data.index
    interpolated_data["trade_day"] = interpolated_data["trade_day"].apply(
        lambda x: x.replace(hour=0, minute=0, second=0)
    )

    return interpolated_data
532
+
533
+
534
+ class InterpolatedPrices(DataNode):
535
+ """
536
+ Handles interpolated prices for assets.
537
+ """
538
+ OFFSET_START = datetime.datetime(2017, 7, 20).replace(tzinfo=pytz.utc)
539
+ _ARGS_IGNORE_IN_STORAGE_HASH=["asset_category_unique_id", "asset_list"]
540
    def __init__(
            self,
            bar_frequency_id: str,
            intraday_bar_interpolation_rule: str,
            asset_category_unique_id: Optional[str] = None,
            upsample_frequency_id: Optional[str] = None,
            asset_list: List = None,
            translation_table_unique_id: Optional[str] = None,
            source_bars_data_node: Optional[DataNode] = None,
            *args,
            **kwargs
    ):
        """
        Initializes the InterpolatedPrices object.

        Args:
            bar_frequency_id: Source bar frequency; must contain "d" (days) or "m" (minutes).
            intraday_bar_interpolation_rule: Gap-fill rule ("ffill" or "None").
            asset_category_unique_id: Category resolving the asset universe; required
                unless asset_list or source_bars_data_node is given.
            upsample_frequency_id: Target frequency for upsampling.
            asset_list: Explicit asset universe (alternative to the category).
            translation_table_unique_id: Id of the AssetTranslationTable used to build
                the WrapperDataNode; required when source_bars_data_node is None.
            source_bars_data_node: Optional explicit upstream bars node; bypasses the
                translation-table lookup.
        """
        assert "d" in bar_frequency_id or "m" in bar_frequency_id, f"bar_frequency_id={bar_frequency_id} should be 'd for days' or 'm for min'"
        if source_bars_data_node is None:
            if asset_category_unique_id is None:
                assert asset_list is not None, f"asset_category_unique_id={asset_category_unique_id} should not be None or asset_list should be defined"

        self.asset_category_unique_id = asset_category_unique_id
        # Validates that upsample frequency is an integer multiple of the bar frequency.
        self.interpolator = UpsampleAndInterpolation(
            bar_frequency_id=bar_frequency_id,
            upsample_frequency_id=upsample_frequency_id,
            intraday_bar_interpolation_rule=intraday_bar_interpolation_rule
        )
        self.constructor_asset_list = asset_list
        bars_frequency_min = string_frequency_to_minutes(bar_frequency_id)
        # Longest span a value may be carried forward: just under one bar.
        self.maximum_forward_fill = datetime.timedelta(minutes=bars_frequency_min) - TIMEDELTA

        self.intraday_bar_interpolation_rule = intraday_bar_interpolation_rule
        self.bar_frequency_id = bar_frequency_id
        self.upsample_frequency_id = upsample_frequency_id
        self.source_bars_data_node = source_bars_data_node
        # get the translation rules
        if source_bars_data_node is None:
            if translation_table_unique_id is None:
                raise Exception(f"Translation table needs to be set")
            translation_table = AssetTranslationTable.get(unique_identifier=translation_table_unique_id)

            self.bars_ts = WrapperDataNode(translation_table=translation_table)
        else:
            self.bars_ts = source_bars_data_node

        super().__init__(*args, **kwargs)
587
+
588
+
589
+ def dependencies(self):
590
+ return {"bars_ts":self.bars_ts}
591
+
592
    def get_string_frequency_to_minutes(self):
        """Return the configured bar frequency id converted to minutes."""
        return string_frequency_to_minutes(self.bar_frequency_id)
594
+
595
+ def _get_required_cores(self, last_observation_map) -> int:
596
+ """
597
+ Determines the required number of cores for processing.
598
+ """
599
+ if len(last_observation_map) == 0:
600
+ required = 1
601
+ else:
602
+ required = min(len(last_observation_map), 20)
603
+
604
+ return required
605
+
606
    def run_post_update_routines(self, error_on_last_update):
        """
        Post-update hook: ensure the backing table is protected from deletion.

        Args:
            error_on_last_update: flag supplied by the update framework; unused here.
        """
        # Only issue the protection call when metadata shows it is not already set.
        if not self.local_persist_manager.metadata.protect_from_deletion:
            self.local_persist_manager.protect_from_deletion()
609
+
610
    def _transform_raw_data_to_upsampled_df(
            self,
            raw_data_df: pd.DataFrame,
    ) -> pd.DataFrame:
        """
        Transforms raw bar data into an interpolated, upsampled dataframe.

        Builds a per-asset map of last persisted observations (to seed forward
        fills across chunk boundaries), then interpolates/upsamples each asset's
        bars and concatenates the results into a (time_index, unique_identifier)
        multi-indexed frame.

        Args:
            raw_data_df: Raw bars with a "unique_identifier" column, indexed by time.

        Returns:
            pd.DataFrame: Upsampled bars, or an empty DataFrame when nothing remains.
        """
        upsampled_df = []

        # TODO this should be a helper function
        unique_identifier_range_map =self.update_statistics.get_update_range_map_great_or_equal()
        full_last_observation = self.get_ranged_data_per_asset (range_descriptor=unique_identifier_range_map)
        last_observation_map = {}

        # Seed map: last persisted row per asset (None when no history exists).
        for unique_identifier in raw_data_df["unique_identifier"].unique():
            if full_last_observation is None or full_last_observation.empty:
                last_observation_map[unique_identifier] = None
                continue

            if unique_identifier in full_last_observation.index.get_level_values("unique_identifier").to_list():
                last_obs = full_last_observation.loc[(slice(None), unique_identifier), :].reset_index(
                    ["unique_identifier"], drop=True
                )
                last_obs.index.name = None
                if "open_time" in last_obs.columns:
                    last_obs["open_time"] = pd.to_datetime(last_obs["open_time"], utc=True)
                last_observation_map[unique_identifier] = last_obs
            else:
                last_observation_map[unique_identifier] = None

        def multiproc_upsample(calendar, tmp_df, unique_identifier, last_observation, interpolator_kwargs):
            # Worker for the parallel path: builds its own interpolator from kwargs.
            # NOTE(review): the `last_observation` parameter is ignored — the closure
            # reads last_observation_map instead; harmless while single-core is forced.
            interpolator = UpsampleAndInterpolation(**interpolator_kwargs)
            df = interpolator.get_interpolated_upsampled_bars(
                calendar=calendar,
                tmp_df=tmp_df,
                last_observation=last_observation_map[unique_identifier]
            )
            df["unique_identifier"] = unique_identifier
            return df

        required_cores = self._get_required_cores(last_observation_map=last_observation_map)
        # NOTE(review): hard-coded override disables the parallel path entirely;
        # confirm whether this is a deliberate kill-switch or leftover debugging.
        required_cores = 1
        if required_cores == 1:
            # Single-core processing
            # NOTE(review): groups with a single row are skipped here (shape > 1),
            # while the parallel branch filters on shape > 0 — confirm intent.
            for unique_identifier, df in raw_data_df.groupby("unique_identifier"):
                if df.shape[0] > 1:
                    df = self.interpolator.get_interpolated_upsampled_bars(
                        calendar=self.asset_calendar_map[unique_identifier],
                        tmp_df=df,
                        last_observation=last_observation_map[unique_identifier],
                    )
                    df["unique_identifier"] = unique_identifier
                    upsampled_df.append(df)
        else:
            upsampled_df = Parallel(n_jobs=required_cores)(
                delayed(multiproc_upsample)(
                    calendar=self.asset_calendar_map[unique_identifier],
                    tmp_df=tmp_df,
                    unique_identifier=unique_identifier,
                    last_observation=last_observation_map[unique_identifier],
                    interpolator_kwargs=dict(
                        bar_frequency_id=self.bar_frequency_id,
                        upsample_frequency_id=self.upsample_frequency_id,
                        intraday_bar_interpolation_rule=self.intraday_bar_interpolation_rule,
                    )
                )
                for unique_identifier, tmp_df in raw_data_df.groupby("unique_identifier") if tmp_df.shape[0] > 0
            )

        upsampled_df = [d for d in upsampled_df if len(d) > 0]  # Remove empty dataframes
        if len(upsampled_df) == 0:
            return pd.DataFrame()

        # Diagnostics: report how far each asset's upsampled series extends.
        max_value_per_asset = {d.index.max(): d.unique_identifier.iloc[0] for d in upsampled_df}

        min_end_time = min(max_value_per_asset.keys())
        max_end_time = max(max_value_per_asset.keys())
        self.logger.info(
            "Upsampled window aligned: min_end=%s (asset=%s), max_end=%s",
            min_end_time.isoformat(), max_value_per_asset[min_end_time], max_end_time.isoformat()
        )
        upsampled_df = pd.concat(upsampled_df, axis=0)
        # upsampled_df = upsampled_df[upsampled_df.index <= min_max]
        upsampled_df.volume = upsampled_df.volume.fillna(0)

        upsampled_df.index.name = "time_index"
        upsampled_df = upsampled_df.set_index("unique_identifier", append=True)
        upsampled_df = upsampled_df.sort_index(level=0)

        if upsampled_df.shape[0] == 0:
            upsampled_df = pd.DataFrame()

        return upsampled_df
703
+
704
+ def get_upsampled_data(self) -> pd.DataFrame:
705
+ """
706
+ Main method to get upsampled data for prices.
707
+ """
708
+ unique_identifier_range_map = self.update_statistics.get_update_range_map_great_or_equal()
709
+
710
+
711
+ raw_data_df = self.bars_ts.get_ranged_data_per_asset(range_descriptor=unique_identifier_range_map)
712
+ if raw_data_df.empty:
713
+ self.logger.info("No new data to interpolate")
714
+ return pd.DataFrame()
715
+
716
+ upsampled_df = self._transform_raw_data_to_upsampled_df(raw_data_df.reset_index(["unique_identifier"]))
717
+ return upsampled_df
718
+
719
+ def get_asset_list(self):
720
+ """
721
+ Creates mappings from symbols to IDs
722
+ """
723
+ if self.source_bars_data_node is not None:
724
+ return self.bars_ts.get_asset_list()
725
+
726
+ if self.constructor_asset_list is not None:
727
+ asset_list= self.constructor_asset_list
728
+ else:
729
+ asset_category = AssetCategory.get(unique_identifier=self.asset_category_unique_id)
730
+ asset_list = Asset.filter(id__in=asset_category.assets)
731
+
732
+ return asset_list
733
+
734
+ def update(self) -> pd.DataFrame:
735
+ """
736
+ Updates the series from the source based on the latest value.
737
+ """
738
+ self.asset_calendar_map = {a.unique_identifier: a.get_calendar() for a in self.update_statistics.asset_list}
739
+ prices = self.get_upsampled_data()
740
+ if prices.shape[0] == 0:
741
+ return pd.DataFrame()
742
+
743
+ if self.update_statistics.is_empty() == False:
744
+ TARGET_COLS = ['open', 'close', 'high', 'low', 'volume', 'open_time']
745
+ assert prices[[c for c in prices.columns if c in TARGET_COLS]].isnull().sum().sum() == 0
746
+
747
+ prices = self.update_statistics.filter_df_by_latest_value(prices)
748
+
749
+ duplicates_exist = prices.reset_index().duplicated(subset=["time_index", "unique_identifier"]).any()
750
+ if duplicates_exist:
751
+ raise Exception()
752
+
753
+ # adapt to InterpolatedPrices data schema
754
+ if "vwap" not in prices.columns:
755
+ self.logger.warning("vwap not calculated in prices, set to NaN")
756
+ prices["vwap"] = np.nan
757
+ if "trade_count" not in prices.columns:
758
+ self.logger.warning("trade_count not calculated in prices, set to NaN")
759
+ prices["trade_count"] = np.nan
760
+
761
+ prices = prices[['open_time', 'open', 'high', 'low', 'close', 'volume', 'trade_count', 'vwap', 'interpolated']]
762
+ return prices
763
+
764
class ExternalPrices(DataNode):
    """
    DataNode that loads external prices from a CSV artifact stored in a
    bucket and maps them onto the assets of a configured asset category.
    """

    def __init__(
            self,
            artifact_name: str,
            bucket_name: str,
            asset_category_unique_id,
            *args,
            **kwargs
    ):
        # Artifact coordinates for the external CSV price source.
        self.artifact_name = artifact_name
        self.bucket_name = bucket_name
        # Category whose assets define the universe of tracked instruments.
        self.asset_category_unique_id = asset_category_unique_id
        super().__init__(*args, **kwargs)

    def get_asset_list(self):
        """
        Return the assets belonging to the configured asset category.
        """
        asset_category = AssetCategory.get(unique_identifier=self.asset_category_unique_id)
        asset_list = Asset.filter(id__in=asset_category.assets)
        return asset_list

    def update(self) -> pd.DataFrame:
        """
        Read the price artifact, translate FIGIs to asset unique identifiers
        and return the rows newer than each asset's latest stored value.

        Returns
        -------
        pd.DataFrame
            Indexed by (time_index, unique_identifier) with a single "open"
            column renamed from the source "price" column.
        """
        from mainsequence.client.models_tdag import Artifact
        source_artifact = Artifact.get(bucket__name=self.bucket_name, name=self.artifact_name)
        prices_source = pd.read_csv(source_artifact.content)

        expected_cols = [
            "time_index",
            "figi",
            "price",
        ]
        prices_source = prices_source[expected_cols].copy()
        prices_source["time_index"] = pd.to_datetime(
            prices_source["time_index"], utc=True
        )

        # Vectorized FIGI -> unique_identifier translation (was a per-asset
        # .loc loop, O(assets * rows)). Unmatched FIGIs stay NaN and duplicate
        # FIGIs resolve last-wins, matching the previous loop's behavior.
        figi_to_uid = {a.figi: a.unique_identifier for a in self.update_statistics.asset_list}
        prices_source["unique_identifier"] = prices_source["figi"].map(figi_to_uid)

        prices_source.set_index(["time_index", "unique_identifier"], inplace=True)
        prices = self.update_statistics.filter_df_by_latest_value(prices_source)

        prices = prices.rename(columns={"price": "open"})[["open"]]
        return prices