mainsequence 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. mainsequence/__init__.py +0 -0
  2. mainsequence/__main__.py +9 -0
  3. mainsequence/cli/__init__.py +1 -0
  4. mainsequence/cli/api.py +157 -0
  5. mainsequence/cli/cli.py +442 -0
  6. mainsequence/cli/config.py +78 -0
  7. mainsequence/cli/ssh_utils.py +126 -0
  8. mainsequence/client/__init__.py +17 -0
  9. mainsequence/client/base.py +431 -0
  10. mainsequence/client/data_sources_interfaces/__init__.py +0 -0
  11. mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
  12. mainsequence/client/data_sources_interfaces/timescale.py +479 -0
  13. mainsequence/client/models_helpers.py +113 -0
  14. mainsequence/client/models_report_studio.py +412 -0
  15. mainsequence/client/models_tdag.py +2276 -0
  16. mainsequence/client/models_vam.py +1983 -0
  17. mainsequence/client/utils.py +387 -0
  18. mainsequence/dashboards/__init__.py +0 -0
  19. mainsequence/dashboards/streamlit/__init__.py +0 -0
  20. mainsequence/dashboards/streamlit/assets/config.toml +12 -0
  21. mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
  22. mainsequence/dashboards/streamlit/assets/logo.png +0 -0
  23. mainsequence/dashboards/streamlit/core/__init__.py +0 -0
  24. mainsequence/dashboards/streamlit/core/theme.py +212 -0
  25. mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
  26. mainsequence/dashboards/streamlit/scaffold.py +220 -0
  27. mainsequence/instrumentation/__init__.py +7 -0
  28. mainsequence/instrumentation/utils.py +101 -0
  29. mainsequence/instruments/__init__.py +1 -0
  30. mainsequence/instruments/data_interface/__init__.py +10 -0
  31. mainsequence/instruments/data_interface/data_interface.py +361 -0
  32. mainsequence/instruments/instruments/__init__.py +3 -0
  33. mainsequence/instruments/instruments/base_instrument.py +85 -0
  34. mainsequence/instruments/instruments/bond.py +447 -0
  35. mainsequence/instruments/instruments/european_option.py +74 -0
  36. mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
  37. mainsequence/instruments/instruments/json_codec.py +585 -0
  38. mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
  39. mainsequence/instruments/instruments/position.py +475 -0
  40. mainsequence/instruments/instruments/ql_fields.py +239 -0
  41. mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
  42. mainsequence/instruments/pricing_models/__init__.py +0 -0
  43. mainsequence/instruments/pricing_models/black_scholes.py +49 -0
  44. mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
  45. mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
  46. mainsequence/instruments/pricing_models/indices.py +350 -0
  47. mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
  48. mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
  49. mainsequence/instruments/settings.py +175 -0
  50. mainsequence/instruments/utils.py +29 -0
  51. mainsequence/logconf.py +284 -0
  52. mainsequence/reportbuilder/__init__.py +0 -0
  53. mainsequence/reportbuilder/__main__.py +0 -0
  54. mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
  55. mainsequence/reportbuilder/model.py +713 -0
  56. mainsequence/reportbuilder/slide_templates.py +532 -0
  57. mainsequence/tdag/__init__.py +8 -0
  58. mainsequence/tdag/__main__.py +0 -0
  59. mainsequence/tdag/config.py +129 -0
  60. mainsequence/tdag/data_nodes/__init__.py +12 -0
  61. mainsequence/tdag/data_nodes/build_operations.py +751 -0
  62. mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
  63. mainsequence/tdag/data_nodes/persist_managers.py +812 -0
  64. mainsequence/tdag/data_nodes/run_operations.py +543 -0
  65. mainsequence/tdag/data_nodes/utils.py +24 -0
  66. mainsequence/tdag/future_registry.py +25 -0
  67. mainsequence/tdag/utils.py +40 -0
  68. mainsequence/virtualfundbuilder/__init__.py +45 -0
  69. mainsequence/virtualfundbuilder/__main__.py +235 -0
  70. mainsequence/virtualfundbuilder/agent_interface.py +77 -0
  71. mainsequence/virtualfundbuilder/config_handling.py +86 -0
  72. mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
  73. mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
  74. mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
  75. mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
  76. mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
  77. mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
  78. mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
  79. mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
  80. mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
  81. mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
  82. mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
  83. mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
  84. mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
  85. mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
  86. mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
  87. mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
  88. mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
  89. mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
  90. mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
  91. mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
  92. mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
  93. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
  94. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
  95. mainsequence/virtualfundbuilder/data_nodes.py +637 -0
  96. mainsequence/virtualfundbuilder/enums.py +23 -0
  97. mainsequence/virtualfundbuilder/models.py +282 -0
  98. mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
  99. mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
  100. mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
  101. mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
  102. mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
  103. mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
  104. mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
  105. mainsequence/virtualfundbuilder/utils.py +381 -0
  106. mainsequence-2.0.0.dist-info/METADATA +105 -0
  107. mainsequence-2.0.0.dist-info/RECORD +110 -0
  108. mainsequence-2.0.0.dist-info/WHEEL +5 -0
  109. mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
  110. mainsequence-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1292 @@
1
+ import datetime
2
+ import os
3
+ from abc import ABC, abstractmethod
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from typing import Dict, Any, List, Optional, Tuple, Callable
8
+ import json
9
+ import time
10
+ import traceback
11
+ import pytz
12
+ import inspect
13
+ import logging
14
+ import copy
15
+ import cloudpickle
16
+ from dataclasses import asdict
17
+ from mainsequence.client import Scheduler
18
+ from mainsequence.instrumentation import tracer
19
+ from mainsequence.tdag.config import (
20
+ ogm
21
+ )
22
+ import tempfile
23
+ import structlog.contextvars as cvars
24
+ from structlog.stdlib import BoundLogger
25
+
26
+ from mainsequence.logconf import logger
27
+
28
+ from mainsequence.tdag.data_nodes.persist_managers import PersistManager, APIPersistManager
29
+ from mainsequence.client.models_tdag import (DataSource,
30
+ UpdateStatistics, UniqueIdentifierRangeMap, ColumnMetaData, )
31
+
32
+
33
+ from abc import ABC
34
+
35
+ from typing import Union
36
+
37
+ from mainsequence.client import LocalTimeSerie, CONSTANTS, \
38
+ DynamicTableDataSource, AssetTranslationTable
39
+
40
+ from functools import wraps
41
+
42
+ import mainsequence.client as ms_client
43
+ import mainsequence.tdag.data_nodes.run_operations as run_operations
44
+ import mainsequence.tdag.data_nodes.build_operations as build_operations
45
+
46
+
47
+
48
+
49
+ def get_data_source_from_orm() -> Any:
50
+ from mainsequence.client import SessionDataSource
51
+ if SessionDataSource.data_source.related_resource is None:
52
+ raise Exception("This Pod does not have a default data source")
53
+ return SessionDataSource.data_source
54
+
55
+ def get_latest_update_by_assets_filter(asset_symbols: Optional[list], last_update_per_asset: dict) -> datetime.datetime:
56
+ """
57
+ Gets the latest update timestamp for a list of asset symbols.
58
+
59
+ Args:
60
+ asset_symbols: A list of asset symbols.
61
+ last_update_per_asset: A dictionary mapping assets to their last update time.
62
+
63
+ Returns:
64
+ The latest update timestamp.
65
+ """
66
+ if asset_symbols is not None:
67
+ last_update_in_table = np.max([timestamp for unique_identifier, timestamp in last_update_per_asset.items()
68
+ if unique_identifier in asset_symbols
69
+ ])
70
+ else:
71
+ last_update_in_table = np.max(list(last_update_per_asset.values()))
72
+ return last_update_in_table
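A minimal usage sketch of the helper above, assuming last_update_per_asset maps unique identifiers directly to timestamps (as the comprehension implies); the symbols and dates are illustrative only:

    import datetime
    import pytz

    last_update_per_asset = {
        "AAPL": datetime.datetime(2024, 1, 2, tzinfo=pytz.utc),
        "MSFT": datetime.datetime(2024, 1, 3, tzinfo=pytz.utc),
    }
    # Restricted to the requested symbols, the latest update is 2024-01-02
    latest = get_latest_update_by_assets_filter(["AAPL"], last_update_per_asset)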
73
+
74
+
75
+
76
+ def last_update_per_unique_identifier(unique_identifier_list: Optional[list],
77
+ last_update_per_asset: dict) -> datetime.datetime:
78
+ """
79
+ Gets the earliest last update time for a list of unique identifiers.
80
+
81
+ Args:
82
+ unique_identifier_list: A list of unique identifiers.
83
+ last_update_per_asset: A dictionary mapping assets to their last update times.
84
+
85
+ Returns:
86
+ The earliest last update timestamp.
87
+ """
88
+ if unique_identifier_list is not None:
89
+ last_update_in_table = min(
90
+ [t for uid, per_id in last_update_per_asset.items() for t in per_id.values() if uid in unique_identifier_list])
91
+ else:
92
+ last_update_in_table = min([t for a in last_update_per_asset.values() for t in a.values()])
93
+ return last_update_in_table
94
+
95
+
96
+
97
+
98
+
99
+ class DependencyUpdateError(Exception):
100
+ pass
101
+
102
+
103
+
104
+ class DataAccessMixin:
105
+ """A mixin for classes that provide access to time series data."""
106
+
107
+ def __repr__(self) -> str:
108
+ try:
109
+ local_id = self.local_time_serie.id
110
+ except Exception:
111
+ local_id = 0
112
+ repr = self.__class__.__name__ + f" {os.environ['TDAG_ENDPOINT']}/local-time-series/details/?local_time_serie_id={local_id}"
113
+ return repr
114
+
115
+ def get_last_observation(self,asset_list:List[ms_client.AssetMixin]):
116
+ update_statistics = self.get_update_statistics()
117
+ update_statistics = update_statistics.update_assets(asset_list=asset_list)
118
+ update_range_map = update_statistics.get_update_range_map_great_or_equal()
119
+ last_observation = self.get_ranged_data_per_asset(update_range_map)
120
+ return last_observation
121
+
122
+ def get_pickle_path_from_time_serie(self) -> str:
123
+ path = build_operations.get_pickle_path(update_hash=self.update_hash,
124
+ data_source_id=self.data_source_id,
125
+ is_api=self.is_api
126
+ )
127
+ return path
128
+
129
+ def persist_to_pickle(self, overwrite: bool = False) -> Tuple[str, str]:
130
+ """
131
+ Persists the DataNode object to a pickle file using an atomic write.
132
+
133
+ Uses a single method to determine the pickle path and dispatches to
134
+ type-specific logic only where necessary.
135
+
136
+ Args:
137
+ overwrite: If True, overwrites any existing pickle file.
138
+
139
+ Returns:
140
+ A tuple containing the full path and the relative path of the pickle file.
141
+ """
142
+ # 1. Common Logic: Determine the pickle path for both types
143
+ path = self.get_pickle_path_from_time_serie()
144
+
145
+ # 2. Type-Specific Logic: Run pre-dump actions only for standard DataNode
146
+ if not self.is_api:
147
+ self.logger.debug(f"Patching source code and git hash for {self.storage_hash}")
148
+ self.local_persist_manager.update_git_and_code_in_backend(time_serie_class=self.__class__)
149
+ # Prepare for pickling by removing the unpicklable ThreadLock
150
+ self._local_persist_manager = None
151
+
152
+ # 3. Common Logic: Persist the data source if needed
153
+ data_source_id = getattr(self.data_source, 'id', self.data_source_id)
154
+ data_source_path = build_operations.data_source_pickle_path(data_source_id)
155
+ if not os.path.isfile(data_source_path) or overwrite:
156
+ self.data_source.persist_to_pickle(data_source_path)
157
+
158
+ # 4. Common Logic: Atomically write the main pickle file
159
+ if os.path.isfile(path) and not overwrite:
160
+ self.logger.debug(f"Pickle file already exists at {path}. Skipping.")
161
+ else:
162
+ if overwrite:
163
+ self.logger.warning(f"Overwriting pickle file at {path}")
164
+ self._atomic_pickle_dump(path)
165
+
166
+ # 5. Common Logic: Return the full and relative paths
167
+ return path, path.replace(ogm.pickle_storage_path + "/", "")
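A short usage sketch (node stands for an already-initialized DataNode subclass instance); the method returns both the absolute path and the path relative to the pickle storage root:

    full_path, relative_path = node.persist_to_pickle(overwrite=False)
    # full_path lives under ogm.pickle_storage_path; the exact layout is
    # determined by build_operations.get_pickle_path
    print(full_path, relative_path)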
168
+
169
+ def _atomic_pickle_dump(self, path: str) -> None:
170
+ """
171
+ Private helper to atomically dump the object to a pickle file.
172
+ This prevents file corruption if the process is interrupted.
173
+ """
174
+ dir_, fname = os.path.split(path)
175
+ # Ensure the target directory exists
176
+ os.makedirs(dir_, exist_ok=True)
177
+
178
+ fd, tmp_path = tempfile.mkstemp(prefix=f"{fname}~", dir=dir_)
179
+ os.close(fd)
180
+ try:
181
+ with open(tmp_path, 'wb') as handle:
182
+ cloudpickle.dump(self, handle)
183
+ # Atomic replace is safer than a direct write
184
+ os.replace(tmp_path, path)
185
+ self.logger.debug(f"Successfully persisted pickle to {path}")
186
+ except Exception:
187
+ # Clean up the temporary file on error to avoid clutter
188
+ try:
189
+ os.remove(tmp_path)
190
+ except OSError:
191
+ pass
192
+ raise
193
+
194
+
195
+ def get_logger_context_variables(self) -> Dict[str, Any]:
196
+ return dict(update_hash=self.update_hash,
197
+ local_hash_id_data_source=self.data_source_id,
198
+ api_time_series=self.__class__.__name__ == "APIDataNode")
199
+
200
+ @property
201
+ def logger(self) -> logging.Logger:
202
+ """Gets a logger instance with bound context variables."""
203
+ # import structlog.contextvars as cvars
204
+ # cvars.bind_contextvars(update_hash=self.update_hash,
205
+ # update_hash=self.data_source_id,
206
+ # api_time_series=True,)
207
+ global logger
208
+ if not hasattr(self, "_logger"):
209
+ cvars.bind_contextvars(**self.get_logger_context_variables() )
210
+ self._logger = logger
211
+
212
+ return self._logger
213
+ @staticmethod
214
+ def set_context_in_logger(logger_context: Dict[str, Any]) -> None:
215
+ """
216
+ Binds context variables to the global logger.
217
+
218
+ Args:
219
+ logger_context: A dictionary of context variables.
220
+ """
221
+ global logger
222
+ for key, value in logger_context.items():
223
+ logger = logger.bind(**{key: value})
224
+
225
+ def unbind_context_variables_from_logger(self) -> None:
226
+ cvars.unbind_contextvars(*self.get_logger_context_variables().keys())
227
+
228
+ def get_df_between_dates(
229
+ self,
230
+ start_date: Optional[datetime.datetime] = None,
231
+ end_date: Optional[datetime.datetime] = None,
232
+ unique_identifier_list: Optional[list] = None,
233
+ great_or_equal: bool = True,
234
+ less_or_equal: bool = True,
235
+ unique_identifier_range_map: Optional[UniqueIdentifierRangeMap] = None,
236
+ columns:Optional[List[str]] = None
237
+ ) -> pd.DataFrame:
238
+ """
239
+ Retrieve rows from this DataNode whose `time_index` (and optional `unique_identifier`) fall within the specified date ranges.
240
+
241
+ **Note:** If `unique_identifier_range_map` is provided, **all** other filters
242
+ (`start_date`, `end_date`, `unique_identifier_list`, `great_or_equal`, `less_or_equal`)
243
+ are ignored, and only the per-identifier ranges in `unique_identifier_range_map` apply.
244
+
245
+ Filtering logic (when `unique_identifier_range_map` is None):
246
+ - If `start_date` is provided, include rows where
247
+ `time_index > start_date` (if `great_or_equal=False`)
248
+ or `time_index >= start_date` (if `great_or_equal=True`).
249
+ - If `end_date` is provided, include rows where
250
+ `time_index < end_date` (if `less_or_equal=False`)
251
+ or `time_index <= end_date` (if `less_or_equal=True`).
252
+ - If `unique_identifier_list` is provided, only include rows whose
253
+ `unique_identifier` is in that list.
254
+
255
+ Filtering logic (when `unique_identifier_range_map` is provided):
256
+ - For each `unique_identifier`, apply its own `start_date`/`end_date`
257
+ filters using the specified operands (`">"`, `">="`, `"<"`, `"<="`):
258
+ {
259
+ <uid>: {
260
+ "start_date": datetime,
261
+ "start_date_operand": ">=" or ">",
262
+ "end_date": datetime,
263
+ "end_date_operand": "<=" or "<"
264
+ },
265
+ ...
266
+ }
267
+
268
+ Parameters
269
+ ----------
270
+ start_date : datetime.datetime or None
271
+ Global lower bound for `time_index`. Ignored if `unique_identifier_range_map` is provided.
272
+ end_date : datetime.datetime or None
273
+ Global upper bound for `time_index`. Ignored if `unique_identifier_range_map` is provided.
274
+ unique_identifier_list : list or None
275
+ If provided, only include rows matching these IDs. Ignored if `unique_identifier_range_map` is provided.
276
+ great_or_equal : bool, default True
277
+ If True, use `>=` when filtering by `start_date`; otherwise use `>`. Ignored if `unique_identifier_range_map` is provided.
278
+ less_or_equal : bool, default True
279
+ If True, use `<=` when filtering by `end_date`; otherwise use `<`. Ignored if `unique_identifier_range_map` is provided.
280
+ unique_identifier_range_map : UniqueIdentifierRangeMap or None
281
+ Mapping of specific `unique_identifier` keys to their own sub-filters. When provided, this is the sole filter applied.
282
+
283
+ Returns
284
+ -------
285
+ pd.DataFrame
286
+ A DataFrame containing rows that satisfy the combined time and identifier filters.
287
+ """
288
+ return self.local_persist_manager.get_df_between_dates(
289
+ start_date=start_date,
290
+ end_date=end_date,
291
+ unique_identifier_list=unique_identifier_list,
292
+ great_or_equal=great_or_equal,
293
+ less_or_equal=less_or_equal,
294
+ unique_identifier_range_map=unique_identifier_range_map,
295
+ columns=columns,
296
+ )
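A usage sketch of the two filtering modes described in the docstring above; node stands for any DataAccessMixin instance, and the identifiers and dates are purely illustrative:

    import datetime
    import pytz

    # Global window plus an identifier filter
    df = node.get_df_between_dates(
        start_date=datetime.datetime(2024, 1, 1, tzinfo=pytz.utc),
        end_date=datetime.datetime(2024, 6, 30, tzinfo=pytz.utc),
        unique_identifier_list=["AAPL", "MSFT"],
        great_or_equal=True,
        less_or_equal=True,
    )

    # Per-identifier ranges: when this mapping is passed, all other filters are ignored
    range_map = {
        "AAPL": {"start_date": datetime.datetime(2024, 3, 1, tzinfo=pytz.utc),
                 "start_date_operand": ">="},
        "MSFT": {"start_date": datetime.datetime(2024, 4, 1, tzinfo=pytz.utc),
                 "start_date_operand": ">"},
    }
    df = node.get_df_between_dates(unique_identifier_range_map=range_map)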
297
+
298
+
299
+
300
+
301
+
302
+ def get_ranged_data_per_asset(self, range_descriptor: Optional[UniqueIdentifierRangeMap],
303
+ columns=None,
304
+ ) -> pd.DataFrame:
305
+ """
306
+ Gets data based on a range descriptor.
307
+
308
+ Args:
309
+ range_descriptor: A UniqueIdentifierRangeMap object.
310
+
311
+ Returns:
312
+ A DataFrame with the ranged data.
313
+ """
314
+ return self.get_df_between_dates(unique_identifier_range_map=range_descriptor,
315
+ columns=columns,
316
+ )
317
+ def get_ranged_data_per_asset_great_or_equal(self, range_descriptor: Optional[UniqueIdentifierRangeMap],
318
+ columns=None,
319
+ ) -> pd.DataFrame:
320
+ """
321
+ Gets data based on a range descriptor.
322
+
323
+ Args:
324
+ range_descriptor: A UniqueIdentifierRangeMap object.
325
+
326
+ Returns:
327
+ A DataFrame with the ranged data.
328
+ """
329
+
330
+ for k,v in range_descriptor.items():
331
+ v["start_date_operand"]="=>"
332
+ return self.get_df_between_dates(unique_identifier_range_map=range_descriptor,
333
+ columns=columns,
334
+ )
335
+
336
+ def filter_by_assets_ranges(self, asset_ranges_map: dict) -> pd.DataFrame:
337
+ """
338
+ Filters data by asset ranges.
339
+
340
+ Args:
341
+ asset_ranges_map: A dictionary mapping assets to their date ranges.
342
+
343
+ Returns:
344
+ A DataFrame with the filtered data.
345
+ """
346
+ return self.local_persist_manager.filter_by_assets_ranges(asset_ranges_map)
347
+
348
+
349
+ class APIDataNode(DataAccessMixin):
350
+
351
+
352
+ @classmethod
353
+ def build_from_local_time_serie(cls, source_table: "LocalTimeSerie") -> "APIDataNode":
354
+ return cls(data_source_id=source_table.data_source.id,
355
+ storage_hash=source_table.storage_hash
356
+ )
357
+
358
+ @classmethod
359
+ def build_from_table_id(cls, table_id: str) -> "APIDataNode":
360
+ table = ms_client.DynamicTableMetaData.get(id=table_id)
361
+ ts = cls(
362
+ data_source_id=table.data_source.id,
363
+ storage_hash=table.storage_hash
364
+ )
365
+ return ts
366
+
367
+ @classmethod
368
+ def build_from_identifier(cls, identifier: str) -> "APIDataNode":
369
+
370
+ table = ms_client.DynamicTableMetaData.get(identifier=identifier)
371
+ ts = cls(
372
+ data_source_id=table.data_source.id,
373
+ storage_hash=table.storage_hash
374
+ )
375
+ return ts
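A brief sketch of wiring a read-only APIDataNode to an existing table; the table identifier and asset id are hypothetical:

    prices = APIDataNode.build_from_identifier(identifier="daily_bars")
    stats = prices.get_update_statistics()
    df = prices.get_df_between_dates(unique_identifier_list=["AAPL"])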
376
+
377
+ def __init__(self,
378
+ data_source_id: int, storage_hash: str,
379
+ data_source_local_lake: Optional[DataSource] = None):
380
+ """
381
+ Initializes an APIDataNode.
382
+
383
+ Args:
384
+ data_source_id: The ID of the data source.
385
+ storage_hash: The storage hash of the backing table.
386
+ data_source_local_lake: Optional local data source for the lake.
387
+ """
388
+ if data_source_local_lake is not None:
389
+ assert data_source_local_lake.data_type in CONSTANTS.DATA_SOURCE_TYPE_LOCAL_DISK_LAKE, "data_source_local_lake should be of type CONSTANTS.DATA_SOURCE_TYPE_LOCAL_DISK_LAKE"
390
+
391
+ assert isinstance(data_source_id, int)
392
+ self.data_source_id = data_source_id
393
+ self.storage_hash = storage_hash
394
+ self.data_source = data_source_local_lake
395
+ self._local_persist_manager: APIPersistManager = None
396
+ self.update_statistics = None
397
+
398
+ def __repr__(self) -> str:
399
+
400
+
401
+ repr = self.__class__.__name__ + f" {os.environ['TDAG_ENDPOINT']}/dynamic-table-metadatas/details/?dynamic_table_id={self.data_source_id}"
402
+ return repr
403
+
404
+ @property
405
+ def is_api(self):
406
+ return True
407
+
408
+ @staticmethod
409
+ def _get_update_hash(storage_hash):
410
+ return "API_"+f"{storage_hash}"
411
+ @property
412
+ def update_hash(self):
413
+ return self._get_update_hash(storage_hash=self.storage_hash)
414
+
415
+ def __getstate__(self) -> Dict[str, Any]:
416
+ """Prepares the state for pickling."""
417
+ state = self.__dict__.copy()
418
+ # Remove unpicklable/transient state specific to APIDataNode
419
+ names_to_remove = [
420
+ "_local_persist_manager", # APIPersistManager instance
421
+ ]
422
+ cleaned_state = {k: v for k, v in state.items() if k not in names_to_remove}
423
+ return cleaned_state
424
+
425
+ @property
426
+ def local_persist_manager(self) -> Any:
427
+ """Gets the local persistence manager, initializing it if necessary."""
428
+ if self._local_persist_manager is None:
429
+ self._set_local_persist_manager()
430
+ self.logger.debug(f"Setting local persist manager for {self.storage_hash}")
431
+ return self._local_persist_manager
432
+
433
+ def set_relation_tree(self) -> None:
434
+ pass # do nothing for API Time Series
435
+
436
+ def _verify_local_data_source(self) -> None:
437
+ """Verifies and sets the local data source from environment variables if available."""
438
+ pod_source = os.environ.get("POD_DEFAULT_DATA_SOURCE", None)
439
+ if pod_source is not None:
440
+ from mainsequence.client import models as models
441
+ pod_source = json.loads(pod_source)
442
+ ModelClass = pod_source["tdag_orm_class"]
443
+ pod_source.pop("tdag_orm_class", None)
444
+ ModelClass = getattr(models, ModelClass)
445
+ pod_source = ModelClass(**pod_source)
446
+ self.data_source = pod_source
447
+
448
+ def build_data_source_from_configuration(self, data_config: Dict[str, Any]) -> DataSource:
449
+ """
450
+ Builds a data source object from a configuration dictionary.
451
+
452
+ Args:
453
+ data_config: The data source configuration.
454
+
455
+ Returns:
456
+ A DataSource object.
457
+ """
458
+ ModelClass = DynamicTableDataSource.get_class(data_config['data_type'])
459
+ pod_source = ModelClass.get(data_config["id"])
460
+ return pod_source
461
+
462
+ def _set_local_persist_manager(self) -> None:
463
+ self._verify_local_data_source()
464
+ self._local_persist_manager = APIPersistManager(storage_hash=self.storage_hash, data_source_id=self.data_source_id)
465
+ metadata = self._local_persist_manager.metadata
466
+
467
+ assert metadata is not None, f"Verify that the table {self.storage_hash} exists "
468
+
469
+
470
+
471
+
472
+ def get_update_statistics(self, asset_symbols: Optional[list] = None) -> UpdateStatistics:
473
+ """
474
+ Gets update statistics from the database.
475
+
476
+ Args:
477
+ asset_symbols: An optional list of asset symbols to filter by.
478
+
479
+ Returns:
480
+ The UpdateStatistics object describing the last ingested positions for this table.
481
+ """
482
+
483
+ return self.local_persist_manager.metadata.sourcetableconfiguration.get_data_updates()
484
+
485
+ def get_earliest_updated_asset_filter(self, unique_identifier_list: list,
486
+ last_update_per_asset: dict) -> datetime.datetime:
487
+ """
488
+ Gets the earliest last update time for a list of unique identifiers.
489
+
490
+ Args:
491
+ unique_identifier_list: A list of unique identifiers.
492
+ last_update_per_asset: A dictionary mapping assets to their last update times.
493
+
494
+ Returns:
495
+ The earliest last update timestamp.
496
+ """
497
+ if unique_identifier_list is not None:
498
+ last_update_in_table = min(
499
+ [t for uid, per_id in last_update_per_asset.items() for t in per_id.values() if uid in unique_identifier_list])
500
+ else:
501
+ last_update_in_table = min([t for a in last_update_per_asset.values() for t in a.values()])
502
+ return last_update_in_table
503
+
504
+ def update(self, *args, **kwargs) -> None:
505
+ self.logger.info("Not updating series")
506
+ pass
507
+
508
+
509
+
510
+ class DataNode(DataAccessMixin,ABC):
511
+ """
512
+ Base DataNode class
513
+ """
514
+ OFFSET_START = datetime.datetime(2018, 1, 1, tzinfo=pytz.utc)
515
+ _ARGS_IGNORE_IN_STORAGE_HASH = []
516
+
517
+
518
+ # --- Dunder & Serialization Methods ---
519
+
520
+ def __setstate__(self, state: Dict[str, Any]) -> None:
521
+ # Restore instance attributes (i.e., filename and lineno).
522
+ self.__dict__.update(state)
523
+
524
+ def __getstate__(self) -> Dict[str, Any]:
525
+ # Copy the object's state from self.__dict__ which contains
526
+ # all our instance attributes. Always use the dict.copy()
527
+ # method to avoid modifying the original state.
528
+ state = self._prepare_state_for_pickle(state=self.__dict__)
529
+
530
+ # Remove the unpicklable entries.
531
+ return state
532
+
533
+ def __init__(
534
+ self,
535
+ init_meta: Optional[build_operations.TimeSerieInitMeta] = None,
536
+ build_meta_data: Union[dict, None] = None,
537
+ *args,
538
+ **kwargs):
539
+ """
540
+ Initializes the DataNode object with the provided metadata and configurations.
541
+
542
+ This method sets up the time series object, loading the necessary configurations
543
+ and metadata.
544
+
545
+ Each DataNode instance will create a table in the Main Sequence Data Engine by uniquely hashing
546
+ the arguments with exception of:
547
+
548
+ - init_meta
549
+ - build_meta_data
550
+
551
+ Each DataNode instance will also create an update_hash and a LocalTimeSerie instance in the Data Engine by uniquely hashing
552
+ the same arguments as the table but excluding the arguments inside _LOCAL_KWARGS_TO_IGNORE
553
+
554
+
555
+ Allowed argument types are str, list, int, or Pydantic objects, including lists of Pydantic objects.
556
+
557
+ The OFFSET_START property can be overridden and marks the minimum date from which the table will insert data.
558
+
559
+ Parameters
560
+ ----------
561
+ init_meta : dict, optional
562
+ Metadata for initializing the time series instance.
563
+ build_meta_data : dict, optional
564
+ Metadata related to the building process of the time series.
565
+ *args : tuple
566
+ Additional arguments.
567
+ **kwargs : dict
568
+ Additional keyword arguments.
569
+ """
570
+
571
+
572
+ self.init_meta = init_meta
573
+
574
+ self.build_meta_data = build_meta_data or {}
575
+ self.build_meta_data.setdefault("initialize_with_default_partitions", True)
576
+
578
+
579
+ self.pre_load_routines_run = False
580
+ self._data_source: Optional[DynamicTableDataSource] = None # is set later
581
+ self._local_persist_manager: Optional[PersistManager] = None
582
+
583
+ self._scheduler_tree_connected = False
584
+ self.update_statistics=None
585
+
586
+ def __init_subclass__(cls, **kwargs):
587
+ """
588
+ This special method is called when DataNode is subclassed.
589
+ It automatically wraps the subclass's __init__ method to add post-init routines.
590
+ """
591
+ super().__init_subclass__(**kwargs)
592
+
593
+ # Get the original __init__ from the new subclass
594
+ original_init = cls.__init__
595
+
596
+ @wraps(original_init)
597
+ def wrapped_init(self, *args, **kwargs):
598
+ # 1. Call the original __init__ of the subclass first
599
+ original_init(self, *args, **kwargs)
600
+
601
+ # 2. Capture all arguments from __init__ methods in the MRO up to DataNode
602
+ final_kwargs = {}
603
+ mro = self.__class__.mro()
604
+
605
+ try:
606
+ # We want to inspect from parent to child to ensure subclass arguments override.
607
+ # The MRO is ordered from child to parent, so we find DataNode and reverse the part before it.
608
+ data_node_index = mro.index(DataNode)
609
+ classes_to_inspect = reversed(mro[:data_node_index])
610
+ except ValueError:
611
+ # Fallback if DataNode is not in the MRO.
612
+ classes_to_inspect = [self.__class__]
613
+
614
+ for cls_to_inspect in classes_to_inspect:
615
+ # Only inspect the __init__ defined on the class itself.
616
+ if '__init__' in cls_to_inspect.__dict__:
617
+ sig = inspect.signature(cls_to_inspect.__init__)
618
+ try:
619
+ # Use bind_partial as the full set of args might not match this specific signature.
620
+ bound_args = sig.bind_partial(self, *args, **kwargs)
621
+ bound_args.apply_defaults()
622
+
623
+ current_args = bound_args.arguments
624
+ current_args.pop('self', None)
625
+
626
+ # If the signature has **kwargs, it collects extraneous arguments. Unpack them.
627
+ if 'kwargs' in current_args:
628
+ final_kwargs.update(current_args.pop('kwargs'))
629
+
630
+ # Update the final arguments. Overwrites parent args with child args.
631
+ final_kwargs.update(current_args)
632
+ except TypeError:
633
+ logger.warning(f"Could not bind arguments for {cls_to_inspect.__name__}.__init__; skipping for config.")
634
+ continue
635
+
636
+ # Remove `args` as it collects un-named positional arguments which are not part of the config hash.
637
+ final_kwargs.pop('args', None)
638
+
639
+
640
+ # 3. Run the post-initialization routines
641
+ logger.debug(f"Running post-init routines for {self.__class__.__name__}")
642
+ self._initialize_configuration(init_kwargs=final_kwargs)
643
+
644
+ # 4. Final setup
645
+ self.set_data_source()
646
+ logger.bind(update_hash=self.update_hash)
647
+
648
+ self.run_after_post_init_routines()
649
+
650
+ #requirements for graph update
651
+ self.dependencies_df: Optional[pd.DataFrame] = None
652
+ self.depth_df: Optional[pd.DataFrame] = None
653
+
654
+ self.scheduler : Optional[Scheduler] = None
655
+ self.update_details_tree :Optional[Dict[str,Any]] =None
656
+
657
+ self._patch_build_from_env()
658
+ logger.debug(f"Post-init routines for {self.__class__.__name__} complete.")
659
+
660
+ # Replace the subclass's __init__ with our new wrapped version
661
+ cls.__init__ = wrapped_init
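An illustrative consequence of the wrapping above: every constructor argument except init_meta, build_meta_data, and anything listed in _ARGS_IGNORE_IN_STORAGE_HASH is captured and hashed into the node's configuration, so two instances built with the same arguments resolve to the same table. The subclass below is hypothetical and assumes a configured backend session:

    class DailyReturns(DataNode):
        def __init__(self, ticker: str, window: int = 20, *args, **kwargs):
            self.ticker = ticker
            self.window = window
            super().__init__(*args, **kwargs)

        def dependencies(self):
            return {}

        def update(self) -> pd.DataFrame:
            return pd.DataFrame()

    # Same arguments should resolve to the same storage_hash (and hence the same table)
    a = DailyReturns(ticker="AAPL", window=20)
    b = DailyReturns(ticker="AAPL", window=20)
    assert a.storage_hash == b.storage_hash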
662
+
663
+ def _initialize_configuration(self, init_kwargs: dict) -> None:
664
+ """Creates config from init args and sets them as instance attributes."""
665
+ logger.debug(f"Creating configuration for {self.__class__.__name__}")
666
+
667
+ init_kwargs["time_series_class_import_path"] = {
668
+ "module": self.__class__.__module__,
669
+ "qualname": self.__class__.__qualname__
670
+ }
671
+
672
+ config = build_operations.create_config(
673
+ arguments_to_ignore_from_storage_hash=self._ARGS_IGNORE_IN_STORAGE_HASH,
674
+ kwargs=init_kwargs,
675
+ ts_class_name=self.__class__.__name__
676
+ )
677
+
678
+ for field_name, value in asdict(config).items():
679
+ setattr(self, field_name, value)
680
+
681
+ def _patch_build_from_env(self) -> None:
682
+ """
683
+ Checks for the PATCH_BUILD_CONFIGURATION environment variable and,
684
+ if set, flushes the pickle and patches the build configuration.
685
+ """
686
+ patch_build = os.environ.get("PATCH_BUILD_CONFIGURATION", "false").lower() in ["true", "1"]
687
+ if patch_build:
688
+ self.logger.warning(f"Patching build configuration for {self.storage_hash}")
689
+
690
+ # Ensure dependencies are initialized
691
+ self.local_persist_manager
692
+ self.verify_and_build_remote_objects()
693
+
694
+ pickle_path = self.get_pickle_path_from_time_serie()
695
+ build_operations.flush_pickle(pickle_path=pickle_path)
696
+
697
+ self.local_persist_manager.patch_build_configuration(
698
+ local_configuration=self.local_initial_configuration,
699
+ remote_configuration=self.remote_initial_configuration,
700
+ remote_build_metadata=self.remote_build_metadata,
701
+ )
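For reference, the flag checked above is a plain environment variable; setting it to "true" or "1" before instantiating a node flushes the pickle and re-patches the build configuration during post-init:

    import os
    os.environ["PATCH_BUILD_CONFIGURATION"] = "true"
    node = DailyReturns(ticker="AAPL")  # hypothetical subclass from the earlier sketch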
702
+
703
+
704
+ # --- Core Properties ---
705
+
706
+
707
+ @property
708
+ def is_api(self):
709
+ return False
710
+
711
+
712
+ @property
713
+ def data_source_id(self) -> int:
714
+ return self.data_source.id
715
+
716
+
717
+ @property
718
+ def local_time_serie(self) -> LocalTimeSerie:
719
+ """The local time series metadata object."""
720
+ return self.local_persist_manager.local_metadata
721
+
722
+ @property
723
+ def metadata(self) -> "DynamicTableMetaData":
724
+ return self.local_persist_manager.metadata
725
+
726
+
727
+ @property
728
+ def local_persist_manager(self) -> PersistManager:
729
+ if self._local_persist_manager is None:
730
+ self.logger.debug(f"Setting local persist manager for {self.storage_hash}")
731
+ self._set_local_persist_manager(update_hash=self.update_hash)
732
+ return self._local_persist_manager
733
+
734
+ @property
735
+ def data_source(self) -> Any:
736
+ if self._data_source is not None:
737
+ return self._data_source
738
+ else:
739
+ raise Exception("Data source has not been set")
740
+
741
+ # --- Persistence & Backend Methods ---
742
+
743
+ @tracer.start_as_current_span("TS: set_state_with_sessions")
744
+ def _set_state_with_sessions(self, include_vam_client_objects: bool = True,
745
+ graph_depth_limit: int = 1000,
746
+ graph_depth: int = 0) -> None:
747
+ """
748
+ Sets the state of the DataNode after loading from pickle, including sessions.
749
+
750
+ Args:
751
+ include_vam_client_objects: Whether to include VAM client objects.
752
+ graph_depth_limit: The depth limit for graph traversal.
753
+ graph_depth: The current depth in the graph.
754
+ """
755
+ if graph_depth_limit == -1:
756
+ graph_depth_limit = 1e6
757
+
758
+ minimum_required_depth_for_update = self.get_minimum_required_depth_for_update()
759
+
760
+ state = self.__dict__
761
+
762
+ if graph_depth_limit < minimum_required_depth_for_update and graph_depth == 0:
763
+ graph_depth_limit = minimum_required_depth_for_update
764
+ self.logger.warning(f"Graph depth limit overwritten to {minimum_required_depth_for_update}")
765
+
766
+ # if the data source is not local, the de-serialization needs to happen after setting the local persist manager
767
+ # to guarantee a proper patch in the back-end
768
+ if graph_depth <= graph_depth_limit and self.data_source.related_resource_class_type:
769
+ self._set_local_persist_manager(
770
+ update_hash=self.update_hash,
771
+ local_metadata=None,
772
+ )
773
+
774
+ deserializer = build_operations.DeserializerManager()
775
+ state = deserializer.deserialize_pickle_state(
776
+ state=state,
777
+ data_source_id=self.data_source.id,
778
+ include_vam_client_objects=include_vam_client_objects,
779
+ graph_depth_limit=graph_depth_limit,
780
+ graph_depth=graph_depth + 1
781
+ )
782
+
783
+ self.__dict__.update(state)
784
+
785
+ self.local_persist_manager.synchronize_metadata(local_metadata=None)
786
+
787
+ def _prepare_state_for_pickle(self, state: Dict[str, Any]) -> Dict[str, Any]:
788
+ """
789
+ Prepares the object's state for pickling by serializing and removing unpicklable entries.
790
+
791
+ Args:
792
+ state: The object's __dict__.
793
+
794
+ Returns:
795
+ A pickle-safe dictionary representing the object's state.
796
+ """
797
+ properties = state
798
+ serializer = build_operations.Serializer()
799
+ properties = serializer.serialize_for_pickle(properties)
800
+ names_to_remove = []
801
+ for name, attr in properties.items():
802
+ if name in [
803
+ "local_persist_manager",
804
+ "logger",
805
+ "init_meta",
806
+ "_local_metadata_future",
807
+ "_local_metadata_lock",
808
+ "_local_persist_manager",
809
+ "update_tracker",
810
+ ]:
811
+ names_to_remove.append(name)
812
+ continue
813
+
814
+ try:
815
+ cloudpickle.dumps(attr)
816
+ except Exception as e:
817
+ logger.exception(f"Cant Pickle property {name}")
818
+ raise e
819
+
820
+ for n in names_to_remove:
821
+ properties.pop(n, None)
822
+
823
+ return properties
824
+ def _set_local_persist_manager(self, update_hash: str,
825
+ local_metadata: Union[None, dict] = None,
826
+
827
+ ) -> None:
828
+ """
829
+ Initializes the local persistence manager for the time series. It sets up
830
+ the necessary configurations and checks for existing metadata. If the metadata doesn't
831
+ exist or is incomplete, it sets up the initial configuration and builds the update details.
832
+
833
+ Args:
834
+ update_hash : str
835
+ The local hash ID for the time series.
838
+ local_metadata : Union[None, dict], optional
839
+ Local metadata for the time series, if available.
840
+ """
841
+ self._local_persist_manager = PersistManager.get_from_data_type(
842
+ update_hash=update_hash,
843
+ class_name=self.__class__.__name__,
844
+ local_metadata=local_metadata,
845
+ data_source=self.data_source
846
+ )
847
+
848
+
849
+ def set_data_source(self,
850
+ data_source: Optional[object] = None) -> None:
851
+ """
852
+ Sets the data source for the time series.
853
+
854
+ Args:
855
+ data_source: The data source object. If None, the default is fetched from the ORM.
856
+ """
857
+ if data_source is None:
858
+ self._data_source = get_data_source_from_orm()
859
+ else:
860
+ self._data_source = data_source
861
+
862
+ def verify_and_build_remote_objects(self) -> None:
863
+ """
864
+ Verifies and builds remote objects by calling the persistence layer.
865
+ This logic is now correctly located within the BuildManager.
866
+ """
867
+ # Use self.owner to get properties from the DataNode instance
868
+ owner_class = self.__class__
869
+ time_serie_source_code_git_hash = build_operations.get_data_node_source_code_git_hash(owner_class)
870
+ time_serie_source_code = build_operations.get_data_node_source_code(owner_class)
871
+
872
+ # The call to the low-level persist manager is encapsulated here
873
+ self.local_persist_manager.local_persist_exist_set_config(
874
+ storage_hash=self.storage_hash,
875
+ local_configuration=self.local_initial_configuration,
876
+ remote_configuration=self.remote_initial_configuration,
877
+ time_serie_source_code_git_hash=time_serie_source_code_git_hash,
878
+ time_serie_source_code=time_serie_source_code,
879
+ data_source=self.data_source,
880
+ build_configuration_json_schema=self.build_configuration_json_schema,
881
+ )
882
+ def set_relation_tree(self):
883
+
884
+ """Sets the node relationships in the backend by calling the dependencies() method."""
885
+
886
+ if self.local_persist_manager.local_metadata is None:
887
+ self.verify_and_build_remote_objects() #
888
+ if self.local_persist_manager.is_local_relation_tree_set():
889
+ return
890
+ declared_dependencies = self.dependencies() or {}
891
+
892
+ for name, dependency_ts in declared_dependencies.items():
893
+ self.logger.debug(f"Connecting dependency '{name}'...")
894
+
895
+ # Ensure the dependency itself is properly initialized
896
+ is_api = dependency_ts.is_api
897
+ if not is_api:
898
+ dependency_ts.verify_and_build_remote_objects()
899
+
900
+
901
+ self.local_persist_manager.depends_on_connect(dependency_ts, is_api=is_api)
902
+
903
+ # Recursively set the relation tree for the dependency
904
+ dependency_ts.set_relation_tree()
905
+
906
+ self.local_persist_manager.set_ogm_dependencies_linked()
907
+
908
+
909
+ def set_dependencies_df(self):
910
+ depth_df = self.local_persist_manager.get_all_dependencies_update_priority()
911
+ self.depth_df = depth_df
912
+ if not depth_df.empty:
913
+ self.dependencies_df = depth_df[
914
+ depth_df["local_time_serie_id"] != self.local_time_serie.id].copy()
915
+ else:
916
+ self.dependencies_df = pd.DataFrame()
917
+
918
+ def get_update_statistics(self):
919
+ """
920
+ This method always queries the latest update state from the backend.
921
+ """
922
+ return self.metadata.sourcetableconfiguration.get_data_updates()
923
+ def _set_update_statistics(self,
924
+ update_statistics: UpdateStatistics) -> None:
925
+ """
926
+ UpdateStatistics provides the last-ingested positions:
927
+ - For a single-index series (time_index only), `update_statistics.max_time` is either:
928
+ - None: no prior data—fetch all available rows.
929
+ - a datetime: fetch rows where `time_index > max_time`.
930
+ - For a dual-index series (time_index, unique_identifier), `update_statistics.max_time_per_id` is either:
931
+ - None: single-index behavior applies.
932
+ - dict[str, datetime]: for each `unique_identifier` (matching `Asset.unique_identifier`), fetch rows where
933
+ `time_index > max_time_per_id[unique_identifier]`.
934
+
935
+ Default method to narrow down update statistics in local time series;
936
+ the method filters using asset_list if the attribute exists, as well as the init fallback date.
937
+ :param update_statistics:
938
+
939
+ :return:
940
+ """
941
+ # Filter update_statistics to include only assets in self.asset_list.
942
+
943
+ asset_list = self.get_asset_list()
944
+ self._setted_asset_list = asset_list
945
+
946
+ update_statistics = update_statistics.update_assets(
947
+ asset_list, init_fallback_date=self.OFFSET_START
948
+ )
949
+
950
+ self.update_statistics = update_statistics
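A small sketch of how the checkpoints described above are typically consumed inside an update() implementation, assuming the runner has populated self.update_statistics via _set_update_statistics; _fetch_rows_after is a hypothetical helper:

    def update(self) -> pd.DataFrame:
        stats = self.update_statistics
        if stats.max_time_per_id:  # dual-index series (time_index, unique_identifier)
            frames = [
                self._fetch_rows_after(uid, last_seen)  # hypothetical helper
                for uid, last_seen in stats.max_time_per_id.items()
            ]
            return pd.concat(frames) if frames else pd.DataFrame()
        # single-index series: fetch everything after max_time (or everything if None)
        return self._fetch_rows_after(None, stats.max_time)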
951
+
952
+ # --- Public API ---
953
+
954
+ def run(
955
+ self,
956
+ debug_mode: bool,
957
+ *,
958
+ update_tree: bool = True,
959
+ force_update: bool = False,
960
+ update_only_tree: bool = False,
961
+ remote_scheduler: Union[object, None] = None,
962
+ override_update_stats:Optional[UpdateStatistics] = None
963
+ ):
964
+
965
+ update_runner = run_operations.UpdateRunner(time_serie=self,
966
+ debug_mode=debug_mode,
967
+ force_update=force_update,
968
+ update_tree=update_tree,
969
+ update_only_tree=update_only_tree,
970
+ remote_scheduler=remote_scheduler,
971
+ override_update_stats=override_update_stats
972
+ )
973
+ error_on_last_update, updated_df = update_runner.run()
974
+
975
+ return error_on_last_update, updated_df
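A short usage sketch: debug_mode is the only positional argument, the remaining flags are keyword-only, and the node is the hypothetical subclass from the earlier sketch:

    node = DailyReturns(ticker="AAPL", window=20)
    error_on_last_update, updated_df = node.run(debug_mode=True, force_update=True)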
976
+
977
+
978
+ # --- Optional Hooks for Customization ---
979
+ def run_after_post_init_routines(self) -> None:
980
+ pass
981
+
982
+ def get_minimum_required_depth_for_update(self) -> int:
983
+ """
984
+ Controls the minimum depth that needs to be rebuilt.
985
+ """
986
+ return 0
987
+
988
+ def get_table_metadata(self,)->Optional[ms_client.TableMetaData]:
989
+ """Provides the metadata configuration for a market time series.
990
+
991
+ """
992
+
993
+
994
+ return None
995
+
996
+ def get_column_metadata(self) -> Optional[List[ColumnMetaData]]:
997
+ """
998
+ This method should return a list of ColumnMetaData objects to add extra context to each column of the time series.
999
+ Examples:
1000
+ from mainsequence.client.models_tdag import ColumnMetaData
1001
+ columns_metadata = [ColumnMetaData(column_name="instrument",
1002
+ dtype="str",
1003
+ label="Instrument",
1004
+ description=(
1005
+ "Unique identifier provided by Valmer; it’s a composition of the "
1006
+ "columns `tv_emisora_serie`, and is also used as a ticker for custom "
1007
+ "assets in Valmer."
1008
+ )
1009
+ ),
1010
+ ColumnMetaData(column_name="currency",
1011
+ dtype="str",
1012
+ label="Currency",
1013
+ description=(
1014
+ "Corresponds to code for curries be aware this may not match Figi Currency assets"
1015
+ )
1016
+ ),
1017
+
1018
+ ]
1019
+ Returns:
1020
+ A list of ColumnMetaData objects, or None.
1021
+ """
1022
+ return None
1023
+
1024
+ def get_asset_list(self) -> Optional[List["Asset"]]:
1025
+ """
1026
+ Provide the list of assets that this DataNode should include when updating.
1027
+
1028
+ By default, this method returns `self.asset_list` if defined.
1029
+ Subclasses _must_ override this method when no `asset_list` attribute was set
1030
+ during initialization, to supply a dynamic list of assets for update_statistics.
1031
+
1032
+ Use Case:
1033
+ - For category-based series, return all Asset unique_identifiers in a given category
1034
+ (e.g., `AssetCategory(unique_identifier="investable_assets")`), so that only those
1035
+ assets are updated in this DataNode.
1036
+
1037
+ Returns
1038
+ -------
1039
+ list or None
1040
+ - A list of Asset objects to include in the update.
1041
+ - `None` if no filtering by asset is required (update all assets by default).
1042
+ """
1043
+ if hasattr(self, "asset_list"):
1044
+ return self.asset_list
1045
+
1046
+ return None
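A minimal override sketch under stated assumptions: it returns a filtered Asset list using the same Asset.filter call used later in this module, with placeholder identifiers standing in for a real category lookup:

    def get_asset_list(self):
        from mainsequence.client import Asset
        # Placeholder filter; a real implementation would resolve the members
        # of an AssetCategory (e.g. "investable_assets") here.
        return Asset.filter(unique_identifier__in=["AAPL", "MSFT"])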
1047
+
1048
+ def run_post_update_routines(self, error_on_last_update: bool, ) -> None:
1049
+ """ Should be overwritten by subclass """
1050
+ pass
1051
+
1052
+ @abstractmethod
1053
+ def dependencies(self) -> Dict[str, Union["DataNode", "APIDataNode"]]:
1054
+ """
1055
+ Subclasses must implement this method to explicitly declare their upstream dependencies.
1056
+
1057
+ Returns:
1058
+ A dictionary where keys are descriptive names and values are the DataNode dependency instances.
1059
+ """
1060
+ raise NotImplementedError
1061
+
1062
+ @abstractmethod
1063
+ def update(self) -> pd.DataFrame:
1064
+ """
1065
+ Fetch and ingest only the new rows for this DataNode based on prior update checkpoints.
1066
+
1067
+
1068
+
1069
+ Requirements:
1070
+ - `time_index` **must** be a `datetime.datetime` instance with UTC timezone.
1071
+ - Column names **must** be all lowercase.
1072
+ - No column values may be Python `datetime` objects; if date/time storage is needed, convert to integer
1073
+ timestamps (e.g., UNIX epoch in seconds or milliseconds).
1074
+
1075
+ After retrieving the incremental rows, this method inserts or upserts them into the Main Sequence Data Engine.
1076
+
1077
+ Parameters
1078
+ ----------
1079
+ update_statistics : UpdateStatistics
1080
+ Object capturing the previous update state. Must expose:
1081
+ - `max_time` (datetime | None)
1082
+ - `max_time_per_id` (dict[str, datetime] | None)
1083
+
1084
+ Returns
1085
+ -------
1086
+ pd.DataFrame
1087
+ A DataFrame containing only the newly added or updated records.
1088
+ """
1089
+ raise NotImplementedError
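To tie the two abstract hooks together, here is a hedged end-to-end sketch of a minimal concrete DataNode. The upstream table identifier, the close column, and the transformation are hypothetical; it assumes a single-index upstream series and relies on the runner populating self.update_statistics before update() is called:

    class RollingMean(DataNode):
        def __init__(self, source_identifier: str, window: int = 5, *args, **kwargs):
            self.source_identifier = source_identifier
            self.window = window
            super().__init__(*args, **kwargs)

        def dependencies(self) -> Dict[str, Union["DataNode", "APIDataNode"]]:
            # Declare the upstream table this node reads from.
            return {"source": APIDataNode.build_from_identifier(self.source_identifier)}

        def update(self) -> pd.DataFrame:
            source = self.dependencies()["source"]
            stats = self.update_statistics
            # stats.max_time is None on the first run, so this fetches everything;
            # afterwards it fetches only rows strictly after the checkpoint.
            df = source.get_df_between_dates(start_date=stats.max_time, great_or_equal=False)
            if df.empty:
                return df
            out = df[["close"]].rolling(self.window).mean()  # hypothetical column
            out.columns = ["rolling_close"]  # lowercase column names, as required
            return out.dropna()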
1090
+
1091
+
1092
+
1093
+ class WrapperDataNode(DataNode):
1094
+ """A wrapper class for managing multiple DataNode objects."""
1095
+
1096
+ def __init__(self, translation_table: AssetTranslationTable, *args, **kwargs):
1097
+ """
1098
+ Initialize the WrapperDataNode.
1099
+
1100
+ Args:
1101
+ translation_table: The AssetTranslationTable used to map source assets to their target tables.
1102
+ """
1103
+ super().__init__(*args, **kwargs)
1104
+
1105
+ def get_time_serie_from_markets_unique_id(table_identifier: str) -> DataNode:
1106
+ """
1107
+ Returns the appropriate bar time series based on the asset list and source.
1108
+ """
1109
+ from mainsequence.client import DoesNotExist
1110
+ try:
1111
+ metadata = ms_client.DynamicTableMetaData.get(identifier=table_identifier)
1112
+
1113
+ except DoesNotExist as e:
1114
+ raise e
1115
+ api_ts = APIDataNode(
1116
+ data_source_id=metadata.data_source.id,
1117
+ storage_hash=metadata.storage_hash
1118
+ )
1119
+ return api_ts
1120
+
1121
+ translation_table = copy.deepcopy(translation_table)
1122
+
1123
+ self.api_ts_map = {}
1124
+ for rule in translation_table.rules:
1125
+ if rule.markets_time_serie_unique_identifier not in self.api_ts_map:
1126
+ self.api_ts_map[rule.markets_time_serie_unique_identifier] = get_time_serie_from_markets_unique_id(
1127
+ table_identifier=rule.markets_time_serie_unique_identifier)
1128
+
1129
+ self.translation_table = translation_table
1130
+
1131
+ def dependencies(self) -> Dict[str, Union["DataNode", "APIDataNode"]]:
1132
+ return self.api_ts_map
1133
+
1134
+ def get_ranged_data_per_asset(self, range_descriptor: Optional[UniqueIdentifierRangeMap]) -> pd.DataFrame:
1135
+ """
1136
+ Gets data based on a range descriptor.
1137
+
1138
+ Args:
1139
+ range_descriptor: A UniqueIdentifierRangeMap object.
1140
+
1141
+ Returns:
1142
+ A DataFrame with the ranged data.
1143
+ """
1144
+ return self.get_df_between_dates(unique_identifier_range_map=range_descriptor)
1145
+
1146
+ def get_df_between_dates(
1147
+ self,
1148
+ start_date: Optional[datetime.datetime] = None,
1149
+ end_date: Optional[datetime.datetime] = None,
1150
+ unique_identifier_list: Optional[list] = None,
1151
+ great_or_equal: bool = True,
1152
+ less_or_equal: bool = True,
1153
+ unique_identifier_range_map: Optional[UniqueIdentifierRangeMap] = None,
1154
+ ) -> pd.DataFrame:
1155
+ """
1156
+ Retrieves a DataFrame of time series data between specified dates, handling asset translation.
1157
+
1158
+ Args:
1159
+ start_date: The start date of the data range.
1160
+ end_date: The end date of the data range.
1161
+ unique_identifier_list: An optional list of unique identifiers to filter by.
1162
+ great_or_equal: Whether to include the start date.
1163
+ less_or_equal: Whether to include the end date.
1164
+ unique_identifier_range_map: An optional map of ranges for unique identifiers.
1165
+
1166
+ Returns:
1167
+ A pandas DataFrame with the requested data.
1168
+ """
1169
+ if (unique_identifier_list is None) == (unique_identifier_range_map is None):
1170
+ raise ValueError(
1171
+ "Pass **either** unique_identifier_list **or** unique_identifier_range_map, but not both."
1172
+ )
1173
+
1174
+ if unique_identifier_list is not None:
1175
+ wanted_src_uids = set(unique_identifier_list)
1176
+ else: # range‑map path
1177
+ wanted_src_uids = set(unique_identifier_range_map.keys())
1178
+
1179
+ if not wanted_src_uids:
1180
+ return pd.DataFrame()
1181
+
1182
+ # evaluate the rules for each asset
1183
+ from mainsequence.client import Asset
1184
+ assets = Asset.filter(unique_identifier__in=list(wanted_src_uids))
1185
+ # assets for which we want prices
1186
+
1187
+ asset_translation_dict = {}
1188
+ for asset in assets:
1189
+ asset_translation_dict[asset.unique_identifier] = self.translation_table.evaluate_asset(asset)
1190
+
1191
+ # group assets that share the same rule, then query all assets that have the same target
1192
+ translation_df = pd.DataFrame.from_dict(asset_translation_dict, orient="index")
1193
+ try:
1194
+ grouped = translation_df.groupby(
1195
+ ["markets_time_serie_unique_identifier", "exchange_code"],
1196
+ dropna=False
1197
+ )
1198
+ except Exception as e:
1199
+ raise e
1200
+
1201
+ data_df = []
1202
+ for (mkt_ts_id, target_exchange_code), group_df in grouped:
1203
+ # get the correct DataNode instance from our pre-built map
1204
+ api_ts = self.api_ts_map[mkt_ts_id]
1205
+
1206
+ # figure out which assets belong to this group
1207
+ grouped_unique_ids = group_df.index.tolist()
1208
+ source_assets = [
1209
+ a for a in assets
1210
+ if a.unique_identifier in grouped_unique_ids
1211
+ ] # the source assets we want to translate
1212
+
1213
+ # get correct target assets based on the share classes
1214
+ asset_ticker_group_ides = [a.asset_ticker_group_id for a in assets]
1215
+ asset_query = dict(
1216
+ asset_ticker_group_id__in=asset_ticker_group_ides
1217
+ )
1218
+ if not pd.isna(target_exchange_code):
1219
+ asset_query["exchange_code"] = target_exchange_code
1220
+
1221
+ target_assets = Asset.filter(**asset_query) #the assets that have the same group
1222
+
1223
+ target_asset_unique_ids = [a.asset_ticker_group_id for a in target_assets]
1224
+ if len(asset_ticker_group_ides) > len(target_asset_unique_ids):
1225
+ raise Exception(f"Not all assets were found in backend for translation table: {set(asset_ticker_group_ides) - set(target_asset_unique_ids)}")
1226
+
1227
+ if len(asset_ticker_group_ides) < len(target_asset_unique_ids):
1228
+ # this would break the proper selection of assets
1229
+ raise Exception(f"Too many assets were found in backend for translation table: {set(target_asset_unique_ids) - set(asset_ticker_group_ides)}")
1230
+
1231
+ # create the source-target mapping
1232
+ ticker_group_to_uid_map = {}
1233
+ for a in source_assets:
1234
+ if a.asset_ticker_group_id in ticker_group_to_uid_map:
1235
+ raise ValueError(f"Share class {a.asset_ticker_group_id} cannot be duplicated")
1236
+ ticker_group_to_uid_map[a.asset_ticker_group_id] = a.unique_identifier
1237
+
1238
+ source_target_map = {}
1239
+ for a in target_assets:
1240
+ asset_ticker_group_id = a.asset_ticker_group_id
1241
+ source_unique_identifier = ticker_group_to_uid_map[asset_ticker_group_id]
1242
+ source_target_map[source_unique_identifier] = a.unique_identifier
1243
+
1244
+ target_source_map = {v: k for k, v in source_target_map.items()}
1245
+ if unique_identifier_range_map is not None:
1246
+ # create the correct unique identifier range map
1247
+ unique_identifier_range_map_target = {}
1248
+ for a_unique_identifier, asset_range in unique_identifier_range_map.items():
1249
+ if a_unique_identifier not in source_target_map.keys(): continue
1250
+ target_key = source_target_map[a_unique_identifier]
1251
+ unique_identifier_range_map_target[target_key] = asset_range
1252
+
1253
+ if not unique_identifier_range_map_target:
1254
+ self.logger.warning(
1255
+ f"Unique identifier map is empty for group assets {source_assets} and unique_identifier_range_map {unique_identifier_range_map}")
1256
+ continue
1257
+
1258
+ tmp_data = api_ts.get_df_between_dates(
1259
+ unique_identifier_range_map=unique_identifier_range_map_target,
1260
+ start_date=start_date,
1261
+ end_date=end_date,
1262
+ great_or_equal=great_or_equal,
1263
+ less_or_equal=less_or_equal,
1264
+ )
1265
+ else:
1266
+ tmp_data = api_ts.get_df_between_dates(
1267
+ start_date=start_date,
1268
+ end_date=end_date,
1269
+ unique_identifier_list=list(target_source_map.keys()),
1270
+ great_or_equal=great_or_equal,
1271
+ less_or_equal=less_or_equal,
1272
+ )
1273
+
1274
+ if tmp_data.empty:
1275
+ continue
1276
+
1277
+ tmp_data = tmp_data.rename(index=target_source_map, level="unique_identifier")
1278
+ data_df.append(tmp_data)
1279
+
1280
+ if not data_df:
1281
+ return pd.DataFrame()
1282
+
1283
+ data_df = pd.concat(data_df, axis=0)
1284
+ return data_df
1285
+
1286
+ def update(self, update_statistics):
1287
+ """ WrapperTimeSeries does not update """
1288
+ pass
1289
+
1290
+
1291
+ build_operations.serialize_argument.register(DataNode, build_operations._serialize_timeserie)
1292
+ build_operations.serialize_argument.register(APIDataNode, build_operations._serialize_api_timeserie)