mainsequence-2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mainsequence/__init__.py +0 -0
- mainsequence/__main__.py +9 -0
- mainsequence/cli/__init__.py +1 -0
- mainsequence/cli/api.py +157 -0
- mainsequence/cli/cli.py +442 -0
- mainsequence/cli/config.py +78 -0
- mainsequence/cli/ssh_utils.py +126 -0
- mainsequence/client/__init__.py +17 -0
- mainsequence/client/base.py +431 -0
- mainsequence/client/data_sources_interfaces/__init__.py +0 -0
- mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
- mainsequence/client/data_sources_interfaces/timescale.py +479 -0
- mainsequence/client/models_helpers.py +113 -0
- mainsequence/client/models_report_studio.py +412 -0
- mainsequence/client/models_tdag.py +2276 -0
- mainsequence/client/models_vam.py +1983 -0
- mainsequence/client/utils.py +387 -0
- mainsequence/dashboards/__init__.py +0 -0
- mainsequence/dashboards/streamlit/__init__.py +0 -0
- mainsequence/dashboards/streamlit/assets/config.toml +12 -0
- mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
- mainsequence/dashboards/streamlit/assets/logo.png +0 -0
- mainsequence/dashboards/streamlit/core/__init__.py +0 -0
- mainsequence/dashboards/streamlit/core/theme.py +212 -0
- mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
- mainsequence/dashboards/streamlit/scaffold.py +220 -0
- mainsequence/instrumentation/__init__.py +7 -0
- mainsequence/instrumentation/utils.py +101 -0
- mainsequence/instruments/__init__.py +1 -0
- mainsequence/instruments/data_interface/__init__.py +10 -0
- mainsequence/instruments/data_interface/data_interface.py +361 -0
- mainsequence/instruments/instruments/__init__.py +3 -0
- mainsequence/instruments/instruments/base_instrument.py +85 -0
- mainsequence/instruments/instruments/bond.py +447 -0
- mainsequence/instruments/instruments/european_option.py +74 -0
- mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
- mainsequence/instruments/instruments/json_codec.py +585 -0
- mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
- mainsequence/instruments/instruments/position.py +475 -0
- mainsequence/instruments/instruments/ql_fields.py +239 -0
- mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
- mainsequence/instruments/pricing_models/__init__.py +0 -0
- mainsequence/instruments/pricing_models/black_scholes.py +49 -0
- mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
- mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
- mainsequence/instruments/pricing_models/indices.py +350 -0
- mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
- mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
- mainsequence/instruments/settings.py +175 -0
- mainsequence/instruments/utils.py +29 -0
- mainsequence/logconf.py +284 -0
- mainsequence/reportbuilder/__init__.py +0 -0
- mainsequence/reportbuilder/__main__.py +0 -0
- mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
- mainsequence/reportbuilder/model.py +713 -0
- mainsequence/reportbuilder/slide_templates.py +532 -0
- mainsequence/tdag/__init__.py +8 -0
- mainsequence/tdag/__main__.py +0 -0
- mainsequence/tdag/config.py +129 -0
- mainsequence/tdag/data_nodes/__init__.py +12 -0
- mainsequence/tdag/data_nodes/build_operations.py +751 -0
- mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
- mainsequence/tdag/data_nodes/persist_managers.py +812 -0
- mainsequence/tdag/data_nodes/run_operations.py +543 -0
- mainsequence/tdag/data_nodes/utils.py +24 -0
- mainsequence/tdag/future_registry.py +25 -0
- mainsequence/tdag/utils.py +40 -0
- mainsequence/virtualfundbuilder/__init__.py +45 -0
- mainsequence/virtualfundbuilder/__main__.py +235 -0
- mainsequence/virtualfundbuilder/agent_interface.py +77 -0
- mainsequence/virtualfundbuilder/config_handling.py +86 -0
- mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
- mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
- mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
- mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
- mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
- mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
- mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
- mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
- mainsequence/virtualfundbuilder/data_nodes.py +637 -0
- mainsequence/virtualfundbuilder/enums.py +23 -0
- mainsequence/virtualfundbuilder/models.py +282 -0
- mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
- mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
- mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
- mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
- mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
- mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
- mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
- mainsequence/virtualfundbuilder/utils.py +381 -0
- mainsequence-2.0.0.dist-info/METADATA +105 -0
- mainsequence-2.0.0.dist-info/RECORD +110 -0
- mainsequence-2.0.0.dist-info/WHEEL +5 -0
- mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
- mainsequence-2.0.0.dist-info/top_level.txt +1 -0
mainsequence/tdag/data_nodes/data_nodes.py
@@ -0,0 +1,1292 @@
import datetime
import os
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd
from typing import Dict, Any, List, Optional, Tuple, Callable
import json
import time
import traceback
import pytz
import inspect
import logging
import copy
import cloudpickle
from dataclasses import asdict
from mainsequence.client import Scheduler
from mainsequence.instrumentation import tracer
from mainsequence.tdag.config import (
    ogm
)
import tempfile
import structlog.contextvars as cvars
from structlog.stdlib import BoundLogger

from mainsequence.logconf import logger

from mainsequence.tdag.data_nodes.persist_managers import PersistManager, APIPersistManager
from mainsequence.client.models_tdag import (DataSource,
                                             UpdateStatistics, UniqueIdentifierRangeMap, ColumnMetaData, )


from abc import ABC

from typing import Union

from mainsequence.client import LocalTimeSerie, CONSTANTS, \
    DynamicTableDataSource, AssetTranslationTable

from functools import wraps

import mainsequence.client as ms_client
import mainsequence.tdag.data_nodes.run_operations as run_operations
import mainsequence.tdag.data_nodes.build_operations as build_operations


def get_data_source_from_orm() -> Any:
    from mainsequence.client import SessionDataSource
    if SessionDataSource.data_source.related_resource is None:
        raise Exception("This Pod does not have a default data source")
    return SessionDataSource.data_source


def get_latest_update_by_assets_filter(asset_symbols: Optional[list], last_update_per_asset: dict) -> datetime.datetime:
    """
    Gets the latest update timestamp for a list of asset symbols.

    Args:
        asset_symbols: A list of asset symbols.
        last_update_per_asset: A dictionary mapping assets to their last update time.

    Returns:
        The latest update timestamp.
    """
    if asset_symbols is not None:
        last_update_in_table = np.max([timestamp for unique_identifier, timestamp in last_update_per_asset.items()
                                       if unique_identifier in asset_symbols
                                       ])
    else:
        last_update_in_table = np.max(list(last_update_per_asset.values()))
    return last_update_in_table


def last_update_per_unique_identifier(unique_identifier_list: Optional[list],
                                      last_update_per_asset: dict) -> datetime.datetime:
    """
    Gets the earliest last update time for a list of unique identifiers.

    Args:
        unique_identifier_list: A list of unique identifiers.
        last_update_per_asset: A dictionary mapping assets to their last update times.

    Returns:
        The earliest last update timestamp.
    """
    if unique_identifier_list is not None:
        last_update_in_table = min(
            [t for uid, a in last_update_per_asset.items() for t in a.values() if uid in unique_identifier_list])
    else:
        last_update_in_table = min([t for a in last_update_per_asset.values() for t in a.values()])
    return last_update_in_table


class DependencyUpdateError(Exception):
    pass


class DataAccessMixin:
    """A mixin for classes that provide access to time series data."""

    def __repr__(self) -> str:
        try:
            local_id = self.local_time_serie.id
        except:
            local_id = 0
        repr = self.__class__.__name__ + f" {os.environ['TDAG_ENDPOINT']}/local-time-series/details/?local_time_serie_id={local_id}"
        return repr

    def get_last_observation(self, asset_list: List[ms_client.AssetMixin]):
        update_statistics = self.get_update_statistics()
        update_statistics = update_statistics.update_assets(asset_list=asset_list)
        update_range_map = update_statistics.get_update_range_map_great_or_equal()
        last_observation = self.get_ranged_data_per_asset(update_range_map)
        return last_observation

    def get_pickle_path_from_time_serie(self) -> str:
        path = build_operations.get_pickle_path(update_hash=self.update_hash,
                                                data_source_id=self.data_source_id,
                                                is_api=self.is_api
                                                )
        return path

    def persist_to_pickle(self, overwrite: bool = False) -> Tuple[str, str]:
        """
        Persists the DataNode object to a pickle file using an atomic write.

        Uses a single method to determine the pickle path and dispatches to
        type-specific logic only where necessary.

        Args:
            overwrite: If True, overwrites any existing pickle file.

        Returns:
            A tuple containing the full path and the relative path of the pickle file.
        """
        # 1. Common Logic: Determine the pickle path for both types
        path = self.get_pickle_path_from_time_serie()

        # 2. Type-Specific Logic: Run pre-dump actions only for standard DataNode
        if not self.is_api:
            self.logger.debug(f"Patching source code and git hash for {self.storage_hash}")
            self.local_persist_manager.update_git_and_code_in_backend(time_serie_class=self.__class__)
            # Prepare for pickling by removing the unpicklable ThreadLock
            self._local_persist_manager = None

        # 3. Common Logic: Persist the data source if needed
        data_source_id = getattr(self.data_source, 'id', self.data_source_id)
        data_source_path = build_operations.data_source_pickle_path(data_source_id)
        if not os.path.isfile(data_source_path) or overwrite:
            self.data_source.persist_to_pickle(data_source_path)

        # 4. Common Logic: Atomically write the main pickle file
        if os.path.isfile(path) and not overwrite:
            self.logger.debug(f"Pickle file already exists at {path}. Skipping.")
        else:
            if overwrite:
                self.logger.warning(f"Overwriting pickle file at {path}")
            self._atomic_pickle_dump(path)

        # 5. Common Logic: Return the full and relative paths
        return path, path.replace(ogm.pickle_storage_path + "/", "")

    def _atomic_pickle_dump(self, path: str) -> None:
        """
        Private helper to atomically dump the object to a pickle file.
        This prevents file corruption if the process is interrupted.
        """
        dir_, fname = os.path.split(path)
        # Ensure the target directory exists
        os.makedirs(dir_, exist_ok=True)

        fd, tmp_path = tempfile.mkstemp(prefix=f"{fname}~", dir=dir_)
        os.close(fd)
        try:
            with open(tmp_path, 'wb') as handle:
                cloudpickle.dump(self, handle)
            # Atomic replace is safer than a direct write
            os.replace(tmp_path, path)
            self.logger.debug(f"Successfully persisted pickle to {path}")
        except Exception:
            # Clean up the temporary file on error to avoid clutter
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            raise
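
    # Illustrative sketch, not part of the released module: the same atomic-write pattern
    # used by `_atomic_pickle_dump` (write to a temporary file in the target directory,
    # then `os.replace`) can be reused for any payload with only the standard library,
    # since `os.replace` is an atomic rename when source and destination share a filesystem.
    #
    #     import os, tempfile
    #
    #     def atomic_write_bytes(path: str, payload: bytes) -> None:
    #         dir_, fname = os.path.split(path)
    #         os.makedirs(dir_, exist_ok=True)
    #         fd, tmp_path = tempfile.mkstemp(prefix=f"{fname}~", dir=dir_)
    #         with os.fdopen(fd, "wb") as handle:
    #             handle.write(payload)  # write the full payload to the temp file
    #         os.replace(tmp_path, path)  # atomically swap it into place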

    def get_logger_context_variables(self) -> Dict[str, Any]:
        return dict(update_hash=self.update_hash,
                    local_hash_id_data_source=self.data_source_id,
                    api_time_series=self.__class__.__name__ == "APIDataNode")

    @property
    def logger(self) -> logging.Logger:
        """Gets a logger instance with bound context variables."""
        # import structlog.contextvars as cvars
        # cvars.bind_contextvars(update_hash=self.update_hash,
        #                        update_hash=self.data_source_id,
        #                        api_time_series=True,)
        global logger
        if not hasattr(self, "_logger"):
            cvars.bind_contextvars(**self.get_logger_context_variables())
            self._logger = logger

        return self._logger

    @staticmethod
    def set_context_in_logger(logger_context: Dict[str, Any]) -> None:
        """
        Binds context variables to the global logger.

        Args:
            logger_context: A dictionary of context variables.
        """
        global logger
        for key, value in logger_context.items():
            logger.bind(**{key: value})

    def unbind_context_variables_from_logger(self) -> None:
        cvars.unbind_contextvars(*self.get_logger_context_variables().keys())

    def get_df_between_dates(
            self,
            start_date: Optional[datetime.datetime] = None,
            end_date: Optional[datetime.datetime] = None,
            unique_identifier_list: Optional[list] = None,
            great_or_equal: bool = True,
            less_or_equal: bool = True,
            unique_identifier_range_map: Optional[UniqueIdentifierRangeMap] = None,
            columns: Optional[List[str]] = None
    ) -> pd.DataFrame:
        """
        Retrieve rows from this DataNode whose `time_index` (and optional `unique_identifier`) fall within the specified date ranges.

        **Note:** If `unique_identifier_range_map` is provided, **all** other filters
        (`start_date`, `end_date`, `unique_identifier_list`, `great_or_equal`, `less_or_equal`)
        are ignored, and only the per-identifier ranges in `unique_identifier_range_map` apply.

        Filtering logic (when `unique_identifier_range_map` is None):
          - If `start_date` is provided, include rows where
            `time_index > start_date` (if `great_or_equal=False`)
            or `time_index >= start_date` (if `great_or_equal=True`).
          - If `end_date` is provided, include rows where
            `time_index < end_date` (if `less_or_equal=False`)
            or `time_index <= end_date` (if `less_or_equal=True`).
          - If `unique_identifier_list` is provided, only include rows whose
            `unique_identifier` is in that list.

        Filtering logic (when `unique_identifier_range_map` is provided):
          - For each `unique_identifier`, apply its own `start_date`/`end_date`
            filters using the specified operands (`">"`, `">="`, `"<"`, `"<="`):

            {
                <uid>: {
                    "start_date": datetime,
                    "start_date_operand": ">=" or ">",
                    "end_date": datetime,
                    "end_date_operand": "<=" or "<"
                },
                ...
            }

        Parameters
        ----------
        start_date : datetime.datetime or None
            Global lower bound for `time_index`. Ignored if `unique_identifier_range_map` is provided.
        end_date : datetime.datetime or None
            Global upper bound for `time_index`. Ignored if `unique_identifier_range_map` is provided.
        unique_identifier_list : list or None
            If provided, only include rows matching these IDs. Ignored if `unique_identifier_range_map` is provided.
        great_or_equal : bool, default True
            If True, use `>=` when filtering by `start_date`; otherwise use `>`. Ignored if `unique_identifier_range_map` is provided.
        less_or_equal : bool, default True
            If True, use `<=` when filtering by `end_date`; otherwise use `<`. Ignored if `unique_identifier_range_map` is provided.
        unique_identifier_range_map : UniqueIdentifierRangeMap or None
            Mapping of specific `unique_identifier` keys to their own sub-filters. When provided, this is the sole filter applied.
        columns : list of str or None
            Optional subset of columns to return.

        Returns
        -------
        pd.DataFrame
            A DataFrame containing rows that satisfy the combined time and identifier filters.
        """
        return self.local_persist_manager.get_df_between_dates(
            start_date=start_date,
            end_date=end_date,
            unique_identifier_list=unique_identifier_list,
            great_or_equal=great_or_equal,
            less_or_equal=less_or_equal,
            unique_identifier_range_map=unique_identifier_range_map,
            columns=columns,
        )
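
    # Illustrative sketch with hypothetical identifiers (not from the package): the
    # per-identifier range map documented above is a plain nested dict, so filtering two
    # assets over independent windows could look like this:
    #
    #     range_map = {
    #         "ASSET_A": {"start_date": datetime.datetime(2024, 1, 1, tzinfo=pytz.utc),
    #                     "start_date_operand": ">="},
    #         "ASSET_B": {"start_date": datetime.datetime(2024, 6, 1, tzinfo=pytz.utc),
    #                     "start_date_operand": ">",
    #                     "end_date": datetime.datetime(2024, 12, 31, tzinfo=pytz.utc),
    #                     "end_date_operand": "<="},
    #     }
    #     df = node.get_df_between_dates(unique_identifier_range_map=range_map)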

    def get_ranged_data_per_asset(self, range_descriptor: Optional[UniqueIdentifierRangeMap],
                                  columns=None,
                                  ) -> pd.DataFrame:
        """
        Gets data based on a range descriptor.

        Args:
            range_descriptor: A UniqueIdentifierRangeMap object.

        Returns:
            A DataFrame with the ranged data.
        """
        return self.get_df_between_dates(unique_identifier_range_map=range_descriptor,
                                         columns=columns,
                                         )

    def get_ranged_data_per_asset_great_or_equal(self, range_descriptor: Optional[UniqueIdentifierRangeMap],
                                                 columns=None,
                                                 ) -> pd.DataFrame:
        """
        Gets data based on a range descriptor, forcing an inclusive (`>=`) start-date operand.

        Args:
            range_descriptor: A UniqueIdentifierRangeMap object.

        Returns:
            A DataFrame with the ranged data.
        """
        for k, v in range_descriptor.items():
            v["start_date_operand"] = ">="
        return self.get_df_between_dates(unique_identifier_range_map=range_descriptor,
                                         columns=columns,
                                         )

    def filter_by_assets_ranges(self, asset_ranges_map: dict) -> pd.DataFrame:
        """
        Filters data by asset ranges.

        Args:
            asset_ranges_map: A dictionary mapping assets to their date ranges.

        Returns:
            A DataFrame with the filtered data.
        """
        return self.local_persist_manager.filter_by_assets_ranges(asset_ranges_map)


class APIDataNode(DataAccessMixin):

    @classmethod
    def build_from_local_time_serie(cls, source_table: "LocalTimeSerie") -> "APIDataNode":
        return cls(data_source_id=source_table.data_source.id,
                   storage_hash=source_table.storage_hash
                   )

    @classmethod
    def build_from_table_id(cls, table_id: str) -> "APIDataNode":
        table = ms_client.DynamicTableMetaData.get(id=table_id)
        ts = cls(
            data_source_id=table.data_source.id,
            storage_hash=table.storage_hash
        )
        return ts

    @classmethod
    def build_from_identifier(cls, identifier: str) -> "APIDataNode":
        table = ms_client.DynamicTableMetaData.get(identifier=identifier)
        ts = cls(
            data_source_id=table.data_source.id,
            storage_hash=table.storage_hash
        )
        return ts
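
    # Illustrative sketch with a hypothetical table identifier (not from the package):
    # an APIDataNode points at an existing table and is queried read-only through the
    # DataAccessMixin helpers, e.g.:
    #
    #     node = APIDataNode.build_from_identifier(identifier="example_prices_table")
    #     df = node.get_df_between_dates(
    #         start_date=datetime.datetime(2024, 1, 1, tzinfo=pytz.utc),
    #     )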

    def __init__(self,
                 data_source_id: int, storage_hash: str,
                 data_source_local_lake: Optional[DataSource] = None):
        """
        Initializes an APIDataNode.

        Args:
            data_source_id: The ID of the data source.
            storage_hash: The storage hash of the table backing this time series.
            data_source_local_lake: Optional local data source for the lake.
        """
        if data_source_local_lake is not None:
            assert data_source_local_lake.data_type in CONSTANTS.DATA_SOURCE_TYPE_LOCAL_DISK_LAKE, "data_source_local_lake should be of type CONSTANTS.DATA_SOURCE_TYPE_LOCAL_DISK_LAKE"

        assert isinstance(data_source_id, int)
        self.data_source_id = data_source_id
        self.storage_hash = storage_hash
        self.data_source = data_source_local_lake
        self._local_persist_manager: APIPersistManager = None
        self.update_statistics = None

    def __repr__(self) -> str:
        repr = self.__class__.__name__ + f" {os.environ['TDAG_ENDPOINT']}/dynamic-table-metadatas/details/?dynamic_table_id={self.data_source_id}"
        return repr

    @property
    def is_api(self):
        return True

    @staticmethod
    def _get_update_hash(storage_hash):
        return "API_" + f"{storage_hash}"

    @property
    def update_hash(self):
        return self._get_update_hash(storage_hash=self.storage_hash)

    def __getstate__(self) -> Dict[str, Any]:
        """Prepares the state for pickling."""
        state = self.__dict__.copy()
        # Remove unpicklable/transient state specific to APIDataNode
        names_to_remove = [
            "_local_persist_manager",  # APIPersistManager instance
        ]
        cleaned_state = {k: v for k, v in state.items() if k not in names_to_remove}
        return cleaned_state

    @property
    def local_persist_manager(self) -> Any:
        """Gets the local persistence manager, initializing it if necessary."""
        if self._local_persist_manager is None:
            self._set_local_persist_manager()
            self.logger.debug(f"Setting local persist manager for {self.storage_hash}")
        return self._local_persist_manager

    def set_relation_tree(self) -> None:
        pass  # do nothing for API Time Series

    def _verify_local_data_source(self) -> None:
        """Verifies and sets the local data source from environment variables if available."""
        pod_source = os.environ.get("POD_DEFAULT_DATA_SOURCE", None)
        if pod_source is not None:
            from mainsequence.client import models as models
            pod_source = json.loads(pod_source)
            ModelClass = pod_source["tdag_orm_class"]
            pod_source.pop("tdag_orm_class", None)
            ModelClass = getattr(models, ModelClass)
            pod_source = ModelClass(**pod_source)
            self.data_source = pod_source

    def build_data_source_from_configuration(self, data_config: Dict[str, Any]) -> DataSource:
        """
        Builds a data source object from a configuration dictionary.

        Args:
            data_config: The data source configuration.

        Returns:
            A DataSource object.
        """
        ModelClass = DynamicTableDataSource.get_class(data_config['data_type'])
        pod_source = ModelClass.get(data_config["id"])
        return pod_source

    def _set_local_persist_manager(self) -> None:
        self._verify_local_data_source()
        self._local_persist_manager = APIPersistManager(storage_hash=self.storage_hash, data_source_id=self.data_source_id)
        metadata = self._local_persist_manager.metadata

        assert metadata is not None, f"Verify that the table {self.storage_hash} exists"

    def get_update_statistics(self, asset_symbols: Optional[list] = None) -> Tuple[Optional[datetime.datetime], Optional[Dict[str, datetime.datetime]]]:
        """
        Gets update statistics from the database.

        Args:
            asset_symbols: An optional list of asset symbols to filter by.

        Returns:
            A tuple containing the last update time for the table and a dictionary of last update times per asset.
        """
        return self.local_persist_manager.metadata.sourcetableconfiguration.get_data_updates()

    def get_earliest_updated_asset_filter(self, unique_identifier_list: list,
                                          last_update_per_asset: dict) -> datetime.datetime:
        """
        Gets the earliest last update time for a list of unique identifiers.

        Args:
            unique_identifier_list: A list of unique identifiers.
            last_update_per_asset: A dictionary mapping assets to their last update times.

        Returns:
            The earliest last update timestamp.
        """
        if unique_identifier_list is not None:
            last_update_in_table = min(
                [t for uid, a in last_update_per_asset.items() for t in a.values() if uid in unique_identifier_list])
        else:
            last_update_in_table = min([t for a in last_update_per_asset.values() for t in a.values()])
        return last_update_in_table

    def update(self, *args, **kwargs) -> pd.DataFrame:
        self.logger.info("Not updating series")
        pass


class DataNode(DataAccessMixin, ABC):
    """
    Base DataNode class
    """
    OFFSET_START = datetime.datetime(2018, 1, 1, tzinfo=pytz.utc)
    _ARGS_IGNORE_IN_STORAGE_HASH = []

    # --- Dunder & Serialization Methods ---

    def __setstate__(self, state: Dict[str, Any]) -> None:
        # Restore instance attributes (i.e., filename and lineno).
        self.__dict__.update(state)

    def __getstate__(self) -> Dict[str, Any]:
        # Copy the object's state from self.__dict__ which contains
        # all our instance attributes. Always use the dict.copy()
        # method to avoid modifying the original state.
        state = self._prepare_state_for_pickle(state=self.__dict__)

        # Remove the unpicklable entries.
        return state

    def __init__(
            self,
            init_meta: Optional[build_operations.TimeSerieInitMeta] = None,
            build_meta_data: Union[dict, None] = None,
            *args,
            **kwargs):
        """
        Initializes the DataNode object with the provided metadata and configurations.

        This method sets up the time series object, loading the necessary configurations
        and metadata.

        Each DataNode instance will create a table in the Main Sequence Data Engine by uniquely hashing
        the arguments, with the exception of:

        - init_meta
        - build_meta_data

        Each DataNode instance will also create an update_hash and a LocalTimeSerie instance in the Data
        Engine by uniquely hashing the same arguments as the table but excluding the arguments inside
        _LOCAL_KWARGS_TO_IGNORE.

        Allowed argument types are str, list, int, or Pydantic objects, including lists of Pydantic objects.

        The OFFSET_START property can be overridden and marks the minimum date value where the table will insert data.

        Parameters
        ----------
        init_meta : dict, optional
            Metadata for initializing the time series instance.
        build_meta_data : dict, optional
            Metadata related to the building process of the time series.
        *args : tuple
            Additional arguments.
        **kwargs : dict
            Additional keyword arguments.
        """
        self.init_meta = init_meta

        self.build_meta_data = build_meta_data or {}
        self.build_meta_data.setdefault("initialize_with_default_partitions", True)

        self.pre_load_routines_run = False
        self._data_source: Optional[DynamicTableDataSource] = None  # is set later
        self._local_persist_manager: Optional[PersistManager] = None

        self._scheduler_tree_connected = False
        self.update_statistics = None

    def __init_subclass__(cls, **kwargs):
        """
        This special method is called when DataNode is subclassed.
        It automatically wraps the subclass's __init__ method to add post-init routines.
        """
        super().__init_subclass__(**kwargs)

        # Get the original __init__ from the new subclass
        original_init = cls.__init__

        @wraps(original_init)
        def wrapped_init(self, *args, **kwargs):
            # 1. Call the original __init__ of the subclass first
            original_init(self, *args, **kwargs)

            # 2. Capture all arguments from __init__ methods in the MRO up to DataNode
            final_kwargs = {}
            mro = self.__class__.mro()

            try:
                # We want to inspect from parent to child to ensure subclass arguments override.
                # The MRO is ordered from child to parent, so we find DataNode and reverse the part before it.
                data_node_index = mro.index(DataNode)
                classes_to_inspect = reversed(mro[:data_node_index])
            except ValueError:
                # Fallback if DataNode is not in the MRO.
                classes_to_inspect = [self.__class__]

            for cls_to_inspect in classes_to_inspect:
                # Only inspect the __init__ defined on the class itself.
                if '__init__' in cls_to_inspect.__dict__:
                    sig = inspect.signature(cls_to_inspect.__init__)
                    try:
                        # Use bind_partial as the full set of args might not match this specific signature.
                        bound_args = sig.bind_partial(self, *args, **kwargs)
                        bound_args.apply_defaults()

                        current_args = bound_args.arguments
                        current_args.pop('self', None)

                        # If the signature has **kwargs, it collects extraneous arguments. Unpack them.
                        if 'kwargs' in current_args:
                            final_kwargs.update(current_args.pop('kwargs'))

                        # Update the final arguments. Overwrites parent args with child args.
                        final_kwargs.update(current_args)
                    except TypeError:
                        logger.warning(f"Could not bind arguments for {cls_to_inspect.__name__}.__init__; skipping for config.")
                        continue

            # Remove `args` as it collects un-named positional arguments which are not part of the config hash.
            final_kwargs.pop('args', None)

            # 3. Run the post-initialization routines
            logger.debug(f"Running post-init routines for {self.__class__.__name__}")
            self._initialize_configuration(init_kwargs=final_kwargs)

            # 4. Final setup
            self.set_data_source()
            logger.bind(update_hash=self.update_hash)

            self.run_after_post_init_routines()

            # requirements for graph update
            self.dependencies_df: Optional[pd.DataFrame] = None
            self.depth_df: Optional[pd.DataFrame] = None

            self.scheduler: Optional[Scheduler] = None
            self.update_details_tree: Optional[Dict[str, Any]] = None

            self._patch_build_from_env()
            logger.debug(f"Post-init routines for {self.__class__.__name__} complete.")

        # Replace the subclass's __init__ with our new wrapped version
        cls.__init__ = wrapped_init
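
    # Illustrative sketch with a hypothetical subclass (not from the package): because
    # `__init_subclass__` wraps every subclass `__init__`, the keyword arguments of a
    # subclass constructor are captured and fed into the configuration hash automatically,
    # while `init_meta` and `build_meta_data` stay excluded:
    #
    #     class PricesNode(DataNode):
    #         def __init__(self, ticker: str, *args, **kwargs):
    #             self.ticker = ticker
    #             super().__init__(*args, **kwargs)
    #
    #     # PricesNode(ticker="XYZ") hashes {"ticker": "XYZ", ...} into the table and
    #     # update hashes that identify this node in the Data Engine.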

    def _initialize_configuration(self, init_kwargs: dict) -> None:
        """Creates config from init args and sets them as instance attributes."""
        logger.debug(f"Creating configuration for {self.__class__.__name__}")

        init_kwargs["time_series_class_import_path"] = {
            "module": self.__class__.__module__,
            "qualname": self.__class__.__qualname__
        }

        config = build_operations.create_config(
            arguments_to_ignore_from_storage_hash=self._ARGS_IGNORE_IN_STORAGE_HASH,
            kwargs=init_kwargs,
            ts_class_name=self.__class__.__name__
        )

        for field_name, value in asdict(config).items():
            setattr(self, field_name, value)

    def _patch_build_from_env(self) -> None:
        """
        Checks for the PATCH_BUILD_CONFIGURATION environment variable and,
        if set, flushes the pickle and patches the build configuration.
        """
        patch_build = os.environ.get("PATCH_BUILD_CONFIGURATION", "false").lower() in ["true", "1"]
        if patch_build:
            self.logger.warning(f"Patching build configuration for {self.storage_hash}")

            # Ensure dependencies are initialized
            self.local_persist_manager
            self.verify_and_build_remote_objects()

            pickle_path = self.get_pickle_path_from_time_serie()
            build_operations.flush_pickle(pickle_path=pickle_path)

            self.local_persist_manager.patch_build_configuration(
                local_configuration=self.local_initial_configuration,
                remote_configuration=self.remote_initial_configuration,
                remote_build_metadata=self.remote_build_metadata,
            )

    # --- Core Properties ---

    @property
    def is_api(self):
        return False

    @property
    def data_source_id(self) -> int:
        return self.data_source.id

    @property
    def local_time_serie(self) -> LocalTimeSerie:
        """The local time series metadata object."""
        return self.local_persist_manager.local_metadata

    @property
    def metadata(self) -> "DynamicTableMetaData":
        return self.local_persist_manager.metadata

    @property
    def local_persist_manager(self) -> PersistManager:
        if self._local_persist_manager is None:
            self.logger.debug(f"Setting local persist manager for {self.storage_hash}")
            self._set_local_persist_manager(update_hash=self.update_hash)
        return self._local_persist_manager

    @property
    def data_source(self) -> Any:
        if self._data_source is not None:
            return self._data_source
        else:
            raise Exception("Data source has not been set")

    # --- Persistence & Backend Methods ---

    @tracer.start_as_current_span("TS: set_state_with_sessions")
    def _set_state_with_sessions(self, include_vam_client_objects: bool = True,
                                 graph_depth_limit: int = 1000,
                                 graph_depth: int = 0) -> None:
        """
        Sets the state of the DataNode after loading from pickle, including sessions.

        Args:
            include_vam_client_objects: Whether to include VAM client objects.
            graph_depth_limit: The depth limit for graph traversal.
            graph_depth: The current depth in the graph.
        """
        if graph_depth_limit == -1:
            graph_depth_limit = 1e6

        minimum_required_depth_for_update = self.get_minimum_required_depth_for_update()

        state = self.__dict__

        if graph_depth_limit < minimum_required_depth_for_update and graph_depth == 0:
            graph_depth_limit = minimum_required_depth_for_update
            self.logger.warning(f"Graph depth limit overwritten to {minimum_required_depth_for_update}")

        # if the data source is not local then the de-serialization needs to happen after setting the local persist manager
        # to guarantee a proper patch in the back-end
        if graph_depth <= graph_depth_limit and self.data_source.related_resource_class_type:
            self._set_local_persist_manager(
                update_hash=self.update_hash,
                local_metadata=None,
            )

        deserializer = build_operations.DeserializerManager()
        state = deserializer.deserialize_pickle_state(
            state=state,
            data_source_id=self.data_source.id,
            include_vam_client_objects=include_vam_client_objects,
            graph_depth_limit=graph_depth_limit,
            graph_depth=graph_depth + 1
        )

        self.__dict__.update(state)

        self.local_persist_manager.synchronize_metadata(local_metadata=None)

    def _prepare_state_for_pickle(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """
        Prepares the object's state for pickling by serializing and removing unpicklable entries.

        Args:
            state: The object's __dict__.

        Returns:
            A pickle-safe dictionary representing the object's state.
        """
        properties = state
        serializer = build_operations.Serializer()
        properties = serializer.serialize_for_pickle(properties)
        names_to_remove = []
        for name, attr in properties.items():
            if name in [
                "local_persist_manager",
                "logger",
                "init_meta",
                "_local_metadata_future",
                "_local_metadata_lock",
                "_local_persist_manager",
                "update_tracker",
            ]:
                names_to_remove.append(name)
                continue

            try:
                cloudpickle.dumps(attr)
            except Exception as e:
                logger.exception(f"Can't pickle property {name}")
                raise e

        for n in names_to_remove:
            properties.pop(n, None)

        return properties

    def _set_local_persist_manager(self, update_hash: str,
                                   local_metadata: Union[None, dict] = None,
                                   ) -> None:
        """
        Initializes the local persistence manager for the time series. It sets up
        the necessary configurations and checks for existing metadata. If the metadata doesn't
        exist or is incomplete, it sets up the initial configuration and builds the update details.

        Args:
            update_hash : str
                The local hash ID for the time series.
            local_metadata : Union[None, dict], optional
                Local metadata for the time series, if available.
        """
        self._local_persist_manager = PersistManager.get_from_data_type(
            update_hash=update_hash,
            class_name=self.__class__.__name__,
            local_metadata=local_metadata,
            data_source=self.data_source
        )

    def set_data_source(self,
                        data_source: Optional[object] = None) -> None:
        """
        Sets the data source for the time series.

        Args:
            data_source: The data source object. If None, the default is fetched from the ORM.
        """
        if data_source is None:
            self._data_source = get_data_source_from_orm()
        else:
            self._data_source = data_source

    def verify_and_build_remote_objects(self) -> None:
        """
        Verifies and builds remote objects by calling the persistence layer.
        This logic is now correctly located within the BuildManager.
        """
        # Use self.owner to get properties from the DataNode instance
        owner_class = self.__class__
        time_serie_source_code_git_hash = build_operations.get_data_node_source_code_git_hash(owner_class)
        time_serie_source_code = build_operations.get_data_node_source_code(owner_class)

        # The call to the low-level persist manager is encapsulated here
        self.local_persist_manager.local_persist_exist_set_config(
            storage_hash=self.storage_hash,
            local_configuration=self.local_initial_configuration,
            remote_configuration=self.remote_initial_configuration,
            time_serie_source_code_git_hash=time_serie_source_code_git_hash,
            time_serie_source_code=time_serie_source_code,
            data_source=self.data_source,
            build_configuration_json_schema=self.build_configuration_json_schema,
        )

    def set_relation_tree(self):
        """Sets the node relationships in the backend by calling the dependencies() method."""
        if self.local_persist_manager.local_metadata is None:
            self.verify_and_build_remote_objects()
        if self.local_persist_manager.is_local_relation_tree_set():
            return
        declared_dependencies = self.dependencies() or {}

        for name, dependency_ts in declared_dependencies.items():
            self.logger.debug(f"Connecting dependency '{name}'...")

            # Ensure the dependency itself is properly initialized
            is_api = dependency_ts.is_api
            if not is_api:
                dependency_ts.verify_and_build_remote_objects()

            self.local_persist_manager.depends_on_connect(dependency_ts, is_api=is_api)

            # Recursively set the relation tree for the dependency
            dependency_ts.set_relation_tree()

        self.local_persist_manager.set_ogm_dependencies_linked()

    def set_dependencies_df(self):
        depth_df = self.local_persist_manager.get_all_dependencies_update_priority()
        self.depth_df = depth_df
        if not depth_df.empty:
            self.dependencies_df = depth_df[
                depth_df["local_time_serie_id"] != self.local_time_serie.id].copy()
        else:
            self.dependencies_df = pd.DataFrame()

    def get_update_statistics(self):
        """
        This method always queries the last state.
        """
        return self.metadata.sourcetableconfiguration.get_data_updates()

    def _set_update_statistics(self,
                               update_statistics: UpdateStatistics) -> UpdateStatistics:
        """
        UpdateStatistics provides the last-ingested positions:
        - For a single-index series (time_index only), `update_statistics.max_time` is either:
            - None: no prior data; fetch all available rows.
            - a datetime: fetch rows where `time_index > max_time`.
        - For a dual-index series (time_index, unique_identifier), `update_statistics.max_time_per_id` is either:
            - None: single-index behavior applies.
            - dict[str, datetime]: for each `unique_identifier` (matching `Asset.unique_identifier`), fetch rows where
              `time_index > max_time_per_id[unique_identifier]`.

        Default method to narrow down update statistics in local time series;
        the method will filter using asset_list if the attribute exists, as well as the init fallback date.

        :param update_statistics:
        :return:
        """
        # Filter update_statistics to include only assets in self.asset_list.
        asset_list = self.get_asset_list()
        self._setted_asset_list = asset_list

        update_statistics = update_statistics.update_assets(
            asset_list, init_fallback_date=self.OFFSET_START
        )

        self.update_statistics = update_statistics

    # --- Public API ---

    def run(
            self,
            debug_mode: bool,
            *,
            update_tree: bool = True,
            force_update: bool = False,
            update_only_tree: bool = False,
            remote_scheduler: Union[object, None] = None,
            override_update_stats: Optional[UpdateStatistics] = None
    ):
        update_runner = run_operations.UpdateRunner(time_serie=self,
                                                    debug_mode=debug_mode,
                                                    force_update=force_update,
                                                    update_tree=update_tree,
                                                    update_only_tree=update_only_tree,
                                                    remote_scheduler=remote_scheduler,
                                                    override_update_stats=override_update_stats
                                                    )
        error_on_last_update, updated_df = update_runner.run()

        return error_on_last_update, updated_df

    # --- Optional Hooks for Customization ---

    def run_after_post_init_routines(self) -> None:
        pass

    def get_minimum_required_depth_for_update(self) -> int:
        """
        Controls the minimum depth that needs to be rebuilt.
        """
        return 0

    def get_table_metadata(self) -> Optional[ms_client.TableMetaData]:
        """Provides the metadata configuration for a market time series."""
        return None

    def get_column_metadata(self) -> Optional[List[ColumnMetaData]]:
        """
        This method should return a list of ColumnMetaData to add extra context to each time series.

        Examples:
            from mainsequence.client.models_tdag import ColumnMetaData
            columns_metadata = [ColumnMetaData(column_name="instrument",
                                               dtype="str",
                                               label="Instrument",
                                               description=(
                                                   "Unique identifier provided by Valmer; it’s a composition of the "
                                                   "columns `tv_emisora_serie`, and is also used as a ticker for custom "
                                                   "assets in Valmer."
                                               )
                                               ),
                                ColumnMetaData(column_name="currency",
                                               dtype="str",
                                               label="Currency",
                                               description=(
                                                   "Corresponds to the currency code; be aware this may not match FIGI currency assets."
                                               )
                                               ),
                                ]

        Returns:
            A list of ColumnMetaData objects, or None.
        """
        return None

    def get_asset_list(self) -> Optional[List["Asset"]]:
        """
        Provide the list of assets that this DataNode should include when updating.

        By default, this method returns `self.asset_list` if defined.
        Subclasses _must_ override this method when no `asset_list` attribute was set
        during initialization, to supply a dynamic list of assets for update_statistics.

        Use Case:
          - For category-based series, return all Asset unique_identifiers in a given category
            (e.g., `AssetCategory(unique_identifier="investable_assets")`), so that only those
            assets are updated in this DataNode.

        Returns
        -------
        list or None
            - A list of asset unique_identifiers to include in the update.
            - `None` if no filtering by asset is required (update all assets by default).
        """
        if hasattr(self, "asset_list"):
            return self.asset_list

        return None

    def run_post_update_routines(self, error_on_last_update: bool) -> None:
        """ Should be overwritten by subclass """
        pass

    @abstractmethod
    def dependencies(self) -> Dict[str, Union["DataNode", "APIDataNode"]]:
        """
        Subclasses must implement this method to explicitly declare their upstream dependencies.

        Returns:
            A dictionary where keys are descriptive names and values are the DataNode dependency instances.
        """
        raise NotImplementedError

    @abstractmethod
    def update(self) -> pd.DataFrame:
        """
        Fetch and ingest only the new rows for this DataNode based on prior update checkpoints.

        Requirements:
        - `time_index` **must** be a `datetime.datetime` instance with UTC timezone.
        - Column names **must** be all lowercase.
        - No column values may be Python `datetime` objects; if date/time storage is needed, convert to integer
          timestamps (e.g., UNIX epoch in seconds or milliseconds).

        After retrieving the incremental rows, this method inserts or upserts them into the Main Sequence Data Engine.

        The previous update state is available through `self.update_statistics`, which exposes:
        - `max_time` (datetime | None)
        - `max_time_per_id` (dict[str, datetime] | None)

        Returns
        -------
        pd.DataFrame
            A DataFrame containing only the newly added or updated records.
        """
        raise NotImplementedError
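
    # Illustrative sketch with a hypothetical subclass (not from the package): a minimal
    # concrete DataNode declares its upstream nodes and returns only new rows, using the
    # checkpoints exposed through `self.update_statistics`:
    #
    #     class MyNode(DataNode):
    #         def dependencies(self) -> Dict[str, Union["DataNode", "APIDataNode"]]:
    #             return {}  # no upstream dependencies
    #
    #         def update(self) -> pd.DataFrame:
    #             stats = self.update_statistics
    #             start = stats.max_time or self.OFFSET_START
    #             # fetch rows with a UTC time_index strictly after `start`, lowercase
    #             # column names, and no datetime-valued columns
    #             return fetch_new_rows(after=start)  # hypothetical data-fetching helper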


class WrapperDataNode(DataNode):
    """A wrapper class for managing multiple DataNode objects."""

    def __init__(self, translation_table: AssetTranslationTable, *args, **kwargs):
        """
        Initialize the WrapperDataNode.

        Args:
            translation_table: The AssetTranslationTable mapping source assets to target time series.
        """
        super().__init__(*args, **kwargs)

        def get_time_serie_from_markets_unique_id(table_identifier: str) -> DataNode:
            """
            Returns the appropriate bar time series based on the asset list and source.
            """
            from mainsequence.client import DoesNotExist
            try:
                metadata = ms_client.DynamicTableMetaData.get(identifier=table_identifier)
            except DoesNotExist as e:
                raise e
            api_ts = APIDataNode(
                data_source_id=metadata.data_source.id,
                storage_hash=metadata.storage_hash
            )
            return api_ts

        translation_table = copy.deepcopy(translation_table)

        self.api_ts_map = {}
        for rule in translation_table.rules:
            if rule.markets_time_serie_unique_identifier not in self.api_ts_map:
                self.api_ts_map[rule.markets_time_serie_unique_identifier] = get_time_serie_from_markets_unique_id(
                    table_identifier=rule.markets_time_serie_unique_identifier)

        self.translation_table = translation_table

    def dependencies(self) -> Dict[str, Union["DataNode", "APIDataNode"]]:
        return self.api_ts_map

    def get_ranged_data_per_asset(self, range_descriptor: Optional[UniqueIdentifierRangeMap]) -> pd.DataFrame:
        """
        Gets data based on a range descriptor.

        Args:
            range_descriptor: A UniqueIdentifierRangeMap object.

        Returns:
            A DataFrame with the ranged data.
        """
        return self.get_df_between_dates(unique_identifier_range_map=range_descriptor)

    def get_df_between_dates(
            self,
            start_date: Optional[datetime.datetime] = None,
            end_date: Optional[datetime.datetime] = None,
            unique_identifier_list: Optional[list] = None,
            great_or_equal: bool = True,
            less_or_equal: bool = True,
            unique_identifier_range_map: Optional[UniqueIdentifierRangeMap] = None,
    ) -> pd.DataFrame:
        """
        Retrieves a DataFrame of time series data between specified dates, handling asset translation.

        Args:
            start_date: The start date of the data range.
            end_date: The end date of the data range.
            unique_identifier_list: An optional list of unique identifiers to filter by.
            great_or_equal: Whether to include the start date.
            less_or_equal: Whether to include the end date.
            unique_identifier_range_map: An optional map of ranges for unique identifiers.

        Returns:
            A pandas DataFrame with the requested data.
        """
        if (unique_identifier_list is None) == (unique_identifier_range_map is None):
            raise ValueError(
                "Pass **either** unique_identifier_list **or** unique_identifier_range_map, but not both."
            )

        if unique_identifier_list is not None:
            wanted_src_uids = set(unique_identifier_list)
        else:  # range-map path
            wanted_src_uids = set(unique_identifier_range_map.keys())

        if not wanted_src_uids:
            return pd.DataFrame()

        # evaluate the rules for each asset
        from mainsequence.client import Asset
        assets = Asset.filter(unique_identifier__in=list(wanted_src_uids))
        # assets that we want to get prices for

        asset_translation_dict = {}
        for asset in assets:
            asset_translation_dict[asset.unique_identifier] = self.translation_table.evaluate_asset(asset)

        # we group the assets that share the same rules together and then query all assets that have the same target
        translation_df = pd.DataFrame.from_dict(asset_translation_dict, orient="index")
        try:
            grouped = translation_df.groupby(
                ["markets_time_serie_unique_identifier", "exchange_code"],
                dropna=False
            )
        except Exception as e:
            raise e

        data_df = []
        for (mkt_ts_id, target_exchange_code), group_df in grouped:
            # get the correct DataNode instance from our pre-built map
            api_ts = self.api_ts_map[mkt_ts_id]

            # figure out which assets belong to this group
            grouped_unique_ids = group_df.index.tolist()
            source_assets = [
                a for a in assets
                if a.unique_identifier in grouped_unique_ids
            ]  # source the ones we want to have

            # get correct target assets based on the share classes
            asset_ticker_group_ides = [a.asset_ticker_group_id for a in assets]
            asset_query = dict(
                asset_ticker_group_id__in=asset_ticker_group_ides
            )
            if not pd.isna(target_exchange_code):
                asset_query["exchange_code"] = target_exchange_code

            target_assets = Asset.filter(**asset_query)  # the assets that have the same group

            target_asset_unique_ids = [a.asset_ticker_group_id for a in target_assets]
            if len(asset_ticker_group_ides) > len(target_asset_unique_ids):
                raise Exception(f"Not all assets were found in backend for translation table: {set(asset_ticker_group_ides) - set(target_asset_unique_ids)}")

            if len(asset_ticker_group_ides) < len(target_asset_unique_ids):
                # this would break the proper selection of assets
                raise Exception(f"Too many assets were found in backend for translation table: {set(target_asset_unique_ids) - set(asset_ticker_group_ides)}")

            # create the source-target mapping
            ticker_group_to_uid_map = {}
            for a in source_assets:
                if a.asset_ticker_group_id in ticker_group_to_uid_map:
                    raise ValueError(f"Share class {a.asset_ticker_group_id} cannot be duplicated")
                ticker_group_to_uid_map[a.asset_ticker_group_id] = a.unique_identifier

            source_target_map = {}
            for a in target_assets:
                asset_ticker_group_id = a.asset_ticker_group_id
                source_unique_identifier = ticker_group_to_uid_map[asset_ticker_group_id]
                source_target_map[source_unique_identifier] = a.unique_identifier

            target_source_map = {v: k for k, v in source_target_map.items()}
            if unique_identifier_range_map is not None:
                # create the correct unique identifier range map
                unique_identifier_range_map_target = {}
                for a_unique_identifier, asset_range in unique_identifier_range_map.items():
                    if a_unique_identifier not in source_target_map.keys():
                        continue
                    target_key = source_target_map[a_unique_identifier]
                    unique_identifier_range_map_target[target_key] = asset_range

                if not unique_identifier_range_map_target:
                    self.logger.warning(
                        f"Unique identifier map is empty for group assets {source_assets} and unique_identifier_range_map {unique_identifier_range_map}")
                    continue

                tmp_data = api_ts.get_df_between_dates(
                    unique_identifier_range_map=unique_identifier_range_map_target,
                    start_date=start_date,
                    end_date=end_date,
                    great_or_equal=great_or_equal,
                    less_or_equal=less_or_equal,
                )
            else:
                tmp_data = api_ts.get_df_between_dates(
                    start_date=start_date,
                    end_date=end_date,
                    unique_identifier_list=list(target_source_map.keys()),
                    great_or_equal=great_or_equal,
                    less_or_equal=less_or_equal,
                )

            if tmp_data.empty:
                continue

            tmp_data = tmp_data.rename(index=target_source_map, level="unique_identifier")
            data_df.append(tmp_data)

        if not data_df:
            return pd.DataFrame()

        data_df = pd.concat(data_df, axis=0)
        return data_df

    def update(self, update_statistics):
        """ WrapperDataNode does not update """
        pass


build_operations.serialize_argument.register(DataNode, build_operations._serialize_timeserie)
build_operations.serialize_argument.register(APIDataNode, build_operations._serialize_api_timeserie)
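
# Illustrative sketch with a hypothetical subclass name (not from the package): once a
# concrete DataNode subclass exists, it is typically built and updated directly in
# debug mode, then queried through the mixin helpers:
#
#     node = MyNode()
#     error_on_last_update, updated_df = node.run(debug_mode=True, force_update=True)
#     history = node.get_df_between_dates(start_date=node.OFFSET_START)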