mainsequence-2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mainsequence/__init__.py +0 -0
- mainsequence/__main__.py +9 -0
- mainsequence/cli/__init__.py +1 -0
- mainsequence/cli/api.py +157 -0
- mainsequence/cli/cli.py +442 -0
- mainsequence/cli/config.py +78 -0
- mainsequence/cli/ssh_utils.py +126 -0
- mainsequence/client/__init__.py +17 -0
- mainsequence/client/base.py +431 -0
- mainsequence/client/data_sources_interfaces/__init__.py +0 -0
- mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
- mainsequence/client/data_sources_interfaces/timescale.py +479 -0
- mainsequence/client/models_helpers.py +113 -0
- mainsequence/client/models_report_studio.py +412 -0
- mainsequence/client/models_tdag.py +2276 -0
- mainsequence/client/models_vam.py +1983 -0
- mainsequence/client/utils.py +387 -0
- mainsequence/dashboards/__init__.py +0 -0
- mainsequence/dashboards/streamlit/__init__.py +0 -0
- mainsequence/dashboards/streamlit/assets/config.toml +12 -0
- mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
- mainsequence/dashboards/streamlit/assets/logo.png +0 -0
- mainsequence/dashboards/streamlit/core/__init__.py +0 -0
- mainsequence/dashboards/streamlit/core/theme.py +212 -0
- mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
- mainsequence/dashboards/streamlit/scaffold.py +220 -0
- mainsequence/instrumentation/__init__.py +7 -0
- mainsequence/instrumentation/utils.py +101 -0
- mainsequence/instruments/__init__.py +1 -0
- mainsequence/instruments/data_interface/__init__.py +10 -0
- mainsequence/instruments/data_interface/data_interface.py +361 -0
- mainsequence/instruments/instruments/__init__.py +3 -0
- mainsequence/instruments/instruments/base_instrument.py +85 -0
- mainsequence/instruments/instruments/bond.py +447 -0
- mainsequence/instruments/instruments/european_option.py +74 -0
- mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
- mainsequence/instruments/instruments/json_codec.py +585 -0
- mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
- mainsequence/instruments/instruments/position.py +475 -0
- mainsequence/instruments/instruments/ql_fields.py +239 -0
- mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
- mainsequence/instruments/pricing_models/__init__.py +0 -0
- mainsequence/instruments/pricing_models/black_scholes.py +49 -0
- mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
- mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
- mainsequence/instruments/pricing_models/indices.py +350 -0
- mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
- mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
- mainsequence/instruments/settings.py +175 -0
- mainsequence/instruments/utils.py +29 -0
- mainsequence/logconf.py +284 -0
- mainsequence/reportbuilder/__init__.py +0 -0
- mainsequence/reportbuilder/__main__.py +0 -0
- mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
- mainsequence/reportbuilder/model.py +713 -0
- mainsequence/reportbuilder/slide_templates.py +532 -0
- mainsequence/tdag/__init__.py +8 -0
- mainsequence/tdag/__main__.py +0 -0
- mainsequence/tdag/config.py +129 -0
- mainsequence/tdag/data_nodes/__init__.py +12 -0
- mainsequence/tdag/data_nodes/build_operations.py +751 -0
- mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
- mainsequence/tdag/data_nodes/persist_managers.py +812 -0
- mainsequence/tdag/data_nodes/run_operations.py +543 -0
- mainsequence/tdag/data_nodes/utils.py +24 -0
- mainsequence/tdag/future_registry.py +25 -0
- mainsequence/tdag/utils.py +40 -0
- mainsequence/virtualfundbuilder/__init__.py +45 -0
- mainsequence/virtualfundbuilder/__main__.py +235 -0
- mainsequence/virtualfundbuilder/agent_interface.py +77 -0
- mainsequence/virtualfundbuilder/config_handling.py +86 -0
- mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
- mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
- mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
- mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
- mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
- mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
- mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
- mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
- mainsequence/virtualfundbuilder/data_nodes.py +637 -0
- mainsequence/virtualfundbuilder/enums.py +23 -0
- mainsequence/virtualfundbuilder/models.py +282 -0
- mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
- mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
- mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
- mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
- mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
- mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
- mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
- mainsequence/virtualfundbuilder/utils.py +381 -0
- mainsequence-2.0.0.dist-info/METADATA +105 -0
- mainsequence-2.0.0.dist-info/RECORD +110 -0
- mainsequence-2.0.0.dist-info/WHEEL +5 -0
- mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
- mainsequence-2.0.0.dist-info/top_level.txt +1 -0
mainsequence/tdag/data_nodes/build_operations.py
@@ -0,0 +1,751 @@
import inspect
import hashlib
from typing import Any, Callable, Dict, List, Optional, Tuple, Union,Mapping,Type
import copy
from pydantic import BaseModel
import json
from mainsequence.client import POD_PROJECT
import os
import importlib
from mainsequence.client.models_helpers import get_model_class
from enum import Enum
from types import SimpleNamespace
from mainsequence.client import BaseObjectOrm
import collections
from functools import singledispatch
from mainsequence.tdag.config import bcolors
import cloudpickle
from pathlib import Path
from mainsequence.instrumentation import tracer, tracer_instrumentator
from mainsequence.tdag.config import API_TS_PICKLE_PREFIFX
import mainsequence.client as ms_client
from .persist_managers import PersistManager, get_data_node_source_code,get_data_node_source_code_git_hash
from mainsequence.tdag.config import (
    ogm
)
from dataclasses import dataclass
from mainsequence.logconf import logger
from abc import ABC, abstractmethod
import datetime

build_model = lambda model_data: get_model_class(model_data["orm_class"])(**model_data)


# 1. Create a "registry" function using the decorator
@singledispatch
def serialize_argument(value: Any, pickle_ts: bool) -> Any:
    """
    Default implementation for any type not specifically registered.
    It can either return the value as is or raise a TypeError.
    """
    # For types we don't explicitly handle, we can check if they are serializable
    # or just return them. For simplicity, we return as is.
    return value


def _serialize_timeserie(value: "DataNode", pickle_ts: bool = False) -> Dict[str, Any]:
    """Serialization logic for DataNode objects."""
    print(f"Serializing DataNode: {value.update_hash}")
    if pickle_ts:
        return {"is_time_serie_pickled": True, "update_hash": value.update_hash, "data_source_id": value.data_source_id}
    return {"is_time_serie_instance": True, "update_hash": value.update_hash}


def _serialize_api_timeserie(value, pickle_ts: bool):
    if pickle_ts:
        new_value = {"is_api_time_serie_pickled": True}
        value.persist_to_pickle()
        new_value["update_hash"] = value.update_hash
        new_value['data_source_id'] = value.data_source_id
        return new_value
    return value


@serialize_argument.register(BaseModel)
def _(value: BaseModel, pickle_ts: bool = False) -> Dict[str, Any]:
    """Serialization logic for any Pydantic BaseModel."""
    import_path = {"module": value.__class__.__module__, "qualname": value.__class__.__qualname__}
    # Recursively call serialize_argument on each value in the model's dictionary.
    serialized_model = {k: serialize_argument(v, pickle_ts) for k, v in json.loads(value.model_dump_json()).items()}

    ignore_from_storage_hash = [k for k,v in value.model_fields.items() if v.json_schema_extra and v.json_schema_extra.get("ignore_from_storage_hash",False)==True]

    return {"pydantic_model_import_path": import_path, "serialized_model": serialized_model,
            "ignore_from_storage_hash":ignore_from_storage_hash
            }


@serialize_argument.register(BaseObjectOrm)
def _(value, pickle_ts: bool):
    new_dict = json.loads(value.model_dump_json())
    if hasattr(value, 'unique_identifier'):
        new_dict['unique_identifier'] = value.unique_identifier
    return new_dict


@serialize_argument.register(list)
def _(value: list, pickle_ts: bool):
    if not value:
        return []

    # 1. DETECT if it's a list of ORM models
    if isinstance(value[0], BaseObjectOrm):
        # 2. SORT the list to ensure a stable hash
        sorted_value = sorted(value, key=lambda x: x.unique_identifier)

        # 3. SERIALIZE each item in the now-sorted list
        serialized_items = [serialize_argument(item, pickle_ts) for item in sorted_value]

        # 4. WRAP the result in an identifiable structure for deserialization
        return {"__type__": "orm_model_list", "items": serialized_items}

    # Fallback for all other list types
    return [serialize_argument(item, pickle_ts) for item in value]


@serialize_argument.register(tuple)
def _(value, pickle_ts: bool):
    items = [serialize_argument(item, pickle_ts) for item in value]
    return {"__type__": "tuple", "items": items}


@serialize_argument.register(dict)
def _(value: dict, pickle_ts: bool):
    # Check for the special marker key.
    if value.get("is_time_series_config") is True:
        # If it's a special config dict, preserve its unique structure.
        # Serialize its contents recursively.
        config_data = {k: serialize_argument(v, pickle_ts) for k, v in value.items()}

        return {"is_time_series_config": True, "config_data": config_data}

    # Otherwise, handle it as a regular dictionary.
    return {k: serialize_argument(v, pickle_ts) for k, v in value.items()}


@serialize_argument.register(SimpleNamespace)
def _(value, pickle_ts: bool):
    return serialize_argument.dispatch(dict)(vars(value), pickle_ts)


@serialize_argument.register(Enum)
def _(value, pickle_ts: bool):
    return value.value


class TimeSerieInitMeta(BaseModel):
    ...


def data_source_dir_path(data_source_id: int) -> str:
    path = ogm.pickle_storage_path
    return f"{path}/{data_source_id}"


def data_source_pickle_path(data_source_id: int) -> str:
    return f"{data_source_dir_path(data_source_id)}/data_source.pickle"


def parse_dictionary_before_hashing(dictionary: Dict[str, Any]) -> Dict[str, Any]:
    """
    Parses a dictionary before hashing, handling nested structures and special types.

    Args:
        dictionary: The dictionary to parse.

    Returns:
        A new dictionary ready for hashing.
    """
    local_ts_dict_to_hash = {}
    for key, value in dictionary.items():
        if key != "build_meta_data":
            local_ts_dict_to_hash[key] = value
            if isinstance(value, dict):
                if "orm_class" in value.keys():
                    local_ts_dict_to_hash[key] = value['unique_identifier']
                elif "is_time_series_config" in value.keys():
                    tmp_local_ts, remote_ts = hash_signature(value["config_data"])
                    local_ts_dict_to_hash[key] = {"is_time_series_config": value["is_time_series_config"],
                                                  "config_data": tmp_local_ts}
                elif isinstance(value, dict) and value.get("__type__") == "orm_model_list":
                    # The value["items"] are already serialized dicts
                    local_ts_dict_to_hash[key] = [v["unique_identifier"] for v in value["items"]]
                else:
                    # recursively apply hash signature
                    local_ts_dict_to_hash[key] = parse_dictionary_before_hashing(value)

    return local_ts_dict_to_hash


def prepare_config_kwargs(kwargs: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Separates all meta-arguments from the core configuration arguments and applies defaults.
    This replaces _separate_meta_kwargs and sanitize_default_build_metadata.

    Returns:
        A tuple of (core_kwargs, meta_kwargs).
    """
    meta_keys = ["init_meta", "build_meta_data",]
    meta_kwargs = {}

    for key in meta_keys:
        if key in kwargs:
            # Move the argument from the main dict to the meta dict
            meta_kwargs[key] = kwargs.pop(key)

    # --- Apply Defaults (replaces sanitize_default_build_metadata) ---
    if meta_kwargs.get("init_meta") is None:
        meta_kwargs["init_meta"] = TimeSerieInitMeta()

    if meta_kwargs.get("build_meta_data") is None:
        meta_kwargs["build_meta_data"] = {"initialize_with_default_partitions": True}

    return kwargs, meta_kwargs  # Returns (core_kwargs, meta_kwargs)


def verify_backend_git_hash_with_pickle(local_persist_manager:PersistManager,
                                        time_serie_class: "DataNode") -> None:
    """Verifies if the git hash in the backend matches the one from the pickled object."""
    if local_persist_manager.metadata is not None:
        load_git_hash = get_data_node_source_code_git_hash(time_serie_class)

        persisted_pickle_hash = local_persist_manager.metadata.time_serie_source_code_git_hash
        if load_git_hash != persisted_pickle_hash:
            local_persist_manager.logger.warning(
                f"{bcolors.WARNING}Source code does not match with pickle rebuilding{bcolors.ENDC}")
            pickle_path = get_pickle_path(update_hash=local_persist_manager.update_hash,
                                          data_source_id=local_persist_manager.data_source.id, )
            flush_pickle(pickle_path)

            rebuild_time_serie = rebuild_from_configuration(update_hash=local_persist_manager.update_hash,
                                                            data_source=local_persist_manager.data_source,
                                                            )
            rebuild_time_serie.persist_to_pickle()
        else:
            # if no need to rebuild, just sync the metadata
            local_persist_manager.synchronize_metadata(local_metadata=None)


def hash_signature(dictionary: Dict[str, Any]) -> Tuple[str, str]:
    """
    Computes MD5 hashes for local and remote configurations from a single dictionary.
    """
    dhash_local = hashlib.md5()
    dhash_remote = hashlib.md5()

    # The function expects to receive the full dictionary, including meta-args
    local_ts_dict_to_hash = parse_dictionary_before_hashing(dictionary)
    remote_ts_in_db_hash = copy.deepcopy(local_ts_dict_to_hash)

    # Add project_id for local hash
    local_ts_dict_to_hash["project_id"] = POD_PROJECT.id

    # Handle remote hash filtering internally
    if "arguments_to_ignore_from_storage_hash" in local_ts_dict_to_hash:
        keys_to_ignore = sorted(local_ts_dict_to_hash['arguments_to_ignore_from_storage_hash'])
        for k in keys_to_ignore:
            remote_ts_in_db_hash.pop(k, None)
        remote_ts_in_db_hash.pop("arguments_to_ignore_from_storage_hash", None)

    #remove keys from pydantic objects
    for k,val in local_ts_dict_to_hash.items():
        if isinstance(val, dict) == False:
            continue
        if "pydantic_model_import_path" in val:
            if "ignore_from_storage_hash" in val:
                for arg in val["ignore_from_storage_hash"]:
                    remote_ts_in_db_hash[k]["serialized_model"].pop(arg, None)
            if k in remote_ts_in_db_hash and "ignore_from_storage_hash" in remote_ts_in_db_hash[k]:
                remote_ts_in_db_hash[k].pop("ignore_from_storage_hash")
    # Encode and hash both versions
    encoded_local = json.dumps(local_ts_dict_to_hash, sort_keys=True).encode()
    encoded_remote = json.dumps(remote_ts_in_db_hash, sort_keys=True).encode()

    dhash_local.update(encoded_local)
    dhash_remote.update(encoded_remote)

    return dhash_local.hexdigest(), dhash_remote.hexdigest()


def rebuild_with_type(value: Dict[str, Any], rebuild_function: Callable) -> Union[tuple, Any]:
    """
    Rebuilds a tuple from a serialized dictionary representation.

    Args:
        value: A dictionary with a '__type__' key.
        rebuild_function: A function to apply to each item in the tuple.

    Returns:
        A rebuilt tuple.

    Raises:
        NotImplementedError: If the type is not 'tuple'.
    """
    type_marker = value.get("__type__")

    if type_marker == "tuple":
        return tuple([rebuild_function(c) for c in value["items"]])
    # Add this block to handle the ORM model list
    elif type_marker == "orm_model_list":
        return [rebuild_function(c) for c in value["items"]]
    else:
        raise NotImplementedError


class Serializer:
    """Encapsulates the logic for converting a configuration dict into a serializable format."""

    def serialize_init_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """
        Serializes __init__ keyword arguments for a DataNode.
        This maps to your original `serialize_init_kwargs`.
        """
        return self._serialize_dict(kwargs=kwargs, pickle_ts=False)

    def serialize_for_pickle(self, properties: Dict[str, Any]) -> Dict[str, Any]:
        """
        Serializes properties to a pickle-friendly dictionary.
        """
        return self._serialize_dict(kwargs=properties, pickle_ts=True)

    def _serialize_dict(self, kwargs: Dict[str, Any], pickle_ts: bool) -> Dict[str, Any]:
        """
        Internal worker that serializes a dictionary by calling the dispatcher.
        This maps to your original `_serialize_configuration_dict`.
        """
        new_kwargs = {key: serialize_argument(value, pickle_ts) for key, value in kwargs.items()}
        return collections.OrderedDict(sorted(new_kwargs.items()))


class BaseRebuilder(ABC):
    """
    Abstract base class for deserialization specialists.
    Defines a common structure with a registry and a dispatch method.
    """

    @property
    @abstractmethod
    def registry(self) -> Dict[str, callable]:
        """The registry mapping keys to handler methods."""
        pass

    def rebuild(self, value: Any, **kwargs) -> Any:
        """
        Main dispatch method. Recursively rebuilds a value using the registry.
        """
        # Base cases for recursion
        if not isinstance(value, (dict, list, tuple)):
            return value
        if isinstance(value, list):
            return [self.rebuild(item, **kwargs) for item in value]
        if isinstance(value, tuple):
            return tuple(self.rebuild(item, **kwargs) for item in value)

        # For dictionaries, use the specialized registry
        if isinstance(value, dict):
            # Find a handler in the registry and use it
            for key, handler in self.registry.items():
                if key in value:
                    return handler(value, **kwargs)

            # If no handler, it's a generic dict; rebuild its contents
            return {k: self.rebuild(v, **kwargs) for k, v in value.items()}

        return value  # Fallback


class PickleRebuilder(BaseRebuilder):
    """Specialist for deserializing objects from a pickled state."""

    @property
    def registry(self) -> Dict[str, Callable]:
        return {
            "is_time_serie_pickled": self._handle_pickled_timeserie,
            "is_api_time_serie_pickled": self._handle_api_timeserie,
            "pydantic_model_import_path": self._handle_pydantic_model,
            "is_time_series_config": self._handle_timeseries_config,
            "orm_class": self._handle_orm_model,
            "__type__": self._handle_complex_type,
        }

    def _handle_pickled_timeserie(self, value: Dict, **state_kwargs) -> "DataNode":
        """Handles 'is_time_serie_pickled' markers."""
        import cloudpickle
        # Note: You need to make DataNode available here
        full_path = get_pickle_path(
            update_hash=value['update_hash'],
            data_source_id=value['data_source_id']
        )
        with open(full_path, 'rb') as handle:
            ts = cloudpickle.load(handle)

        ds_pickle_path=data_source_pickle_path(value['data_source_id'])
        data_source = load_data_source_from_pickle(ds_pickle_path)
        ts.set_data_source(data_source=data_source)

        if state_kwargs.get('graph_depth', 0) - 1 <= state_kwargs.get('graph_depth_limit', 0):
            ts._set_state_with_sessions(**state_kwargs)
        return ts

    def _handle_pydantic_model(self, value: Dict, **state_kwargs) -> Any:
        path_info = value["pydantic_model_import_path"]
        module = importlib.import_module(path_info["module"])
        PydanticClass = getattr(module, path_info['qualname'])

        rebuilt_value = self.rebuild(value["serialized_model"], **state_kwargs)
        return PydanticClass(**rebuilt_value)

    def _handle_api_timeserie(self, value: Dict, **state_kwargs) -> "APIDataNode":
        """Handles 'is_api_time_serie_pickled' markers."""
        import cloudpickle
        # Note: You need to make APIDataNode available here
        full_path = get_pickle_path(
            update_hash=value['update_hash'],
            data_source_id=value['data_source_id'],is_api=True,
        )
        with open(full_path, 'rb') as handle:
            ts = cloudpickle.load(handle)
        return ts

    def _handle_timeseries_config(self, value: Dict, **state_kwargs) -> Dict:
        """Handles 'is_time_series_config' markers."""
        return self.rebuild(value["config_data"], **state_kwargs)

    def _handle_orm_model(self, value: Dict, **state_kwargs) -> Any:
        """Handles 'orm_class' markers for single models."""
        return build_model(value)

    def _handle_complex_type(self, value: Dict, **state_kwargs) -> Any:
        """Handles generic '__type__' markers (like tuples)."""
        rebuild_function = lambda x: self.rebuild(x, **state_kwargs)
        # Assumes rebuild_with_type handles different __type__ values
        return rebuild_with_type(value, rebuild_function=rebuild_function)


class ConfigRebuilder(BaseRebuilder):

    @property
    def registry(self) -> Dict[str, Callable]:
        return {
            "pydantic_model_import_path": self._handle_pydantic_model,
            "is_time_series_config": self._handle_timeseries_config,
            "orm_class": self._handle_orm_model,
            "__type__": self._handle_complex_type,
        }

    def _handle_pydantic_model(self, value: Dict, **kwargs) -> Any:
        path_info = value["pydantic_model_import_path"]
        module = importlib.import_module(path_info["module"])
        PydanticClass = getattr(module, path_info['qualname'])

        rebuilt_value = self.rebuild(value["serialized_model"], **kwargs)
        return PydanticClass(**rebuilt_value)

    def _handle_timeseries_config(self, value: Dict, **kwargs) -> Dict:
        return self.rebuild(value["config_data"], **kwargs)

    def _handle_orm_model(self, value: Dict, **kwargs) -> Any:
        return build_model(value)

    def _handle_complex_type(self, value: Dict, **kwargs) -> Any:
        # Special case for ORM lists within the generic complex type handler
        if value.get("__type__") == "orm_model_list":
            return [build_model(item) for item in value["items"]]
        # Fallback to the generic rebuild_with_type for other types (like tuples)
        return rebuild_with_type(value, rebuild_function=lambda x: self.rebuild(x, **kwargs))


class DeserializerManager:
    """Handles serialization and deserialization of configurations."""

    def __init__(self):
        self.pickle_rebuilder = PickleRebuilder()
        self.config_rebuilder = ConfigRebuilder()

    def rebuild_config(self, config: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        """Rebuilds an entire configuration dictionary."""
        return self.config_rebuilder.rebuild(config, **kwargs)

    def rebuild_serialized_config(self, config: Dict[str, Any], time_serie_class_name: str) -> Dict[str, Any]:
        """
        Rebuilds a configuration dictionary from a serialized config.

        Args:
            config: The configuration dictionary.
            time_serie_class_name: The name of the DataNode class.

        Returns:
            The rebuilt configuration dictionary.
        """
        config = self.rebuild_config(config=config)

        return config

    def deserialize_pickle_state(self, state: Any, **kwargs) -> Any:
        """Deserializes an entire pickled state object."""
        return self.pickle_rebuilder.rebuild(state, **kwargs)


@dataclass
class TimeSerieConfig:
    """A container for all computed configuration attributes."""
    init_meta: Any
    remote_build_metadata: Any
    update_hash: str
    storage_hash: str
    local_initial_configuration: Dict[str, Any]
    remote_initial_configuration: Dict[str, Any]
    build_configuration_json_schema:Dict[str,Any]


def extract_pydantic_fields_from_dict(d: Mapping[str, Any]) -> Dict[str, Dict[str, Dict[str, Any]]]:
    """
    Returns: {key: {field_name: <metadata>}} for every value in `d` that is a Pydantic model.
    """
    result: Dict[str, Dict[str, Dict[str, Any]]] = {}
    for k, v in d.items():
        if isinstance(v, BaseModel):
            result[k] = v.model_json_schema()
    return result


def create_config(ts_class_name: str,
                  arguments_to_ignore_from_storage_hash: List[str],
                  kwargs: Dict[str, Any]):
    """
    Creates the configuration and hashes using the original hash_signature logic.
    """
    global logger

    build_configuration_json_schema=extract_pydantic_fields_from_dict(kwargs)

    # 1. Use the helper to separate meta args from core args.
    core_kwargs, meta_kwargs = prepare_config_kwargs(kwargs)

    # 2. Serialize the core arguments
    serialized_core_kwargs = Serializer().serialize_init_kwargs(core_kwargs)

    # 3. Prepare the dictionary for hashing
    dict_to_hash = copy.deepcopy(serialized_core_kwargs)

    dict_to_hash['arguments_to_ignore_from_storage_hash'] = arguments_to_ignore_from_storage_hash

    # 4. Generate the hashes
    update_hash, storage_hash = hash_signature(dict_to_hash)

    # 5. Create the remote configuration by removing ignored keys
    remote_config = copy.deepcopy(dict_to_hash)

    # 6. Return all computed values in the structured dataclass
    return TimeSerieConfig(
        init_meta=meta_kwargs["init_meta"],
        remote_build_metadata=meta_kwargs["build_meta_data"],
        update_hash=f"{ts_class_name}_{update_hash}".lower(),
        storage_hash=f"{ts_class_name}_{storage_hash}".lower(),
        local_initial_configuration=dict_to_hash,
        remote_initial_configuration=remote_config,
        build_configuration_json_schema=build_configuration_json_schema,
    )


def flush_pickle(pickle_path) -> None:
    """Deletes the pickle file for this time series."""
    if os.path.isfile(pickle_path):
        os.remove(pickle_path)


# In class BuildManager:

@tracer.start_as_current_span("TS: load_from_pickle")
def load_from_pickle(pickle_path: str) -> "DataNode":
    """
    Loads a DataNode object from a pickle file, handling both standard and API types.

    Args:
        pickle_path: The path to the pickle file.

    Returns:
        The loaded DataNode object.
    """
    import cloudpickle
    from pathlib import Path

    directory = os.path.dirname(pickle_path)
    filename = os.path.basename(pickle_path)
    prefixed_path = os.path.join(directory, f"{API_TS_PICKLE_PREFIFX}{filename}")
    if os.path.isfile(prefixed_path) and os.path.isfile(pickle_path):
        raise FileExistsError("Both default and API timeseries pickle exist - cannot decide which to load")

    if os.path.isfile(prefixed_path):
        pickle_path = prefixed_path

    try:
        with open(pickle_path, 'rb') as handle:
            time_serie = cloudpickle.load(handle)
    except Exception as e:
        raise e

    if time_serie.is_api:
        return time_serie

    data_source = load_data_source_from_pickle(pickle_path=pickle_path)

    # set objects that are not pickleable
    time_serie.set_data_source(data_source=data_source)
    time_serie._local_persist_manager = None
    # verify pickle
    verify_backend_git_hash_with_pickle(local_persist_manager=time_serie.local_persist_manager,
                                        time_serie_class=time_serie.__class__,
                                        )
    return time_serie


def get_pickle_path(update_hash: str, data_source_id: int, is_api=False) -> str:
    if is_api:
        return os.path.join(ogm.pickle_storage_path, str(data_source_id), f"{API_TS_PICKLE_PREFIFX}{update_hash}.pickle")
    return os.path.join(ogm.pickle_storage_path, str(data_source_id), f"{update_hash}.pickle")


def load_data_source_from_pickle( pickle_path: str) -> Any:
    data_path = Path(pickle_path).parent / "data_source.pickle"
    with open(data_path, 'rb') as handle:
        data_source = cloudpickle.load(handle)
    return data_source


def rebuild_and_set_from_update_hash( update_hash: int, data_source_id: int, set_dependencies_df: bool = False,
                                     graph_depth_limit: int = 1) -> Tuple["DataNode", str]:
    """
    Rebuilds a DataNode from its local hash ID and pickles it if it doesn't exist.

    Args:
        update_hash: The local hash ID of the DataNode.
        data_source_id: The data source ID.
        set_dependencies_df: Whether to set the dependencies DataFrame.
        graph_depth_limit: The depth limit for graph traversal.

    Returns:
        A tuple containing the DataNode object and the path to its pickle file.
    """
    pickle_path = get_pickle_path(update_hash=update_hash,
                                  data_source_id=data_source_id,
                                  )
    if os.path.isfile(pickle_path) == False or os.stat(pickle_path).st_size == 0:
        # rebuild time serie and pickle
        ts = rebuild_from_configuration(
            update_hash=update_hash,
            data_source=data_source_id,
        )
        if set_dependencies_df == True:
            ts.set_relation_tree()

        ts.persist_to_pickle()
        ts.logger.info(f"ts {update_hash} pickled ")

    ts = load_and_set_from_pickle(
        pickle_path=pickle_path,
        graph_depth_limit=graph_depth_limit,
    )
    ts.logger.debug(f"ts {update_hash} loaded from pickle ")
    return ts, pickle_path


def load_and_set_from_pickle( pickle_path: str, graph_depth_limit: int = 1) -> "DataNode":
    """
    Loads a DataNode from a pickle file and sets its state.

    Args:
        pickle_path: The path to the pickle file.
        graph_depth_limit: The depth limit for setting the state.

    Returns:
        The loaded and configured DataNode object.
    """
    ts = load_from_pickle(pickle_path)
    ts._set_state_with_sessions(
        graph_depth=0,
        graph_depth_limit=graph_depth_limit,
        include_vam_client_objects=False)
    return ts


@tracer.start_as_current_span("TS: Rebuild From Configuration")
def rebuild_from_configuration( update_hash: str,
                               data_source: Union[int, object]) -> "DataNode":
    """
    Rebuilds a DataNode instance from its configuration.

    Args:
        update_hash: The local hash ID of the DataNode.
        data_source: The data source ID or object.

    Returns:
        The rebuilt DataNode instance.
    """
    import importlib

    tracer_instrumentator.append_attribute_to_current_span("update_hash", update_hash)

    if isinstance(data_source, int):
        pickle_path = get_pickle_path(data_source_id=data_source,
                                      update_hash=update_hash)
        if os.path.isfile(pickle_path) == False:
            data_source = ms_client.DynamicTableDataSource.get(pk=data_source)
            data_source.persist_to_pickle(data_source_pickle_path(data_source.id))

        data_source = load_data_source_from_pickle(pickle_path=pickle_path)

    persist_manager = PersistManager.get_from_data_type(update_hash=update_hash,
                                                        data_source=data_source,
                                                        )
    try:
        time_serie_config = persist_manager.local_build_configuration
    except Exception as e:
        raise e

    try:
        mod = importlib.import_module(time_serie_config["time_series_class_import_path"]["module"])
        TimeSerieClass = getattr(mod, time_serie_config["time_series_class_import_path"]["qualname"])
    except Exception as e:
        raise e

    time_serie_class_name = time_serie_config["time_series_class_import_path"]["qualname"]

    time_serie_config.pop("time_series_class_import_path")
    time_serie_config = DeserializerManager().rebuild_serialized_config(time_serie_config,
                                                                        time_serie_class_name=time_serie_class_name)
    time_serie_config["init_meta"] = {}

    re_build_ts = TimeSerieClass(**time_serie_config)

    return re_build_ts


def load_and_set_from_hash_id( update_hash: int, data_source_id: int) -> "DataNode":
    path = get_pickle_path(update_hash=update_hash ,data_source_id=data_source_id)
    ts = load_and_set_from_pickle(pickle_path=path)
    return ts
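
For orientation, the sketch below shows how the serialization and hashing helpers in this module are meant to compose. It is illustrative only and not part of the published wheel: it assumes the hunk above is mainsequence/tdag/data_nodes/build_operations.py, that a configured Main Sequence environment is available (so mainsequence.client.POD_PROJECT and tdag.config.ogm resolve at import time), and it uses a hypothetical MySignalConfig Pydantic model as a stand-in for a DataNode build argument.

    # Illustrative sketch only -- not part of the package.
    # Assumes a configured Main Sequence environment; MySignalConfig is hypothetical.
    from pydantic import BaseModel

    from mainsequence.tdag.data_nodes.build_operations import (
        DeserializerManager,
        Serializer,
        hash_signature,
    )


    class MySignalConfig(BaseModel):  # hypothetical DataNode build argument
        window: int = 20
        threshold: float = 1.5


    init_kwargs = {"signal_config": MySignalConfig(), "calendar": "24/7"}

    # serialize_argument's single-dispatch registry turns the kwargs into
    # JSON-friendly structures (a Pydantic model becomes import path + fields).
    serialized = Serializer().serialize_init_kwargs(init_kwargs)

    # Two MD5 digests: the first is project-scoped via POD_PROJECT.id, the second
    # additionally omits arguments flagged as ignorable for the storage hash.
    update_hash, storage_hash = hash_signature(dict(serialized))

    # ConfigRebuilder reverses the serialization, re-importing the Pydantic class
    # from its recorded module/qualname.
    rebuilt = DeserializerManager().rebuild_serialized_config(
        dict(serialized), time_serie_class_name="MyDataNode"
    )
    assert isinstance(rebuilt["signal_config"], MySignalConfig)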