mainsequence-2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mainsequence/__init__.py +0 -0
- mainsequence/__main__.py +9 -0
- mainsequence/cli/__init__.py +1 -0
- mainsequence/cli/api.py +157 -0
- mainsequence/cli/cli.py +442 -0
- mainsequence/cli/config.py +78 -0
- mainsequence/cli/ssh_utils.py +126 -0
- mainsequence/client/__init__.py +17 -0
- mainsequence/client/base.py +431 -0
- mainsequence/client/data_sources_interfaces/__init__.py +0 -0
- mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
- mainsequence/client/data_sources_interfaces/timescale.py +479 -0
- mainsequence/client/models_helpers.py +113 -0
- mainsequence/client/models_report_studio.py +412 -0
- mainsequence/client/models_tdag.py +2276 -0
- mainsequence/client/models_vam.py +1983 -0
- mainsequence/client/utils.py +387 -0
- mainsequence/dashboards/__init__.py +0 -0
- mainsequence/dashboards/streamlit/__init__.py +0 -0
- mainsequence/dashboards/streamlit/assets/config.toml +12 -0
- mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
- mainsequence/dashboards/streamlit/assets/logo.png +0 -0
- mainsequence/dashboards/streamlit/core/__init__.py +0 -0
- mainsequence/dashboards/streamlit/core/theme.py +212 -0
- mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
- mainsequence/dashboards/streamlit/scaffold.py +220 -0
- mainsequence/instrumentation/__init__.py +7 -0
- mainsequence/instrumentation/utils.py +101 -0
- mainsequence/instruments/__init__.py +1 -0
- mainsequence/instruments/data_interface/__init__.py +10 -0
- mainsequence/instruments/data_interface/data_interface.py +361 -0
- mainsequence/instruments/instruments/__init__.py +3 -0
- mainsequence/instruments/instruments/base_instrument.py +85 -0
- mainsequence/instruments/instruments/bond.py +447 -0
- mainsequence/instruments/instruments/european_option.py +74 -0
- mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
- mainsequence/instruments/instruments/json_codec.py +585 -0
- mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
- mainsequence/instruments/instruments/position.py +475 -0
- mainsequence/instruments/instruments/ql_fields.py +239 -0
- mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
- mainsequence/instruments/pricing_models/__init__.py +0 -0
- mainsequence/instruments/pricing_models/black_scholes.py +49 -0
- mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
- mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
- mainsequence/instruments/pricing_models/indices.py +350 -0
- mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
- mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
- mainsequence/instruments/settings.py +175 -0
- mainsequence/instruments/utils.py +29 -0
- mainsequence/logconf.py +284 -0
- mainsequence/reportbuilder/__init__.py +0 -0
- mainsequence/reportbuilder/__main__.py +0 -0
- mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
- mainsequence/reportbuilder/model.py +713 -0
- mainsequence/reportbuilder/slide_templates.py +532 -0
- mainsequence/tdag/__init__.py +8 -0
- mainsequence/tdag/__main__.py +0 -0
- mainsequence/tdag/config.py +129 -0
- mainsequence/tdag/data_nodes/__init__.py +12 -0
- mainsequence/tdag/data_nodes/build_operations.py +751 -0
- mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
- mainsequence/tdag/data_nodes/persist_managers.py +812 -0
- mainsequence/tdag/data_nodes/run_operations.py +543 -0
- mainsequence/tdag/data_nodes/utils.py +24 -0
- mainsequence/tdag/future_registry.py +25 -0
- mainsequence/tdag/utils.py +40 -0
- mainsequence/virtualfundbuilder/__init__.py +45 -0
- mainsequence/virtualfundbuilder/__main__.py +235 -0
- mainsequence/virtualfundbuilder/agent_interface.py +77 -0
- mainsequence/virtualfundbuilder/config_handling.py +86 -0
- mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
- mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
- mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
- mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
- mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
- mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
- mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
- mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
- mainsequence/virtualfundbuilder/data_nodes.py +637 -0
- mainsequence/virtualfundbuilder/enums.py +23 -0
- mainsequence/virtualfundbuilder/models.py +282 -0
- mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
- mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
- mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
- mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
- mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
- mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
- mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
- mainsequence/virtualfundbuilder/utils.py +381 -0
- mainsequence-2.0.0.dist-info/METADATA +105 -0
- mainsequence-2.0.0.dist-info/RECORD +110 -0
- mainsequence-2.0.0.dist-info/WHEEL +5 -0
- mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
- mainsequence-2.0.0.dist-info/top_level.txt +1 -0
mainsequence/tdag/data_nodes/run_operations.py
@@ -0,0 +1,543 @@
# Standard Library Imports
import gc
import time
import datetime
from typing import Any, Dict, List, Optional, Tuple

# Third-Party Library Imports
import numpy as np
import pandas as pd
import structlog.contextvars as cvars

# Client and ORM Models
import mainsequence.client as ms_client
import pytz
from mainsequence.client import UpdateStatistics

# Instrumentation and Logging
from mainsequence.instrumentation import (
    tracer,
    tracer_instrumentator,
    TracerInstrumentator
)
from mainsequence.instrumentation.utils import Status, StatusCode

# TDAG Core Components and Helpers
from mainsequence.tdag.data_nodes import build_operations


# Custom Exceptions
class DependencyUpdateError(Exception):
    pass


class UpdateRunner:
    """
    Orchestrates the entire update process for a DataNode instance.
    It handles scheduling, dependency resolution, execution, and error handling.
    """

    def __init__(self, time_serie: "DataNode", debug_mode: bool = False, force_update: bool = False,
                 update_tree: bool = True, update_only_tree: bool = False,
                 remote_scheduler: Optional[ms_client.Scheduler] = None,
                 override_update_stats: Optional[UpdateStatistics] = None
                 ):
        self.ts = time_serie
        self.logger = self.ts.logger
        self.debug_mode = debug_mode
        self.force_update = force_update
        self.update_tree = update_tree
        self.update_only_tree = update_only_tree
        if self.update_tree:
            self.update_only_tree = False

        self.remote_scheduler = remote_scheduler
        self.scheduler: Optional[ms_client.Scheduler] = None
        self.override_update_stats = override_update_stats
    def _setup_scheduler(self) -> None:
        """Initializes or retrieves the scheduler and starts its heartbeat."""
        if self.remote_scheduler:
            self.scheduler = self.remote_scheduler
            return

        name_prefix = "DEBUG_" if self.debug_mode else ""
        self.scheduler = ms_client.Scheduler.build_and_assign_to_ts(
            scheduler_name=f"{name_prefix}{self.ts.local_time_serie.id}",
            time_serie_ids=[self.ts.local_time_serie.id],
            remove_from_other_schedulers=True,
            running_in_debug_mode=self.debug_mode
        )
        self.scheduler.start_heart_beat()

    def _pre_update_routines(self, local_metadata: Optional[dict] = None) -> Tuple[Dict[int, ms_client.LocalTimeSerie], Any]:
        """
        Prepares the DataNode and its dependencies for an update by fetching the
        latest metadata for the entire dependency graph.

        Args:
            local_metadata: Optional dictionary with metadata for the head node,
                used to synchronize before fetching the full tree.

        Returns:
            A tuple containing a dictionary of all local metadata objects in the
            tree (keyed by ID) and the corresponding state data.
        """
        # 1. Synchronize the head node and load its dependency structure.
        self.ts.local_persist_manager.synchronize_metadata(local_metadata=local_metadata)
        self.ts.set_relation_tree()

        # The `load_dependencies` logic is now integrated here.
        if self.ts.dependencies_df is None:
            self.ts.set_dependencies_df()

        # 2. Connect the dependency tree to the scheduler if it hasn't been already.
        if not self.ts._scheduler_tree_connected and self.update_tree:
            self.logger.debug("Connecting dependency tree to scheduler...")
            if not self.ts.depth_df.empty:
                all_ids = self.ts.depth_df["local_time_serie_id"].to_list() + [self.ts.local_time_serie.id]
                self.scheduler.in_active_tree_connect(local_time_series_ids=all_ids)
            self.ts._scheduler_tree_connected = True

        # 3. Collect all IDs in the dependency graph to fetch their metadata.
        if not self.ts.depth_df.empty:
            all_ids_in_tree = self.ts.depth_df["local_time_serie_id"].to_list()
        else:
            all_ids_in_tree = []

        # Always include the head node itself.
        all_ids_in_tree.append(self.ts.local_time_serie.id)

        # 4. Fetch the latest metadata for the entire tree from the backend.
        update_details_batch = dict(
            error_on_last_update=False,
            active_update_scheduler_id=self.scheduler.id,
            active_update_status="Q"  # Assuming queue status is always set here
        )

        all_metadatas_response = ms_client.LocalTimeSerie.get_metadatas_and_set_updates(
            local_time_series_ids=all_ids_in_tree,
            update_details_kwargs=update_details_batch,
            update_priority_dict=None
        )

        # 5. Process and return the results.
        state_data = all_metadatas_response['state_data']
        local_metadatas_list = all_metadatas_response["local_metadatas"]
        local_metadatas_map = {m.id: m for m in local_metadatas_list}

        self.ts.scheduler = self.scheduler
        self.ts.update_details_tree = {key: v.run_configuration for key, v in local_metadatas_map.items()}

        return local_metadatas_map, state_data

    def _setup_execution_environment(self) -> Dict[int, ms_client.LocalTimeSerie]:
        local_metadatas, state_data = self._pre_update_routines()
        return local_metadatas
    def _start_update(self, use_state_for_update: bool, override_update_stats: Optional[UpdateStatistics] = None) -> Tuple[bool, pd.DataFrame]:
        """Orchestrates a single DataNode update, including pre/post routines."""
        historical_update = self.ts.local_persist_manager.local_metadata.set_start_of_execution(
            active_update_scheduler_id=self.scheduler.id
        )

        must_update = historical_update.must_update or self.force_update

        # Ensure metadata is fully loaded with relationship details before proceeding.
        self.ts.local_persist_manager.set_local_metadata_lazy(include_relations_detail=True)

        if override_update_stats is not None:
            self.ts.update_statistics = override_update_stats
        else:
            update_statistics = historical_update.update_statistics
            # The DataNode defines how to scope its statistics
            self.ts._set_update_statistics(update_statistics)

        updated_df = pd.DataFrame()
        error_on_last_update = False
        try:
            if must_update:
                self.logger.debug(f"Update required for {self.ts}.")
                updated_df = self._update_local(
                    overwrite_latest_value=historical_update.last_time_index_value,
                    use_state_for_update=use_state_for_update
                )
            else:
                self.logger.debug(f"Already up-to-date. Skipping update for {self.ts}.")
        except Exception as e:
            error_on_last_update = True
            raise e
        finally:
            self.ts.local_persist_manager.local_metadata.set_end_of_execution(
                historical_update_id=historical_update.id,
                error_on_update=error_on_last_update
            )

        # Always refresh relation details after the run completes.
        self.ts.local_persist_manager.set_local_metadata_lazy(include_relations_detail=True)

        self.ts.run_post_update_routines(error_on_last_update=error_on_last_update)
        self.ts.local_persist_manager.set_column_metadata(columns_metadata=self.ts.get_column_metadata())
        table_metadata = self.ts.get_table_metadata()

        if self.ts.data_source.related_resource.class_type != ms_client.DUCK_DB:
            self.ts.local_persist_manager.set_table_metadata(table_metadata=table_metadata)

        return error_on_last_update, updated_df
    def _validate_update_dataframe(self, df: pd.DataFrame) -> None:
        """
        Performs a series of critical checks on the DataFrame before persistence.

        Args:
            df: The DataFrame returned from the DataNode's update method.

        Raises:
            TypeError or ValueError if any validation check fails.
        """
        # Replace infinite values with NaN before any further checks
        df.replace([np.inf, -np.inf], np.nan, inplace=True)

        # Check that the time index is a UTC datetime
        time_index = df.index.get_level_values(0)
        if not pd.api.types.is_datetime64_ns_dtype(time_index) or str(time_index.tz) != str(datetime.timezone.utc):
            raise TypeError(f"Time index must be datetime64[ns, UTC], but found {time_index.dtype}")

        # Check for forbidden data types and enforce lowercase columns
        if self.ts.data_source.related_resource.class_type != ms_client.DUCK_DB:
            for col, dtype in df.dtypes.items():
                if not isinstance(col, str) or not col.islower():
                    raise ValueError(f"Column name '{col}' must be a lowercase string.")
                if "datetime64" in str(dtype):
                    raise TypeError(f"Column '{col}' has a forbidden datetime64 dtype.")
    @tracer.start_as_current_span("UpdateRunner._update_local")
    def _update_local(
            self,
            overwrite_latest_value: Optional[datetime.datetime],
            use_state_for_update: bool,
    ) -> pd.DataFrame:
        """
        Calculates, validates, and persists the data update for the time series.
        """
        tmp_df = pd.DataFrame()
        # 1. Handle dependency tree update first
        if self.update_tree:
            self._verify_tree_is_updated(use_state_for_update)
            if self.update_only_tree:
                self.logger.info(f'Dependency tree for {self.ts} updated. Halting run as requested.')
                return tmp_df

        # 2. Execute the core data calculation
        with tracer.start_as_current_span("Update Calculation") as update_span:

            # Add specific log message for the initial run
            if not self.ts.update_statistics:
                self.logger.debug(f"Performing first-time update for {self.ts}...")
            else:
                self.logger.debug(f'Calculating update for {self.ts}...')

            try:
                # Call the business logic defined on the DataNode class
                temp_df = self.ts.update()

                if temp_df is None:
                    raise Exception(f"{self.ts} update(...) method needs to return a DataFrame")

                # If the update method returns no data, we're done.
                if temp_df.empty:
                    self.logger.warning(f"No new data returned from update for {self.ts}.")
                    return temp_df

                # In a normal run, filter out data we already have.
                if overwrite_latest_value is None and ms_client.SessionDataSource.is_local_duck_db == False:
                    temp_df = self.ts.update_statistics.filter_df_by_latest_value(temp_df)

                # If filtering left nothing, we're done.
                if temp_df.empty:
                    self.logger.info(f"No new data to persist for {self.ts} after filtering.")
                    return temp_df

                # Validate the structure and content of the DataFrame
                self._validate_update_dataframe(temp_df)

                # Persist the validated data
                self.logger.info(f'Persisting {len(temp_df)} new rows for {self.ts}.')
                persisted = self.ts.local_persist_manager.persist_updated_data(
                    temp_df=temp_df,
                    overwrite=(overwrite_latest_value is not None)
                )
                update_span.set_status(Status(StatusCode.OK))
                self.logger.info(f'Successfully updated {self.ts}.')
                return temp_df

            except Exception as e:
                self.logger.exception("Failed during update calculation or persistence.")
                update_span.set_status(Status(StatusCode.ERROR, description=str(e)))
                raise e
        return tmp_df
    @tracer.start_as_current_span("UpdateRunner._verify_tree_is_updated")
    def _verify_tree_is_updated(
            self,
            use_state_for_update: bool,
    ) -> None:
        """
        Ensures all dependencies in the tree are updated before the head node.

        This method checks if the dependency graph is defined in the backend and
        then delegates the update execution to either a sequential (debug) or
        parallel (production) helper method.

        Args:
            use_state_for_update: If True, uses the current state for the update.
        """
        # 1. Ensure the dependency graph is built in the backend
        declared_dependencies = self.ts.dependencies() or {}
        deps_ids = [d.local_time_serie.id if (d.is_api == False and d.local_time_serie is not None) else None
                    for d in declared_dependencies.values()]

        # 2. Get the list of dependencies to update
        dependencies_df = self.ts.dependencies_df

        if any([a is None for a in deps_ids]) or any([d not in dependencies_df["local_time_serie_id"].to_list() for d in deps_ids]):
            # The declared dependencies are not all linked in the backend graph yet.
            self.ts.local_persist_manager.local_metadata.patch(ogm_dependencies_linked=False)

        if self.ts.local_persist_manager.local_metadata.ogm_dependencies_linked == False:
            self.logger.info("Dependency tree not set. Building now...")
            start_time = time.time()
            self.ts.set_relation_tree()
            self.logger.debug(f"Tree build took {time.time() - start_time:.2f}s.")
            self.ts.set_dependencies_df()
            dependencies_df = self.ts.dependencies_df

        if dependencies_df.empty:
            self.logger.debug("No dependencies to update.")
            return

        # 3. Build a map of dependency instances if needed for debug mode
        update_map = {}
        if self.debug_mode and use_state_for_update:
            update_map = self._get_update_map(declared_dependencies,
                                              logger=self.logger
                                              )

        # 4. Delegate to the appropriate execution method
        self.logger.debug(f"Starting update for {len(dependencies_df)} dependencies...")

        dependencies_df = dependencies_df[dependencies_df["source_class_name"] != "WrapperDataNode"]
        if dependencies_df.empty:
            return
        if self.debug_mode:
            self._execute_sequential_debug_update(dependencies_df, update_map)
        else:
            self._execute_parallel_distributed_update(dependencies_df)

        self.logger.debug(f'Dependency tree evaluation complete for {self.ts}.')
    def _get_update_map(self, declared_dependencies: Dict[str, 'DataNode'],
                        logger: object,
                        dependecy_map: Optional[Dict] = None) -> Dict[Tuple[str, int], Dict[str, Any]]:
        """
        Obtains all DataNode objects in the dependency graph by recursively
        calling the dependencies() method.

        This approach is more robust than introspecting class members as it relies
        on an explicit declaration of dependencies.

        Args:
            declared_dependencies: The declared dependencies of the node from which to start the traversal.
            logger: Logger used to report traversal progress.
            dependecy_map: An optional dictionary to store the dependency map, used for recursion.

        Returns:
            A dictionary mapping (update_hash, data_source_id) to DataNode info.
        """
        # Initialize the map on the first call
        if dependecy_map is None:
            dependecy_map = {}

        # Walk the explicitly declared dependencies, just like set_relation_tree
        for name, dependency_ts in declared_dependencies.items():
            key = (dependency_ts.update_hash, dependency_ts.data_source_id)

            # If we have already processed this node, skip it to prevent infinite loops
            if key in dependecy_map:
                continue
            if dependency_ts.is_api == True:
                continue

            # Ensure the dependency is initialized in the persistence layer
            dependency_ts.local_persist_manager

            logger.debug(f"Adding dependency '{name}' to update map.")
            dependecy_map[key] = {"is_pickle": False, "ts": dependency_ts}
            child_dependencies = dependency_ts.dependencies() or {}
            # Recursively call _get_update_map on the dependency to traverse the entire graph
            self._get_update_map(declared_dependencies=child_dependencies,
                                 logger=logger,
                                 dependecy_map=dependecy_map)

        return dependecy_map
    def _execute_sequential_debug_update(
            self,
            dependencies_df: pd.DataFrame,
            update_map: Dict[Tuple[str, int], Dict],
    ) -> None:
        """Runs dependency updates sequentially in the same process for debugging."""
        self.logger.info("Executing dependency updates in sequential debug mode.")
        # Sort by priority to respect the DAG execution order
        sorted_priorities = sorted(dependencies_df["update_priority"].unique())

        def refresh_update_statistics_of_deps(ts):
            for _, ts_dep in ts.dependencies().items():
                ts_dep.update_statistics = ts_dep.local_persist_manager.get_update_statistics_for_table()

        for priority in sorted_priorities:
            priority_df = dependencies_df[dependencies_df["update_priority"] == priority]
            # Sort by number of upstreams to potentially optimize within a priority level
            sorted_deps = priority_df.sort_values("number_of_upstreams", ascending=False)

            for _, ts_row in sorted_deps.iterrows():
                key = (ts_row["update_hash"], ts_row["data_source_id"])
                ts_to_update = None
                try:
                    if key in update_map:
                        ts_to_update = update_map[key]["ts"]

                        # Refresh the update_statistics of the dependencies
                        refresh_update_statistics_of_deps(ts_to_update)

                    else:
                        # If not in the map, it must be rebuilt from storage
                        ts_to_update, _ = build_operations.rebuild_and_set_from_update_hash(
                            update_hash=key[0], data_source_id=key[1]
                        )

                    if ts_to_update:
                        self.logger.debug(f"Running debug update for dependency: {ts_to_update.update_hash}")
                        # Each dependency gets its own clean runner
                        dep_runner = UpdateRunner(
                            time_serie=ts_to_update,
                            debug_mode=True,
                            update_tree=False,  # We only update one node at a time
                            force_update=self.force_update,
                            remote_scheduler=self.scheduler,
                        )
                        dep_runner._setup_scheduler()

                        dep_runner._start_update(
                            use_state_for_update=False,
                        )
                except Exception as e:
                    self.logger.exception(f"Failed to update dependency {key[0]}")
                    raise e  # Re-raise to halt the entire process on failure

        # Refresh update statistics of the head node and its direct dependencies
        # (covers the edge case of a multi-column self update).
        self.ts.local_persist_manager.synchronize_metadata(None)
        us = self.ts.local_persist_manager.get_update_statistics_for_table()
        self.ts.update_statistics = us

        refresh_update_statistics_of_deps(self.ts)
    @tracer.start_as_current_span("UpdateRunner._execute_parallel_distributed_update")
    def _execute_parallel_distributed_update(
            self,
            dependencies_df: pd.DataFrame,
    ) -> None:
        """
        Runs dependency updates in parallel across distributed workers.
        """
        # 1. Prepare tasks, prioritizing any pre-loaded time series
        raise Exception("This is an Enterprise feature available only in the Main Sequence Platform")
    def run(self) -> Optional[Tuple[bool, pd.DataFrame]]:
        """
        Executes the full update lifecycle for the time series.

        This is the main entry point for the runner. It orchestrates the setup
        of scheduling and the execution environment, triggers the core update
        process, and handles all error reporting and cleanup.
        """
        # Initialize tracing and set initial flags
        tracer_instrumentator = TracerInstrumentator()
        tracer = tracer_instrumentator.build_tracer()
        error_to_raise = None

        # 1. Set up the scheduler for this run
        try:
            self.ts.verify_and_build_remote_objects()  # needed to start the scheduler
            self._setup_scheduler()
            cvars.bind_contextvars(scheduler_name=self.scheduler.name, head_local_ts_hash_id=self.ts.update_hash)

            # 2. Start the main execution block with tracing
            with tracer.start_as_current_span(f"Scheduler Head Update: {self.ts.update_hash}") as span:
                span.set_attribute("time_serie_update_hash", self.ts.update_hash)
                span.set_attribute("storage_hash", self.ts.storage_hash)
                span.set_attribute("head_scheduler", self.scheduler.name)

                # 3. Prepare the execution environment (Ray actors, dependency metadata)
                _ = self._setup_execution_environment()
                self.logger.debug("Execution environment and dependency metadata are set.")

                # 4. Wait for the scheduled update time, if not forcing an immediate run
                if not self.force_update:
                    self.ts.local_time_serie.wait_for_update_time()

                # 5. Trigger the core update process
                error_on_last_update, updated_df = self._start_update(
                    use_state_for_update=True,
                    override_update_stats=self.override_update_stats
                )

                return error_on_last_update, updated_df

        except DependencyUpdateError as de:
            self.logger.error("A dependency failed to update, halting the run.", error=de)
            error_to_raise = de
        except TimeoutError as te:
            self.logger.error("The update process timed out.", error=te)
            error_to_raise = te
        except Exception as e:
            self.logger.exception("An unexpected error occurred during the update run.")
            error_to_raise = e
        finally:
            # 6. Clean up resources
            # Stop the scheduler heartbeat if it was created by this runner
            if self.remote_scheduler is None and self.scheduler:
                self.scheduler.stop_heart_beat()

            # Clean up temporary attributes on the DataNode instance
            if hasattr(self.ts, 'update_tracker'):
                del self.ts.update_tracker

            gc.collect()

        # 7. Re-raise any captured exception after cleanup
        if error_to_raise:
            raise error_to_raise
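For orientation, a frame that passes _validate_update_dataframe has a UTC datetime64[ns] index, lowercase string column names and no datetime64 payload columns. The sketch below illustrates that shape and, in comments, how a runner is typically driven; MyDataNode is hypothetical, and the import path assumes this hunk is run_operations.py, as the +543 count in the file listing suggests.

import datetime

import numpy as np
import pandas as pd

# Runnable: a frame shaped the way the validator expects.
idx = pd.date_range("2024-01-01", periods=3, freq="D", tz=datetime.timezone.utc)
frame = pd.DataFrame({"close_price": [101.2, np.nan, 103.8]}, index=idx)
assert str(idx.tz) == str(datetime.timezone.utc)
assert all(isinstance(c, str) and c.islower() for c in frame.columns)

# Sketch only (needs a concrete DataNode; the UpdateRunner arguments are taken from the hunk above):
#
#   from mainsequence.tdag.data_nodes.run_operations import UpdateRunner
#   runner = UpdateRunner(time_serie=MyDataNode(), debug_mode=True, force_update=True)
#   error_on_last_update, updated_df = runner.run()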
mainsequence/tdag/data_nodes/utils.py
@@ -0,0 +1,24 @@
def string_freq_to_time_delta(frequency):
    import datetime
    if "m" in frequency:
        kwargs = {"minutes": int(frequency.replace("m", ""))}
    elif "d" in frequency:
        kwargs = {"days": int(frequency.replace("d", ""))}
    else:
        raise NotImplementedError

    time_delta = datetime.timedelta(**kwargs)
    return time_delta


def string_frequency_to_minutes(frequency):
    if "m" in frequency:
        minutes = int(frequency.replace("m", ""))
    elif "d" in frequency:
        minutes = int(frequency.replace("d", "")) * 24 * 60
    else:
        raise NotImplementedError

    return minutes
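Both helpers parse compact frequency strings such as "15m" or "1d" and raise NotImplementedError for anything else. A quick sketch (the import path assumes this hunk is mainsequence/tdag/data_nodes/utils.py, matching the +24 count in the listing):

from mainsequence.tdag.data_nodes.utils import (
    string_freq_to_time_delta,
    string_frequency_to_minutes,
)

string_freq_to_time_delta("15m")   # datetime.timedelta(minutes=15)
string_freq_to_time_delta("1d")    # datetime.timedelta(days=1)
string_frequency_to_minutes("1d")  # 1440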
mainsequence/tdag/future_registry.py
@@ -0,0 +1,25 @@
# future_registry.py
import threading

# Use a thread-safe set to store pending futures
_pending_futures = set()
_registry_lock = threading.Lock()


def add_future(future):
    """Add a future to the global registry."""
    with _registry_lock:
        _pending_futures.add(future)


def remove_future(future):
    """Remove a future from the global registry."""
    with _registry_lock:
        _pending_futures.discard(future)


def wait_for_all_futures():
    """Wait for all registered futures to complete."""
    with _registry_lock:
        # Take a snapshot of the current futures, then release the lock before blocking
        futures = list(_pending_futures)
    for future in futures:
        # This call blocks until the future completes
        future.result()
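The registry only stores and drains futures; creating them is up to the caller. A minimal sketch with concurrent.futures, assuming the registry is imported from mainsequence.tdag.future_registry as the file listing indicates:

from concurrent.futures import ThreadPoolExecutor

from mainsequence.tdag.future_registry import add_future, remove_future, wait_for_all_futures


def work(n):
    return n * n


with ThreadPoolExecutor(max_workers=2) as pool:
    for n in range(4):
        fut = pool.submit(work, n)
        add_future(fut)
        fut.add_done_callback(remove_future)  # drop each future from the registry once it finishes
    wait_for_all_futures()  # blocks on whatever is still registered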
mainsequence/tdag/utils.py
@@ -0,0 +1,40 @@
import os
import yaml
import hashlib
import json
import socket
from mainsequence.logconf import logger


def get_host_name():
    return socket.gethostname()


def read_yaml(path):
    # If the file does not exist, create it with a placeholder mapping.
    if not os.path.exists(path):
        empty_yaml = {".": "."}
        if not os.path.isdir(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path), exist_ok=True)
        write_yaml(path, empty_yaml)

    with open(path) as stream:
        read = yaml.load(stream, Loader=yaml.UnsafeLoader)

    return read


def write_yaml(path, dict_file):
    with open(path, 'w') as f:
        yaml.dump(dict_file, f, default_flow_style=False, sort_keys=False)


def read_key_from_yaml(key, path):
    yaml_file = read_yaml(path)

    if key in yaml_file:
        return yaml_file[key]
    else:
        return None


def hash_dict(dict_to_hash: dict) -> str:
    dhash = hashlib.md5()
    encoded = json.dumps(dict_to_hash, sort_keys=True, default=str).encode()
    dhash.update(encoded)
    return dhash.hexdigest()
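hash_dict produces an order-independent digest because json.dumps is called with sort_keys=True and falls back to str() for non-JSON values, while the YAML helpers round-trip plain mappings. A small sketch (the file path is illustrative; the import path assumes this hunk is mainsequence/tdag/utils.py per the +40 count in the listing):

from mainsequence.tdag.utils import hash_dict, read_key_from_yaml, write_yaml

a = {"x": 1, "y": {"b": 2, "a": 1}}
b = {"y": {"a": 1, "b": 2}, "x": 1}
assert hash_dict(a) == hash_dict(b)  # key order does not affect the MD5 digest

write_yaml("/tmp/example.yaml", a)
assert read_key_from_yaml("x", "/tmp/example.yaml") == 1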
mainsequence/virtualfundbuilder/__init__.py
@@ -0,0 +1,45 @@
__version__ = '0.1.0'

from pathlib import Path
import os
import sys

from mainsequence.virtualfundbuilder.__main__ import get_pod_configuration


def load_env():
    assert os.environ.get("VFB_PROJECT_PATH", None) is not None, "VFB_PROJECT_PATH environment variable not set"

    from mainsequence.tdag.config import Configuration
    # This step is needed to ensure env variables are passed to the ray cluster
    Configuration.add_env_variables_to_registry(["VFB_PROJECT_PATH"])

    sys.path.append(str(Path(os.environ.get("VFB_PROJECT_PATH")).parent))


load_env()
from mainsequence.virtualfundbuilder.utils import (
    GECKO_SYMBOL_MAPPING,
    TIMEDELTA,
    reindex_df,
    convert_to_binance_frequency,
    get_last_query_times_per_asset,
    build_rolling_regression_from_df,
    runs_in_main_process
)


def register_default_strategies():
    # Keep this in a function so the library namespace is not cluttered
    import mainsequence.virtualfundbuilder.contrib.apps
    import mainsequence.virtualfundbuilder.contrib.data_nodes
    import mainsequence.virtualfundbuilder.contrib.rebalance_strategies

    if os.getenv("PROJECT_LIBRARY_NAME") is None:
        # TODO workaround for now to make local execution work
        os.environ["PROJECT_LIBRARY_NAME"] = Path(os.environ.get("VFB_PROJECT_PATH")).name


RUNS_IN_JOB = os.getenv("JOB_ID", None)
if RUNS_IN_JOB:
    register_default_strategies()

if runs_in_main_process():
    get_pod_configuration()
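Because load_env() runs at import time, VFB_PROJECT_PATH must be set before the package is first imported. A minimal sketch with an illustrative path:

import os

# Must be set before the first import, otherwise load_env() raises an AssertionError.
os.environ["VFB_PROJECT_PATH"] = "/path/to/my_vfb_project"

import mainsequence.virtualfundbuilder as vfb

vfb.register_default_strategies()  # optional when not running inside a platform job (JOB_ID unset)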