mainsequence 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. mainsequence/__init__.py +0 -0
  2. mainsequence/__main__.py +9 -0
  3. mainsequence/cli/__init__.py +1 -0
  4. mainsequence/cli/api.py +157 -0
  5. mainsequence/cli/cli.py +442 -0
  6. mainsequence/cli/config.py +78 -0
  7. mainsequence/cli/ssh_utils.py +126 -0
  8. mainsequence/client/__init__.py +17 -0
  9. mainsequence/client/base.py +431 -0
  10. mainsequence/client/data_sources_interfaces/__init__.py +0 -0
  11. mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
  12. mainsequence/client/data_sources_interfaces/timescale.py +479 -0
  13. mainsequence/client/models_helpers.py +113 -0
  14. mainsequence/client/models_report_studio.py +412 -0
  15. mainsequence/client/models_tdag.py +2276 -0
  16. mainsequence/client/models_vam.py +1983 -0
  17. mainsequence/client/utils.py +387 -0
  18. mainsequence/dashboards/__init__.py +0 -0
  19. mainsequence/dashboards/streamlit/__init__.py +0 -0
  20. mainsequence/dashboards/streamlit/assets/config.toml +12 -0
  21. mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
  22. mainsequence/dashboards/streamlit/assets/logo.png +0 -0
  23. mainsequence/dashboards/streamlit/core/__init__.py +0 -0
  24. mainsequence/dashboards/streamlit/core/theme.py +212 -0
  25. mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
  26. mainsequence/dashboards/streamlit/scaffold.py +220 -0
  27. mainsequence/instrumentation/__init__.py +7 -0
  28. mainsequence/instrumentation/utils.py +101 -0
  29. mainsequence/instruments/__init__.py +1 -0
  30. mainsequence/instruments/data_interface/__init__.py +10 -0
  31. mainsequence/instruments/data_interface/data_interface.py +361 -0
  32. mainsequence/instruments/instruments/__init__.py +3 -0
  33. mainsequence/instruments/instruments/base_instrument.py +85 -0
  34. mainsequence/instruments/instruments/bond.py +447 -0
  35. mainsequence/instruments/instruments/european_option.py +74 -0
  36. mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
  37. mainsequence/instruments/instruments/json_codec.py +585 -0
  38. mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
  39. mainsequence/instruments/instruments/position.py +475 -0
  40. mainsequence/instruments/instruments/ql_fields.py +239 -0
  41. mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
  42. mainsequence/instruments/pricing_models/__init__.py +0 -0
  43. mainsequence/instruments/pricing_models/black_scholes.py +49 -0
  44. mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
  45. mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
  46. mainsequence/instruments/pricing_models/indices.py +350 -0
  47. mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
  48. mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
  49. mainsequence/instruments/settings.py +175 -0
  50. mainsequence/instruments/utils.py +29 -0
  51. mainsequence/logconf.py +284 -0
  52. mainsequence/reportbuilder/__init__.py +0 -0
  53. mainsequence/reportbuilder/__main__.py +0 -0
  54. mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
  55. mainsequence/reportbuilder/model.py +713 -0
  56. mainsequence/reportbuilder/slide_templates.py +532 -0
  57. mainsequence/tdag/__init__.py +8 -0
  58. mainsequence/tdag/__main__.py +0 -0
  59. mainsequence/tdag/config.py +129 -0
  60. mainsequence/tdag/data_nodes/__init__.py +12 -0
  61. mainsequence/tdag/data_nodes/build_operations.py +751 -0
  62. mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
  63. mainsequence/tdag/data_nodes/persist_managers.py +812 -0
  64. mainsequence/tdag/data_nodes/run_operations.py +543 -0
  65. mainsequence/tdag/data_nodes/utils.py +24 -0
  66. mainsequence/tdag/future_registry.py +25 -0
  67. mainsequence/tdag/utils.py +40 -0
  68. mainsequence/virtualfundbuilder/__init__.py +45 -0
  69. mainsequence/virtualfundbuilder/__main__.py +235 -0
  70. mainsequence/virtualfundbuilder/agent_interface.py +77 -0
  71. mainsequence/virtualfundbuilder/config_handling.py +86 -0
  72. mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
  73. mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
  74. mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
  75. mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
  76. mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
  77. mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
  78. mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
  79. mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
  80. mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
  81. mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
  82. mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
  83. mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
  84. mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
  85. mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
  86. mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
  87. mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
  88. mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
  89. mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
  90. mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
  91. mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
  92. mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
  93. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
  94. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
  95. mainsequence/virtualfundbuilder/data_nodes.py +637 -0
  96. mainsequence/virtualfundbuilder/enums.py +23 -0
  97. mainsequence/virtualfundbuilder/models.py +282 -0
  98. mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
  99. mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
  100. mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
  101. mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
  102. mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
  103. mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
  104. mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
  105. mainsequence/virtualfundbuilder/utils.py +381 -0
  106. mainsequence-2.0.0.dist-info/METADATA +105 -0
  107. mainsequence-2.0.0.dist-info/RECORD +110 -0
  108. mainsequence-2.0.0.dist-info/WHEEL +5 -0
  109. mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
  110. mainsequence-2.0.0.dist-info/top_level.txt +1 -0
mainsequence/tdag/data_nodes/run_operations.py
@@ -0,0 +1,543 @@
+# Standard Library Imports
+import gc
+import time
+import datetime
+from typing import Any, Dict, List, Optional, Tuple
+
+# Third-Party Library Imports
+import numpy as np
+import pandas as pd
+import pytz
+import structlog.contextvars as cvars
+
+# Client and ORM Models
+import mainsequence.client as ms_client
+from mainsequence.client import UpdateStatistics
+
+# Instrumentation and Logging
+from mainsequence.instrumentation import (
+    tracer,
+    tracer_instrumentator,
+    TracerInstrumentator,
+)
+from mainsequence.instrumentation.utils import Status, StatusCode
+
+# TDAG Core Components and Helpers
+from mainsequence.tdag.data_nodes import build_operations
+
+
+# Custom Exceptions
+class DependencyUpdateError(Exception):
+    pass
+
+
+class UpdateRunner:
+    """
+    Orchestrates the entire update process for a DataNode instance.
+    It handles scheduling, dependency resolution, execution, and error handling.
+    """
+
+    def __init__(self, time_serie: "DataNode", debug_mode: bool = False, force_update: bool = False,
+                 update_tree: bool = True, update_only_tree: bool = False,
+                 remote_scheduler: Optional[ms_client.Scheduler] = None,
+                 override_update_stats: Optional[UpdateStatistics] = None,
+                 ):
+        self.ts = time_serie
+        self.logger = self.ts.logger
+        self.debug_mode = debug_mode
+        self.force_update = force_update
+        self.update_tree = update_tree
+        self.update_only_tree = update_only_tree
+        # A full-tree update supersedes a tree-only update.
+        if self.update_tree:
+            self.update_only_tree = False
+
+        self.remote_scheduler = remote_scheduler
+        self.scheduler: Optional[ms_client.Scheduler] = None
+        self.override_update_stats = override_update_stats
+
+    def _setup_scheduler(self) -> None:
+        """Initializes or retrieves the scheduler and starts its heartbeat."""
+        if self.remote_scheduler:
+            self.scheduler = self.remote_scheduler
+            return
+
+        name_prefix = "DEBUG_" if self.debug_mode else ""
+        self.scheduler = ms_client.Scheduler.build_and_assign_to_ts(
+            scheduler_name=f"{name_prefix}{self.ts.local_time_serie.id}",
+            time_serie_ids=[self.ts.local_time_serie.id],
+            remove_from_other_schedulers=True,
+            running_in_debug_mode=self.debug_mode
+        )
+        self.scheduler.start_heart_beat()
+
+    def _pre_update_routines(self, local_metadata: Optional[dict] = None) -> Tuple[Dict[int, ms_client.LocalTimeSerie], Any]:
+        """
+        Prepares the DataNode and its dependencies for an update by fetching the
+        latest metadata for the entire dependency graph.
+
+        Args:
+            local_metadata: Optional dictionary with metadata for the head node,
+                used to synchronize before fetching the full tree.
+
+        Returns:
+            A tuple containing a dictionary of all local metadata objects in the
+            tree (keyed by ID) and the corresponding state data.
+        """
+        # 1. Synchronize the head node and load its dependency structure.
+        self.ts.local_persist_manager.synchronize_metadata(local_metadata=local_metadata)
+        self.ts.set_relation_tree()
+
+        # The `load_dependencies` logic is now integrated here.
+        if self.ts.dependencies_df is None:
+            self.ts.set_dependencies_df()
+
+        # 2. Connect the dependency tree to the scheduler if it hasn't been already.
+        if not self.ts._scheduler_tree_connected and self.update_tree:
+            self.logger.debug("Connecting dependency tree to scheduler...")
+            if not self.ts.depth_df.empty:
+                all_ids = self.ts.depth_df["local_time_serie_id"].to_list() + [self.ts.local_time_serie.id]
+                self.scheduler.in_active_tree_connect(local_time_series_ids=all_ids)
+            self.ts._scheduler_tree_connected = True
+
+        # 3. Collect all IDs in the dependency graph to fetch their metadata.
+        if not self.ts.depth_df.empty:
+            all_ids_in_tree = self.ts.depth_df["local_time_serie_id"].to_list()
+        else:
+            all_ids_in_tree = []
+
+        # Always include the head node itself.
+        all_ids_in_tree.append(self.ts.local_time_serie.id)
+
+        # 4. Fetch the latest metadata for the entire tree from the backend.
+        update_details_batch = dict(
+            error_on_last_update=False,
+            active_update_scheduler_id=self.scheduler.id,
+            active_update_status="Q"  # Assuming queue status is always set here
+        )
+
+        all_metadatas_response = ms_client.LocalTimeSerie.get_metadatas_and_set_updates(
+            local_time_series_ids=all_ids_in_tree,
+            update_details_kwargs=update_details_batch,
+            update_priority_dict=None
+        )
+
+        # 5. Process and return the results.
+        state_data = all_metadatas_response['state_data']
+        local_metadatas_list = all_metadatas_response["local_metadatas"]
+        local_metadatas_map = {m.id: m for m in local_metadatas_list}
+
+        self.ts.scheduler = self.scheduler
+        self.ts.update_details_tree = {key: v.run_configuration for key, v in local_metadatas_map.items()}
+
+        return local_metadatas_map, state_data
+
+    def _setup_execution_environment(self) -> Dict[int, ms_client.LocalTimeSerie]:
+        local_metadatas, state_data = self._pre_update_routines()
+        return local_metadatas
+
+    def _start_update(self, use_state_for_update: bool,
+                      override_update_stats: Optional[UpdateStatistics] = None) -> Tuple[bool, pd.DataFrame]:
+        """Orchestrates a single DataNode update, including pre/post routines."""
+        historical_update = self.ts.local_persist_manager.local_metadata.set_start_of_execution(
+            active_update_scheduler_id=self.scheduler.id
+        )
+
+        must_update = historical_update.must_update or self.force_update
+
+        # Ensure metadata is fully loaded with relationship details before proceeding.
+        self.ts.local_persist_manager.set_local_metadata_lazy(include_relations_detail=True)
+
+        if override_update_stats is not None:
+            self.ts.update_statistics = override_update_stats
+        else:
+            update_statistics = historical_update.update_statistics
+            # The DataNode defines how to scope its statistics.
+            self.ts._set_update_statistics(update_statistics)
+
+        updated_df = pd.DataFrame()
+        error_on_last_update = False
+        try:
+            if must_update:
+                self.logger.debug(f"Update required for {self.ts}.")
+                updated_df = self._update_local(
+                    overwrite_latest_value=historical_update.last_time_index_value,
+                    use_state_for_update=use_state_for_update
+                )
+            else:
+                self.logger.debug(f"Already up-to-date. Skipping update for {self.ts}.")
+        except Exception as e:
+            error_on_last_update = True
+            raise e
+        finally:
+            self.ts.local_persist_manager.local_metadata.set_end_of_execution(
+                historical_update_id=historical_update.id,
+                error_on_update=error_on_last_update
+            )
+
+        # Always set last relations details after the run completes.
+        self.ts.local_persist_manager.set_local_metadata_lazy(include_relations_detail=True)
+
+        self.ts.run_post_update_routines(error_on_last_update=error_on_last_update)
+        self.ts.local_persist_manager.set_column_metadata(columns_metadata=self.ts.get_column_metadata())
+        table_metadata = self.ts.get_table_metadata()
+
+        if self.ts.data_source.related_resource.class_type != ms_client.DUCK_DB:
+            self.ts.local_persist_manager.set_table_metadata(table_metadata=table_metadata)
+
+        return error_on_last_update, updated_df
+
+    def _validate_update_dataframe(self, df: pd.DataFrame) -> None:
+        """
+        Performs a series of critical checks on the DataFrame before persistence.
+
+        Args:
+            df: The DataFrame returned from the DataNode's update method.
+
+        Raises:
+            TypeError or ValueError if any validation check fails.
+        """
+        # Replace infinite values with NaN.
+        df.replace([np.inf, -np.inf], np.nan, inplace=True)
+
+        # Check that the time index is a UTC datetime.
+        time_index = df.index.get_level_values(0)
+        if not pd.api.types.is_datetime64_ns_dtype(time_index) or str(time_index.tz) != str(datetime.timezone.utc):
+            raise TypeError(f"Time index must be datetime64[ns, UTC], but found {time_index.dtype}")
+
+        # Check for forbidden data types and enforce lowercase column names.
+        if self.ts.data_source.related_resource.class_type != ms_client.DUCK_DB:
+            for col, dtype in df.dtypes.items():
+                if not isinstance(col, str) or not col.islower():
+                    raise ValueError(f"Column name '{col}' must be a lowercase string.")
+                if "datetime64" in str(dtype):
+                    raise TypeError(f"Column '{col}' has a forbidden datetime64 dtype.")
+
+    @tracer.start_as_current_span("UpdateRunner._update_local")
+    def _update_local(
+        self,
+        overwrite_latest_value: Optional[datetime.datetime],
+        use_state_for_update: bool,
+    ) -> pd.DataFrame:
+        """
+        Calculates, validates, and persists the data update for the time series.
+        """
+        temp_df = pd.DataFrame()
+        # 1. Handle the dependency tree update first.
+        if self.update_tree:
+            self._verify_tree_is_updated(use_state_for_update)
+            if self.update_only_tree:
+                self.logger.info(f'Dependency tree for {self.ts} updated. Halting run as requested.')
+                return temp_df
+
+        # 2. Execute the core data calculation.
+        with tracer.start_as_current_span("Update Calculation") as update_span:
+
+            # Add a specific log message for the initial run.
+            if not self.ts.update_statistics:
+                self.logger.debug(f"Performing first-time update for {self.ts}...")
+            else:
+                self.logger.debug(f'Calculating update for {self.ts}...')
+
+            try:
+                # Call the business logic defined on the DataNode class.
+                temp_df = self.ts.update()
+
+                if temp_df is None:
+                    raise Exception(f"{self.ts} update(...) method needs to return a DataFrame")
+
+                # If the update method returns no data, we're done.
+                if temp_df.empty:
+                    self.logger.warning(f"No new data returned from update for {self.ts}.")
+                    return temp_df
+
+                # In a normal run, filter out data we already have.
+                if overwrite_latest_value is None and not ms_client.SessionDataSource.is_local_duck_db:
+                    temp_df = self.ts.update_statistics.filter_df_by_latest_value(temp_df)
+
+                # If filtering left nothing, we're done.
+                if temp_df.empty:
+                    self.logger.info(f"No new data to persist for {self.ts} after filtering.")
+                    return temp_df
+
+                # Validate the structure and content of the DataFrame.
+                self._validate_update_dataframe(temp_df)
+
+                # Persist the validated data.
+                self.logger.info(f'Persisting {len(temp_df)} new rows for {self.ts}.')
+                self.ts.local_persist_manager.persist_updated_data(
+                    temp_df=temp_df,
+                    overwrite=(overwrite_latest_value is not None)
+                )
+                update_span.set_status(Status(StatusCode.OK))
+                self.logger.info(f'Successfully updated {self.ts}.')
+                return temp_df
+
+            except Exception as e:
+                self.logger.exception("Failed during update calculation or persistence.")
+                update_span.set_status(Status(StatusCode.ERROR, description=str(e)))
+                raise e
+
+    @tracer.start_as_current_span("UpdateRunner._verify_tree_is_updated")
+    def _verify_tree_is_updated(
+        self,
+        use_state_for_update: bool,
+    ) -> None:
+        """
+        Ensures all dependencies in the tree are updated before the head node.
+
+        This method checks if the dependency graph is defined in the backend and
+        then delegates the update execution to either a sequential (debug) or
+        parallel (production) helper method.
+
+        Args:
+            use_state_for_update: If True, uses the current state for the update.
+        """
+        # 1. Ensure the dependency graph is built in the backend.
+        declared_dependencies = self.ts.dependencies() or {}
+        deps_ids = [
+            d.local_time_serie.id if (not d.is_api and d.local_time_serie is not None) else None
+            for d in declared_dependencies.values()
+        ]
+
+        # 2. Get the list of dependencies to update.
+        dependencies_df = self.ts.dependencies_df
+
+        if any(a is None for a in deps_ids) or any(
+                d not in dependencies_df["local_time_serie_id"].to_list() for d in deps_ids):
+            # The DataNode's dependency links are not yet registered in the backend.
+            self.ts.local_persist_manager.local_metadata.patch(ogm_dependencies_linked=False)
+
+        if not self.ts.local_persist_manager.local_metadata.ogm_dependencies_linked:
+            self.logger.info("Dependency tree not set. Building now...")
+            start_time = time.time()
+            self.ts.set_relation_tree()
+            self.logger.debug(f"Tree build took {time.time() - start_time:.2f}s.")
+            self.ts.set_dependencies_df()
+            dependencies_df = self.ts.dependencies_df
+
+        if dependencies_df.empty:
+            self.logger.debug("No dependencies to update.")
+            return
+
+        # 3. Build a map of dependency instances if needed for debug mode.
+        update_map = {}
+        if self.debug_mode and use_state_for_update:
+            update_map = self._get_update_map(declared_dependencies, logger=self.logger)
+
+        # 4. Delegate to the appropriate execution method.
+        self.logger.debug(f"Starting update for {len(dependencies_df)} dependencies...")
+
+        dependencies_df = dependencies_df[dependencies_df["source_class_name"] != "WrapperDataNode"]
+        if dependencies_df.empty:
+            return
+        if self.debug_mode:
+            self._execute_sequential_debug_update(dependencies_df, update_map)
+        else:
+            self._execute_parallel_distributed_update(dependencies_df)
+
+        self.logger.debug(f'Dependency tree evaluation complete for {self.ts}.')
+
+    def _get_update_map(self, declared_dependencies: Dict[str, 'DataNode'],
+                        logger: object,
+                        dependency_map: Optional[Dict] = None) -> Dict[Tuple[str, int], Dict[str, Any]]:
+        """
+        Obtains all DataNode objects in the dependency graph by recursively
+        calling the dependencies() method.
+
+        This approach is more robust than introspecting class members, as it
+        relies on an explicit declaration of dependencies.
+
+        Args:
+            declared_dependencies: The declared dependencies from which to start the traversal.
+            logger: Logger used to report traversal progress.
+            dependency_map: An optional dictionary that accumulates the dependency map across recursive calls.
+
+        Returns:
+            A dictionary mapping (update_hash, data_source_id) to DataNode info.
+        """
+        # Initialize the map on the first call.
+        if dependency_map is None:
+            dependency_map = {}
+
+        # Walk the explicitly declared dependencies, just like set_relation_tree.
+        for name, dependency_ts in declared_dependencies.items():
+            key = (dependency_ts.update_hash, dependency_ts.data_source_id)
+
+            # If we have already processed this node, skip it to prevent infinite loops.
+            if key in dependency_map:
+                continue
+            if dependency_ts.is_api:
+                continue
+
+            # Accessing the property ensures the dependency is initialized in the persistence layer.
+            dependency_ts.local_persist_manager
+
+            logger.debug(f"Adding dependency '{name}' to update map.")
+            dependency_map[key] = {"is_pickle": False, "ts": dependency_ts}
+            # Recursively traverse the entire graph through each dependency's own declarations.
+            self._get_update_map(declared_dependencies=dependency_ts.dependencies() or {},
+                                 logger=logger,
+                                 dependency_map=dependency_map)
+
+        return dependency_map
+
+    def _execute_sequential_debug_update(
+        self,
+        dependencies_df: pd.DataFrame,
+        update_map: Dict[Tuple[str, int], Dict],
+    ) -> None:
+        """Runs dependency updates sequentially in the same process for debugging."""
+        self.logger.info("Executing dependency updates in sequential debug mode.")
+        # Sort by priority to respect the DAG execution order.
+        sorted_priorities = sorted(dependencies_df["update_priority"].unique())
+
+        def refresh_update_statistics_of_deps(ts):
+            for _, ts_dep in ts.dependencies().items():
+                ts_dep.update_statistics = ts_dep.local_persist_manager.get_update_statistics_for_table()
+
+        for priority in sorted_priorities:
+            priority_df = dependencies_df[dependencies_df["update_priority"] == priority]
+            # Sort by number of upstreams to potentially optimize within a priority level.
+            sorted_deps = priority_df.sort_values("number_of_upstreams", ascending=False)
+
+            for _, ts_row in sorted_deps.iterrows():
+                key = (ts_row["update_hash"], ts_row["data_source_id"])
+                ts_to_update = None
+                try:
+                    if key in update_map:
+                        ts_to_update = update_map[key]["ts"]
+                        # Refresh the update_statistics of the dependencies.
+                        refresh_update_statistics_of_deps(ts_to_update)
+                    else:
+                        # If not in the map, it must be rebuilt from storage.
+                        ts_to_update, _ = build_operations.rebuild_and_set_from_update_hash(
+                            update_hash=key[0], data_source_id=key[1]
+                        )
+
+                    if ts_to_update:
+                        self.logger.debug(f"Running debug update for dependency: {ts_to_update.update_hash}")
+                        # Each dependency gets its own clean runner.
+                        dep_runner = UpdateRunner(
+                            time_serie=ts_to_update,
+                            debug_mode=True,
+                            update_tree=False,  # We only update one node at a time.
+                            force_update=self.force_update,
+                            remote_scheduler=self.scheduler,
+                        )
+                        dep_runner._setup_scheduler()
+                        dep_runner._start_update(use_state_for_update=False)
+                except Exception as e:
+                    self.logger.exception(f"Failed to update dependency {key[0]}")
+                    raise e  # Re-raise to halt the entire process on failure.
+
+        # Refresh the update statistics of the head node and its direct dependencies,
+        # covering the edge case of a multi-column self-update.
+        self.ts.local_persist_manager.synchronize_metadata(None)
+        self.ts.update_statistics = self.ts.local_persist_manager.get_update_statistics_for_table()
+        refresh_update_statistics_of_deps(self.ts)
+
+    @tracer.start_as_current_span("UpdateRunner._execute_parallel_distributed_update")
+    def _execute_parallel_distributed_update(
+        self,
+        dependencies_df: pd.DataFrame,
+    ) -> None:
+        """Runs dependency updates in parallel across distributed workers."""
+        # Parallel distributed execution is not available in the open-source package.
+        raise Exception("This is an Enterprise feature available only in the Main Sequence Platform")
+
+    def run(self) -> Tuple[bool, pd.DataFrame]:
+        """
+        Executes the full update lifecycle for the time series.
+
+        This is the main entry point for the runner. It orchestrates the setup
+        of scheduling and the execution environment, triggers the core update
+        process, and handles all error reporting and cleanup.
+        """
+        # Initialize tracing and set initial flags.
+        tracer_instrumentator = TracerInstrumentator()
+        tracer = tracer_instrumentator.build_tracer()
+        error_to_raise = None
+
+        try:
+            # 1. Set up the scheduler for this run.
+            self.ts.verify_and_build_remote_objects()  # Needed to start the scheduler.
+            self._setup_scheduler()
+            cvars.bind_contextvars(scheduler_name=self.scheduler.name, head_local_ts_hash_id=self.ts.update_hash)
+
+            # 2. Start the main execution block with tracing.
+            with tracer.start_as_current_span(f"Scheduler Head Update: {self.ts.update_hash}") as span:
+                span.set_attribute("time_serie_update_hash", self.ts.update_hash)
+                span.set_attribute("storage_hash", self.ts.storage_hash)
+                span.set_attribute("head_scheduler", self.scheduler.name)
+
+                # 3. Prepare the execution environment (Ray actors, dependency metadata).
+                _ = self._setup_execution_environment()
+                self.logger.debug("Execution environment and dependency metadata are set.")
+
+                # 4. Wait for the scheduled update time, if not forcing an immediate run.
+                if not self.force_update:
+                    self.ts.local_time_serie.wait_for_update_time()
+
+                # 5. Trigger the core update process.
+                error_on_last_update, updated_df = self._start_update(
+                    use_state_for_update=True,
+                    override_update_stats=self.override_update_stats
+                )
+
+                return error_on_last_update, updated_df
+
+        except DependencyUpdateError as de:
+            self.logger.error("A dependency failed to update, halting the run.", error=de)
+            error_to_raise = de
+        except TimeoutError as te:
+            self.logger.error("The update process timed out.", error=te)
+            error_to_raise = te
+        except Exception as e:
+            self.logger.exception("An unexpected error occurred during the update run.")
+            error_to_raise = e
+        finally:
+            # 6. Clean up resources.
+            # Stop the scheduler heartbeat if it was created by this runner.
+            if self.remote_scheduler is None and self.scheduler:
+                self.scheduler.stop_heart_beat()
+
+            # Clean up temporary attributes on the DataNode instance.
+            if hasattr(self.ts, 'update_tracker'):
+                del self.ts.update_tracker
+
+            gc.collect()
+
+        # 7. Re-raise any captured exception after cleanup.
+        if error_to_raise:
+            raise error_to_raise
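
For orientation, a minimal, hypothetical sketch of driving this runner; `MyDataNode` stands in for any DataNode subclass defined in a project, the constructor arguments follow the signature above, and the import path assumes the hunk above is mainsequence/tdag/data_nodes/run_operations.py:

    # Hypothetical usage sketch (not part of the package).
    from mainsequence.tdag.data_nodes.run_operations import UpdateRunner

    node = MyDataNode()  # assumed DataNode subclass
    runner = UpdateRunner(time_serie=node, debug_mode=True, force_update=True)
    error_on_last_update, updated_df = runner.run()
    if not error_on_last_update:
        print(updated_df.tail())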
mainsequence/tdag/data_nodes/utils.py
@@ -0,0 +1,24 @@
+import datetime
+
+
+def string_freq_to_time_delta(frequency):
+    """Converts a compact frequency string ("<N>m" or "<N>d") to a timedelta."""
+    if "m" in frequency:
+        kwargs = {"minutes": int(frequency.replace("m", ""))}
+    elif "d" in frequency:
+        kwargs = {"days": int(frequency.replace("d", ""))}
+    else:
+        raise NotImplementedError(f"Unsupported frequency string: {frequency}")
+
+    return datetime.timedelta(**kwargs)
+
+
+def string_frequency_to_minutes(frequency):
+    """Converts a compact frequency string ("<N>m" or "<N>d") to minutes."""
+    if "m" in frequency:
+        minutes = int(frequency.replace("m", ""))
+    elif "d" in frequency:
+        minutes = int(frequency.replace("d", "")) * 24 * 60
+    else:
+        raise NotImplementedError(f"Unsupported frequency string: {frequency}")
+
+    return minutes
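
Both helpers only understand the compact "<N>m" (minutes) and "<N>d" (days) forms; anything else raises NotImplementedError. A quick illustration of the expected conversions:

    # Illustration of the supported inputs.
    import datetime
    assert string_freq_to_time_delta("30m") == datetime.timedelta(minutes=30)
    assert string_freq_to_time_delta("2d") == datetime.timedelta(days=2)
    assert string_frequency_to_minutes("2d") == 2 * 24 * 60  # 2880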
mainsequence/tdag/future_registry.py
@@ -0,0 +1,25 @@
+# future_registry.py
+import threading
+
+# Use a thread-safe set to store pending futures.
+_pending_futures = set()
+_registry_lock = threading.Lock()
+
+
+def add_future(future):
+    """Add a future to the global registry."""
+    with _registry_lock:
+        _pending_futures.add(future)
+
+
+def remove_future(future):
+    """Remove a future from the global registry."""
+    with _registry_lock:
+        _pending_futures.discard(future)
+
+
+def wait_for_all_futures():
+    """Wait for all registered futures to complete."""
+    # Take a snapshot under the lock, but wait outside it so that completion
+    # callbacks calling remove_future() cannot deadlock on the registry lock.
+    with _registry_lock:
+        futures = list(_pending_futures)
+    for future in futures:
+        # This call blocks until the future completes.
+        future.result()
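
A short, hypothetical sketch of the intended flow with concurrent.futures (the executor and task are illustrative): register each future as it is submitted, let its done-callback deregister it, and block at shutdown until everything has finished.

    from concurrent.futures import ThreadPoolExecutor
    from mainsequence.tdag import future_registry

    executor = ThreadPoolExecutor(max_workers=4)
    future = executor.submit(lambda: 42)  # illustrative task
    future_registry.add_future(future)
    future.add_done_callback(future_registry.remove_future)

    future_registry.wait_for_all_futures()  # returns once the task completes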
mainsequence/tdag/utils.py
@@ -0,0 +1,40 @@
+import os
+import yaml
+import hashlib
+import json
+import socket
+from mainsequence.logconf import logger
+
+
+def get_host_name():
+    return socket.gethostname()
+
+
+def read_yaml(path):
+    # If the file does not exist, create it with a placeholder mapping.
+    if not os.path.exists(path):
+        empty_yaml = {".": "."}
+        if not os.path.isdir(os.path.dirname(path)):
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+        write_yaml(path, empty_yaml)
+
+    with open(path) as stream:
+        # UnsafeLoader can instantiate arbitrary Python objects; only use on trusted files.
+        read = yaml.load(stream, Loader=yaml.UnsafeLoader)
+
+    return read
+
+
+def write_yaml(path, dict_file):
+    with open(path, 'w') as f:
+        yaml.dump(dict_file, f, default_flow_style=False, sort_keys=False)
+
+
+def read_key_from_yaml(key, path):
+    yaml_file = read_yaml(path)
+    return yaml_file.get(key)
+
+
+def hash_dict(dict_to_hash: dict) -> str:
+    dhash = hashlib.md5()
+    encoded = json.dumps(dict_to_hash, sort_keys=True, default=str).encode()
+    dhash.update(encoded)
+    return dhash.hexdigest()
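
hash_dict produces a stable digest because keys are sorted and non-JSON values fall back to str(); for example, key order does not change the result (import path assumes the module shown above):

    import datetime
    from mainsequence.tdag.utils import hash_dict

    a = hash_dict({"x": 1, "ts": datetime.date(2024, 1, 1)})
    b = hash_dict({"ts": datetime.date(2024, 1, 1), "x": 1})
    assert a == b  # same 32-character hex MD5 digest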
mainsequence/virtualfundbuilder/__init__.py
@@ -0,0 +1,45 @@
+__version__ = '0.1.0'
+
+from pathlib import Path
+import os
+import sys
+
+from mainsequence.virtualfundbuilder.__main__ import get_pod_configuration
+
+
+def load_env():
+    assert os.environ.get("VFB_PROJECT_PATH", None) is not None, "VFB_PROJECT_PATH environment variable not set"
+
+    from mainsequence.tdag.config import Configuration
+    # This step is needed to ensure env variables are passed to the Ray cluster.
+    Configuration.add_env_variables_to_registry(["VFB_PROJECT_PATH"])
+
+    sys.path.append(str(Path(os.environ.get("VFB_PROJECT_PATH")).parent))
+
+
+load_env()
+from mainsequence.virtualfundbuilder.utils import (
+    GECKO_SYMBOL_MAPPING,
+    TIMEDELTA,
+    reindex_df,
+    convert_to_binance_frequency,
+    get_last_query_times_per_asset,
+    build_rolling_regression_from_df,
+    runs_in_main_process
+)
+
+
+def register_default_strategies():
+    # Keep this in a function so the imports do not clutter the library's namespace.
+    import mainsequence.virtualfundbuilder.contrib.apps
+    import mainsequence.virtualfundbuilder.contrib.data_nodes
+    import mainsequence.virtualfundbuilder.contrib.rebalance_strategies
+
+
+if os.getenv("PROJECT_LIBRARY_NAME") is None:
+    # TODO: workaround for now to make local execution work.
+    os.environ["PROJECT_LIBRARY_NAME"] = Path(os.environ.get("VFB_PROJECT_PATH")).name
+
+RUNS_IN_JOB = os.getenv("JOB_ID", None)
+if RUNS_IN_JOB:
+    register_default_strategies()
+
+if runs_in_main_process():
+    get_pod_configuration()
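
Because load_env() runs at import time, VFB_PROJECT_PATH must be set before the package is imported. A minimal, hypothetical setup (the project path is illustrative):

    import os
    os.environ["VFB_PROJECT_PATH"] = "/home/user/my_vfb_project"

    import mainsequence.virtualfundbuilder as vfb
    vfb.register_default_strategies()  # explicit registration when not running in a job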