mainsequence-2.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. mainsequence/__init__.py +0 -0
  2. mainsequence/__main__.py +9 -0
  3. mainsequence/cli/__init__.py +1 -0
  4. mainsequence/cli/api.py +157 -0
  5. mainsequence/cli/cli.py +442 -0
  6. mainsequence/cli/config.py +78 -0
  7. mainsequence/cli/ssh_utils.py +126 -0
  8. mainsequence/client/__init__.py +17 -0
  9. mainsequence/client/base.py +431 -0
  10. mainsequence/client/data_sources_interfaces/__init__.py +0 -0
  11. mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
  12. mainsequence/client/data_sources_interfaces/timescale.py +479 -0
  13. mainsequence/client/models_helpers.py +113 -0
  14. mainsequence/client/models_report_studio.py +412 -0
  15. mainsequence/client/models_tdag.py +2276 -0
  16. mainsequence/client/models_vam.py +1983 -0
  17. mainsequence/client/utils.py +387 -0
  18. mainsequence/dashboards/__init__.py +0 -0
  19. mainsequence/dashboards/streamlit/__init__.py +0 -0
  20. mainsequence/dashboards/streamlit/assets/config.toml +12 -0
  21. mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
  22. mainsequence/dashboards/streamlit/assets/logo.png +0 -0
  23. mainsequence/dashboards/streamlit/core/__init__.py +0 -0
  24. mainsequence/dashboards/streamlit/core/theme.py +212 -0
  25. mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
  26. mainsequence/dashboards/streamlit/scaffold.py +220 -0
  27. mainsequence/instrumentation/__init__.py +7 -0
  28. mainsequence/instrumentation/utils.py +101 -0
  29. mainsequence/instruments/__init__.py +1 -0
  30. mainsequence/instruments/data_interface/__init__.py +10 -0
  31. mainsequence/instruments/data_interface/data_interface.py +361 -0
  32. mainsequence/instruments/instruments/__init__.py +3 -0
  33. mainsequence/instruments/instruments/base_instrument.py +85 -0
  34. mainsequence/instruments/instruments/bond.py +447 -0
  35. mainsequence/instruments/instruments/european_option.py +74 -0
  36. mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
  37. mainsequence/instruments/instruments/json_codec.py +585 -0
  38. mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
  39. mainsequence/instruments/instruments/position.py +475 -0
  40. mainsequence/instruments/instruments/ql_fields.py +239 -0
  41. mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
  42. mainsequence/instruments/pricing_models/__init__.py +0 -0
  43. mainsequence/instruments/pricing_models/black_scholes.py +49 -0
  44. mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
  45. mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
  46. mainsequence/instruments/pricing_models/indices.py +350 -0
  47. mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
  48. mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
  49. mainsequence/instruments/settings.py +175 -0
  50. mainsequence/instruments/utils.py +29 -0
  51. mainsequence/logconf.py +284 -0
  52. mainsequence/reportbuilder/__init__.py +0 -0
  53. mainsequence/reportbuilder/__main__.py +0 -0
  54. mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
  55. mainsequence/reportbuilder/model.py +713 -0
  56. mainsequence/reportbuilder/slide_templates.py +532 -0
  57. mainsequence/tdag/__init__.py +8 -0
  58. mainsequence/tdag/__main__.py +0 -0
  59. mainsequence/tdag/config.py +129 -0
  60. mainsequence/tdag/data_nodes/__init__.py +12 -0
  61. mainsequence/tdag/data_nodes/build_operations.py +751 -0
  62. mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
  63. mainsequence/tdag/data_nodes/persist_managers.py +812 -0
  64. mainsequence/tdag/data_nodes/run_operations.py +543 -0
  65. mainsequence/tdag/data_nodes/utils.py +24 -0
  66. mainsequence/tdag/future_registry.py +25 -0
  67. mainsequence/tdag/utils.py +40 -0
  68. mainsequence/virtualfundbuilder/__init__.py +45 -0
  69. mainsequence/virtualfundbuilder/__main__.py +235 -0
  70. mainsequence/virtualfundbuilder/agent_interface.py +77 -0
  71. mainsequence/virtualfundbuilder/config_handling.py +86 -0
  72. mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
  73. mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
  74. mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
  75. mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
  76. mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
  77. mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
  78. mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
  79. mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
  80. mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
  81. mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
  82. mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
  83. mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
  84. mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
  85. mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
  86. mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
  87. mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
  88. mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
  89. mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
  90. mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
  91. mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
  92. mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
  93. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
  94. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
  95. mainsequence/virtualfundbuilder/data_nodes.py +637 -0
  96. mainsequence/virtualfundbuilder/enums.py +23 -0
  97. mainsequence/virtualfundbuilder/models.py +282 -0
  98. mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
  99. mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
  100. mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
  101. mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
  102. mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
  103. mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
  104. mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
  105. mainsequence/virtualfundbuilder/utils.py +381 -0
  106. mainsequence-2.0.0.dist-info/METADATA +105 -0
  107. mainsequence-2.0.0.dist-info/RECORD +110 -0
  108. mainsequence-2.0.0.dist-info/WHEEL +5 -0
  109. mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
  110. mainsequence-2.0.0.dist-info/top_level.txt +1 -0
mainsequence/client/models_tdag.py
@@ -0,0 +1,2276 @@
1
+ from importlib.metadata import metadata
2
+
3
+ import yaml
4
+
5
+ from .base import BasePydanticModel, BaseObjectOrm, TDAG_ENDPOINT
6
+ from .data_sources_interfaces.duckdb import DuckDBInterface
7
+ from .utils import (is_process_running, get_network_ip, DateInfo,
8
+ TDAG_CONSTANTS, DataFrequency, UniqueIdentifierRangeMap,
9
+ DATE_FORMAT, AuthLoaders, make_request, set_types_in_table, request_to_datetime, serialize_to_json,
10
+ bios_uuid)
11
+ import copy
12
+ import datetime
13
+ import pytz
14
+ import requests
15
+ import pandas as pd
16
+ import json
17
+ from typing import Union
18
+ import time
19
+ import os
20
+ from mainsequence.logconf import logger
21
+
22
+ from pydantic import BaseModel, Field, field_validator
23
+ from typing import Optional, List, Dict, Any, TypedDict, Tuple
24
+ from .data_sources_interfaces import timescale as TimeScaleInterface
25
+ from functools import wraps
26
+ import math
27
+ import gzip
28
+ import base64
29
+ import numpy as np
30
+ import concurrent.futures
31
+
32
+ _default_data_source = None # Module-level cache
33
+
34
+ JSON_COMPRESSED_PREFIX = ["json_compressed", "jcomp_"]
35
+
36
+ loaders = AuthLoaders()
37
+
38
+ # Global executor (or you could define one on your class)
39
+ _executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
40
+ DUCK_DB = "duck_db"
41
+
42
+
43
+ class AlreadyExist(Exception):
44
+ pass
45
+
46
+
47
+ def build_session(loaders):
48
+ from requests.adapters import HTTPAdapter, Retry
49
+ s = requests.Session()
50
+ s.headers.update(loaders.auth_headers)
51
+ retries = Retry(total=2, backoff_factor=2, )
52
+ s.mount('http://', HTTPAdapter(max_retries=retries))
53
+ return s
54
+
55
+
56
+ session = build_session(loaders=loaders)
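A self-contained sketch of the same retry wiring used by build_session above (the header value and URL are placeholders, and the extra https:// mount is an assumption, since the helper above only mounts http://):

import requests
from requests.adapters import HTTPAdapter, Retry

def build_retrying_session(auth_headers: dict) -> requests.Session:
    s = requests.Session()
    s.headers.update(auth_headers)
    retries = Retry(total=2, backoff_factor=2)  # same policy as build_session above
    s.mount("http://", HTTPAdapter(max_retries=retries))
    s.mount("https://", HTTPAdapter(max_retries=retries))  # assumption: retry TLS endpoints too
    return s

demo_session = build_retrying_session({"Authorization": "Token <placeholder>"})
# demo_session.get("http://example.invalid/health")  # placeholder URL, not a package endpoint
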
57
+
58
+
59
+ class SchedulerDoesNotExist(Exception):
60
+ pass
61
+
62
+
63
+ class LocalTimeSeriesDoesNotExist(Exception):
64
+ pass
65
+
66
+
67
+ class DynamicTableDoesNotExist(Exception):
68
+ pass
69
+
70
+
71
+ class SourceTableConfigurationDoesNotExist(Exception):
72
+ pass
73
+
74
+
75
+ class ColumnMetaData(BasePydanticModel, BaseObjectOrm):
76
+ source_config_id: int = Field(
77
+ ...,
78
+ alias="source_config",
79
+ description="Primary key of the related SourceTableConfiguration"
80
+ )
81
+ column_name: str = Field(
82
+ ...,
83
+ max_length=63,
84
+ description="Name of the column (must match column_dtypes_map key)"
85
+ )
86
+ dtype: str = Field(
87
+ ...,
88
+ max_length=100,
89
+ description="Data type (will be synced from the configuration’s dtype map)"
90
+ )
91
+ label: str = Field(
92
+ ...,
93
+ max_length=250,
94
+ description="Human‐readable label"
95
+ )
96
+ description: str = Field(
97
+ ...,
98
+ description="Longer description of the column"
99
+ )
100
+
101
+
102
+ class SourceTableConfiguration(BasePydanticModel, BaseObjectOrm):
103
+ id: Optional[int] = Field(None, description="Primary key, auto-incremented ID")
104
+ related_table: Union[int, "DynamicTableMetaData"]
105
+ time_index_name: str = Field(..., max_length=100, description="Time index name")
106
+ column_dtypes_map: Dict[str, Any] = Field(..., description="Column data types map")
107
+ index_names: List
108
+ last_time_index_value: Optional[datetime.datetime] = Field(None, description="Last time index value")
109
+ earliest_index_value: Optional[datetime.datetime] = Field(None, description="Earliest index value")
110
+
111
+ # multi_index_stats: Optional[Dict[str, Any]] = Field(None, description="Multi-index statistics JSON field")
112
+ # multi_index_column_stats:Optional[Dict[str, Any]] = Field(None, description="Multi-index statistics JSON field column based")
113
+
114
+ table_partition: Dict[str, Any] = Field(..., description="Table partition settings")
115
+ open_for_everyone: bool = Field(default=False, description="Whether the table configuration is open for everyone")
116
+ columns_metadata: Optional[List[ColumnMetaData]] = None
117
+
118
+ # todo remove
119
+ column_index_names: Optional[list] = [None]
120
+
121
+ def get_data_updates(self):
122
+ max_per_asset = None
123
+
124
+ url = self.get_object_url() + f"/{self.related_table}/get_stats/"
125
+ s = self.build_session()
126
+ r = make_request(s=s, loaders=self.LOADERS, r_type="GET", url=url, accept_gzip=True)
127
+ if r.status_code != 200:
128
+ raise Exception(r.text)
129
+ data = r.json()
130
+ multi_index_stats = data["multi_index_stats"]
131
+ multi_index_column_stats = data["multi_index_column_stats"]
132
+ max_time_index_value = self.last_time_index_value
133
+ if multi_index_stats is not None:
134
+ max_per_asset = multi_index_stats["max_per_asset_symbol"]
135
+ max_per_asset = {k: request_to_datetime(v) for k, v in max_per_asset.items()}
136
+ max_time_index_value = np.max(list(max_per_asset.values()))
137
+
138
+ du = UpdateStatistics(
139
+ max_time_index_value=max_time_index_value,
140
+ asset_time_statistics=max_per_asset,
141
+ multi_index_column_stats=multi_index_column_stats
142
+ )
143
+
144
+ du._max_time_in_update_statistics = max_time_index_value
145
+ return du
146
+
147
+ def get_time_scale_extra_table_indices(self) -> dict:
148
+ url = self.get_object_url() + f"/{self.related_table}/get_time_scale_extra_table_indices/"
149
+ s = self.build_session()
150
+ r = make_request(s=s, loaders=self.LOADERS, r_type="GET", url=url, )
151
+ if r.status_code != 200:
152
+ raise Exception(r.text)
153
+ return r.json()
154
+
155
+ def set_or_update_columns_metadata(self, columns_metadata: List[ColumnMetaData],
156
+ timeout=None):
157
+ """
158
+ """
159
+
160
+ columns_metadata = [
161
+ c.model_dump(exclude={'orm_class'})
162
+ for c in columns_metadata
163
+ ]
164
+ url = self.get_object_url() + f"/{self.related_table}/set_or_update_columns_metadata/"
165
+ s = self.build_session()
166
+ r = make_request(s=s, loaders=self.LOADERS, r_type="POST",
167
+ time_out=timeout,
168
+ url=url, payload={"json": {"columns_metadata": columns_metadata}})
169
+ if r.status_code not in [200, 201]:
170
+ raise Exception(r.text)
171
+ return r.json()
172
+
173
+ def patch(self, *args, **kwargs):
174
+ # related table is the primary key of this model
175
+ if isinstance(self.related_table, int):
176
+ id = self.related_table
177
+ else:
178
+ id = self.related_table.id
179
+ return self.__class__.patch_by_id(id, *args, **kwargs)
180
+
181
+
182
+ class ColumnMetaData(BasePydanticModel):
183
+ source_config_id: Optional[int] = Field(None, description="FK to SourceTableConfiguration")
184
+ column_name: str = Field(..., max_length=63, description="Name of the column")
185
+ dtype: str = Field(..., max_length=100, description="Data type of the column")
186
+ label: str = Field(..., max_length=255, description="Human-readable label")
187
+ description: str = Field(..., description="Detailed description")
188
+
189
+
190
+ class LocalTimeSerie(BasePydanticModel, BaseObjectOrm):
191
+ id: Optional[int] = Field(None, description="Primary key, auto-incremented ID")
192
+ update_hash: str = Field(..., max_length=63, description="Max length of PostgreSQL table name")
193
+ remote_table: Union[int, "DynamicTableMetaData"]
194
+ build_configuration: Dict[str, Any] = Field(..., description="Configuration in JSON format")
195
+ build_meta_data: Optional[Dict[str, Any]] = Field(None, description="Optional YAML metadata")
196
+ ogm_dependencies_linked: bool = Field(default=False, description="OGM dependencies linked flag")
197
+ tags: Optional[list[str]] = Field(default=[], description="List of tags")
198
+ description: Optional[str] = Field(None, description="Optional HTML description")
199
+ localtimeserieupdatedetails: Optional[Union["LocalTimeSerieUpdateDetails", int]] = None
200
+ run_configuration: Optional["RunConfiguration"] = None
201
+ open_for_everyone: bool = Field(default=False, description="Whether the ts is open for everyone")
202
+
203
+ @property
204
+ def data_source_id(self):
205
+ if isinstance(self.remote_table.data_source, int):
206
+ return self.remote_table.data_source
207
+ else:
208
+ return self.remote_table.data_source.id
209
+
210
+ @classmethod
211
+ def get_or_create(cls, **kwargs):
212
+ url = cls.get_object_url() + "/get_or_create/"
213
+ kwargs = serialize_to_json(kwargs)
214
+
215
+ payload = {"json": kwargs}
216
+ s = cls.build_session()
217
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload=payload)
218
+ if r.status_code not in [200, 201]:
219
+ raise Exception(r.text)
220
+ data = r.json()
221
+
222
+ return cls(**data)
223
+
224
+ def add_tags(self, tags: list, timeout=None):
225
+ base_url = self.get_object_url()
226
+ s = self.build_session()
227
+ payload = {"json": {"tags": tags}}
228
+ # r = self.s.get(, )
229
+ url = f"{base_url}/{self.id}/add_tags/"
230
+ r = make_request(s=s, loaders=self.LOADERS, r_type="PATCH", url=url,
231
+ payload=payload,
232
+ time_out=timeout)
233
+ if r.status_code != 200:
234
+ raise Exception(f"Error in request {r.json()}")
235
+ return r.json()
236
+
237
+ @classmethod
238
+ def filter_by_hash_id(cls, local_hash_id_list: list, timeout=None):
239
+ s = cls.build_session()
240
+ base_url = cls.get_object_url()
241
+ url = f"{base_url}/filter_by_hash_id/"
242
+ payload = {"json": {"local_hash_id__in": local_hash_id_list}, }
243
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload=payload, time_out=timeout)
244
+ if r.status_code != 200:
245
+ raise Exception(f"{r.text}")
246
+ all_metadatas = {m["update_hash"]: m for m in r.json()}
247
+ return all_metadatas
248
+
249
+ def set_start_of_execution(self, **kwargs):
250
+ s = self.build_session()
251
+ base_url = self.get_object_url()
252
+ payload = {"json": kwargs}
253
+ url = f"{base_url}/{self.id}/set_start_of_execution/"
254
+ r = make_request(s=s, loaders=self.LOADERS, r_type="PATCH", url=url, payload=payload,
255
+ accept_gzip=True)
256
+ if r.status_code != 201:
257
+ raise Exception(f"Error in request {r.text}")
258
+
259
+ def _recurse_to_datetime(node):
260
+ if isinstance(node, dict):
261
+ return {k: _recurse_to_datetime(v) for k, v in node.items()}
262
+ # leaf: assume it’s your timestamp string
263
+ return request_to_datetime(node)
264
+
265
+ result = r.json()
266
+ if result["last_time_index_value"] is not None:
267
+ result["last_time_index_value"] = datetime.datetime.fromtimestamp(result["last_time_index_value"], tz=pytz.utc)
268
+
269
+ if result['asset_time_statistics'] is not None:
270
+ result['asset_time_statistics'] = _recurse_to_datetime(
271
+ result['asset_time_statistics']
272
+ )
273
+
274
+ hu = LocalTimeSeriesHistoricalUpdate(
275
+ **result["historical_update"],
276
+ update_statistics=UpdateStatistics(
277
+ asset_time_statistics=result['asset_time_statistics'],
278
+ max_time_index_value=result["last_time_index_value"],
279
+ multi_index_column_stats=result["multi_index_column_stats"],
280
+ ),
281
+ must_update=result["must_update"],
282
+ direct_dependencies_ids=result["direct_dependencies_ids"]
283
+ )
284
+ return hu
285
+
286
+ def set_end_of_execution(
287
+ self,
288
+ historical_update_id: int,
289
+ timeout=None, threaded_request=True,
290
+ **kwargs
291
+ ):
292
+ s = self.build_session()
293
+ url = self.get_object_url() + f"/{self.id}/set_end_of_execution/"
294
+ kwargs.update(dict(historical_update_id=historical_update_id))
295
+ payload = {"json": kwargs}
296
+
297
+ def _do_request():
298
+ r = make_request(s=s, loaders=self.LOADERS, r_type="PATCH", url=url, payload=payload, time_out=timeout)
299
+ if r.status_code != 200:
300
+ raise Exception(f"Error in request {r.text}")
301
+ return r
302
+
303
+ if threaded_request:
304
+ # Submit the request to an executor. The returned Future will be non-blocking.
305
+ future = _executor.submit(_do_request)
306
+
307
+ # Optionally, attach a callback to log failures. (Exceptions will also be
308
+ # re-raised when someone calls future.result().)
309
+ def _handle_exception(fut):
310
+ try:
311
+ fut.result() # This will re-raise any exception caught in _do_request.
312
+ except Exception as e:
313
+ logger.error("set_end_of_execution: request failed: %s", e)
314
+
315
+ future.add_done_callback(_handle_exception)
316
+ return future
317
+ else:
318
+ # Synchronous execution that will raise exceptions inline.
319
+ return _do_request()
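The threaded branch above returns a Future immediately and surfaces failures via a done-callback; a minimal standalone sketch of that pattern (the names here are illustrative, not part of the package):

import concurrent.futures
import logging

logger = logging.getLogger(__name__)
_executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)

def _do_request():
    # stand-in for the PATCH call; raise here to simulate a failed request
    return "ok"

future = _executor.submit(_do_request)

def _handle_exception(fut):
    try:
        fut.result()  # re-raises any exception raised inside _do_request
    except Exception as e:
        logger.error("sketch: request failed: %s", e)

future.add_done_callback(_handle_exception)
# the caller keeps going; future.result() can be called later to block and surface errors
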
320
+
321
+ @classmethod
322
+ def batch_set_end_of_execution(cls, update_map: dict, timeout=None):
323
+ s = cls.build_session()
324
+ url = f"{cls.get_object_url()}/batch_set_end_of_execution/"
325
+ payload = {"json": {"update_map": update_map}}
326
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="PATCH", url=url, payload=payload, time_out=timeout)
327
+ if r.status_code != 200:
328
+ raise Exception(f"Error in request {r.text}")
329
+
330
+ @classmethod
331
+ def set_last_update_index_time(cls, metadata, timeout=None):
332
+ s = cls.build_session()
333
+ url = cls.get_object_url() + f"/{metadata['id']}/set_last_update_index_time/"
334
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="GET", url=url, time_out=timeout)
335
+
336
+ if r.status_code == 404:
337
+ raise SourceTableConfigurationDoesNotExist
338
+
339
+ if r.status_code != 200:
340
+ raise Exception(f"{metadata['update_hash']}{r.text}")
341
+ return r
342
+
343
+ def set_last_update_index_time_from_update_stats(
344
+ self,
345
+ last_time_index_value: float,
346
+ max_per_asset_symbol,
347
+ multi_index_column_stats,
348
+ timeout=None
349
+ ) -> "LocalTimeSerie":
350
+ s = self.build_session()
351
+ url = self.get_object_url() + f"/{self.id}/set_last_update_index_time_from_update_stats/"
352
+
353
+ data_to_comp = {
354
+ "last_time_index_value": last_time_index_value,
355
+ "max_per_asset_symbol": max_per_asset_symbol,
356
+ "multi_index_column_stats": multi_index_column_stats,
357
+ }
358
+ chunk_json_str = json.dumps(data_to_comp)
359
+ compressed = gzip.compress(chunk_json_str.encode('utf-8'))
360
+ compressed_b64 = base64.b64encode(compressed).decode('utf-8')
361
+ payload = dict(json={
362
+ "data": compressed_b64, # compressed JSON data
363
+ })
364
+
365
+ r = make_request(s=s, loaders=self.LOADERS, payload=payload, r_type="POST", url=url, time_out=timeout)
366
+
367
+ if r.status_code == 404:
368
+ raise SourceTableConfigurationDoesNotExist
369
+
370
+ if r.status_code != 200:
371
+ raise Exception(f"{self.update_hash}{r.text}")
372
+ return LocalTimeSerie(**r.json())
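The payload built above is plain JSON that is gzip-compressed and then base64-encoded; a small round-trip sketch (the decode half is an assumption about what the receiving endpoint does, it is not taken from this package):

import base64
import gzip
import json

data_to_comp = {
    "last_time_index_value": 1700000000.0,
    "max_per_asset_symbol": {"AAPL": 1700000000.0},
}

# encode exactly as set_last_update_index_time_from_update_stats does
compressed_b64 = base64.b64encode(
    gzip.compress(json.dumps(data_to_comp).encode("utf-8"))
).decode("utf-8")

# hypothetical server side: reverse the two steps to recover the dict
decoded = json.loads(gzip.decompress(base64.b64decode(compressed_b64)).decode("utf-8"))
assert decoded == data_to_comp
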
373
+
374
+ @classmethod
375
+ def create_historical_update(cls, *args, **kwargs):
376
+ s = cls.build_session()
377
+ base_url = cls.ENDPOINT["LocalTimeSerieHistoricalUpdate"]
378
+ data = serialize_to_json(kwargs)
379
+ payload = {"json": data, }
380
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=f"{base_url}/", payload=payload)
381
+ if r.status_code != 201:
382
+ raise Exception(f"Error in request {r.url} {r.text}")
383
+
384
+ @classmethod
385
+ def get_mermaid_dependency_diagram(cls, update_hash, data_source_id, desc=True, timeout=None) -> dict:
386
+ s = cls.build_session()
387
+ url = cls.get_object_url(
388
+ "DataNode") + f"/{update_hash}/dependencies_graph_mermaid?desc={desc}&data_source_id={data_source_id}"
389
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="GET", url=url,
390
+ time_out=timeout)
391
+ if r.status_code != 200:
392
+ raise Exception(f"Error in request {r.text}")
393
+
394
+ return r.json()
395
+
396
+ def get_all_dependencies_update_priority(self, timeout=None) -> pd.DataFrame:
397
+ s = self.build_session()
398
+ url = self.get_object_url() + f"/{self.id}/get_all_dependencies_update_priority/"
399
+ r = make_request(s=s, loaders=self.LOADERS, r_type="GET", url=url, time_out=timeout)
400
+ if r.status_code != 200:
401
+ raise Exception(f"Error in request {r.text}")
402
+
403
+ depth_df = pd.DataFrame(r.json())
404
+ return depth_df
405
+
406
+ @classmethod
407
+ def get_upstream_nodes(cls, storage_hash, data_source_id, timeout=None):
408
+ s = cls.build_session()
409
+ url = cls.get_object_url("DataNode") + f"/{storage_hash}/get_upstream_nodes?data_source_id={data_source_id}"
410
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="GET", url=url, time_out=timeout)
411
+ if r.status_code != 200:
412
+ raise Exception(f"Error in request {r.text}")
413
+
414
+ depth_df = pd.DataFrame(r.json())
415
+ return depth_df
416
+
417
+ @classmethod
418
+ def create(cls, timeout=None, *args, **kwargs):
419
+ url = cls.get_object_url("DataNode") + "/"
420
+ payload = {"json": serialize_to_json(kwargs)}
421
+ s = cls.build_session()
422
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload=payload, time_out=timeout)
423
+ if r.status_code != 201:
424
+ raise Exception(f"Error in request {r.text}")
425
+ instance = cls(**r.json())
426
+ return instance
427
+
428
+ def verify_if_direct_dependencies_are_updated(self) -> dict:
429
+ """
430
+ Response({
431
+ "error_on_update_dependencies": False,
432
+ "updated": all_success,
433
+ })
434
+ """
435
+ s = self.build_session()
436
+ url = self.get_object_url() + f"/{self.id}/verify_if_direct_dependencies_are_updated/"
437
+ r = make_request(s=s, loaders=None, r_type="GET", url=url)
438
+ if r.status_code != 200:
439
+ raise Exception(f"Error in request: {r.text}")
440
+ return r.json()
441
+
442
+ def get_data_between_dates_from_api(
443
+ self,
444
+ *args, **kwargs
445
+ ):
446
+
447
+ return self.remote_table.get_data_between_dates_from_api(*args, **kwargs)
448
+
449
+ @classmethod
450
+ def insert_data_into_table(cls, local_metadata_id, records: List[dict],
451
+ overwrite=True, add_insertion_time=False):
452
+ s = cls.build_session()
453
+ url = cls.get_object_url() + f"/{local_metadata_id}/insert_data_into_table/"
454
+
455
+ chunk_json_str = json.dumps(records)
456
+ compressed = gzip.compress(chunk_json_str.encode('utf-8'))
457
+ compressed_b64 = base64.b64encode(compressed).decode('utf-8')
458
+
459
+ payload = dict(json={
460
+ "data": compressed_b64, # compressed JSON data
461
+ "chunk_stats": None,
462
+ "overwrite": overwrite,
463
+ "chunk_index": 0,
464
+ "total_chunks": 1,
465
+ })
466
+
467
+ try:
468
+ r = make_request(s=s, loaders=None, payload=payload, r_type="POST", url=url, time_out=60 * 15)
469
+ if r.status_code not in [200, 204]:
470
+ logger.warning(f"Error in request: {r.text}")
471
+ else:
+ logger.info("Chunk uploaded successfully.")
472
+ except requests.exceptions.RequestException as e:
473
+ logger.exception(f"Error uploading chunk : {e}")
474
+ # Optionally, you could retry or break here
475
+ raise e
476
+ if r.status_code not in [200, 204]:
477
+ raise Exception(r.text)
478
+
479
+ @classmethod
480
+ def post_data_frame_in_chunks(
481
+ cls,
482
+ serialized_data_frame: pd.DataFrame,
483
+ chunk_size: int = 50_000,
484
+ local_metadata: dict = None,
485
+ data_source: str = None,
486
+ index_names: list = None,
487
+ time_index_name: str = 'timestamp',
488
+ overwrite: bool = False,
489
+ ):
490
+ """
491
+ Sends a large DataFrame to a Django backend in multiple chunks.
492
+ If a chunk is too large (HTTP 413), it's automatically split in half and retried.
493
+ """
494
+ s = cls.build_session()
495
+ url = cls.get_object_url() + f"/{local_metadata.id}/insert_data_into_table/"
496
+
497
+ def _send_chunk_recursively(df_chunk: pd.DataFrame, chunk_idx: int, total_chunks: int,
498
+ is_sub_chunk: bool = False):
499
+ """
500
+ Internal helper to send a chunk. If it receives a 413 error, it splits
501
+ the chunk and calls itself on the two halves.
502
+ """
503
+ if df_chunk.empty:
504
+ return
505
+
506
+ part_label = f"{chunk_idx + 1}/{total_chunks}" if not is_sub_chunk else f"sub-chunk of {chunk_idx + 1}"
507
+
508
+ # Prepare the payload
509
+ chunk_stats, _ = get_chunk_stats(
510
+ chunk_df=df_chunk, index_names=index_names, time_index_name=time_index_name
511
+ )
512
+ chunk_json_str = df_chunk.to_json(orient="records", date_format="iso")
513
+ compressed = gzip.compress(chunk_json_str.encode('utf-8'))
514
+ compressed_b64 = base64.b64encode(compressed).decode('utf-8')
515
+
516
+ # For sub-chunks, we treat it as a new, single-chunk upload.
517
+ payload = dict(json={
518
+ "data": compressed_b64,
519
+ "chunk_stats": chunk_stats,
520
+ "overwrite": overwrite,
521
+ "chunk_index": 0 if is_sub_chunk else chunk_idx,
522
+ "total_chunks": 1 if is_sub_chunk else total_chunks,
523
+ })
524
+
525
+ try:
526
+ r = make_request(s=s, loaders=None, payload=payload, r_type="POST", url=url, time_out=60 * 15)
527
+
528
+ if r.status_code in [200, 204]:
529
+ logger.info(f"Chunk {part_label} ({len(df_chunk)} rows) uploaded successfully.")
530
+ return
531
+
532
+ if r.status_code == 413:
533
+ logger.warning(
534
+ f"Chunk {part_label} ({len(df_chunk)} rows) is too large (413). "
535
+ f"Splitting in half and retrying as new uploads."
536
+ )
537
+ if len(df_chunk) <= 1:
538
+ logger.error(
539
+ f"A single row is too large to upload (from chunk {part_label}). Cannot split further.")
540
+ raise Exception(f"A single row from chunk {part_label} is too large to upload.")
541
+
542
+ mid_point = len(df_chunk) // 2
543
+ first_half = df_chunk.iloc[:mid_point]
544
+ second_half = df_chunk.iloc[mid_point:]
545
+
546
+ # Recursively call for each half, marking them as sub-chunks.
547
+ _send_chunk_recursively(first_half, chunk_idx, total_chunks, is_sub_chunk=True)
548
+ _send_chunk_recursively(second_half, chunk_idx, total_chunks, is_sub_chunk=True)
549
+ return
550
+
551
+ logger.warning(f"Error in request for chunk {part_label}: {r.text}")
552
+ raise Exception(r.text)
553
+
554
+ except requests.exceptions.RequestException as e:
555
+ logger.exception(f"Network error uploading chunk {part_label}: {e}")
556
+ raise e
557
+
558
+ total_rows = len(serialized_data_frame)
559
+ if total_rows == 0:
560
+ logger.info("DataFrame is empty, nothing to upload.")
561
+ return
562
+
563
+ total_chunks = math.ceil(total_rows / chunk_size) if chunk_size > 0 else 1
564
+ logger.info(f"Starting upload of {total_rows} rows in {total_chunks} initial chunk(s).")
565
+
566
+ for i in range(total_chunks):
567
+ start_idx = i * chunk_size
568
+ end_idx = min((i + 1) * chunk_size, total_rows)
569
+ chunk_df = serialized_data_frame.iloc[start_idx:end_idx]
570
+
571
+ _send_chunk_recursively(chunk_df, i, total_chunks)
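A minimal sketch of the split-on-413 strategy used by _send_chunk_recursively, with the HTTP call stubbed out so the halving logic can be read in isolation (post_chunk is a stand-in, not a package function):

import pandas as pd

def post_chunk(df: pd.DataFrame) -> int:
    # stand-in for the real POST; pretend anything over 3 rows is "too large"
    return 413 if len(df) > 3 else 200

def send_recursively(df: pd.DataFrame) -> None:
    if df.empty:
        return
    status = post_chunk(df)
    if status == 200:
        print(f"uploaded {len(df)} rows")
    elif status == 413:
        if len(df) <= 1:
            raise Exception("a single row is too large to upload")
        mid = len(df) // 2
        send_recursively(df.iloc[:mid])   # retry first half as its own upload
        send_recursively(df.iloc[mid:])   # retry second half as its own upload

send_recursively(pd.DataFrame({"x": range(10)}))  # ends up uploading pieces of <= 3 rows
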
572
+
573
+ @classmethod
574
+ def get_metadatas_and_set_updates(
575
+ cls,
576
+ local_time_series_ids: list,
577
+ update_details_kwargs,
578
+ update_priority_dict
579
+ ):
580
+ """
581
+ Fetch metadata for the given local time series and register their update state.
582
+
583
+ :param local_time_series_ids: list of LocalTimeSerie primary keys
584
+ :param update_details_kwargs: kwargs used to build or update the update details
585
+ :param update_priority_dict: per-series update priorities
586
+ :return: dict with source_table_config_map, state_data, all_index_stats and local_metadatas
587
+ """
588
+ base_url = cls.get_object_url()
589
+ s = cls.build_session()
590
+ payload = {"json": dict(local_time_series_ids=local_time_series_ids,
591
+ update_details_kwargs=update_details_kwargs,
592
+ update_priority_dict=update_priority_dict,
593
+ )}
594
+ # r = self.s.post(f"{base_url}/get_metadatas_and_set_updates/", **payload)
595
+ url = f"{base_url}/get_metadatas_and_set_updates/"
596
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload=payload)
597
+ if r.status_code != 200:
598
+ raise Exception(f"Error in request {r.text}")
599
+ r = r.json()
600
+ r["source_table_config_map"] = {int(k): SourceTableConfiguration(**v) if v is not None else v for k, v in
601
+ r["source_table_config_map"].items()}
602
+ r["state_data"] = {int(k): LocalTimeSerieUpdateDetails(**v) for k, v in r["state_data"].items()}
603
+ r["all_index_stats"] = {int(k): v for k, v in r["all_index_stats"].items()}
604
+ r["local_metadatas"] = [LocalTimeSerie(**v) for v in r["local_metadatas"]]
605
+ return r
606
+
607
+ def depends_on_connect(self, target_time_serie_id
608
+ ):
609
+
610
+ url = self.get_object_url() + f"/{self.id}/depends_on_connect/"
611
+ s = self.build_session()
612
+ payload = dict(json={
613
+ "target_time_serie_id": target_time_serie_id,
614
+ })
615
+ r = make_request(s=s, loaders=self.LOADERS, r_type="PATCH", url=url, payload=payload)
616
+ if r.status_code != 204:
617
+ raise Exception(f"Error in request {r.text}")
618
+
619
+ def depends_on_connect_to_api_table(self, target_table_id,
620
+ timeout=None):
621
+
622
+ url = self.get_object_url() + f"/{self.id}/depends_on_connect_to_api_table/"
623
+ s = self.build_session()
624
+ payload = dict(json={
625
+ "target_table_id": target_table_id,
626
+ })
627
+ r = make_request(s=s, loaders=self.LOADERS, r_type="PATCH", url=url,
628
+ time_out=timeout,
629
+ payload=payload)
630
+ if r.status_code != 204:
631
+ raise Exception(f"Error in request {r.text}")
632
+
633
+ @classmethod
634
+ def _break_pandas_dataframe(cls, data_frame: pd.DataFrame, time_index_name: Union[str, None] = None):
635
+ if time_index_name is None:
636
+ time_index_name = data_frame.index.names[0]
637
+ if time_index_name is None:
638
+ time_index_name = "time_index"
639
+ names = [c if i != 0 else time_index_name for i, c in
640
+ enumerate(data_frame.index.names)]
641
+ data_frame.index.names = names
642
+
643
+ time_col_loc = data_frame.index.names.index(time_index_name)
644
+ index_names = data_frame.index.names
645
+ data_frame = data_frame.reset_index()
646
+ data_frame.columns = [str(c) for c in data_frame.columns]
647
+ data_frame = data_frame.rename(columns={data_frame.columns[time_col_loc]: time_index_name})
648
+ column_dtypes_map = {key: str(value) for key, value in data_frame.dtypes.to_dict().items()}
649
+
650
+ data_frame = data_frame.replace({np.nan: None})
651
+
652
+ return data_frame, index_names, column_dtypes_map, time_index_name
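To illustrate what _break_pandas_dataframe produces, here are the same normalization steps applied directly with pandas to a tiny two-level frame (the tickers and values are made up):

import datetime
import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_tuples(
    [(datetime.datetime(2024, 1, 1), "AAPL"), (datetime.datetime(2024, 1, 2), "MSFT")],
    names=[None, "unique_identifier"],  # the unnamed time level is renamed "time_index"
)
df = pd.DataFrame({"close": [185.6, np.nan]}, index=idx)

df.index.names = ["time_index", "unique_identifier"]
index_names = df.index.names
flat = df.reset_index()
flat.columns = [str(c) for c in flat.columns]
column_dtypes_map = {k: str(v) for k, v in flat.dtypes.to_dict().items()}
flat = flat.replace({np.nan: None})  # NaNs become None so they serialize cleanly to JSON
# column_dtypes_map -> {'time_index': 'datetime64[ns]', 'unique_identifier': 'object', 'close': 'float64'}
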
653
+
654
+ def upsert_data_into_table(
655
+ self,
656
+ data: pd.DataFrame,
657
+ data_source: "DynamicTableDataSource",
658
+ ):
659
+
660
+ overwrite = True # ALWAYS OVERWRITE
661
+ metadata = self.remote_table
662
+
663
+ data, index_names, column_dtypes_map, time_index_name = self._break_pandas_dataframe(
664
+ data)
665
+
666
+ # overwrite the original data frame to release memory
667
+ if not data[time_index_name].is_monotonic_increasing:
668
+ data = data.sort_values(time_index_name)
669
+
670
+ metadata.handle_source_table_configuration_creation(
671
+ column_dtypes_map=column_dtypes_map,
672
+ index_names=index_names,
673
+ time_index_name=time_index_name,
674
+ data=data,
675
+ overwrite=overwrite
676
+ )
677
+
678
+ duplicates_exist = data.duplicated(subset=index_names).any()
679
+ if duplicates_exist:
680
+ raise Exception(f"Duplicates found in columns: {index_names}")
681
+
682
+ global_stats, grouped_dates = get_chunk_stats(
683
+ chunk_df=data,
684
+ index_names=index_names,
685
+ time_index_name=time_index_name
686
+ )
687
+ multi_index_column_stats = {}
688
+ column_names = [c for c in data.columns if c not in index_names]
689
+ for c in column_names:
690
+ multi_index_column_stats[c] = global_stats["_PER_ASSET_"]
691
+
692
+ data_source.insert_data_into_table(
693
+ serialized_data_frame=data,
694
+ local_metadata=self,
695
+ overwrite=overwrite,
696
+ time_index_name=time_index_name,
697
+ index_names=index_names,
698
+ grouped_dates=grouped_dates,
699
+ )
700
+
701
+ min_d, last_time_index_value = global_stats["_GLOBAL_"]["min"], global_stats["_GLOBAL_"]["max"]
702
+ max_per_asset_symbol = None
703
+
704
+ def extract_max(node):
705
+ # Leaf case: a dict with 'min' and 'max'
706
+ if isinstance(node, dict) and "min" in node and "max" in node:
707
+ return node["max"]
708
+ # Otherwise recurse
709
+ return {k: extract_max(v) for k, v in node.items()}
710
+
711
+ if len(index_names) > 1:
712
+ max_per_asset_symbol = {
713
+ uid: extract_max(stats)
714
+ for uid, stats in global_stats["_PER_ASSET_"].items()
715
+ }
716
+ local_metadata = self.set_last_update_index_time_from_update_stats(
717
+ max_per_asset_symbol=max_per_asset_symbol,
718
+ last_time_index_value=last_time_index_value,
719
+ multi_index_column_stats=multi_index_column_stats
720
+ )
721
+ return local_metadata
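The max_per_asset_symbol mapping passed above comes from extract_max, which walks a nested min/max stats dict and keeps only the max leaves; a small sketch of that walk on a made-up stats dict:

def extract_max(node):
    # leaf: a dict holding "min" and "max" bounds
    if isinstance(node, dict) and "min" in node and "max" in node:
        return node["max"]
    # otherwise recurse into the nested mapping
    return {k: extract_max(v) for k, v in node.items()}

per_asset_stats = {
    "AAPL": {"min": "2024-01-01", "max": "2024-03-01"},
    "MSFT": {"feature_a": {"min": "2024-01-01", "max": "2024-02-15"}},
}
print({uid: extract_max(stats) for uid, stats in per_asset_stats.items()})
# {'AAPL': '2024-03-01', 'MSFT': {'feature_a': '2024-02-15'}}
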
722
+
723
+ def get_node_time_to_wait(self):
724
+
725
+ next_update = self.localtimeserieupdatedetails.next_update
726
+ time_to_wait = 0.0
727
+ if next_update is not None:
728
+ time_to_wait = (pd.to_datetime(next_update) - datetime.datetime.now(pytz.utc)).total_seconds()
729
+ time_to_wait = max(0, time_to_wait)
730
+ return time_to_wait, next_update
731
+
732
+ def wait_for_update_time(self, ):
733
+ time_to_wait, next_update = self.get_node_time_to_wait()
734
+ if time_to_wait > 0:
735
+
736
+ logger.info(f"Scheduler Waiting for ts update time at {next_update} {time_to_wait}")
737
+ time.sleep(time_to_wait)
738
+ else:
739
+ time_to_wait = max(0, 60 - datetime.datetime.now(pytz.utc).second)
740
+ logger.info(f"Scheduler Waiting for ts update at start of minute")
741
+ time.sleep(time_to_wait)
742
+
743
+
744
+ class TableMetaData(BaseModel):
745
+ identifier: str = None
746
+ description: Optional[str] = None
747
+ data_frequency_id: Optional[DataFrequency] = None
748
+
749
+
750
+ class DynamicTableMetaData(BasePydanticModel, BaseObjectOrm):
751
+ id: int = Field(None, description="Primary key, auto-incremented ID")
752
+ storage_hash: str = Field(..., max_length=63, description="Max length of PostgreSQL table name")
753
+ table_name: Optional[str] = Field(None, max_length=63, description="Max length of PostgreSQL table name")
754
+ creation_date: datetime.datetime = Field(..., description="Creation timestamp")
755
+ created_by_user: Optional[int] = Field(None, description="Foreign key reference to User")
756
+ organization_owner: int = Field(None, description="Foreign key reference to Organization")
757
+ open_for_everyone: bool = Field(default=False, description="Whether the table is open for everyone")
758
+ data_source_open_for_everyone: bool = Field(default=False,
759
+ description="Whether the data source is open for everyone")
760
+ build_configuration: Optional[Dict[str, Any]] = Field(None, description="Configuration in JSON format")
761
+ build_meta_data: Optional[Dict[str, Any]] = Field(None, description="Optional YAML metadata")
762
+ time_serie_source_code_git_hash: Optional[str] = Field(None, max_length=255,
763
+ description="Git hash of the time series source code")
764
+ time_serie_source_code: Optional[str] = Field(None, description="File path for time series source code")
765
+ protect_from_deletion: bool = Field(default=False, description="Flag to protect the record from deletion")
766
+ data_source: Union[int, "DynamicTableDataSource"]
767
+ source_class_name: str
768
+ sourcetableconfiguration: Optional[SourceTableConfiguration] = None
769
+ table_index_names: Optional[Dict] = None
770
+
771
+ # TS specific
772
+ compression_policy_config: Optional[Dict] = None
773
+ retention_policy_config: Optional[Dict] = None
774
+
775
+ # MetaData
776
+ identifier: Optional[str] = None
777
+ description: Optional[str] = None
778
+ data_frequency_id: Optional[DataFrequency] = None
779
+
780
+ _drop_indices: bool = False # for direct insertion we can pass these values
781
+ _rebuild_indices: bool = False # for direct insertion we can pass these values
782
+
783
+ def patch(self, time_out: Union[None, int] = None, *args, **kwargs, ):
784
+ url = self.get_object_url() + f"/{self.id}/"
785
+ payload = {"json": serialize_to_json(kwargs)}
786
+ s = self.build_session()
787
+ r = make_request(s=s, loaders=self.LOADERS, r_type="PATCH", url=url, payload=payload, time_out=time_out)
788
+ if r.status_code != 200:
789
+ raise Exception(f"Error in request {r.text}")
790
+ return self.__class__(**r.json())
791
+
792
+ @classmethod
793
+ def patch_by_hash(cls, storage_hash: str, *args, **kwargs):
794
+ metadata = cls.get(storage_hash=storage_hash)
795
+ metadata.patch(*args, **kwargs)
796
+
797
+ @classmethod
798
+ def get_or_create(cls, **kwargs):
799
+ kwargs = serialize_to_json(kwargs)
800
+ url = cls.get_object_url() + "/get_or_create/"
801
+ payload = {"json": kwargs}
802
+ s = cls.build_session()
803
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload=payload)
804
+ if r.status_code not in [201, 200]:
805
+ raise Exception(r.text)
806
+ data = r.json()
807
+ return cls(**data)
808
+
809
+ def build_or_update_update_details(self, *args, **kwargs):
810
+ base_url = self.get_object_url()
811
+ payload = {"json": kwargs}
812
+ s = self.build_session()
813
+ url = f"{base_url}/{self.id}/build_or_update_update_details/"
814
+ r = make_request(r_type="PATCH", url=url, payload=payload, s=s, loaders=self.LOADERS, )
815
+ if r.status_code != 202:
816
+ raise Exception(f"Error in request {r.text}")
817
+
818
+ @classmethod
819
+ def patch_build_configuration(
820
+ cls,
821
+ remote_table_patch: Union[dict, None],
822
+ build_meta_data: dict,
823
+ data_source_id: int,
824
+ local_table_patch: dict,
825
+ ):
826
+
827
+ logger.warning("TODO Fix Patch Build Configuration")
828
+ # url = cls.get_object_url() + "/patch_build_configuration"
829
+ # payload = {"json": {"remote_table_patch": remote_table_patch, "local_table_patch": local_table_patch,
830
+ # "build_meta_data": build_meta_data, "data_source_id": data_source_id,
831
+ # }}
832
+ # s = cls.build_session()
833
+ # r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload=payload,
834
+ #
835
+ # )
836
+ # if r.status_code != 200:
837
+ # raise Exception(r.text)
838
+
839
+ def delete_table(self):
840
+ data_source = PodDataSource._get_duck_db()
841
+ duckdb_dynamic_data_source = DynamicTableDataSource.get_or_create_duck_db(
842
+ related_resource=data_source.id,
843
+ )
844
+ if (isinstance(self.data_source, int) and self.data_source == duckdb_dynamic_data_source.id) or \
845
+ (not isinstance(self.data_source, int) and self.data_source.related_resource.class_type == DUCK_DB):
846
+ db_interface = DuckDBInterface()
847
+ db_interface.drop_table(self.table_name)
848
+
849
+ self.delete()
850
+
851
+ def handle_source_table_configuration_creation(self,
852
+
853
+ column_dtypes_map: dict,
854
+ index_names: List[str],
855
+ time_index_name,
856
+ data,
857
+ overwrite=False
858
+ ):
859
+ """
860
+ Handles the creation or retrieval of the source table configuration.
861
+
862
+ Parameters:
863
+ ----------
864
866
+ column_dtypes_map : dict
867
+ Mapping of column names to their data types.
868
+ index_names : list
869
+ List of index names.
870
+ time_index_name : str
871
+ Name of the time index column.
872
+
873
+ data : DataFrame
874
+ The input DataFrame.
875
+ overwrite : bool, optional
876
+ Whether to overwrite existing configurations (default is False).
877
+
878
+ Returns:
879
+ -------
880
+ None
881
+ Creates and caches the source table configuration on this instance as a side effect.
882
+ """
883
+ stc = self.sourcetableconfiguration
884
+
885
+ if stc is None:
886
+ try:
887
+ stc = SourceTableConfiguration.create(
888
+ column_dtypes_map=column_dtypes_map,
889
+ index_names=index_names,
890
+ time_index_name=time_index_name,
891
+ metadata_id=self.id
892
+ )
893
+ self.sourcetableconfiguration = stc
894
+ except AlreadyExist:
895
+
896
+ if not overwrite:
897
+ raise NotImplementedError("TODO Needs to remove values per asset")
898
+ # Filter the data based on time_index_name and last_time_index_value
899
+
900
+ def get_data_between_dates_from_api(
901
+ self,
902
+ start_date: datetime.datetime = None,
903
+ end_date: datetime.datetime = None,
904
+ great_or_equal: bool = None,
905
+ less_or_equal: bool = None,
906
+ unique_identifier_list: list = None,
907
+ columns: list = None,
908
+ unique_identifier_range_map: Union[None, UniqueIdentifierRangeMap] = None,
909
+ column_range_descriptor: Union[None, UniqueIdentifierRangeMap] = None
910
+ ):
911
+ """Fetch rows between dates from the remote API, sending unique_identifier_range_map in chunks and following next_offset pagination."""
912
+
913
+ def fetch_one_batch(chunk_range_map):
914
+ all_results_chunk = []
915
+ offset = 0
916
+ while True:
917
+ payload = {
918
+ "json": {
919
+ "start_date": start_date.timestamp() if start_date else None,
920
+ "end_date": end_date.timestamp() if end_date else None,
921
+ "great_or_equal": great_or_equal,
922
+ "less_or_equal": less_or_equal,
923
+ "unique_identifier_list": unique_identifier_list,
924
+ "columns": columns,
925
+ "offset": offset, # pagination offset
926
+ "unique_identifier_range_map": chunk_range_map,
927
+ }
928
+ }
929
+
930
+ # Perform the POST request
931
+ r = make_request(s=s, loaders=self.LOADERS, payload=payload, r_type="POST", url=url)
932
+ if r.status_code != 200:
933
+ logger.warning(f"Error in request: {r.text}")
934
+ return []
935
+
936
+ response_data = r.json()
937
+ # Accumulate results
938
+ chunk = response_data.get("results", [])
939
+ all_results_chunk.extend(chunk)
940
+
941
+ # Retrieve next offset; if None, we've got all the data in this chunk
942
+ next_offset = response_data.get("next_offset")
943
+ if not next_offset:
944
+ break
945
+
946
+ # Update offset for the next iteration
947
+ offset = next_offset
948
+
949
+ return all_results_chunk
950
+
951
+ s = self.build_session()
952
+ url = self.get_object_url() + f"/{self.id}/get_data_between_dates_from_remote/"
953
+
954
+ unique_identifier_range_map = copy.deepcopy(unique_identifier_range_map)
955
+ if unique_identifier_range_map is not None:
956
+ for unique_identifier, date_info in unique_identifier_range_map.items():
957
+ # Convert start_date if present
958
+ if 'start_date' in date_info and isinstance(date_info['start_date'], datetime.datetime):
959
+ date_info['start_date'] = int(date_info['start_date'].timestamp())
960
+
961
+ # Convert end_date if present
962
+ if 'end_date' in date_info and isinstance(date_info['end_date'], datetime.datetime):
963
+ date_info['end_date'] = int(date_info['end_date'].timestamp())
964
+
965
+ all_results = []
966
+ if unique_identifier_range_map:
967
+ keys = list(unique_identifier_range_map.keys())
968
+ chunk_size = 100
969
+ for start_idx in range(0, len(keys), chunk_size):
970
+ key_chunk = keys[start_idx: start_idx + chunk_size]
971
+
972
+ # Build sub-dictionary for this chunk
973
+ chunk_map = {
974
+ k: unique_identifier_range_map[k] for k in key_chunk
975
+ }
976
+
977
+ # Fetch data (including any pagination via next_offset)
978
+ chunk_results = fetch_one_batch(chunk_map)
979
+ all_results.extend(chunk_results)
980
+ else:
981
+ # If unique_identifier_range_map is None, do a single batch with offset-based pagination.
982
+ chunk_results = fetch_one_batch(None)
983
+ all_results.extend(chunk_results)
984
+
985
+ return pd.DataFrame(all_results)
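Before any request is sent, the method above converts datetimes in unique_identifier_range_map to epoch seconds and splits the identifiers into batches of 100, each batch then paged via next_offset; the client-side preprocessing on its own (identifiers are placeholders):

import datetime

unique_identifier_range_map = {
    f"ASSET_{i}": {"start_date": datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc)}
    for i in range(250)
}

# datetimes become epoch seconds, mirroring the conversion above
for uid, date_info in unique_identifier_range_map.items():
    if isinstance(date_info.get("start_date"), datetime.datetime):
        date_info["start_date"] = int(date_info["start_date"].timestamp())

keys = list(unique_identifier_range_map)
chunk_size = 100
chunks = [keys[i:i + chunk_size] for i in range(0, len(keys), chunk_size)]
print([len(c) for c in chunks])  # [100, 100, 50] -> three POSTs, each paged until next_offset is empty
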
986
+
987
+
988
+ class Scheduler(BasePydanticModel, BaseObjectOrm):
989
+ id: Optional[int] = None
990
+ name: str
991
+ is_running: bool
992
+ running_process_pid: Optional[int]
993
+ running_in_debug_mode: bool
994
+ updates_halted: bool
995
+ host: Optional[str]
996
+ api_address: Optional[str]
997
+ api_port: Optional[int]
998
+ last_heart_beat: Optional[datetime.datetime] = None
999
+ pre_loads_in_tree: Optional[List[LocalTimeSerie]] = None
1000
+ in_active_tree: Optional[List[LocalTimeSerie]] = None
1001
+ schedules_to: Optional[List[LocalTimeSerie]] = None
1002
+ # for heartbeat
1003
+ _stop_heart_beat: bool = False
1004
+ _executor: Optional[object] = None
1005
+
1006
+ @classmethod
1007
+ def get_scheduler_for_ts(cls, ts_id: int):
1008
+ """
1009
+ GET /schedulers/for-ts/?ts_id=<LocalTimeSerie PK>
1010
+ """
1011
+ s = cls.build_session()
1012
+ url = cls.get_object_url() + "/for-ts/"
1013
+ r = make_request(
1014
+ s=s,
1015
+ r_type="GET",
1016
+ url=url,
1017
+ payload={"params": {"ts_id": ts_id}},
1018
+ loaders=cls.LOADERS,
1019
+ )
1020
+ if r.status_code == 404:
1021
+ raise SchedulerDoesNotExist(r.json().get("detail", r.text))
1022
+ r.raise_for_status()
1023
+ return cls(**r.json())
1024
+
1025
+ @classmethod
1026
+ def initialize_debug_for_ts(
1027
+ cls,
1028
+ time_serie_id: int,
1029
+ name_suffix: Union[str, None] = None,
1030
+ ):
1031
+ """
1032
+ POST /schedulers/initialize-debug/
1033
+ body: { time_serie_id, name_suffix? }
1034
+ """
1035
+ s = cls.build_session()
1036
+ url = cls.get_object_url() + "/initialize-debug/"
1037
+ payload = {
1038
+ "json": {
1039
+ "time_serie_id": time_serie_id,
1040
+ **({"name_suffix": name_suffix} if name_suffix is not None else {}),
1041
+ }
1042
+ }
1043
+ r = make_request(s=s, r_type="POST", url=url, payload=payload, loaders=cls.LOADERS)
1044
+ r.raise_for_status()
1045
+ return cls(**r.json())
1046
+
1047
+ @classmethod
1048
+ def build_and_assign_to_ts(
1049
+ cls,
1050
+ scheduler_name: str,
1051
+ time_serie_ids: List[int],
1052
+ delink_all_ts: bool = False,
1053
+ remove_from_other_schedulers: bool = True,
1054
+ timeout=None,
1055
+ **kwargs,
1056
+ ):
1057
+ """
1058
+ POST /schedulers/build-and-assign/
1059
+ body: {
1060
+ scheduler_name, time_serie_ids, delink_all_ts?,
1061
+ remove_from_other_schedulers?, scheduler_kwargs?
1062
+ }
1063
+ """
1064
+ s = cls.build_session()
1065
+ url = cls.get_object_url() + "/build_and_assign_to_ts/"
1066
+ payload = {
1067
+ "json": {
1068
+ "scheduler_name": scheduler_name,
1069
+ "time_serie_ids": time_serie_ids,
1070
+ "delink_all_ts": delink_all_ts,
1071
+ "remove_from_other_schedulers": remove_from_other_schedulers,
1072
+ "scheduler_kwargs": kwargs or {},
1073
+ }
1074
+ }
1075
+ r = make_request(s=s, r_type="POST", url=url, payload=payload,
1076
+ time_out=timeout,
1077
+ loaders=cls.LOADERS)
1078
+ if r.status_code not in [200, 201]:
1079
+ r.raise_for_status()
1080
+ return cls(**r.json())
1081
+
1082
+ def in_active_tree_connect(self, local_time_series_ids: List[int]):
1083
+ """
1084
+ PATCH /schedulers/{id}/in-active-tree/
1085
+ body: { time_serie_ids }
1086
+ """
1087
+ s = self.build_session()
1088
+ url = f"{self.get_object_url()}/{self.id}/in-active-tree/"
1089
+ r = make_request(
1090
+ s=s,
1091
+ r_type="PATCH",
1092
+ url=url,
1093
+ payload={"json": {"time_serie_ids": local_time_series_ids}},
1094
+ loaders=self.LOADERS,
1095
+ )
1096
+ if r.status_code not in (200, 204):
1097
+ raise Exception(f"Error in request {r.text}")
1098
+
1099
+ def assign_to_scheduler(self, time_serie_ids: List[int]):
1100
+ """
1101
+ PATCH /schedulers/{id}/assign/
1102
+ body: { time_serie_ids }
1103
+ """
1104
+ s = self.build_session()
1105
+ url = f"{self.get_object_url()}/{self.id}/assign/"
1106
+ r = make_request(
1107
+ s=s,
1108
+ r_type="PATCH",
1109
+ url=url,
1110
+ payload={"json": {"time_serie_ids": time_serie_ids}},
1111
+ loaders=self.LOADERS,
1112
+ )
1113
+ r.raise_for_status()
1114
+ return Scheduler(**r.json())
1115
+
1116
+ def is_scheduler_running_in_process(self):
1117
+ # test call
1118
+ if self.is_running == True and hasattr(self, "api_address"):
1119
+ # verify scheduler host is the same
1120
+ if self.api_address == get_network_ip() and is_process_running(self.running_process_pid) == True:
1121
+ return True
1122
+ return False
1123
+
1124
+ def _heart_beat_patch(self):
1125
+ try:
1126
+ scheduler = self.patch(is_running=True,
1127
+ running_process_pid=os.getpid(),
1128
+ running_in_debug_mode=self.running_in_debug_mode,
1129
+ last_heart_beat=datetime.datetime.utcnow().replace(
1130
+ tzinfo=pytz.utc).timestamp(),
1131
+ )
1132
+ for field, value in scheduler.__dict__.items():
1133
+ setattr(self, field, value)
1134
+ except Exception as e:
1135
+ logger.error(e)
1136
+
1137
+ def _heartbeat_runner(self, run_interval):
1138
+ """
1139
+ Runs forever (until the main thread ends),
1140
+ calling _scheduler_heart_beat_patch every 30 seconds.
1141
+ """
1142
+ logger.debug("Heartbeat thread started with interval = %d seconds", run_interval)
1143
+
1144
+ while True:
1145
+ self._heart_beat_patch()
1146
+ # Sleep in a loop so that if we ever decide to
1147
+ # add a cancellation event, we can check it in smaller intervals
1148
+ for _ in range(run_interval):
1149
+ # could check for a stop event here if not daemon
1150
+ if self._stop_heart_beat == True:
1151
+ return
1152
+ time.sleep(1)
1153
+
1154
+ def start_heart_beat(self):
1155
+ from concurrent.futures import ThreadPoolExecutor
1156
+
1157
+ if self._executor is None:
1158
+ self._executor = ThreadPoolExecutor(max_workers=1)
1159
+ run_interval = TDAG_CONSTANTS.SCHEDULER_HEART_BEAT_FREQUENCY_SECONDS
1160
+ self._heartbeat_future = self._executor.submit(self._heartbeat_runner, run_interval)
1161
+
1162
+ def stop_heart_beat(self):
1163
+ """
1164
+ Stop the heartbeat gracefully.
1165
+ """
1166
+ # Signal the runner loop to exit
1167
+ self._stop_heart_beat = True
1168
+
1169
+ # Optionally wait for the future to complete
1170
+ if hasattr(self, "_heartbeat_future") and self._heartbeat_future:
1171
+ logger.info("Waiting for the heartbeat thread to finish...")
1172
+ self._heartbeat_future.result() # or .cancel() if you prefer
1173
+
1174
+ # Shut down the executor if no longer needed
1175
+ if self._executor:
1176
+ self._executor.shutdown(wait=True)
1177
+ self._executor = None
1178
+
1179
+ logger.info("Heartbeat thread stopped.")
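The heartbeat lifecycle above (a single worker thread that patches on an interval and exits when a stop flag is set) reduces to the following backend-free sketch; the class name and print call are illustrative only:

import time
from concurrent.futures import ThreadPoolExecutor

class HeartbeatDemo:
    def __init__(self, interval_seconds: int = 5):
        self._interval = interval_seconds
        self._stop = False
        self._executor = ThreadPoolExecutor(max_workers=1)

    def _runner(self):
        while True:
            print("heartbeat")  # stand-in for the PATCH to the scheduler endpoint
            for _ in range(self._interval):
                if self._stop:
                    return
                time.sleep(1)

    def start(self):
        self._future = self._executor.submit(self._runner)

    def stop(self):
        self._stop = True
        self._future.result()           # wait for the runner to notice the flag
        self._executor.shutdown(wait=True)

hb = HeartbeatDemo(interval_seconds=2)
hb.start()
time.sleep(3)
hb.stop()
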
1180
+
1181
+
1182
+ class RunConfiguration(BasePydanticModel, BaseObjectOrm):
1183
+ local_time_serie_update_details: Optional[int] = None
1184
+ retry_on_error: int = 0
1185
+ seconds_wait_on_retry: float = 50
1186
+ required_cpus: int = 1
1187
+ required_gpus: int = 0
1188
+ execution_time_out_seconds: float = 50
1189
+ update_schedule: str = "*/1 * * * *"
1190
+
1191
+ @classmethod
1192
+ @property
1193
+ def ROOT_URL(cls):
1194
+ return None
1195
+
1196
+
1197
+ class LocalTimeSerieUpdateDetails(BasePydanticModel, BaseObjectOrm):
1198
+ related_table: Union[int, LocalTimeSerie]
1199
+ active_update: bool = Field(default=False, description="Flag to indicate if update is active")
1200
+ update_pid: int = Field(default=0, description="Process ID of the update")
1201
+ error_on_last_update: bool = Field(default=False,
1202
+ description="Flag to indicate if there was an error in the last update")
1203
+ last_update: Optional[datetime.datetime] = Field(None, description="Timestamp of the last update")
1204
+ next_update: Optional[datetime.datetime] = Field(None, description="Timestamp of the next update")
1205
+ update_statistics: Optional[Dict[str, Any]] = Field(None, description="JSON field for update statistics")
1206
+ active_update_status: str = Field(default="Q", max_length=20, description="Current update status")
1207
+ active_update_scheduler: Optional[Union[int, Scheduler]] = Field(None,
1208
+ description="Scheduler for active update")
1209
+ update_priority: int = Field(default=0, description="Priority level of the update")
1210
+ last_updated_by_user: Optional[int] = Field(None, description="Foreign key reference to User")
1211
+
1212
+ run_configuration: Optional["RunConfiguration"] = None
1213
+
1214
+ @staticmethod
1215
+ def _parse_parameters_filter(parameters):
1216
+ for key, value in parameters.items():
1217
+ if "__in" in key:
1218
+ assert isinstance(value, list)
1219
+ parameters[key] = ",".join(value)
1220
+ return parameters
1221
+
1222
+
1223
+ class UpdateStatistics(BaseModel):
1224
+ """
1225
+ This class contains the update details of the table in the main sequence engine
1226
+ """
1227
+ asset_time_statistics: Optional[Dict[str, Union[datetime.datetime, None, Dict]]] = None
1228
+
1229
+ max_time_index_value: Optional[datetime.datetime] = None # does not include filter
1230
+ asset_list: Optional[List] = None
1231
+ limit_update_time: Optional[datetime.datetime] = None # flag to limit the update of data node
1232
+ _max_time_in_update_statistics: Optional[datetime.datetime] = None # include filter
1233
+ _initial_fallback_date: Optional[datetime.datetime] = None
1234
+
1235
+ # when working with DuckDb and column based storage we want to have also stats by column
1236
+ multi_index_column_stats: Optional[Dict[str, Any]] = None
1237
+ is_backfill: bool = False
1238
+
1239
+ class Config:
1240
+ arbitrary_types_allowed = True
1241
+
1242
+ @staticmethod
1243
+ def _to_utc_datetime(value: Any):
1244
+ # pandas / numpy friendly path first
1245
+ if hasattr(value, "to_pydatetime"): # pandas.Timestamp
1246
+ value = value.to_pydatetime()
1247
+ # Handle numpy.datetime64 without importing numpy explicitly
1248
+ if type(value).__name__ == "datetime64":
1249
+ try:
1250
+ import pandas as pd # only if available
1251
+ value = pd.to_datetime(value).to_pydatetime()
1252
+ except Exception:
1253
+ return value
1254
+
1255
+ if isinstance(value, datetime.datetime):
1256
+ return value.astimezone(datetime.timezone.utc) if value.tzinfo else value.replace(
1257
+ tzinfo=datetime.timezone.utc)
1258
+
1259
+ if isinstance(value, (int, float)):
1260
+ v = float(value)
1261
+ # seconds / ms / µs / ns heuristics by magnitude
1262
+ if v > 1e17: # ns
1263
+ v /= 1e9
1264
+ elif v > 1e14: # µs
1265
+ v /= 1e6
1266
+ elif v > 1e11: # ms
1267
+ v /= 1e3
1268
+ return datetime.datetime.fromtimestamp(v, tz=datetime.timezone.utc)
1269
+
1270
+ if isinstance(value, str):
1271
+ s = value.strip()
1272
+ if s.endswith("Z"): # ISO Z suffix
1273
+ s = s[:-1] + "+00:00"
1274
+ try:
1275
+ dt = datetime.datetime.fromisoformat(s)
1276
+ return dt.astimezone(datetime.timezone.utc) if dt.tzinfo else dt.replace(tzinfo=datetime.timezone.utc)
1277
+ except ValueError:
1278
+ return value
1279
+
1280
+ return value
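The numeric branch above guesses the epoch unit from its magnitude (nanoseconds, microseconds, milliseconds, or seconds); the same heuristic in isolation:

import datetime

def to_utc_from_epoch(value: float) -> datetime.datetime:
    v = float(value)
    if v > 1e17:      # nanoseconds
        v /= 1e9
    elif v > 1e14:    # microseconds
        v /= 1e6
    elif v > 1e11:    # milliseconds
        v /= 1e3
    return datetime.datetime.fromtimestamp(v, tz=datetime.timezone.utc)

# all four spellings of the same instant resolve to 2024-01-01 00:00:00+00:00
for raw in (1704067200, 1704067200_000, 1704067200_000_000, 1704067200_000_000_000):
    print(to_utc_from_epoch(raw))
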
1281
+
1282
+ @classmethod
1283
+ def _normalize_nested(cls, obj: Any):
1284
+ if obj is None:
1285
+ return None
1286
+ if isinstance(obj, dict):
1287
+ return {k: cls._normalize_nested(v) for k, v in obj.items()}
1288
+ return cls._to_utc_datetime(obj)
1289
+
1290
+ @field_validator("multi_index_column_stats", mode="before")
1291
+ @classmethod
1292
+ def _coerce_multi_index_column_stats(cls, v):
1293
+ # Normalize before standard parsing so ints/strings become datetimes
1294
+ return cls._normalize_nested(v)
1295
+
1296
+ @classmethod
1297
+ def return_empty(cls):
1298
+ return cls()
1299
+
1300
+ def pretty_print(self):
1301
+ print(f"{self.__class__.__name__} summary:")
1302
+
1303
+ # asset_list
1304
+ if self.asset_list is None:
1305
+ print(" asset_list: None")
1306
+ else:
1307
+ print(f" asset_list: {len(self.asset_list)} assets")
1308
+
1309
+ # DataFrame
1310
+ last_observation = getattr(self, "last_observation", None)  # guard: not a declared field
+ if last_observation is None or last_observation.empty:
1311
+ print(" last_observation: empty DataFrame")
1312
+ else:
1313
+ rows, cols = last_observation.shape
1314
+ print(f" last_observation: DataFrame with {rows} rows × {cols} columns")
1315
+
1316
+ # Other attributes
1317
+ print(f" max_time_index_value: {self.max_time_index_value}")
1318
+ print(f" _max_time_in_update_statistics: {self._max_time_in_update_statistics}")
1319
+
1320
+ def is_empty(self):
1321
+ return self.asset_time_statistics is None and self.max_time_index_value is None
1322
+
1323
+ def asset_identifier(self):
1324
+ return list(self.asset_time_statistics.keys())
1325
+
1326
+ def get_max_time_in_update_statistics(self):
1327
+ if not hasattr(self, "_max_time_in_update_statistics"):
1328
+ self._max_time_in_update_statistics = self.max_time_index_value or self._initial_fallback_date
1329
+ if self._max_time_in_update_statistics is None and self.asset_time_statistics is not None:
1330
+ new_update_statistics, _max_time_in_asset_time_statistics = self._get_update_statistics(
1331
+
1332
+ asset_list=None, unique_identifier_list=None
1333
+ )
1334
+ self._max_time_in_update_statistics = _max_time_in_asset_time_statistics
1335
+
1336
+ return self._max_time_in_update_statistics
1337
+
1338
+ def get_update_range_map_great_or_equal_columnar(self, extra_time_delta: Optional[datetime.timedelta] = None,
1339
+ column_filter: Optional[List[str]] = None,
1340
+ ):
1341
+ fallback = {c: {a.unique_identifier: {"min": self._initial_fallback_date,
1342
+ "max": self._initial_fallback_date,
1343
+ } for a in self.asset_list} for c in column_filter}
1344
+
1345
+ multi_index_column_stats = self.multi_index_column_stats or {}
1346
+ fallback.update(multi_index_column_stats)
1347
+
1348
+ def _start_dt(bounds):
1349
+ dt = (bounds or {}).get("max") or self._initial_fallback_date
1350
+ if extra_time_delta:
1351
+ dt = dt + extra_time_delta
1352
+ return dt
1353
+
1354
+ target_cols = fallback.keys() if column_filter is None else column_filter
1355
+
1356
+ range_map = {
1357
+ col: {
1358
+ asset_id: DateInfo({
1359
+ "start_date_operand": ">=",
1360
+ "start_date": _start_dt(bounds),
1361
+ })
1362
+ for asset_id, bounds in col_stats.items()
1363
+ }
1364
+ for col, col_stats in fallback.items() if col in target_cols
1365
+ }
1366
+
1367
+ return range_map
1368
+
1369
+ def get_update_range_map_great_or_equal(self,
1370
+ extra_time_delta: Optional[datetime.timedelta] = None,
1371
+ ):
1372
+
1373
+ if extra_time_delta is None:
1374
+ range_map = {k: DateInfo({"start_date_operand": ">=", "start_date": v or self._initial_fallback_date}) for
1375
+ k, v in self.asset_time_statistics.items()}
1376
+ else:
1377
+ range_map = {k: DateInfo(
1378
+ {"start_date_operand": ">=", "start_date": (v or self._initial_fallback_date) + extra_time_delta}) for
1379
+ k, v in self.asset_time_statistics.items()}
1380
+ return range_map
1381
+
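+ # Illustrative sketch (identifiers and dates are assumed): the range map produced above
+ # keys each unique identifier to a DateInfo with a ">=" operand, e.g.
+ #   stats.asset_time_statistics == {"BTC-USD": last_seen_dt, "ETH-USD": None}
+ #   stats.get_update_range_map_great_or_equal()
+ #   # -> {"BTC-USD": DateInfo({"start_date_operand": ">=", "start_date": last_seen_dt}),
+ #   #     "ETH-USD": DateInfo({"start_date_operand": ">=", "start_date": stats._initial_fallback_date})}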
1382
+ def get_last_update_index_2d(self, uid):
1383
+ return self.asset_time_statistics[uid] or self._initial_fallback_date
1384
+
1385
+ def get_asset_earliest_multiindex_update(self, asset):
1386
+ stats = self.asset_time_statistics.get(asset.unique_identifier)
1387
+ if not stats:
1388
+ return self._initial_fallback_date
1389
+
1390
+ def _min_in_nested(node):
1391
+ # If this is a dict, recurse into its values
1392
+ if isinstance(node, dict):
1393
+ m = None
1394
+ for v in node.values():
1395
+ cand = _min_in_nested(v)
1396
+ if cand is not None and (m is None or cand < m):
1397
+ m = cand
1398
+ return m
1399
+ # Leaf: assume it’s a timestamp (datetime or numeric)
1400
+ return node
1401
+
1402
+ return _min_in_nested(stats)
1403
+
1404
+ def filter_assets_by_level(self,
1405
+ level: int,
1406
+ filters: List,
1407
+ ):
1408
+ """
1409
+ Prune `self.asset_time_statistics` so that at the specified index level
1410
+ only the given keys remain. Works for any depth of nesting.
1411
+
1412
+ Parameters
1413
+ ----------
1414
+ level : int
1415
+ The index level to filter on (1 == unique_identifier,
1416
+ 2 == the first nested level, and so on).
1417
+ filters : List
1418
+ The allowed values at that level. Any branches whose key at
1419
+ `level` is not in this list will be removed.
1420
+
1421
+ Returns
1422
+ -------
1423
+ self
1424
+ (Allows method chaining.)
1425
+ """
1426
+ # Grab the full list of index names, in order
1427
+
1428
+ # Determine the numeric depth of the target level
1429
+ # 0 == unique_identifier, 1 == first nested level, etc.
1430
+ target_depth = level - 1
1431
+
1432
+ # Special‐case: filtering on unique_identifier itself
1433
+ if target_depth == 0:
1434
+ self.asset_time_statistics = {
1435
+ asset: stats
1436
+ for asset, stats in self.asset_time_statistics.items()
1437
+ if asset in filters
1438
+ }
1439
+ return self
1440
+
1441
+ allowed = set(filters)
1442
+ default = self._initial_fallback_date
1443
+
1444
+ def _prune(node: Any, current_depth: int) -> Any:
1445
+ # leaf timestamp
1446
+ if not isinstance(node, dict):
1447
+ return node
1448
+
1449
+ # we've reached the level to filter
1450
+ if current_depth == target_depth:
1451
+ out: Dict[str, Any] = {}
1452
+ for key in allowed:
1453
+ if key in node:
1454
+ out[key] = node[key]
1455
+ else:
1456
+ # missing filter → assign fallback date
1457
+ out[key] = default
1458
+ return out
1459
+
1460
+ # otherwise recurse deeper
1461
+ pruned: Dict[str, Any] = {}
1462
+ for key, subnode in node.items():
1463
+ new_sub = _prune(subnode, current_depth + 1)
1464
+ # keep non-empty dicts or valid leaves
1465
+ if isinstance(new_sub, dict):
1466
+ if new_sub:
1467
+ pruned[key] = new_sub
1468
+ elif new_sub is not None:
1469
+ pruned[key] = new_sub
1470
+ return pruned
1471
+
1472
+ new_stats: Dict[str, Any] = {}
1473
+ # stats dict sits at depth=1 under each asset
1474
+ for asset, stats in self.asset_time_statistics.items():
1475
+ if stats is None:
1476
+ new_stats[asset] = {f: self._initial_fallback_date for f in allowed}
1477
+ else:
1478
+ pr = _prune(stats, current_depth=1)
1479
+ new_stats[asset] = pr or None
1480
+
1481
+ self.asset_time_statistics = new_stats
1482
+ return self
1483
+
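+ # Illustrative sketch (assumed keys): pruning the nested statistics at level 2 so only the
+ # "close" branch survives; assets missing that branch fall back to _initial_fallback_date.
+ #   stats.asset_time_statistics = {"BTC-USD": {"close": dt_a, "open": dt_b}, "ETH-USD": None}
+ #   stats.filter_assets_by_level(level=2, filters=["close"])
+ #   # asset_time_statistics is now
+ #   # {"BTC-USD": {"close": dt_a}, "ETH-USD": {"close": stats._initial_fallback_date}}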
1484
+ def _get_update_statistics(self,
1485
+ asset_list: Optional[List],
1486
+ unique_identifier_list: Union[list, None], init_fallback_date=None):
1487
+ new_update_statistics = {}
1488
+ if asset_list is None and unique_identifier_list is None:
1489
+ assert self.asset_time_statistics is not None
1490
+ unique_identifier_list = list(self.asset_time_statistics.keys())
1491
+
1492
+ else:
1493
+ unique_identifier_list = [a.unique_identifier for a in
1494
+ asset_list] if unique_identifier_list is None else unique_identifier_list
1495
+
1496
+ for unique_identifier in unique_identifier_list:
1497
+
1498
+ if self.asset_time_statistics and unique_identifier in self.asset_time_statistics:
1499
+ new_update_statistics[unique_identifier] = self.asset_time_statistics[unique_identifier]
1500
+ else:
1501
+
1502
+ new_update_statistics[unique_identifier] = init_fallback_date
1503
+
1504
+ def _max_in_nested(d):
1505
+ """
1506
+ Recursively find the max leaf value in a nested dict-of-dicts,
1507
+ where the leaves are comparable (e.g. datetime objects).
1508
+ Returns None if there are no leaves.
1509
+ """
1510
+ max_val = None
1511
+ for v in d.values():
1512
+ if isinstance(v, dict):
1513
+ candidate = _max_in_nested(v)
1514
+ else:
1515
+ candidate = v
1516
+ if candidate is not None and (max_val is None or candidate > max_val):
1517
+ max_val = candidate
1518
+ return max_val
1519
+
1520
+ _max_time_in_asset_time_statistics = _max_in_nested(new_update_statistics) if len(
1521
+ new_update_statistics) > 0 else init_fallback_date
1522
+
1523
+ return new_update_statistics, _max_time_in_asset_time_statistics
1524
+
1525
+ def update_assets(
1526
+ self,
1527
+ asset_list: Optional[List],
1528
+ *,
1529
+ init_fallback_date: Optional[datetime.datetime] = None,
1530
+ unique_identifier_list: Union[list, None] = None
1531
+ ):
1532
+ self.asset_list = asset_list
1533
+ new_update_statistics = self.asset_time_statistics
1534
+ if asset_list is not None or unique_identifier_list is not None:
1535
+ new_update_statistics, _max_time_in_asset_time_statistics = self._get_update_statistics(
1536
+ unique_identifier_list=unique_identifier_list,
1537
+ asset_list=asset_list, init_fallback_date=init_fallback_date,
1538
+ )
1539
+
1540
+ else:
1541
+ _max_time_in_asset_time_statistics = self.max_time_index_value or init_fallback_date
1542
+
1543
+ new_multi_index_column_stats = self.multi_index_column_stats
1544
+ if self.max_time_index_value is not None and self.multi_index_column_stats is not None:
1545
+ new_multi_index_column_stats = {k: v for k, v in self.multi_index_column_stats.items() if
1546
+ k in new_update_statistics.keys()}
1547
+
1548
+ du = UpdateStatistics(
1549
+ asset_time_statistics=new_update_statistics,
1550
+ max_time_index_value=self.max_time_index_value,
1551
+ asset_list=asset_list,
1552
+ multi_index_column_stats=new_multi_index_column_stats
1553
+ )
1554
+ du._max_time_in_update_statistics = _max_time_in_asset_time_statistics
1555
+ du._initial_fallback_date = init_fallback_date
1556
+ return du
1557
+
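+ # Illustrative sketch (assumed identifiers): narrowing an existing UpdateStatistics `stats`
+ # to a new universe; identifiers without prior statistics receive init_fallback_date.
+ #   fallback = datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)
+ #   narrowed = stats.update_assets(
+ #       asset_list=None,
+ #       unique_identifier_list=["BTC-USD", "NEW-ASSET"],
+ #       init_fallback_date=fallback,
+ #   )
+ #   narrowed["NEW-ASSET"]  # -> fallback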
1558
+ def is_empty(self):
1559
+ return self.max_time_index_value is None
1560
+
1561
+ def __getitem__(self, key: str) -> Any:
1562
+ if self.asset_time_statistics is None:
1563
+ raise KeyError(f"{key} not found (asset_time_statistics is None).")
1564
+ return self.asset_time_statistics[key]
1565
+
1566
+ def __setitem__(self, key: str, value: Any) -> None:
1567
+ if self.asset_time_statistics is None:
1568
+ self.asset_time_statistics = {}
1569
+ self.asset_time_statistics[key] = value
1570
+
1571
+ def __delitem__(self, key: str) -> None:
1572
+ if not self.asset_time_statistics or key not in self.asset_time_statistics:
1573
+ raise KeyError(f"{key} not found in asset_time_statistics.")
1574
+ del self.asset_time_statistics[key]
1575
+
1576
+ def __iter__(self):
1577
+ """Iterate over keys."""
1578
+ if self.asset_time_statistics is None:
1579
+ return iter([])
1580
+ return iter(self.asset_time_statistics)
1581
+
1582
+ def __len__(self) -> int:
1583
+ if not self.asset_time_statistics:
1584
+ return 0
1585
+ return len(self.asset_time_statistics)
1586
+
1587
+ def keys(self):
1588
+ if not self.asset_time_statistics:
1589
+ return []
1590
+ return self.asset_time_statistics.keys()
1591
+
1592
+ def values(self):
1593
+ if not self.asset_time_statistics:
1594
+ return []
1595
+ return self.asset_time_statistics.values()
1596
+
1597
+ def items(self):
1598
+ if not self.asset_time_statistics:
1599
+ return []
1600
+ return self.asset_time_statistics.items()
1601
+
1602
+ def filter_df_by_latest_value(self, df: pd.DataFrame) -> pd.DataFrame:
1603
+ if self.is_empty():
1604
+ return df
1605
+
1606
+ # Single-index time series fallback
1607
+ if (
1608
+ (self.asset_time_statistics is None or "unique_identifier" not in df.index.names)
1609
+ and self.max_time_index_value is not None
1610
+ ):
1611
+ return df[df.index >= self.max_time_index_value]
1612
+
1613
+ names = df.index.names
1614
+ time_level = names[0]
1615
+
1616
+ grouping_levels = [n for n in names if n != time_level]
1617
+
1618
+ # Build a mask by iterating over each row tuple + its timestamp
1619
+ mask = []
1620
+ for idx_tuple, ts in zip(df.index, df.index.get_level_values(time_level)):
1621
+ # map level names → values
1622
+ level_vals = dict(zip(names, idx_tuple))
1623
+ asset = level_vals["unique_identifier"]
1624
+
1625
+ # fetch this asset’s nested stats
1626
+ stats = self.asset_time_statistics.get(asset)
1627
+ if stats is None:
1628
+ # no prior stats for this asset → keep row
1629
+ mask.append(True)
1630
+ continue
1631
+
1632
+ # drill into the nested stats for the remaining levels
1633
+ nested = stats
1634
+ for lvl in grouping_levels[1:]: # skip 'unique_identifier'
1635
+ key = level_vals[lvl]
1636
+ if not isinstance(nested, dict) or key not in nested:
1637
+ # no prior stats for this subgroup → keep row
1638
+ nested = None
1639
+ break
1640
+ nested = nested[key]
1641
+
1642
+ # if we couldn’t find a prior timestamp, or this ts is newer, keep it
1643
+ if nested is None or ts > nested:
1644
+ mask.append(True)
1645
+ else:
1646
+ # ts ≤ last seen → filter out
1647
+ mask.append(False)
1648
+
1649
+ # apply the mask
1650
+ df = df[mask]
1651
+
1652
+ # drop any exact duplicate multi‐index rows that remain
1653
+ dup = df.index.duplicated(keep="first")
1654
+ if dup.any():
1655
+ n = dup.sum()
1656
+ logger.warning(f"Removed {n} duplicated rows after filtering.")
1657
+ df = df[~dup]
1658
+ return df
1659
+
1660
+
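+ # Illustrative sketch (index names and values are assumed): filter_df_by_latest_value keeps
+ # only rows strictly newer than each asset's recorded timestamp.
+ #   t1 = datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc)
+ #   t2 = datetime.datetime(2024, 1, 2, tzinfo=datetime.timezone.utc)
+ #   idx = pd.MultiIndex.from_tuples([(t1, "BTC-USD"), (t2, "BTC-USD")],
+ #                                   names=["time_index", "unique_identifier"])
+ #   df = pd.DataFrame({"close": [1.0, 2.0]}, index=idx)
+ #   stats = UpdateStatistics(asset_time_statistics={"BTC-USD": t1}, max_time_index_value=t1)
+ #   stats.filter_df_by_latest_value(df)  # keeps only the t2 row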
1661
+ def get_chunk_stats(chunk_df, time_index_name, index_names):
1662
+ chunk_stats = {
1663
+ "_GLOBAL_": {
1664
+ "max": chunk_df[time_index_name].max().timestamp(),
1665
+ "min": chunk_df[time_index_name].min().timestamp()
1666
+ }
1667
+ }
1668
+ chunk_stats["_PER_ASSET_"] = {}
1669
+ grouped_dates = None
1670
+ if len(index_names) > 1:
1671
+ grouped_dates = chunk_df.groupby(index_names[1:])[
1672
+ time_index_name].agg(
1673
+ ["min", "max"])
1674
+
1675
+ # 2) decompose the grouped index names
1676
+ first, *rest = grouped_dates.index.names
1677
+
1678
+ # 3) reset to a flat DataFrame for easy iteration
1679
+ df = grouped_dates.reset_index()
1680
+
1681
+ # 4) build the nested dict
1682
+ per_asset: dict = {}
1683
+ for _, row in df.iterrows():
1684
+ uid = row[first] # e.g. the unique_identifier
1685
+ # no extra index levels beyond uid?
1686
+ if len(rest) == 0:
1687
+
1688
+ per_asset[uid] = {
1689
+ "min": row["min"].timestamp(),
1690
+ "max": row["max"].timestamp(),
1691
+ }
1692
+ else:
1693
+ # one or more extra levels → walk a path of dicts
1694
+ keys = [row[level] for level in rest]
1695
+ sub = per_asset.setdefault(uid, {})
1696
+ for key in keys[:-1]:
1697
+ sub = sub.setdefault(key, {})
1698
+ sub[keys[-1]] = {
1699
+ "min": row["min"].timestamp(),
1700
+ "max": row["max"].timestamp(),
1701
+ }
1702
+ # 5) assign into your stats structure
1703
+ chunk_stats["_PER_ASSET_"] = per_asset
1704
+ return chunk_stats, grouped_dates
1705
+
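+ # Illustrative sketch (identifiers and epochs are assumed): for a serialized chunk whose
+ # columns include ["time_index", "unique_identifier"], get_chunk_stats returns
+ #   chunk_stats == {
+ #       "_GLOBAL_": {"max": 1700003600.0, "min": 1700000000.0},
+ #       "_PER_ASSET_": {"BTC-USD": {"min": 1700000000.0, "max": 1700003600.0}},
+ #   }
+ # together with the grouped min/max DataFrame used to build the per-asset entries.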
1706
+
1707
+ class LocalTimeSeriesHistoricalUpdate(BasePydanticModel, BaseObjectOrm):
1708
+ id: Optional[int] = None
1709
+ related_table: int # Assuming you're using the ID of the related table
1710
+ update_time_start: datetime.datetime
1711
+ update_time_end: Optional[datetime.datetime] = None
1712
+ error_on_update: bool = False
1713
+ trace_id: Optional[str] = Field(default=None, max_length=255)
1714
+ updated_by_user: Optional[int] = None # Assuming you're using the ID of the user
1715
+
1716
+ last_time_index_value: Optional[datetime.datetime] = None
1717
+
1718
+ # extra fields for local control
1719
+ update_statistics: Optional[UpdateStatistics]
1720
+ must_update: Optional[bool]
1721
+ direct_dependencies_ids: Optional[List[int]]
1722
+
1723
+
1724
+ class DataSource(BasePydanticModel, BaseObjectOrm):
1725
+ id: Optional[int] = Field(None, description="The unique identifier of the Local Disk Source Lake")
1726
+ display_name: str
1727
+ organization: Optional[int] = Field(None, description="The unique identifier of the organization")
1728
+ class_type: str
1729
+ status: str
1730
+ extra_arguments: Optional[Dict] = None
1731
+
1732
+ @classmethod
1733
+ def get_or_create_duck_db(cls, time_out=None, *args, **kwargs):
1734
+ url = cls.get_object_url() + f"/get_or_create_duck_db/"
1735
+ payload = {"json": serialize_to_json(kwargs)}
1736
+ s = cls.build_session()
1737
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload=payload, time_out=time_out)
1738
+ if r.status_code not in [200, 201]:
1739
+ raise Exception(f"Error in request {r.text}")
1740
+ return cls(**r.json())
1741
+
1742
+ def insert_data_into_table(
1743
+ self,
1744
+ serialized_data_frame: pd.DataFrame,
1745
+ local_metadata: LocalTimeSerie,
1746
+ overwrite: bool,
1747
+ time_index_name: str,
1748
+ index_names: list,
1749
+ grouped_dates: dict,
1750
+ ):
1751
+
1752
+ if self.class_type == DUCK_DB:
1753
+ DuckDBInterface().upsert(
1754
+ df=serialized_data_frame,
1755
+ table=local_metadata.remote_table.table_name
1756
+ )
1757
+ else:
1758
+ LocalTimeSerie.post_data_frame_in_chunks(
1759
+ serialized_data_frame=serialized_data_frame,
1760
+ local_metadata=local_metadata,
1761
+ data_source=self,
1762
+ index_names=index_names,
1763
+ time_index_name=time_index_name,
1764
+ overwrite=overwrite,
1765
+ )
1766
+
1767
+ def insert_data_into_local_table(
1768
+ self,
1769
+ serialized_data_frame: pd.DataFrame,
1770
+ local_metadata: LocalTimeSerie,
1771
+ overwrite: bool,
1772
+ time_index_name: str,
1773
+ index_names: list,
1774
+ grouped_dates: dict,
1775
+ ):
1776
+
1777
+ # LocalTimeSerie.post_data_frame_in_chunks(
1778
+ # serialized_data_frame=serialized_data_frame,
1779
+ # local_metadata=local_metadata,
1780
+ # data_source=self,
1781
+ # index_names=index_names,
1782
+ # time_index_name=time_index_name,
1783
+ # overwrite=overwrite,
1784
+ # )
1785
+ raise NotImplementedError
1786
+
1787
+ def get_data_by_time_index(
1788
+ self,
1789
+ local_metadata: dict,
1790
+ start_date: Optional[datetime.datetime] = None,
1791
+ end_date: Optional[datetime.datetime] = None,
1792
+ great_or_equal: bool = True,
1793
+ less_or_equal: bool = True,
1794
+ columns: Optional[List[str]] = None,
1795
+ unique_identifier_list: Optional[List[str]] = None,
1796
+ unique_identifier_range_map: Optional[UniqueIdentifierRangeMap] = None,
1797
+ column_range_descriptor: Optional[Dict[str, UniqueIdentifierRangeMap]] = None,
1798
+ ) -> pd.DataFrame:
1799
+
1800
+ logger.warning("EXTEND THE CONSTRAIN READ HERE!!")
1801
+ if self.class_type == DUCK_DB:
1802
+ db_interface = DuckDBInterface()
1803
+ table_name = local_metadata.remote_table.table_name
1804
+
1805
+ adjusted_start, adjusted_end, adjusted_uirm, _ = db_interface.constrain_read(
1806
+ table=table_name,
1807
+ start=start_date,
1808
+ end=end_date,
1809
+ ids=unique_identifier_list,
1810
+ unique_identifier_range_map=unique_identifier_range_map,
1811
+ )
1812
+ if unique_identifier_range_map is not None and adjusted_end is not None:
1813
+ adjusted_end = datetime.datetime(adjusted_end.year, adjusted_end.month, adjusted_end.day,
1814
+ tzinfo=datetime.timezone.utc)
1815
+ for v in unique_identifier_range_map.values():
1816
+ v["end_date"] = adjusted_end
1817
+ v["end_date_operand"] = "<="
1818
+
1819
+ df = db_interface.read(
1820
+ table=table_name,
1821
+ start=start_date,
1822
+ end=end_date,
1823
+ great_or_equal=great_or_equal,
1824
+ less_or_equal=less_or_equal,
1825
+ ids=unique_identifier_list,
1826
+ columns=columns,
1827
+ unique_identifier_range_map=unique_identifier_range_map, # Pass range map
1828
+ )
1829
+
1830
+
1831
+ else:
1832
+ if column_range_descriptor is not None:
1833
+ raise Exception("On this data source do not use column_range_descriptor")
1834
+ df = local_metadata.get_data_between_dates_from_api(
1835
+ start_date=start_date,
1836
+ end_date=end_date,
1837
+ great_or_equal=great_or_equal,
1838
+ less_or_equal=less_or_equal,
1839
+ unique_identifier_list=unique_identifier_list,
1840
+ columns=columns,
1841
+ unique_identifier_range_map=unique_identifier_range_map
1842
+ )
1843
+ if len(df) == 0:
1844
+ logger.warning(
1845
+ f"No data returned from remote API for {local_metadata.update_hash}"
1846
+ )
1847
+ return df
1848
+
1849
+ stc = local_metadata.remote_table.sourcetableconfiguration
1850
+ df[stc.time_index_name] = pd.to_datetime(df[stc.time_index_name], format='ISO8601')
1854
+ columns_to_loop = columns or stc.column_dtypes_map.keys()
1855
+ for c, c_type in stc.column_dtypes_map.items():
1856
+ if c not in columns_to_loop:
1857
+ continue
1858
+ if c != stc.time_index_name:
1859
+ if c_type == "object":
1860
+ c_type = "str"
1861
+ df[c] = df[c].astype(c_type)
1862
+ df = df.set_index(stc.index_names)
1863
+ return df
1864
+
1865
+ def get_earliest_value(self,
1866
+ local_metadata: LocalTimeSerie,
1867
+ ) -> Tuple[Optional[pd.Timestamp], Dict[Any, Optional[pd.Timestamp]]]:
1868
+ if self.class_type == DUCK_DB:
1869
+ db_interface = DuckDBInterface()
1870
+ table_name = local_metadata.remote_table.table_name
1871
+ return db_interface.time_index_minima(table=table_name)
1872
+
1873
+
1874
+ else:
1875
+ raise NotImplementedError
1876
+
1877
+
1878
+ class DynamicTableDataSource(BasePydanticModel, BaseObjectOrm):
1879
+ id: int
1880
+ related_resource: DataSource
1881
+ related_resource_class_type: str
1882
+
1883
+ class Config:
1884
+ use_enum_values = True # This ensures that enums are stored as their values (e.g., 'TEXT')
1885
+
1886
+ def model_dump_json(self, **json_dumps_kwargs) -> str:
1887
+ """
1888
+ Dump the current instance to a JSON string,
1889
+ ensuring that the dependent `related_resource` is also properly dumped.
1890
+ """
1891
+ # Obtain the dictionary representation using Pydantic's model_dump
1892
+ dump = self.model_dump()
1893
+ # Properly dump the dependent resource if it supports model_dump
1894
+ dump["related_resource"] = self.related_resource.model_dump()
1895
+ # Convert the dict to a JSON string
1896
+ return json.dumps(dump, **json_dumps_kwargs)
1897
+
1898
+ @classmethod
1899
+ def get_default_data_source_for_token(cls):
1900
+ global _default_data_source
1901
+ if _default_data_source is not None:
1902
+ return _default_data_source # Return cached result if already set
1903
+ url = cls.ROOT_URL + "/get_default_data_source_for_token/"
1904
+
1905
+ s = cls.build_session()
1906
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="GET", url=url, payload={})
1907
+
1908
+ if r.status_code != 200:
1909
+ raise Exception(f"Error in request {r.text}")
1910
+ data = r.json()
1911
+ _default_data_source = cls(**data)
1912
+ return _default_data_source
1913
+
1914
+ def persist_to_pickle(self, path):
1915
+ import cloudpickle
1916
+ os.makedirs(os.path.dirname(path), exist_ok=True)
1917
+ with open(path, 'wb') as handle:
1918
+ cloudpickle.dump(self, handle)
1919
+
1920
+ @classmethod
1921
+ def get_or_create_duck_db(cls, *args, **kwargs):
1922
+ url = cls.get_object_url() + "/get_or_create_duck_db/"
1923
+ s = cls.build_session()
1924
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload={"json": kwargs})
1925
+ if r.status_code not in [200, 201]:
1926
+ raise Exception(f"Error in request {r.text}")
1927
+ return cls(**r.json())
1928
+
1929
+ def has_direct_postgres_connection(self):
1930
+ return self.related_resource.class_type == 'direct'
1931
+
1932
+ def get_data_by_time_index(self, *args, **kwargs):
1933
+ if self.has_direct_postgres_connection():
1934
+ stc = kwargs["local_metadata"].remote_table.sourcetableconfiguration
1935
+
1936
+ df = TimeScaleInterface.direct_data_from_db(
1937
+ connection_uri=self.related_resource.get_connection_uri(),
1938
+ *args, **kwargs,
1939
+
1940
+ )
1941
+ df = set_types_in_table(df, stc.column_dtypes_map)
1942
+ return df
1943
+ else:
1944
+ return self.related_resource.get_data_by_time_index(*args, **kwargs)
1945
+
1946
+ def insert_data_into_table(self, *args, **kwargs):
1947
+ if self.has_direct_postgres_connection():
1948
+ TimeScaleInterface.process_and_update_table(
1949
+ data_source=self.related_resource,
1950
+ *args, **kwargs,
1951
+ )
1952
+
1953
+ else:
1954
+ self.related_resource.insert_data_into_table(*args, **kwargs)
1955
+
1956
+
1957
+ class Project(BasePydanticModel, BaseObjectOrm):
1958
+ id: int
1959
+ project_name: str
1960
+ data_source: DynamicTableDataSource
1961
+ git_ssh_url: Optional[str] = None
1962
+
1963
+ @classmethod
1964
+ def get_user_default_project(cls):
1965
+ url = cls.get_object_url() + "/get_user_default_project/"
1966
+
1967
+ s = cls.build_session()
1968
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="GET", url=url, )
1969
+ if r.status_code == 404:
1970
+ raise Exception(r.text)
1971
+ if r.status_code != 200:
1972
+ raise Exception(f"Error in request {r.text}")
1973
+ return cls(**r.json())
1974
+
1975
+ def __str__(self):
1976
+ return yaml.safe_dump(
1977
+ self.model_dump(),
1978
+ sort_keys=False,
1979
+ default_flow_style=False,
1980
+ )
1981
+
1982
+
1983
+ class TimeScaleDB(DataSource):
1984
+ database_user: str
1985
+ password: str
1986
+ host: str
1987
+ database_name: str
1988
+ port: int
1989
+
1990
+ def get_connection_uri(self):
1991
+ password = self.password # Decrypt password if necessary
1992
+ return f"postgresql://{self.database_user}:{password}@{self.host}:{self.port}/{self.database_name}"
1993
+
1994
+ def insert_data_into_table(
1995
+ self,
1996
+ serialized_data_frame: pd.DataFrame,
1997
+ local_metadata: dict,
1998
+ overwrite: bool,
1999
+ time_index_name: str,
2000
+ index_names: list,
2001
+ grouped_dates: dict,
2002
+ ):
2003
+
2004
+ LocalTimeSerie.post_data_frame_in_chunks(
2005
+ serialized_data_frame=serialized_data_frame,
2006
+ local_metadata=local_metadata,
2007
+ data_source=self,
2008
+ index_names=index_names,
2009
+ time_index_name=time_index_name,
2010
+ overwrite=overwrite,
2011
+ )
2012
+
2013
+ def filter_by_assets_ranges(
2014
+ self,
2015
+ asset_ranges_map: dict,
2016
+ metadata: dict,
2017
+ update_hash: str,
2018
+ has_direct_connection: bool
2019
+ ):
2020
+ table_name = metadata.table_name
2021
+ index_names = metadata.sourcetableconfiguration.index_names
2022
+ column_types = metadata.sourcetableconfiguration.column_dtypes_map
2023
+ if has_direct_connection:
2024
+ df = TimeScaleInterface.filter_by_assets_ranges(
2025
+ table_name=table_name,
2026
+ asset_ranges_map=asset_ranges_map,
2027
+ index_names=index_names,
2028
+ data_source=self,
2029
+ column_types=column_types
2030
+ )
2031
+ else:
2032
+ df = LocalTimeSerie.get_data_between_dates_from_api(
2033
+ update_hash=update_hash,
2034
+ data_source_id=self.id,
2035
+ start_date=None,
2036
+ end_date=None,
2037
+ great_or_equal=True,
2038
+ less_or_equal=True,
2039
+ asset_symbols=None,
2040
+ columns=None,
2041
+ execution_venue_symbols=None,
2042
+ symbol_range_map=asset_ranges_map, # <-- key for applying ranges
2043
+ )
2044
+ return df
2045
+
2046
+ def get_data_by_time_index(
2047
+ self,
2048
+ local_metadata: dict,
2049
+ start_date: Optional[datetime.datetime] = None,
2050
+ end_date: Optional[datetime.datetime] = None,
2051
+ great_or_equal: bool = True,
2052
+ less_or_equal: bool = True,
2053
+ columns: Optional[List[str]] = None,
2054
+ unique_identifier_list: Optional[List[str]] = None,
2055
+
2056
+ ) -> pd.DataFrame:
2057
+
2058
+ metadata = local_metadata.remote_table
2059
+
2060
+ df = local_metadata.get_data_between_dates_from_api(
2061
+
2062
+ start_date=start_date,
2063
+ end_date=end_date,
2064
+ great_or_equal=great_or_equal,
2065
+ less_or_equal=less_or_equal,
2066
+ unique_identifier_list=unique_identifier_list,
2067
+ columns=columns,
2068
+ )
2069
+ if len(df) == 0:
2070
+ if logger:
2071
+ logger.warning(
2072
+ f"No data returned from remote API for {local_metadata.update_hash}"
2073
+ )
2074
+ return df
2075
+
2076
+ stc = local_metadata.remote_table.sourcetableconfiguration
2077
+ df[stc.time_index_name] = pd.to_datetime(df[stc.time_index_name])
2078
+ for c, c_type in stc.column_dtypes_map.items():
2079
+ if c != stc.time_index_name:
2080
+ if c_type == "object":
2081
+ c_type = "str"
2082
+ df[c] = df[c].astype(c_type)
2083
+ df = df.set_index(stc.index_names)
2084
+ return df
2085
+
2086
+
2087
+ class DynamicResource(BasePydanticModel, BaseObjectOrm):
2088
+ id: Optional[int] = None
2089
+ name: str
2090
+ type: str
2091
+ object_signature: dict
2092
+ attributes: Optional[dict]
2093
+
2094
+ created_at: datetime.datetime
2095
+ updated_at: datetime.datetime
2096
+ is_production: bool
2097
+ pod: int
2098
+
2099
+
2100
+ def create_configuration_for_strategy(json_payload: dict, timeout=None):
2101
+ url = TDAG_ENDPOINT + "/orm/api/tdag-gpt/create_configuration_for_strategy/"
2102
+ from requests.adapters import HTTPAdapter, Retry
2103
+ s = requests.Session()
2104
+ s.headers.update(loaders.auth_headers)
2105
+ retries = Retry(total=2, backoff_factor=2)
2106
+ s.mount('http://', HTTPAdapter(max_retries=retries))
2107
+
2108
+ r = make_request(s=s, r_type="POST", url=url, payload={"json": json_payload},
2109
+ loaders=loaders, time_out=200)
2110
+ return r
2111
+
2112
+
2113
+ def query_agent(json_payload: dict, timeout=None):
2114
+ url = TDAG_ENDPOINT + "/orm/api/tdag-gpt/query_agent/"
2115
+ from requests.adapters import HTTPAdapter, Retry
2116
+ s = requests.Session()
2117
+ s.headers.update(loaders.auth_headers)
2118
+ retries = Retry(total=2, backoff_factor=2)
2119
+ s.mount('http://', HTTPAdapter(max_retries=retries))
2120
+
2121
+ r = make_request(s=s, r_type="POST", url=url, payload={"json": json_payload},
2122
+ loaders=loaders, time_out=200)
2123
+ return r
2124
+
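+ # Illustrative sketch (payload keys are assumptions): both helpers above return the raw
+ # requests.Response, so callers check the status themselves, e.g.
+ #   r = query_agent({"prompt": "Summarize the latest portfolio update"})
+ #   if r.status_code == 200:
+ #       answer = r.json()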
2125
+
2126
+ def add_created_object_to_jobrun(model_name: str, app_label: str, object_id: int,
2127
+ timeout: Optional[int] = None) -> dict:
2128
+ """
2129
+ Logs a new object that was created by this JobRun instance.
2130
+
2131
+ Args:
2132
+ model_name: The string name of the created model (e.g., "Project").
2133
+ app_label: The Django app label where the model is defined (e.g., "pod_manager").
2134
+ object_id: The primary key of the created object instance.
2135
+ timeout: Optional request timeout in seconds.
2136
+
2137
+ Returns:
2138
+ A dictionary representing the created record.
2139
+ """
2140
+ url = TDAG_ENDPOINT + f"/orm/api/pods/job-run/{os.getenv('JOB_RUN_ID')}/add_created_object/"
2141
+ s = requests.Session()
2142
+ payload = {
2143
+ "json": {
2144
+ "app_label": app_label,
2145
+ "model_name": model_name,
2146
+ "object_id": object_id
2147
+ }
2148
+ }
2149
+ r = make_request(
2150
+ s=s,
2151
+ loaders=loaders,
2152
+ r_type="POST",
2153
+ url=url,
2154
+ payload=payload,
2155
+ time_out=timeout
2156
+ )
2157
+ if r.status_code not in [200, 201]:
2158
+ raise Exception(f"Failed to add created object: {r.status_code} - {r.text}")
2159
+ return r.json()
2160
+
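+ # Illustrative sketch (object_id is made up): recording that this JobRun created a Project
+ # instance; requires the JOB_RUN_ID environment variable used above.
+ #   record = add_created_object_to_jobrun(
+ #       model_name="Project", app_label="pod_manager", object_id=42,
+ #   )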
2161
+
2162
+ class Artifact(BasePydanticModel, BaseObjectOrm):
2163
+ id: Optional[int]
2164
+ name: str
2165
+ created_by_resource_name: str
2166
+ bucket_name: str
2167
+ content: Any
2168
+
2169
+ @classmethod
2170
+ def upload_file(cls, filepath, name, created_by_resource_name, bucket_name=None):
2171
+ bucket_name = bucket_name if bucket_name else "default_bucket"
2172
+ return cls.get_or_create(filepath=filepath, name=name, created_by_resource_name=created_by_resource_name,
2173
+ bucket_name=bucket_name)
2174
+
2175
+ @classmethod
2176
+ def get_or_create(cls, filepath, name, created_by_resource_name, bucket_name):
2177
+ url = cls.get_object_url() + "/get_or_create/"
2178
+ s = cls.build_session()
2179
+ with open(filepath, "rb") as f:
2180
+ data = {
2181
+ "name": name,
2182
+ "created_by_resource_name": created_by_resource_name,
2183
+ "bucket_name": bucket_name if bucket_name else "default_bucket",
2184
+ }
2185
+ files = {"content": (str(filepath), f, "application/pdf")}
2186
+ payload = {
2187
+ "json": data,
2188
+ "files": files
2189
+ }
2190
+ r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload=payload)
2191
+
2192
+ if r.status_code not in [200, 201]:
2193
+ raise Exception(f"Failed to get artifact: {r.status_code} - {r.text}")
2194
+
2195
+ return cls(**r.json())
2196
+
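+ # Illustrative sketch (path and names are assumed): uploading a PDF report as an Artifact;
+ # get_or_create always posts the file with an "application/pdf" content type.
+ #   artifact = Artifact.upload_file(
+ #       filepath="reports/monthly.pdf",
+ #       name="monthly-report",
+ #       created_by_resource_name="report-builder",
+ #   )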
2197
+
2198
+ try:
2199
+ POD_PROJECT = Project.get_user_default_project()
2200
+ except Exception as e:
2201
+ POD_PROJECT = None
2202
+ logger.exception(f"Could not retrive pod project {e}")
2203
+ raise e
2204
+
2205
+
2206
+ class PodDataSource:
2207
+ def set_remote_db(self):
2208
+ self.data_source = POD_PROJECT.data_source
2209
+ logger.info(f"Set remote data source to {self.data_source.related_resource}")
2210
+
2211
+ if self.data_source.related_resource.status != "AVAILABLE":
2212
+ raise Exception(f"Project Database {self.data_source} is not available")
2213
+
2214
+ @staticmethod
2215
+ def _get_duck_db():
2216
+ host_uid = bios_uuid()
2217
+ data_source = DataSource.get_or_create_duck_db(
2218
+ display_name=f"DuckDB_{host_uid}",
2219
+ host_mac_address=host_uid
2220
+ )
2221
+ return data_source
2222
+
2223
+ @property
2224
+ def is_local_duck_db(self):
2225
+ return self.data_source.related_resource.class_type == DUCK_DB
2226
+
2227
+ def set_local_db(self):
2228
+ data_source = self._get_duck_db()
2229
+
2230
+ duckdb_dynamic_data_source = DynamicTableDataSource.get_or_create_duck_db(
2231
+ related_resource=data_source.id,
2232
+ )
2233
+
2234
+ # drop local tables that are no longer registered in the backend (they have probably been deleted)
2235
+ remote_tables = DynamicTableMetaData.filter(data_source__id=duckdb_dynamic_data_source.id, list_tables=True)
2236
+ remote_table_names = [t.table_name for t in remote_tables]
2237
+ from mainsequence.client.data_sources_interfaces.duckdb import DuckDBInterface
2238
+ from mainsequence.client.utils import DataFrequency
2239
+ db_interface = DuckDBInterface()
2240
+ local_table_names = db_interface.list_tables()
2241
+
2242
+ tables_to_delete_locally = set(local_table_names) - set(remote_table_names)
2243
+ for table_name in tables_to_delete_locally:
2244
+ logger.debug(f"Deleting table in local duck db {table_name}")
2245
+ db_interface.drop_table(table_name)
2246
+
2247
+ tables_to_delete_remotely = set(remote_table_names) - set(local_table_names)
2248
+ for remote_table in remote_tables:
2249
+ if remote_table.table_name in tables_to_delete_remotely:
2250
+ logger.debug(f"Deleting table remotely {remote_table.table_name}")
2251
+ if remote_table.protect_from_deletion:
2252
+ remote_table.patch(protect_from_deletion=False)
2253
+
2254
+ remote_table.delete()
2255
+
2256
+ self.data_source = duckdb_dynamic_data_source
2257
+
2258
+ physical_ds = self.data_source.related_resource
2259
+ banner = (
2260
+ "─" * 40 + "\n"
2261
+ f"LOCAL: {physical_ds.display_name} (engine={physical_ds.class_type})\n\n"
2262
+ "import duckdb, pathlib\n"
2263
+ f"path = pathlib.Path('{db_interface.db_path}') / 'duck_meta.duckdb'\n"
2264
+ "conn = duckdb.connect(':memory:')\n"
2265
+ "conn.execute(f\"ATTACH '{path}' AS ro (READ_ONLY)\")\n"
2266
+ "conn.execute('INSTALL ui; LOAD ui; CALL start_ui();')\n"
2267
+ + "─" * 40
2268
+ )
2269
+ logger.info(banner)
2270
+
2271
+ def __repr__(self):
2272
+ return f"{self.data_source.related_resource}"
2273
+
2274
+
2275
+ SessionDataSource = PodDataSource()
2276
+ SessionDataSource.set_remote_db()