mainsequence 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. mainsequence/__init__.py +0 -0
  2. mainsequence/__main__.py +9 -0
  3. mainsequence/cli/__init__.py +1 -0
  4. mainsequence/cli/api.py +157 -0
  5. mainsequence/cli/cli.py +442 -0
  6. mainsequence/cli/config.py +78 -0
  7. mainsequence/cli/ssh_utils.py +126 -0
  8. mainsequence/client/__init__.py +17 -0
  9. mainsequence/client/base.py +431 -0
  10. mainsequence/client/data_sources_interfaces/__init__.py +0 -0
  11. mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
  12. mainsequence/client/data_sources_interfaces/timescale.py +479 -0
  13. mainsequence/client/models_helpers.py +113 -0
  14. mainsequence/client/models_report_studio.py +412 -0
  15. mainsequence/client/models_tdag.py +2276 -0
  16. mainsequence/client/models_vam.py +1983 -0
  17. mainsequence/client/utils.py +387 -0
  18. mainsequence/dashboards/__init__.py +0 -0
  19. mainsequence/dashboards/streamlit/__init__.py +0 -0
  20. mainsequence/dashboards/streamlit/assets/config.toml +12 -0
  21. mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
  22. mainsequence/dashboards/streamlit/assets/logo.png +0 -0
  23. mainsequence/dashboards/streamlit/core/__init__.py +0 -0
  24. mainsequence/dashboards/streamlit/core/theme.py +212 -0
  25. mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
  26. mainsequence/dashboards/streamlit/scaffold.py +220 -0
  27. mainsequence/instrumentation/__init__.py +7 -0
  28. mainsequence/instrumentation/utils.py +101 -0
  29. mainsequence/instruments/__init__.py +1 -0
  30. mainsequence/instruments/data_interface/__init__.py +10 -0
  31. mainsequence/instruments/data_interface/data_interface.py +361 -0
  32. mainsequence/instruments/instruments/__init__.py +3 -0
  33. mainsequence/instruments/instruments/base_instrument.py +85 -0
  34. mainsequence/instruments/instruments/bond.py +447 -0
  35. mainsequence/instruments/instruments/european_option.py +74 -0
  36. mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
  37. mainsequence/instruments/instruments/json_codec.py +585 -0
  38. mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
  39. mainsequence/instruments/instruments/position.py +475 -0
  40. mainsequence/instruments/instruments/ql_fields.py +239 -0
  41. mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
  42. mainsequence/instruments/pricing_models/__init__.py +0 -0
  43. mainsequence/instruments/pricing_models/black_scholes.py +49 -0
  44. mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
  45. mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
  46. mainsequence/instruments/pricing_models/indices.py +350 -0
  47. mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
  48. mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
  49. mainsequence/instruments/settings.py +175 -0
  50. mainsequence/instruments/utils.py +29 -0
  51. mainsequence/logconf.py +284 -0
  52. mainsequence/reportbuilder/__init__.py +0 -0
  53. mainsequence/reportbuilder/__main__.py +0 -0
  54. mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
  55. mainsequence/reportbuilder/model.py +713 -0
  56. mainsequence/reportbuilder/slide_templates.py +532 -0
  57. mainsequence/tdag/__init__.py +8 -0
  58. mainsequence/tdag/__main__.py +0 -0
  59. mainsequence/tdag/config.py +129 -0
  60. mainsequence/tdag/data_nodes/__init__.py +12 -0
  61. mainsequence/tdag/data_nodes/build_operations.py +751 -0
  62. mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
  63. mainsequence/tdag/data_nodes/persist_managers.py +812 -0
  64. mainsequence/tdag/data_nodes/run_operations.py +543 -0
  65. mainsequence/tdag/data_nodes/utils.py +24 -0
  66. mainsequence/tdag/future_registry.py +25 -0
  67. mainsequence/tdag/utils.py +40 -0
  68. mainsequence/virtualfundbuilder/__init__.py +45 -0
  69. mainsequence/virtualfundbuilder/__main__.py +235 -0
  70. mainsequence/virtualfundbuilder/agent_interface.py +77 -0
  71. mainsequence/virtualfundbuilder/config_handling.py +86 -0
  72. mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
  73. mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
  74. mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
  75. mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
  76. mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
  77. mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
  78. mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
  79. mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
  80. mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
  81. mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
  82. mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
  83. mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
  84. mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
  85. mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
  86. mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
  87. mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
  88. mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
  89. mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
  90. mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
  91. mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
  92. mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
  93. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
  94. mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
  95. mainsequence/virtualfundbuilder/data_nodes.py +637 -0
  96. mainsequence/virtualfundbuilder/enums.py +23 -0
  97. mainsequence/virtualfundbuilder/models.py +282 -0
  98. mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
  99. mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
  100. mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
  101. mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
  102. mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
  103. mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
  104. mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
  105. mainsequence/virtualfundbuilder/utils.py +381 -0
  106. mainsequence-2.0.0.dist-info/METADATA +105 -0
  107. mainsequence-2.0.0.dist-info/RECORD +110 -0
  108. mainsequence-2.0.0.dist-info/WHEEL +5 -0
  109. mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
  110. mainsequence-2.0.0.dist-info/top_level.txt +1 -0
mainsequence/client/data_sources_interfaces/timescale.py
@@ -0,0 +1,479 @@
+ from importlib.metadata import metadata
+
+ import pandas as pd
+
+ import tempfile
+
+ from concurrent.futures import ThreadPoolExecutor
+ import itertools
+ import csv
+ from io import StringIO
+ from tqdm import tqdm  # tqdm for progress bars
+
+ import numpy as np
+ import json
+
+ from typing import Dict, List, Union, Optional
+ import datetime
+ from mainsequence.logconf import logger
+ from ..utils import DATE_FORMAT, make_request, set_types_in_table
+ import os
+
+
+ def import_psycopg2():
+     # psycopg2 is imported lazily so the package does not hard-depend on it.
+     # The names are promoted to module scope so the helpers below can use them.
+     global psycopg2, errors, execute_batch, CopyManager, execute_values
+     import psycopg2
+     from psycopg2 import errors
+     from psycopg2.extras import execute_batch, execute_values
+     from pgcopy import CopyManager
+
+ def read_sql_tmpfile(query, time_series_orm_uri_db_connection: str):
+     """Stream a query result through COPY into a temporary file and load it as a DataFrame."""
+     with tempfile.TemporaryFile() as tmpfile:
+         copy_sql = "COPY ({query}) TO STDOUT WITH CSV {head}".format(
+             query=query, head="HEADER"
+         )
+         with psycopg2.connect(time_series_orm_uri_db_connection) as conn:
+             cur = conn.cursor()
+             cur.copy_expert(copy_sql, tmpfile)
+             tmpfile.seek(0)
+             df = pd.read_csv(tmpfile, header=0)
+
+     return df
+
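The COPY-to-tempfile helper avoids materializing the result set row by row through the driver. A minimal usage sketch, assuming psycopg2 is installed and a reachable Postgres/TimescaleDB DSN (the connection string and table name are hypothetical; the module path is inferred from the file list):

```python
# Hypothetical usage of read_sql_tmpfile; DSN and table name are placeholders.
from mainsequence.client.data_sources_interfaces.timescale import (
    import_psycopg2,
    read_sql_tmpfile,
)

import_psycopg2()  # promote psycopg2 into module scope before calling helpers
dsn = "postgresql://user:password@localhost:5432/tsdb"
df = read_sql_tmpfile("SELECT * FROM public.my_prices LIMIT 1000", dsn)
print(df.shape)
```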
49
+ def filter_by_assets_ranges(table_name, asset_ranges_map, index_names, data_source, column_types):
+     """
+     Query time series data dynamically based on asset ranges.
+
+     Args:
+         table_name (str): The name of the table to query.
+         asset_ranges_map (dict): A dictionary where keys are asset symbols and values are dictionaries containing:
+             - 'start_date' (datetime): The start date of the range.
+             - 'start_date_operand' (str): The SQL operand for the start date (e.g., '>=' or '>').
+             - 'end_date' (datetime or None): The end date of the range.
+         index_names (list): List of column names to set as the DataFrame index.
+         data_source: A data source object with a method `get_connection_uri()` to get the database connection URI.
+         column_types (dict): Mapping of column names to the dtypes applied via `set_types_in_table`.
+
+     Returns:
+         pd.DataFrame: A Pandas DataFrame with the queried data, indexed by the specified columns.
+     """
+     # Base SQL query
+     query_base = f"SELECT * FROM {table_name} WHERE"
+
+     # Build one condition per asset from the asset_ranges_map dictionary
+     query_parts = []
+     for symbol, range_dict in asset_ranges_map.items():
+         if range_dict['end_date'] is not None:
+             tmp_query = (
+                 f" (asset_symbol = '{symbol}' AND "
+                 f"time_index BETWEEN '{range_dict['start_date']}' AND '{range_dict['end_date']}') "
+             )
+         else:
+             tmp_query = (
+                 f" (asset_symbol = '{symbol}' AND "
+                 f"time_index {range_dict['start_date_operand']} '{range_dict['start_date']}') "
+             )
+         query_parts.append(tmp_query)
+
+     # Combine all per-asset conditions using OR
+     full_query = query_base + " OR ".join(query_parts)
+
+     # Execute the query and load results into a Pandas DataFrame
+     df = read_sql_tmpfile(full_query, time_series_orm_uri_db_connection=data_source.get_connection_uri())
+
+     # Set correct types for values, then the requested index
+     df = set_types_in_table(df, column_types)
+     df = df.set_index(index_names)
+
+     return df
+
+
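The shape of `asset_ranges_map` follows directly from the docstring. A hedged sketch of a call; the table name, symbols, and `data_source` object are illustrative assumptions:

```python
# Hypothetical call showing the asset_ranges_map contract; data_source is any
# object exposing get_connection_uri().
import datetime

asset_ranges_map = {
    "BTCUSDT": {
        "start_date": datetime.datetime(2024, 1, 1),
        "start_date_operand": ">=",
        "end_date": datetime.datetime(2024, 6, 30),  # closed range -> BETWEEN
    },
    "ETHUSDT": {
        "start_date": datetime.datetime(2024, 3, 1),
        "start_date_operand": ">",                   # open lower bound, no end date
        "end_date": None,
    },
}

df = filter_by_assets_ranges(
    table_name="public.spot_prices",
    asset_ranges_map=asset_ranges_map,
    index_names=["time_index", "asset_symbol"],
    data_source=data_source,
    column_types={"close": "float64"},
)
```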
100
+ def direct_data_from_db(local_metadata: dict, connection_uri: str,
+                         start_date: Union[datetime.datetime, None] = None,
+                         great_or_equal: bool = True, less_or_equal: bool = True,
+                         end_date: Union[datetime.datetime, None] = None,
+                         columns: Union[list, None] = None,
+                         unique_identifier_list: Union[list, None] = None,
+                         unique_identifier_range_map: Optional[dict] = None
+                         ):
+     """
+     Connects directly to the DB without passing through the ORM to speed up calculations.
+
+     Parameters
+     ----------
+     local_metadata : dict
+         Local time-series metadata; its `remote_table` holds table and column details.
+     connection_uri : str
+         Connection URI for the database.
+     start_date : datetime.datetime, optional
+         The start date for filtering. If None, no lower bound is applied.
+     great_or_equal : bool, optional
+         Whether the start_date filter is inclusive (>=). Defaults to True.
+     less_or_equal : bool, optional
+         Whether the end_date filter is inclusive (<=). Defaults to True.
+     end_date : datetime.datetime, optional
+         The end date for filtering. If None, no upper bound is applied.
+     columns : list, optional
+         Specific columns to select. If None, all columns are selected.
+     unique_identifier_list : list, optional
+         If given, restricts rows to these unique identifiers.
+     unique_identifier_range_map : dict, optional
+         Reserved for per-identifier ranges; not applied by this helper.
+
+     Returns
+     -------
+     pd.DataFrame
+         Data from the table as a pandas DataFrame, optionally filtered by date range.
+     """
+     import_psycopg2()
+     metadata = local_metadata.remote_table
+
+     def fast_table_dump(connection_config, table_name):
+         # Full-table dump via COPY; currently not wired into the flow below.
+         query = f"COPY {table_name} TO STDOUT WITH CSV HEADER"
+         with psycopg2.connect(connection_config['connection_details']) as connection:
+             with connection.cursor() as cursor:
+                 buffer = StringIO()
+                 cursor.copy_expert(query, buffer)
+                 buffer.seek(0)
+                 df = pd.read_csv(buffer)
+         return df
+
+     # Build the SELECT clause
+     select_clause = ", ".join(columns) if columns else "*"
+
+     # Build the WHERE clause dynamically
+     where_clauses = []
+     time_index_name = metadata.sourcetableconfiguration.time_index_name
+     if start_date:
+         operator = ">=" if great_or_equal else ">"
+         where_clauses.append(f"{time_index_name} {operator} '{start_date}'")
+     if end_date:
+         operator = "<=" if less_or_equal else "<"
+         where_clauses.append(f"{time_index_name} {operator} '{end_date}'")
+
+     if unique_identifier_list:
+         helper_symbol = "','"
+         where_clauses.append(f"unique_identifier IN ('{helper_symbol.join(unique_identifier_list)}')")
+
+     # Combine WHERE clauses
+     where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
+
+     # Construct and execute the query
+     query = f"SELECT {select_clause} FROM {metadata.table_name} {where_clause}"
+     with psycopg2.connect(connection_uri) as connection:
+         with connection.cursor() as cursor:
+             cursor.execute(query)
+             column_names = [desc[0] for desc in cursor.description]
+             data = cursor.fetchall()
+
+     # Convert to DataFrame and restore the configured index
+     data = pd.DataFrame(data=data, columns=column_names)
+     data = data.set_index(metadata.sourcetableconfiguration.index_names)
+
+     return data
+
+
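A hedged read-path sketch: `local_metadata` would come from the TDAG ORM, and its `remote_table` must expose `table_name` and `sourcetableconfiguration`; the DSN is a placeholder:

```python
# Hypothetical direct read; local_metadata is an ORM-provided object (assumed).
import datetime

df = direct_data_from_db(
    local_metadata=local_metadata,
    connection_uri="postgresql://user:password@localhost:5432/tsdb",
    start_date=datetime.datetime(2024, 1, 1),
    great_or_equal=True,
    columns=["time_index", "unique_identifier", "close"],
    unique_identifier_list=["BTCUSDT"],
)
```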
187
+ def direct_table_update(metadata: "DynamicTableMetaData", serialized_data_frame: pd.DataFrame, overwrite: bool,
+                         grouped_dates,
+                         table_is_empty: bool,
+                         time_series_orm_db_connection: Union[str, None] = None,
+                         use_chunks: bool = True, num_threads: int = 4):
+     """
+     Updates the database table with the given DataFrame.
+
+     Parameters:
+     - metadata: DynamicTableMetaData describing the target table and its source configuration.
+     - serialized_data_frame: DataFrame containing the data to insert.
+     - overwrite: If True, existing data in the date range will be deleted before insertion.
+     - grouped_dates: Per-unique-identifier min/max times used to build the DELETE ranges.
+     - table_is_empty: If True, the table is empty and no deletion is needed.
+     - time_series_orm_db_connection: Database connection string.
+     - use_chunks: If True, data will be inserted in chunks using threads.
+     - num_threads: Number of threads to use when use_chunks is True.
+     """
+     import_psycopg2()
+     columns = serialized_data_frame.columns.tolist()
+
+     index_names = metadata.sourcetableconfiguration.index_names
+     table_name = metadata.table_name
+     time_index_name = metadata.sourcetableconfiguration.time_index_name
+
+     def drop_indexes(table_name, table_index_names):
+         # Use a separate connection for index management
+         with psycopg2.connect(time_series_orm_db_connection) as conn:
+             with conn.cursor() as cur:
+                 for index_name in table_index_names:
+                     drop_index_query = f'DROP INDEX IF EXISTS "{index_name}";'
+                     logger.info(f"Dropping index '{index_name}'...")
+                     cur.execute(drop_index_query)
+             # Commit changes after all indexes are processed
+             conn.commit()
+         logger.info("All specified indexes dropped successfully.")
+
+     # Indices are only dropped at inception (the initial bulk load)
+     if metadata._drop_indices:
+         table_index_names = metadata.sourcetableconfiguration.get_time_scale_extra_table_indices()
+         drop_indexes(table_name, table_index_names)
+
+     if overwrite and not table_is_empty:
+         min_d = serialized_data_frame[time_index_name].min()
+         max_d = serialized_data_frame[time_index_name].max()
+
+         with psycopg2.connect(time_series_orm_db_connection) as conn:
+             try:
+                 with conn.cursor() as cur:
+                     if len(index_names) > 1:
+                         grouped_dates = grouped_dates.rename(columns={"min": "start_time", "max": "end_time"})
+                         grouped_dates = grouped_dates.reset_index()
+                         grouped_dates = grouped_dates.to_dict("records")
+
+                         # Build one DELETE condition per unique identifier
+                         delete_conditions = []
+                         for item in grouped_dates:
+                             unique_identifier = item['unique_identifier']
+                             start_time = item['start_time']
+                             end_time = item['end_time']
+
+                             # Format timestamps as strings
+                             start_time_str = start_time.strftime('%Y-%m-%d %H:%M:%S%z')
+                             end_time_str = end_time.strftime('%Y-%m-%d %H:%M:%S%z')
+
+                             # Escape single quotes
+                             unique_identifier = unique_identifier.replace("'", "''")
+
+                             condition = f"({time_index_name} >= '{start_time_str}' AND {time_index_name} <= '{end_time_str}' " \
+                                         f"AND unique_identifier = '{unique_identifier}')"
+                             delete_conditions.append(condition)
+
+                         # Combine all conditions using OR
+                         where_clause = ' OR '.join(delete_conditions)
+                         delete_query = f"DELETE FROM public.{table_name} WHERE {where_clause};"
+                         cur.execute(delete_query)
+                     else:
+                         # Single time index: a parameterized range DELETE is enough
+                         delete_query = f"DELETE FROM public.{table_name} WHERE {time_index_name} >= %s AND {time_index_name} <= %s;"
+                         cur.execute(delete_query, (min_d, max_d))
+
+                 conn.commit()
+             except Exception as e:
+                 conn.rollback()
+                 logger.error(f"An error occurred during deletion: {e}")
+                 raise
+
+     if use_chunks:
+         total_rows = len(serialized_data_frame)
+         num_threads = min(num_threads, total_rows)
+         chunk_size = int(np.ceil(total_rows / num_threads))
+
+         # Generator to yield chunks without copying data
+         def get_dataframe_chunks(df, chunk_size):
+             for start_row in range(0, df.shape[0], chunk_size):
+                 yield df.iloc[start_row:start_row + chunk_size]
+
+         # Progress bar for chunks
+         total_chunks = int(np.ceil(total_rows / chunk_size))
+
+         def insert_chunk(chunk_df):
+             try:
+                 with psycopg2.connect(time_series_orm_db_connection) as conn:
+                     with conn.cursor() as cur:
+                         buffer_size = 10000  # Adjust based on memory and performance requirements
+                         data_generator = chunk_df.itertuples(index=False, name=None)
+
+                         total_records = len(chunk_df)
+                         with tqdm(total=total_records, desc="Inserting records", leave=False) as pbar:
+                             while True:
+                                 batch = list(itertools.islice(data_generator, buffer_size))
+                                 if not batch:
+                                     break
+
+                                 # Convert batch to CSV formatted string
+                                 output = StringIO()
+                                 writer = csv.writer(output)
+                                 writer.writerows(batch)
+                                 output.seek(0)
+
+                                 copy_query = f"COPY public.{table_name} ({', '.join(columns)}) FROM STDIN WITH CSV"
+                                 cur.copy_expert(copy_query, output)
+
+                                 # Update progress bar
+                                 pbar.update(len(batch))
+
+                     conn.commit()
+             except Exception as e:
+                 logger.error(f"An error occurred during insertion: {e}")
+                 raise
+
+         with ThreadPoolExecutor(max_workers=num_threads) as executor:
+             list(tqdm(executor.map(insert_chunk, get_dataframe_chunks(serialized_data_frame, chunk_size)),
+                       total=total_chunks, desc="Processing chunks"))
+
+     else:
+         # Single insert using the same COPY-based batching
+         try:
+             with psycopg2.connect(time_series_orm_db_connection) as conn:
+                 with conn.cursor() as cur:
+                     buffer_size = 10000
+                     data_generator = serialized_data_frame.itertuples(index=False, name=None)
+                     total_records = len(serialized_data_frame)
+                     with tqdm(total=total_records, desc="Inserting records") as pbar:
+                         while True:
+                             batch = list(itertools.islice(data_generator, buffer_size))
+                             if not batch:
+                                 break
+
+                             output = StringIO()
+                             writer = csv.writer(output)
+                             writer.writerows(batch)
+                             output.seek(0)
+
+                             copy_query = f"COPY public.{table_name} ({', '.join(columns)}) FROM STDIN WITH CSV"
+                             cur.copy_expert(copy_query, output)
+
+                             # Update progress bar
+                             pbar.update(len(batch))
+
+                 conn.commit()
+         except Exception as e:
+             logger.error(f"An error occurred during single insert: {e}")
+             raise
+
+     # Indices are only rebuilt at inception (after the initial bulk load)
+     if metadata._rebuild_indices:
+         logger.info("Rebuilding indices...")
+         extra_indices = metadata.sourcetableconfiguration.get_time_scale_extra_table_indices()
+
+         with psycopg2.connect(time_series_orm_db_connection) as conn:
+             with conn.cursor() as cur:
+                 # Create each index
+                 for index_name, index_details in extra_indices.items():
+                     index_type, index_query = index_details["type"], index_details["query"]
+
+                     if index_type not in ("INDEX", "UNIQUE INDEX"):
+                         raise Exception(f"Unknown index type: {index_type}")
+
+                     sql_create_index = f"CREATE {index_type} {index_name} ON public.{table_name} {index_query}"
+                     logger.info(f"Executing SQL: {sql_create_index}")
+                     cur.execute(sql_create_index)
+
+                 # After creating all indexes, run ANALYZE to update statistics
+                 sql_analyze = f"ANALYZE public.{table_name}"
+                 logger.info(f"Executing SQL: {sql_analyze}")
+                 cur.execute(sql_analyze)
+
+             # Commit the transaction after creating indexes and analyzing
+             conn.commit()
+
+         logger.info("Index rebuilding and ANALYZE complete.")
+
+
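The insert path above streams CSV batches through `COPY ... FROM STDIN` instead of `executemany`, which is usually much faster for bulk loads. A self-contained sketch of that pattern, assuming psycopg2 is installed and a hypothetical table `public.demo(a int, b float)` exists:

```python
# Standalone sketch of the COPY FROM STDIN batching technique used above.
import csv
import itertools
from io import StringIO

import psycopg2

rows = ((i, i * 0.5) for i in range(100_000))  # any iterable of tuples
with psycopg2.connect("postgresql://user:password@localhost:5432/tsdb") as conn:
    with conn.cursor() as cur:
        while True:
            batch = list(itertools.islice(rows, 10_000))
            if not batch:
                break
            buf = StringIO()
            csv.writer(buf).writerows(batch)  # serialize the batch as CSV
            buf.seek(0)
            cur.copy_expert("COPY public.demo (a, b) FROM STDIN WITH CSV", buf)
    conn.commit()
```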
389
+ def process_and_update_table(
+         serialized_data_frame,
+         local_metadata: "LocalTimeSerie",
+         grouped_dates: List,
+         data_source: object,
+         index_names: List[str],
+         time_index_name: str,
+         overwrite: bool = False,
+         JSON_COMPRESSED_PREFIX: Optional[List[str]] = None,
+ ):
+     """
+     Process a serialized DataFrame, handle overwriting, and update a database table.
+
+     Args:
+         serialized_data_frame (pd.DataFrame): The DataFrame to process and update.
+         local_metadata (LocalTimeSerie): Local metadata whose `remote_table` holds the table configuration.
+         grouped_dates (list): List of grouped dates to assist with the update.
+         data_source (object): A data source object with a `get_connection_uri` method.
+         index_names (list): List of index column names.
+         time_index_name (str): The name of the time index column.
+         overwrite (bool): Whether to overwrite the table or not.
+         JSON_COMPRESSED_PREFIX (list): List of prefixes to identify JSON-compressed columns.
+
+     Returns:
+         None
+     """
+     import_psycopg2()
+     JSON_COMPRESSED_PREFIX = JSON_COMPRESSED_PREFIX or []
+     metadata = local_metadata.remote_table
+     if "unique_identifier" in serialized_data_frame.columns:
+         serialized_data_frame['unique_identifier'] = serialized_data_frame['unique_identifier'].astype(str)
+
+     TDAG_ENDPOINT = os.environ.get('TDAG_ENDPOINT', "")
+     base_url = TDAG_ENDPOINT + "/orm/api/dynamic_table"
+     serialized_data_frame = serialized_data_frame.replace({np.nan: None})
+
+     # Validate JSON-compressed columns
+     for c in serialized_data_frame.columns:
+         if any(t in c for t in JSON_COMPRESSED_PREFIX):
+             assert isinstance(serialized_data_frame[c].iloc[0], dict)
+
+     # Encode JSON-compressed columns
+     for c in serialized_data_frame.columns:
+         if any(t in c for t in JSON_COMPRESSED_PREFIX):
+             serialized_data_frame[c] = serialized_data_frame[c].apply(lambda x: json.dumps(x).encode())
+
+     # Handle overwrite: ask the backend to decompress affected chunks first
+     recompress = False
+     if overwrite:
+         url = f"{base_url}/{metadata.id}/decompress_chunks/"
+         from ..models_vam import BaseObject
+         s = BaseObject.build_session()
+
+         r = make_request(
+             s=s, loaders=BaseObject.LOADERS,
+             r_type="POST",
+             url=url,
+             payload={
+                 "json": {
+                     "start_date": serialized_data_frame[time_index_name].min().strftime(DATE_FORMAT),
+                     "end_date": serialized_data_frame[time_index_name].max().strftime(DATE_FORMAT),
+                 }
+             },
+             time_out=60 * 5,
+         )
+
+         if r.status_code not in [200, 204]:
+             logger.error(r.text)
+             raise Exception("Error trying to decompress table")
+         elif r.status_code == 200:
+             recompress = True
+
+     # Check if the table is empty
+     table_is_empty = metadata.sourcetableconfiguration.last_time_index_value is None
+
+     # Update the table
+     direct_table_update(
+         serialized_data_frame=serialized_data_frame,
+         grouped_dates=grouped_dates,
+         time_series_orm_db_connection=data_source.get_connection_uri(),
+         metadata=metadata,
+         overwrite=overwrite,
+         table_is_empty=table_is_empty,
+     )
+
+     # Recompress if needed (currently a placeholder)
+     if recompress:
+         pass
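End to end, the write path is: optional backend decompression, ranged DELETE, then the threaded COPY insert. A hedged sketch of the call, assuming `local_metadata`, `data_source`, and `grouped_dates` come from the TDAG ORM layer:

```python
# Hypothetical write path; all objects besides the literals are ORM-provided.
process_and_update_table(
    serialized_data_frame=df,
    local_metadata=local_metadata,
    grouped_dates=grouped_dates,       # per-identifier min/max frame (assumed)
    data_source=data_source,
    index_names=["time_index", "unique_identifier"],
    time_index_name="time_index",
    overwrite=True,
    JSON_COMPRESSED_PREFIX=["json_"],  # hypothetical prefix
)
```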
mainsequence/client/models_helpers.py
@@ -0,0 +1,113 @@
+ import datetime
+ from typing import Any, Dict, List, Literal, Optional, Union
+
+ from pydantic import BaseModel, Field, PositiveInt
+
+ from .models_vam import *
+ from .base import MARKETS_CONSTANTS
+ from .models_tdag import DynamicTableMetaData, LocalTimeSerie, POD_PROJECT
+
+
+ def get_right_account_class(account: Account):
+     from mainsequence.client import models_vam as model_module
+     execution_venue_symbol = account.execution_venue.symbol
+     AccountClass = getattr(model_module, MARKETS_CONSTANTS.ACCOUNT_VENUE_FACTORY[execution_venue_symbol])
+     account, _ = AccountClass.get(id=account.id)
+     return account
+
+
+ class Slide(BasePydanticModel):
+     id: Optional[int] = None
+
+     number: PositiveInt = Field(
+         ...,
+         description="1-based position of the slide within its presentation",
+         example=3,
+     )
+     body: Optional[str] = Field(
+         default=None,
+         description="Raw slide content in markdown/HTML/etc.",
+     )
+     created_at: datetime.datetime = Field(
+         default_factory=datetime.datetime.utcnow,
+         description="Timestamp when the slide row was created",
+         example="2025-06-02T12:34:56Z",
+     )
+     updated_at: datetime.datetime = Field(
+         default_factory=datetime.datetime.utcnow,
+         description="Timestamp automatically updated on save",
+         example="2025-06-02T12:34:56Z",
+     )
+
+ class Presentation(BaseObjectOrm, BasePydanticModel):
+     id: Optional[int] = None
+     title: str = Field(..., max_length=255)
+     description: str = Field("", description="Free-form description of the deck")
+     slides: List[Slide]
+
+     # These come from the DB and are read-only in normal create/update requests
+     created_at: Optional[datetime.datetime] = None
+     updated_at: Optional[datetime.datetime] = None
+
+
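A minimal in-memory construction sketch for the two models above (field values are illustrative, and it assumes `BaseObjectOrm` adds no further required fields; no backend call is made):

```python
# Illustrative only: builds the pydantic objects locally.
deck = Presentation(
    title="Quarterly Portfolio Review",
    description="Auto-generated deck",
    slides=[
        Slide(number=1, body="# Overview"),
        Slide(number=2, body="## Performance\n- YTD return ..."),
    ],
)
print(deck.slides[0].number)  # -> 1
```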
56
+ class FileResource(BaseModel):
+     """Base model for a resource that is a file."""
+     path: str = Field(..., min_length=1, description="The filesystem path to the resource.")
+
+ class ScriptResource(FileResource):
+     pass
+
+ class NotebookResource(FileResource):
+     pass
+
+ class AppResource(BaseModel):
+     """An app to be used by a job."""
+     name: str = Field(..., min_length=1, description="The name of the app.")
+     configuration: Dict[str, Any] = Field(
+         default_factory=dict, description="Key-value configuration for the app."
+     )
+
+ # A resource is a single-key mapping whose key names the resource kind
+ Resource = Union[
+     Dict[Literal["script"], ScriptResource],
+     Dict[Literal["notebook"], NotebookResource],
+     Dict[Literal["app"], AppResource],
+ ]
+
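Each `Resource` is a one-key dict keyed by its kind. Hypothetical payloads matching the union (paths and app name are placeholders):

```python
# Hypothetical resource payloads matching the Resource union above.
script_resource: Resource = {"script": ScriptResource(path="scripts/rebalance.py")}
app_resource: Resource = {
    "app": AppResource(name="PortfolioReportApp", configuration={"portfolio": "demo"})
}
```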
79
+ class CrontabSchedule(BaseModel):
+     """A schedule defined by a standard crontab expression."""
+     type: Literal["crontab"]
+     start_time: Optional[datetime.datetime] = None
+     expression: str = Field(..., min_length=1, description="A valid cron string, e.g., '0 5 * * 1-5'.")
+
+ class IntervalSchedule(BaseModel):
+     """A schedule that repeats at a fixed interval."""
+     type: Literal["interval"]
+     start_time: Optional[datetime.datetime] = None
+     every: PositiveInt = Field(..., description="The frequency of the interval (must be > 0).")
+     period: Literal["seconds", "minutes", "hours", "days"]
+
+ Schedule = Union[CrontabSchedule, IntervalSchedule]
+
+ class Job(BaseObjectOrm, BasePydanticModel):
+     """A single, named job with its resource and schedule."""
+     name: str = Field(..., min_length=1, description="A human-readable name for the job.")
+     resource: Resource
+     schedule: Optional[Schedule] = Field(default=None, description="The job's execution schedule.")
+
+     @classmethod
+     def create_from_configuration(cls, job_configuration):
+         url = cls.get_object_url() + "/create_from_configuration/"
+         s = cls.build_session()
+         job_configuration["project_id"] = POD_PROJECT.id
+         r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload={"json": job_configuration})
+         if r.status_code not in [200, 201]:
+             raise Exception(r.text)
+         return r.json()
+
+ class ProjectConfiguration(BaseModel):
+     """The root model for the entire project configuration."""
+     name: str = Field(..., min_length=1, description="The name of the project.")
+     jobs: List[Job]
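Putting the models together, a complete project configuration might look like the sketch below. Names, paths, and the cron string are illustrative, and it assumes `BaseObjectOrm` adds no further required fields; this only validates locally with pydantic:

```python
# Illustrative end-to-end configuration for the models above.
config = ProjectConfiguration(
    name="demo-project",
    jobs=[
        Job(
            name="nightly-rebalance",
            resource={"script": ScriptResource(path="scripts/rebalance.py")},
            schedule=CrontabSchedule(type="crontab", expression="0 5 * * 1-5"),
        ),
        Job(
            name="news-refresh",
            resource={"app": AppResource(name="NewsApp")},
            schedule=IntervalSchedule(type="interval", every=15, period="minutes"),
        ),
    ],
)
print(len(config.jobs))  # -> 2
```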