mainsequence-2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mainsequence/__init__.py +0 -0
- mainsequence/__main__.py +9 -0
- mainsequence/cli/__init__.py +1 -0
- mainsequence/cli/api.py +157 -0
- mainsequence/cli/cli.py +442 -0
- mainsequence/cli/config.py +78 -0
- mainsequence/cli/ssh_utils.py +126 -0
- mainsequence/client/__init__.py +17 -0
- mainsequence/client/base.py +431 -0
- mainsequence/client/data_sources_interfaces/__init__.py +0 -0
- mainsequence/client/data_sources_interfaces/duckdb.py +1468 -0
- mainsequence/client/data_sources_interfaces/timescale.py +479 -0
- mainsequence/client/models_helpers.py +113 -0
- mainsequence/client/models_report_studio.py +412 -0
- mainsequence/client/models_tdag.py +2276 -0
- mainsequence/client/models_vam.py +1983 -0
- mainsequence/client/utils.py +387 -0
- mainsequence/dashboards/__init__.py +0 -0
- mainsequence/dashboards/streamlit/__init__.py +0 -0
- mainsequence/dashboards/streamlit/assets/config.toml +12 -0
- mainsequence/dashboards/streamlit/assets/favicon.png +0 -0
- mainsequence/dashboards/streamlit/assets/logo.png +0 -0
- mainsequence/dashboards/streamlit/core/__init__.py +0 -0
- mainsequence/dashboards/streamlit/core/theme.py +212 -0
- mainsequence/dashboards/streamlit/pages/__init__.py +0 -0
- mainsequence/dashboards/streamlit/scaffold.py +220 -0
- mainsequence/instrumentation/__init__.py +7 -0
- mainsequence/instrumentation/utils.py +101 -0
- mainsequence/instruments/__init__.py +1 -0
- mainsequence/instruments/data_interface/__init__.py +10 -0
- mainsequence/instruments/data_interface/data_interface.py +361 -0
- mainsequence/instruments/instruments/__init__.py +3 -0
- mainsequence/instruments/instruments/base_instrument.py +85 -0
- mainsequence/instruments/instruments/bond.py +447 -0
- mainsequence/instruments/instruments/european_option.py +74 -0
- mainsequence/instruments/instruments/interest_rate_swap.py +217 -0
- mainsequence/instruments/instruments/json_codec.py +585 -0
- mainsequence/instruments/instruments/knockout_fx_option.py +146 -0
- mainsequence/instruments/instruments/position.py +475 -0
- mainsequence/instruments/instruments/ql_fields.py +239 -0
- mainsequence/instruments/instruments/vanilla_fx_option.py +107 -0
- mainsequence/instruments/pricing_models/__init__.py +0 -0
- mainsequence/instruments/pricing_models/black_scholes.py +49 -0
- mainsequence/instruments/pricing_models/bond_pricer.py +182 -0
- mainsequence/instruments/pricing_models/fx_option_pricer.py +90 -0
- mainsequence/instruments/pricing_models/indices.py +350 -0
- mainsequence/instruments/pricing_models/knockout_fx_pricer.py +209 -0
- mainsequence/instruments/pricing_models/swap_pricer.py +502 -0
- mainsequence/instruments/settings.py +175 -0
- mainsequence/instruments/utils.py +29 -0
- mainsequence/logconf.py +284 -0
- mainsequence/reportbuilder/__init__.py +0 -0
- mainsequence/reportbuilder/__main__.py +0 -0
- mainsequence/reportbuilder/examples/ms_template_report.py +706 -0
- mainsequence/reportbuilder/model.py +713 -0
- mainsequence/reportbuilder/slide_templates.py +532 -0
- mainsequence/tdag/__init__.py +8 -0
- mainsequence/tdag/__main__.py +0 -0
- mainsequence/tdag/config.py +129 -0
- mainsequence/tdag/data_nodes/__init__.py +12 -0
- mainsequence/tdag/data_nodes/build_operations.py +751 -0
- mainsequence/tdag/data_nodes/data_nodes.py +1292 -0
- mainsequence/tdag/data_nodes/persist_managers.py +812 -0
- mainsequence/tdag/data_nodes/run_operations.py +543 -0
- mainsequence/tdag/data_nodes/utils.py +24 -0
- mainsequence/tdag/future_registry.py +25 -0
- mainsequence/tdag/utils.py +40 -0
- mainsequence/virtualfundbuilder/__init__.py +45 -0
- mainsequence/virtualfundbuilder/__main__.py +235 -0
- mainsequence/virtualfundbuilder/agent_interface.py +77 -0
- mainsequence/virtualfundbuilder/config_handling.py +86 -0
- mainsequence/virtualfundbuilder/contrib/__init__.py +0 -0
- mainsequence/virtualfundbuilder/contrib/apps/__init__.py +8 -0
- mainsequence/virtualfundbuilder/contrib/apps/etf_replicator_app.py +164 -0
- mainsequence/virtualfundbuilder/contrib/apps/generate_report.py +292 -0
- mainsequence/virtualfundbuilder/contrib/apps/load_external_portfolio.py +107 -0
- mainsequence/virtualfundbuilder/contrib/apps/news_app.py +437 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_report_app.py +91 -0
- mainsequence/virtualfundbuilder/contrib/apps/portfolio_table.py +95 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_named_portfolio.py +45 -0
- mainsequence/virtualfundbuilder/contrib/apps/run_portfolio.py +40 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/base.html +147 -0
- mainsequence/virtualfundbuilder/contrib/apps/templates/report.html +77 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/__init__.py +5 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/external_weights.py +61 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/intraday_trend.py +149 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/market_cap.py +310 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/mock_signal.py +78 -0
- mainsequence/virtualfundbuilder/contrib/data_nodes/portfolio_replicator.py +269 -0
- mainsequence/virtualfundbuilder/contrib/prices/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/prices/data_nodes.py +810 -0
- mainsequence/virtualfundbuilder/contrib/prices/utils.py +11 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/__init__.py +1 -0
- mainsequence/virtualfundbuilder/contrib/rebalance_strategies/rebalance_strategies.py +313 -0
- mainsequence/virtualfundbuilder/data_nodes.py +637 -0
- mainsequence/virtualfundbuilder/enums.py +23 -0
- mainsequence/virtualfundbuilder/models.py +282 -0
- mainsequence/virtualfundbuilder/notebook_handling.py +42 -0
- mainsequence/virtualfundbuilder/portfolio_interface.py +272 -0
- mainsequence/virtualfundbuilder/resource_factory/__init__.py +0 -0
- mainsequence/virtualfundbuilder/resource_factory/app_factory.py +170 -0
- mainsequence/virtualfundbuilder/resource_factory/base_factory.py +238 -0
- mainsequence/virtualfundbuilder/resource_factory/rebalance_factory.py +101 -0
- mainsequence/virtualfundbuilder/resource_factory/signal_factory.py +183 -0
- mainsequence/virtualfundbuilder/utils.py +381 -0
- mainsequence-2.0.0.dist-info/METADATA +105 -0
- mainsequence-2.0.0.dist-info/RECORD +110 -0
- mainsequence-2.0.0.dist-info/WHEEL +5 -0
- mainsequence-2.0.0.dist-info/licenses/LICENSE +40 -0
- mainsequence-2.0.0.dist-info/top_level.txt +1 -0
mainsequence/client/data_sources_interfaces/timescale.py
@@ -0,0 +1,479 @@
+from importlib.metadata import metadata
+
+import pandas as pd
+
+import tempfile
+import tqdm
+
+from concurrent.futures import ThreadPoolExecutor
+import itertools
+import csv
+from io import StringIO
+from tqdm import tqdm  # Import tqdm for progress bar
+
+import numpy as np
+import json
+
+from typing import Dict, List, Union,Optional
+import datetime
+from mainsequence.logconf import logger
+from ..utils import DATE_FORMAT, make_request, set_types_in_table
+import os
+
+
+def import_psycopg2():
+    import psycopg2
+    from psycopg2 import errors
+    from psycopg2.extras import execute_batch
+    from pgcopy import CopyManager
+    from psycopg2.extras import execute_values
+
+def read_sql_tmpfile(query, time_series_orm_uri_db_connection: str):
+    with tempfile.TemporaryFile() as tmpfile:
+        copy_sql = "COPY ({query}) TO STDOUT WITH CSV {head}".format(
+            query=query, head="HEADER"
+        )
+        # conn = db_engine.raw_connection()
+        # cur = conn.cursor()
+        with psycopg2.connect(time_series_orm_uri_db_connection) as conn:
+            # TEMP FOR FUCKED UP BELOW
+            # cur = session.connection().connection.cursor()
+            cur = conn.cursor()
+            cur.copy_expert(copy_sql, tmpfile)
+            tmpfile.seek(0)
+            df = pd.read_csv(tmpfile, header=0)
+
+    return df
+
+
+def filter_by_assets_ranges(table_name, asset_ranges_map, index_names, data_source, column_types):
+    """
+    Query time series data dynamically based on asset ranges.
+
+    Args:
+        table_name (str): The name of the table to query.
+        asset_ranges_map (dict): A dictionary where keys are asset symbols and values are dictionaries containing:
+            - 'start_date' (datetime): The start date of the range.
+            - 'start_date_operand' (str): The SQL operand for the start date (e.g., '>=' or '>').
+            - 'end_date' (datetime or None): The end date of the range.
+        index_names (list): List of column names to set as the DataFrame index.
+        data_source: A data source object with a method `get_connection_uri()` to get the database connection URI.
+
+    Returns:
+        pd.DataFrame: A Pandas DataFrame with the queried data, indexed by the specified columns.
+    """
+    # Base SQL query
+    query_base = f"SELECT * FROM {table_name} WHERE"
+
+    # Initialize a list to store query parts
+    query_parts = []
+
+    # Build query dynamically based on the asset_ranges_map dictionary
+    for symbol, range_dict in asset_ranges_map.items():
+        if range_dict['end_date'] is not None:
+            tmp_query = (
+                f" (asset_symbol = '{symbol}' AND "
+                f"time_index BETWEEN '{range_dict['start_date']}' AND '{range_dict['end_date']}') "
+            )
+        else:
+            tmp_query = (
+                f" (asset_symbol = '{symbol}' AND "
+                f"time_index {range_dict['start_date_operand']} '{range_dict['start_date']}') "
+            )
+        query_parts.append(tmp_query)
+
+    # Combine all query parts using OR
+    full_query = query_base + " OR ".join(query_parts)
+
+    # Execute the query and load results into a Pandas DataFrame
+    df = read_sql_tmpfile(full_query, time_series_orm_uri_db_connection=data_source.get_connection_uri())
+
+    # set correct types for values
+    df = set_types_in_table(df, column_types)
+
+    # Set the specified columns as the DataFrame index
+    df = df.set_index(index_names)
+
+    return df
+
+
+def direct_data_from_db(local_metadata: dict, connection_uri: str,
+                        start_date: Union[datetime.datetime, None] = None,
+                        great_or_equal: bool = True, less_or_equal: bool = True,
+                        end_date: Union[datetime.datetime, None] = None,
+                        columns: Union[list, None] = None,
+                        unique_identifier_list: Union[list, None] = None,
+                        unique_identifier_range_map:Optional[dict] = None
+                        ):
+    """
+    Connects directly to the DB without passing through the ORM to speed up calculations.
+
+    Parameters
+    ----------
+    metadata : dict
+        Metadata containing table and column details.
+    connection_config : dict
+        Connection configuration for the database.
+    start_date : datetime.datetime, optional
+        The start date for filtering. If None, no lower bound is applied.
+    great_or_equal : bool, optional
+        Whether the start_date filter is inclusive (>=). Defaults to True.
+    less_or_equal : bool, optional
+        Whether the end_date filter is inclusive (<=). Defaults to True.
+    end_date : datetime.datetime, optional
+        The end date for filtering. If None, no upper bound is applied.
+    columns : list, optional
+        Specific columns to select. If None, all columns are selected.
+
+    Returns
+    -------
+    pd.DataFrame
+        Data from the table as a pandas DataFrame, optionally filtered by date range.
+    """
+    import_psycopg2()
+    metadata=local_metadata.remote_table
+    def fast_table_dump(connection_config, table_name, ):
+        query = f"COPY {table_name} TO STDOUT WITH CSV HEADER"
+
+        with psycopg2.connect(connection_config['connection_details']) as connection:
+            with connection.cursor() as cursor:
+                import io
+                buffer = io.StringIO()
+                cursor.copy_expert(query, buffer)
+                buffer.seek(0)
+                df = pd.read_csv(buffer)
+        return df
+
+    # Build the SELECT clause
+    select_clause = ", ".join(columns) if columns else "*"
+
+    # Build the WHERE clause dynamically
+    where_clauses = []
+    time_index_name = metadata.sourcetableconfiguration.time_index_name
+    if start_date:
+        operator = ">=" if great_or_equal else ">"
+        where_clauses.append(f"{time_index_name} {operator} '{start_date}'")
+    if end_date:
+        operator = "<=" if less_or_equal else "<"
+        where_clauses.append(f"{time_index_name} {operator} '{end_date}'")
+
+    if unique_identifier_list:
+        helper_symbol = "','"
+        where_clauses.append(f"unique_identifier IN ('{helper_symbol.join(unique_identifier_list)}')")
+
+    # Combine WHERE clauses
+    where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
+
+    # Construct the query
+    query = f"SELECT {select_clause} FROM {metadata.table_name} {where_clause}"
+    # if where_clause=="":
+    #     data=fast_table_dump(connection_config, metadata['table_name'])
+    #     data[metadata["sourcetableconfiguration"]['time_index_name']]=pd.to_datetime(data[metadata["sourcetableconfiguration"]['time_index_name']])
+    # else:
+    with psycopg2.connect(connection_uri) as connection:
+        with connection.cursor() as cursor:
+            cursor.execute(query)
+            column_names = [desc[0] for desc in cursor.description]
+            data = cursor.fetchall()
+
+    # Convert to DataFrame
+    data = pd.DataFrame(data=data, columns=column_names)
+
+    data = data.set_index(metadata.sourcetableconfiguration.index_names)
+
+    return data
+
+
+def direct_table_update(metadata:"DynamicTableMetaData", serialized_data_frame: pd.DataFrame, overwrite: bool,
+                        grouped_dates,
+                        table_is_empty: bool,
+                        time_series_orm_db_connection: Union[str, None] = None,
+                        use_chunks: bool = True, num_threads: int = 4):
+    """
+    Updates the database table with the given DataFrame.
+
+    Parameters:
+    - table_name: Name of the database table.
+    - serialized_data_frame: DataFrame containing the data to insert.
+    - overwrite: If True, existing data in the date range will be deleted before insertion.
+    - time_index_name: Name of the time index column.
+    - index_names: List of index column names.
+    - table_is_empty: If True, the table is empty.
+    - time_series_orm_db_connection: Database connection string.
+    - use_chunks: If True, data will be inserted in chunks using threads.
+    - num_threads: Number of threads to use when use_chunks is True.
+    """
+    import_psycopg2()
+    columns = serialized_data_frame.columns.tolist()
+
+    index_names=metadata.sourcetableconfiguration.index_names
+    table_name=metadata.table_name
+    time_index_name=metadata.sourcetableconfiguration.time_index_name
+    def drop_indexes(table_name, table_index_names):
+        # Use a separate connection for index management
+        with psycopg2.connect(time_series_orm_db_connection) as conn:
+            with conn.cursor() as cur:
+                for index_name in index_names.keys():
+                    drop_index_query = f'DROP INDEX IF EXISTS "{index_name}";'
+                    print(f"Dropping index '{index_name}'...")
+                    cur.execute(drop_index_query)
+            # Commit changes after all indexes are processed
+            conn.commit()
+            print("All specified indexes dropped successfully.")

+    # Drop indexes before insertion
+
+
+
+    # do not drop indices this is only done on inception
+    if metadata._drop_indices==True:
+        table_index_names=metadata.sourcetableconfiguration.get_time_scale_extra_table_indices()
+        drop_indexes(table_name, table_index_names)
+
+    if overwrite and not table_is_empty:
+        min_d = serialized_data_frame[time_index_name].min()
+        max_d = serialized_data_frame[time_index_name].max()
+
+        with psycopg2.connect(time_series_orm_db_connection) as conn:
+            try:
+                with conn.cursor() as cur:
+
+                    if len(index_names) > 1:
+
+                        grouped_dates = grouped_dates.rename(columns={"min": "start_time", "max": "end_time"})
+                        grouped_dates = grouped_dates.reset_index()
+                        grouped_dates = grouped_dates.to_dict("records")
+
+                        # Build the DELETE query
+                        delete_conditions = []
+                        for item in grouped_dates:
+                            unique_identifier = item['unique_identifier']
+                            start_time = item['start_time']
+                            end_time = item['end_time']
+
+                            # Format timestamps as strings
+                            start_time_str = start_time.strftime('%Y-%m-%d %H:%M:%S%z')
+                            end_time_str = end_time.strftime('%Y-%m-%d %H:%M:%S%z')
+
+                            # Escape single quotes
+                            unique_identifier = unique_identifier.replace("'", "''")
+
+                            # Build the condition string
+                            condition = f"({time_index_name} >= '{start_time_str}' AND {time_index_name} <= '{end_time_str}' " \
+                                        f"AND unique_identifier = '{unique_identifier}')"
+                            delete_conditions.append(condition)
+
+                        # Combine all conditions using OR
+                        where_clause = ' OR '.join(delete_conditions)
+                        delete_query = f"DELETE FROM public.{table_name} WHERE {where_clause};"
+
+                        # Execute the DELETE query
+                        cur.execute(delete_query)
+                    else:
+                        # Build a basic DELETE query using parameterized values
+                        delete_query = f"DELETE FROM public.{table_name} WHERE {time_index_name} >= %s AND {time_index_name} <= %s;"
+                        cur.execute(delete_query, (min_d, max_d))
+
+                conn.commit()
+            except Exception as e:
+                conn.rollback()
+                print(f"An error occurred during deletion: {e}")
+                raise
+
+    if use_chunks:
+        total_rows = len(serialized_data_frame)
+        num_threads = min(num_threads, total_rows)
+        chunk_size = int(np.ceil(total_rows / num_threads))
+
+        # Generator to yield chunks without copying data
+        def get_dataframe_chunks(df, chunk_size):
+            for start_row in range(0, df.shape[0], chunk_size):
+                yield df.iloc[start_row:start_row + chunk_size]
+
+        # Progress bar for chunks
+        total_chunks = int(np.ceil(total_rows / chunk_size))
+
+        def insert_chunk(chunk_df):
+            try:
+                with psycopg2.connect(time_series_orm_db_connection) as conn:
+                    with conn.cursor() as cur:
+                        buffer_size = 10000  # Adjust based on memory and performance requirements
+                        data_generator = chunk_df.itertuples(index=False, name=None)
+
+                        total_records = len(chunk_df)
+                        with tqdm(total=total_records, desc="Inserting records", leave=False) as pbar:
+                            while True:
+                                batch = list(itertools.islice(data_generator, buffer_size))
+                                if not batch:
+                                    break
+
+                                # Convert batch to CSV formatted string
+                                output = StringIO()
+                                writer = csv.writer(output)
+                                writer.writerows(batch)
+                                output.seek(0)
+
+                                copy_query = f"COPY public.{table_name} ({', '.join(columns)}) FROM STDIN WITH CSV"
+                                cur.copy_expert(copy_query, output)
+
+                                # Update progress bar
+                                pbar.update(len(batch))
+
+                    conn.commit()
+            except Exception as e:
+                print(f"An error occurred during insertion: {e}")
+                raise
+
+        with ThreadPoolExecutor(max_workers=num_threads) as executor:
+            list(tqdm(executor.map(insert_chunk, get_dataframe_chunks(serialized_data_frame, chunk_size)),
+                      total=total_chunks, desc="Processing chunks"))
+
+    else:
+        # Single insert using the same optimized method
+        try:
+            with psycopg2.connect(time_series_orm_db_connection) as conn:
+                with conn.cursor() as cur:
+                    buffer_size = 10000
+                    data_generator = serialized_data_frame.itertuples(index=False, name=None)
+                    total_records = len(serialized_data_frame)
+                    with tqdm(total=total_records, desc="Inserting records") as pbar:
+                        while True:
+                            batch = list(itertools.islice(data_generator, buffer_size))
+                            if not batch:
+                                break
+                            #
+                            output = StringIO()
+                            writer = csv.writer(output)
+                            writer.writerows(batch)
+                            output.seek(0)
+
+                            copy_query = f"COPY public.{table_name} ({', '.join(columns)}) FROM STDIN WITH CSV"
+                            cur.copy_expert(copy_query, output)
+
+                            # Update progress bar
+                            pbar.update(len(batch))
+
+                conn.commit()
+        except Exception as e:
+            print(f"An error occurred during single insert: {e}")
+            raise
+    # do not rebuild indices this is only done on inception
+    if metadata._rebuild_indices:
+        logger.info("Rebuilding indices...")
+        extra_indices = metadata.sourcetableconfiguration.get_time_scale_extra_table_indices()
+
+        with psycopg2.connect(time_series_orm_db_connection) as conn:
+            with conn.cursor() as cur:
+                # Create each index
+                for index_name, index_details in extra_indices.items():
+                    index_type, index_query = index_details["type"], index_details["query"]
+
+                    if index_type not in ("INDEX", "UNIQUE INDEX"):
+                        raise Exception(f"Unknown index type: {index_type}")
+
+                    sql_create_index = f"CREATE {index_type} {index_name} ON public.{table_name} {index_query}"
+                    logger.info(f"Executing SQL: {sql_create_index}")
+                    cur.execute(sql_create_index)
+
+                # After creating all indexes, run ANALYZE to update statistics
+                sql_analyze = f"ANALYZE public.{table_name}"
+                logger.info(f"Executing SQL: {sql_analyze}")
+                cur.execute(sql_analyze)
+
+            # Commit the transaction after creating indexes and analyzing
+            conn.commit()
+
+        logger.info("Index rebuilding and ANALYZE complete.")
+
+
+def process_and_update_table(
+        serialized_data_frame,
+        local_metadata: "LocalTimeSerie",
+        grouped_dates: List,
+        data_source: object,
+        index_names: List[str],
+        time_index_name: str,
+        overwrite: bool = False,
+        JSON_COMPRESSED_PREFIX: List[str] = None,
+
+
+):
+    """
+    Process a serialized DataFrame, handle overwriting, and update a database table.
+
+    Args:
+        serialized_data_frame (pd.DataFrame): The DataFrame to process and update.
+        metadata (DynamicTableMetaData): Metadata about the table, including table configuration.
+        grouped_dates (list): List of grouped dates to assist with the update.
+        data_source (object): A data source object with a `get_connection_uri` method.
+        index_names (list): List of index column names.
+        time_index_name (str): The name of the time index column.
+        overwrite (bool): Whether to overwrite the table or not.
+        JSON_COMPRESSED_PREFIX (list): List of prefixes to identify JSON-compressed columns.
+
+    Returns:
+        None
+    """
+    import_psycopg2()
+    JSON_COMPRESSED_PREFIX=JSON_COMPRESSED_PREFIX or []
+    metadata=local_metadata.remote_table
+    if "unique_identifier" in serialized_data_frame.columns:
+        serialized_data_frame['unique_identifier'] = serialized_data_frame['unique_identifier'].astype(str)
+
+    TDAG_ENDPOINT = f"{os.environ.get('TDAG_ENDPOINT')}"
+    base_url = TDAG_ENDPOINT + "/orm/api/dynamic_table"  # metadata.get("root_url")
+    serialized_data_frame = serialized_data_frame.replace({np.nan: None})
+
+    # Validate JSON-compressed columns
+    for c in serialized_data_frame.columns:
+        if any([t in c for t in JSON_COMPRESSED_PREFIX]):
+            assert isinstance(serialized_data_frame[c].iloc[0], dict)
+
+    # Encode JSON-compressed columns
+    for c in serialized_data_frame.columns:
+        if any([t in c for t in JSON_COMPRESSED_PREFIX]):
+            serialized_data_frame[c] = serialized_data_frame[c].apply(lambda x: json.dumps(x).encode())
+
+    # Handle overwrite and decompress chunks if required
+    recompress = False
+    if overwrite:
+        url = f"{base_url}/{metadata.id}/decompress_chunks/"
+        from ..models_vam import BaseObject
+        s = BaseObject.build_session()
+
+        r = make_request(
+            s=s, loaders=BaseObject.LOADERS,
+            r_type="POST",
+            url=url,
+            payload={
+                "json": {
+                    "start_date": serialized_data_frame[time_index_name].min().strftime(DATE_FORMAT),
+                    "end_date": serialized_data_frame[time_index_name].max().strftime(DATE_FORMAT),
+                }
+            },
+            time_out=60 * 5,
+        )
+
+        if r.status_code not in [200, 204]:
+            logger.error(r.text)
+            raise Exception("Error trying to decompress table")
+        elif r.status_code == 200:
+            recompress = True
+
+    # Check if the table is empty
+    table_is_empty = metadata.sourcetableconfiguration.last_time_index_value is None
+
+    # Update the table
+    direct_table_update(
+        serialized_data_frame=serialized_data_frame,
+        grouped_dates=grouped_dates,
+        time_series_orm_db_connection=data_source.get_connection_uri(),
+        metadata=metadata,
+        overwrite=overwrite,
+        table_is_empty=table_is_empty,
+    )
+
+    # Recompress if needed
+    if recompress:
+        # Logic to recompress if needed (currently a placeholder)
+        pass
mainsequence/client/models_helpers.py
@@ -0,0 +1,113 @@
+from .models_vam import *
+from .base import MARKETS_CONSTANTS
+from .models_tdag import DynamicTableMetaData, LocalTimeSerie
+from .models_tdag import LocalTimeSerie, POD_PROJECT
+import datetime
+
+from pydantic import BaseModel, Field, PositiveInt
+
+
+def get_right_account_class(account: Account):
+    from mainsequence.client import models_vam as model_module
+    execution_venue_symbol = account.execution_venue.symbol
+    AccountClass = getattr(model_module, MARKETS_CONSTANTS.ACCOUNT_VENUE_FACTORY[execution_venue_symbol])
+    account, _ = AccountClass.get(id=account.id)
+    return account
+
+
+
+
+
+
+class Slide(BasePydanticModel):
+    id:Optional[int]=None
+
+    number: PositiveInt = Field(
+        ...,
+        description="1-based position of the slide within its presentation",
+        example=3,
+    )
+    body: Optional[str] = Field(
+        default=None,
+        description="Raw slide content in markdown/HTML/etc.",
+    )
+    created_at: datetime.datetime = Field(
+        default_factory=datetime.datetime.utcnow,
+        description="Timestamp when the slide row was created",
+        example="2025-06-02T12:34:56Z",
+    )
+    updated_at: datetime.datetime = Field(
+        default_factory=datetime.datetime.utcnow,
+        description="Timestamp automatically updated on save",
+        example="2025-06-02T12:34:56Z",
+    )
+
+class Presentation(BaseObjectOrm, BasePydanticModel):
+    id:Optional[int]=None
+    title: str = Field(..., max_length=255)
+    description: str = Field("", description="Free-form description of the deck")
+    slides:List[Slide]
+
+    # These come from the DB and are read-only in normal create/update requests
+    created_at: Optional[datetime.datetime] = None
+    updated_at: Optional[datetime.datetime] = None
+
+
+class FileResource(BaseModel):
+    """Base model for a resource that is a file."""
+    path: str = Field(..., min_length=1, description="The filesystem path to the resource.")
+
+class ScriptResource(FileResource):
+    pass
+
+class NotebookResource(FileResource):
+    pass
+
+class AppResource(BaseModel):
+    """An app to be used by a job."""
+    name: str = Field(..., min_length=1, description="The name of the app.")
+    configuration: Dict[str, Any] = Field(
+        default_factory=dict, description="Key-value configuration for the app configuration."
+    )
+
+Resource = Union[
+    Dict[Literal["script"], ScriptResource],
+    Dict[Literal["notebook"], NotebookResource],
+    Dict[Literal["app"], AppResource],
+]
+
+class CrontabSchedule(BaseModel):
+    """A schedule defined by a standard crontab expression."""
+    type: Literal["crontab"]
+    start_time: Optional[datetime.datetime] = None
+    expression: str = Field(..., min_length=1, description="A valid cron string, e.g., '0 5 * * 1-5'.")
+
+class IntervalSchedule(BaseModel):
+    """A schedule that repeats at a fixed interval."""
+    type: Literal["interval"]
+    start_time: Optional[datetime.datetime] = None
+    every: PositiveInt = Field(..., description="The frequency of the interval (must be > 0).")
+    period: Literal["seconds", "minutes", "hours", "days"]
+
+Schedule = Union[CrontabSchedule, IntervalSchedule]
+
+class Job(BaseObjectOrm, BasePydanticModel):
+    """A single, named job with its resource and schedule."""
+    name: str = Field(..., min_length=1, description="A human-readable name for the job.")
+    resource: Resource
+    schedule: Optional[Schedule] = Field(default=None, description="The job's execution schedule.")
+
+    @classmethod
+    def create_from_configuration(cls, job_configuration):
+        url = cls.get_object_url() + f"/create_from_configuration/"
+        s = cls.build_session()
+        job_configuration["project_id"] = POD_PROJECT.id
+        r = make_request(s=s, loaders=cls.LOADERS, r_type="POST", url=url, payload={"json": job_configuration})
+        if r.status_code not in [200, 201]:
+            raise Exception(r.text)
+        return r.json()
+
+class ProjectConfiguration(BaseModel):
+    """The root model for the entire project configuration."""
+    name: str = Field(..., min_length=1, description="The name of the project.")
+    jobs: List[Job]