cosmotech_acceleration_library-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosmotech/coal/__init__.py +8 -0
- cosmotech/coal/aws/__init__.py +23 -0
- cosmotech/coal/aws/s3.py +235 -0
- cosmotech/coal/azure/__init__.py +23 -0
- cosmotech/coal/azure/adx/__init__.py +26 -0
- cosmotech/coal/azure/adx/auth.py +125 -0
- cosmotech/coal/azure/adx/ingestion.py +329 -0
- cosmotech/coal/azure/adx/query.py +56 -0
- cosmotech/coal/azure/adx/runner.py +217 -0
- cosmotech/coal/azure/adx/store.py +255 -0
- cosmotech/coal/azure/adx/tables.py +118 -0
- cosmotech/coal/azure/adx/utils.py +71 -0
- cosmotech/coal/azure/blob.py +109 -0
- cosmotech/coal/azure/functions.py +72 -0
- cosmotech/coal/azure/storage.py +74 -0
- cosmotech/coal/cosmotech_api/__init__.py +36 -0
- cosmotech/coal/cosmotech_api/connection.py +96 -0
- cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
- cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
- cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
- cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
- cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
- cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
- cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
- cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
- cosmotech/coal/cosmotech_api/parameters.py +48 -0
- cosmotech/coal/cosmotech_api/run.py +25 -0
- cosmotech/coal/cosmotech_api/run_data.py +173 -0
- cosmotech/coal/cosmotech_api/run_template.py +108 -0
- cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
- cosmotech/coal/cosmotech_api/runner/data.py +38 -0
- cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
- cosmotech/coal/cosmotech_api/runner/download.py +146 -0
- cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
- cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
- cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
- cosmotech/coal/cosmotech_api/workspace.py +127 -0
- cosmotech/coal/csm/__init__.py +6 -0
- cosmotech/coal/csm/engine/__init__.py +47 -0
- cosmotech/coal/postgresql/__init__.py +22 -0
- cosmotech/coal/postgresql/runner.py +93 -0
- cosmotech/coal/postgresql/store.py +98 -0
- cosmotech/coal/singlestore/__init__.py +17 -0
- cosmotech/coal/singlestore/store.py +100 -0
- cosmotech/coal/store/__init__.py +42 -0
- cosmotech/coal/store/csv.py +44 -0
- cosmotech/coal/store/native_python.py +25 -0
- cosmotech/coal/store/pandas.py +26 -0
- cosmotech/coal/store/pyarrow.py +23 -0
- cosmotech/coal/store/store.py +79 -0
- cosmotech/coal/utils/__init__.py +18 -0
- cosmotech/coal/utils/api.py +68 -0
- cosmotech/coal/utils/logger.py +10 -0
- cosmotech/coal/utils/postgresql.py +236 -0
- cosmotech/csm_data/__init__.py +6 -0
- cosmotech/csm_data/commands/__init__.py +6 -0
- cosmotech/csm_data/commands/adx_send_data.py +92 -0
- cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
- cosmotech/csm_data/commands/api/__init__.py +6 -0
- cosmotech/csm_data/commands/api/api.py +50 -0
- cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
- cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
- cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
- cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
- cosmotech/csm_data/commands/api/run_load_data.py +120 -0
- cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
- cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
- cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
- cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
- cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
- cosmotech/csm_data/commands/az_storage_upload.py +76 -0
- cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
- cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
- cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
- cosmotech/csm_data/commands/store/__init__.py +6 -0
- cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
- cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
- cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
- cosmotech/csm_data/commands/store/list_tables.py +48 -0
- cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
- cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
- cosmotech/csm_data/commands/store/reset.py +31 -0
- cosmotech/csm_data/commands/store/store.py +37 -0
- cosmotech/csm_data/main.py +57 -0
- cosmotech/csm_data/utils/__init__.py +6 -0
- cosmotech/csm_data/utils/click.py +18 -0
- cosmotech/csm_data/utils/decorators.py +75 -0
- cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
- cosmotech/translation/coal/__init__.py +6 -0
- cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
- cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
- cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
- cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
- cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
- cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
- cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
- cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
- cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
- cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
- cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
- cosmotech/translation/coal/en-US/coal/web.yml +2 -0
- cosmotech/translation/csm_data/__init__.py +6 -0
- cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
- cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
- cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
- cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
- cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
- cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
- cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
cosmotech/coal/azure/adx/store.py
@@ -0,0 +1,255 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+import os
+import tempfile
+import uuid
+from typing import Optional, List, Dict, Tuple, Union, Any
+
+import pyarrow
+import pyarrow.csv as pc
+import time
+from azure.kusto.data import KustoClient
+from azure.kusto.data.data_format import DataFormat
+from azure.kusto.ingest import IngestionProperties
+from azure.kusto.ingest import QueuedIngestClient
+from azure.kusto.ingest import ReportLevel
+from cosmotech.orchestrator.utils.translate import T
+from time import perf_counter
+
+from cosmotech.coal.azure.adx.tables import check_and_create_table, _drop_by_tag
+from cosmotech.coal.azure.adx.auth import initialize_clients
+from cosmotech.coal.azure.adx.ingestion import monitor_ingestion, handle_failures
+from cosmotech.coal.store.store import Store
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.coal.utils.postgresql import send_pyarrow_table_to_postgresql
+
+
+def send_table_data(
+    ingest_client: QueuedIngestClient, database: str, table_name: str, data: pyarrow.Table, operation_tag: str
+) -> Tuple[str, str]:
+    """
+    Send a PyArrow table to ADX.
+
+    Args:
+        ingest_client: The ingest client
+        database: The database name
+        table_name: The table name
+        data: The PyArrow table data
+        operation_tag: The operation tag for tracking
+
+    Returns:
+        tuple: (source_id, table_name)
+    """
+    LOGGER.debug(T("coal.services.adx.sending_data").format(table_name=table_name))
+    result = send_pyarrow_table_to_adx(ingest_client, database, table_name, data, operation_tag)
+    return result.source_id, table_name
+
+
+def process_tables(
+    store: Store, kusto_client: KustoClient, ingest_client: QueuedIngestClient, database: str, operation_tag: str
+) -> Tuple[List[str], Dict[str, str]]:
+    """
+    Process all tables in the store.
+
+    Args:
+        store: The data store
+        kusto_client: The Kusto client
+        ingest_client: The ingest client
+        database: The database name
+        operation_tag: The operation tag for tracking
+
+    Returns:
+        tuple: (source_ids, table_ingestion_id_mapping)
+    """
+    source_ids = []
+    table_ingestion_id_mapping = dict()
+
+    LOGGER.debug(T("coal.services.adx.listing_tables"))
+    table_list = list(store.list_tables())
+
+    for target_table_name in table_list:
+        LOGGER.info(T("coal.services.adx.working_on_table").format(table_name=target_table_name))
+        data = store.get_table(target_table_name)
+
+        if data.num_rows < 1:
+            LOGGER.warning(T("coal.services.adx.table_empty").format(table_name=target_table_name))
+            continue
+
+        check_and_create_table(kusto_client, database, target_table_name, data)
+
+        source_id, _ = send_table_data(ingest_client, database, target_table_name, data, operation_tag)
+        source_ids.append(source_id)
+        table_ingestion_id_mapping[source_id] = target_table_name
+
+    return source_ids, table_ingestion_id_mapping
+
+
+def send_pyarrow_table_to_adx(
+    client: QueuedIngestClient,
+    database: str,
+    table_name: str,
+    table_data: pyarrow.Table,
+    drop_by_tag: Optional[str] = None,
+):
+    drop_by_tags = [drop_by_tag] if (drop_by_tag is not None) else None
+
+    properties = IngestionProperties(
+        database=database,
+        table=table_name,
+        data_format=DataFormat.CSV,
+        drop_by_tags=drop_by_tags,
+        report_level=ReportLevel.FailuresAndSuccesses,
+        flush_immediately=True,
+    )
+
+    file_name = f"adx_{database}_{table_name}_{int(time.time())}_{uuid.uuid4()}.csv"
+    temp_file_path = os.path.join(os.environ.get("CSM_TEMP_ABSOLUTE_PATH", tempfile.gettempdir()), file_name)
+    pc.write_csv(table_data, temp_file_path, pc.WriteOptions(include_header=False))
+    try:
+        return client.ingest_from_file(temp_file_path, properties)
+    finally:
+        os.unlink(temp_file_path)
+
+
+def send_store_to_adx(
+    adx_uri: str,
+    adx_ingest_uri: str,
+    database_name: str,
+    wait: bool = False,
+    tag: Optional[str] = None,
+    store_location: Optional[str] = None,
+) -> Union[bool, Any]:
+    """
+    Send data from the store to Azure Data Explorer.
+
+    Args:
+        adx_uri: The Azure Data Explorer resource URI
+        adx_ingest_uri: The Azure Data Explorer resource ingest URI
+        database_name: The database name
+        wait: Whether to wait for ingestion to complete
+        tag: The operation tag for tracking (will generate a unique one if not provided)
+        store_location: Optional store location (uses default if not provided)
+
+    Returns:
+        bool: True if successful, False otherwise
+    """
+    # Generate a unique operation tag if none provided
+    operation_tag = tag or f"op-{str(uuid.uuid4())}"
+    LOGGER.debug(T("coal.services.adx.starting_ingestion").format(operation_tag=operation_tag))
+
+    # Initialize clients
+    kusto_client, ingest_client = initialize_clients(adx_uri, adx_ingest_uri)
+    database = database_name
+
+    # Load datastore
+    LOGGER.debug(T("coal.services.adx.loading_datastore"))
+    store = Store(store_location=store_location)
+
+    try:
+        # Process tables
+        source_ids, table_ingestion_id_mapping = process_tables(
+            store, kusto_client, ingest_client, database, operation_tag
+        )
+
+        LOGGER.info(T("coal.services.adx.data_sent"))
+
+        # Monitor ingestion if wait is True
+        has_failures = False
+        if wait and source_ids:
+            has_failures = monitor_ingestion(ingest_client, source_ids, table_ingestion_id_mapping)
+
+        # Handle failures
+        should_abort = handle_failures(kusto_client, database, operation_tag, has_failures)
+        if should_abort:
+            return False
+
+        return True
+
+    except Exception as e:
+        LOGGER.exception(T("coal.services.adx.ingestion_error"))
+        # Perform rollback using the tag
+        LOGGER.warning(T("coal.services.adx.dropping_data").format(operation_tag=operation_tag))
+        _drop_by_tag(kusto_client, database, operation_tag)
+        raise e
+
+
+def dump_store_to_adx(
+    store_folder: str,
+    postgres_host: str,
+    postgres_port: int,
+    postgres_db: str,
+    postgres_schema: str,
+    postgres_user: str,
+    postgres_password: str,
+    table_prefix: str = "Cosmotech_",
+    replace: bool = True,
+) -> None:
+    """
+    Dump Store data to an Azure Data Explorer database.
+
+    Args:
+        store_folder: Folder containing the Store
+        postgres_host: PostgreSQL host
+        postgres_port: PostgreSQL port
+        postgres_db: PostgreSQL database name
+        postgres_schema: PostgreSQL schema
+        postgres_user: PostgreSQL username
+        postgres_password: PostgreSQL password
+        table_prefix: Table prefix
+        replace: Whether to replace existing tables
+    """
+    _s = Store(store_location=store_folder)
+
+    tables = list(_s.list_tables())
+    if len(tables):
+        LOGGER.info(T("coal.services.database.sending_data").format(table=f"{postgres_db}.{postgres_schema}"))
+        total_rows = 0
+        _process_start = perf_counter()
+        for table_name in tables:
+            _s_time = perf_counter()
+            target_table_name = f"{table_prefix}{table_name}"
+            LOGGER.info(T("coal.services.database.table_entry").format(table=target_table_name))
+            data = _s.get_table(table_name)
+            if not len(data):
+                LOGGER.info(T("coal.services.database.no_rows"))
+                continue
+            _dl_time = perf_counter()
+            rows = send_pyarrow_table_to_postgresql(
+                data,
+                target_table_name,
+                postgres_host,
+                postgres_port,
+                postgres_db,
+                postgres_schema,
+                postgres_user,
+                postgres_password,
+                replace,
+            )
+            total_rows += rows
+            _up_time = perf_counter()
+            LOGGER.info(T("coal.services.database.row_count").format(count=rows))
+            LOGGER.debug(
+                T("coal.common.timing.operation_completed").format(
+                    operation="Load from datastore", time=f"{_dl_time - _s_time:0.3}"
+                )
+            )
+            LOGGER.debug(
+                T("coal.common.timing.operation_completed").format(
+                    operation="Send to postgresql", time=f"{_up_time - _dl_time:0.3}"
+                )
+            )
+        _process_end = perf_counter()
+        LOGGER.info(
+            T("coal.services.database.rows_fetched").format(
+                table="all tables",
+                count=total_rows,
+                time=f"{_process_end - _process_start:0.3}",
+            )
+        )
+    else:
+        LOGGER.info(T("coal.services.database.store_empty"))
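The hunk above (`cosmotech/coal/azure/adx/store.py`) exposes `send_store_to_adx` as the entry point: it generates a drop-by operation tag, creates any missing tables, queues one CSV ingestion per non-empty Store table, optionally waits on the results, and drops the tagged extents if ingestion raises. Note that `dump_store_to_adx` in the same file, despite its name and docstring, takes PostgreSQL connection parameters and forwards each table to `send_pyarrow_table_to_postgresql`. A minimal usage sketch of `send_store_to_adx`; the cluster URIs, database name, and store path below are placeholders, not values from the package:

```python
from cosmotech.coal.azure.adx.store import send_store_to_adx

# Placeholder endpoints: substitute your own cluster and ingest URIs.
success = send_store_to_adx(
    adx_uri="https://mycluster.westeurope.kusto.windows.net",
    adx_ingest_uri="https://ingest-mycluster.westeurope.kusto.windows.net",
    database_name="my-database",
    wait=True,  # poll the queued ingestion results before returning
    tag=None,  # an "op-<uuid4>" tag is generated when omitted
    store_location="/tmp/my-store",  # omit to use the default Store location
)
if not success:
    raise RuntimeError("ADX ingestion reported failures for this operation tag")
```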
cosmotech/coal/azure/adx/tables.py
@@ -0,0 +1,118 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+from typing import Dict, Any
+
+import pyarrow
+from azure.kusto.data import KustoClient
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+
+
+def table_exists(client: KustoClient, database: str, table_name: str) -> bool:
+    """
+    Check if a table exists in the database.
+
+    Args:
+        client: The KustoClient to use
+        database: The name of the database
+        table_name: The name of the table to check
+
+    Returns:
+        bool: True if the table exists, False otherwise
+    """
+    LOGGER.debug(T("coal.services.adx.checking_table").format(database=database, table_name=table_name))
+
+    get_tables_query = f".show database ['{database}'] schema| distinct TableName"
+    tables = client.execute(database, get_tables_query)
+
+    for r in tables.primary_results[0]:
+        if table_name == r[0]:
+            LOGGER.debug(T("coal.services.adx.table_exists").format(table_name=table_name))
+            return True
+
+    LOGGER.debug(T("coal.services.adx.table_not_exists").format(table_name=table_name))
+    return False
+
+
+def check_and_create_table(kusto_client: KustoClient, database: str, table_name: str, data: pyarrow.Table) -> bool:
+    """
+    Check if a table exists and create it if it doesn't.
+
+    Args:
+        kusto_client: The Kusto client
+        database: The database name
+        table_name: The table name
+        data: The PyArrow table data
+
+    Returns:
+        bool: True if the table was created, False if it already existed
+    """
+    LOGGER.debug(T("coal.services.adx.checking_table_exists"))
+    if not table_exists(kusto_client, database, table_name):
+        from cosmotech.coal.azure.adx.utils import create_column_mapping
+
+        mapping = create_column_mapping(data)
+        LOGGER.debug(T("coal.services.adx.creating_nonexistent_table"))
+        create_table(kusto_client, database, table_name, mapping)
+        return True
+    return False
+
+
+def _drop_by_tag(kusto_client: KustoClient, database: str, tag: str) -> None:
+    """
+    Drop all data with the specified tag.
+
+    Args:
+        kusto_client: The Kusto client
+        database: The database name
+        tag: The tag to drop data by
+    """
+    LOGGER.info(T("coal.services.adx.dropping_data_by_tag").format(tag=tag))
+
+    try:
+        # Execute the drop by tag command
+        drop_command = f'.drop extents <| .show database extents where tags has "drop-by:{tag}"'
+        kusto_client.execute_mgmt(database, drop_command)
+        LOGGER.info(T("coal.services.adx.drop_completed"))
+    except Exception as e:
+        LOGGER.error(T("coal.services.adx.drop_error").format(error=str(e)))
+        LOGGER.exception(T("coal.services.adx.drop_details"))
+
+
+def create_table(client: KustoClient, database: str, table_name: str, schema: Dict[str, str]) -> bool:
+    """
+    Create a table in the database.
+
+    Args:
+        client: The KustoClient to use
+        database: The name of the database
+        table_name: The name of the table to create
+        schema: Dictionary mapping column names to ADX types
+
+    Returns:
+        bool: True if the table was created successfully, False otherwise
+    """
+    LOGGER.debug(T("coal.services.adx.creating_table").format(database=database, table_name=table_name))
+
+    create_query = f".create-merge table {table_name}("
+
+    for column_name, column_type in schema.items():
+        create_query += f"{column_name}:{column_type},"
+
+    create_query = create_query[:-1] + ")"
+
+    LOGGER.debug(T("coal.services.adx.create_query").format(query=create_query))
+
+    try:
+        client.execute(database, create_query)
+        LOGGER.info(T("coal.services.adx.table_created").format(table_name=table_name))
+        return True
+    except Exception as e:
+        LOGGER.error(T("coal.services.adx.table_creation_error").format(table_name=table_name, error=str(e)))
+        return False
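`create_table` above builds a single `.create-merge table` command by concatenating the column mapping. A short sketch, assuming `client` is an already-authenticated `azure.kusto.data.KustoClient` and the schema mirrors what `create_column_mapping` would produce:

```python
from cosmotech.coal.azure.adx.tables import create_table, table_exists

# Hypothetical schema in the {column_name: adx_type} shape expected by create_table.
schema = {"SimulationRun": "guid", "start": "datetime", "value": "real"}

# Resulting command: .create-merge table MyTable(SimulationRun:guid,start:datetime,value:real)
if not table_exists(client, "my-database", "MyTable"):
    create_table(client, "my-database", "MyTable", schema)
```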
cosmotech/coal/azure/adx/utils.py
@@ -0,0 +1,71 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+import dateutil.parser
+from typing import Any, Dict
+
+import pyarrow
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+
+
+def create_column_mapping(data: pyarrow.Table) -> Dict[str, str]:
+    """
+    Create a column mapping for a PyArrow table.
+
+    Args:
+        data: The PyArrow table data
+
+    Returns:
+        dict: A mapping of column names to their ADX types
+    """
+    mapping = dict()
+    for column_name in data.column_names:
+        column = data.column(column_name)
+        try:
+            ex = next(v for v in column.to_pylist() if v is not None)
+        except StopIteration:
+            LOGGER.error(T("coal.services.adx.empty_column").format(column_name=column_name))
+            mapping[column_name] = type_mapping(column_name, "string")
+            continue
+        else:
+            mapping[column_name] = type_mapping(column_name, ex)
+    return mapping
+
+
+def type_mapping(key: str, key_example_value: Any) -> str:
+    """
+    Map Python types to ADX types.
+
+    Args:
+        key: The name of the key
+        key_example_value: A possible value of the key
+
+    Returns:
+        str: The name of the type used in ADX
+    """
+    LOGGER.debug(T("coal.services.adx.mapping_type").format(key=key, value_type=type(key_example_value).__name__))
+
+    if key == "SimulationRun":
+        return "guid"
+
+    try:
+        # Use dateutil parser to test if the value could be a date, in case of error it is not
+        dateutil.parser.parse(key_example_value, fuzzy=False)
+        return "datetime"
+    except (ValueError, TypeError):
+        pass
+
+    if isinstance(key_example_value, float):
+        return "real"
+
+    if isinstance(key_example_value, int):
+        return "long"
+
+    # Default case to string
+    return "string"
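Type inference in this hunk is value-based: `create_column_mapping` takes the first non-null value of each column, and `type_mapping` tries its rules in order: the `SimulationRun` name rule, a `dateutil` parse (which runs before the numeric checks, so any date-like string maps to `datetime`), then `float` and `int`, falling back to `string`. A small illustration; the table below is constructed for this example:

```python
import pyarrow

from cosmotech.coal.azure.adx.utils import create_column_mapping

# The first non-null value of each column drives the inferred ADX type.
table = pyarrow.table(
    {
        "SimulationRun": ["f3b9c1d0-0000-0000-0000-000000000000"],  # name rule -> "guid"
        "start": ["2025-01-01T00:00:00Z"],  # parses as a date -> "datetime"
        "ratio": [0.5],  # float -> "real"
        "count": [3],  # int -> "long"
        "label": ["abc"],  # no rule matches -> "string"
    }
)
print(create_column_mapping(table))
# {'SimulationRun': 'guid', 'start': 'datetime', 'ratio': 'real', 'count': 'long', 'label': 'string'}
```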
cosmotech/coal/azure/blob.py
@@ -0,0 +1,109 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+"""
+Azure Blob Storage operations module.
+
+This module provides functions for interacting with Azure Blob Storage,
+including uploading data from the Store.
+"""
+
+import pathlib
+from io import BytesIO
+from typing import List, Optional
+
+from azure.identity import ClientSecretCredential
+from azure.storage.blob import BlobServiceClient
+
+import pyarrow.csv as pc
+import pyarrow.parquet as pq
+
+from cosmotech.coal.store.store import Store
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+
+VALID_TYPES = (
+    "sqlite",
+    "csv",
+    "parquet",
+)
+
+
+def dump_store_to_azure(
+    store_folder: str,
+    account_name: str,
+    container_name: str,
+    tenant_id: str,
+    client_id: str,
+    client_secret: str,
+    output_type: str = "sqlite",
+    file_prefix: str = "",
+) -> None:
+    """
+    Dump Store data to Azure Blob Storage.
+
+    Args:
+        store_folder: Folder containing the Store
+        account_name: Azure Storage account name
+        container_name: Azure Storage container name
+        tenant_id: Azure tenant ID
+        client_id: Azure client ID
+        client_secret: Azure client secret
+        output_type: Output file type (sqlite, csv, or parquet)
+        file_prefix: Prefix for uploaded files
+
+    Raises:
+        ValueError: If the output type is invalid
+    """
+    _s = Store(store_location=store_folder)
+
+    if output_type not in VALID_TYPES:
+        LOGGER.error(T("coal.common.validation.invalid_output_type").format(output_type=output_type))
+        raise ValueError(T("coal.common.validation.invalid_output_type").format(output_type=output_type))
+
+    container_client = BlobServiceClient(
+        account_url=f"https://{account_name}.blob.core.windows.net/",
+        credential=ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret),
+    ).get_container_client(container_name)
+
+    def data_upload(data_stream: BytesIO, file_name: str):
+        uploaded_file_name = file_prefix + file_name
+        data_stream.seek(0)
+        size = len(data_stream.read())
+        data_stream.seek(0)
+
+        LOGGER.info(T("coal.common.data_transfer.sending_data").format(size=size))
+        container_client.upload_blob(name=uploaded_file_name, data=data_stream, length=size, overwrite=True)
+
+    if output_type == "sqlite":
+        _file_path = _s._database_path
+        _file_name = "db.sqlite"
+        _uploaded_file_name = file_prefix + _file_name
+        LOGGER.info(
+            T("coal.common.data_transfer.file_sent").format(file_path=_file_path, uploaded_name=_uploaded_file_name)
+        )
+        with open(_file_path, "rb") as data:
+            container_client.upload_blob(name=_uploaded_file_name, data=data, overwrite=True)
+    else:
+        tables = list(_s.list_tables())
+        for table_name in tables:
+            _data_stream = BytesIO()
+            _file_name = None
+            _data = _s.get_table(table_name)
+            if not len(_data):
+                LOGGER.info(T("coal.common.data_transfer.table_empty").format(table_name=table_name))
+                continue
+            if output_type == "csv":
+                _file_name = table_name + ".csv"
+                pc.write_csv(_data, _data_stream)
+            elif output_type == "parquet":
+                _file_name = table_name + ".parquet"
+                pq.write_table(_data, _data_stream)
+            LOGGER.info(
+                T("coal.common.data_transfer.sending_table").format(table_name=table_name, output_type=output_type)
+            )
+            data_upload(_data_stream, _file_name)
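`dump_store_to_azure` above either uploads the Store's backing SQLite file as a single `db.sqlite` blob or serializes each non-empty table to an in-memory CSV/Parquet stream and uploads it, authenticating with a service-principal client secret. A usage sketch; every identifier below is a placeholder, not a value from the package:

```python
from cosmotech.coal.azure.blob import dump_store_to_azure

dump_store_to_azure(
    store_folder="/tmp/my-store",
    account_name="mystorageaccount",
    container_name="exports",
    tenant_id="00000000-0000-0000-0000-000000000000",
    client_id="11111111-1111-1111-1111-111111111111",
    client_secret="<client-secret>",
    output_type="parquet",  # one of "sqlite", "csv", "parquet"
    file_prefix="run-42/",  # blobs land under a virtual folder in the container
)
```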
cosmotech/coal/azure/functions.py
@@ -0,0 +1,72 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+import azure.functions as func
+from cosmotech.coal.cosmotech_api.runner.download import download_runner_data
+from cosmotech_api.api.runner_api import RunnerApi
+
+import json
+import http
+import traceback
+
+
+def generate_main(apply_update, parallel=True):
+    def main(req: func.HttpRequest) -> func.HttpResponse:
+        try:
+            runner_id = req.params.get("scenario-id")  # Keep parameter name for backward compatibility
+            organization_id = req.params.get("organization-id")
+            workspace_id = req.params.get("workspace-id")
+            access_token: str = req.headers.get("authorization", None)
+            if access_token:
+                access_token = access_token.split(" ")[1]
+
+            if runner_id is None or organization_id is None or workspace_id is None:
+                return func.HttpResponse(
+                    body=f"Invalid request: organization-id={organization_id}, workspace-id={workspace_id}, scenario-id={runner_id}",
+                    status_code=http.HTTPStatus.BAD_REQUEST,
+                )
+
+            # Get runner data
+            result = download_runner_data(
+                organization_id=organization_id,
+                workspace_id=workspace_id,
+                runner_id=runner_id,
+                parameter_folder=None,  # We don't need to save to files
+                read_files=True,
+                parallel=parallel,
+                write_json=False,
+                write_csv=False,
+                fetch_dataset=True,
+            )
+
+            content = {
+                "datasets": result["datasets"],
+                "parameters": result["parameters"],
+            }
+
+            runner_data = result["runner_data"]
+
+            updated_content = apply_update(
+                content=content, scenario_data=runner_data
+            )  # Keep parameter name for backward compatibility
+
+            return func.HttpResponse(
+                body=json.dumps(updated_content),
+                headers={"Content-Type": "application/json"},
+            )
+        except Exception as e:
+            response = {
+                "error": getattr(e, "message", str(e)),
+                "type": type(e).__name__,
+                "trace": traceback.format_exc(),
+            }
+            return func.HttpResponse(
+                status_code=http.HTTPStatus.INTERNAL_SERVER_ERROR,
+                body=json.dumps(response),
+                headers={"Content-Type": "application/json"},
+            )
+
+    return main
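`generate_main` above is a factory: given an `apply_update` callback, it returns an HTTP-triggered handler that validates the query parameters (which keep the legacy `scenario-id` name), downloads the runner's datasets and parameters, passes them through the callback, and returns the result as JSON, with a traceback payload on errors. A sketch of how a function app might wire it up; the `apply_update` body is illustrative only:

```python
from cosmotech.coal.azure.functions import generate_main


def apply_update(content: dict, scenario_data) -> dict:
    # Enrich or transform the payload before it is returned to the caller.
    content["parameter_count"] = len(content["parameters"])
    return content


# `main` is what Azure Functions invokes with the HttpRequest, e.g.:
#   GET /api/<function-name>?organization-id=o-xxx&workspace-id=w-xxx&scenario-id=r-xxx
main = generate_main(apply_update, parallel=True)
```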