cosmotech_acceleration_library-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. cosmotech/coal/__init__.py +8 -0
  2. cosmotech/coal/aws/__init__.py +23 -0
  3. cosmotech/coal/aws/s3.py +235 -0
  4. cosmotech/coal/azure/__init__.py +23 -0
  5. cosmotech/coal/azure/adx/__init__.py +26 -0
  6. cosmotech/coal/azure/adx/auth.py +125 -0
  7. cosmotech/coal/azure/adx/ingestion.py +329 -0
  8. cosmotech/coal/azure/adx/query.py +56 -0
  9. cosmotech/coal/azure/adx/runner.py +217 -0
  10. cosmotech/coal/azure/adx/store.py +255 -0
  11. cosmotech/coal/azure/adx/tables.py +118 -0
  12. cosmotech/coal/azure/adx/utils.py +71 -0
  13. cosmotech/coal/azure/blob.py +109 -0
  14. cosmotech/coal/azure/functions.py +72 -0
  15. cosmotech/coal/azure/storage.py +74 -0
  16. cosmotech/coal/cosmotech_api/__init__.py +36 -0
  17. cosmotech/coal/cosmotech_api/connection.py +96 -0
  18. cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
  19. cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
  20. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
  21. cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
  22. cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
  23. cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
  24. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
  25. cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
  26. cosmotech/coal/cosmotech_api/parameters.py +48 -0
  27. cosmotech/coal/cosmotech_api/run.py +25 -0
  28. cosmotech/coal/cosmotech_api/run_data.py +173 -0
  29. cosmotech/coal/cosmotech_api/run_template.py +108 -0
  30. cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
  31. cosmotech/coal/cosmotech_api/runner/data.py +38 -0
  32. cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
  33. cosmotech/coal/cosmotech_api/runner/download.py +146 -0
  34. cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
  35. cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
  36. cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
  37. cosmotech/coal/cosmotech_api/workspace.py +127 -0
  38. cosmotech/coal/csm/__init__.py +6 -0
  39. cosmotech/coal/csm/engine/__init__.py +47 -0
  40. cosmotech/coal/postgresql/__init__.py +22 -0
  41. cosmotech/coal/postgresql/runner.py +93 -0
  42. cosmotech/coal/postgresql/store.py +98 -0
  43. cosmotech/coal/singlestore/__init__.py +17 -0
  44. cosmotech/coal/singlestore/store.py +100 -0
  45. cosmotech/coal/store/__init__.py +42 -0
  46. cosmotech/coal/store/csv.py +44 -0
  47. cosmotech/coal/store/native_python.py +25 -0
  48. cosmotech/coal/store/pandas.py +26 -0
  49. cosmotech/coal/store/pyarrow.py +23 -0
  50. cosmotech/coal/store/store.py +79 -0
  51. cosmotech/coal/utils/__init__.py +18 -0
  52. cosmotech/coal/utils/api.py +68 -0
  53. cosmotech/coal/utils/logger.py +10 -0
  54. cosmotech/coal/utils/postgresql.py +236 -0
  55. cosmotech/csm_data/__init__.py +6 -0
  56. cosmotech/csm_data/commands/__init__.py +6 -0
  57. cosmotech/csm_data/commands/adx_send_data.py +92 -0
  58. cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
  59. cosmotech/csm_data/commands/api/__init__.py +6 -0
  60. cosmotech/csm_data/commands/api/api.py +50 -0
  61. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
  62. cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
  63. cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
  64. cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
  65. cosmotech/csm_data/commands/api/run_load_data.py +120 -0
  66. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
  67. cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
  68. cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
  69. cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
  70. cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
  71. cosmotech/csm_data/commands/az_storage_upload.py +76 -0
  72. cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
  73. cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
  74. cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
  75. cosmotech/csm_data/commands/store/__init__.py +6 -0
  76. cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
  77. cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
  78. cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
  79. cosmotech/csm_data/commands/store/list_tables.py +48 -0
  80. cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
  81. cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
  82. cosmotech/csm_data/commands/store/reset.py +31 -0
  83. cosmotech/csm_data/commands/store/store.py +37 -0
  84. cosmotech/csm_data/main.py +57 -0
  85. cosmotech/csm_data/utils/__init__.py +6 -0
  86. cosmotech/csm_data/utils/click.py +18 -0
  87. cosmotech/csm_data/utils/decorators.py +75 -0
  88. cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
  89. cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
  90. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
  91. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
  92. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
  93. cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
  94. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
  95. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
  96. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
  97. cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
  98. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
  99. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
  100. cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
  101. cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
  102. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
  103. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
  104. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
  105. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
  106. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
  107. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
  108. cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
  109. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
  110. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
  111. cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
  112. cosmotech/translation/coal/__init__.py +6 -0
  113. cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
  114. cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
  115. cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
  116. cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
  117. cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
  118. cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
  119. cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
  120. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
  121. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
  122. cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
  123. cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
  124. cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
  125. cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
  126. cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
  127. cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
  128. cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
  129. cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
  130. cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
  131. cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
  132. cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
  133. cosmotech/translation/coal/en-US/coal/web.yml +2 -0
  134. cosmotech/translation/csm_data/__init__.py +6 -0
  135. cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
  136. cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
  137. cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
  138. cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
  139. cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
  140. cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
  141. cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
--- /dev/null
+++ b/cosmotech/coal/azure/adx/store.py
@@ -0,0 +1,255 @@
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
+ # This document and all information contained herein is the exclusive property -
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
+ # etc., to any person is prohibited unless it has been previously and
+ # specifically authorized by written means by Cosmo Tech.
+
+ import os
+ import tempfile
+ import time
+ import uuid
+ from time import perf_counter
+ from typing import Optional, List, Dict, Tuple
+
+ import pyarrow
+ import pyarrow.csv as pc
+ from azure.kusto.data import KustoClient
+ from azure.kusto.data.data_format import DataFormat
+ from azure.kusto.ingest import IngestionProperties
+ from azure.kusto.ingest import QueuedIngestClient
+ from azure.kusto.ingest import ReportLevel
+ from cosmotech.orchestrator.utils.translate import T
+
+ from cosmotech.coal.azure.adx.tables import check_and_create_table, _drop_by_tag
+ from cosmotech.coal.azure.adx.auth import initialize_clients
+ from cosmotech.coal.azure.adx.ingestion import monitor_ingestion, handle_failures
+ from cosmotech.coal.store.store import Store
+ from cosmotech.coal.utils.logger import LOGGER
+ from cosmotech.coal.utils.postgresql import send_pyarrow_table_to_postgresql
+
+
+ def send_table_data(
+     ingest_client: QueuedIngestClient, database: str, table_name: str, data: pyarrow.Table, operation_tag: str
+ ) -> Tuple[str, str]:
+     """
+     Send a PyArrow table to ADX.
+
+     Args:
+         ingest_client: The ingest client
+         database: The database name
+         table_name: The table name
+         data: The PyArrow table data
+         operation_tag: The operation tag for tracking
+
+     Returns:
+         tuple: (source_id, table_name)
+     """
+     LOGGER.debug(T("coal.services.adx.sending_data").format(table_name=table_name))
+     result = send_pyarrow_table_to_adx(ingest_client, database, table_name, data, operation_tag)
+     return result.source_id, table_name
+
+
+ def process_tables(
+     store: Store, kusto_client: KustoClient, ingest_client: QueuedIngestClient, database: str, operation_tag: str
+ ) -> Tuple[List[str], Dict[str, str]]:
+     """
+     Process all tables in the store.
+
+     Args:
+         store: The data store
+         kusto_client: The Kusto client
+         ingest_client: The ingest client
+         database: The database name
+         operation_tag: The operation tag for tracking
+
+     Returns:
+         tuple: (source_ids, table_ingestion_id_mapping)
+     """
+     source_ids = []
+     table_ingestion_id_mapping = dict()
+
+     LOGGER.debug(T("coal.services.adx.listing_tables"))
+     table_list = list(store.list_tables())
+
+     for target_table_name in table_list:
+         LOGGER.info(T("coal.services.adx.working_on_table").format(table_name=target_table_name))
+         data = store.get_table(target_table_name)
+
+         if data.num_rows < 1:
+             LOGGER.warning(T("coal.services.adx.table_empty").format(table_name=target_table_name))
+             continue
+
+         check_and_create_table(kusto_client, database, target_table_name, data)
+
+         source_id, _ = send_table_data(ingest_client, database, target_table_name, data, operation_tag)
+         source_ids.append(source_id)
+         table_ingestion_id_mapping[source_id] = target_table_name
+
+     return source_ids, table_ingestion_id_mapping
+
+
+ def send_pyarrow_table_to_adx(
+     client: QueuedIngestClient,
+     database: str,
+     table_name: str,
+     table_data: pyarrow.Table,
+     drop_by_tag: Optional[str] = None,
+ ):
+     drop_by_tags = [drop_by_tag] if (drop_by_tag is not None) else None
+
+     properties = IngestionProperties(
+         database=database,
+         table=table_name,
+         data_format=DataFormat.CSV,
+         drop_by_tags=drop_by_tags,
+         report_level=ReportLevel.FailuresAndSuccesses,
+         flush_immediately=True,
+     )
+
+     file_name = f"adx_{database}_{table_name}_{int(time.time())}_{uuid.uuid4()}.csv"
+     temp_file_path = os.path.join(os.environ.get("CSM_TEMP_ABSOLUTE_PATH", tempfile.gettempdir()), file_name)
+     pc.write_csv(table_data, temp_file_path, pc.WriteOptions(include_header=False))
+     try:
+         return client.ingest_from_file(temp_file_path, properties)
+     finally:
+         os.unlink(temp_file_path)
+
+
+ def send_store_to_adx(
+     adx_uri: str,
+     adx_ingest_uri: str,
+     database_name: str,
+     wait: bool = False,
+     tag: Optional[str] = None,
+     store_location: Optional[str] = None,
+ ) -> bool:
+     """
+     Send data from the store to Azure Data Explorer.
+
+     Args:
+         adx_uri: The Azure Data Explorer resource URI
+         adx_ingest_uri: The Azure Data Explorer resource ingest URI
+         database_name: The database name
+         wait: Whether to wait for ingestion to complete
+         tag: The operation tag for tracking (a unique one is generated if not provided)
+         store_location: Optional store location (uses the default if not provided)
+
+     Returns:
+         bool: True if successful, False otherwise
+     """
+     # Generate a unique operation tag if none provided
+     operation_tag = tag or f"op-{str(uuid.uuid4())}"
+     LOGGER.debug(T("coal.services.adx.starting_ingestion").format(operation_tag=operation_tag))
+
+     # Initialize clients
+     kusto_client, ingest_client = initialize_clients(adx_uri, adx_ingest_uri)
+     database = database_name
+
+     # Load datastore
+     LOGGER.debug(T("coal.services.adx.loading_datastore"))
+     store = Store(store_location=store_location)
+
+     try:
+         # Process tables
+         source_ids, table_ingestion_id_mapping = process_tables(
+             store, kusto_client, ingest_client, database, operation_tag
+         )
+
+         LOGGER.info(T("coal.services.adx.data_sent"))
+
+         # Monitor ingestion if wait is True
+         has_failures = False
+         if wait and source_ids:
+             has_failures = monitor_ingestion(ingest_client, source_ids, table_ingestion_id_mapping)
+
+         # Handle failures
+         should_abort = handle_failures(kusto_client, database, operation_tag, has_failures)
+         if should_abort:
+             return False
+
+         return True
+
+     except Exception:
+         LOGGER.exception(T("coal.services.adx.ingestion_error"))
+         # Perform rollback using the tag
+         LOGGER.warning(T("coal.services.adx.dropping_data").format(operation_tag=operation_tag))
+         _drop_by_tag(kusto_client, database, operation_tag)
+         raise
+
+
+ def dump_store_to_adx(
+     store_folder: str,
+     postgres_host: str,
+     postgres_port: int,
+     postgres_db: str,
+     postgres_schema: str,
+     postgres_user: str,
+     postgres_password: str,
+     table_prefix: str = "Cosmotech_",
+     replace: bool = True,
+ ) -> None:
+     """
+     Dump Store data to a PostgreSQL database (despite its name, this helper targets PostgreSQL, not ADX).
+
+     Args:
+         store_folder: Folder containing the Store
+         postgres_host: PostgreSQL host
+         postgres_port: PostgreSQL port
+         postgres_db: PostgreSQL database name
+         postgres_schema: PostgreSQL schema
+         postgres_user: PostgreSQL username
+         postgres_password: PostgreSQL password
+         table_prefix: Table prefix
+         replace: Whether to replace existing tables
+     """
+     _s = Store(store_location=store_folder)
+
+     tables = list(_s.list_tables())
+     if tables:
+         LOGGER.info(T("coal.services.database.sending_data").format(table=f"{postgres_db}.{postgres_schema}"))
+         total_rows = 0
+         _process_start = perf_counter()
+         for table_name in tables:
+             _s_time = perf_counter()
+             target_table_name = f"{table_prefix}{table_name}"
+             LOGGER.info(T("coal.services.database.table_entry").format(table=target_table_name))
+             data = _s.get_table(table_name)
+             if not len(data):
+                 LOGGER.info(T("coal.services.database.no_rows"))
+                 continue
+             _dl_time = perf_counter()
+             rows = send_pyarrow_table_to_postgresql(
+                 data,
+                 target_table_name,
+                 postgres_host,
+                 postgres_port,
+                 postgres_db,
+                 postgres_schema,
+                 postgres_user,
+                 postgres_password,
+                 replace,
+             )
+             total_rows += rows
+             _up_time = perf_counter()
+             LOGGER.info(T("coal.services.database.row_count").format(count=rows))
+             LOGGER.debug(
+                 T("coal.common.timing.operation_completed").format(
+                     operation="Load from datastore", time=f"{_dl_time - _s_time:0.3}"
+                 )
+             )
+             LOGGER.debug(
+                 T("coal.common.timing.operation_completed").format(
+                     operation="Send to postgresql", time=f"{_up_time - _dl_time:0.3}"
+                 )
+             )
+         _process_end = perf_counter()
+         LOGGER.info(
+             T("coal.services.database.rows_fetched").format(
+                 table="all tables",
+                 count=total_rows,
+                 time=f"{_process_end - _process_start:0.3}",
+             )
+         )
+     else:
+         LOGGER.info(T("coal.services.database.store_empty"))
--- /dev/null
+++ b/cosmotech/coal/azure/adx/tables.py
@@ -0,0 +1,118 @@
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
+ # This document and all information contained herein is the exclusive property -
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
+ # etc., to any person is prohibited unless it has been previously and
+ # specifically authorized by written means by Cosmo Tech.
+
+ from typing import Dict
+
+ import pyarrow
+ from azure.kusto.data import KustoClient
+
+ from cosmotech.coal.utils.logger import LOGGER
+ from cosmotech.orchestrator.utils.translate import T
+
+
+ def table_exists(client: KustoClient, database: str, table_name: str) -> bool:
+     """
+     Check if a table exists in the database.
+
+     Args:
+         client: The KustoClient to use
+         database: The name of the database
+         table_name: The name of the table to check
+
+     Returns:
+         bool: True if the table exists, False otherwise
+     """
+     LOGGER.debug(T("coal.services.adx.checking_table").format(database=database, table_name=table_name))
+
+     get_tables_query = f".show database ['{database}'] schema | distinct TableName"
+     tables = client.execute(database, get_tables_query)
+
+     for r in tables.primary_results[0]:
+         if table_name == r[0]:
+             LOGGER.debug(T("coal.services.adx.table_exists").format(table_name=table_name))
+             return True
+
+     LOGGER.debug(T("coal.services.adx.table_not_exists").format(table_name=table_name))
+     return False
+
+
+ def check_and_create_table(kusto_client: KustoClient, database: str, table_name: str, data: pyarrow.Table) -> bool:
+     """
+     Check if a table exists and create it if it doesn't.
+
+     Args:
+         kusto_client: The Kusto client
+         database: The database name
+         table_name: The table name
+         data: The PyArrow table data
+
+     Returns:
+         bool: True if the table was created, False if it already existed
+     """
+     LOGGER.debug(T("coal.services.adx.checking_table_exists"))
+     if not table_exists(kusto_client, database, table_name):
+         from cosmotech.coal.azure.adx.utils import create_column_mapping  # deferred import avoids a circular dependency
+
+         mapping = create_column_mapping(data)
+         LOGGER.debug(T("coal.services.adx.creating_nonexistent_table"))
+         create_table(kusto_client, database, table_name, mapping)
+         return True
+     return False
+
+
+ def _drop_by_tag(kusto_client: KustoClient, database: str, tag: str) -> None:
+     """
+     Drop all data with the specified tag.
+
+     Args:
+         kusto_client: The Kusto client
+         database: The database name
+         tag: The tag to drop data by
+     """
+     LOGGER.info(T("coal.services.adx.dropping_data_by_tag").format(tag=tag))
+
+     try:
+         # Execute the drop by tag command
+         drop_command = f'.drop extents <| .show database extents where tags has "drop-by:{tag}"'
+         kusto_client.execute_mgmt(database, drop_command)
+         LOGGER.info(T("coal.services.adx.drop_completed"))
+     except Exception as e:
+         LOGGER.error(T("coal.services.adx.drop_error").format(error=str(e)))
+         LOGGER.exception(T("coal.services.adx.drop_details"))
+
+
+ def create_table(client: KustoClient, database: str, table_name: str, schema: Dict[str, str]) -> bool:
+     """
+     Create a table in the database.
+
+     Args:
+         client: The KustoClient to use
+         database: The name of the database
+         table_name: The name of the table to create
+         schema: Dictionary mapping column names to ADX types
+
+     Returns:
+         bool: True if the table was created successfully, False otherwise
+     """
+     LOGGER.debug(T("coal.services.adx.creating_table").format(database=database, table_name=table_name))
+
+     create_query = f".create-merge table {table_name}("
+
+     for column_name, column_type in schema.items():
+         create_query += f"{column_name}:{column_type},"
+
+     create_query = create_query[:-1] + ")"
+
+     LOGGER.debug(T("coal.services.adx.create_query").format(query=create_query))
+
+     try:
+         client.execute(database, create_query)
+         LOGGER.info(T("coal.services.adx.table_created").format(table_name=table_name))
+         return True
+     except Exception as e:
+         LOGGER.error(T("coal.services.adx.table_creation_error").format(table_name=table_name, error=str(e)))
+         return False
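
Usage note (illustrative, not part of the package): a short sketch of how these helpers compose. It assumes Azure-CLI-based authentication via azure-kusto-data's KustoConnectionStringBuilder; the cluster URI, database, and table names are placeholders.

    import pyarrow
    from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
    from cosmotech.coal.azure.adx.tables import check_and_create_table

    # Authenticate with the local Azure CLI session (placeholder cluster URI).
    kcsb = KustoConnectionStringBuilder.with_az_cli_authentication("https://mycluster.westeurope.kusto.windows.net")
    client = KustoClient(kcsb)

    # Infer an ADX schema from the sample data and create the table only if it
    # does not exist yet; returns True when a table was actually created.
    data = pyarrow.table({"Name": ["a", "b"], "Value": [1.0, 2.0]})
    created = check_and_create_table(client, "my-database", "MyTable", data)
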
--- /dev/null
+++ b/cosmotech/coal/azure/adx/utils.py
@@ -0,0 +1,71 @@
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
+ # This document and all information contained herein is the exclusive property -
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
+ # etc., to any person is prohibited unless it has been previously and
+ # specifically authorized by written means by Cosmo Tech.
+
+ from typing import Any, Dict
+
+ import dateutil.parser
+ import pyarrow
+
+ from cosmotech.coal.utils.logger import LOGGER
+ from cosmotech.orchestrator.utils.translate import T
+
+
+ def create_column_mapping(data: pyarrow.Table) -> Dict[str, str]:
+     """
+     Create a column mapping for a PyArrow table.
+
+     Args:
+         data: The PyArrow table data
+
+     Returns:
+         dict: A mapping of column names to their ADX types
+     """
+     mapping = dict()
+     for column_name in data.column_names:
+         column = data.column(column_name)
+         try:
+             ex = next(v for v in column.to_pylist() if v is not None)
+         except StopIteration:
+             LOGGER.error(T("coal.services.adx.empty_column").format(column_name=column_name))
+             mapping[column_name] = type_mapping(column_name, "string")
+             continue
+         else:
+             mapping[column_name] = type_mapping(column_name, ex)
+     return mapping
+
+
+ def type_mapping(key: str, key_example_value: Any) -> str:
+     """
+     Map Python types to ADX types.
+
+     Args:
+         key: The name of the key
+         key_example_value: A possible value of the key
+
+     Returns:
+         str: The name of the type used in ADX
+     """
+     LOGGER.debug(T("coal.services.adx.mapping_type").format(key=key, value_type=type(key_example_value).__name__))
+
+     if key == "SimulationRun":
+         return "guid"
+
+     try:
+         # Use dateutil's parser to test whether the value could be a date; if parsing fails, it is not one
+         dateutil.parser.parse(key_example_value, fuzzy=False)
+         return "datetime"
+     except (ValueError, TypeError):
+         pass
+
+     if isinstance(key_example_value, float):
+         return "real"
+
+     if isinstance(key_example_value, int):
+         return "long"
+
+     # Default to string
+     return "string"
--- /dev/null
+++ b/cosmotech/coal/azure/blob.py
@@ -0,0 +1,109 @@
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
+ # This document and all information contained herein is the exclusive property -
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
+ # etc., to any person is prohibited unless it has been previously and
+ # specifically authorized by written means by Cosmo Tech.
+
+ """
+ Azure Blob Storage operations module.
+
+ This module provides functions for interacting with Azure Blob Storage,
+ including uploading data from the Store.
+ """
+
+ import pathlib
+ from io import BytesIO
+ from typing import List, Optional
+
+ from azure.identity import ClientSecretCredential
+ from azure.storage.blob import BlobServiceClient
+
+ import pyarrow.csv as pc
+ import pyarrow.parquet as pq
+
+ from cosmotech.coal.store.store import Store
+ from cosmotech.coal.utils.logger import LOGGER
+ from cosmotech.orchestrator.utils.translate import T
+
+ VALID_TYPES = (
+     "sqlite",
+     "csv",
+     "parquet",
+ )
+
+
+ def dump_store_to_azure(
+     store_folder: str,
+     account_name: str,
+     container_name: str,
+     tenant_id: str,
+     client_id: str,
+     client_secret: str,
+     output_type: str = "sqlite",
+     file_prefix: str = "",
+ ) -> None:
+     """
+     Dump Store data to Azure Blob Storage.
+
+     Args:
+         store_folder: Folder containing the Store
+         account_name: Azure Storage account name
+         container_name: Azure Storage container name
+         tenant_id: Azure tenant ID
+         client_id: Azure client ID
+         client_secret: Azure client secret
+         output_type: Output file type (sqlite, csv, or parquet)
+         file_prefix: Prefix for uploaded files
+
+     Raises:
+         ValueError: If the output type is invalid
+     """
+     _s = Store(store_location=store_folder)
+
+     if output_type not in VALID_TYPES:
+         LOGGER.error(T("coal.common.validation.invalid_output_type").format(output_type=output_type))
+         raise ValueError(T("coal.common.validation.invalid_output_type").format(output_type=output_type))
+
+     container_client = BlobServiceClient(
+         account_url=f"https://{account_name}.blob.core.windows.net/",
+         credential=ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret),
+     ).get_container_client(container_name)
+
+     def data_upload(data_stream: BytesIO, file_name: str):
+         uploaded_file_name = file_prefix + file_name
+         data_stream.seek(0)
+         size = len(data_stream.read())
+         data_stream.seek(0)
+
+         LOGGER.info(T("coal.common.data_transfer.sending_data").format(size=size))
+         container_client.upload_blob(name=uploaded_file_name, data=data_stream, length=size, overwrite=True)
+
+     if output_type == "sqlite":
+         _file_path = _s._database_path
+         _file_name = "db.sqlite"
+         _uploaded_file_name = file_prefix + _file_name
+         LOGGER.info(
+             T("coal.common.data_transfer.file_sent").format(file_path=_file_path, uploaded_name=_uploaded_file_name)
+         )
+         with open(_file_path, "rb") as data:
+             container_client.upload_blob(name=_uploaded_file_name, data=data, overwrite=True)
+     else:
+         tables = list(_s.list_tables())
+         for table_name in tables:
+             _data_stream = BytesIO()
+             _file_name = None
+             _data = _s.get_table(table_name)
+             if not len(_data):
+                 LOGGER.info(T("coal.common.data_transfer.table_empty").format(table_name=table_name))
+                 continue
+             if output_type == "csv":
+                 _file_name = table_name + ".csv"
+                 pc.write_csv(_data, _data_stream)
+             elif output_type == "parquet":
+                 _file_name = table_name + ".parquet"
+                 pq.write_table(_data, _data_stream)
+             LOGGER.info(
+                 T("coal.common.data_transfer.sending_table").format(table_name=table_name, output_type=output_type)
+             )
+             data_upload(_data_stream, _file_name)
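
Usage note (illustrative, not part of the package): a minimal sketch of dump_store_to_azure under a service-principal setup. All identifiers below are placeholders.

    from cosmotech.coal.azure.blob import dump_store_to_azure

    # Export each non-empty Store table as a Parquet blob named
    # "exports/<table_name>.parquet" in the target container.
    dump_store_to_azure(
        store_folder="/tmp/my-store",        # placeholder Store folder
        account_name="mystorageaccount",     # placeholder storage account
        container_name="data",               # placeholder container
        tenant_id="<tenant-id>",             # placeholder service-principal identifiers
        client_id="<client-id>",
        client_secret="<client-secret>",
        output_type="parquet",
        file_prefix="exports/",
    )

With output_type="sqlite" the whole Store database file is uploaded as a single blob instead of one file per table.
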
--- /dev/null
+++ b/cosmotech/coal/azure/functions.py
@@ -0,0 +1,72 @@
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
+ # This document and all information contained herein is the exclusive property -
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
+ # etc., to any person is prohibited unless it has been previously and
+ # specifically authorized by written means by Cosmo Tech.
+ import http
+ import json
+ import traceback
+
+ import azure.functions as func
+
+ from cosmotech.coal.cosmotech_api.runner.download import download_runner_data
+
+
+ def generate_main(apply_update, parallel=True):
+     def main(req: func.HttpRequest) -> func.HttpResponse:
+         try:
+             runner_id = req.params.get("scenario-id")  # Keep parameter name for backward compatibility
+             organization_id = req.params.get("organization-id")
+             workspace_id = req.params.get("workspace-id")
+             access_token: str = req.headers.get("authorization", None)
+             if access_token:
+                 access_token = access_token.split(" ")[1]
+
+             if runner_id is None or organization_id is None or workspace_id is None:
+                 return func.HttpResponse(
+                     body=f"Invalid request: organization-id={organization_id}, workspace-id={workspace_id}, scenario-id={runner_id}",
+                     status_code=http.HTTPStatus.BAD_REQUEST,
+                 )
+
+             # Get runner data
+             result = download_runner_data(
+                 organization_id=organization_id,
+                 workspace_id=workspace_id,
+                 runner_id=runner_id,
+                 parameter_folder=None,  # We don't need to save to files
+                 read_files=True,
+                 parallel=parallel,
+                 write_json=False,
+                 write_csv=False,
+                 fetch_dataset=True,
+             )
+
+             content = {
+                 "datasets": result["datasets"],
+                 "parameters": result["parameters"],
+             }
+
+             runner_data = result["runner_data"]
+
+             updated_content = apply_update(
+                 content=content, scenario_data=runner_data
+             )  # Keep parameter name for backward compatibility
+
+             return func.HttpResponse(
+                 body=json.dumps(updated_content),
+                 headers={"Content-Type": "application/json"},
+             )
+         except Exception as e:
+             response = {
+                 "error": getattr(e, "message", str(e)),
+                 "type": type(e).__name__,
+                 "trace": traceback.format_exc(),
+             }
+             return func.HttpResponse(
+                 status_code=http.HTTPStatus.INTERNAL_SERVER_ERROR,
+                 body=json.dumps(response),
+                 headers={"Content-Type": "application/json"},
+             )
+
+     return main
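
Usage note (illustrative, not part of the package): a sketch of wiring generate_main into an Azure Functions HTTP entry point. The apply_update callback below is hypothetical; the factory only requires a callable taking content and scenario_data and returning the (possibly modified) content.

    import azure.functions as func
    from cosmotech.coal.azure.functions import generate_main


    def apply_update(content: dict, scenario_data) -> dict:
        # Hypothetical transform: annotate the payload before it is
        # serialized back to the HTTP caller.
        content["parameters"]["processed"] = True
        return content


    # Exposed at module level so the Functions runtime can pick it up as
    # the handler for the HTTP trigger.
    main = generate_main(apply_update, parallel=True)
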