cosmotech-acceleration-library 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. cosmotech/coal/__init__.py +8 -0
  2. cosmotech/coal/aws/__init__.py +23 -0
  3. cosmotech/coal/aws/s3.py +235 -0
  4. cosmotech/coal/azure/__init__.py +23 -0
  5. cosmotech/coal/azure/adx/__init__.py +26 -0
  6. cosmotech/coal/azure/adx/auth.py +125 -0
  7. cosmotech/coal/azure/adx/ingestion.py +329 -0
  8. cosmotech/coal/azure/adx/query.py +56 -0
  9. cosmotech/coal/azure/adx/runner.py +217 -0
  10. cosmotech/coal/azure/adx/store.py +255 -0
  11. cosmotech/coal/azure/adx/tables.py +118 -0
  12. cosmotech/coal/azure/adx/utils.py +71 -0
  13. cosmotech/coal/azure/blob.py +109 -0
  14. cosmotech/coal/azure/functions.py +72 -0
  15. cosmotech/coal/azure/storage.py +74 -0
  16. cosmotech/coal/cosmotech_api/__init__.py +36 -0
  17. cosmotech/coal/cosmotech_api/connection.py +96 -0
  18. cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
  19. cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
  20. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
  21. cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
  22. cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
  23. cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
  24. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
  25. cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
  26. cosmotech/coal/cosmotech_api/parameters.py +48 -0
  27. cosmotech/coal/cosmotech_api/run.py +25 -0
  28. cosmotech/coal/cosmotech_api/run_data.py +173 -0
  29. cosmotech/coal/cosmotech_api/run_template.py +108 -0
  30. cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
  31. cosmotech/coal/cosmotech_api/runner/data.py +38 -0
  32. cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
  33. cosmotech/coal/cosmotech_api/runner/download.py +146 -0
  34. cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
  35. cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
  36. cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
  37. cosmotech/coal/cosmotech_api/workspace.py +127 -0
  38. cosmotech/coal/csm/__init__.py +6 -0
  39. cosmotech/coal/csm/engine/__init__.py +47 -0
  40. cosmotech/coal/postgresql/__init__.py +22 -0
  41. cosmotech/coal/postgresql/runner.py +93 -0
  42. cosmotech/coal/postgresql/store.py +98 -0
  43. cosmotech/coal/singlestore/__init__.py +17 -0
  44. cosmotech/coal/singlestore/store.py +100 -0
  45. cosmotech/coal/store/__init__.py +42 -0
  46. cosmotech/coal/store/csv.py +44 -0
  47. cosmotech/coal/store/native_python.py +25 -0
  48. cosmotech/coal/store/pandas.py +26 -0
  49. cosmotech/coal/store/pyarrow.py +23 -0
  50. cosmotech/coal/store/store.py +79 -0
  51. cosmotech/coal/utils/__init__.py +18 -0
  52. cosmotech/coal/utils/api.py +68 -0
  53. cosmotech/coal/utils/logger.py +10 -0
  54. cosmotech/coal/utils/postgresql.py +236 -0
  55. cosmotech/csm_data/__init__.py +6 -0
  56. cosmotech/csm_data/commands/__init__.py +6 -0
  57. cosmotech/csm_data/commands/adx_send_data.py +92 -0
  58. cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
  59. cosmotech/csm_data/commands/api/__init__.py +6 -0
  60. cosmotech/csm_data/commands/api/api.py +50 -0
  61. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
  62. cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
  63. cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
  64. cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
  65. cosmotech/csm_data/commands/api/run_load_data.py +120 -0
  66. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
  67. cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
  68. cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
  69. cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
  70. cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
  71. cosmotech/csm_data/commands/az_storage_upload.py +76 -0
  72. cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
  73. cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
  74. cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
  75. cosmotech/csm_data/commands/store/__init__.py +6 -0
  76. cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
  77. cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
  78. cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
  79. cosmotech/csm_data/commands/store/list_tables.py +48 -0
  80. cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
  81. cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
  82. cosmotech/csm_data/commands/store/reset.py +31 -0
  83. cosmotech/csm_data/commands/store/store.py +37 -0
  84. cosmotech/csm_data/main.py +57 -0
  85. cosmotech/csm_data/utils/__init__.py +6 -0
  86. cosmotech/csm_data/utils/click.py +18 -0
  87. cosmotech/csm_data/utils/decorators.py +75 -0
  88. cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
  89. cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
  90. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
  91. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
  92. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
  93. cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
  94. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
  95. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
  96. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
  97. cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
  98. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
  99. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
  100. cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
  101. cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
  102. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
  103. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
  104. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
  105. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
  106. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
  107. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
  108. cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
  109. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
  110. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
  111. cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
  112. cosmotech/translation/coal/__init__.py +6 -0
  113. cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
  114. cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
  115. cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
  116. cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
  117. cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
  118. cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
  119. cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
  120. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
  121. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
  122. cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
  123. cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
  124. cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
  125. cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
  126. cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
  127. cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
  128. cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
  129. cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
  130. cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
  131. cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
  132. cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
  133. cosmotech/translation/coal/en-US/coal/web.yml +2 -0
  134. cosmotech/translation/csm_data/__init__.py +6 -0
  135. cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
  136. cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
  137. cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
  138. cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
  139. cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
  140. cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
  141. cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
cosmotech/coal/cosmotech_api/runner/datasets.py
@@ -0,0 +1,364 @@
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
+ # This document and all information contained herein is the exclusive property -
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
+ # etc., to any person is prohibited unless it has been previously and
+ # specifically authorized by written means by Cosmo Tech.
+
+ """
+ Dataset handling functions.
+ """
+
+ import multiprocessing
+ import tempfile
+ from pathlib import Path
+ from typing import Dict, List, Any, Optional, Union, Tuple
+
+ from azure.identity import DefaultAzureCredential
+ from cosmotech_api.api.dataset_api import DatasetApi
+
+ from cosmotech.coal.cosmotech_api.connection import get_api_client
+ from cosmotech.coal.cosmotech_api.dataset import (
+     convert_graph_dataset_to_files,
+     download_adt_dataset,
+     download_twingraph_dataset,
+     download_legacy_twingraph_dataset,
+     download_file_dataset,
+ )
+ from cosmotech.coal.utils.logger import LOGGER
+ from cosmotech.orchestrator.utils.translate import T
+
+
+ def get_dataset_ids_from_runner(runner_data) -> List[str]:
+     """
+     Extract dataset IDs from runner data.
+
+     Args:
+         runner_data: Runner data object
+
+     Returns:
+         List of dataset IDs
+     """
+     dataset_ids = runner_data.dataset_list[:]
+
+     for parameter in runner_data.parameters_values:
+         if parameter.var_type == "%DATASETID%" and parameter.value:
+             dataset_id = parameter.value
+             dataset_ids.append(dataset_id)
+
+     return dataset_ids
+
+
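Taken on its own, the function above just concatenates two sources of IDs. A minimal sketch of the input shape it expects, using SimpleNamespace stand-ins for the cosmotech_api runner model (the attribute names mirror the code above; the IDs are hypothetical):

    from types import SimpleNamespace

    runner_data = SimpleNamespace(
        dataset_list=["d-base123"],
        parameters_values=[
            SimpleNamespace(var_type="%DATASETID%", value="d-param456"),
            SimpleNamespace(var_type="int", value="12"),
        ],
    )

    # Base dataset list plus every %DATASETID% parameter that carries a value:
    ids = get_dataset_ids_from_runner(runner_data)
    assert ids == ["d-base123", "d-param456"]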
+ def download_dataset(
+     organization_id: str,
+     workspace_id: str,
+     dataset_id: str,
+     read_files: bool = True,
+     credentials: Optional[DefaultAzureCredential] = None,
+ ) -> Dict[str, Any]:
+     """
+     Download a single dataset by ID.
+
+     Args:
+         organization_id: Organization ID
+         workspace_id: Workspace ID
+         dataset_id: Dataset ID
+         read_files: Whether to read file contents
+         credentials: Azure credentials (if None, uses DefaultAzureCredential if needed)
+
+     Returns:
+         Dataset information dictionary
+     """
+
+     # Get dataset information
+     with get_api_client()[0] as api_client:
+         api_instance = DatasetApi(api_client)
+         dataset = api_instance.find_dataset_by_id(organization_id=organization_id, dataset_id=dataset_id)
+
+     if dataset.connector is None:
+         parameters = []
+     else:
+         parameters = dataset.connector.parameters_values
+
+     is_adt = "AZURE_DIGITAL_TWINS_URL" in parameters
+     is_storage = "AZURE_STORAGE_CONTAINER_BLOB_PREFIX" in parameters
+     is_legacy_twin_cache = "TWIN_CACHE_NAME" in parameters and dataset.twingraph_id is None
+     is_in_workspace_file = (
+         False if dataset.tags is None else "workspaceFile" in dataset.tags or "dataset_part" in dataset.tags
+     )
+
+     # Download based on dataset type
+     if is_adt:
+         content, folder_path = download_adt_dataset(
+             adt_address=parameters["AZURE_DIGITAL_TWINS_URL"],
+             credentials=credentials,
+         )
+         return {
+             "type": "adt",
+             "content": content,
+             "name": dataset.name,
+             "folder_path": str(folder_path),
+             "dataset_id": dataset_id,
+         }
+
+     elif is_legacy_twin_cache:
+         twin_cache_name = parameters["TWIN_CACHE_NAME"]
+         content, folder_path = download_legacy_twingraph_dataset(
+             organization_id=organization_id, cache_name=twin_cache_name
+         )
+         return {
+             "type": "twincache",
+             "content": content,
+             "name": dataset.name,
+             "folder_path": str(folder_path),
+             "dataset_id": dataset_id,
+         }
+
+     elif is_storage:
+         _file_name = parameters["AZURE_STORAGE_CONTAINER_BLOB_PREFIX"].replace("%WORKSPACE_FILE%/", "")
+         content, folder_path = download_file_dataset(
+             organization_id=organization_id,
+             workspace_id=workspace_id,
+             file_name=_file_name,
+             read_files=read_files,
+         )
+         return {
+             "type": _file_name.split(".")[-1],
+             "content": content,
+             "name": dataset.name,
+             "folder_path": str(folder_path),
+             "dataset_id": dataset_id,
+             "file_name": _file_name,
+         }
+
+     elif is_in_workspace_file:
+         _file_name = dataset.source.location
+         content, folder_path = download_file_dataset(
+             organization_id=organization_id,
+             workspace_id=workspace_id,
+             file_name=_file_name,
+             read_files=read_files,
+         )
+         return {
+             "type": _file_name.split(".")[-1],
+             "content": content,
+             "name": dataset.name,
+             "folder_path": str(folder_path),
+             "dataset_id": dataset_id,
+             "file_name": _file_name,
+         }
+
+     else:
+         content, folder_path = download_twingraph_dataset(organization_id=organization_id, dataset_id=dataset_id)
+         return {
+             "type": "twincache",
+             "content": content,
+             "name": dataset.name,
+             "folder_path": str(folder_path),
+             "dataset_id": dataset_id,
+         }
+
+
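download_dataset inspects the connector parameters and tags to pick a backend (ADT, legacy twin cache, storage file, workspace file, or twingraph) and always returns the same dictionary shape. A hedged usage sketch — the IDs are hypothetical, and an API connection resolvable by get_api_client() is assumed to be configured in the environment:

    info = download_dataset(
        organization_id="o-myorg",       # hypothetical
        workspace_id="w-myworkspace",    # hypothetical
        dataset_id="d-mydataset",        # hypothetical
        read_files=False,
    )
    # "adt", "twincache", or the file extension for file-based datasets
    print(info["type"], info["folder_path"])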
+ def download_dataset_process(
+     _dataset_id, organization_id, workspace_id, read_files, credentials, _return_dict, _error_dict
+ ):
+     """
+     Process function for downloading a dataset in a separate process.
+
+     This function is designed to be used with multiprocessing to download datasets in parallel.
+     It downloads a single dataset and stores the result in a shared dictionary.
+     If an error occurs, it stores the error message in a shared error dictionary and re-raises the exception.
+
+     Args:
+         _dataset_id: Dataset ID to download
+         organization_id: Organization ID
+         workspace_id: Workspace ID
+         read_files: Whether to read file contents
+         credentials: Azure credentials (if None, uses DefaultAzureCredential if needed)
+         _return_dict: Shared dictionary to store successful download results
+         _error_dict: Shared dictionary to store error messages
+
+     Raises:
+         Exception: Any exception that occurs during dataset download is re-raised
+     """
+     try:
+         _c = download_dataset(
+             organization_id=organization_id,
+             workspace_id=workspace_id,
+             dataset_id=_dataset_id,
+             read_files=read_files,
+             credentials=credentials,
+         )
+         _return_dict[_dataset_id] = _c
+     except Exception as e:
+         _error_dict[_dataset_id] = f"{type(e).__name__}: {str(e)}"
+         raise e
+
+
+ def download_datasets_parallel(
+     organization_id: str,
+     workspace_id: str,
+     dataset_ids: List[str],
+     read_files: bool = True,
+     credentials: Optional[DefaultAzureCredential] = None,
+ ) -> Dict[str, Dict[str, Any]]:
+     """
+     Download multiple datasets in parallel.
+
+     Args:
+         organization_id: Organization ID
+         workspace_id: Workspace ID
+         dataset_ids: List of dataset IDs
+         read_files: Whether to read file contents
+         credentials: Azure credentials (if None, uses DefaultAzureCredential if needed)
+
+     Returns:
+         Dictionary mapping dataset IDs to dataset information
+     """
+
+     # Use multiprocessing to download datasets in parallel
+     manager = multiprocessing.Manager()
+     return_dict = manager.dict()
+     error_dict = manager.dict()
+     processes = [
+         (
+             dataset_id,
+             multiprocessing.Process(
+                 target=download_dataset_process,
+                 args=(dataset_id, organization_id, workspace_id, read_files, credentials, return_dict, error_dict),
+             ),
+         )
+         for dataset_id in dataset_ids
+     ]
+
+     LOGGER.info(T("coal.services.dataset.parallel_download").format(count=len(dataset_ids)))
+
+     [p.start() for _, p in processes]
+     [p.join() for _, p in processes]
+
+     for dataset_id, p in processes:
+         # We might hit the following bug: https://bugs.python.org/issue43944
+         # As a workaround, only treat non-null exit code as a real issue if we also have stored an error
+         # message
+         if p.exitcode != 0 and dataset_id in error_dict:
+             raise ChildProcessError(f"Failed to download dataset '{dataset_id}': {error_dict[dataset_id]}")
+
+     return dict(return_dict)
+
+
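The loop above relies on a specific pattern: results and errors travel back through Manager dictionaries, and a non-zero exit code alone is not trusted because of CPython bug bpo-43944 (linked in the code). The same pattern in isolation, as a self-contained sketch with a placeholder worker:

    import multiprocessing

    def _worker(key, results, errors):
        try:
            results[key] = key.upper()  # placeholder for the real download
        except Exception as e:
            errors[key] = f"{type(e).__name__}: {e}"
            raise

    if __name__ == "__main__":
        manager = multiprocessing.Manager()
        results, errors = manager.dict(), manager.dict()
        procs = [(k, multiprocessing.Process(target=_worker, args=(k, results, errors))) for k in ("a", "b")]
        [p.start() for _, p in procs]
        [p.join() for _, p in procs]
        for key, p in procs:
            # A failure is only treated as real when the worker also
            # recorded an error message (workaround for bpo-43944).
            if p.exitcode != 0 and key in errors:
                raise ChildProcessError(f"'{key}' failed: {errors[key]}")
        print(dict(results))  # {'a': 'A', 'b': 'B'}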
+ def download_datasets_sequential(
+     organization_id: str,
+     workspace_id: str,
+     dataset_ids: List[str],
+     read_files: bool = True,
+     credentials: Optional[DefaultAzureCredential] = None,
+ ) -> Dict[str, Dict[str, Any]]:
+     """
+     Download multiple datasets sequentially.
+
+     Args:
+         organization_id: Organization ID
+         workspace_id: Workspace ID
+         dataset_ids: List of dataset IDs
+         read_files: Whether to read file contents
+         credentials: Azure credentials (if None, uses DefaultAzureCredential if needed)
+
+     Returns:
+         Dictionary mapping dataset IDs to dataset information
+     """
+
+     return_dict = {}
+     error_dict = {}
+
+     LOGGER.info(T("coal.services.dataset.sequential_download").format(count=len(dataset_ids)))
+
+     for dataset_id in dataset_ids:
+         try:
+             return_dict[dataset_id] = download_dataset(
+                 organization_id=organization_id,
+                 workspace_id=workspace_id,
+                 dataset_id=dataset_id,
+                 read_files=read_files,
+                 credentials=credentials,
+             )
+         except Exception as e:
+             error_dict[dataset_id] = f"{type(e).__name__}: {str(e)}"
+             raise ChildProcessError(f"Failed to download dataset '{dataset_id}': {error_dict.get(dataset_id, '')}")
+
+     return return_dict
+
+
+ def download_datasets(
+     organization_id: str,
+     workspace_id: str,
+     dataset_ids: List[str],
+     read_files: bool = True,
+     parallel: bool = True,
+     credentials: Optional[DefaultAzureCredential] = None,
+ ) -> Dict[str, Dict[str, Any]]:
+     """
+     Download multiple datasets, either in parallel or sequentially.
+
+     Args:
+         organization_id: Organization ID
+         workspace_id: Workspace ID
+         dataset_ids: List of dataset IDs
+         read_files: Whether to read file contents
+         parallel: Whether to download in parallel
+         credentials: Azure credentials (if None, uses DefaultAzureCredential if needed)
+
+     Returns:
+         Dictionary mapping dataset IDs to dataset information
+     """
+     if not dataset_ids:
+         return {}
+
+     if parallel and len(dataset_ids) > 1:
+         return download_datasets_parallel(
+             organization_id=organization_id,
+             workspace_id=workspace_id,
+             dataset_ids=dataset_ids,
+             read_files=read_files,
+             credentials=credentials,
+         )
+     else:
+         return download_datasets_sequential(
+             organization_id=organization_id,
+             workspace_id=workspace_id,
+             dataset_ids=dataset_ids,
+             read_files=read_files,
+             credentials=credentials,
+         )
+
+
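A usage sketch of the dispatcher — hypothetical IDs, API connection assumed configured; note that a single-element list always takes the sequential path regardless of the parallel flag:

    datasets = download_datasets(
        organization_id="o-myorg",
        workspace_id="w-myworkspace",
        dataset_ids=["d-one", "d-two"],
        read_files=False,
        parallel=True,
    )
    for dataset_id, info in datasets.items():
        print(dataset_id, info["type"], info["folder_path"])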
+ def dataset_to_file(dataset_info: Dict[str, Any], target_folder: Optional[Union[str, Path]] = None) -> str:
+     """
+     Convert dataset to files.
+
+     Args:
+         dataset_info: Dataset information dictionary
+         target_folder: Optional folder to save files (if None, uses temp dir)
+
+     Returns:
+         Path to folder containing files
+     """
+     dataset_type = dataset_info["type"]
+     content = dataset_info["content"]
+
+     if dataset_type in ["adt", "twincache"]:
+         # Use conversion function
+         if target_folder:
+             target_folder = convert_graph_dataset_to_files(content, target_folder)
+         else:
+             target_folder = convert_graph_dataset_to_files(content)
+         return str(target_folder)
+
+     # For file datasets, return the folder path
+     if "folder_path" in dataset_info:
+         return dataset_info["folder_path"]
+
+     # Fallback to creating a temp directory
+     if target_folder:
+         return str(target_folder)
+     else:
+         return tempfile.mkdtemp()
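Continuing the sketch above, dataset_to_file materializes one entry of that result as a folder of files — graph content ("adt"/"twincache") is converted via convert_graph_dataset_to_files, while file datasets simply return the folder_path recorded at download time:

    folder = dataset_to_file(datasets["d-one"])  # hypothetical ID from the sketch above
    print(folder)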
cosmotech/coal/cosmotech_api/runner/download.py
@@ -0,0 +1,146 @@
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
+ # This document and all information contained herein is the exclusive property -
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
+ # etc., to any person is prohibited unless it has been previously and
+ # specifically authorized by written means by Cosmo Tech.
+
+ """
+ Orchestration functions for downloading runner and run data.
+ """
+
+ import os
+ import pathlib
+ import shutil
+ from typing import Dict, List, Any, Optional
+
+ from azure.identity import DefaultAzureCredential
+ from cosmotech_api.api.runner_api import RunnerApi
+ from cosmotech_api.exceptions import ApiException
+
+ from cosmotech.coal.cosmotech_api.connection import get_api_client
+ from cosmotech.coal.cosmotech_api.runner.data import get_runner_data
+ from cosmotech.coal.cosmotech_api.runner.parameters import (
+     format_parameters_list,
+     write_parameters,
+ )
+ from cosmotech.coal.cosmotech_api.runner.datasets import (
+     get_dataset_ids_from_runner,
+     download_datasets,
+     dataset_to_file,
+ )
+ from cosmotech.coal.utils.logger import LOGGER
+ from cosmotech.orchestrator.utils.translate import T
+
+
+ def download_runner_data(
+     organization_id: str,
+     workspace_id: str,
+     runner_id: str,
+     parameter_folder: str,
+     dataset_folder: Optional[str] = None,
+     read_files: bool = False,
+     parallel: bool = True,
+     write_json: bool = True,
+     write_csv: bool = False,
+     fetch_dataset: bool = True,
+ ) -> Dict[str, Any]:
+     """
+     Download all runner data including datasets and parameters.
+
+     Args:
+         organization_id: Organization ID
+         workspace_id: Workspace ID
+         runner_id: Runner ID
+         parameter_folder: Folder to save parameters
+         dataset_folder: Folder to save datasets (if None, only saves datasets referenced by parameters)
+         read_files: Whether to read file contents
+         parallel: Whether to download datasets in parallel
+         write_json: Whether to write parameters as JSON
+         write_csv: Whether to write parameters as CSV
+         fetch_dataset: Whether to fetch datasets
+
+     Returns:
+         Dictionary with runner data, datasets, and parameters
+     """
+     LOGGER.info(T("coal.cosmotech_api.runner.starting_download"))
+
+     # Get credentials if needed
+     credentials = None
+     if get_api_client()[1] == "Azure Entra Connection":
+         credentials = DefaultAzureCredential()
+
+     # Get runner data
+     runner_data = get_runner_data(organization_id, workspace_id, runner_id)
+
+     # Create result dictionary
+     result = {"runner_data": runner_data, "datasets": {}, "parameters": {}}
+
+     # Skip if no parameters found
+     if not runner_data.parameters_values:
+         LOGGER.warning(T("coal.cosmotech_api.runner.no_parameters"))
+         return result
+
+     LOGGER.info(T("coal.cosmotech_api.runner.loaded_data"))
+
+     # Format parameters
+     parameters = format_parameters_list(runner_data)
+     result["parameters"] = {param["parameterId"]: param["value"] for param in parameters}
+
+     # Download datasets if requested
+     if fetch_dataset:
+         dataset_ids = get_dataset_ids_from_runner(runner_data)
+
+         if dataset_ids:
+             LOGGER.info(T("coal.cosmotech_api.runner.downloading_datasets").format(count=len(dataset_ids)))
+
+             datasets = download_datasets(
+                 organization_id=organization_id,
+                 workspace_id=workspace_id,
+                 dataset_ids=dataset_ids,
+                 read_files=read_files,
+                 parallel=parallel,
+                 credentials=credentials,
+             )
+
+             result["datasets"] = datasets
+
+             # Process datasets
+             datasets_parameters_ids = {
+                 param.value: param.parameter_id
+                 for param in runner_data.parameters_values
+                 if param.var_type == "%DATASETID%" and param.value
+             }
+
+             # Save datasets to parameter folders
+             for dataset_id, dataset_info in datasets.items():
+                 # If dataset is referenced by a parameter, save to parameter folder
+                 if dataset_id in datasets_parameters_ids:
+                     param_id = datasets_parameters_ids[dataset_id]
+                     param_dir = os.path.join(parameter_folder, param_id)
+                     pathlib.Path(param_dir).mkdir(exist_ok=True, parents=True)
+
+                     dataset_folder_path = dataset_to_file(dataset_info)
+                     shutil.copytree(dataset_folder_path, param_dir, dirs_exist_ok=True)
+
+                     # Update parameter value to point to the folder
+                     for param in parameters:
+                         if param["parameterId"] == param_id:
+                             param["value"] = param_dir
+                             break
+
+                 # If dataset is in dataset_list and dataset_folder is provided, save there too
+                 if dataset_folder and dataset_id in runner_data.dataset_list:
+                     pathlib.Path(dataset_folder).mkdir(parents=True, exist_ok=True)
+                     dataset_folder_path = dataset_to_file(dataset_info)
+                     shutil.copytree(dataset_folder_path, dataset_folder, dirs_exist_ok=True)
+                     LOGGER.debug(
+                         T("coal.cosmotech_api.runner.dataset_debug").format(folder=dataset_folder, id=dataset_id)
+                     )
+
+     # Write parameters to files
+     if write_json or write_csv:
+         LOGGER.info(T("coal.cosmotech_api.runner.writing_parameters"))
+         write_parameters(parameter_folder, parameters, write_csv, write_json)
+
+     return result
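End to end, a hedged sketch of this orchestration entry point — hypothetical IDs and paths, API connection assumed configured; parameters land under parameter_folder, and each %DATASETID% parameter value is rewritten to point at its downloaded folder:

    result = download_runner_data(
        organization_id="o-myorg",
        workspace_id="w-myworkspace",
        runner_id="r-myrunner",
        parameter_folder="/tmp/parameters",
        dataset_folder="/tmp/datasets",
        write_json=True,
    )
    print(result["parameters"])      # {parameterId: value, ...}
    print(list(result["datasets"]))  # IDs of the downloaded datasets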
cosmotech/coal/cosmotech_api/runner/metadata.py
@@ -0,0 +1,42 @@
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
+ # This document and all information contained herein is the exclusive property -
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
+ # etc., to any person is prohibited unless it has been previously and
+ # specifically authorized by written means by Cosmo Tech.
+
+ """
+ Runner metadata retrieval functions.
+ """
+
+ from typing import Any, Optional
+
+ import cosmotech_api
+
+
+ def get_runner_metadata(
+     api_client: cosmotech_api.api_client.ApiClient,
+     organization_id: str,
+     workspace_id: str,
+     runner_id: str,
+     include: Optional[list[str]] = None,
+     exclude: Optional[list[str]] = None,
+ ) -> dict[str, Any]:
+     """
+     Get runner metadata from the API.
+
+     Args:
+         api_client: The API client to use
+         organization_id: The ID of the organization
+         workspace_id: The ID of the workspace
+         runner_id: The ID of the runner
+         include: Optional list of fields to include
+         exclude: Optional list of fields to exclude
+
+     Returns:
+         Dictionary with runner metadata
+     """
+     runner_api = cosmotech_api.RunnerApi(api_client)
+     runner: cosmotech_api.Runner = runner_api.get_runner(organization_id, workspace_id, runner_id)
+
+     return runner.model_dump(by_alias=True, exclude_none=True, include=include, exclude=exclude, mode="json")
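A usage sketch, reusing the connection helper from the modules above (IDs hypothetical; include/exclude are forwarded to pydantic's model_dump to trim the returned fields):

    from cosmotech.coal.cosmotech_api.connection import get_api_client
    from cosmotech.coal.cosmotech_api.runner.metadata import get_runner_metadata

    with get_api_client()[0] as api_client:
        metadata = get_runner_metadata(
            api_client,
            organization_id="o-myorg",
            workspace_id="w-myworkspace",
            runner_id="r-myrunner",
            include=["id", "name"],  # keep only these top-level fields
        )
    print(metadata)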