cosmotech-acceleration-library 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff compares the contents of two package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (98)
  1. cosmotech/coal/__init__.py +1 -1
  2. cosmotech/coal/aws/__init__.py +1 -9
  3. cosmotech/coal/aws/s3.py +181 -214
  4. cosmotech/coal/azure/adx/auth.py +2 -2
  5. cosmotech/coal/azure/adx/runner.py +13 -14
  6. cosmotech/coal/azure/adx/store.py +5 -86
  7. cosmotech/coal/azure/adx/tables.py +2 -2
  8. cosmotech/coal/azure/blob.py +6 -6
  9. cosmotech/coal/azure/storage.py +3 -3
  10. cosmotech/coal/cosmotech_api/__init__.py +0 -28
  11. cosmotech/coal/cosmotech_api/apis/__init__.py +14 -0
  12. cosmotech/coal/cosmotech_api/apis/dataset.py +103 -0
  13. cosmotech/coal/cosmotech_api/apis/meta.py +25 -0
  14. cosmotech/coal/cosmotech_api/apis/organization.py +24 -0
  15. cosmotech/coal/cosmotech_api/apis/run.py +38 -0
  16. cosmotech/coal/cosmotech_api/apis/runner.py +71 -0
  17. cosmotech/coal/cosmotech_api/apis/solution.py +23 -0
  18. cosmotech/coal/cosmotech_api/apis/workspace.py +108 -0
  19. cosmotech/coal/cosmotech_api/objects/__init__.py +9 -0
  20. cosmotech/coal/cosmotech_api/objects/connection.py +125 -0
  21. cosmotech/coal/cosmotech_api/objects/parameters.py +127 -0
  22. cosmotech/coal/postgresql/runner.py +56 -36
  23. cosmotech/coal/postgresql/store.py +60 -14
  24. cosmotech/coal/postgresql/utils.py +254 -0
  25. cosmotech/coal/store/output/__init__.py +0 -0
  26. cosmotech/coal/store/output/aws_channel.py +73 -0
  27. cosmotech/coal/store/output/az_storage_channel.py +42 -0
  28. cosmotech/coal/store/output/channel_interface.py +23 -0
  29. cosmotech/coal/store/output/channel_spliter.py +55 -0
  30. cosmotech/coal/store/output/postgres_channel.py +40 -0
  31. cosmotech/coal/utils/configuration.py +169 -0
  32. cosmotech/coal/utils/decorator.py +4 -7
  33. cosmotech/csm_data/commands/api/api.py +6 -19
  34. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +20 -16
  35. cosmotech/csm_data/commands/api/run_load_data.py +7 -46
  36. cosmotech/csm_data/commands/api/wsf_load_file.py +13 -16
  37. cosmotech/csm_data/commands/api/wsf_send_file.py +11 -14
  38. cosmotech/csm_data/commands/s3_bucket_delete.py +16 -15
  39. cosmotech/csm_data/commands/s3_bucket_download.py +16 -16
  40. cosmotech/csm_data/commands/s3_bucket_upload.py +16 -14
  41. cosmotech/csm_data/commands/store/dump_to_s3.py +18 -16
  42. cosmotech/csm_data/commands/store/output.py +35 -0
  43. cosmotech/csm_data/commands/store/store.py +3 -3
  44. cosmotech/translation/coal/en-US/coal/cosmotech_api/initialization.yml +8 -0
  45. cosmotech/translation/coal/en-US/coal/services/dataset.yml +4 -14
  46. cosmotech/translation/coal/en-US/coal/store/output/data_interface.yml +1 -0
  47. cosmotech/translation/coal/en-US/coal/store/output/split.yml +6 -0
  48. cosmotech/translation/coal/en-US/coal/utils/configuration.yml +2 -0
  49. cosmotech/translation/csm_data/en-US/csm_data/commands/store/output.yml +7 -0
  50. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/METADATA +5 -8
  51. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/RECORD +55 -73
  52. cosmotech/coal/azure/functions.py +0 -72
  53. cosmotech/coal/cosmotech_api/connection.py +0 -96
  54. cosmotech/coal/cosmotech_api/dataset/__init__.py +0 -26
  55. cosmotech/coal/cosmotech_api/dataset/converters.py +0 -164
  56. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +0 -19
  57. cosmotech/coal/cosmotech_api/dataset/download/adt.py +0 -119
  58. cosmotech/coal/cosmotech_api/dataset/download/common.py +0 -140
  59. cosmotech/coal/cosmotech_api/dataset/download/file.py +0 -229
  60. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +0 -185
  61. cosmotech/coal/cosmotech_api/dataset/upload.py +0 -41
  62. cosmotech/coal/cosmotech_api/dataset/utils.py +0 -132
  63. cosmotech/coal/cosmotech_api/parameters.py +0 -48
  64. cosmotech/coal/cosmotech_api/run.py +0 -25
  65. cosmotech/coal/cosmotech_api/run_data.py +0 -173
  66. cosmotech/coal/cosmotech_api/run_template.py +0 -108
  67. cosmotech/coal/cosmotech_api/runner/__init__.py +0 -28
  68. cosmotech/coal/cosmotech_api/runner/data.py +0 -38
  69. cosmotech/coal/cosmotech_api/runner/datasets.py +0 -416
  70. cosmotech/coal/cosmotech_api/runner/download.py +0 -135
  71. cosmotech/coal/cosmotech_api/runner/metadata.py +0 -42
  72. cosmotech/coal/cosmotech_api/runner/parameters.py +0 -157
  73. cosmotech/coal/cosmotech_api/twin_data_layer.py +0 -512
  74. cosmotech/coal/cosmotech_api/workspace.py +0 -127
  75. cosmotech/coal/utils/postgresql.py +0 -236
  76. cosmotech/coal/utils/semver.py +0 -6
  77. cosmotech/csm_data/commands/api/rds_load_csv.py +0 -90
  78. cosmotech/csm_data/commands/api/rds_send_csv.py +0 -74
  79. cosmotech/csm_data/commands/api/rds_send_store.py +0 -74
  80. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +0 -66
  81. cosmotech/csm_data/commands/api/tdl_load_files.py +0 -76
  82. cosmotech/csm_data/commands/api/tdl_send_files.py +0 -82
  83. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +0 -27
  84. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +0 -27
  85. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +0 -27
  86. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +0 -27
  87. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +0 -32
  88. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +0 -27
  89. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +0 -2
  90. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_load_csv.yml +0 -13
  91. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_csv.yml +0 -12
  92. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_store.yml +0 -12
  93. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_load_files.yml +0 -14
  94. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_send_files.yml +0 -18
  95. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/WHEEL +0 -0
  96. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/entry_points.txt +0 -0
  97. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/licenses/LICENSE +0 -0
  98. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/top_level.txt +0 -0
cosmotech/coal/cosmotech_api/runner/datasets.py
@@ -1,416 +0,0 @@
- # Copyright (C) - 2023 - 2025 - Cosmo Tech
- # This document and all information contained herein is the exclusive property -
- # including all intellectual property rights pertaining thereto - of Cosmo Tech.
- # Any use, reproduction, translation, broadcasting, transmission, distribution,
- # etc., to any person is prohibited unless it has been previously and
- # specifically authorized by written means by Cosmo Tech.
-
- """
- Dataset handling functions.
- """
-
- import multiprocessing
- import tempfile
- from pathlib import Path
- from typing import Dict, List, Any, Optional, Union
-
- from azure.identity import DefaultAzureCredential
- from cosmotech_api.api.dataset_api import DatasetApi
-
- from cosmotech.coal.cosmotech_api.connection import get_api_client
- from cosmotech.coal.cosmotech_api.dataset import (
-     convert_graph_dataset_to_files,
-     download_adt_dataset,
-     download_twingraph_dataset,
-     download_legacy_twingraph_dataset,
-     download_file_dataset,
- )
- from cosmotech.coal.cosmotech_api.dataset.download import file
- from cosmotech.coal.utils.logger import LOGGER
- from cosmotech.orchestrator.utils.translate import T
-
-
- def get_dataset_ids_from_runner(runner_data) -> List[str]:
-     """
-     Extract dataset IDs from runner data.
-
-     Args:
-         runner_data: Runner data object
-
-     Returns:
-         List of dataset IDs
-     """
-     dataset_ids = runner_data.dataset_list[:]
-
-     for parameter in runner_data.parameters_values:
-         if parameter.var_type == "%DATASETID%" and parameter.value:
-             dataset_id = parameter.value
-             dataset_ids.append(dataset_id)
-
-     return dataset_ids
-
-
- def download_dataset(
-     organization_id: str,
-     workspace_id: str,
-     dataset_id: str,
-     read_files: bool = True,
- ) -> Dict[str, Any]:
-     """
-     retro-compatibility to cosmo-api v4
-     """
-     from cosmotech.coal.utils.semver import semver_of
-
-     csm_version = semver_of("cosmotech_api")
-     if csm_version.major >= 5:
-         return download_dataset_v5(organization_id, workspace_id, dataset_id, read_files)
-     else:
-         return download_dataset_v4(organization_id, workspace_id, dataset_id, read_files)
-
-
- def download_dataset_v5(
-     organization_id: str,
-     workspace_id: str,
-     dataset_id: str,
-     read_files: bool = True,
- ) -> Dict[str, Any]:
-     """
-     Download a single dataset by ID.
-
-     Args:
-         organization_id: Organization ID
-         workspace_id: Workspace ID
-         dataset_id: Dataset ID
-         read_files: Whether to read file contents
-
-     Returns:
-         Dataset information dictionary
-     """
-
-     # Get dataset information
-     with get_api_client()[0] as api_client:
-         dataset_api_instance = DatasetApi(api_client)
-         dataset = dataset_api_instance.get_dataset(
-             organization_id=organization_id, workspace_id=workspace_id, dataset_id=dataset_id
-         )
-
-         content = dict()
-         tmp_dataset_dir = tempfile.mkdtemp()
-         tmp_dataset_dir_path = Path(tmp_dataset_dir)
-         for part in dataset.parts:
-             part_file_path = tmp_dataset_dir_path / part.source_name
-             part_file_path.parent.mkdir(parents=True, exist_ok=True)
-             data_part = dataset_api_instance.download_dataset_part(organization_id, workspace_id, dataset_id, part.id)
-             with open(part_file_path, "wb") as binary_file:
-                 binary_file.write(data_part)
-
-             if read_files:
-                 content.update(file.read_file(part.source_name, part_file_path))
-
-     return {
-         "type": "csm_dataset",
-         "content": content,
-         "name": dataset.name,
-         "folder_path": tmp_dataset_dir,
-         "dataset_id": dataset_id,
-     }
-
-
- def download_dataset_v4(
-     organization_id: str,
-     workspace_id: str,
-     dataset_id: str,
-     read_files: bool = True,
- ) -> Dict[str, Any]:
-     """
-     Download a single dataset by ID.
-
-     Args:
-         organization_id: Organization ID
-         workspace_id: Workspace ID
-         dataset_id: Dataset ID
-         read_files: Whether to read file contents
-
-     Returns:
-         Dataset information dictionary
-     """
-
-     # Get dataset information
-     with get_api_client()[0] as api_client:
-         api_instance = DatasetApi(api_client)
-         dataset = api_instance.find_dataset_by_id(organization_id=organization_id, dataset_id=dataset_id)
-
-     if dataset.connector is None:
-         parameters = []
-     else:
-         parameters = dataset.connector.parameters_values
-
-     is_adt = "AZURE_DIGITAL_TWINS_URL" in parameters
-     is_storage = "AZURE_STORAGE_CONTAINER_BLOB_PREFIX" in parameters
-     is_legacy_twin_cache = "TWIN_CACHE_NAME" in parameters and dataset.twingraph_id is None
-     is_in_workspace_file = (
-         False if dataset.tags is None else "workspaceFile" in dataset.tags or "dataset_part" in dataset.tags
-     )
-
-     # Download based on dataset type
-     if is_adt:
-         content, folder_path = download_adt_dataset(
-             adt_address=parameters["AZURE_DIGITAL_TWINS_URL"],
-             credentials=DefaultAzureCredential(),
-         )
-         return {
-             "type": "adt",
-             "content": content,
-             "name": dataset.name,
-             "folder_path": str(folder_path),
-             "dataset_id": dataset_id,
-         }
-
-     elif is_legacy_twin_cache:
-         twin_cache_name = parameters["TWIN_CACHE_NAME"]
-         content, folder_path = download_legacy_twingraph_dataset(
-             organization_id=organization_id, cache_name=twin_cache_name
-         )
-         return {
-             "type": "twincache",
-             "content": content,
-             "name": dataset.name,
-             "folder_path": str(folder_path),
-             "dataset_id": dataset_id,
-         }
-
-     elif is_storage:
-         _file_name = parameters["AZURE_STORAGE_CONTAINER_BLOB_PREFIX"].replace("%WORKSPACE_FILE%/", "")
-         content, folder_path = download_file_dataset(
-             organization_id=organization_id,
-             workspace_id=workspace_id,
-             file_name=_file_name,
-             read_files=read_files,
-         )
-         return {
-             "type": _file_name.split(".")[-1],
-             "content": content,
-             "name": dataset.name,
-             "folder_path": str(folder_path),
-             "dataset_id": dataset_id,
-             "file_name": _file_name,
-         }
-
-     elif is_in_workspace_file:
-         _file_name = dataset.source.location
-         content, folder_path = download_file_dataset(
-             organization_id=organization_id,
-             workspace_id=workspace_id,
-             file_name=_file_name,
-             read_files=read_files,
-         )
-         return {
-             "type": _file_name.split(".")[-1],
-             "content": content,
-             "name": dataset.name,
-             "folder_path": str(folder_path),
-             "dataset_id": dataset_id,
-             "file_name": _file_name,
-         }
-
-     else:
-         content, folder_path = download_twingraph_dataset(organization_id=organization_id, dataset_id=dataset_id)
-         return {
-             "type": "twincache",
-             "content": content,
-             "name": dataset.name,
-             "folder_path": str(folder_path),
-             "dataset_id": dataset_id,
-         }
-
-
- def download_dataset_process(_dataset_id, organization_id, workspace_id, read_files, _return_dict, _error_dict):
-     """
-     Process function for downloading a dataset in a separate process.
-
-     This function is designed to be used with multiprocessing to download datasets in parallel.
-     It downloads a single dataset and stores the result in a shared dictionary.
-     If an error occurs, it stores the error message in a shared error dictionary and re-raises the exception.
-
-     Args:
-         _dataset_id: Dataset ID to download
-         organization_id: Organization ID
-         workspace_id: Workspace ID
-         read_files: Whether to read file contents
-         _return_dict: Shared dictionary to store successful download results
-         _error_dict: Shared dictionary to store error messages
-
-     Raises:
-         Exception: Any exception that occurs during dataset download is re-raised
-     """
-     try:
-         _c = download_dataset(
-             organization_id=organization_id,
-             workspace_id=workspace_id,
-             dataset_id=_dataset_id,
-             read_files=read_files,
-         )
-         _return_dict[_dataset_id] = _c
-     except Exception as e:
-         _error_dict[_dataset_id] = f"{type(e).__name__}: {str(e)}"
-         raise e
-
-
- def download_datasets_parallel(
-     organization_id: str,
-     workspace_id: str,
-     dataset_ids: List[str],
-     read_files: bool = True,
- ) -> Dict[str, Dict[str, Any]]:
-     """
-     Download multiple datasets in parallel.
-
-     Args:
-         organization_id: Organization ID
-         workspace_id: Workspace ID
-         dataset_ids: List of dataset IDs
-         read_files: Whether to read file contents
-
-     Returns:
-         Dictionary mapping dataset IDs to dataset information
-     """
-
-     # Use multiprocessing to download datasets in parallel
-     manager = multiprocessing.Manager()
-     return_dict = manager.dict()
-     error_dict = manager.dict()
-     processes = [
-         (
-             dataset_id,
-             multiprocessing.Process(
-                 target=download_dataset_process,
-                 args=(dataset_id, organization_id, workspace_id, read_files, return_dict, error_dict),
-             ),
-         )
-         for dataset_id in dataset_ids
-     ]
-
-     LOGGER.info(T("coal.services.dataset.parallel_download").format(count=len(dataset_ids)))
-
-     [p.start() for _, p in processes]
-     [p.join() for _, p in processes]
-
-     for dataset_id, p in processes:
-         # We might hit the following bug: https://bugs.python.org/issue43944
-         # As a workaround, only treat non-null exit code as a real issue if we also have stored an error
-         # message
-         if p.exitcode != 0 and dataset_id in error_dict:
-             raise ChildProcessError(f"Failed to download dataset '{dataset_id}': {error_dict[dataset_id]}")
-
-     return dict(return_dict)
-
-
- def download_datasets_sequential(
-     organization_id: str,
-     workspace_id: str,
-     dataset_ids: List[str],
-     read_files: bool = True,
- ) -> Dict[str, Dict[str, Any]]:
-     """
-     Download multiple datasets sequentially.
-
-     Args:
-         organization_id: Organization ID
-         workspace_id: Workspace ID
-         dataset_ids: List of dataset IDs
-         read_files: Whether to read file contents
-
-     Returns:
-         Dictionary mapping dataset IDs to dataset information
-     """
-
-     return_dict = {}
-     error_dict = {}
-
-     LOGGER.info(T("coal.services.dataset.sequential_download").format(count=len(dataset_ids)))
-
-     for dataset_id in dataset_ids:
-         try:
-             return_dict[dataset_id] = download_dataset(
-                 organization_id=organization_id,
-                 workspace_id=workspace_id,
-                 dataset_id=dataset_id,
-                 read_files=read_files,
-             )
-         except Exception as e:
-             error_dict[dataset_id] = f"{type(e).__name__}: {str(e)}"
-             raise ChildProcessError(f"Failed to download dataset '{dataset_id}': {error_dict.get(dataset_id, '')}")
-
-     return return_dict
-
-
- def download_datasets(
-     organization_id: str,
-     workspace_id: str,
-     dataset_ids: List[str],
-     read_files: bool = True,
-     parallel: bool = True,
- ) -> Dict[str, Dict[str, Any]]:
-     """
-     Download multiple datasets, either in parallel or sequentially.
-
-     Args:
-         organization_id: Organization ID
-         workspace_id: Workspace ID
-         dataset_ids: List of dataset IDs
-         read_files: Whether to read file contents
-         parallel: Whether to download in parallel
-
-     Returns:
-         Dictionary mapping dataset IDs to dataset information
-     """
-     if not dataset_ids:
-         return {}
-
-     if parallel and len(dataset_ids) > 1:
-         return download_datasets_parallel(
-             organization_id=organization_id,
-             workspace_id=workspace_id,
-             dataset_ids=dataset_ids,
-             read_files=read_files,
-         )
-     else:
-         return download_datasets_sequential(
-             organization_id=organization_id,
-             workspace_id=workspace_id,
-             dataset_ids=dataset_ids,
-             read_files=read_files,
-         )
-
-
- def dataset_to_file(dataset_info: Dict[str, Any], target_folder: Optional[Union[str, Path]] = None) -> str:
-     """
-     Convert dataset to files.
-
-     Args:
-         dataset_info: Dataset information dictionary
-         target_folder: Optional folder to save files (if None, uses temp dir)
-
-     Returns:
-         Path to folder containing files
-     """
-     dataset_type = dataset_info["type"]
-     content = dataset_info["content"]
-
-     if dataset_type in ["adt", "twincache"]:
-         # Use conversion function
-         if target_folder:
-             target_folder = convert_graph_dataset_to_files(content, target_folder)
-         else:
-             target_folder = convert_graph_dataset_to_files(content)
-         return str(target_folder)
-
-     # For file datasets, return the folder path
-     if "folder_path" in dataset_info:
-         return dataset_info["folder_path"]
-
-     # Fallback to creating a temp directory
-     if target_folder:
-         return str(target_folder)
-     else:
-         return tempfile.mkdtemp()
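The removed download_datasets_parallel works around CPython bug bpo-43944 (forked children can report a spurious non-zero exit code) by treating a non-zero exit code as fatal only when the child also recorded an error message in a shared dictionary. Below is a minimal, standalone sketch of that pattern; the worker function and item names are hypothetical placeholders, not part of the library.

import multiprocessing


def _worker(item: str, results, errors) -> None:
    # Hypothetical stand-in for download_dataset_process: record either a result or an error.
    try:
        if item.startswith("bad"):
            raise ValueError(f"cannot process {item}")
        results[item] = item.upper()
    except Exception as e:
        errors[item] = f"{type(e).__name__}: {e}"
        raise


if __name__ == "__main__":
    manager = multiprocessing.Manager()
    results, errors = manager.dict(), manager.dict()
    processes = [
        (item, multiprocessing.Process(target=_worker, args=(item, results, errors)))
        for item in ["d-1", "bad-2"]
    ]
    [p.start() for _, p in processes]
    [p.join() for _, p in processes]
    for item, p in processes:
        # bpo-43944 can yield spurious non-zero exit codes, so only fail when the
        # child actually recorded an error message (same workaround as the code above).
        if p.exitcode != 0 and item in errors:
            print(f"failed: {item}: {errors[item]}")
    print(dict(results))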
cosmotech/coal/cosmotech_api/runner/download.py
@@ -1,135 +0,0 @@
- # Copyright (C) - 2023 - 2025 - Cosmo Tech
- # This document and all information contained herein is the exclusive property -
- # including all intellectual property rights pertaining thereto - of Cosmo Tech.
- # Any use, reproduction, translation, broadcasting, transmission, distribution,
- # etc., to any person is prohibited unless it has been previously and
- # specifically authorized by written means by Cosmo Tech.
-
- """
- Orchestration functions for downloading runner and run data.
- """
-
- import os
- import pathlib
- import shutil
- from typing import Dict, Any, Optional
-
- from cosmotech.coal.cosmotech_api.runner.data import get_runner_data
- from cosmotech.coal.cosmotech_api.runner.parameters import (
-     format_parameters_list,
-     write_parameters,
- )
- from cosmotech.coal.cosmotech_api.runner.datasets import (
-     get_dataset_ids_from_runner,
-     download_datasets,
-     dataset_to_file,
- )
- from cosmotech.coal.utils.logger import LOGGER
- from cosmotech.orchestrator.utils.translate import T
-
-
- def download_runner_data(
-     organization_id: str,
-     workspace_id: str,
-     runner_id: str,
-     parameter_folder: str,
-     dataset_folder: Optional[str] = None,
-     read_files: bool = False,
-     parallel: bool = True,
-     write_json: bool = True,
-     write_csv: bool = False,
-     fetch_dataset: bool = True,
- ) -> Dict[str, Any]:
-     """
-     Download all runner data including datasets and parameters.
-
-     Args:
-         organization_id: Organization ID
-         workspace_id: Workspace ID
-         runner_id: Runner ID
-         parameter_folder: Folder to save parameters
-         dataset_folder: Folder to save datasets (if None, only saves datasets referenced by parameters)
-         read_files: Whether to read file contents
-         parallel: Whether to download datasets in parallel
-         write_json: Whether to write parameters as JSON
-         write_csv: Whether to write parameters as CSV
-         fetch_dataset: Whether to fetch datasets
-
-     Returns:
-         Dictionary with runner data, datasets, and parameters
-     """
-     LOGGER.info(T("coal.cosmotech_api.runner.starting_download"))
-
-     # Get runner data
-     runner_data = get_runner_data(organization_id, workspace_id, runner_id)
-
-     # Create result dictionary
-     result = {"runner_data": runner_data, "datasets": {}, "parameters": {}}
-
-     # Skip if no parameters found
-     if not runner_data.parameters_values:
-         LOGGER.warning(T("coal.cosmotech_api.runner.no_parameters"))
-         return result
-
-     LOGGER.info(T("coal.cosmotech_api.runner.loaded_data"))
-
-     # Format parameters
-     parameters = format_parameters_list(runner_data)
-     result["parameters"] = {param["parameterId"]: param["value"] for param in parameters}
-
-     # Download datasets if requested
-     if fetch_dataset:
-         dataset_ids = get_dataset_ids_from_runner(runner_data)
-
-         if dataset_ids:
-             LOGGER.info(T("coal.cosmotech_api.runner.downloading_datasets").format(count=len(dataset_ids)))
-
-             datasets = download_datasets(
-                 organization_id=organization_id,
-                 workspace_id=workspace_id,
-                 dataset_ids=dataset_ids,
-                 read_files=read_files,
-                 parallel=parallel,
-             )
-
-             result["datasets"] = datasets
-
-             # List datasets set as parameter
-             datasets_parameters_ids = {
-                 param.value: param.parameter_id
-                 for param in runner_data.parameters_values
-                 if param.var_type == "%DATASETID%" and param.value
-             }
-
-             # Save parameter datasets to parameter folders
-             for dataset_id, dataset_info in datasets.items():
-                 # If dataset is referenced by a parameter, save to parameter folder
-                 if dataset_id in datasets_parameters_ids:
-                     param_id = datasets_parameters_ids[dataset_id]
-                     param_dir = os.path.join(parameter_folder, param_id)
-                     pathlib.Path(param_dir).mkdir(exist_ok=True, parents=True)
-
-                     dataset_folder_path = dataset_to_file(dataset_info)
-                     shutil.copytree(dataset_folder_path, param_dir, dirs_exist_ok=True)
-
-                     # Update parameter value to point to the folder
-                     for param in parameters:
-                         if param["parameterId"] == param_id:
-                             param["value"] = param_dir
-                             break
-
-                 # If dataset is in dataset_list and dataset_folder is provided, save there too
-                 if dataset_folder and dataset_id in runner_data.dataset_list:
-                     pathlib.Path(dataset_folder).mkdir(parents=True, exist_ok=True)
-                     dataset_folder_path = dataset_to_file(dataset_info)
-                     shutil.copytree(dataset_folder_path, dataset_folder, dirs_exist_ok=True)
-                     LOGGER.debug(
-                         T("coal.cosmotech_api.runner.dataset_debug").format(folder=dataset_folder, id=dataset_id)
-                     )
-
-     # Write parameters to files
-     if write_json or write_csv:
-         LOGGER.info(T("coal.cosmotech_api.runner.writing_parameters"))
-         write_parameters(parameter_folder, parameters, write_csv, write_json)
-
-     return result
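The removed download_runner_data copies each dataset referenced by a %DATASETID% parameter into parameter_folder/<parameterId>/ and then repoints that parameter's value at the folder. A minimal, self-contained sketch of that step follows; the parameter name, dataset file, and temporary paths are hypothetical, not taken from the library.

import pathlib
import shutil
import tempfile

# Hypothetical inputs: a downloaded dataset folder and a parameter that references it.
dataset_folder_path = tempfile.mkdtemp()
pathlib.Path(dataset_folder_path, "nodes.csv").write_text("id\n1\n")
parameters = [{"parameterId": "demand_plan", "value": "d-hypothetical", "varType": "%DATASETID%"}]

parameter_folder = tempfile.mkdtemp()
param_dir = pathlib.Path(parameter_folder) / parameters[0]["parameterId"]
param_dir.mkdir(parents=True, exist_ok=True)

# Copy the dataset files under <parameter_folder>/<parameterId>/ ...
shutil.copytree(dataset_folder_path, param_dir, dirs_exist_ok=True)
# ... and repoint the parameter value at that folder, as the removed helper did.
parameters[0]["value"] = str(param_dir)
print(parameters[0])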
cosmotech/coal/cosmotech_api/runner/metadata.py
@@ -1,42 +0,0 @@
- # Copyright (C) - 2023 - 2025 - Cosmo Tech
- # This document and all information contained herein is the exclusive property -
- # including all intellectual property rights pertaining thereto - of Cosmo Tech.
- # Any use, reproduction, translation, broadcasting, transmission, distribution,
- # etc., to any person is prohibited unless it has been previously and
- # specifically authorized by written means by Cosmo Tech.
-
- """
- Runner metadata retrieval functions.
- """
-
- from typing import Any, Optional
-
- import cosmotech_api
-
-
- def get_runner_metadata(
-     api_client: cosmotech_api.api_client.ApiClient,
-     organization_id: str,
-     workspace_id: str,
-     runner_id: str,
-     include: Optional[list[str]] = None,
-     exclude: Optional[list[str]] = None,
- ) -> dict[str, Any]:
-     """
-     Get runner metadata from the API.
-
-     Args:
-         api_client: The API client to use
-         organization_id: The ID of the organization
-         workspace_id: The ID of the workspace
-         runner_id: The ID of the runner
-         include: Optional list of fields to include
-         exclude: Optional list of fields to exclude
-
-     Returns:
-         Dictionary with runner metadata
-     """
-     runner_api = cosmotech_api.RunnerApi(api_client)
-     runner: cosmotech_api.Runner = runner_api.get_runner(organization_id, workspace_id, runner_id)
-
-     return runner.model_dump(by_alias=True, exclude_none=True, include=include, exclude=exclude, mode="json")
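The removed get_runner_metadata delegates its include/exclude filtering to Pydantic v2's model_dump. A minimal sketch of that behavior on a hypothetical stand-in model (not the actual cosmotech_api.Runner class):

from typing import Optional

from pydantic import BaseModel, Field


class RunnerStub(BaseModel):
    # Hypothetical stand-in for cosmotech_api.Runner, just to show the dump options.
    id: str
    name: str
    run_template_id: str = Field(alias="runTemplateId")
    description: Optional[str] = None


runner = RunnerStub(id="r-1", name="demo", runTemplateId="sim")

# by_alias keeps camelCase keys, exclude_none drops unset optionals,
# include/exclude filter fields the same way the removed helper exposed them.
print(runner.model_dump(by_alias=True, exclude_none=True, mode="json"))
print(runner.model_dump(by_alias=True, include={"id", "name"}, mode="json"))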