cosmotech-acceleration-library 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosmotech/coal/__init__.py +8 -0
- cosmotech/coal/aws/__init__.py +23 -0
- cosmotech/coal/aws/s3.py +235 -0
- cosmotech/coal/azure/__init__.py +23 -0
- cosmotech/coal/azure/adx/__init__.py +26 -0
- cosmotech/coal/azure/adx/auth.py +125 -0
- cosmotech/coal/azure/adx/ingestion.py +329 -0
- cosmotech/coal/azure/adx/query.py +56 -0
- cosmotech/coal/azure/adx/runner.py +217 -0
- cosmotech/coal/azure/adx/store.py +255 -0
- cosmotech/coal/azure/adx/tables.py +118 -0
- cosmotech/coal/azure/adx/utils.py +71 -0
- cosmotech/coal/azure/blob.py +109 -0
- cosmotech/coal/azure/functions.py +72 -0
- cosmotech/coal/azure/storage.py +74 -0
- cosmotech/coal/cosmotech_api/__init__.py +36 -0
- cosmotech/coal/cosmotech_api/connection.py +96 -0
- cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
- cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
- cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
- cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
- cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
- cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
- cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
- cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
- cosmotech/coal/cosmotech_api/parameters.py +48 -0
- cosmotech/coal/cosmotech_api/run.py +25 -0
- cosmotech/coal/cosmotech_api/run_data.py +173 -0
- cosmotech/coal/cosmotech_api/run_template.py +108 -0
- cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
- cosmotech/coal/cosmotech_api/runner/data.py +38 -0
- cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
- cosmotech/coal/cosmotech_api/runner/download.py +146 -0
- cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
- cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
- cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
- cosmotech/coal/cosmotech_api/workspace.py +127 -0
- cosmotech/coal/csm/__init__.py +6 -0
- cosmotech/coal/csm/engine/__init__.py +47 -0
- cosmotech/coal/postgresql/__init__.py +22 -0
- cosmotech/coal/postgresql/runner.py +93 -0
- cosmotech/coal/postgresql/store.py +98 -0
- cosmotech/coal/singlestore/__init__.py +17 -0
- cosmotech/coal/singlestore/store.py +100 -0
- cosmotech/coal/store/__init__.py +42 -0
- cosmotech/coal/store/csv.py +44 -0
- cosmotech/coal/store/native_python.py +25 -0
- cosmotech/coal/store/pandas.py +26 -0
- cosmotech/coal/store/pyarrow.py +23 -0
- cosmotech/coal/store/store.py +79 -0
- cosmotech/coal/utils/__init__.py +18 -0
- cosmotech/coal/utils/api.py +68 -0
- cosmotech/coal/utils/logger.py +10 -0
- cosmotech/coal/utils/postgresql.py +236 -0
- cosmotech/csm_data/__init__.py +6 -0
- cosmotech/csm_data/commands/__init__.py +6 -0
- cosmotech/csm_data/commands/adx_send_data.py +92 -0
- cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
- cosmotech/csm_data/commands/api/__init__.py +6 -0
- cosmotech/csm_data/commands/api/api.py +50 -0
- cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
- cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
- cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
- cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
- cosmotech/csm_data/commands/api/run_load_data.py +120 -0
- cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
- cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
- cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
- cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
- cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
- cosmotech/csm_data/commands/az_storage_upload.py +76 -0
- cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
- cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
- cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
- cosmotech/csm_data/commands/store/__init__.py +6 -0
- cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
- cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
- cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
- cosmotech/csm_data/commands/store/list_tables.py +48 -0
- cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
- cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
- cosmotech/csm_data/commands/store/reset.py +31 -0
- cosmotech/csm_data/commands/store/store.py +37 -0
- cosmotech/csm_data/main.py +57 -0
- cosmotech/csm_data/utils/__init__.py +6 -0
- cosmotech/csm_data/utils/click.py +18 -0
- cosmotech/csm_data/utils/decorators.py +75 -0
- cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
- cosmotech/translation/coal/__init__.py +6 -0
- cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
- cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
- cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
- cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
- cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
- cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
- cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
- cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
- cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
- cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
- cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
- cosmotech/translation/coal/en-US/coal/web.yml +2 -0
- cosmotech/translation/csm_data/__init__.py +6 -0
- cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
- cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
- cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
- cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
- cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
- cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
- cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
cosmotech/coal/cosmotech_api/dataset/download/file.py
@@ -0,0 +1,216 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+import csv
+import io
+import json
+import os
+import tempfile
+import time
+from pathlib import Path
+from typing import Dict, List, Any, Optional, Union, Tuple
+
+from cosmotech_api import WorkspaceApi
+from openpyxl import load_workbook
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+from cosmotech.coal.cosmotech_api.connection import get_api_client
+
+
+def download_file_dataset(
+    organization_id: str,
+    workspace_id: str,
+    file_name: str,
+    target_folder: Optional[Union[str, Path]] = None,
+    read_files: bool = True,
+) -> Tuple[Dict[str, Any], Path]:
+    """
+    Download file dataset.
+
+    Args:
+        organization_id: Organization ID
+        workspace_id: Workspace ID
+        file_name: File name to download
+        target_folder: Optional folder to save files (if None, uses temp dir)
+        read_files: Whether to read file contents
+
+    Returns:
+        Tuple of (content dict, folder path)
+    """
+    start_time = time.time()
+    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="File"))
+    LOGGER.debug(
+        T("coal.services.dataset.file_downloading").format(
+            organization_id=organization_id,
+            workspace_id=workspace_id,
+            file_name=file_name,
+        )
+    )
+
+    # Create temp directory for downloaded files
+    if target_folder is None:
+        tmp_dataset_dir = tempfile.mkdtemp()
+    else:
+        tmp_dataset_dir = Path(target_folder)
+        tmp_dataset_dir.mkdir(parents=True, exist_ok=True)
+        tmp_dataset_dir = str(tmp_dataset_dir)
+
+    LOGGER.debug(T("coal.services.dataset.using_folder").format(folder=tmp_dataset_dir))
+
+    content = dict()
+
+    with get_api_client()[0] as api_client:
+        api_ws = WorkspaceApi(api_client)
+
+        # Find all files matching the pattern
+        list_start = time.time()
+        LOGGER.debug(T("coal.services.dataset.listing_workspace_files"))
+        all_api_files = api_ws.find_all_workspace_files(organization_id, workspace_id)
+
+        existing_files = list(_f.file_name for _f in all_api_files if _f.file_name.startswith(file_name))
+        list_time = time.time() - list_start
+
+        LOGGER.debug(T("coal.services.dataset.workspace_files_found").format(count=len(existing_files)))
+        LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="file listing", time=list_time))
+
+        if not existing_files:
+            LOGGER.warning(T("coal.services.dataset.no_files_found").format(file_name=file_name))
+            return content, Path(tmp_dataset_dir)
+
+        # Download and process each file
+        for _file_name in existing_files:
+            download_start = time.time()
+            LOGGER.debug(T("coal.services.dataset.downloading_file").format(file_name=_file_name))
+
+            dl_file = api_ws.download_workspace_file(
+                organization_id=organization_id,
+                workspace_id=workspace_id,
+                file_name=_file_name,
+            )
+
+            target_file = os.path.join(tmp_dataset_dir, _file_name.split("/")[-1])
+            with open(target_file, "wb") as tmp_file:
+                tmp_file.write(dl_file)
+
+            download_time = time.time() - download_start
+            LOGGER.debug(T("coal.services.dataset.file_downloaded").format(file_name=_file_name, path=target_file))
+            LOGGER.debug(
+                T("coal.common.timing.operation_completed").format(
+                    operation=f"download {_file_name}", time=download_time
+                )
+            )
+
+            if not read_files:
+                continue
+
+            # Process file based on type
+            process_start = time.time()
+
+            if ".xls" in _file_name:
+                LOGGER.debug(T("coal.services.dataset.processing_excel").format(file_name=target_file))
+                wb = load_workbook(target_file, data_only=True)
+
+                for sheet_name in wb.sheetnames:
+                    sheet = wb[sheet_name]
+                    content[sheet_name] = list()
+                    headers = next(sheet.iter_rows(max_row=1, values_only=True))
+
+                    def item(_row: tuple) -> dict:
+                        return {k: v for k, v in zip(headers, _row)}
+
+                    row_count = 0
+                    for r in sheet.iter_rows(min_row=2, values_only=True):
+                        row = item(r)
+                        new_row = dict()
+
+                        for key, value in row.items():
+                            try:
+                                converted_value = json.load(io.StringIO(value))
+                            except (json.decoder.JSONDecodeError, TypeError):
+                                converted_value = value
+
+                            if converted_value is not None:
+                                new_row[key] = converted_value
+
+                        if new_row:
+                            content[sheet_name].append(new_row)
+                            row_count += 1
+
+                    LOGGER.debug(
+                        T("coal.services.dataset.sheet_processed").format(sheet_name=sheet_name, rows=row_count)
+                    )
+
+            elif ".csv" in _file_name:
+                LOGGER.debug(T("coal.services.dataset.processing_csv").format(file_name=target_file))
+                with open(target_file, "r") as file:
+                    current_filename = os.path.basename(target_file)[: -len(".csv")]
+                    content[current_filename] = list()
+
+                    row_count = 0
+                    for csv_row in csv.DictReader(file):
+                        csv_row: dict
+                        new_row = dict()
+
+                        for key, value in csv_row.items():
+                            try:
+                                # Try to convert any json row to dict object
+                                converted_value = json.load(io.StringIO(value))
+                            except json.decoder.JSONDecodeError:
+                                converted_value = value
+
+                            if converted_value == "":
+                                converted_value = None
+
+                            if converted_value is not None:
+                                new_row[key] = converted_value
+
+                        content[current_filename].append(new_row)
+                        row_count += 1
+
+                    LOGGER.debug(
+                        T("coal.services.dataset.csv_processed").format(file_name=current_filename, rows=row_count)
+                    )
+
+            elif ".json" in _file_name:
+                LOGGER.debug(T("coal.services.dataset.processing_json").format(file_name=target_file))
+                with open(target_file, "r") as _file:
+                    current_filename = os.path.basename(target_file)
+                    content[current_filename] = json.load(_file)
+
+                    if isinstance(content[current_filename], dict):
+                        item_count = len(content[current_filename])
+                    elif isinstance(content[current_filename], list):
+                        item_count = len(content[current_filename])
+                    else:
+                        item_count = 1
+
+                    LOGGER.debug(
+                        T("coal.services.dataset.json_processed").format(file_name=current_filename, items=item_count)
+                    )
+
+            else:
+                LOGGER.debug(T("coal.services.dataset.processing_text").format(file_name=target_file))
+                with open(target_file, "r") as _file:
+                    current_filename = os.path.basename(target_file)
+                    content[current_filename] = "\n".join(line for line in _file)
+
+                    line_count = content[current_filename].count("\n") + 1
+                    LOGGER.debug(
+                        T("coal.services.dataset.text_processed").format(file_name=current_filename, lines=line_count)
+                    )
+
+            process_time = time.time() - process_start
+            LOGGER.debug(
+                T("coal.common.timing.operation_completed").format(operation=f"process {_file_name}", time=process_time)
+            )
+
+    elapsed_time = time.time() - start_time
+    LOGGER.info(T("coal.common.timing.operation_completed").format(operation="File download", time=elapsed_time))
+    LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="File"))
+
+    return content, Path(tmp_dataset_dir)
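For orientation, a minimal usage sketch for download_file_dataset (not part of the diff above): it assumes the wheel is installed and that credentials for get_api_client are configured in the environment; the IDs and paths are placeholders.

from pathlib import Path

from cosmotech.coal.cosmotech_api.dataset.download.file import download_file_dataset

# Placeholder IDs; replace with real organization/workspace values.
content, folder = download_file_dataset(
    organization_id="o-organization",
    workspace_id="w-workspace",
    file_name="my_dataset/",  # prefix match: every workspace file starting with this is fetched
    target_folder=Path("./downloaded"),
    read_files=True,
)

# content maps sheet/file names to parsed rows (or raw text); folder holds the raw files.
for name, value in content.items():
    print(name, len(value) if isinstance(value, (list, dict)) else "text")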
cosmotech/coal/cosmotech_api/dataset/download/twingraph.py
@@ -0,0 +1,188 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+import time
+import tempfile
+from pathlib import Path
+from typing import Dict, List, Any, Optional, Union, Tuple
+
+from cosmotech_api import (
+    DatasetApi,
+    DatasetTwinGraphQuery,
+    TwinGraphQuery,
+    TwingraphApi,
+)
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+from cosmotech.coal.cosmotech_api.connection import get_api_client
+from cosmotech.coal.cosmotech_api.dataset.utils import get_content_from_twin_graph_data
+from cosmotech.coal.cosmotech_api.dataset.converters import convert_dataset_to_files
+
+
+def download_twingraph_dataset(
+    organization_id: str,
+    dataset_id: str,
+    target_folder: Optional[Union[str, Path]] = None,
+) -> Tuple[Dict[str, Any], Path]:
+    """
+    Download dataset from TwinGraph.
+
+    Args:
+        organization_id: Organization ID
+        dataset_id: Dataset ID
+        target_folder: Optional folder to save files (if None, uses temp dir)
+
+    Returns:
+        Tuple of (content dict, folder path)
+    """
+    start_time = time.time()
+    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="TwinGraph"))
+    LOGGER.debug(
+        T("coal.services.dataset.twingraph_downloading").format(organization_id=organization_id, dataset_id=dataset_id)
+    )
+
+    with get_api_client()[0] as api_client:
+        dataset_api = DatasetApi(api_client)
+
+        # Query nodes
+        nodes_start = time.time()
+        LOGGER.debug(T("coal.services.dataset.twingraph_querying_nodes").format(dataset_id=dataset_id))
+        nodes_query = DatasetTwinGraphQuery(query="MATCH(n) RETURN n")
+
+        nodes = dataset_api.twingraph_query(
+            organization_id=organization_id,
+            dataset_id=dataset_id,
+            dataset_twin_graph_query=nodes_query,
+        )
+
+        nodes_time = time.time() - nodes_start
+        LOGGER.debug(T("coal.services.dataset.twingraph_nodes_found").format(count=len(nodes)))
+        LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="nodes query", time=nodes_time))
+
+        # Query edges
+        edges_start = time.time()
+        LOGGER.debug(T("coal.services.dataset.twingraph_querying_edges").format(dataset_id=dataset_id))
+        edges_query = DatasetTwinGraphQuery(query="MATCH(n)-[r]->(m) RETURN n as src, r as rel, m as dest")
+
+        edges = dataset_api.twingraph_query(
+            organization_id=organization_id,
+            dataset_id=dataset_id,
+            dataset_twin_graph_query=edges_query,
+        )
+
+        edges_time = time.time() - edges_start
+        LOGGER.debug(T("coal.services.dataset.twingraph_edges_found").format(count=len(edges)))
+        LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="edges query", time=edges_time))
+
+    # Process results
+    process_start = time.time()
+    content = get_content_from_twin_graph_data(nodes, edges, True)
+    process_time = time.time() - process_start
+
+    LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="data processing", time=process_time))
+
+    # Convert to files if target_folder is provided
+    if target_folder:
+        dataset_info = {
+            "type": "twincache",
+            "content": content,
+            "name": f"TwinGraph Dataset {dataset_id}",
+        }
+        target_folder = convert_dataset_to_files(dataset_info, target_folder)
+    else:
+        target_folder = tempfile.mkdtemp()
+
+    elapsed_time = time.time() - start_time
+    LOGGER.info(T("coal.common.timing.operation_completed").format(operation="TwinGraph download", time=elapsed_time))
+    LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="TwinGraph"))
+
+    return content, Path(target_folder)
+
+
+def download_legacy_twingraph_dataset(
+    organization_id: str,
+    cache_name: str,
+    target_folder: Optional[Union[str, Path]] = None,
+) -> Tuple[Dict[str, Any], Path]:
+    """
+    Download dataset from legacy TwinGraph.
+
+    Args:
+        organization_id: Organization ID
+        cache_name: Twin cache name
+        target_folder: Optional folder to save files (if None, uses temp dir)
+
+    Returns:
+        Tuple of (content dict, folder path)
+    """
+    start_time = time.time()
+    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="Legacy TwinGraph"))
+    LOGGER.debug(
+        T("coal.services.dataset.legacy_twingraph_downloading").format(
+            organization_id=organization_id, cache_name=cache_name
+        )
+    )
+
+    with get_api_client()[0] as api_client:
+        api_instance = TwingraphApi(api_client)
+
+        # Query nodes
+        nodes_start = time.time()
+        LOGGER.debug(T("coal.services.dataset.legacy_twingraph_querying_nodes").format(cache_name=cache_name))
+        _query_nodes = TwinGraphQuery(query="MATCH(n) RETURN n")
+
+        nodes = api_instance.query(
+            organization_id=organization_id,
+            graph_id=cache_name,
+            twin_graph_query=_query_nodes,
+        )
+
+        nodes_time = time.time() - nodes_start
+        LOGGER.debug(T("coal.services.dataset.legacy_twingraph_nodes_found").format(count=len(nodes)))
+        LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="nodes query", time=nodes_time))
+
+        # Query relationships
+        rel_start = time.time()
+        LOGGER.debug(T("coal.services.dataset.legacy_twingraph_querying_relations").format(cache_name=cache_name))
+        _query_rel = TwinGraphQuery(query="MATCH(n)-[r]->(m) RETURN n as src, r as rel, m as dest")
+
+        rel = api_instance.query(
+            organization_id=organization_id,
+            graph_id=cache_name,
+            twin_graph_query=_query_rel,
+        )
+
+        rel_time = time.time() - rel_start
+        LOGGER.debug(T("coal.services.dataset.legacy_twingraph_relations_found").format(count=len(rel)))
+        LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="relations query", time=rel_time))
+
+    # Process results
+    process_start = time.time()
+    content = get_content_from_twin_graph_data(nodes, rel, False)
+    process_time = time.time() - process_start
+
+    LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="data processing", time=process_time))
+
+    # Convert to files if target_folder is provided
+    if target_folder:
+        dataset_info = {
+            "type": "twincache",
+            "content": content,
+            "name": f"Legacy TwinGraph Dataset {cache_name}",
+        }
+        target_folder = convert_dataset_to_files(dataset_info, target_folder)
+    else:
+        target_folder = tempfile.mkdtemp()
+
+    elapsed_time = time.time() - start_time
+    LOGGER.info(
+        T("coal.common.timing.operation_completed").format(operation="Legacy TwinGraph download", time=elapsed_time)
+    )
+    LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="Legacy TwinGraph"))
+
+    return content, Path(target_folder)
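A similar usage sketch for download_twingraph_dataset, under the same assumptions (installed wheel, configured credentials, placeholder IDs):

from pathlib import Path

from cosmotech.coal.cosmotech_api.dataset.download.twingraph import download_twingraph_dataset

# Placeholder IDs; credentials for get_api_client must be configured.
content, folder = download_twingraph_dataset(
    organization_id="o-organization",
    dataset_id="d-dataset",
    target_folder=Path("./twingraph_out"),  # also written to disk via convert_dataset_to_files
)

# content maps node/relationship labels to lists of entities.
for entity_type, entities in content.items():
    print(entity_type, len(entities))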
cosmotech/coal/cosmotech_api/dataset/utils.py
@@ -0,0 +1,132 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+from typing import Dict, List, Any
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+
+
+def get_content_from_twin_graph_data(
+    nodes: List[Dict], relationships: List[Dict], restore_names: bool = False
+) -> Dict[str, List[Dict]]:
+    """
+    Extract content from twin graph data.
+
+    When restore_names is True, the "id" value inside the "properties" field in the cypher query response is used
+    instead of the numerical id found in the "id" field. When restore_names is set to False, this function
+    keeps the previous behavior implemented when adding support for twingraph in v2 (default: False)
+
+    Example with a sample of cypher response:
+    [{
+      n: {
+        id: "50"  <-- this id is used if restore_names is False
+        label: "Customer"
+        properties: {
+          Satisfaction: 0
+          SurroundingSatisfaction: 0
+          Thirsty: false
+          id: "Lars_Coret"  <-- this id is used if restore_names is True
+        }
+        type: "NODE"
+      }
+    }]
+
+    Args:
+        nodes: List of node data from cypher query
+        relationships: List of relationship data from cypher query
+        restore_names: Whether to use property ID instead of node ID
+
+    Returns:
+        Dict mapping entity types to lists of entities
+    """
+    LOGGER.debug(
+        T("coal.services.dataset.processing_graph_data").format(
+            nodes_count=len(nodes),
+            relationships_count=len(relationships),
+            restore_names=restore_names,
+        )
+    )
+
+    content = dict()
+    # build keys
+    for item in relationships:
+        content[item["src"]["label"]] = list()
+        content[item["dest"]["label"]] = list()
+        content[item["rel"]["label"]] = list()
+
+    # Process nodes
+    for item in nodes:
+        label = item["n"]["label"]
+        props = item["n"]["properties"].copy()  # Create a copy to avoid modifying the original
+        if not restore_names:
+            props.update({"id": item["n"]["id"]})
+        content.setdefault(label, list())
+        content[label].append(props)
+
+    # Process relationships
+    for item in relationships:
+        src = item["src"]
+        dest = item["dest"]
+        rel = item["rel"]
+        props = rel["properties"].copy()  # Create a copy to avoid modifying the original
+        content[rel["label"]].append(
+            {
+                "id": rel["id"],
+                "source": src["properties"]["id"] if restore_names else src["id"],
+                "target": dest["properties"]["id"] if restore_names else dest["id"],
+                **props,
+            }
+        )
+
+    # Log the number of entities by type
+    for entity_type, entities in content.items():
+        LOGGER.debug(T("coal.services.dataset.entity_count").format(entity_type=entity_type, count=len(entities)))
+
+    return content
+
+
+def sheet_to_header(sheet_content: List[Dict]) -> List[str]:
+    """
+    Extract header fields from sheet content.
+
+    Args:
+        sheet_content: List of dictionaries representing sheet rows
+
+    Returns:
+        List of field names with id, source, and target fields first if present
+    """
+    LOGGER.debug(T("coal.services.dataset.extracting_headers").format(rows=len(sheet_content)))
+
+    fieldnames = []
+    has_src = False
+    has_id = False
+
+    for r in sheet_content:
+        for k in r.keys():
+            if k not in fieldnames:
+                if k in ["source", "target"]:
+                    has_src = True
+                elif k == "id":
+                    has_id = True
+                else:
+                    fieldnames.append(k)
+
+    # Ensure source/target and id fields come first
+    if has_src:
+        fieldnames = ["source", "target"] + fieldnames
+    if has_id:
+        fieldnames = ["id"] + fieldnames
+
+    LOGGER.debug(
+        T("coal.services.dataset.headers_extracted").format(
+            count=len(fieldnames),
+            fields=", ".join(fieldnames[:5]) + ("..." if len(fieldnames) > 5 else ""),
+        )
+    )
+
+    return fieldnames
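Unlike the download helpers, get_content_from_twin_graph_data needs no API access, so it can be exercised directly; a small sketch with made-up records in the cypher-response shape the docstring describes:

from cosmotech.coal.cosmotech_api.dataset.utils import get_content_from_twin_graph_data

# Made-up node and relationship records; labels and ids are illustrative only.
bar = {"id": "1", "label": "Bar", "type": "NODE", "properties": {"id": "MyBar", "Stock": 10}}
customer = {"id": "2", "label": "Customer", "type": "NODE", "properties": {"id": "Lars_Coret", "Thirsty": False}}
nodes = [{"n": bar}, {"n": customer}]
relationships = [{"src": bar, "dest": customer, "rel": {"id": "3", "label": "arc_to_Customer", "properties": {}}}]

content = get_content_from_twin_graph_data(nodes, relationships, restore_names=True)
# With restore_names=True the property ids are kept, so:
# content["arc_to_Customer"] == [{"id": "3", "source": "MyBar", "target": "Lars_Coret"}]
print(content)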
cosmotech/coal/cosmotech_api/parameters.py
@@ -0,0 +1,48 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+"""
+Parameter handling functions.
+
+This module provides functions for handling parameters in solution templates.
+"""
+
+import json
+import os
+import pathlib
+from csv import DictWriter
+from typing import List, Dict, Any
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+
+
+def write_parameters(
+    parameter_folder: str, parameters: List[Dict[str, Any]], write_csv: bool, write_json: bool
+) -> None:
+    """
+    Write parameters to CSV and/or JSON files.
+
+    Args:
+        parameter_folder: The folder to write the parameters to
+        parameters: The parameters to write
+        write_csv: Whether to write the parameters to a CSV file
+        write_json: Whether to write the parameters to a JSON file
+    """
+    if write_csv:
+        tmp_parameter_file = os.path.join(parameter_folder, "parameters.csv")
+        LOGGER.info(T("coal.cosmotech_api.runner.generating_file").format(file=tmp_parameter_file))
+        with open(tmp_parameter_file, "w") as _file:
+            _w = DictWriter(_file, fieldnames=["parameterId", "value", "varType", "isInherited"])
+            _w.writeheader()
+            _w.writerows(parameters)
+
+    if write_json:
+        tmp_parameter_file = os.path.join(parameter_folder, "parameters.json")
+        LOGGER.info(T("coal.cosmotech_api.runner.generating_file").format(file=tmp_parameter_file))
+        with open(tmp_parameter_file, "w") as _file:
+            json.dump(parameters, _file, indent=2)
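A short sketch of write_parameters; the parameter dictionaries below are illustrative and follow the CSV fieldnames used in the code above:

import tempfile

from cosmotech.coal.cosmotech_api.parameters import write_parameters

# Sample parameter rows matching the DictWriter fieldnames.
parameters = [
    {"parameterId": "stock", "value": "100", "varType": "int", "isInherited": False},
    {"parameterId": "restock_quantity", "value": "25", "varType": "int", "isInherited": True},
]

folder = tempfile.mkdtemp()
# Produces <folder>/parameters.csv and <folder>/parameters.json
write_parameters(folder, parameters, write_csv=True, write_json=True)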
cosmotech/coal/cosmotech_api/run.py
@@ -0,0 +1,25 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+from typing import Any
+from typing import Optional
+
+import cosmotech_api
+
+
+def get_run_metadata(
+    api_client: cosmotech_api.api_client.ApiClient,
+    organization_id: str,
+    workspace_id: str,
+    runner_id: str,
+    run_id: str,
+    include: Optional[list[str]] = None,
+    exclude: Optional[list[str]] = None,
+) -> dict[str, Any]:
+    run_api = cosmotech_api.RunApi(api_client)
+
+    run: cosmotech_api.Run = run_api.get_run(organization_id, workspace_id, runner_id, run_id)
+    return run.model_dump(by_alias=True, exclude_none=True, include=include, exclude=exclude, mode="json")
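A usage sketch for get_run_metadata; the IDs are placeholders, and the include list is forwarded straight to pydantic's model_dump, so its entries are assumed to be Run field names:

from cosmotech.coal.cosmotech_api.connection import get_api_client
from cosmotech.coal.cosmotech_api.run import get_run_metadata

# Placeholder IDs; credentials for get_api_client must be configured.
with get_api_client()[0] as api_client:
    metadata = get_run_metadata(
        api_client,
        organization_id="o-organization",
        workspace_id="w-workspace",
        runner_id="r-runner",
        run_id="run-id",
        include=["id", "state"],  # assumed field names, passed through to Run.model_dump()
    )

print(metadata)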