cosmotech-acceleration-library 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. cosmotech/coal/__init__.py +8 -0
  2. cosmotech/coal/aws/__init__.py +23 -0
  3. cosmotech/coal/aws/s3.py +235 -0
  4. cosmotech/coal/azure/__init__.py +23 -0
  5. cosmotech/coal/azure/adx/__init__.py +26 -0
  6. cosmotech/coal/azure/adx/auth.py +125 -0
  7. cosmotech/coal/azure/adx/ingestion.py +329 -0
  8. cosmotech/coal/azure/adx/query.py +56 -0
  9. cosmotech/coal/azure/adx/runner.py +217 -0
  10. cosmotech/coal/azure/adx/store.py +255 -0
  11. cosmotech/coal/azure/adx/tables.py +118 -0
  12. cosmotech/coal/azure/adx/utils.py +71 -0
  13. cosmotech/coal/azure/blob.py +109 -0
  14. cosmotech/coal/azure/functions.py +72 -0
  15. cosmotech/coal/azure/storage.py +74 -0
  16. cosmotech/coal/cosmotech_api/__init__.py +36 -0
  17. cosmotech/coal/cosmotech_api/connection.py +96 -0
  18. cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
  19. cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
  20. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
  21. cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
  22. cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
  23. cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
  24. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
  25. cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
  26. cosmotech/coal/cosmotech_api/parameters.py +48 -0
  27. cosmotech/coal/cosmotech_api/run.py +25 -0
  28. cosmotech/coal/cosmotech_api/run_data.py +173 -0
  29. cosmotech/coal/cosmotech_api/run_template.py +108 -0
  30. cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
  31. cosmotech/coal/cosmotech_api/runner/data.py +38 -0
  32. cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
  33. cosmotech/coal/cosmotech_api/runner/download.py +146 -0
  34. cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
  35. cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
  36. cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
  37. cosmotech/coal/cosmotech_api/workspace.py +127 -0
  38. cosmotech/coal/csm/__init__.py +6 -0
  39. cosmotech/coal/csm/engine/__init__.py +47 -0
  40. cosmotech/coal/postgresql/__init__.py +22 -0
  41. cosmotech/coal/postgresql/runner.py +93 -0
  42. cosmotech/coal/postgresql/store.py +98 -0
  43. cosmotech/coal/singlestore/__init__.py +17 -0
  44. cosmotech/coal/singlestore/store.py +100 -0
  45. cosmotech/coal/store/__init__.py +42 -0
  46. cosmotech/coal/store/csv.py +44 -0
  47. cosmotech/coal/store/native_python.py +25 -0
  48. cosmotech/coal/store/pandas.py +26 -0
  49. cosmotech/coal/store/pyarrow.py +23 -0
  50. cosmotech/coal/store/store.py +79 -0
  51. cosmotech/coal/utils/__init__.py +18 -0
  52. cosmotech/coal/utils/api.py +68 -0
  53. cosmotech/coal/utils/logger.py +10 -0
  54. cosmotech/coal/utils/postgresql.py +236 -0
  55. cosmotech/csm_data/__init__.py +6 -0
  56. cosmotech/csm_data/commands/__init__.py +6 -0
  57. cosmotech/csm_data/commands/adx_send_data.py +92 -0
  58. cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
  59. cosmotech/csm_data/commands/api/__init__.py +6 -0
  60. cosmotech/csm_data/commands/api/api.py +50 -0
  61. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
  62. cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
  63. cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
  64. cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
  65. cosmotech/csm_data/commands/api/run_load_data.py +120 -0
  66. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
  67. cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
  68. cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
  69. cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
  70. cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
  71. cosmotech/csm_data/commands/az_storage_upload.py +76 -0
  72. cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
  73. cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
  74. cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
  75. cosmotech/csm_data/commands/store/__init__.py +6 -0
  76. cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
  77. cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
  78. cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
  79. cosmotech/csm_data/commands/store/list_tables.py +48 -0
  80. cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
  81. cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
  82. cosmotech/csm_data/commands/store/reset.py +31 -0
  83. cosmotech/csm_data/commands/store/store.py +37 -0
  84. cosmotech/csm_data/main.py +57 -0
  85. cosmotech/csm_data/utils/__init__.py +6 -0
  86. cosmotech/csm_data/utils/click.py +18 -0
  87. cosmotech/csm_data/utils/decorators.py +75 -0
  88. cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
  89. cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
  90. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
  91. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
  92. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
  93. cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
  94. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
  95. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
  96. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
  97. cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
  98. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
  99. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
  100. cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
  101. cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
  102. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
  103. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
  104. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
  105. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
  106. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
  107. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
  108. cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
  109. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
  110. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
  111. cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
  112. cosmotech/translation/coal/__init__.py +6 -0
  113. cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
  114. cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
  115. cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
  116. cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
  117. cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
  118. cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
  119. cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
  120. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
  121. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
  122. cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
  123. cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
  124. cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
  125. cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
  126. cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
  127. cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
  128. cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
  129. cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
  130. cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
  131. cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
  132. cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
  133. cosmotech/translation/coal/en-US/coal/web.yml +2 -0
  134. cosmotech/translation/csm_data/__init__.py +6 -0
  135. cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
  136. cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
  137. cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
  138. cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
  139. cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
  140. cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
  141. cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,216 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ import csv
9
+ import io
10
+ import json
11
+ import os
12
+ import tempfile
13
+ import time
14
+ from pathlib import Path
15
+ from typing import Dict, List, Any, Optional, Union, Tuple
16
+
17
+ from cosmotech_api import WorkspaceApi
18
+ from openpyxl import load_workbook
19
+
20
+ from cosmotech.coal.utils.logger import LOGGER
21
+ from cosmotech.orchestrator.utils.translate import T
22
+ from cosmotech.coal.cosmotech_api.connection import get_api_client
23
+
24
+
25
def download_file_dataset(
    organization_id: str,
    workspace_id: str,
    file_name: str,
    target_folder: Optional[Union[str, Path]] = None,
    read_files: bool = True,
) -> Tuple[Dict[str, Any], Path]:
    """
    Download every workspace file whose name starts with ``file_name``.

    Files are written into ``target_folder`` (or a fresh temporary directory)
    and, when ``read_files`` is True, parsed into the returned content dict
    according to their extension (Excel, CSV, JSON, or plain text).

    Args:
        organization_id: Organization ID
        workspace_id: Workspace ID
        file_name: File name (prefix) to download
        target_folder: Optional folder to save files (if None, uses temp dir)
        read_files: Whether to read and parse file contents

    Returns:
        Tuple of (content dict, folder path)
    """
    start_time = time.time()
    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="File"))
    LOGGER.debug(
        T("coal.services.dataset.file_downloading").format(
            organization_id=organization_id,
            workspace_id=workspace_id,
            file_name=file_name,
        )
    )

    # Resolve the destination folder (temporary directory when none is given)
    if target_folder is None:
        tmp_dataset_dir = tempfile.mkdtemp()
    else:
        tmp_dataset_dir = Path(target_folder)
        tmp_dataset_dir.mkdir(parents=True, exist_ok=True)
        tmp_dataset_dir = str(tmp_dataset_dir)

    LOGGER.debug(T("coal.services.dataset.using_folder").format(folder=tmp_dataset_dir))

    content: Dict[str, Any] = dict()

    with get_api_client()[0] as api_client:
        api_ws = WorkspaceApi(api_client)

        # Find all workspace files matching the requested name prefix
        list_start = time.time()
        LOGGER.debug(T("coal.services.dataset.listing_workspace_files"))
        all_api_files = api_ws.find_all_workspace_files(organization_id, workspace_id)
        existing_files = list(_f.file_name for _f in all_api_files if _f.file_name.startswith(file_name))
        list_time = time.time() - list_start

        LOGGER.debug(T("coal.services.dataset.workspace_files_found").format(count=len(existing_files)))
        LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="file listing", time=list_time))

        if not existing_files:
            LOGGER.warning(T("coal.services.dataset.no_files_found").format(file_name=file_name))
            return content, Path(tmp_dataset_dir)

        # Download and (optionally) parse each file
        for _file_name in existing_files:
            download_start = time.time()
            LOGGER.debug(T("coal.services.dataset.downloading_file").format(file_name=_file_name))

            dl_file = api_ws.download_workspace_file(
                organization_id=organization_id,
                workspace_id=workspace_id,
                file_name=_file_name,
            )

            target_file = os.path.join(tmp_dataset_dir, _file_name.split("/")[-1])
            with open(target_file, "wb") as tmp_file:
                tmp_file.write(dl_file)

            download_time = time.time() - download_start
            LOGGER.debug(T("coal.services.dataset.file_downloaded").format(file_name=_file_name, path=target_file))
            LOGGER.debug(
                T("coal.common.timing.operation_completed").format(
                    operation=f"download {_file_name}", time=download_time
                )
            )

            if not read_files:
                continue

            process_start = time.time()

            # Dispatch on (loose, substring-based) extension matching,
            # preserving the historical matching behavior.
            if ".xls" in _file_name:
                _process_excel_file(target_file, content)
            elif ".csv" in _file_name:
                _process_csv_file(target_file, content)
            elif ".json" in _file_name:
                _process_json_file(target_file, content)
            else:
                _process_text_file(target_file, content)

            process_time = time.time() - process_start
            LOGGER.debug(
                T("coal.common.timing.operation_completed").format(operation=f"process {_file_name}", time=process_time)
            )

    elapsed_time = time.time() - start_time
    LOGGER.info(T("coal.common.timing.operation_completed").format(operation="File download", time=elapsed_time))
    LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="File"))

    return content, Path(tmp_dataset_dir)


def _convert_cell_value(value: Any) -> Any:
    """Best-effort JSON decoding of a single cell value.

    Returns the decoded object when ``value`` is a valid JSON document,
    otherwise returns ``value`` unchanged (including None / non-string values,
    which raise TypeError inside ``json.loads``).
    """
    try:
        # json.loads(value) replaces the previous json.load(io.StringIO(value))
        # detour; the raised exception set (JSONDecodeError / TypeError) is the same.
        return json.loads(value)
    except (json.decoder.JSONDecodeError, TypeError):
        return value


def _process_excel_file(target_file: str, content: Dict[str, Any]) -> None:
    """Parse every sheet of an Excel workbook into content[sheet_name] as a list of row dicts."""
    LOGGER.debug(T("coal.services.dataset.processing_excel").format(file_name=target_file))
    wb = load_workbook(target_file, data_only=True)

    for sheet_name in wb.sheetnames:
        sheet = wb[sheet_name]
        content[sheet_name] = list()
        headers = next(sheet.iter_rows(max_row=1, values_only=True))

        row_count = 0
        for r in sheet.iter_rows(min_row=2, values_only=True):
            # Build the row dict first so duplicate headers collapse (last wins),
            # as in the original implementation.
            row = dict(zip(headers, r))
            new_row = dict()
            for key, value in row.items():
                converted_value = _convert_cell_value(value)
                if converted_value is not None:
                    new_row[key] = converted_value

            # Fully-empty rows are skipped and not counted
            if new_row:
                content[sheet_name].append(new_row)
                row_count += 1

        LOGGER.debug(T("coal.services.dataset.sheet_processed").format(sheet_name=sheet_name, rows=row_count))


def _process_csv_file(target_file: str, content: Dict[str, Any]) -> None:
    """Parse a CSV file into content[basename-without-.csv] as a list of row dicts."""
    LOGGER.debug(T("coal.services.dataset.processing_csv").format(file_name=target_file))
    with open(target_file, "r") as file:
        current_filename = os.path.basename(target_file)[: -len(".csv")]
        content[current_filename] = list()

        row_count = 0
        for csv_row in csv.DictReader(file):
            new_row = dict()
            for key, value in csv_row.items():
                converted_value = _convert_cell_value(value)

                # Empty strings are treated as missing values
                if converted_value == "":
                    converted_value = None

                if converted_value is not None:
                    new_row[key] = converted_value

            # CSV rows are kept even when empty (historical behavior)
            content[current_filename].append(new_row)
            row_count += 1

        LOGGER.debug(T("coal.services.dataset.csv_processed").format(file_name=current_filename, rows=row_count))


def _process_json_file(target_file: str, content: Dict[str, Any]) -> None:
    """Load a JSON file verbatim into content[basename]."""
    LOGGER.debug(T("coal.services.dataset.processing_json").format(file_name=target_file))
    with open(target_file, "r") as _file:
        current_filename = os.path.basename(target_file)
        content[current_filename] = json.load(_file)

    loaded = content[current_filename]
    item_count = len(loaded) if isinstance(loaded, (dict, list)) else 1

    LOGGER.debug(T("coal.services.dataset.json_processed").format(file_name=current_filename, items=item_count))


def _process_text_file(target_file: str, content: Dict[str, Any]) -> None:
    """Load any other file as raw text into content[basename]."""
    LOGGER.debug(T("coal.services.dataset.processing_text").format(file_name=target_file))
    with open(target_file, "r") as _file:
        current_filename = os.path.basename(target_file)
        # Bug fix: the previous implementation joined already newline-terminated
        # lines with "\n", doubling every line break (and inflating line_count).
        # Read the file content verbatim instead.
        content[current_filename] = _file.read()

    line_count = content[current_filename].count("\n") + 1
    LOGGER.debug(T("coal.services.dataset.text_processed").format(file_name=current_filename, lines=line_count))
@@ -0,0 +1,188 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ import time
9
+ import tempfile
10
+ from pathlib import Path
11
+ from typing import Dict, List, Any, Optional, Union, Tuple
12
+
13
+ from cosmotech_api import (
14
+ DatasetApi,
15
+ DatasetTwinGraphQuery,
16
+ TwinGraphQuery,
17
+ TwingraphApi,
18
+ )
19
+
20
+ from cosmotech.coal.utils.logger import LOGGER
21
+ from cosmotech.orchestrator.utils.translate import T
22
+ from cosmotech.coal.cosmotech_api.connection import get_api_client
23
+ from cosmotech.coal.cosmotech_api.dataset.utils import get_content_from_twin_graph_data
24
+ from cosmotech.coal.cosmotech_api.dataset.converters import convert_dataset_to_files
25
+
26
+
27
def download_twingraph_dataset(
    organization_id: str,
    dataset_id: str,
    target_folder: Optional[Union[str, Path]] = None,
) -> Tuple[Dict[str, Any], Path]:
    """
    Download a dataset stored in a TwinGraph.

    Runs two cypher queries (one for the nodes, one for the relationships),
    reshapes the raw rows into a dict keyed by entity type, and — when a
    target folder is provided — materializes the dataset as files.

    Args:
        organization_id: Organization ID
        dataset_id: Dataset ID
        target_folder: Optional folder to save files (if None, uses temp dir)

    Returns:
        Tuple of (content dict, folder path)
    """
    overall_start = time.time()
    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="TwinGraph"))
    LOGGER.debug(
        T("coal.services.dataset.twingraph_downloading").format(organization_id=organization_id, dataset_id=dataset_id)
    )

    with get_api_client()[0] as api_client:
        dataset_api = DatasetApi(api_client)

        # Fetch every node of the graph
        nodes_started = time.time()
        LOGGER.debug(T("coal.services.dataset.twingraph_querying_nodes").format(dataset_id=dataset_id))
        node_rows = dataset_api.twingraph_query(
            organization_id=organization_id,
            dataset_id=dataset_id,
            dataset_twin_graph_query=DatasetTwinGraphQuery(query="MATCH(n) RETURN n"),
        )
        LOGGER.debug(T("coal.services.dataset.twingraph_nodes_found").format(count=len(node_rows)))
        LOGGER.debug(
            T("coal.common.timing.operation_completed").format(
                operation="nodes query", time=time.time() - nodes_started
            )
        )

        # Fetch every relationship together with its two endpoints
        edges_started = time.time()
        LOGGER.debug(T("coal.services.dataset.twingraph_querying_edges").format(dataset_id=dataset_id))
        edge_rows = dataset_api.twingraph_query(
            organization_id=organization_id,
            dataset_id=dataset_id,
            dataset_twin_graph_query=DatasetTwinGraphQuery(
                query="MATCH(n)-[r]->(m) RETURN n as src, r as rel, m as dest"
            ),
        )
        LOGGER.debug(T("coal.services.dataset.twingraph_edges_found").format(count=len(edge_rows)))
        LOGGER.debug(
            T("coal.common.timing.operation_completed").format(
                operation="edges query", time=time.time() - edges_started
            )
        )

    # Reshape the raw cypher rows into {entity_type: [entities]}
    processing_started = time.time()
    content = get_content_from_twin_graph_data(node_rows, edge_rows, True)
    LOGGER.debug(
        T("coal.common.timing.operation_completed").format(
            operation="data processing", time=time.time() - processing_started
        )
    )

    # Materialize the dataset as files when a destination folder was requested;
    # otherwise return an (empty) temporary directory.
    if target_folder:
        target_folder = convert_dataset_to_files(
            {
                "type": "twincache",
                "content": content,
                "name": f"TwinGraph Dataset {dataset_id}",
            },
            target_folder,
        )
    else:
        target_folder = tempfile.mkdtemp()

    LOGGER.info(
        T("coal.common.timing.operation_completed").format(
            operation="TwinGraph download", time=time.time() - overall_start
        )
    )
    LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="TwinGraph"))

    return content, Path(target_folder)
105
+
106
+
107
def download_legacy_twingraph_dataset(
    organization_id: str,
    cache_name: str,
    target_folder: Optional[Union[str, Path]] = None,
) -> Tuple[Dict[str, Any], Path]:
    """
    Download a dataset stored in a legacy TwinGraph cache.

    Runs two cypher queries (nodes, then relationships) against the legacy
    Twingraph API, reshapes the rows into a dict keyed by entity type, and —
    when a target folder is provided — materializes the dataset as files.

    Args:
        organization_id: Organization ID
        cache_name: Twin cache name
        target_folder: Optional folder to save files (if None, uses temp dir)

    Returns:
        Tuple of (content dict, folder path)
    """
    overall_start = time.time()
    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="Legacy TwinGraph"))
    LOGGER.debug(
        T("coal.services.dataset.legacy_twingraph_downloading").format(
            organization_id=organization_id, cache_name=cache_name
        )
    )

    with get_api_client()[0] as api_client:
        legacy_api = TwingraphApi(api_client)

        # Fetch every node of the cached graph
        nodes_started = time.time()
        LOGGER.debug(T("coal.services.dataset.legacy_twingraph_querying_nodes").format(cache_name=cache_name))
        node_rows = legacy_api.query(
            organization_id=organization_id,
            graph_id=cache_name,
            twin_graph_query=TwinGraphQuery(query="MATCH(n) RETURN n"),
        )
        LOGGER.debug(T("coal.services.dataset.legacy_twingraph_nodes_found").format(count=len(node_rows)))
        LOGGER.debug(
            T("coal.common.timing.operation_completed").format(
                operation="nodes query", time=time.time() - nodes_started
            )
        )

        # Fetch every relationship together with its two endpoints
        relations_started = time.time()
        LOGGER.debug(T("coal.services.dataset.legacy_twingraph_querying_relations").format(cache_name=cache_name))
        relation_rows = legacy_api.query(
            organization_id=organization_id,
            graph_id=cache_name,
            twin_graph_query=TwinGraphQuery(query="MATCH(n)-[r]->(m) RETURN n as src, r as rel, m as dest"),
        )
        LOGGER.debug(T("coal.services.dataset.legacy_twingraph_relations_found").format(count=len(relation_rows)))
        LOGGER.debug(
            T("coal.common.timing.operation_completed").format(
                operation="relations query", time=time.time() - relations_started
            )
        )

    # Reshape the raw cypher rows into {entity_type: [entities]};
    # legacy graphs keep numerical ids (restore_names=False).
    processing_started = time.time()
    content = get_content_from_twin_graph_data(node_rows, relation_rows, False)
    LOGGER.debug(
        T("coal.common.timing.operation_completed").format(
            operation="data processing", time=time.time() - processing_started
        )
    )

    # Materialize the dataset as files when a destination folder was requested;
    # otherwise return an (empty) temporary directory.
    if target_folder:
        target_folder = convert_dataset_to_files(
            {
                "type": "twincache",
                "content": content,
                "name": f"Legacy TwinGraph Dataset {cache_name}",
            },
            target_folder,
        )
    else:
        target_folder = tempfile.mkdtemp()

    LOGGER.info(
        T("coal.common.timing.operation_completed").format(
            operation="Legacy TwinGraph download", time=time.time() - overall_start
        )
    )
    LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="Legacy TwinGraph"))

    return content, Path(target_folder)
@@ -0,0 +1,132 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ from typing import Dict, List, Any
9
+
10
+ from cosmotech.coal.utils.logger import LOGGER
11
+ from cosmotech.orchestrator.utils.translate import T
12
+
13
+
14
def get_content_from_twin_graph_data(
    nodes: List[Dict], relationships: List[Dict], restore_names: bool = False
) -> Dict[str, List[Dict]]:
    """
    Reshape raw twin graph cypher rows into a dict keyed by entity type.

    When ``restore_names`` is True, the "id" value found inside each element's
    "properties" field is used as the entity identifier; when False, the
    numerical "id" field of the element itself is used instead (the historical
    behavior introduced with twingraph support in v2).

    Example of a single node row from a cypher response::

        {
          "n": {
            "id": "50",                 # used when restore_names is False
            "label": "Customer",
            "properties": {
              "Satisfaction": 0,
              "SurroundingSatisfaction": 0,
              "Thirsty": false,
              "id": "Lars_Coret"        # used when restore_names is True
            },
            "type": "NODE"
          }
        }

    Args:
        nodes: List of node rows from the cypher query
        relationships: List of relationship rows from the cypher query
        restore_names: Whether to use the property id instead of the node id

    Returns:
        Dict mapping entity types to lists of entities
    """
    LOGGER.debug(
        T("coal.services.dataset.processing_graph_data").format(
            nodes_count=len(nodes),
            relationships_count=len(relationships),
            restore_names=restore_names,
        )
    )

    content: Dict[str, List[Dict]] = {}

    # Pre-register one empty bucket per label seen on either end of a relationship
    for rel_row in relationships:
        for part in ("src", "dest", "rel"):
            content[rel_row[part]["label"]] = []

    # Nodes: copy the properties (never mutate the caller's data) and, unless
    # names are being restored, overwrite "id" with the numerical node id.
    for node_row in nodes:
        node = node_row["n"]
        attributes = dict(node["properties"])
        if not restore_names:
            attributes["id"] = node["id"]
        content.setdefault(node["label"], []).append(attributes)

    # Relationships: one record per edge, with source/target resolved according
    # to restore_names; edge properties may override id/source/target.
    for rel_row in relationships:
        source_node = rel_row["src"]
        target_node = rel_row["dest"]
        relation = rel_row["rel"]
        record = {
            "id": relation["id"],
            "source": source_node["properties"]["id"] if restore_names else source_node["id"],
            "target": target_node["properties"]["id"] if restore_names else target_node["id"],
        }
        record.update(relation["properties"])
        content[relation["label"]].append(record)

    # Log the number of entities gathered for each type
    for entity_type, entities in content.items():
        LOGGER.debug(T("coal.services.dataset.entity_count").format(entity_type=entity_type, count=len(entities)))

    return content
91
+
92
+
93
def sheet_to_header(sheet_content: List[Dict]) -> List[str]:
    """
    Collect the union of field names used across sheet rows.

    The special fields keep a fixed position: "id" comes first, then
    "source"/"target" (added as a pair when either is present), then every
    other field in first-seen order.

    Args:
        sheet_content: List of dictionaries representing sheet rows

    Returns:
        List of field names with id, source, and target fields first if present
    """
    LOGGER.debug(T("coal.services.dataset.extracting_headers").format(rows=len(sheet_content)))

    ordinary_fields: List[str] = []
    link_fields_present = False
    id_field_present = False

    for row in sheet_content:
        for field in row.keys():
            if field in ("source", "target"):
                link_fields_present = True
            elif field == "id":
                id_field_present = True
            elif field not in ordinary_fields:
                ordinary_fields.append(field)

    # Prepend the well-known fields in their canonical order
    fieldnames = list(ordinary_fields)
    if link_fields_present:
        fieldnames = ["source", "target"] + fieldnames
    if id_field_present:
        fieldnames = ["id"] + fieldnames

    LOGGER.debug(
        T("coal.services.dataset.headers_extracted").format(
            count=len(fieldnames),
            fields=", ".join(fieldnames[:5]) + ("..." if len(fieldnames) > 5 else ""),
        )
    )

    return fieldnames
@@ -0,0 +1,48 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ """
9
+ Parameter handling functions.
10
+
11
+ This module provides functions for handling parameters in solution templates.
12
+ """
13
+
14
+ import json
15
+ import os
16
+ import pathlib
17
+ from csv import DictWriter
18
+ from typing import List, Dict, Any
19
+
20
+ from cosmotech.coal.utils.logger import LOGGER
21
+ from cosmotech.orchestrator.utils.translate import T
22
+
23
+
24
def write_parameters(
    parameter_folder: str, parameters: List[Dict[str, Any]], write_csv: bool, write_json: bool
) -> None:
    """
    Write parameters to CSV and/or JSON files.

    Args:
        parameter_folder: The folder to write the parameters to
        parameters: The parameters to write; each item is expected to carry the
            keys ``parameterId``, ``value``, ``varType`` and ``isInherited``
        write_csv: Whether to write the parameters to a ``parameters.csv`` file
        write_json: Whether to write the parameters to a ``parameters.json`` file
    """
    if write_csv:
        tmp_parameter_file = os.path.join(parameter_folder, "parameters.csv")
        LOGGER.info(T("coal.cosmotech_api.runner.generating_file").format(file=tmp_parameter_file))
        # newline="" is required by the csv module for writer targets;
        # without it, extra blank lines appear on platforms using \r\n.
        with open(tmp_parameter_file, "w", newline="") as _file:
            _w = DictWriter(_file, fieldnames=["parameterId", "value", "varType", "isInherited"])
            _w.writeheader()
            _w.writerows(parameters)

    if write_json:
        tmp_parameter_file = os.path.join(parameter_folder, "parameters.json")
        LOGGER.info(T("coal.cosmotech_api.runner.generating_file").format(file=tmp_parameter_file))
        with open(tmp_parameter_file, "w") as _file:
            json.dump(parameters, _file, indent=2)
@@ -0,0 +1,25 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+ from typing import Any
8
+ from typing import Optional
9
+
10
+ import cosmotech_api
11
+
12
+
13
def get_run_metadata(
    api_client: cosmotech_api.api_client.ApiClient,
    organization_id: str,
    workspace_id: str,
    runner_id: str,
    run_id: str,
    include: Optional[list[str]] = None,
    exclude: Optional[list[str]] = None,
) -> dict[str, Any]:
    """
    Fetch a run from the Cosmo Tech API and return its metadata as a dict.

    Args:
        api_client: An authenticated Cosmo Tech API client
        organization_id: Organization ID
        workspace_id: Workspace ID
        runner_id: Runner ID
        run_id: Run ID
        include: Optional list of field names to keep in the output
        exclude: Optional list of field names to drop from the output

    Returns:
        JSON-compatible dict of the run, using API field aliases and
        omitting None-valued fields.
    """
    fetched_run: cosmotech_api.Run = cosmotech_api.RunApi(api_client).get_run(
        organization_id, workspace_id, runner_id, run_id
    )
    # by_alias keeps the API wire names; mode="json" yields JSON-safe values only.
    return fetched_run.model_dump(
        by_alias=True,
        exclude_none=True,
        include=include,
        exclude=exclude,
        mode="json",
    )
+ return run.model_dump(by_alias=True, exclude_none=True, include=include, exclude=exclude, mode="json")