cosmotech-acceleration-library 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. cosmotech/coal/__init__.py +8 -0
  2. cosmotech/coal/aws/__init__.py +23 -0
  3. cosmotech/coal/aws/s3.py +235 -0
  4. cosmotech/coal/azure/__init__.py +23 -0
  5. cosmotech/coal/azure/adx/__init__.py +26 -0
  6. cosmotech/coal/azure/adx/auth.py +125 -0
  7. cosmotech/coal/azure/adx/ingestion.py +329 -0
  8. cosmotech/coal/azure/adx/query.py +56 -0
  9. cosmotech/coal/azure/adx/runner.py +217 -0
  10. cosmotech/coal/azure/adx/store.py +255 -0
  11. cosmotech/coal/azure/adx/tables.py +118 -0
  12. cosmotech/coal/azure/adx/utils.py +71 -0
  13. cosmotech/coal/azure/blob.py +109 -0
  14. cosmotech/coal/azure/functions.py +72 -0
  15. cosmotech/coal/azure/storage.py +74 -0
  16. cosmotech/coal/cosmotech_api/__init__.py +36 -0
  17. cosmotech/coal/cosmotech_api/connection.py +96 -0
  18. cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
  19. cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
  20. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
  21. cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
  22. cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
  23. cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
  24. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
  25. cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
  26. cosmotech/coal/cosmotech_api/parameters.py +48 -0
  27. cosmotech/coal/cosmotech_api/run.py +25 -0
  28. cosmotech/coal/cosmotech_api/run_data.py +173 -0
  29. cosmotech/coal/cosmotech_api/run_template.py +108 -0
  30. cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
  31. cosmotech/coal/cosmotech_api/runner/data.py +38 -0
  32. cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
  33. cosmotech/coal/cosmotech_api/runner/download.py +146 -0
  34. cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
  35. cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
  36. cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
  37. cosmotech/coal/cosmotech_api/workspace.py +127 -0
  38. cosmotech/coal/csm/__init__.py +6 -0
  39. cosmotech/coal/csm/engine/__init__.py +47 -0
  40. cosmotech/coal/postgresql/__init__.py +22 -0
  41. cosmotech/coal/postgresql/runner.py +93 -0
  42. cosmotech/coal/postgresql/store.py +98 -0
  43. cosmotech/coal/singlestore/__init__.py +17 -0
  44. cosmotech/coal/singlestore/store.py +100 -0
  45. cosmotech/coal/store/__init__.py +42 -0
  46. cosmotech/coal/store/csv.py +44 -0
  47. cosmotech/coal/store/native_python.py +25 -0
  48. cosmotech/coal/store/pandas.py +26 -0
  49. cosmotech/coal/store/pyarrow.py +23 -0
  50. cosmotech/coal/store/store.py +79 -0
  51. cosmotech/coal/utils/__init__.py +18 -0
  52. cosmotech/coal/utils/api.py +68 -0
  53. cosmotech/coal/utils/logger.py +10 -0
  54. cosmotech/coal/utils/postgresql.py +236 -0
  55. cosmotech/csm_data/__init__.py +6 -0
  56. cosmotech/csm_data/commands/__init__.py +6 -0
  57. cosmotech/csm_data/commands/adx_send_data.py +92 -0
  58. cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
  59. cosmotech/csm_data/commands/api/__init__.py +6 -0
  60. cosmotech/csm_data/commands/api/api.py +50 -0
  61. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
  62. cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
  63. cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
  64. cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
  65. cosmotech/csm_data/commands/api/run_load_data.py +120 -0
  66. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
  67. cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
  68. cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
  69. cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
  70. cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
  71. cosmotech/csm_data/commands/az_storage_upload.py +76 -0
  72. cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
  73. cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
  74. cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
  75. cosmotech/csm_data/commands/store/__init__.py +6 -0
  76. cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
  77. cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
  78. cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
  79. cosmotech/csm_data/commands/store/list_tables.py +48 -0
  80. cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
  81. cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
  82. cosmotech/csm_data/commands/store/reset.py +31 -0
  83. cosmotech/csm_data/commands/store/store.py +37 -0
  84. cosmotech/csm_data/main.py +57 -0
  85. cosmotech/csm_data/utils/__init__.py +6 -0
  86. cosmotech/csm_data/utils/click.py +18 -0
  87. cosmotech/csm_data/utils/decorators.py +75 -0
  88. cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
  89. cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
  90. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
  91. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
  92. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
  93. cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
  94. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
  95. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
  96. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
  97. cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
  98. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
  99. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
  100. cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
  101. cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
  102. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
  103. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
  104. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
  105. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
  106. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
  107. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
  108. cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
  109. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
  110. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
  111. cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
  112. cosmotech/translation/coal/__init__.py +6 -0
  113. cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
  114. cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
  115. cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
  116. cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
  117. cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
  118. cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
  119. cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
  120. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
  121. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
  122. cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
  123. cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
  124. cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
  125. cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
  126. cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
  127. cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
  128. cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
  129. cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
  130. cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
  131. cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
  132. cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
  133. cosmotech/translation/coal/en-US/coal/web.yml +2 -0
  134. cosmotech/translation/csm_data/__init__.py +6 -0
  135. cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
  136. cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
  137. cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
  138. cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
  139. cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
  140. cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
  141. cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,74 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ """
9
+ Azure Storage operations module.
10
+
11
+ This module provides functions for interacting with Azure Storage, including
12
+ uploading files to blob storage.
13
+ """
14
+
15
+ import pathlib
16
+
17
+ from azure.storage.blob import ContainerClient
18
+
19
+ from cosmotech.coal.utils.logger import LOGGER
20
+ from cosmotech.orchestrator.utils.translate import T
21
+
22
+
23
def upload_file(
    file_path: pathlib.Path,
    blob_name: str,
    az_storage_sas_url: str,
    file_prefix: str = "",
) -> None:
    """
    Upload a single file to Azure Blob Storage.

    The blob is named ``<blob_name>/<file_prefix><file name>`` and any
    existing blob with that name is overwritten.

    Args:
        file_path: Path to the file to upload
        blob_name: Name of the blob container
        az_storage_sas_url: SAS URL for the Azure Storage account
        file_prefix: Prefix to add to the file name in the blob
    """
    uploaded_file_name = blob_name + "/" + file_prefix + file_path.name
    LOGGER.info(T("coal.common.data_transfer.file_sent").format(file_path=file_path, uploaded_name=uploaded_file_name))
    # Open the file inside a context manager so the handle is always closed,
    # even when the upload raises (the previous code leaked the handle
    # returned by file_path.open("rb")).
    container_client = ContainerClient.from_container_url(az_storage_sas_url)
    with file_path.open("rb") as data:
        container_client.upload_blob(uploaded_file_name, data, overwrite=True)
43
+
44
+
45
def upload_folder(
    source_folder: str,
    blob_name: str,
    az_storage_sas_url: str,
    file_prefix: str = "",
    recursive: bool = False,
) -> None:
    """
    Upload files from a folder to Azure Blob Storage.

    If ``source_folder`` points to a single file, that file is uploaded
    directly.

    Args:
        source_folder: Path to the folder containing files to upload
        blob_name: Name of the blob container
        az_storage_sas_url: SAS URL for the Azure Storage account
        file_prefix: Prefix to add to the file names in the blob
        recursive: Whether to recursively upload files from subdirectories

    Raises:
        FileNotFoundError: If ``source_folder`` does not exist
    """
    source_path = pathlib.Path(source_folder)
    if not source_path.exists():
        LOGGER.error(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
        raise FileNotFoundError(T("coal.common.file_operations.not_found").format(source_folder=source_folder))

    if source_path.is_dir():
        # NOTE(review): only each file's base name ends up in the blob path, so
        # a recursive upload flattens subdirectory structure — two files with
        # the same name in different subfolders overwrite each other. The
        # original code computed the relative path but never used it; confirm
        # whether flattening is the intended behavior.
        for entry in source_path.glob("**/*" if recursive else "*"):
            if entry.is_file():
                upload_file(entry, blob_name, az_storage_sas_url, file_prefix)
    else:
        upload_file(source_path, blob_name, az_storage_sas_url, file_prefix)
@@ -0,0 +1,36 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ """
9
+ Cosmotech API integration module.
10
+
11
+ This module provides functions for interacting with the Cosmotech API.
12
+ """
13
+
14
+ # Re-export functions from the parameters module
15
+ from cosmotech.coal.cosmotech_api.parameters import (
16
+ write_parameters,
17
+ )
18
+
19
+ # Re-export functions from the twin_data_layer module
20
+ from cosmotech.coal.cosmotech_api.twin_data_layer import (
21
+ get_dataset_id_from_runner,
22
+ send_files_to_tdl,
23
+ load_files_from_tdl,
24
+ )
25
+
26
+ # Re-export functions from the run_data module
27
+ from cosmotech.coal.cosmotech_api.run_data import (
28
+ send_csv_to_run_data,
29
+ send_store_to_run_data,
30
+ load_csv_from_run_data,
31
+ )
32
+
33
+ # Re-export functions from the run_template module
34
+ from cosmotech.coal.cosmotech_api.run_template import (
35
+ load_run_template_handlers,
36
+ )
@@ -0,0 +1,96 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+ import os
8
+ import pathlib
9
+ import ssl
10
+
11
+ import cosmotech_api
12
+
13
+ from cosmotech.coal.utils.logger import LOGGER
14
+ from cosmotech.orchestrator.utils.translate import T
15
+
16
# Environment variables required by each supported authentication method.
# A method is only usable when ALL of its keys are present in os.environ
# (see get_api_client below).

# Cosmo Tech API-key authentication.
api_env_keys = {"CSM_API_KEY", "CSM_API_URL"}
# Azure Entra (client-secret) authentication.
azure_env_keys = {
    "AZURE_CLIENT_ID",
    "AZURE_CLIENT_SECRET",
    "AZURE_TENANT_ID",
    "CSM_API_URL",
    "CSM_API_SCOPE",
}
# Keycloak client-credentials authentication.
keycloak_env_keys = {
    "IDP_TENANT_ID",
    "IDP_CLIENT_ID",
    "IDP_CLIENT_SECRET",
    "IDP_BASE_URL",
    "CSM_API_URL",
}
31
+
32
+
33
def get_api_client() -> "tuple[cosmotech_api.ApiClient, str]":
    """
    Build a Cosmo Tech ApiClient from environment variables.

    Authentication methods are tried in priority order: Keycloak client
    credentials, Cosmo Tech API key, then Azure Entra credentials. The
    required variables for each method are listed in the module-level
    ``*_env_keys`` sets.

    Returns:
        Tuple of (configured ApiClient, human-readable connection-type label)

    Raises:
        EnvironmentError: If no complete set of connection variables exists.
    """
    existing_keys = set(os.environ.keys())
    missing_azure_keys = azure_env_keys - existing_keys
    missing_api_keys = api_env_keys - existing_keys
    missing_keycloak_keys = keycloak_env_keys - existing_keys
    # Every method has at least one missing variable: nothing can connect.
    if all((missing_api_keys, missing_azure_keys, missing_keycloak_keys)):
        LOGGER.error(T("coal.common.errors.no_env_vars"))
        LOGGER.error(T("coal.cosmotech_api.connection.existing_sets"))
        LOGGER.error(T("coal.cosmotech_api.connection.azure_connection").format(keys=", ".join(azure_env_keys)))
        LOGGER.error(T("coal.cosmotech_api.connection.api_key_connection").format(keys=", ".join(api_env_keys)))
        LOGGER.error(T("coal.cosmotech_api.connection.keycloak_connection").format(keys=", ".join(keycloak_env_keys)))
        raise EnvironmentError(T("coal.common.errors.no_env_vars"))

    if not missing_keycloak_keys:
        LOGGER.info(T("coal.cosmotech_api.connection.found_keycloak"))
        # Imported lazily so the dependency is only needed for this method.
        from keycloak import KeycloakOpenID

        server_url = os.environ.get("IDP_BASE_URL")
        # KeycloakOpenID expects a trailing slash; endswith is safe on an
        # empty string, unlike the previous server_url[-1] indexing.
        if not server_url.endswith("/"):
            server_url = server_url + "/"
        keycloak_parameters = dict(
            server_url=server_url,
            client_id=os.environ.get("IDP_CLIENT_ID"),
            realm_name=os.environ.get("IDP_TENANT_ID"),
            client_secret_key=os.environ.get("IDP_CLIENT_SECRET"),
        )
        # Optional custom certificate authority for the IDP endpoint.
        if (ca_cert_path := os.environ.get("IDP_CA_CERT")) and pathlib.Path(ca_cert_path).exists():
            LOGGER.info(T("coal.cosmotech_api.connection.found_cert_authority"))
            keycloak_parameters["verify"] = ca_cert_path
        keycloak_openid = KeycloakOpenID(**keycloak_parameters)

        access_token = keycloak_openid.token(grant_type="client_credentials")

        configuration = cosmotech_api.Configuration(
            host=os.environ.get("CSM_API_URL"),
            access_token=access_token["access_token"],
        )
        return cosmotech_api.ApiClient(configuration), "Keycloak Connection"

    if not missing_api_keys:
        LOGGER.info(T("coal.cosmotech_api.connection.found_api_key"))
        configuration = cosmotech_api.Configuration(
            host=os.environ.get("CSM_API_URL"),
        )
        # The API key is sent as a header; header name is overridable.
        return (
            cosmotech_api.ApiClient(
                configuration,
                os.environ.get("CSM_API_KEY_HEADER", "X-CSM-API-KEY"),
                os.environ.get("CSM_API_KEY"),
            ),
            "Cosmo Tech API Key",
        )

    if not missing_azure_keys:
        LOGGER.info(T("coal.cosmotech_api.connection.found_azure"))
        from azure.identity import EnvironmentCredential

        # EnvironmentCredential reads AZURE_CLIENT_ID / SECRET / TENANT_ID.
        credentials = EnvironmentCredential()
        token = credentials.get_token(os.environ.get("CSM_API_SCOPE"))

        configuration = cosmotech_api.Configuration(host=os.environ.get("CSM_API_URL"), access_token=token.token)
        return cosmotech_api.ApiClient(configuration), "Azure Entra Connection"

    raise EnvironmentError(T("coal.common.errors.no_valid_connection"))
@@ -0,0 +1,26 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ # Re-export all download functions from download submodule
9
+ from cosmotech.coal.cosmotech_api.dataset.download import (
10
+ download_adt_dataset,
11
+ download_twingraph_dataset,
12
+ download_legacy_twingraph_dataset,
13
+ download_file_dataset,
14
+ download_dataset_by_id,
15
+ )
16
+
17
+ from cosmotech.coal.cosmotech_api.dataset.converters import (
18
+ convert_dataset_to_files,
19
+ convert_graph_dataset_to_files,
20
+ convert_file_dataset_to_files,
21
+ )
22
+
23
+ from cosmotech.coal.cosmotech_api.dataset.utils import (
24
+ get_content_from_twin_graph_data,
25
+ sheet_to_header,
26
+ )
@@ -0,0 +1,164 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ import csv
9
+ import json
10
+ import os
11
+ import tempfile
12
+ from pathlib import Path
13
+ from typing import Dict, List, Any, Optional, Union
14
+
15
+ from cosmotech.coal.utils.logger import LOGGER
16
+ from cosmotech.orchestrator.utils.translate import T
17
+ from cosmotech.coal.cosmotech_api.dataset.utils import sheet_to_header
18
+
19
+
20
def convert_dataset_to_files(dataset_info: Dict[str, Any], target_folder: Optional[Union[str, Path]] = None) -> Path:
    """
    Write a dataset's content out as files and return the output folder.

    Args:
        dataset_info: Dataset info dict with keys ``type``, ``content``, ``name``
        target_folder: Optional folder to save files (if None, a temp dir is created)

    Returns:
        Path to the folder containing the generated files
    """
    dataset_type = dataset_info["type"]
    dataset_content = dataset_info["content"]
    dataset_name = dataset_info["name"]

    LOGGER.info(
        T("coal.services.dataset.converting_to_files").format(dataset_type=dataset_type, dataset_name=dataset_name)
    )

    if target_folder is None:
        target_folder = Path(tempfile.mkdtemp())
        LOGGER.debug(T("coal.services.dataset.created_temp_folder").format(folder=target_folder))
    else:
        target_folder = Path(target_folder)
        target_folder.mkdir(parents=True, exist_ok=True)
        LOGGER.debug(T("coal.services.dataset.using_folder").format(folder=target_folder))

    # Graph-style datasets (ADT / twin cache) become one CSV per entity type;
    # anything else is handled as a plain file dataset.
    if dataset_type in ("adt", "twincache"):
        return convert_graph_dataset_to_files(dataset_content, target_folder)
    return convert_file_dataset_to_files(dataset_content, target_folder, dataset_type)
49
+
50
+
51
def convert_graph_dataset_to_files(
    content: Dict[str, List[Dict]], target_folder: Optional[Union[str, Path]] = None
) -> Path:
    """
    Convert graph dataset content to CSV files, one file per entity type.

    Args:
        content: Dictionary mapping entity types to lists of entities
        target_folder: Folder to save files (if None, uses temp dir)

    Returns:
        Path to folder containing files
    """
    if target_folder is None:
        target_folder = Path(tempfile.mkdtemp())
        LOGGER.debug(T("coal.services.dataset.created_temp_folder").format(folder=target_folder))
    else:
        target_folder = Path(target_folder)
        target_folder.mkdir(parents=True, exist_ok=True)
        LOGGER.debug(T("coal.services.dataset.using_folder").format(folder=target_folder))
    file_count = 0

    LOGGER.info(
        T("coal.services.dataset.converting_graph_data").format(entity_types=len(content), folder=target_folder)
    )

    for entity_type, entities in content.items():
        if not entities:
            LOGGER.debug(T("coal.services.dataset.skipping_empty_entity").format(entity_type=entity_type))
            continue

        file_path = target_folder / f"{entity_type}.csv"
        LOGGER.debug(T("coal.services.dataset.writing_csv").format(file_name=file_path.name, count=len(entities)))

        fieldnames = sheet_to_header(entities)

        with open(file_path, "w", newline="") as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames, dialect="unix", quoting=csv.QUOTE_MINIMAL)
            writer.writeheader()

            for entity in entities:
                writer.writerow({k: _serialize_graph_value(v) for k, v in entity.items()})

        file_count += 1
        LOGGER.debug(T("coal.services.dataset.file_written").format(file_path=file_path))

    LOGGER.info(T("coal.services.dataset.files_created").format(count=file_count, folder=target_folder))

    return target_folder


def _serialize_graph_value(value) -> str:
    """Serialize a single entity property to its CSV cell representation.

    Booleans and nested containers are rendered as JSON (lowercase booleans,
    double-quoted keys/strings); everything else is rendered with str().

    The previous implementation ran blanket str.replace() calls
    (``'`` -> ``"``, ``True`` -> ``true``, ``False`` -> ``false``) on the
    whole stringified value, which corrupted legitimate string data
    containing single quotes or the substrings "True"/"False"
    (e.g. "O'Brien" or "TrueNorth"). Type-aware serialization produces the
    same output for the intended cases without mangling plain strings.
    """
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (dict, list)):
        return json.dumps(value)
    return str(value)
105
+
106
+
107
def convert_file_dataset_to_files(
    content: Dict[str, Any],
    target_folder: Optional[Union[str, Path]] = None,
    file_type: str = "",
) -> Path:
    """
    Convert file dataset content to files on disk.

    String content is written verbatim, dict/list content is written as
    pretty-printed JSON, and anything else is written via str().

    Args:
        content: Dictionary mapping file names to content
        target_folder: Folder to save files (if None, uses temp dir)
        file_type: Type of file (csv, json, etc.) — used for logging only

    Returns:
        Path to folder containing files
    """
    if target_folder is None:
        target_folder = Path(tempfile.mkdtemp())
        LOGGER.debug(T("coal.services.dataset.created_temp_folder").format(folder=target_folder))
    else:
        target_folder = Path(target_folder)
        target_folder.mkdir(parents=True, exist_ok=True)
        LOGGER.debug(T("coal.services.dataset.using_folder").format(folder=target_folder))
    file_count = 0

    LOGGER.info(
        T("coal.services.dataset.converting_file_data").format(
            file_count=len(content), file_type=file_type, folder=target_folder
        )
    )

    for file_name, file_content in content.items():
        # NOTE(review): file_name comes from the dataset content and is joined
        # to target_folder unchecked — a name containing ".." could escape the
        # target folder. Confirm whether the content source is trusted.
        file_path = target_folder / file_name

        # Ensure parent directories exist (file names may contain subfolders)
        file_path.parent.mkdir(parents=True, exist_ok=True)

        LOGGER.debug(T("coal.services.dataset.writing_file").format(file_name=file_path.name, file_type=file_type))

        if isinstance(file_content, str):
            # Text content
            with open(file_path, "w") as file:
                file.write(file_content)
        elif isinstance(file_content, (dict, list)):
            # JSON content
            with open(file_path, "w") as file:
                json.dump(file_content, file, indent=2)
        else:
            # Other content types
            with open(file_path, "w") as file:
                file.write(str(file_content))

        file_count += 1
        LOGGER.debug(T("coal.services.dataset.file_written").format(file_path=file_path))

    LOGGER.info(T("coal.services.dataset.files_created").format(count=file_count, folder=target_folder))

    return target_folder
@@ -0,0 +1,19 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ """
9
+ Dataset download submodules.
10
+ """
11
+
12
+ # Re-export all download functions
13
+ from cosmotech.coal.cosmotech_api.dataset.download.adt import download_adt_dataset
14
+ from cosmotech.coal.cosmotech_api.dataset.download.twingraph import (
15
+ download_twingraph_dataset,
16
+ download_legacy_twingraph_dataset,
17
+ )
18
+ from cosmotech.coal.cosmotech_api.dataset.download.file import download_file_dataset
19
+ from cosmotech.coal.cosmotech_api.dataset.download.common import download_dataset_by_id
@@ -0,0 +1,119 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ import time
9
+ import tempfile
10
+ from pathlib import Path
11
+ from typing import Dict, Any, Optional, Union, Tuple
12
+
13
+ from azure.digitaltwins.core import DigitalTwinsClient
14
+ from azure.identity import DefaultAzureCredential
15
+
16
+ from cosmotech.coal.utils.logger import LOGGER
17
+ from cosmotech.orchestrator.utils.translate import T
18
+ from cosmotech.coal.cosmotech_api.connection import get_api_client
19
+ from cosmotech.coal.cosmotech_api.dataset.converters import convert_dataset_to_files
20
+
21
+
22
def download_adt_dataset(
    adt_address: str,
    target_folder: Optional[Union[str, Path]] = None,
    credentials: Optional[DefaultAzureCredential] = None,
) -> Tuple[Dict[str, Any], Path]:
    """
    Download a full dataset (twins and relationships) from Azure Digital Twins.

    Twins are grouped by the short model name extracted from their
    ``$metadata.$model`` DTMI; relationships are grouped by
    ``$relationshipName``. System properties (keys starting with ``$``) are
    stripped, with ``$dtId`` kept as ``id`` for twins and
    ``$relationshipId``/``$sourceId``/``$targetId`` kept as
    ``id``/``source``/``target`` for relationships.

    Args:
        adt_address: The ADT instance address
        target_folder: Optional folder to save files (if None, uses temp dir)
        credentials: Optional Azure credentials (if None, uses DefaultAzureCredential)

    Returns:
        Tuple of (content dict mapping entity type to list of entities, folder path)

    Raises:
        ValueError: If no credentials are given and the API connection is not
            an Azure Entra connection.
    """
    start_time = time.time()
    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="ADT"))
    LOGGER.debug(T("coal.services.dataset.adt_connecting").format(url=adt_address))

    # Create credentials if not provided; only possible when the configured
    # API connection is Azure Entra (the env then carries Azure credentials).
    if credentials is None:
        if get_api_client()[1] == "Azure Entra Connection":
            credentials = DefaultAzureCredential()
        else:
            LOGGER.error(T("coal.services.dataset.adt_no_credentials"))
            raise ValueError("No credentials available for ADT connection")

    # Create client and download data
    client = DigitalTwinsClient(adt_address, credentials)

    # Query twins
    query_start = time.time()
    LOGGER.debug(T("coal.services.dataset.adt_querying_twins"))
    query_expression = "SELECT * FROM digitaltwins"
    query_result = client.query_twins(query_expression)

    json_content = dict()
    twin_count = 0

    for twin in query_result:
        twin_count += 1
        # Short model name from the DTMI, e.g. "dtmi:ns:Foo;1" -> "Foo"
        entity_type = twin.get("$metadata").get("$model").split(":")[-1].split(";")[0]
        t_content = {k: v for k, v in twin.items()}
        # Keep the twin id under the plain "id" key before stripping $-keys
        t_content["id"] = t_content["$dtId"]

        # Remove system properties (iterate a copy of the keys while deleting)
        for k in list(twin.keys()):
            if k[0] == "$":
                del t_content[k]

        json_content.setdefault(entity_type, [])
        json_content[entity_type].append(t_content)

    query_time = time.time() - query_start
    LOGGER.debug(T("coal.services.dataset.adt_twins_found").format(count=twin_count))
    LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="twins query", time=query_time))

    # Query relationships
    rel_start = time.time()
    LOGGER.debug(T("coal.services.dataset.adt_querying_relations"))
    relations_query = "SELECT * FROM relationships"
    query_result = client.query_twins(relations_query)

    relation_count = 0
    for relation in query_result:
        relation_count += 1
        # Mapping from ADT system keys to the plain names kept in the output
        tr = {"$relationshipId": "id", "$sourceId": "source", "$targetId": "target"}
        r_content = {k: v for k, v in relation.items()}

        # Map system properties to standard names (before the $-keys are removed)
        for k, v in tr.items():
            r_content[v] = r_content[k]

        # Remove system properties
        for k in list(relation.keys()):
            if k[0] == "$":
                del r_content[k]

        json_content.setdefault(relation["$relationshipName"], [])
        json_content[relation["$relationshipName"]].append(r_content)

    rel_time = time.time() - rel_start
    LOGGER.debug(T("coal.services.dataset.adt_relations_found").format(count=relation_count))
    LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="relations query", time=rel_time))

    # Convert to files if target_folder is provided
    if target_folder:
        dataset_info = {"type": "adt", "content": json_content, "name": "ADT Dataset"}
        target_folder = convert_dataset_to_files(dataset_info, target_folder)
    else:
        # NOTE(review): when no target folder is given, an empty temp dir is
        # created and returned without writing any files — confirm intended.
        target_folder = tempfile.mkdtemp()

    elapsed_time = time.time() - start_time
    LOGGER.info(T("coal.common.timing.operation_completed").format(operation="ADT download", time=elapsed_time))
    LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="ADT"))

    return json_content, Path(target_folder)
@@ -0,0 +1,140 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ import time
9
+ from pathlib import Path
10
+ from typing import Dict, Any, Optional, Union, Tuple
11
+
12
+ from cosmotech_api import DatasetApi
13
+
14
+ from cosmotech.coal.utils.logger import LOGGER
15
+ from cosmotech.orchestrator.utils.translate import T
16
+ from cosmotech.coal.cosmotech_api.connection import get_api_client
17
+
18
+ # Import specific download functions
19
+ # These imports are defined here to avoid circular imports
20
+ # The functions are imported directly from their modules
21
+ from cosmotech.coal.cosmotech_api.dataset.download.adt import download_adt_dataset
22
+ from cosmotech.coal.cosmotech_api.dataset.download.twingraph import (
23
+ download_twingraph_dataset,
24
+ download_legacy_twingraph_dataset,
25
+ )
26
+ from cosmotech.coal.cosmotech_api.dataset.download.file import download_file_dataset
27
+
28
+
29
def download_dataset_by_id(
    organization_id: str,
    workspace_id: str,
    dataset_id: str,
    target_folder: Optional[Union[str, Path]] = None,
) -> Tuple[Dict[str, Any], Path]:
    """
    Fetch a dataset's metadata, detect its backing store and download its content.

    The backend is inferred from the dataset's connector parameters and tags:
    ADT, legacy twin-cache, storage blob / workspace file, or (by default)
    twingraph. The matching downloader from this package is then invoked.

    Args:
        organization_id: Organization ID
        workspace_id: Workspace ID
        dataset_id: Dataset ID
        target_folder: Optional folder to save files (if None, uses temp dir)

    Returns:
        Tuple of (dataset info dict, folder path)
    """
    t0 = time.time()
    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="Dataset"))
    LOGGER.debug(
        T("coal.services.dataset.dataset_downloading").format(organization_id=organization_id, dataset_id=dataset_id)
    )

    with get_api_client()[0] as api_client:
        dataset_api = DatasetApi(api_client)

        # Retrieve dataset metadata (timed separately for diagnostics).
        t_info = time.time()
        dataset = dataset_api.find_dataset_by_id(organization_id=organization_id, dataset_id=dataset_id)
        info_elapsed = time.time() - t_info

        LOGGER.debug(
            T("coal.services.dataset.dataset_info_retrieved").format(dataset_name=dataset.name, dataset_id=dataset_id)
        )
        LOGGER.debug(
            T("coal.common.timing.operation_completed").format(operation="dataset info retrieval", time=info_elapsed)
        )

        # Connector parameters drive the backend detection; an absent
        # connector simply makes every membership test below fail.
        params = [] if dataset.connector is None else dataset.connector.parameters_values

        is_adt = "AZURE_DIGITAL_TWINS_URL" in params
        is_storage = "AZURE_STORAGE_CONTAINER_BLOB_PREFIX" in params
        is_legacy_twin_cache = "TWIN_CACHE_NAME" in params and dataset.twingraph_id is None
        tags = dataset.tags
        is_in_workspace_file = tags is not None and ("workspaceFile" in tags or "dataset_part" in tags)

        t_download = time.time()

        if is_adt:
            LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="ADT"))
            content, folder = download_adt_dataset(
                adt_address=params["AZURE_DIGITAL_TWINS_URL"],
                target_folder=target_folder,
            )
            dataset_type = "adt"

        elif is_legacy_twin_cache:
            LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="Legacy TwinGraph"))
            content, folder = download_legacy_twingraph_dataset(
                organization_id=organization_id,
                cache_name=params["TWIN_CACHE_NAME"],
                target_folder=target_folder,
            )
            dataset_type = "twincache"

        elif is_storage or is_in_workspace_file:
            if is_storage:
                LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="Storage"))
                # Strip the workspace-file prefix marker to get the real file name.
                file_name = params["AZURE_STORAGE_CONTAINER_BLOB_PREFIX"].replace("%WORKSPACE_FILE%/", "")
            else:
                LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="Workspace File"))
                file_name = dataset.source.location

            content, folder = download_file_dataset(
                organization_id=organization_id,
                workspace_id=workspace_id,
                file_name=file_name,
                target_folder=target_folder,
            )
            # File datasets are typed by their file extension.
            dataset_type = file_name.split(".")[-1]

        else:
            LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="TwinGraph"))
            content, folder = download_twingraph_dataset(
                organization_id=organization_id,
                dataset_id=dataset_id,
                target_folder=target_folder,
            )
            dataset_type = "twincache"

        LOGGER.debug(
            T("coal.common.timing.operation_completed").format(
                operation="content download", time=time.time() - t_download
            )
        )

        dataset_info = {"type": dataset_type, "content": content, "name": dataset.name}

        LOGGER.info(
            T("coal.common.timing.operation_completed").format(
                operation="total dataset download", time=time.time() - t0
            )
        )
        LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="Dataset"))

        return dataset_info, folder