cosmotech-acceleration-library 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosmotech/coal/__init__.py +8 -0
- cosmotech/coal/aws/__init__.py +23 -0
- cosmotech/coal/aws/s3.py +235 -0
- cosmotech/coal/azure/__init__.py +23 -0
- cosmotech/coal/azure/adx/__init__.py +26 -0
- cosmotech/coal/azure/adx/auth.py +125 -0
- cosmotech/coal/azure/adx/ingestion.py +329 -0
- cosmotech/coal/azure/adx/query.py +56 -0
- cosmotech/coal/azure/adx/runner.py +217 -0
- cosmotech/coal/azure/adx/store.py +255 -0
- cosmotech/coal/azure/adx/tables.py +118 -0
- cosmotech/coal/azure/adx/utils.py +71 -0
- cosmotech/coal/azure/blob.py +109 -0
- cosmotech/coal/azure/functions.py +72 -0
- cosmotech/coal/azure/storage.py +74 -0
- cosmotech/coal/cosmotech_api/__init__.py +36 -0
- cosmotech/coal/cosmotech_api/connection.py +96 -0
- cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
- cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
- cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
- cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
- cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
- cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
- cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
- cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
- cosmotech/coal/cosmotech_api/parameters.py +48 -0
- cosmotech/coal/cosmotech_api/run.py +25 -0
- cosmotech/coal/cosmotech_api/run_data.py +173 -0
- cosmotech/coal/cosmotech_api/run_template.py +108 -0
- cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
- cosmotech/coal/cosmotech_api/runner/data.py +38 -0
- cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
- cosmotech/coal/cosmotech_api/runner/download.py +146 -0
- cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
- cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
- cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
- cosmotech/coal/cosmotech_api/workspace.py +127 -0
- cosmotech/coal/csm/__init__.py +6 -0
- cosmotech/coal/csm/engine/__init__.py +47 -0
- cosmotech/coal/postgresql/__init__.py +22 -0
- cosmotech/coal/postgresql/runner.py +93 -0
- cosmotech/coal/postgresql/store.py +98 -0
- cosmotech/coal/singlestore/__init__.py +17 -0
- cosmotech/coal/singlestore/store.py +100 -0
- cosmotech/coal/store/__init__.py +42 -0
- cosmotech/coal/store/csv.py +44 -0
- cosmotech/coal/store/native_python.py +25 -0
- cosmotech/coal/store/pandas.py +26 -0
- cosmotech/coal/store/pyarrow.py +23 -0
- cosmotech/coal/store/store.py +79 -0
- cosmotech/coal/utils/__init__.py +18 -0
- cosmotech/coal/utils/api.py +68 -0
- cosmotech/coal/utils/logger.py +10 -0
- cosmotech/coal/utils/postgresql.py +236 -0
- cosmotech/csm_data/__init__.py +6 -0
- cosmotech/csm_data/commands/__init__.py +6 -0
- cosmotech/csm_data/commands/adx_send_data.py +92 -0
- cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
- cosmotech/csm_data/commands/api/__init__.py +6 -0
- cosmotech/csm_data/commands/api/api.py +50 -0
- cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
- cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
- cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
- cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
- cosmotech/csm_data/commands/api/run_load_data.py +120 -0
- cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
- cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
- cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
- cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
- cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
- cosmotech/csm_data/commands/az_storage_upload.py +76 -0
- cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
- cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
- cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
- cosmotech/csm_data/commands/store/__init__.py +6 -0
- cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
- cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
- cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
- cosmotech/csm_data/commands/store/list_tables.py +48 -0
- cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
- cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
- cosmotech/csm_data/commands/store/reset.py +31 -0
- cosmotech/csm_data/commands/store/store.py +37 -0
- cosmotech/csm_data/main.py +57 -0
- cosmotech/csm_data/utils/__init__.py +6 -0
- cosmotech/csm_data/utils/click.py +18 -0
- cosmotech/csm_data/utils/decorators.py +75 -0
- cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
- cosmotech/translation/coal/__init__.py +6 -0
- cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
- cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
- cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
- cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
- cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
- cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
- cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
- cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
- cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
- cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
- cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
- cosmotech/translation/coal/en-US/coal/web.yml +2 -0
- cosmotech/translation/csm_data/__init__.py +6 -0
- cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
- cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
- cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
- cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
- cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
- cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
- cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
cosmotech/coal/azure/storage.py
@@ -0,0 +1,74 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+"""
+Azure Storage operations module.
+
+This module provides functions for interacting with Azure Storage, including
+uploading files to blob storage.
+"""
+
+import pathlib
+
+from azure.storage.blob import ContainerClient
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+
+
+def upload_file(
+    file_path: pathlib.Path,
+    blob_name: str,
+    az_storage_sas_url: str,
+    file_prefix: str = "",
+) -> None:
+    """
+    Upload a single file to Azure Blob Storage.
+
+    Args:
+        file_path: Path to the file to upload
+        blob_name: Name of the blob container
+        az_storage_sas_url: SAS URL for the Azure Storage account
+        file_prefix: Prefix to add to the file name in the blob
+    """
+    uploaded_file_name = blob_name + "/" + file_prefix + file_path.name
+    LOGGER.info(T("coal.common.data_transfer.file_sent").format(file_path=file_path, uploaded_name=uploaded_file_name))
+    ContainerClient.from_container_url(az_storage_sas_url).upload_blob(
+        uploaded_file_name, file_path.open("rb"), overwrite=True
+    )
+
+
+def upload_folder(
+    source_folder: str,
+    blob_name: str,
+    az_storage_sas_url: str,
+    file_prefix: str = "",
+    recursive: bool = False,
+) -> None:
+    """
+    Upload files from a folder to Azure Blob Storage.
+
+    Args:
+        source_folder: Path to the folder containing files to upload
+        blob_name: Name of the blob container
+        az_storage_sas_url: SAS URL for the Azure Storage account
+        file_prefix: Prefix to add to the file names in the blob
+        recursive: Whether to recursively upload files from subdirectories
+    """
+    source_path = pathlib.Path(source_folder)
+    if not source_path.exists():
+        LOGGER.error(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
+        raise FileNotFoundError(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
+
+    if source_path.is_dir():
+        _source_name = str(source_path)
+        for _file_path in source_path.glob("**/*" if recursive else "*"):
+            if _file_path.is_file():
+                _file_name = str(_file_path).removeprefix(_source_name).removeprefix("/")
+                upload_file(_file_path, blob_name, az_storage_sas_url, file_prefix)
+    else:
+        upload_file(source_path, blob_name, az_storage_sas_url, file_prefix)
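For reference, a minimal usage sketch of the two helpers above. The SAS URL, folder, and prefix values are placeholders, not part of the package:

import pathlib

from cosmotech.coal.azure.storage import upload_file, upload_folder

# Hypothetical container-level SAS URL and local data folder.
SAS_URL = "https://myaccount.blob.core.windows.net/mycontainer?sv=...&sig=..."

# Uploads one file as "<blob_name>/<file_prefix><file name>".
upload_file(pathlib.Path("output/results.csv"), "runs", SAS_URL, file_prefix="run-42/")

# Uploads every file directly under "output/"; recursive=True would also
# walk subdirectories (uploaded names still use only the file name).
upload_folder("output", "runs", SAS_URL, file_prefix="run-42/", recursive=False)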
cosmotech/coal/cosmotech_api/__init__.py
@@ -0,0 +1,36 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+"""
+Cosmotech API integration module.
+
+This module provides functions for interacting with the Cosmotech API.
+"""
+
+# Re-export functions from the parameters module
+from cosmotech.coal.cosmotech_api.parameters import (
+    write_parameters,
+)
+
+# Re-export functions from the twin_data_layer module
+from cosmotech.coal.cosmotech_api.twin_data_layer import (
+    get_dataset_id_from_runner,
+    send_files_to_tdl,
+    load_files_from_tdl,
+)
+
+# Re-export functions from the run_data module
+from cosmotech.coal.cosmotech_api.run_data import (
+    send_csv_to_run_data,
+    send_store_to_run_data,
+    load_csv_from_run_data,
+)
+
+# Re-export functions from the run_template module
+from cosmotech.coal.cosmotech_api.run_template import (
+    load_run_template_handlers,
+)
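These re-exports flatten the import path for callers; a short illustration of the effect (both imports resolve to the same function):

# Without the re-export, callers must name the submodule:
from cosmotech.coal.cosmotech_api.parameters import write_parameters

# With it, the package root is enough:
from cosmotech.coal.cosmotech_api import write_parameters, send_files_to_tdl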
cosmotech/coal/cosmotech_api/connection.py
@@ -0,0 +1,96 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+import os
+import pathlib
+import ssl
+
+import cosmotech_api
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+
+api_env_keys = {"CSM_API_KEY", "CSM_API_URL"}
+azure_env_keys = {
+    "AZURE_CLIENT_ID",
+    "AZURE_CLIENT_SECRET",
+    "AZURE_TENANT_ID",
+    "CSM_API_URL",
+    "CSM_API_SCOPE",
+}
+keycloak_env_keys = {
+    "IDP_TENANT_ID",
+    "IDP_CLIENT_ID",
+    "IDP_CLIENT_SECRET",
+    "IDP_BASE_URL",
+    "CSM_API_URL",
+}
+
+
+def get_api_client() -> (cosmotech_api.ApiClient, str):
+    existing_keys = set(os.environ.keys())
+    missing_azure_keys = azure_env_keys - existing_keys
+    missing_api_keys = api_env_keys - existing_keys
+    missing_keycloak_keys = keycloak_env_keys - existing_keys
+    if all((missing_api_keys, missing_azure_keys, missing_keycloak_keys)):
+        LOGGER.error(T("coal.common.errors.no_env_vars"))
+        LOGGER.error(T("coal.cosmotech_api.connection.existing_sets"))
+        LOGGER.error(T("coal.cosmotech_api.connection.azure_connection").format(keys=", ".join(azure_env_keys)))
+        LOGGER.error(T("coal.cosmotech_api.connection.api_key_connection").format(keys=", ".join(api_env_keys)))
+        LOGGER.error(T("coal.cosmotech_api.connection.keycloak_connection").format(keys=", ".join(keycloak_env_keys)))
+        raise EnvironmentError(T("coal.common.errors.no_env_vars"))
+
+    if not missing_keycloak_keys:
+        LOGGER.info(T("coal.cosmotech_api.connection.found_keycloak"))
+        from keycloak import KeycloakOpenID
+
+        server_url = os.environ.get("IDP_BASE_URL")
+        if server_url[-1] != "/":
+            server_url = server_url + "/"
+        keycloack_parameters = dict(
+            server_url=server_url,
+            client_id=os.environ.get("IDP_CLIENT_ID"),
+            realm_name=os.environ.get("IDP_TENANT_ID"),
+            client_secret_key=os.environ.get("IDP_CLIENT_SECRET"),
+        )
+        if (ca_cert_path := os.environ.get("IDP_CA_CERT")) and pathlib.Path(ca_cert_path).exists():
+            LOGGER.info(T("coal.cosmotech_api.connection.found_cert_authority"))
+            keycloack_parameters["verify"] = ca_cert_path
+        keycloak_openid = KeycloakOpenID(**keycloack_parameters)
+
+        access_token = keycloak_openid.token(grant_type="client_credentials")
+
+        configuration = cosmotech_api.Configuration(
+            host=os.environ.get("CSM_API_URL"),
+            access_token=access_token["access_token"],
+        )
+        return cosmotech_api.ApiClient(configuration), "Keycloak Connection"
+
+    if not missing_api_keys:
+        LOGGER.info(T("coal.cosmotech_api.connection.found_api_key"))
+        configuration = cosmotech_api.Configuration(
+            host=os.environ.get("CSM_API_URL"),
+        )
+        return (
+            cosmotech_api.ApiClient(
+                configuration,
+                os.environ.get("CSM_API_KEY_HEADER", "X-CSM-API-KEY"),
+                os.environ.get("CSM_API_KEY"),
+            ),
+            "Cosmo Tech API Key",
+        )
+
+    if not missing_azure_keys:
+        LOGGER.info(T("coal.cosmotech_api.connection.found_azure"))
+        from azure.identity import EnvironmentCredential
+
+        credentials = EnvironmentCredential()
+        token = credentials.get_token(os.environ.get("CSM_API_SCOPE"))
+
+        configuration = cosmotech_api.Configuration(host=os.environ.get("CSM_API_URL"), access_token=token.token)
+        return cosmotech_api.ApiClient(configuration), "Azure Entra Connection"
+
+    raise EnvironmentError(T("coal.common.errors.no_valid_connection"))
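A minimal sketch of how a runner script might obtain a client via this helper, assuming the Keycloak variable set is the one present in the environment. All values below are placeholders; a real deployment provides working credentials:

import os

from cosmotech.coal.cosmotech_api.connection import get_api_client

# Placeholder credentials; in practice these are injected by the runner environment.
os.environ.update(
    {
        "CSM_API_URL": "https://api.example.com",
        "IDP_BASE_URL": "https://keycloak.example.com/",
        "IDP_TENANT_ID": "my-realm",
        "IDP_CLIENT_ID": "my-client",
        "IDP_CLIENT_SECRET": "my-secret",
    }
)

# Returns a configured ApiClient plus a label naming the auth path taken.
api_client, connection_type = get_api_client()
print(connection_type)  # -> "Keycloak Connection"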
cosmotech/coal/cosmotech_api/dataset/__init__.py
@@ -0,0 +1,26 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+# Re-export all download functions from download submodule
+from cosmotech.coal.cosmotech_api.dataset.download import (
+    download_adt_dataset,
+    download_twingraph_dataset,
+    download_legacy_twingraph_dataset,
+    download_file_dataset,
+    download_dataset_by_id,
+)
+
+from cosmotech.coal.cosmotech_api.dataset.converters import (
+    convert_dataset_to_files,
+    convert_graph_dataset_to_files,
+    convert_file_dataset_to_files,
+)
+
+from cosmotech.coal.cosmotech_api.dataset.utils import (
+    get_content_from_twin_graph_data,
+    sheet_to_header,
+)
cosmotech/coal/cosmotech_api/dataset/converters.py
@@ -0,0 +1,164 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+import csv
+import json
+import os
+import tempfile
+from pathlib import Path
+from typing import Dict, List, Any, Optional, Union
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+from cosmotech.coal.cosmotech_api.dataset.utils import sheet_to_header
+
+
+def convert_dataset_to_files(dataset_info: Dict[str, Any], target_folder: Optional[Union[str, Path]] = None) -> Path:
+    """
+    Convert dataset info to files.
+
+    Args:
+        dataset_info: Dataset info dict with type, content, name
+        target_folder: Optional folder to save files (if None, uses temp dir)
+
+    Returns:
+        Path to folder containing files
+    """
+    dataset_type = dataset_info["type"]
+    content = dataset_info["content"]
+    name = dataset_info["name"]
+
+    LOGGER.info(T("coal.services.dataset.converting_to_files").format(dataset_type=dataset_type, dataset_name=name))
+
+    if target_folder is None:
+        target_folder = Path(tempfile.mkdtemp())
+        LOGGER.debug(T("coal.services.dataset.created_temp_folder").format(folder=target_folder))
+    else:
+        target_folder = Path(target_folder)
+        target_folder.mkdir(parents=True, exist_ok=True)
+        LOGGER.debug(T("coal.services.dataset.using_folder").format(folder=target_folder))
+
+    if dataset_type in ["adt", "twincache"]:
+        return convert_graph_dataset_to_files(content, target_folder)
+    else:
+        return convert_file_dataset_to_files(content, target_folder, dataset_type)
+
+
+def convert_graph_dataset_to_files(
+    content: Dict[str, List[Dict]], target_folder: Optional[Union[str, Path]] = None
+) -> Path:
+    """
+    Convert graph dataset content to CSV files.
+
+    Args:
+        content: Dictionary mapping entity types to lists of entities
+        target_folder: Folder to save files (if None, uses temp dir)
+
+    Returns:
+        Path to folder containing files
+    """
+    if target_folder is None:
+        target_folder = Path(tempfile.mkdtemp())
+        LOGGER.debug(T("coal.services.dataset.created_temp_folder").format(folder=target_folder))
+    else:
+        target_folder = Path(target_folder)
+        target_folder.mkdir(parents=True, exist_ok=True)
+        LOGGER.debug(T("coal.services.dataset.using_folder").format(folder=target_folder))
+    file_count = 0
+
+    LOGGER.info(
+        T("coal.services.dataset.converting_graph_data").format(entity_types=len(content), folder=target_folder)
+    )
+
+    for entity_type, entities in content.items():
+        if not entities:
+            LOGGER.debug(T("coal.services.dataset.skipping_empty_entity").format(entity_type=entity_type))
+            continue
+
+        file_path = target_folder / f"{entity_type}.csv"
+        LOGGER.debug(T("coal.services.dataset.writing_csv").format(file_name=file_path.name, count=len(entities)))
+
+        fieldnames = sheet_to_header(entities)
+
+        with open(file_path, "w", newline="") as file:
+            writer = csv.DictWriter(file, fieldnames=fieldnames, dialect="unix", quoting=csv.QUOTE_MINIMAL)
+            writer.writeheader()
+
+            for entity in entities:
+                # Convert values to strings and handle boolean values
+                row = {
+                    k: str(v).replace("'", '"').replace("True", "true").replace("False", "false")
+                    for k, v in entity.items()
+                }
+                writer.writerow(row)
+
+        file_count += 1
+        LOGGER.debug(T("coal.services.dataset.file_written").format(file_path=file_path))
+
+    LOGGER.info(T("coal.services.dataset.files_created").format(count=file_count, folder=target_folder))
+
+    return target_folder
+
+
+def convert_file_dataset_to_files(
+    content: Dict[str, Any],
+    target_folder: Optional[Union[str, Path]] = None,
+    file_type: str = "",
+) -> Path:
+    """
+    Convert file dataset content to files.
+
+    Args:
+        content: Dictionary mapping file names to content
+        target_folder: Folder to save files (if None, uses temp dir)
+        file_type: Type of file (csv, json, etc.)
+
+    Returns:
+        Path to folder containing files
+    """
+    if target_folder is None:
+        target_folder = Path(tempfile.mkdtemp())
+        LOGGER.debug(T("coal.services.dataset.created_temp_folder").format(folder=target_folder))
+    else:
+        target_folder = Path(target_folder)
+        target_folder.mkdir(parents=True, exist_ok=True)
+        LOGGER.debug(T("coal.services.dataset.using_folder").format(folder=target_folder))
+    file_count = 0
+
+    LOGGER.info(
+        T("coal.services.dataset.converting_file_data").format(
+            file_count=len(content), file_type=file_type, folder=target_folder
+        )
+    )
+
+    for file_name, file_content in content.items():
+        file_path = target_folder / file_name
+
+        # Ensure parent directories exist
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+
+        LOGGER.debug(T("coal.services.dataset.writing_file").format(file_name=file_path.name, file_type=file_type))
+
+        if isinstance(file_content, str):
+            # Text content
+            with open(file_path, "w") as file:
+                file.write(file_content)
+        elif isinstance(file_content, dict) or isinstance(file_content, list):
+            # JSON content
+            with open(file_path, "w") as file:
+                json.dump(file_content, file, indent=2)
+        else:
+            # Other content types
+            with open(file_path, "w") as file:
+                file.write(str(file_content))
+
+        file_count += 1
+        LOGGER.debug(T("coal.services.dataset.file_written").format(file_path=file_path))
+
+    LOGGER.info(T("coal.services.dataset.files_created").format(count=file_count, folder=target_folder))
+
+    return target_folder
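A small sketch of the graph converter in use; the two-entity dataset below is invented for illustration (one node type, one relationship type):

from cosmotech.coal.cosmotech_api.dataset.converters import convert_graph_dataset_to_files

# Hypothetical twin-graph content keyed by entity type.
content = {
    "Customer": [{"id": "c1", "name": "Alice"}, {"id": "c2", "name": "Bob"}],
    "knows": [{"id": "r1", "source": "c1", "target": "c2"}],
}

# No target folder given, so a temp dir is created; it ends up holding
# Customer.csv and knows.csv, one row per entity.
folder = convert_graph_dataset_to_files(content)
print(sorted(p.name for p in folder.glob("*.csv")))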
cosmotech/coal/cosmotech_api/dataset/download/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+"""
+Dataset download submodules.
+"""
+
+# Re-export all download functions
+from cosmotech.coal.cosmotech_api.dataset.download.adt import download_adt_dataset
+from cosmotech.coal.cosmotech_api.dataset.download.twingraph import (
+    download_twingraph_dataset,
+    download_legacy_twingraph_dataset,
+)
+from cosmotech.coal.cosmotech_api.dataset.download.file import download_file_dataset
+from cosmotech.coal.cosmotech_api.dataset.download.common import download_dataset_by_id
cosmotech/coal/cosmotech_api/dataset/download/adt.py
@@ -0,0 +1,119 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+import time
+import tempfile
+from pathlib import Path
+from typing import Dict, Any, Optional, Union, Tuple
+
+from azure.digitaltwins.core import DigitalTwinsClient
+from azure.identity import DefaultAzureCredential
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+from cosmotech.coal.cosmotech_api.connection import get_api_client
+from cosmotech.coal.cosmotech_api.dataset.converters import convert_dataset_to_files
+
+
+def download_adt_dataset(
+    adt_address: str,
+    target_folder: Optional[Union[str, Path]] = None,
+    credentials: Optional[DefaultAzureCredential] = None,
+) -> Tuple[Dict[str, Any], Path]:
+    """
+    Download dataset from Azure Digital Twins.
+
+    Args:
+        adt_address: The ADT instance address
+        target_folder: Optional folder to save files (if None, uses temp dir)
+        credentials: Optional Azure credentials (if None, uses DefaultAzureCredential)
+
+    Returns:
+        Tuple of (content dict, folder path)
+    """
+    start_time = time.time()
+    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="ADT"))
+    LOGGER.debug(T("coal.services.dataset.adt_connecting").format(url=adt_address))
+
+    # Create credentials if not provided
+    if credentials is None:
+        if get_api_client()[1] == "Azure Entra Connection":
+            credentials = DefaultAzureCredential()
+        else:
+            LOGGER.error(T("coal.services.dataset.adt_no_credentials"))
+            raise ValueError("No credentials available for ADT connection")
+
+    # Create client and download data
+    client = DigitalTwinsClient(adt_address, credentials)
+
+    # Query twins
+    query_start = time.time()
+    LOGGER.debug(T("coal.services.dataset.adt_querying_twins"))
+    query_expression = "SELECT * FROM digitaltwins"
+    query_result = client.query_twins(query_expression)
+
+    json_content = dict()
+    twin_count = 0
+
+    for twin in query_result:
+        twin_count += 1
+        entity_type = twin.get("$metadata").get("$model").split(":")[-1].split(";")[0]
+        t_content = {k: v for k, v in twin.items()}
+        t_content["id"] = t_content["$dtId"]
+
+        # Remove system properties
+        for k in list(twin.keys()):
+            if k[0] == "$":
+                del t_content[k]
+
+        json_content.setdefault(entity_type, [])
+        json_content[entity_type].append(t_content)
+
+    query_time = time.time() - query_start
+    LOGGER.debug(T("coal.services.dataset.adt_twins_found").format(count=twin_count))
+    LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="twins query", time=query_time))
+
+    # Query relationships
+    rel_start = time.time()
+    LOGGER.debug(T("coal.services.dataset.adt_querying_relations"))
+    relations_query = "SELECT * FROM relationships"
+    query_result = client.query_twins(relations_query)
+
+    relation_count = 0
+    for relation in query_result:
+        relation_count += 1
+        tr = {"$relationshipId": "id", "$sourceId": "source", "$targetId": "target"}
+        r_content = {k: v for k, v in relation.items()}
+
+        # Map system properties to standard names
+        for k, v in tr.items():
+            r_content[v] = r_content[k]
+
+        # Remove system properties
+        for k in list(relation.keys()):
+            if k[0] == "$":
+                del r_content[k]
+
+        json_content.setdefault(relation["$relationshipName"], [])
+        json_content[relation["$relationshipName"]].append(r_content)
+
+    rel_time = time.time() - rel_start
+    LOGGER.debug(T("coal.services.dataset.adt_relations_found").format(count=relation_count))
+    LOGGER.debug(T("coal.common.timing.operation_completed").format(operation="relations query", time=rel_time))
+
+    # Convert to files if target_folder is provided
+    if target_folder:
+        dataset_info = {"type": "adt", "content": json_content, "name": "ADT Dataset"}
+        target_folder = convert_dataset_to_files(dataset_info, target_folder)
+    else:
+        target_folder = tempfile.mkdtemp()
+
+    elapsed_time = time.time() - start_time
+    LOGGER.info(T("coal.common.timing.operation_completed").format(operation="ADT download", time=elapsed_time))
+    LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="ADT"))
+
+    return json_content, Path(target_folder)
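For reference, a sketch of a direct call to this downloader. The ADT endpoint is a placeholder, and the call assumes Azure Entra credentials are already configured in the environment:

from cosmotech.coal.cosmotech_api.dataset.download.adt import download_adt_dataset

# Hypothetical ADT instance URL.
content, folder = download_adt_dataset(
    adt_address="https://my-adt-instance.api.weu.digitaltwins.azure.net",
    target_folder="./dataset",  # omit to get a temporary directory instead
)

# content maps entity/relationship types to lists of twins; folder holds the CSVs.
print(sorted(content.keys()), folder)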
cosmotech/coal/cosmotech_api/dataset/download/common.py
@@ -0,0 +1,140 @@
+# Copyright (C) - 2023 - 2025 - Cosmo Tech
+# This document and all information contained herein is the exclusive property -
+# including all intellectual property rights pertaining thereto - of Cosmo Tech.
+# Any use, reproduction, translation, broadcasting, transmission, distribution,
+# etc., to any person is prohibited unless it has been previously and
+# specifically authorized by written means by Cosmo Tech.
+
+import time
+from pathlib import Path
+from typing import Dict, Any, Optional, Union, Tuple
+
+from cosmotech_api import DatasetApi
+
+from cosmotech.coal.utils.logger import LOGGER
+from cosmotech.orchestrator.utils.translate import T
+from cosmotech.coal.cosmotech_api.connection import get_api_client
+
+# Import specific download functions
+# These imports are defined here to avoid circular imports
+# The functions are imported directly from their modules
+from cosmotech.coal.cosmotech_api.dataset.download.adt import download_adt_dataset
+from cosmotech.coal.cosmotech_api.dataset.download.twingraph import (
+    download_twingraph_dataset,
+    download_legacy_twingraph_dataset,
+)
+from cosmotech.coal.cosmotech_api.dataset.download.file import download_file_dataset
+
+
+def download_dataset_by_id(
+    organization_id: str,
+    workspace_id: str,
+    dataset_id: str,
+    target_folder: Optional[Union[str, Path]] = None,
+) -> Tuple[Dict[str, Any], Path]:
+    """
+    Download dataset by ID.
+
+    Args:
+        organization_id: Organization ID
+        workspace_id: Workspace ID
+        dataset_id: Dataset ID
+        target_folder: Optional folder to save files (if None, uses temp dir)
+
+    Returns:
+        Tuple of (dataset info dict, folder path)
+    """
+    start_time = time.time()
+    LOGGER.info(T("coal.services.dataset.download_started").format(dataset_type="Dataset"))
+    LOGGER.debug(
+        T("coal.services.dataset.dataset_downloading").format(organization_id=organization_id, dataset_id=dataset_id)
+    )
+
+    with get_api_client()[0] as api_client:
+        api_instance = DatasetApi(api_client)
+
+        # Get dataset info
+        info_start = time.time()
+        dataset = api_instance.find_dataset_by_id(organization_id=organization_id, dataset_id=dataset_id)
+        info_time = time.time() - info_start
+
+        LOGGER.debug(
+            T("coal.services.dataset.dataset_info_retrieved").format(dataset_name=dataset.name, dataset_id=dataset_id)
+        )
+        LOGGER.debug(
+            T("coal.common.timing.operation_completed").format(operation="dataset info retrieval", time=info_time)
+        )
+
+        # Determine dataset type and download
+        if dataset.connector is None:
+            parameters = []
+        else:
+            parameters = dataset.connector.parameters_values
+
+        is_adt = "AZURE_DIGITAL_TWINS_URL" in parameters
+        is_storage = "AZURE_STORAGE_CONTAINER_BLOB_PREFIX" in parameters
+        is_legacy_twin_cache = "TWIN_CACHE_NAME" in parameters and dataset.twingraph_id is None
+        is_in_workspace_file = (
+            False if dataset.tags is None else "workspaceFile" in dataset.tags or "dataset_part" in dataset.tags
+        )
+
+        download_start = time.time()
+
+        if is_adt:
+            LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="ADT"))
+            content, folder = download_adt_dataset(
+                adt_address=parameters["AZURE_DIGITAL_TWINS_URL"],
+                target_folder=target_folder,
+            )
+            dataset_type = "adt"
+
+        elif is_legacy_twin_cache:
+            LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="Legacy TwinGraph"))
+            twin_cache_name = parameters["TWIN_CACHE_NAME"]
+            content, folder = download_legacy_twingraph_dataset(
+                organization_id=organization_id,
+                cache_name=twin_cache_name,
+                target_folder=target_folder,
+            )
+            dataset_type = "twincache"
+
+        elif is_storage or is_in_workspace_file:
+            if is_storage:
+                LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="Storage"))
+                _file_name = parameters["AZURE_STORAGE_CONTAINER_BLOB_PREFIX"].replace("%WORKSPACE_FILE%/", "")
+            else:
+                LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="Workspace File"))
+                _file_name = dataset.source.location
+
+            content, folder = download_file_dataset(
+                organization_id=organization_id,
+                workspace_id=workspace_id,
+                file_name=_file_name,
+                target_folder=target_folder,
+            )
+            dataset_type = _file_name.split(".")[-1]
+
+        else:
+            LOGGER.debug(T("coal.services.dataset.dataset_type_detected").format(type="TwinGraph"))
+            content, folder = download_twingraph_dataset(
+                organization_id=organization_id,
+                dataset_id=dataset_id,
+                target_folder=target_folder,
+            )
+            dataset_type = "twincache"
+
+        download_time = time.time() - download_start
+        LOGGER.debug(
+            T("coal.common.timing.operation_completed").format(operation="content download", time=download_time)
+        )
+
+        # Prepare result
+        dataset_info = {"type": dataset_type, "content": content, "name": dataset.name}
+
+        elapsed_time = time.time() - start_time
+        LOGGER.info(
+            T("coal.common.timing.operation_completed").format(operation="total dataset download", time=elapsed_time)
+        )
+        LOGGER.info(T("coal.services.dataset.download_completed").format(dataset_type="Dataset"))
+
+        return dataset_info, folder
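A sketch of the dispatcher in use; it inspects the dataset's connector parameters and tags to pick the right downloader (ADT, legacy twin cache, storage/workspace file, or twin graph). The IDs below are placeholders, and the call assumes get_api_client() can authenticate from the environment:

from cosmotech.coal.cosmotech_api.dataset.download.common import download_dataset_by_id

# Hypothetical organization, workspace and dataset IDs.
dataset_info, folder = download_dataset_by_id(
    organization_id="o-example",
    workspace_id="w-example",
    dataset_id="d-example",
    target_folder="./dataset",
)

# dataset_info carries the detected type, raw content, and dataset name;
# folder points at the files written to disk.
print(dataset_info["type"], dataset_info["name"], folder)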