cosmotech-acceleration-library 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosmotech/coal/__init__.py +8 -0
- cosmotech/coal/aws/__init__.py +23 -0
- cosmotech/coal/aws/s3.py +235 -0
- cosmotech/coal/azure/__init__.py +23 -0
- cosmotech/coal/azure/adx/__init__.py +26 -0
- cosmotech/coal/azure/adx/auth.py +125 -0
- cosmotech/coal/azure/adx/ingestion.py +329 -0
- cosmotech/coal/azure/adx/query.py +56 -0
- cosmotech/coal/azure/adx/runner.py +217 -0
- cosmotech/coal/azure/adx/store.py +255 -0
- cosmotech/coal/azure/adx/tables.py +118 -0
- cosmotech/coal/azure/adx/utils.py +71 -0
- cosmotech/coal/azure/blob.py +109 -0
- cosmotech/coal/azure/functions.py +72 -0
- cosmotech/coal/azure/storage.py +74 -0
- cosmotech/coal/cosmotech_api/__init__.py +36 -0
- cosmotech/coal/cosmotech_api/connection.py +96 -0
- cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
- cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
- cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
- cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
- cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
- cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
- cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
- cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
- cosmotech/coal/cosmotech_api/parameters.py +48 -0
- cosmotech/coal/cosmotech_api/run.py +25 -0
- cosmotech/coal/cosmotech_api/run_data.py +173 -0
- cosmotech/coal/cosmotech_api/run_template.py +108 -0
- cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
- cosmotech/coal/cosmotech_api/runner/data.py +38 -0
- cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
- cosmotech/coal/cosmotech_api/runner/download.py +146 -0
- cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
- cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
- cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
- cosmotech/coal/cosmotech_api/workspace.py +127 -0
- cosmotech/coal/csm/__init__.py +6 -0
- cosmotech/coal/csm/engine/__init__.py +47 -0
- cosmotech/coal/postgresql/__init__.py +22 -0
- cosmotech/coal/postgresql/runner.py +93 -0
- cosmotech/coal/postgresql/store.py +98 -0
- cosmotech/coal/singlestore/__init__.py +17 -0
- cosmotech/coal/singlestore/store.py +100 -0
- cosmotech/coal/store/__init__.py +42 -0
- cosmotech/coal/store/csv.py +44 -0
- cosmotech/coal/store/native_python.py +25 -0
- cosmotech/coal/store/pandas.py +26 -0
- cosmotech/coal/store/pyarrow.py +23 -0
- cosmotech/coal/store/store.py +79 -0
- cosmotech/coal/utils/__init__.py +18 -0
- cosmotech/coal/utils/api.py +68 -0
- cosmotech/coal/utils/logger.py +10 -0
- cosmotech/coal/utils/postgresql.py +236 -0
- cosmotech/csm_data/__init__.py +6 -0
- cosmotech/csm_data/commands/__init__.py +6 -0
- cosmotech/csm_data/commands/adx_send_data.py +92 -0
- cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
- cosmotech/csm_data/commands/api/__init__.py +6 -0
- cosmotech/csm_data/commands/api/api.py +50 -0
- cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
- cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
- cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
- cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
- cosmotech/csm_data/commands/api/run_load_data.py +120 -0
- cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
- cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
- cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
- cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
- cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
- cosmotech/csm_data/commands/az_storage_upload.py +76 -0
- cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
- cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
- cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
- cosmotech/csm_data/commands/store/__init__.py +6 -0
- cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
- cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
- cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
- cosmotech/csm_data/commands/store/list_tables.py +48 -0
- cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
- cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
- cosmotech/csm_data/commands/store/reset.py +31 -0
- cosmotech/csm_data/commands/store/store.py +37 -0
- cosmotech/csm_data/main.py +57 -0
- cosmotech/csm_data/utils/__init__.py +6 -0
- cosmotech/csm_data/utils/click.py +18 -0
- cosmotech/csm_data/utils/decorators.py +75 -0
- cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
- cosmotech/translation/coal/__init__.py +6 -0
- cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
- cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
- cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
- cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
- cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
- cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
- cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
- cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
- cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
- cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
- cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
- cosmotech/translation/coal/en-US/coal/web.yml +2 -0
- cosmotech/translation/csm_data/__init__.py +6 -0
- cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
- cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
- cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
- cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
- cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
- cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
- cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import pathlib
|
|
10
|
+
|
|
11
|
+
import pyarrow
|
|
12
|
+
from adbc_driver_sqlite import dbapi
|
|
13
|
+
|
|
14
|
+
from cosmotech.coal.utils.logger import LOGGER
|
|
15
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Store:
    """SQLite-backed local data store exchanging data as PyArrow tables.

    Data is persisted in ``<store_location>/.coal/store/db.sqlite`` and
    read/written through the ADBC SQLite driver.
    """

    @staticmethod
    def sanitize_column(column_name: str) -> str:
        """Return ``column_name`` with spaces replaced by underscores."""
        return column_name.replace(" ", "_")

    def __init__(
        self,
        reset: bool = False,
        store_location: pathlib.Path = pathlib.Path(os.environ.get("CSM_PARAMETERS_ABSOLUTE_PATH", ".")),
    ):
        """Open (and optionally reset) the store rooted at ``store_location``.

        Args:
            reset: if True, delete any existing database file before use.
            store_location: base folder; the store lives in
                ``<store_location>/.coal/store``. Defaults to the
                ``CSM_PARAMETERS_ABSOLUTE_PATH`` environment variable
                (read once at import time) or the current directory.
        """
        self.store_location = pathlib.Path(store_location) / ".coal/store"
        self.store_location.mkdir(parents=True, exist_ok=True)
        self._tables = dict()
        self._database_path = self.store_location / "db.sqlite"
        if reset:
            self.reset()
        # The ADBC driver expects the database location as a string.
        self._database = str(self._database_path)

    def reset(self):
        """Delete the underlying SQLite database file if it exists."""
        if self._database_path.exists():
            self._database_path.unlink()

    def get_table(self, table_name: str) -> pyarrow.Table:
        """Return the full content of ``table_name`` as a PyArrow table.

        Raises:
            ValueError: if the table does not exist in the store.
        """
        if not self.table_exists(table_name):
            raise ValueError(T("coal.errors.data.no_table").format(table_name=table_name))
        # table_name was validated against the list of existing tables above,
        # so interpolating it into the query text is safe.
        return self.execute_query(f"select * from {table_name}")

    def table_exists(self, table_name) -> bool:
        """Return True if ``table_name`` is present in the store."""
        return table_name in self.list_tables()

    def get_table_schema(self, table_name: str) -> pyarrow.Schema:
        """Return the PyArrow schema of an existing table.

        Raises:
            ValueError: if the table does not exist in the store.
        """
        if not self.table_exists(table_name):
            raise ValueError(T("coal.errors.data.no_table").format(table_name=table_name))
        with dbapi.connect(self._database) as conn:
            return conn.adbc_get_table_schema(table_name)

    def add_table(self, table_name: str, data: pyarrow.Table, replace: bool = False):
        """Ingest ``data`` into ``table_name``.

        BUG FIX: the signature previously read ``data=pyarrow.Table`` which
        made the *class object* the default value instead of annotating the
        parameter; ``data`` is now a required, annotated parameter.

        Args:
            table_name: target table name.
            data: PyArrow table to ingest.
            replace: if True, replace an existing table; otherwise create
                the table if needed and append to it.
        """
        with dbapi.connect(self._database, autocommit=True) as conn:
            with conn.cursor() as curs:
                rows = curs.adbc_ingest(table_name, data, "replace" if replace else "create_append")
                LOGGER.debug(T("coal.common.data_transfer.rows_inserted").format(rows=rows, table_name=table_name))

    def execute_query(self, sql_query: str) -> pyarrow.Table:
        """Run ``sql_query`` and return the result as a PyArrow table.

        On OSError from the driver the query is retried with a larger
        result batch size (presumably a workaround for driver batch/type
        issues — original intent not documented; retries indefinitely if
        the error is not batch-size related).
        """
        batch_size = 1024
        batch_size_increment = 1024
        while True:
            try:
                with dbapi.connect(self._database, autocommit=True) as conn:
                    with conn.cursor() as curs:
                        curs.adbc_statement.set_options(**{"adbc.sqlite.query.batch_rows": str(batch_size)})
                        curs.execute(sql_query)
                        return curs.fetch_arrow_table()
            except OSError:
                batch_size += batch_size_increment

    def list_tables(self) -> list[str]:
        """Return the names of all tables currently in the store.

        BUG FIX: this was previously a generator despite the declared
        ``list[str]`` return type, and kept the connection open until the
        generator was exhausted; the names are now materialized so the
        connection is closed before returning and the annotation holds.
        """
        with dbapi.connect(self._database) as conn:
            objects = conn.adbc_get_objects(depth="all").read_all()
            tables = objects["catalog_db_schemas"][0][0]["db_schema_tables"]
            return [table["table_name"].as_py() for table in tables]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
from cosmotech.coal import __version__
|
|
9
|
+
|
|
10
|
+
WEB_DOCUMENTATION_ROOT = f"https://cosmo-tech.github.io/CosmoTech-Acceleration-Library/{__version__}/"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def strtobool(string: str) -> bool:
    """Convert a truth-value string to a bool (case-insensitive).

    True values: "y", "yes", "t", "true", "on", "1".
    False values: "n", "no", "f", "false", "off", "0".

    Args:
        string: the value to interpret.

    Raises:
        ValueError: if ``string`` is not a recognized truth value.
    """
    value = string.lower()
    if value in ["y", "yes", "t", "true", "on", "1"]:
        return True
    if value in ["n", "no", "f", "false", "off", "0"]:
        return False
    # BUG FIX: the message previously read f'"{string} is not ...' with an
    # unbalanced quote around the offending value.
    raise ValueError(f'"{string}" is not a recognized truth value')
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import pathlib
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
import cosmotech_api
|
|
13
|
+
import yaml
|
|
14
|
+
from cosmotech_api.api.solution_api import Solution
|
|
15
|
+
from cosmotech_api.api.solution_api import SolutionApi
|
|
16
|
+
from cosmotech_api.api.workspace_api import Workspace
|
|
17
|
+
from cosmotech_api.api.workspace_api import WorkspaceApi
|
|
18
|
+
from cosmotech_api.exceptions import ServiceException
|
|
19
|
+
|
|
20
|
+
from cosmotech.coal.cosmotech_api.connection import get_api_client
|
|
21
|
+
from cosmotech.coal.utils.logger import LOGGER
|
|
22
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def read_solution_file(solution_file) -> Optional[Solution]:
    """Load a Solution object from a local YAML or JSON file.

    Args:
        solution_file: path to a ``.yaml``/``.yml`` or ``.json`` file.

    Returns:
        The parsed Solution, or None (after logging an error) when the
        file extension is not recognized.
    """
    solution_path = pathlib.Path(solution_file)
    suffix = solution_path.suffix
    if suffix == ".json":
        parser = json.load
    elif suffix in (".yaml", ".yml"):
        parser = yaml.safe_load
    else:
        LOGGER.error(T("coal.cosmotech_api.solution.invalid_file").format(file=solution_file))
        return None

    with solution_path.open() as _sf:
        solution_content = parser(_sf)

    LOGGER.info(T("coal.cosmotech_api.solution.loaded").format(path=solution_path.absolute()))
    _solution = Solution(
        _configuration=cosmotech_api.Configuration(),
        _spec_property_naming=True,
        **solution_content,
    )
    LOGGER.debug(
        T("coal.services.api.solution_debug").format(solution=json.dumps(_solution.to_dict(), indent=2, default=str))
    )
    return _solution
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_solution(organization_id, workspace_id) -> Optional[Solution]:
    """Fetch the Solution attached to a workspace through the API.

    Args:
        organization_id: owning organization identifier.
        workspace_id: workspace identifier whose solution is wanted.

    Returns:
        The Solution bound to the workspace, or None (after logging)
        when the workspace cannot be found.
    """
    LOGGER.info(T("coal.cosmotech_api.solution.api_configured"))
    with get_api_client()[0] as api_client:
        workspace_api = WorkspaceApi(api_client)

        LOGGER.info(T("coal.cosmotech_api.solution.loading_workspace"))
        try:
            workspace: Workspace = workspace_api.find_workspace_by_id(
                organization_id=organization_id, workspace_id=workspace_id
            )
        except ServiceException as e:
            LOGGER.error(
                T("coal.cosmotech_api.workspace.not_found").format(
                    workspace_id=workspace_id, organization_id=organization_id
                )
            )
            LOGGER.debug(e)
            return None

        # Resolve the solution referenced by the workspace.
        solution_api = SolutionApi(api_client)
        return solution_api.find_solution_by_id(
            organization_id=organization_id,
            solution_id=workspace.solution.solution_id,
        )
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
from cosmotech.orchestrator.utils.logger import get_logger
|
|
9
|
+
|
|
10
|
+
LOGGER = get_logger("csm.data")
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from urllib.parse import quote
|
|
10
|
+
|
|
11
|
+
import adbc_driver_manager
|
|
12
|
+
import pyarrow as pa
|
|
13
|
+
from adbc_driver_postgresql import dbapi
|
|
14
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
15
|
+
from pyarrow import Table
|
|
16
|
+
|
|
17
|
+
from cosmotech.coal.utils.logger import LOGGER
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def generate_postgresql_full_uri(
    postgres_host: str,
    postgres_port: str,
    postgres_db: str,
    postgres_user: str,
    postgres_password: str,
    force_encode: bool = False,
) -> str:
    """Build a PostgreSQL connection URI from its parts.

    Args:
        postgres_host: server host name.
        postgres_port: server port.
        postgres_db: database name.
        postgres_user: user name.
        postgres_password: password (never logged, for security).
        force_encode: when True, percent-encode the password so special
            characters survive URI parsing.

    Returns:
        A ``postgresql://user:password@host:port/db`` URI string.
    """
    password = quote(postgres_password, safe="") if force_encode else postgres_password
    return f"postgresql://{postgres_user}:{password}@{postgres_host}:{postgres_port}/{postgres_db}"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_postgresql_table_schema(
    target_table_name: str,
    postgres_host: str,
    postgres_port: str,
    postgres_db: str,
    postgres_schema: str,
    postgres_user: str,
    postgres_password: str,
    force_encode: bool = False,
) -> Optional[pa.Schema]:
    """
    Get the schema of an existing PostgreSQL table using SQL queries.

    Args:
        target_table_name: Name of the table
        postgres_host: PostgreSQL host
        postgres_port: PostgreSQL port
        postgres_db: PostgreSQL database name
        postgres_schema: PostgreSQL schema name
        postgres_user: PostgreSQL username
        postgres_password: PostgreSQL password
        force_encode: Percent-encode the password when building the URI

    Returns:
        PyArrow Schema if table exists, None otherwise
    """
    LOGGER.debug(
        T("coal.services.postgresql.getting_schema").format(
            postgres_schema=postgres_schema, target_table_name=target_table_name
        )
    )

    connection_uri = generate_postgresql_full_uri(
        postgres_host,
        postgres_port,
        postgres_db,
        postgres_user,
        postgres_password,
        force_encode,
    )

    with dbapi.connect(connection_uri) as connection:
        try:
            return connection.adbc_get_table_schema(
                target_table_name,
                db_schema_filter=postgres_schema,
            )
        except adbc_driver_manager.ProgrammingError:
            # The driver raises ProgrammingError when the table is absent.
            LOGGER.warning(
                T("coal.services.postgresql.table_not_found").format(
                    postgres_schema=postgres_schema, target_table_name=target_table_name
                )
            )
            return None
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def adapt_table_to_schema(data: pa.Table, target_schema: pa.Schema) -> pa.Table:
    """
    Adapt a PyArrow table to match a target schema with detailed logging.

    Columns are processed in target-schema order:
      * present in both: cast to the target type when types differ; if the
        cast raises ``pa.ArrowInvalid`` the column is replaced by nulls;
      * missing from ``data``: added as an all-null column;
      * present in ``data`` but absent from ``target_schema``: dropped.

    Args:
        data: input table to adapt.
        target_schema: schema the returned table must match exactly.

    Returns:
        A new table built from the adapted columns with ``target_schema``.
    """
    LOGGER.debug(T("coal.services.postgresql.schema_adaptation_start").format(rows=len(data)))
    LOGGER.debug(T("coal.services.postgresql.original_schema").format(schema=data.schema))
    LOGGER.debug(T("coal.services.postgresql.target_schema").format(schema=target_schema))

    # Target column name -> type, preserving target-schema column order.
    target_fields = {field.name: field.type for field in target_schema}
    new_columns = []

    # Track adaptations for summary
    added_columns = []
    dropped_columns = []
    type_conversions = []
    failed_conversions = []

    # Process each field in target schema
    for field_name, target_type in target_fields.items():
        if field_name in data.column_names:
            # Column exists - try to cast to target type
            col = data[field_name]
            original_type = col.type

            if original_type != target_type:
                LOGGER.debug(
                    T("coal.services.postgresql.casting_column").format(
                        field_name=field_name,
                        original_type=original_type,
                        target_type=target_type,
                    )
                )
                try:
                    new_col = pa.compute.cast(col, target_type)
                    new_columns.append(new_col)
                    type_conversions.append(f"{field_name}: {original_type} -> {target_type}")
                except pa.ArrowInvalid as e:
                    LOGGER.warning(
                        T("coal.services.postgresql.cast_failed").format(
                            field_name=field_name,
                            original_type=original_type,
                            target_type=target_type,
                            error=str(e),
                        )
                    )
                    # Cast failed: fall back to an all-null column so the
                    # overall ingestion can still proceed.
                    new_columns.append(pa.nulls(len(data), type=target_type))
                    failed_conversions.append(f"{field_name}: {original_type} -> {target_type}")
            else:
                new_columns.append(col)
        else:
            # Column doesn't exist - add nulls
            LOGGER.debug(T("coal.services.postgresql.adding_missing_column").format(field_name=field_name))
            new_columns.append(pa.nulls(len(data), type=target_type))
            added_columns.append(field_name)

    # Log columns that will be dropped
    dropped_columns = [name for name in data.column_names if name not in target_fields]
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropping_columns").format(columns=dropped_columns))

    # Create new table
    adapted_table = pa.Table.from_arrays(new_columns, schema=target_schema)

    # Log summary of adaptations
    LOGGER.debug(T("coal.services.postgresql.adaptation_summary"))
    if added_columns:
        LOGGER.debug(T("coal.services.postgresql.added_columns").format(columns=added_columns))
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropped_columns").format(columns=dropped_columns))
    if type_conversions:
        LOGGER.debug(T("coal.services.postgresql.successful_conversions").format(conversions=type_conversions))
    if failed_conversions:
        LOGGER.debug(T("coal.services.postgresql.failed_conversions").format(conversions=failed_conversions))

    LOGGER.debug(T("coal.services.postgresql.final_schema").format(schema=adapted_table.schema))
    return adapted_table
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def send_pyarrow_table_to_postgresql(
    data: Table,
    target_table_name: str,
    postgres_host: str,
    postgres_port: str,
    postgres_db: str,
    postgres_schema: str,
    postgres_user: str,
    postgres_password: str,
    replace: bool,
    force_encode: bool = False,
) -> int:
    """Ingest a PyArrow table into a PostgreSQL table.

    When the target table already exists and ``replace`` is False, the
    incoming data is first adapted to the existing table's schema; with
    ``replace`` True the table is recreated from the incoming data.

    Args:
        data: table to send.
        target_table_name: destination table name.
        postgres_host: PostgreSQL host.
        postgres_port: PostgreSQL port.
        postgres_db: database name.
        postgres_schema: schema name.
        postgres_user: user name.
        postgres_password: password.
        replace: replace the existing table instead of appending.
        force_encode: percent-encode the password in the connection URI.

    Returns:
        The number of rows ingested.
    """
    LOGGER.debug(
        T("coal.services.postgresql.preparing_send").format(
            postgres_schema=postgres_schema, target_table_name=target_table_name
        )
    )
    LOGGER.debug(T("coal.services.postgresql.input_rows").format(rows=len(data)))

    # Look up the destination table's schema, if it already exists.
    current_schema = get_postgresql_table_schema(
        target_table_name,
        postgres_host,
        postgres_port,
        postgres_db,
        postgres_schema,
        postgres_user,
        postgres_password,
        force_encode,
    )

    if current_schema is None:
        LOGGER.debug(T("coal.services.postgresql.no_existing_table"))
    else:
        LOGGER.debug(T("coal.services.postgresql.found_existing_table").format(schema=current_schema))
        if replace:
            LOGGER.debug(T("coal.services.postgresql.replace_mode"))
        else:
            LOGGER.debug(T("coal.services.postgresql.adapting_data"))
            data = adapt_table_to_schema(data, current_schema)

    # Proceed with ingestion.
    connection_uri = generate_postgresql_full_uri(
        postgres_host,
        postgres_port,
        postgres_db,
        postgres_user,
        postgres_password,
        force_encode,
    )

    LOGGER.debug(T("coal.services.postgresql.connecting"))
    ingestion_mode = "replace" if replace else "create_append"
    with dbapi.connect(connection_uri, autocommit=True) as conn:
        with conn.cursor() as curs:
            LOGGER.debug(T("coal.services.postgresql.ingesting_data").format(mode=ingestion_mode))
            inserted = curs.adbc_ingest(target_table_name, data, ingestion_mode, db_schema_name=postgres_schema)

    LOGGER.debug(T("coal.services.postgresql.ingestion_success").format(rows=inserted))
    return inserted
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
8
|
+
|
|
9
|
+
from cosmotech.csm_data.utils.click import click
|
|
10
|
+
from cosmotech.csm_data.utils.decorators import web_help, translate_help
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@click.command()
@web_help("csm-data/adx-send-data")
@translate_help("csm-data.commands.storage.adx_send_data.description")
@click.option(
    "--adx-uri",
    envvar="AZURE_DATA_EXPLORER_RESOURCE_URI",
    show_envvar=True,
    required=True,
    metavar="URI",
    help=T("csm-data.commands.storage.adx_send_data.parameters.adx_uri"),
)
@click.option(
    "--adx-ingest-uri",
    envvar="AZURE_DATA_EXPLORER_RESOURCE_INGEST_URI",
    show_envvar=True,
    required=True,
    metavar="URI",
    help=T("csm-data.commands.storage.adx_send_data.parameters.adx_ingest_uri"),
)
@click.option(
    "--database-name",
    envvar="AZURE_DATA_EXPLORER_DATABASE_NAME",
    show_envvar=True,
    required=True,
    metavar="NAME",
    help=T("csm-data.commands.storage.adx_send_data.parameters.database_name"),
)
@click.option(
    "--wait/--no-wait",
    "wait",
    envvar="CSM_DATA_ADX_WAIT_INGESTION",
    show_envvar=True,
    default=False,
    show_default=True,
    help=T("csm-data.commands.storage.adx_send_data.parameters.waiting_ingestion"),
)
@click.option(
    "--tag",
    envvar="CSM_DATA_ADX_TAG",
    show_envvar=True,
    default=None,
    help=T("csm-data.commands.storage.adx_send_data.parameters.adx_tag"),
)
@click.option(
    "--store-folder",
    envvar="CSM_PARAMETERS_ABSOLUTE_PATH",
    help=T("csm-data.commands.storage.adx_send_data.parameters.store_folder"),
    metavar="PATH",
    type=str,
    show_envvar=True,
    required=True,
)
def adx_send_data(
    adx_uri: str,
    adx_ingest_uri: str,
    database_name: str,
    wait: bool,
    store_folder: str,
    tag: str = None,
):
    """
    Send data from the store to Azure Data Explorer.
    """
    # Imported lazily so the CLI can load without the Azure dependencies.
    from cosmotech.coal.azure.adx.store import send_store_to_adx

    success = send_store_to_adx(
        adx_uri=adx_uri,
        adx_ingest_uri=adx_ingest_uri,
        database_name=database_name,
        wait=wait,
        tag=tag,
        store_location=store_folder,
    )

    if not success:
        # BUG FIX: click.Abort() was instantiated but never raised, so a
        # failed transfer still exited with status 0.
        raise click.Abort()
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
if __name__ == "__main__":
|
|
92
|
+
adx_send_data()
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
from cosmotech.csm_data.utils.click import click
|
|
9
|
+
from cosmotech.csm_data.utils.decorators import web_help, translate_help
|
|
10
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# CLI command: push a runner's dataset and parameter files to Azure Data
# Explorer (ADX). The help text is injected by @translate_help, so no
# docstring is set on the function (click would otherwise pick it up as
# the command help and could conflict with the translated text).
# NOTE: decorator order below fixes the option order shown in `--help`.
@click.command()
@web_help("csm-data/adx-send-runner-data")
@translate_help("csm-data.commands.storage.adx_send_runnerdata.description")
@click.option(
    "--dataset-absolute-path",
    envvar="CSM_DATASET_ABSOLUTE_PATH",
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.dataset_absolute_path"),
    metavar="PATH",
    required=True,
)
@click.option(
    "--parameters-absolute-path",
    envvar="CSM_PARAMETERS_ABSOLUTE_PATH",
    metavar="PATH",
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.parameters_absolute_path"),
    required=True,
)
@click.option(
    "--runner-id",
    envvar="CSM_RUNNER_ID",
    show_envvar=True,
    required=True,
    metavar="UUID",
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.runner_id"),
)
@click.option(
    "--adx-uri",
    envvar="AZURE_DATA_EXPLORER_RESOURCE_URI",
    show_envvar=True,
    required=True,
    metavar="URI",
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.adx_uri"),
)
@click.option(
    "--adx-ingest-uri",
    envvar="AZURE_DATA_EXPLORER_RESOURCE_INGEST_URI",
    show_envvar=True,
    required=True,
    metavar="URI",
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.adx_ingest_uri"),
)
@click.option(
    "--database-name",
    envvar="AZURE_DATA_EXPLORER_DATABASE_NAME",
    show_envvar=True,
    required=True,
    metavar="NAME",
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.database_name"),
)
# Boolean flags default to False: nothing is sent unless explicitly enabled
# via the flag or its environment variable.
@click.option(
    "--send-parameters/--no-send-parameters",
    type=bool,
    envvar="CSM_SEND_DATAWAREHOUSE_PARAMETERS",
    show_envvar=True,
    default=False,
    show_default=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.send_parameters"),
)
@click.option(
    "--send-datasets/--no-send-datasets",
    type=bool,
    envvar="CSM_SEND_DATAWAREHOUSE_DATASETS",
    show_envvar=True,
    default=False,
    show_default=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.send_datasets"),
)
@click.option(
    "--wait/--no-wait",
    envvar="WAIT_FOR_INGESTION",
    show_envvar=True,
    default=False,
    show_default=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.wait"),
)
def adx_send_runnerdata(
    send_parameters: bool,
    send_datasets: bool,
    dataset_absolute_path: str,
    parameters_absolute_path: str,
    runner_id: str,
    adx_uri: str,
    adx_ingest_uri: str,
    database_name: str,
    wait: bool,
):
    # Import the function at the start of the command
    # (deferred so the Azure SDK is only loaded when this command runs)
    from cosmotech.coal.azure.adx.runner import send_runner_data

    # Send runner data to ADX — every CLI option is forwarded verbatim to
    # the library-level implementation.
    # NOTE(review): unlike adx_send_data, the return value is not checked
    # here, so an ingestion failure would not change the process exit code —
    # confirm whether send_runner_data raises on failure.
    send_runner_data(
        dataset_absolute_path=dataset_absolute_path,
        parameters_absolute_path=parameters_absolute_path,
        runner_id=runner_id,
        adx_uri=adx_uri,
        adx_ingest_uri=adx_ingest_uri,
        database_name=database_name,
        send_parameters=send_parameters,
        send_datasets=send_datasets,
        wait=wait,
    )


if __name__ == "__main__":
    adx_send_runnerdata()
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|