cosmotech-acceleration-library 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosmotech/coal/__init__.py +1 -1
- cosmotech/coal/aws/__init__.py +1 -9
- cosmotech/coal/aws/s3.py +181 -214
- cosmotech/coal/azure/adx/auth.py +2 -2
- cosmotech/coal/azure/adx/runner.py +13 -14
- cosmotech/coal/azure/adx/store.py +5 -86
- cosmotech/coal/azure/adx/tables.py +2 -2
- cosmotech/coal/azure/blob.py +6 -6
- cosmotech/coal/azure/storage.py +3 -3
- cosmotech/coal/cosmotech_api/__init__.py +0 -24
- cosmotech/coal/cosmotech_api/apis/__init__.py +14 -0
- cosmotech/coal/cosmotech_api/apis/dataset.py +103 -0
- cosmotech/coal/cosmotech_api/apis/meta.py +25 -0
- cosmotech/coal/cosmotech_api/apis/organization.py +24 -0
- cosmotech/coal/cosmotech_api/apis/run.py +38 -0
- cosmotech/coal/cosmotech_api/apis/runner.py +71 -0
- cosmotech/coal/cosmotech_api/apis/solution.py +23 -0
- cosmotech/coal/cosmotech_api/apis/workspace.py +108 -0
- cosmotech/coal/cosmotech_api/objects/__init__.py +9 -0
- cosmotech/coal/cosmotech_api/objects/connection.py +125 -0
- cosmotech/coal/cosmotech_api/objects/parameters.py +127 -0
- cosmotech/coal/postgresql/runner.py +56 -36
- cosmotech/coal/postgresql/store.py +60 -14
- cosmotech/coal/postgresql/utils.py +254 -0
- cosmotech/coal/store/output/__init__.py +0 -0
- cosmotech/coal/store/output/aws_channel.py +73 -0
- cosmotech/coal/store/output/az_storage_channel.py +42 -0
- cosmotech/coal/store/output/channel_interface.py +23 -0
- cosmotech/coal/store/output/channel_spliter.py +55 -0
- cosmotech/coal/store/output/postgres_channel.py +40 -0
- cosmotech/coal/utils/configuration.py +169 -0
- cosmotech/coal/utils/decorator.py +22 -0
- cosmotech/csm_data/commands/api/api.py +6 -19
- cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +20 -16
- cosmotech/csm_data/commands/api/run_load_data.py +7 -46
- cosmotech/csm_data/commands/api/wsf_load_file.py +14 -15
- cosmotech/csm_data/commands/api/wsf_send_file.py +12 -13
- cosmotech/csm_data/commands/s3_bucket_delete.py +16 -15
- cosmotech/csm_data/commands/s3_bucket_download.py +16 -16
- cosmotech/csm_data/commands/s3_bucket_upload.py +16 -14
- cosmotech/csm_data/commands/store/dump_to_s3.py +18 -16
- cosmotech/csm_data/commands/store/output.py +35 -0
- cosmotech/csm_data/commands/store/store.py +3 -4
- cosmotech/translation/coal/en-US/coal/cosmotech_api/initialization.yml +8 -0
- cosmotech/translation/coal/en-US/coal/services/dataset.yml +4 -14
- cosmotech/translation/coal/en-US/coal/store/output/data_interface.yml +1 -0
- cosmotech/translation/coal/en-US/coal/store/output/split.yml +6 -0
- cosmotech/translation/coal/en-US/coal/utils/configuration.yml +2 -0
- cosmotech/translation/csm_data/en-US/csm_data/commands/store/output.yml +7 -0
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/METADATA +8 -9
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/RECORD +55 -71
- cosmotech/coal/azure/functions.py +0 -72
- cosmotech/coal/cosmotech_api/connection.py +0 -96
- cosmotech/coal/cosmotech_api/dataset/__init__.py +0 -26
- cosmotech/coal/cosmotech_api/dataset/converters.py +0 -164
- cosmotech/coal/cosmotech_api/dataset/download/__init__.py +0 -19
- cosmotech/coal/cosmotech_api/dataset/download/adt.py +0 -119
- cosmotech/coal/cosmotech_api/dataset/download/common.py +0 -140
- cosmotech/coal/cosmotech_api/dataset/download/file.py +0 -216
- cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +0 -188
- cosmotech/coal/cosmotech_api/dataset/utils.py +0 -132
- cosmotech/coal/cosmotech_api/parameters.py +0 -48
- cosmotech/coal/cosmotech_api/run.py +0 -25
- cosmotech/coal/cosmotech_api/run_data.py +0 -173
- cosmotech/coal/cosmotech_api/run_template.py +0 -108
- cosmotech/coal/cosmotech_api/runner/__init__.py +0 -28
- cosmotech/coal/cosmotech_api/runner/data.py +0 -38
- cosmotech/coal/cosmotech_api/runner/datasets.py +0 -364
- cosmotech/coal/cosmotech_api/runner/download.py +0 -146
- cosmotech/coal/cosmotech_api/runner/metadata.py +0 -42
- cosmotech/coal/cosmotech_api/runner/parameters.py +0 -157
- cosmotech/coal/cosmotech_api/twin_data_layer.py +0 -512
- cosmotech/coal/cosmotech_api/workspace.py +0 -127
- cosmotech/coal/utils/api.py +0 -68
- cosmotech/coal/utils/postgresql.py +0 -236
- cosmotech/csm_data/commands/api/rds_load_csv.py +0 -90
- cosmotech/csm_data/commands/api/rds_send_csv.py +0 -74
- cosmotech/csm_data/commands/api/rds_send_store.py +0 -74
- cosmotech/csm_data/commands/api/runtemplate_load_handler.py +0 -66
- cosmotech/csm_data/commands/api/tdl_load_files.py +0 -76
- cosmotech/csm_data/commands/api/tdl_send_files.py +0 -82
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +0 -32
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +0 -27
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +0 -2
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_load_csv.yml +0 -13
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_csv.yml +0 -12
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_store.yml +0 -12
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_load_files.yml +0 -14
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_send_files.yml +0 -18
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/WHEEL +0 -0
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/entry_points.txt +0 -0
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import pathlib
|
|
10
|
+
from csv import DictWriter
|
|
11
|
+
from typing import Any, Dict, List
|
|
12
|
+
|
|
13
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
14
|
+
|
|
15
|
+
from cosmotech.coal.utils.logger import LOGGER
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Parameters:
    """Runner parameters extracted from runner data.

    Builds both a ``{parameter_id: value}`` mapping (``values``) and a list
    of parameter dictionaries (``parameters_list``) suitable for CSV/JSON
    serialization.
    """

    def __init__(self, runner_data: Any):
        """
        Extract parameters from runner data.

        Args:
            runner_data: Runner data object (must expose ``parameters_values``)
        """
        # BUGFIX: these were declared as mutable class-level attributes
        # (``values: Dict = dict()``), so every Parameters instance shared
        # and kept accumulating into the same dict/list. They are now
        # per-instance containers.
        self.values: Dict[str, Any] = {}
        self.parameters_list: List[Dict[str, Any]] = []

        for parameter in runner_data.parameters_values:
            self.values[parameter.parameter_id] = parameter.value
        self.parameters_list = self.format_parameters_list(runner_data)

    @staticmethod
    def format_parameters_list(runner_data: Any) -> List[Dict[str, Any]]:
        """
        Format parameters from runner data as a list of dictionaries.

        Args:
            runner_data: Runner data object

        Returns:
            List of parameter dictionaries with keys
            ``parameterId``, ``value``, ``varType``, ``isInherited``.
        """
        parameters = []

        if not runner_data.parameters_values:
            return parameters

        # Column widths, used only to align the debug output below.
        max_name_size = max(map(lambda r: len(r.parameter_id), runner_data.parameters_values))
        max_type_size = max(map(lambda r: len(r.var_type), runner_data.parameters_values))

        for parameter_data in runner_data.parameters_values:
            parameter_name = parameter_data.parameter_id
            value = parameter_data.value
            var_type = parameter_data.var_type
            is_inherited = parameter_data.is_inherited

            parameters.append(
                {
                    "parameterId": parameter_name,
                    "value": value,
                    "varType": var_type,
                    "isInherited": is_inherited,
                }
            )

            LOGGER.debug(
                T("coal.cosmotech_api.runner.parameter_debug").format(
                    param_id=parameter_name,
                    max_name_size=max_name_size,
                    var_type=var_type,
                    max_type_size=max_type_size,
                    value=value,
                    inherited=" inherited" if is_inherited else "",
                )
            )

        return parameters

    def write_parameters_to_json(
        self,
        parameter_folder: str,
    ) -> str:
        """Write the parameter list to ``parameters.json`` in *parameter_folder*.

        The folder is created if missing.

        Returns:
            Path of the written file.
        """
        pathlib.Path(parameter_folder).mkdir(exist_ok=True, parents=True)
        tmp_parameter_file = os.path.join(parameter_folder, "parameters.json")

        LOGGER.info(T("coal.cosmotech_api.runner.generating_file").format(file=tmp_parameter_file))

        with open(tmp_parameter_file, "w") as _file:
            json.dump(self.parameters_list, _file, indent=2)

        return tmp_parameter_file

    def write_parameters_to_csv(
        self,
        parameter_folder: str,
    ) -> str:
        """Write the parameter list to ``parameters.csv`` in *parameter_folder*.

        The folder is created if missing.

        Returns:
            Path of the written file.
        """
        pathlib.Path(parameter_folder).mkdir(exist_ok=True, parents=True)
        tmp_parameter_file = os.path.join(parameter_folder, "parameters.csv")

        LOGGER.info(T("coal.cosmotech_api.runner.generating_file").format(file=tmp_parameter_file))

        with open(tmp_parameter_file, "w") as _file:
            _w = DictWriter(_file, fieldnames=["parameterId", "value", "varType", "isInherited"])
            _w.writeheader()
            _w.writerows(self.parameters_list)

        return tmp_parameter_file

    def write_parameters(
        self,
        parameter_folder: str,
        write_csv: bool = True,
        write_json: bool = False,
    ) -> Dict[str, str]:
        """Write parameters to CSV and/or JSON files.

        Args:
            parameter_folder: Destination folder (created if missing)
            write_csv: write ``parameters.csv`` when True
            write_json: write ``parameters.json`` when True

        Returns:
            Mapping of format name (``"csv"``/``"json"``) to written file path.
        """
        result = {}

        if write_csv:
            result["csv"] = self.write_parameters_to_csv(parameter_folder)

        if write_json:
            result["json"] = self.write_parameters_to_json(parameter_folder)

        return result
|
@@ -13,56 +13,40 @@ for runner metadata operations.
|
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
from adbc_driver_postgresql import dbapi
|
|
16
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
16
17
|
|
|
17
|
-
from cosmotech.coal.cosmotech_api.
|
|
18
|
-
from cosmotech.coal.
|
|
18
|
+
from cosmotech.coal.cosmotech_api.apis.runner import RunnerApi
|
|
19
|
+
from cosmotech.coal.postgresql.utils import PostgresUtils
|
|
20
|
+
from cosmotech.coal.utils.configuration import Configuration
|
|
19
21
|
from cosmotech.coal.utils.logger import LOGGER
|
|
20
|
-
from cosmotech.coal.utils.postgresql import generate_postgresql_full_uri
|
|
21
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
def send_runner_metadata_to_postgresql(
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
runner_id: str,
|
|
28
|
-
postgres_host: str,
|
|
29
|
-
postgres_port: int,
|
|
30
|
-
postgres_db: str,
|
|
31
|
-
postgres_schema: str,
|
|
32
|
-
postgres_user: str,
|
|
33
|
-
postgres_password: str,
|
|
34
|
-
table_prefix: str = "Cosmotech_",
|
|
35
|
-
force_encode: bool = False,
|
|
36
|
-
) -> None:
|
|
25
|
+
configuration: Configuration,
|
|
26
|
+
) -> str:
|
|
37
27
|
"""
|
|
38
28
|
Send runner metadata to a PostgreSQL database.
|
|
39
29
|
|
|
40
30
|
Args:
|
|
31
|
+
configuration: coal configuration
|
|
41
32
|
organization_id: Organization ID
|
|
42
33
|
workspace_id: Workspace ID
|
|
43
34
|
runner_id: Runner ID
|
|
44
|
-
postgres_host: PostgreSQL host
|
|
45
|
-
postgres_port: PostgreSQL port
|
|
46
|
-
postgres_db: PostgreSQL database name
|
|
47
|
-
postgres_schema: PostgreSQL schema
|
|
48
|
-
postgres_user: PostgreSQL username
|
|
49
|
-
postgres_password: PostgreSQL password
|
|
50
|
-
table_prefix: Table prefix
|
|
51
|
-
force_encode: force password encoding to percent encoding
|
|
52
35
|
"""
|
|
53
|
-
|
|
54
|
-
with get_api_client()[0] as api_client:
|
|
55
|
-
runner = get_runner_metadata(api_client, organization_id, workspace_id, runner_id)
|
|
36
|
+
_psql = PostgresUtils(configuration)
|
|
56
37
|
|
|
57
|
-
#
|
|
58
|
-
|
|
59
|
-
|
|
38
|
+
# Get runner metadata
|
|
39
|
+
_runner_api = RunnerApi(configuration)
|
|
40
|
+
runner = _runner_api.get_runner_metadata(
|
|
41
|
+
configuration.cosmotech.organization_id,
|
|
42
|
+
configuration.cosmotech.workspace_id,
|
|
43
|
+
configuration.cosmotech.runner_id,
|
|
60
44
|
)
|
|
61
45
|
|
|
62
46
|
# Connect to PostgreSQL and update runner metadata
|
|
63
|
-
with dbapi.connect(
|
|
47
|
+
with dbapi.connect(_psql.full_uri, autocommit=True) as conn:
|
|
64
48
|
with conn.cursor() as curs:
|
|
65
|
-
schema_table = f"{
|
|
49
|
+
schema_table = f"{str(_psql.db_schema)}.{str(_psql.table_prefix)}RunnerMetadata"
|
|
66
50
|
sql_create_table = f"""
|
|
67
51
|
CREATE TABLE IF NOT EXISTS {schema_table} (
|
|
68
52
|
id varchar(32) PRIMARY KEY,
|
|
@@ -71,6 +55,10 @@ def send_runner_metadata_to_postgresql(
|
|
|
71
55
|
run_template_id varchar(32)
|
|
72
56
|
);
|
|
73
57
|
"""
|
|
58
|
+
LOGGER.info(T("coal.services.postgresql.creating_table").format(schema_table=schema_table))
|
|
59
|
+
curs.execute(sql_create_table)
|
|
60
|
+
conn.commit()
|
|
61
|
+
LOGGER.info(T("coal.services.postgresql.metadata"))
|
|
74
62
|
sql_upsert = f"""
|
|
75
63
|
INSERT INTO {schema_table} (id, name, last_run_id, run_template_id)
|
|
76
64
|
VALUES(%s, %s, %s, %s)
|
|
@@ -78,10 +66,6 @@ def send_runner_metadata_to_postgresql(
|
|
|
78
66
|
DO
|
|
79
67
|
UPDATE SET name = EXCLUDED.name, last_run_id = EXCLUDED.last_run_id;
|
|
80
68
|
"""
|
|
81
|
-
LOGGER.info(T("coal.services.postgresql.creating_table").format(schema_table=schema_table))
|
|
82
|
-
curs.execute(sql_create_table)
|
|
83
|
-
conn.commit()
|
|
84
|
-
LOGGER.info(T("coal.services.postgresql.metadata"))
|
|
85
69
|
curs.execute(
|
|
86
70
|
sql_upsert,
|
|
87
71
|
(
|
|
@@ -93,3 +77,39 @@ def send_runner_metadata_to_postgresql(
|
|
|
93
77
|
)
|
|
94
78
|
conn.commit()
|
|
95
79
|
LOGGER.info(T("coal.services.postgresql.metadata_updated"))
|
|
80
|
+
return runner.get("lastRunId")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def remove_runner_metadata_from_postgresql(
    configuration: Configuration,
) -> str:
    """
    Remove the runner's row from the metadata table.

    Deleting the row triggers cascade deletes on the tables that declare a
    foreign key to the metadata table.

    Args:
        configuration: coal configuration (provides the PostgreSQL settings
            and the Cosmo Tech organization/workspace/runner identifiers)

    Returns:
        The runner's last run id (the key of the deleted row).
    """
    _psql = PostgresUtils(configuration)

    # Get runner metadata
    _runner_api = RunnerApi(configuration)
    runner = _runner_api.get_runner_metadata(
        configuration.cosmotech.organization_id,
        configuration.cosmotech.workspace_id,
        configuration.cosmotech.runner_id,
    )
    last_run_id = runner.get("lastRunId")

    # Connect to PostgreSQL and remove the runner metadata row
    with dbapi.connect(_psql.full_uri, autocommit=True) as conn:
        with conn.cursor() as curs:
            schema_table = f"{_psql.db_schema}.{_psql.table_prefix}RunnerMetadata"
            # BUGFIX: the run id was interpolated unquoted into the SQL
            # text, which is invalid SQL for a text value and an injection
            # vector. Use a parameterized query instead.
            sql_delete_from_metatable = f"""
            DELETE FROM {schema_table}
            WHERE last_run_id = %s;
            """
            curs.execute(sql_delete_from_metatable, (last_run_id,))
            conn.commit()
    return last_run_id
|
|
@@ -13,12 +13,13 @@ for store operations.
|
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
from time import perf_counter
|
|
16
|
-
import pyarrow
|
|
17
16
|
|
|
17
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
18
|
+
|
|
19
|
+
from cosmotech.coal.postgresql.utils import PostgresUtils
|
|
18
20
|
from cosmotech.coal.store.store import Store
|
|
21
|
+
from cosmotech.coal.utils.configuration import Configuration
|
|
19
22
|
from cosmotech.coal.utils.logger import LOGGER
|
|
20
|
-
from cosmotech.coal.utils.postgresql import send_pyarrow_table_to_postgresql
|
|
21
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
def dump_store_to_postgresql(
|
|
@@ -32,6 +33,8 @@ def dump_store_to_postgresql(
|
|
|
32
33
|
table_prefix: str = "Cosmotech_",
|
|
33
34
|
replace: bool = True,
|
|
34
35
|
force_encode: bool = False,
|
|
36
|
+
selected_tables: list[str] = [],
|
|
37
|
+
fk_id: str = None,
|
|
35
38
|
) -> None:
|
|
36
39
|
"""
|
|
37
40
|
Dump Store data to a PostgreSQL database.
|
|
@@ -46,36 +49,79 @@ def dump_store_to_postgresql(
|
|
|
46
49
|
postgres_password: PostgreSQL password
|
|
47
50
|
table_prefix: Table prefix
|
|
48
51
|
replace: Whether to replace existing tables
|
|
49
|
-
force_encode: force password encoding
|
|
52
|
+
force_encode: force password encoding to percent encoding
|
|
53
|
+
selected_tables: list of tables to send
|
|
54
|
+
fk_id: foreign key id to add to all table on all rows
|
|
55
|
+
"""
|
|
56
|
+
_c = Configuration()
|
|
57
|
+
_c.postgres.host = postgres_host
|
|
58
|
+
_c.postgres.port = postgres_port
|
|
59
|
+
_c.postgres.db_name = postgres_db
|
|
60
|
+
_c.postgres.db_schema = postgres_schema
|
|
61
|
+
_c.postgres.user_name = postgres_user
|
|
62
|
+
_c.postgres.user_password = postgres_password
|
|
63
|
+
_c.postgres.password_encoding = force_encode
|
|
64
|
+
_c.postgres.table_prefix = table_prefix
|
|
65
|
+
|
|
66
|
+
dump_store_to_postgresql_from_conf(
|
|
67
|
+
configuration=_c, store_folder=store_folder, replace=replace, selected_tables=selected_tables, fk_id=fk_id
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def dump_store_to_postgresql_from_conf(
|
|
72
|
+
configuration: Configuration,
|
|
73
|
+
store_folder: str,
|
|
74
|
+
replace: bool = True,
|
|
75
|
+
selected_tables: list[str] = [],
|
|
76
|
+
fk_id: str = None,
|
|
77
|
+
) -> None:
|
|
78
|
+
"""
|
|
79
|
+
Dump Store data to a PostgreSQL database.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
configuration: coal Configuration
|
|
83
|
+
store_folder: Folder containing the Store
|
|
84
|
+
replace: Whether to replace existing tables
|
|
85
|
+
selected_tables: list of tables to send
|
|
86
|
+
fk_id: foreign key id to add to all table on all rows
|
|
50
87
|
"""
|
|
88
|
+
_psql = PostgresUtils(configuration)
|
|
89
|
+
print(_psql.send_pyarrow_table_to_postgresql)
|
|
51
90
|
_s = Store(store_location=store_folder)
|
|
52
91
|
|
|
53
92
|
tables = list(_s.list_tables())
|
|
93
|
+
if selected_tables:
|
|
94
|
+
tables = [t for t in tables if t in selected_tables]
|
|
54
95
|
if len(tables):
|
|
55
|
-
LOGGER.info(T("coal.services.database.sending_data").format(table=f"{
|
|
96
|
+
LOGGER.info(T("coal.services.database.sending_data").format(table=f"{_psql.db_name}.{_psql.db_schema}"))
|
|
56
97
|
total_rows = 0
|
|
57
98
|
_process_start = perf_counter()
|
|
58
99
|
for table_name in tables:
|
|
59
100
|
_s_time = perf_counter()
|
|
60
|
-
target_table_name = f"{table_prefix}{table_name}"
|
|
101
|
+
target_table_name = f"{_psql.table_prefix}{table_name}"
|
|
61
102
|
LOGGER.info(T("coal.services.database.table_entry").format(table=target_table_name))
|
|
103
|
+
if fk_id:
|
|
104
|
+
_s.execute_query(
|
|
105
|
+
f"""
|
|
106
|
+
ALTER TABLE {_psql.table_prefix}{table_name}
|
|
107
|
+
ADD run_id TEXT NOT NULL
|
|
108
|
+
DEFAULT ({fk_id})
|
|
109
|
+
"""
|
|
110
|
+
)
|
|
62
111
|
data = _s.get_table(table_name)
|
|
63
112
|
if not len(data):
|
|
64
113
|
LOGGER.info(T("coal.services.database.no_rows"))
|
|
65
114
|
continue
|
|
66
115
|
_dl_time = perf_counter()
|
|
67
|
-
rows = send_pyarrow_table_to_postgresql(
|
|
116
|
+
rows = _psql.send_pyarrow_table_to_postgresql(
|
|
68
117
|
data,
|
|
69
118
|
target_table_name,
|
|
70
|
-
postgres_host,
|
|
71
|
-
postgres_port,
|
|
72
|
-
postgres_db,
|
|
73
|
-
postgres_schema,
|
|
74
|
-
postgres_user,
|
|
75
|
-
postgres_password,
|
|
76
119
|
replace,
|
|
77
|
-
force_encode,
|
|
78
120
|
)
|
|
121
|
+
if fk_id and _psql.is_metadata_exists():
|
|
122
|
+
metadata_table = f"{_psql.table_prefix}RunnerMetadata"
|
|
123
|
+
_psql.add_fk_constraint(table_name, "run_id", metadata_table, "last_run_id")
|
|
124
|
+
|
|
79
125
|
total_rows += rows
|
|
80
126
|
_up_time = perf_counter()
|
|
81
127
|
LOGGER.info(T("coal.services.database.row_count").format(count=rows))
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from urllib.parse import quote
|
|
10
|
+
|
|
11
|
+
import adbc_driver_manager
|
|
12
|
+
import pyarrow as pa
|
|
13
|
+
from adbc_driver_postgresql import dbapi
|
|
14
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
15
|
+
from pyarrow import Table
|
|
16
|
+
|
|
17
|
+
from cosmotech.coal.utils.configuration import Configuration
|
|
18
|
+
from cosmotech.coal.utils.logger import LOGGER
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PostgresUtils:
    """PostgreSQL helper built from the ``postgres`` section of a coal Configuration.

    Exposes the connection URI and a few ADBC-based helpers (schema lookup,
    PyArrow table ingestion, foreign-key constraint creation).
    """

    def __init__(self, configuration: Configuration):
        # Only the "postgres" section of the configuration is used.
        self._configuration = configuration.postgres

    @property
    def table_prefix(self) -> str:
        # Optional setting; defaults to the historical "Cosmotech_" prefix.
        if "table_prefix" in self._configuration:
            return self._configuration.table_prefix
        return "Cosmotech_"

    @property
    def db_name(self):
        return self._configuration.db_name

    @property
    def db_schema(self):
        return self._configuration.db_schema

    @property
    def host_uri(self):
        return self._configuration.host

    @property
    def host_port(self):
        return self._configuration.port

    @property
    def user_name(self):
        return self._configuration.user_name

    @property
    def user_password(self):
        return self._configuration.user_password

    @property
    def password_encoding(self) -> bool:
        # Optional flag: when true the password is percent-encoded in the URI.
        if "password_encoding" in self._configuration:
            return self._configuration.password_encoding
        return False

    @property
    def full_uri(self) -> str:
        """Full ``postgresql://user:password@host:port/db`` connection URI.

        The password is percent-encoded when ``password_encoding`` is set.
        Nothing about the password is ever logged.
        """
        encoded_password = self.user_password
        if self.password_encoding:
            encoded_password = quote(self.user_password, safe="")

        return (
            "postgresql://" + f"{self.user_name}"
            f":{encoded_password}"
            f"@{self.host_uri}"
            f":{self.host_port}"
            f"/{self.db_name}"
        )

    def metadata_table_name(self) -> str:
        """Return the runner-metadata table name (prefix + ``RunnerMetadata``)."""
        return f"{self.table_prefix}RunnerMetadata"

    def get_postgresql_table_schema(self, target_table_name: str) -> Optional[pa.Schema]:
        """
        Get the schema of an existing PostgreSQL table using SQL queries.

        Args:
            target_table_name: Name of the table

        Returns:
            PyArrow Schema if table exists, None otherwise
        """
        LOGGER.debug(
            T("coal.services.postgresql.getting_schema").format(
                postgres_schema=self.db_schema, target_table_name=target_table_name
            )
        )

        with dbapi.connect(self.full_uri) as conn:
            try:
                return conn.adbc_get_table_schema(
                    target_table_name,
                    db_schema_filter=self.db_schema,
                )
            except adbc_driver_manager.ProgrammingError:
                LOGGER.warning(
                    T("coal.services.postgresql.table_not_found").format(
                        postgres_schema=self.db_schema, target_table_name=target_table_name
                    )
                )
                return None

    def send_pyarrow_table_to_postgresql(
        self,
        data: Table,
        target_table_name: str,
        replace: bool,
    ) -> int:
        """Ingest a PyArrow table into PostgreSQL via ADBC.

        When the target table already exists and ``replace`` is False, the
        data is first adapted to the existing table's schema.

        Args:
            data: PyArrow table to send
            target_table_name: Destination table name
            replace: True to replace the table, False to create/append

        Returns:
            Number of rows ingested.
        """
        LOGGER.debug(
            T("coal.services.postgresql.preparing_send").format(
                postgres_schema=self.db_schema, target_table_name=target_table_name
            )
        )
        LOGGER.debug(T("coal.services.postgresql.input_rows").format(rows=len(data)))

        # Get existing schema if table exists
        existing_schema = self.get_postgresql_table_schema(target_table_name)

        if existing_schema is not None:
            LOGGER.debug(T("coal.services.postgresql.found_existing_table").format(schema=existing_schema))
            if not replace:
                LOGGER.debug(T("coal.services.postgresql.adapting_data"))
                data = adapt_table_to_schema(data, existing_schema)
            else:
                LOGGER.debug(T("coal.services.postgresql.replace_mode"))
        else:
            LOGGER.debug(T("coal.services.postgresql.no_existing_table"))

        # Proceed with ingestion
        total = 0

        LOGGER.debug(T("coal.services.postgresql.connecting"))
        with dbapi.connect(self.full_uri, autocommit=True) as conn:
            with conn.cursor() as curs:
                mode = "replace" if replace else "create_append"
                LOGGER.debug(T("coal.services.postgresql.ingesting_data").format(mode=mode))
                total += curs.adbc_ingest(target_table_name, data, mode, db_schema_name=self.db_schema)

        LOGGER.debug(T("coal.services.postgresql.ingestion_success").format(rows=total))
        return total

    def add_fk_constraint(
        self,
        from_table: str,
        from_col: str,
        to_table: str,
        to_col: str,
    ) -> None:
        """Add a FOREIGN KEY constraint named ``metadata`` on *from_table*.

        Args:
            from_table: Table (in this schema) receiving the constraint
            from_col: Column of *from_table* holding the foreign key
            to_table: Referenced table
            to_col: Referenced column
        """
        with dbapi.connect(self.full_uri, autocommit=True) as conn:
            with conn.cursor() as curs:
                # BUGFIX: the original statement was missing the ADD keyword
                # ("ALTER TABLE ... CONSTRAINT ..." is not valid PostgreSQL).
                sql_add_fk = f"""
                ALTER TABLE {self.db_schema}.{from_table}
                ADD CONSTRAINT metadata FOREIGN KEY ({from_col}) REFERENCES {to_table}({to_col})
                """
                curs.execute(sql_add_fk)
                conn.commit()

    def is_metadata_exists(self) -> bool:
        """Return True when the runner-metadata table exists in this schema."""
        with dbapi.connect(self.full_uri, autocommit=True) as conn:
            try:
                conn.adbc_get_table_schema(
                    # BUGFIX: the original passed the bound method object
                    # (missing call parentheses) as the table name.
                    self.metadata_table_name(),
                    db_schema_filter=self.db_schema,
                )
                return True
            except adbc_driver_manager.ProgrammingError:
                return False
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def adapt_table_to_schema(data: pa.Table, target_schema: pa.Schema) -> pa.Table:
    """
    Adapt a PyArrow table to match a target schema with detailed logging.

    For each field of *target_schema*: an existing column is cast to the
    target type (falling back to a null column if the cast fails); a missing
    column is added as nulls. Columns absent from the target schema are
    dropped.

    Args:
        data: Input table
        target_schema: Schema the returned table must match

    Returns:
        A new table whose schema is exactly *target_schema*.
    """
    LOGGER.debug(T("coal.services.postgresql.schema_adaptation_start").format(rows=len(data)))
    LOGGER.debug(T("coal.services.postgresql.original_schema").format(schema=data.schema))
    LOGGER.debug(T("coal.services.postgresql.target_schema").format(schema=target_schema))

    target_fields = {field.name: field.type for field in target_schema}
    new_columns = []

    # Track adaptations for summary
    added_columns = []
    type_conversions = []
    failed_conversions = []

    # Process each field in target schema
    for field_name, target_type in target_fields.items():
        if field_name in data.column_names:
            # Column exists - try to cast to target type
            col = data[field_name]
            original_type = col.type

            if original_type != target_type:
                LOGGER.debug(
                    T("coal.services.postgresql.casting_column").format(
                        field_name=field_name,
                        original_type=original_type,
                        target_type=target_type,
                    )
                )
                try:
                    new_col = pa.compute.cast(col, target_type)
                    new_columns.append(new_col)
                    type_conversions.append(f"{field_name}: {original_type} -> {target_type}")
                # ROBUSTNESS: casts may also raise ArrowNotImplementedError
                # (no kernel for the type pair), not only ArrowInvalid;
                # both now take the null-column fallback.
                except (pa.ArrowInvalid, pa.ArrowNotImplementedError) as e:
                    LOGGER.warning(
                        T("coal.services.postgresql.cast_failed").format(
                            field_name=field_name,
                            original_type=original_type,
                            target_type=target_type,
                            error=str(e),
                        )
                    )
                    new_columns.append(pa.nulls(len(data), type=target_type))
                    failed_conversions.append(f"{field_name}: {original_type} -> {target_type}")
            else:
                new_columns.append(col)
        else:
            # Column doesn't exist - add nulls
            LOGGER.debug(T("coal.services.postgresql.adding_missing_column").format(field_name=field_name))
            new_columns.append(pa.nulls(len(data), type=target_type))
            added_columns.append(field_name)

    # Log columns that will be dropped
    dropped_columns = [name for name in data.column_names if name not in target_fields]
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropping_columns").format(columns=dropped_columns))

    # Create new table
    adapted_table = pa.Table.from_arrays(new_columns, schema=target_schema)

    # Log summary of adaptations
    LOGGER.debug(T("coal.services.postgresql.adaptation_summary"))
    if added_columns:
        LOGGER.debug(T("coal.services.postgresql.added_columns").format(columns=added_columns))
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropped_columns").format(columns=dropped_columns))
    if type_conversions:
        LOGGER.debug(T("coal.services.postgresql.successful_conversions").format(conversions=type_conversions))
    if failed_conversions:
        LOGGER.debug(T("coal.services.postgresql.failed_conversions").format(conversions=failed_conversions))

    LOGGER.debug(T("coal.services.postgresql.final_schema").format(schema=adapted_table.schema))
    return adapted_table
|
|
File without changes
|