cosmotech-acceleration-library 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. cosmotech/coal/__init__.py +1 -1
  2. cosmotech/coal/aws/__init__.py +1 -9
  3. cosmotech/coal/aws/s3.py +181 -214
  4. cosmotech/coal/azure/adx/auth.py +2 -2
  5. cosmotech/coal/azure/adx/runner.py +13 -14
  6. cosmotech/coal/azure/adx/store.py +5 -86
  7. cosmotech/coal/azure/adx/tables.py +2 -2
  8. cosmotech/coal/azure/blob.py +6 -6
  9. cosmotech/coal/azure/storage.py +3 -3
  10. cosmotech/coal/cosmotech_api/__init__.py +0 -24
  11. cosmotech/coal/cosmotech_api/apis/__init__.py +14 -0
  12. cosmotech/coal/cosmotech_api/apis/dataset.py +103 -0
  13. cosmotech/coal/cosmotech_api/apis/meta.py +25 -0
  14. cosmotech/coal/cosmotech_api/apis/organization.py +24 -0
  15. cosmotech/coal/cosmotech_api/apis/run.py +38 -0
  16. cosmotech/coal/cosmotech_api/apis/runner.py +71 -0
  17. cosmotech/coal/cosmotech_api/apis/solution.py +23 -0
  18. cosmotech/coal/cosmotech_api/apis/workspace.py +108 -0
  19. cosmotech/coal/cosmotech_api/objects/__init__.py +9 -0
  20. cosmotech/coal/cosmotech_api/objects/connection.py +125 -0
  21. cosmotech/coal/cosmotech_api/objects/parameters.py +127 -0
  22. cosmotech/coal/postgresql/runner.py +56 -36
  23. cosmotech/coal/postgresql/store.py +60 -14
  24. cosmotech/coal/postgresql/utils.py +254 -0
  25. cosmotech/coal/store/output/__init__.py +0 -0
  26. cosmotech/coal/store/output/aws_channel.py +73 -0
  27. cosmotech/coal/store/output/az_storage_channel.py +42 -0
  28. cosmotech/coal/store/output/channel_interface.py +23 -0
  29. cosmotech/coal/store/output/channel_spliter.py +55 -0
  30. cosmotech/coal/store/output/postgres_channel.py +40 -0
  31. cosmotech/coal/utils/configuration.py +169 -0
  32. cosmotech/coal/utils/decorator.py +22 -0
  33. cosmotech/csm_data/commands/api/api.py +6 -19
  34. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +20 -16
  35. cosmotech/csm_data/commands/api/run_load_data.py +7 -46
  36. cosmotech/csm_data/commands/api/wsf_load_file.py +14 -15
  37. cosmotech/csm_data/commands/api/wsf_send_file.py +12 -13
  38. cosmotech/csm_data/commands/s3_bucket_delete.py +16 -15
  39. cosmotech/csm_data/commands/s3_bucket_download.py +16 -16
  40. cosmotech/csm_data/commands/s3_bucket_upload.py +16 -14
  41. cosmotech/csm_data/commands/store/dump_to_s3.py +18 -16
  42. cosmotech/csm_data/commands/store/output.py +35 -0
  43. cosmotech/csm_data/commands/store/store.py +3 -4
  44. cosmotech/translation/coal/en-US/coal/cosmotech_api/initialization.yml +8 -0
  45. cosmotech/translation/coal/en-US/coal/services/dataset.yml +4 -14
  46. cosmotech/translation/coal/en-US/coal/store/output/data_interface.yml +1 -0
  47. cosmotech/translation/coal/en-US/coal/store/output/split.yml +6 -0
  48. cosmotech/translation/coal/en-US/coal/utils/configuration.yml +2 -0
  49. cosmotech/translation/csm_data/en-US/csm_data/commands/store/output.yml +7 -0
  50. {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/METADATA +8 -9
  51. {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/RECORD +55 -71
  52. cosmotech/coal/azure/functions.py +0 -72
  53. cosmotech/coal/cosmotech_api/connection.py +0 -96
  54. cosmotech/coal/cosmotech_api/dataset/__init__.py +0 -26
  55. cosmotech/coal/cosmotech_api/dataset/converters.py +0 -164
  56. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +0 -19
  57. cosmotech/coal/cosmotech_api/dataset/download/adt.py +0 -119
  58. cosmotech/coal/cosmotech_api/dataset/download/common.py +0 -140
  59. cosmotech/coal/cosmotech_api/dataset/download/file.py +0 -216
  60. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +0 -188
  61. cosmotech/coal/cosmotech_api/dataset/utils.py +0 -132
  62. cosmotech/coal/cosmotech_api/parameters.py +0 -48
  63. cosmotech/coal/cosmotech_api/run.py +0 -25
  64. cosmotech/coal/cosmotech_api/run_data.py +0 -173
  65. cosmotech/coal/cosmotech_api/run_template.py +0 -108
  66. cosmotech/coal/cosmotech_api/runner/__init__.py +0 -28
  67. cosmotech/coal/cosmotech_api/runner/data.py +0 -38
  68. cosmotech/coal/cosmotech_api/runner/datasets.py +0 -364
  69. cosmotech/coal/cosmotech_api/runner/download.py +0 -146
  70. cosmotech/coal/cosmotech_api/runner/metadata.py +0 -42
  71. cosmotech/coal/cosmotech_api/runner/parameters.py +0 -157
  72. cosmotech/coal/cosmotech_api/twin_data_layer.py +0 -512
  73. cosmotech/coal/cosmotech_api/workspace.py +0 -127
  74. cosmotech/coal/utils/api.py +0 -68
  75. cosmotech/coal/utils/postgresql.py +0 -236
  76. cosmotech/csm_data/commands/api/rds_load_csv.py +0 -90
  77. cosmotech/csm_data/commands/api/rds_send_csv.py +0 -74
  78. cosmotech/csm_data/commands/api/rds_send_store.py +0 -74
  79. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +0 -66
  80. cosmotech/csm_data/commands/api/tdl_load_files.py +0 -76
  81. cosmotech/csm_data/commands/api/tdl_send_files.py +0 -82
  82. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +0 -27
  83. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +0 -27
  84. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +0 -27
  85. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +0 -27
  86. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +0 -32
  87. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +0 -27
  88. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +0 -2
  89. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_load_csv.yml +0 -13
  90. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_csv.yml +0 -12
  91. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_store.yml +0 -12
  92. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_load_files.yml +0 -14
  93. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_send_files.yml +0 -18
  94. {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/WHEEL +0 -0
  95. {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/entry_points.txt +0 -0
  96. {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/licenses/LICENSE +0 -0
  97. {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,127 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+ import json
8
+ import os
9
+ import pathlib
10
+ from csv import DictWriter
11
+ from typing import Any, Dict, List
12
+
13
+ from cosmotech.orchestrator.utils.translate import T
14
+
15
+ from cosmotech.coal.utils.logger import LOGGER
16
+
17
+
18
class Parameters:
    """
    Holds a runner's parameter values and serializes them to JSON/CSV files.
    """

    # Mapping of parameter_id -> value for the runner.
    values: Dict[str, Any]
    # Parameters as dicts with keys: parameterId, value, varType, isInherited.
    parameters_list: List[Dict[str, Any]]

    def __init__(self, runner_data: Any):
        """
        Extract parameters from runner data.

        Args:
            runner_data: Runner data object exposing ``parameters_values``
        """
        # Use instance attributes: the previous class-level ``dict()`` /
        # ``list()`` defaults were shared (and mutated) by every instance.
        self.values = {}
        # Guard against a missing/None parameters_values attribute.
        for parameter in runner_data.parameters_values or []:
            self.values[parameter.parameter_id] = parameter.value
        self.parameters_list = self.format_parameters_list(runner_data)

    @staticmethod
    def format_parameters_list(runner_data: Any) -> List[Dict[str, Any]]:
        """
        Format parameters from runner data as a list of dictionaries.

        Args:
            runner_data: Runner data object

        Returns:
            List of parameter dictionaries
        """
        parameters = []

        if not runner_data.parameters_values:
            return parameters

        # Column widths are only used for aligned debug logging below.
        max_name_size = max(map(lambda r: len(r.parameter_id), runner_data.parameters_values))
        max_type_size = max(map(lambda r: len(r.var_type), runner_data.parameters_values))

        for parameter_data in runner_data.parameters_values:
            parameter_name = parameter_data.parameter_id
            value = parameter_data.value
            var_type = parameter_data.var_type
            is_inherited = parameter_data.is_inherited

            parameters.append(
                {
                    "parameterId": parameter_name,
                    "value": value,
                    "varType": var_type,
                    "isInherited": is_inherited,
                }
            )

            LOGGER.debug(
                T("coal.cosmotech_api.runner.parameter_debug").format(
                    param_id=parameter_name,
                    max_name_size=max_name_size,
                    var_type=var_type,
                    max_type_size=max_type_size,
                    value=value,
                    inherited=" inherited" if is_inherited else "",
                )
            )

        return parameters

    def write_parameters_to_json(
        self,
        parameter_folder: str,
    ) -> str:
        """
        Write the parameters list to ``parameters.json`` in the given folder.

        Args:
            parameter_folder: Destination folder (created if missing)

        Returns:
            Path of the written JSON file
        """
        pathlib.Path(parameter_folder).mkdir(exist_ok=True, parents=True)
        tmp_parameter_file = os.path.join(parameter_folder, "parameters.json")

        LOGGER.info(T("coal.cosmotech_api.runner.generating_file").format(file=tmp_parameter_file))

        with open(tmp_parameter_file, "w") as _file:
            json.dump(self.parameters_list, _file, indent=2)

        return tmp_parameter_file

    def write_parameters_to_csv(
        self,
        parameter_folder: str,
    ) -> str:
        """
        Write the parameters list to ``parameters.csv`` in the given folder.

        Args:
            parameter_folder: Destination folder (created if missing)

        Returns:
            Path of the written CSV file
        """
        pathlib.Path(parameter_folder).mkdir(exist_ok=True, parents=True)
        tmp_parameter_file = os.path.join(parameter_folder, "parameters.csv")

        LOGGER.info(T("coal.cosmotech_api.runner.generating_file").format(file=tmp_parameter_file))

        with open(tmp_parameter_file, "w") as _file:
            _w = DictWriter(_file, fieldnames=["parameterId", "value", "varType", "isInherited"])
            _w.writeheader()
            _w.writerows(self.parameters_list)

        return tmp_parameter_file

    def write_parameters(
        self,
        parameter_folder: str,
        write_csv: bool = True,
        write_json: bool = False,
    ) -> Dict[str, str]:
        """
        Write the parameters to CSV and/or JSON files.

        Args:
            parameter_folder: Destination folder
            write_csv: Write ``parameters.csv`` (default True)
            write_json: Write ``parameters.json`` (default False)

        Returns:
            Mapping of format ("csv"/"json") to written file path
        """
        result = {}

        if write_csv:
            result["csv"] = self.write_parameters_to_csv(parameter_folder)

        if write_json:
            result["json"] = self.write_parameters_to_json(parameter_folder)

        return result
@@ -13,56 +13,40 @@ for runner metadata operations.
13
13
  """
14
14
 
15
15
  from adbc_driver_postgresql import dbapi
16
+ from cosmotech.orchestrator.utils.translate import T
16
17
 
17
- from cosmotech.coal.cosmotech_api.connection import get_api_client
18
- from cosmotech.coal.cosmotech_api.runner.metadata import get_runner_metadata
18
+ from cosmotech.coal.cosmotech_api.apis.runner import RunnerApi
19
+ from cosmotech.coal.postgresql.utils import PostgresUtils
20
+ from cosmotech.coal.utils.configuration import Configuration
19
21
  from cosmotech.coal.utils.logger import LOGGER
20
- from cosmotech.coal.utils.postgresql import generate_postgresql_full_uri
21
- from cosmotech.orchestrator.utils.translate import T
22
22
 
23
23
 
24
24
  def send_runner_metadata_to_postgresql(
25
- organization_id: str,
26
- workspace_id: str,
27
- runner_id: str,
28
- postgres_host: str,
29
- postgres_port: int,
30
- postgres_db: str,
31
- postgres_schema: str,
32
- postgres_user: str,
33
- postgres_password: str,
34
- table_prefix: str = "Cosmotech_",
35
- force_encode: bool = False,
36
- ) -> None:
25
+ configuration: Configuration,
26
+ ) -> str:
37
27
  """
38
28
  Send runner metadata to a PostgreSQL database.
39
29
 
40
30
  Args:
31
+ configuration: coal configuration
41
32
  organization_id: Organization ID
42
33
  workspace_id: Workspace ID
43
34
  runner_id: Runner ID
44
- postgres_host: PostgreSQL host
45
- postgres_port: PostgreSQL port
46
- postgres_db: PostgreSQL database name
47
- postgres_schema: PostgreSQL schema
48
- postgres_user: PostgreSQL username
49
- postgres_password: PostgreSQL password
50
- table_prefix: Table prefix
51
- force_encode: force password encoding to percent encoding
52
35
  """
53
- # Get runner metadata
54
- with get_api_client()[0] as api_client:
55
- runner = get_runner_metadata(api_client, organization_id, workspace_id, runner_id)
36
+ _psql = PostgresUtils(configuration)
56
37
 
57
- # Generate PostgreSQL URI
58
- postgresql_full_uri = generate_postgresql_full_uri(
59
- postgres_host, str(postgres_port), postgres_db, postgres_user, postgres_password, force_encode=force_encode
38
+ # Get runner metadata
39
+ _runner_api = RunnerApi(configuration)
40
+ runner = _runner_api.get_runner_metadata(
41
+ configuration.cosmotech.organization_id,
42
+ configuration.cosmotech.workspace_id,
43
+ configuration.cosmotech.runner_id,
60
44
  )
61
45
 
62
46
  # Connect to PostgreSQL and update runner metadata
63
- with dbapi.connect(postgresql_full_uri, autocommit=True) as conn:
47
+ with dbapi.connect(_psql.full_uri, autocommit=True) as conn:
64
48
  with conn.cursor() as curs:
65
- schema_table = f"{postgres_schema}.{table_prefix}RunnerMetadata"
49
+ schema_table = f"{str(_psql.db_schema)}.{str(_psql.table_prefix)}RunnerMetadata"
66
50
  sql_create_table = f"""
67
51
  CREATE TABLE IF NOT EXISTS {schema_table} (
68
52
  id varchar(32) PRIMARY KEY,
@@ -71,6 +55,10 @@ def send_runner_metadata_to_postgresql(
71
55
  run_template_id varchar(32)
72
56
  );
73
57
  """
58
+ LOGGER.info(T("coal.services.postgresql.creating_table").format(schema_table=schema_table))
59
+ curs.execute(sql_create_table)
60
+ conn.commit()
61
+ LOGGER.info(T("coal.services.postgresql.metadata"))
74
62
  sql_upsert = f"""
75
63
  INSERT INTO {schema_table} (id, name, last_run_id, run_template_id)
76
64
  VALUES(%s, %s, %s, %s)
@@ -78,10 +66,6 @@ def send_runner_metadata_to_postgresql(
78
66
  DO
79
67
  UPDATE SET name = EXCLUDED.name, last_run_id = EXCLUDED.last_run_id;
80
68
  """
81
- LOGGER.info(T("coal.services.postgresql.creating_table").format(schema_table=schema_table))
82
- curs.execute(sql_create_table)
83
- conn.commit()
84
- LOGGER.info(T("coal.services.postgresql.metadata"))
85
69
  curs.execute(
86
70
  sql_upsert,
87
71
  (
@@ -93,3 +77,39 @@ def send_runner_metadata_to_postgresql(
93
77
  )
94
78
  conn.commit()
95
79
  LOGGER.info(T("coal.services.postgresql.metadata_updated"))
80
+ return runner.get("lastRunId")
81
+
82
+
83
def remove_runner_metadata_from_postgresql(
    configuration: Configuration,
) -> str:
    """
    Remove the current run's row from the RunnerMetadata table.

    Deleting the metadata row triggers cascade deletes on the tables that
    reference it through a foreign key.

    Args:
        configuration: coal configuration (provides API and PostgreSQL settings)

    Returns:
        The runner's last run id (``lastRunId``) whose row was deleted
    """
    _psql = PostgresUtils(configuration)

    # Get runner metadata from the API to learn the last run id
    _runner_api = RunnerApi(configuration)
    runner = _runner_api.get_runner_metadata(
        configuration.cosmotech.organization_id,
        configuration.cosmotech.workspace_id,
        configuration.cosmotech.runner_id,
    )
    last_run_id = runner.get("lastRunId")

    # Connect to PostgreSQL and remove runner metadata row
    with dbapi.connect(_psql.full_uri, autocommit=True) as conn:
        with conn.cursor() as curs:
            schema_table = f"{_psql.db_schema}.{_psql.table_prefix}RunnerMetadata"
            # Bind the run id as a query parameter: the previous version
            # interpolated it unquoted into the SQL text, which is a syntax
            # error for non-numeric ids and an SQL-injection vector.
            sql_delete_from_metatable = f"""
            DELETE FROM {schema_table}
            WHERE last_run_id = %s;
            """
            curs.execute(sql_delete_from_metatable, (last_run_id,))
            conn.commit()
    return last_run_id
@@ -13,12 +13,13 @@ for store operations.
13
13
  """
14
14
 
15
15
  from time import perf_counter
16
- import pyarrow
17
16
 
17
+ from cosmotech.orchestrator.utils.translate import T
18
+
19
+ from cosmotech.coal.postgresql.utils import PostgresUtils
18
20
  from cosmotech.coal.store.store import Store
21
+ from cosmotech.coal.utils.configuration import Configuration
19
22
  from cosmotech.coal.utils.logger import LOGGER
20
- from cosmotech.coal.utils.postgresql import send_pyarrow_table_to_postgresql
21
- from cosmotech.orchestrator.utils.translate import T
22
23
 
23
24
 
24
25
  def dump_store_to_postgresql(
@@ -32,6 +33,8 @@ def dump_store_to_postgresql(
32
33
  table_prefix: str = "Cosmotech_",
33
34
  replace: bool = True,
34
35
  force_encode: bool = False,
36
+ selected_tables: list[str] = [],
37
+ fk_id: str = None,
35
38
  ) -> None:
36
39
  """
37
40
  Dump Store data to a PostgreSQL database.
@@ -46,36 +49,79 @@ def dump_store_to_postgresql(
46
49
  postgres_password: PostgreSQL password
47
50
  table_prefix: Table prefix
48
51
  replace: Whether to replace existing tables
49
- force_encode: force password encoding
52
+ force_encode: force password encoding to percent encoding
53
+ selected_tables: list of tables to send
54
+ fk_id: foreign key id to add to all table on all rows
55
+ """
56
+ _c = Configuration()
57
+ _c.postgres.host = postgres_host
58
+ _c.postgres.port = postgres_port
59
+ _c.postgres.db_name = postgres_db
60
+ _c.postgres.db_schema = postgres_schema
61
+ _c.postgres.user_name = postgres_user
62
+ _c.postgres.user_password = postgres_password
63
+ _c.postgres.password_encoding = force_encode
64
+ _c.postgres.table_prefix = table_prefix
65
+
66
+ dump_store_to_postgresql_from_conf(
67
+ configuration=_c, store_folder=store_folder, replace=replace, selected_tables=selected_tables, fk_id=fk_id
68
+ )
69
+
70
+
71
+ def dump_store_to_postgresql_from_conf(
72
+ configuration: Configuration,
73
+ store_folder: str,
74
+ replace: bool = True,
75
+ selected_tables: list[str] = [],
76
+ fk_id: str = None,
77
+ ) -> None:
78
+ """
79
+ Dump Store data to a PostgreSQL database.
80
+
81
+ Args:
82
+ configuration: coal Configuration
83
+ store_folder: Folder containing the Store
84
+ replace: Whether to replace existing tables
85
+ selected_tables: list of tables to send
86
+ fk_id: foreign key id to add to all table on all rows
50
87
  """
88
+ _psql = PostgresUtils(configuration)
89
+ print(_psql.send_pyarrow_table_to_postgresql)
51
90
  _s = Store(store_location=store_folder)
52
91
 
53
92
  tables = list(_s.list_tables())
93
+ if selected_tables:
94
+ tables = [t for t in tables if t in selected_tables]
54
95
  if len(tables):
55
- LOGGER.info(T("coal.services.database.sending_data").format(table=f"{postgres_db}.{postgres_schema}"))
96
+ LOGGER.info(T("coal.services.database.sending_data").format(table=f"{_psql.db_name}.{_psql.db_schema}"))
56
97
  total_rows = 0
57
98
  _process_start = perf_counter()
58
99
  for table_name in tables:
59
100
  _s_time = perf_counter()
60
- target_table_name = f"{table_prefix}{table_name}"
101
+ target_table_name = f"{_psql.table_prefix}{table_name}"
61
102
  LOGGER.info(T("coal.services.database.table_entry").format(table=target_table_name))
103
+ if fk_id:
104
+ _s.execute_query(
105
+ f"""
106
+ ALTER TABLE {_psql.table_prefix}{table_name}
107
+ ADD run_id TEXT NOT NULL
108
+ DEFAULT ({fk_id})
109
+ """
110
+ )
62
111
  data = _s.get_table(table_name)
63
112
  if not len(data):
64
113
  LOGGER.info(T("coal.services.database.no_rows"))
65
114
  continue
66
115
  _dl_time = perf_counter()
67
- rows = send_pyarrow_table_to_postgresql(
116
+ rows = _psql.send_pyarrow_table_to_postgresql(
68
117
  data,
69
118
  target_table_name,
70
- postgres_host,
71
- postgres_port,
72
- postgres_db,
73
- postgres_schema,
74
- postgres_user,
75
- postgres_password,
76
119
  replace,
77
- force_encode,
78
120
  )
121
+ if fk_id and _psql.is_metadata_exists():
122
+ metadata_table = f"{_psql.table_prefix}RunnerMetadata"
123
+ _psql.add_fk_constraint(table_name, "run_id", metadata_table, "last_run_id")
124
+
79
125
  total_rows += rows
80
126
  _up_time = perf_counter()
81
127
  LOGGER.info(T("coal.services.database.row_count").format(count=rows))
@@ -0,0 +1,254 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ from typing import Optional
9
+ from urllib.parse import quote
10
+
11
+ import adbc_driver_manager
12
+ import pyarrow as pa
13
+ from adbc_driver_postgresql import dbapi
14
+ from cosmotech.orchestrator.utils.translate import T
15
+ from pyarrow import Table
16
+
17
+ from cosmotech.coal.utils.configuration import Configuration
18
+ from cosmotech.coal.utils.logger import LOGGER
19
+
20
+
21
class PostgresUtils:
    """
    PostgreSQL helper built on the coal Configuration.

    Exposes the connection settings as properties, builds the full
    ``postgresql://`` URI, and wraps common ADBC operations: table schema
    lookup, PyArrow ingestion, and foreign-key constraint creation.
    """

    def __init__(self, configuration: Configuration):
        # Only the `postgres` section of the configuration is used.
        self._configuration = configuration.postgres

    @property
    def table_prefix(self):
        # Optional setting; defaults to the historical "Cosmotech_" prefix.
        if "table_prefix" in self._configuration:
            return self._configuration.table_prefix
        return "Cosmotech_"

    @property
    def db_name(self):
        return self._configuration.db_name

    @property
    def db_schema(self):
        return self._configuration.db_schema

    @property
    def host_uri(self):
        return self._configuration.host

    @property
    def host_port(self):
        return self._configuration.port

    @property
    def user_name(self):
        return self._configuration.user_name

    @property
    def user_password(self):
        return self._configuration.user_password

    @property
    def password_encoding(self):
        # Optional flag: when true, the password is percent-encoded in the URI.
        if "password_encoding" in self._configuration:
            return self._configuration.password_encoding
        return False

    @property
    def full_uri(self) -> str:
        """Full ``postgresql://`` connection URI (never logged: contains the password)."""
        # Check if password needs percent encoding (contains special characters)
        # We don't log anything about the password for security
        encoded_password = self.user_password
        if self.password_encoding:
            encoded_password = quote(self.user_password, safe="")

        return (
            "postgresql://" + f"{self.user_name}"
            f":{encoded_password}"
            f"@{self.host_uri}"
            f":{self.host_port}"
            f"/{self.db_name}"
        )

    def metadata_table_name(self) -> str:
        """Return the runner metadata table name (prefix + ``RunnerMetadata``)."""
        return f"{self.table_prefix}RunnerMetadata"

    def get_postgresql_table_schema(self, target_table_name: str) -> Optional[pa.Schema]:
        """
        Get the schema of an existing PostgreSQL table using SQL queries.

        Args:
            target_table_name: Name of the table

        Returns:
            PyArrow Schema if table exists, None otherwise
        """
        LOGGER.debug(
            T("coal.services.postgresql.getting_schema").format(
                postgres_schema=self.db_schema, target_table_name=target_table_name
            )
        )

        with dbapi.connect(self.full_uri) as conn:
            try:
                return conn.adbc_get_table_schema(
                    target_table_name,
                    db_schema_filter=self.db_schema,
                )
            except adbc_driver_manager.ProgrammingError:
                # The driver raises ProgrammingError when the table is absent.
                LOGGER.warning(
                    T("coal.services.postgresql.table_not_found").format(
                        postgres_schema=self.db_schema, target_table_name=target_table_name
                    )
                )
                return None

    def send_pyarrow_table_to_postgresql(
        self,
        data: Table,
        target_table_name: str,
        replace: bool,
    ) -> int:
        """
        Ingest a PyArrow table into PostgreSQL via ADBC.

        Args:
            data: PyArrow table to send
            target_table_name: Destination table name
            replace: If True the table is replaced; otherwise data is adapted
                to the existing schema (if any) and appended

        Returns:
            Number of rows ingested
        """
        LOGGER.debug(
            T("coal.services.postgresql.preparing_send").format(
                postgres_schema=self.db_schema, target_table_name=target_table_name
            )
        )
        LOGGER.debug(T("coal.services.postgresql.input_rows").format(rows=len(data)))

        # Get existing schema if table exists
        existing_schema = self.get_postgresql_table_schema(target_table_name)

        if existing_schema is not None:
            LOGGER.debug(T("coal.services.postgresql.found_existing_table").format(schema=existing_schema))
            if not replace:
                # Appending: cast/extend the data to match the existing table.
                LOGGER.debug(T("coal.services.postgresql.adapting_data"))
                data = adapt_table_to_schema(data, existing_schema)
            else:
                LOGGER.debug(T("coal.services.postgresql.replace_mode"))
        else:
            LOGGER.debug(T("coal.services.postgresql.no_existing_table"))

        # Proceed with ingestion
        total = 0

        LOGGER.debug(T("coal.services.postgresql.connecting"))
        with dbapi.connect(self.full_uri, autocommit=True) as conn:
            with conn.cursor() as curs:
                mode = "replace" if replace else "create_append"
                LOGGER.debug(T("coal.services.postgresql.ingesting_data").format(mode=mode))
                total += curs.adbc_ingest(target_table_name, data, mode, db_schema_name=self.db_schema)

        LOGGER.debug(T("coal.services.postgresql.ingestion_success").format(rows=total))
        return total

    def add_fk_constraint(
        self,
        from_table: str,
        from_col: str,
        to_table: str,
        to_col: str,
    ) -> None:
        """
        Add a foreign-key constraint from ``from_table.from_col`` to
        ``to_table.to_col``.

        NOTE(review): ``to_table`` is not schema-qualified while
        ``from_table`` is — confirm both live in ``db_schema``.
        """
        with dbapi.connect(self.full_uri, autocommit=True) as conn:
            with conn.cursor() as curs:
                # ALTER TABLE requires the ADD keyword before CONSTRAINT;
                # the previous statement was invalid SQL.
                sql_add_fk = f"""
                ALTER TABLE {self.db_schema}.{from_table}
                ADD CONSTRAINT metadata FOREIGN KEY ({from_col}) REFERENCES {to_table}({to_col})
                """
                curs.execute(sql_add_fk)
                conn.commit()

    def is_metadata_exists(self) -> bool:
        """
        Check whether the runner metadata table exists.

        Returns:
            True if the table exists in ``db_schema``, False otherwise
        """
        with dbapi.connect(self.full_uri, autocommit=True) as conn:
            try:
                # metadata_table_name is a method and must be called: the
                # previous code passed the bound method object as the name.
                conn.adbc_get_table_schema(
                    self.metadata_table_name(),
                    db_schema_filter=self.db_schema,
                )
                return True
            except adbc_driver_manager.ProgrammingError:
                return False
177
+
178
+
179
def adapt_table_to_schema(data: pa.Table, target_schema: pa.Schema) -> pa.Table:
    """
    Adapt a PyArrow table to match a target schema, with detailed logging.

    For every field of ``target_schema``:
      - an existing column is cast to the target type (a failed cast becomes
        an all-null column of the target type),
      - a missing column is added as an all-null column.
    Columns absent from the target schema are dropped.

    Args:
        data: Input table
        target_schema: Schema the returned table must conform to

    Returns:
        A new table whose schema is exactly ``target_schema``
    """
    LOGGER.debug(T("coal.services.postgresql.schema_adaptation_start").format(rows=len(data)))
    LOGGER.debug(T("coal.services.postgresql.original_schema").format(schema=data.schema))
    LOGGER.debug(T("coal.services.postgresql.target_schema").format(schema=target_schema))

    target_fields = {field.name: field.type for field in target_schema}
    new_columns = []

    # Track adaptations for the summary logged at the end.
    added_columns = []
    type_conversions = []
    failed_conversions = []

    # Process each field in target schema
    for field_name, target_type in target_fields.items():
        if field_name in data.column_names:
            # Column exists - try to cast to target type
            col = data[field_name]
            original_type = col.type

            if original_type != target_type:
                LOGGER.debug(
                    T("coal.services.postgresql.casting_column").format(
                        field_name=field_name,
                        original_type=original_type,
                        target_type=target_type,
                    )
                )
                try:
                    # NOTE(review): relies on ``pa.compute`` being reachable
                    # from the top-level pyarrow import — confirm with the
                    # pinned pyarrow version.
                    new_col = pa.compute.cast(col, target_type)
                    new_columns.append(new_col)
                    type_conversions.append(f"{field_name}: {original_type} -> {target_type}")
                except pa.ArrowInvalid as e:
                    # Uncastable column: degrade to nulls rather than failing
                    # the whole ingestion.
                    LOGGER.warning(
                        T("coal.services.postgresql.cast_failed").format(
                            field_name=field_name,
                            original_type=original_type,
                            target_type=target_type,
                            error=str(e),
                        )
                    )
                    new_columns.append(pa.nulls(len(data), type=target_type))
                    failed_conversions.append(f"{field_name}: {original_type} -> {target_type}")
            else:
                new_columns.append(col)
        else:
            # Column doesn't exist - add nulls
            LOGGER.debug(T("coal.services.postgresql.adding_missing_column").format(field_name=field_name))
            new_columns.append(pa.nulls(len(data), type=target_type))
            added_columns.append(field_name)

    # Log columns that will be dropped
    dropped_columns = [name for name in data.column_names if name not in target_fields]
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropping_columns").format(columns=dropped_columns))

    # Create new table
    adapted_table = pa.Table.from_arrays(new_columns, schema=target_schema)

    # Log summary of adaptations
    LOGGER.debug(T("coal.services.postgresql.adaptation_summary"))
    if added_columns:
        LOGGER.debug(T("coal.services.postgresql.added_columns").format(columns=added_columns))
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropped_columns").format(columns=dropped_columns))
    if type_conversions:
        LOGGER.debug(T("coal.services.postgresql.successful_conversions").format(conversions=type_conversions))
    if failed_conversions:
        LOGGER.debug(T("coal.services.postgresql.failed_conversions").format(conversions=failed_conversions))

    LOGGER.debug(T("coal.services.postgresql.final_schema").format(schema=adapted_table.schema))
    return adapted_table
File without changes