cosmotech-acceleration-library 1.1.0__py3-none-any.whl → 2.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. cosmotech/coal/__init__.py +1 -1
  2. cosmotech/coal/aws/__init__.py +1 -9
  3. cosmotech/coal/aws/s3.py +181 -214
  4. cosmotech/coal/azure/__init__.py +5 -5
  5. cosmotech/coal/azure/adx/__init__.py +24 -10
  6. cosmotech/coal/azure/adx/auth.py +2 -2
  7. cosmotech/coal/azure/adx/ingestion.py +10 -14
  8. cosmotech/coal/azure/adx/query.py +1 -1
  9. cosmotech/coal/azure/adx/runner.py +13 -14
  10. cosmotech/coal/azure/adx/store.py +5 -86
  11. cosmotech/coal/azure/adx/tables.py +2 -2
  12. cosmotech/coal/azure/adx/utils.py +2 -2
  13. cosmotech/coal/azure/blob.py +20 -26
  14. cosmotech/coal/azure/storage.py +3 -3
  15. cosmotech/coal/cosmotech_api/__init__.py +0 -28
  16. cosmotech/coal/cosmotech_api/apis/__init__.py +14 -0
  17. cosmotech/coal/cosmotech_api/apis/dataset.py +222 -0
  18. cosmotech/coal/cosmotech_api/apis/meta.py +25 -0
  19. cosmotech/coal/cosmotech_api/apis/organization.py +24 -0
  20. cosmotech/coal/cosmotech_api/apis/run.py +38 -0
  21. cosmotech/coal/cosmotech_api/apis/runner.py +75 -0
  22. cosmotech/coal/cosmotech_api/apis/solution.py +23 -0
  23. cosmotech/coal/cosmotech_api/apis/workspace.py +108 -0
  24. cosmotech/coal/cosmotech_api/objects/__init__.py +9 -0
  25. cosmotech/coal/cosmotech_api/objects/connection.py +125 -0
  26. cosmotech/coal/cosmotech_api/objects/parameters.py +127 -0
  27. cosmotech/coal/postgresql/runner.py +58 -41
  28. cosmotech/coal/postgresql/store.py +56 -15
  29. cosmotech/coal/postgresql/utils.py +255 -0
  30. cosmotech/coal/singlestore/store.py +3 -2
  31. cosmotech/coal/store/__init__.py +16 -13
  32. cosmotech/coal/store/output/__init__.py +0 -0
  33. cosmotech/coal/store/output/aws_channel.py +74 -0
  34. cosmotech/coal/store/output/az_storage_channel.py +33 -0
  35. cosmotech/coal/store/output/channel_interface.py +38 -0
  36. cosmotech/coal/store/output/channel_spliter.py +61 -0
  37. cosmotech/coal/store/output/postgres_channel.py +37 -0
  38. cosmotech/coal/store/pandas.py +1 -1
  39. cosmotech/coal/store/pyarrow.py +2 -2
  40. cosmotech/coal/store/store.py +4 -7
  41. cosmotech/coal/utils/configuration.py +197 -0
  42. cosmotech/coal/utils/decorator.py +4 -7
  43. cosmotech/csm_data/commands/adx_send_data.py +1 -1
  44. cosmotech/csm_data/commands/adx_send_runnerdata.py +3 -2
  45. cosmotech/csm_data/commands/api/api.py +6 -19
  46. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +20 -16
  47. cosmotech/csm_data/commands/api/run_load_data.py +15 -52
  48. cosmotech/csm_data/commands/api/wsf_load_file.py +13 -16
  49. cosmotech/csm_data/commands/api/wsf_send_file.py +11 -14
  50. cosmotech/csm_data/commands/az_storage_upload.py +3 -2
  51. cosmotech/csm_data/commands/s3_bucket_delete.py +16 -15
  52. cosmotech/csm_data/commands/s3_bucket_download.py +16 -16
  53. cosmotech/csm_data/commands/s3_bucket_upload.py +16 -14
  54. cosmotech/csm_data/commands/store/dump_to_azure.py +3 -2
  55. cosmotech/csm_data/commands/store/dump_to_postgresql.py +3 -2
  56. cosmotech/csm_data/commands/store/dump_to_s3.py +18 -16
  57. cosmotech/csm_data/commands/store/list_tables.py +3 -2
  58. cosmotech/csm_data/commands/store/load_csv_folder.py +10 -4
  59. cosmotech/csm_data/commands/store/load_from_singlestore.py +3 -2
  60. cosmotech/csm_data/commands/store/output.py +35 -0
  61. cosmotech/csm_data/commands/store/reset.py +8 -3
  62. cosmotech/csm_data/commands/store/store.py +3 -3
  63. cosmotech/csm_data/main.py +4 -4
  64. cosmotech/csm_data/utils/decorators.py +4 -3
  65. cosmotech/translation/coal/en-US/coal/cosmotech_api/initialization.yml +8 -0
  66. cosmotech/translation/coal/en-US/coal/services/dataset.yml +10 -14
  67. cosmotech/translation/coal/en-US/coal/store/output/data_interface.yml +1 -0
  68. cosmotech/translation/coal/en-US/coal/store/output/split.yml +6 -0
  69. cosmotech/translation/coal/en-US/coal/utils/configuration.yml +2 -0
  70. cosmotech/translation/csm_data/en-US/csm_data/commands/store/output.yml +7 -0
  71. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/METADATA +29 -33
  72. cosmotech_acceleration_library-2.1.0rc1.dist-info/RECORD +153 -0
  73. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/WHEEL +1 -1
  74. cosmotech/coal/azure/functions.py +0 -72
  75. cosmotech/coal/cosmotech_api/connection.py +0 -96
  76. cosmotech/coal/cosmotech_api/dataset/__init__.py +0 -26
  77. cosmotech/coal/cosmotech_api/dataset/converters.py +0 -164
  78. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +0 -19
  79. cosmotech/coal/cosmotech_api/dataset/download/adt.py +0 -119
  80. cosmotech/coal/cosmotech_api/dataset/download/common.py +0 -140
  81. cosmotech/coal/cosmotech_api/dataset/download/file.py +0 -229
  82. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +0 -185
  83. cosmotech/coal/cosmotech_api/dataset/upload.py +0 -41
  84. cosmotech/coal/cosmotech_api/dataset/utils.py +0 -132
  85. cosmotech/coal/cosmotech_api/parameters.py +0 -48
  86. cosmotech/coal/cosmotech_api/run.py +0 -25
  87. cosmotech/coal/cosmotech_api/run_data.py +0 -173
  88. cosmotech/coal/cosmotech_api/run_template.py +0 -108
  89. cosmotech/coal/cosmotech_api/runner/__init__.py +0 -28
  90. cosmotech/coal/cosmotech_api/runner/data.py +0 -38
  91. cosmotech/coal/cosmotech_api/runner/datasets.py +0 -416
  92. cosmotech/coal/cosmotech_api/runner/download.py +0 -135
  93. cosmotech/coal/cosmotech_api/runner/metadata.py +0 -42
  94. cosmotech/coal/cosmotech_api/runner/parameters.py +0 -157
  95. cosmotech/coal/cosmotech_api/twin_data_layer.py +0 -512
  96. cosmotech/coal/cosmotech_api/workspace.py +0 -127
  97. cosmotech/coal/utils/postgresql.py +0 -236
  98. cosmotech/coal/utils/semver.py +0 -6
  99. cosmotech/csm_data/commands/api/rds_load_csv.py +0 -90
  100. cosmotech/csm_data/commands/api/rds_send_csv.py +0 -74
  101. cosmotech/csm_data/commands/api/rds_send_store.py +0 -74
  102. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +0 -66
  103. cosmotech/csm_data/commands/api/tdl_load_files.py +0 -76
  104. cosmotech/csm_data/commands/api/tdl_send_files.py +0 -82
  105. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +0 -27
  106. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +0 -27
  107. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +0 -27
  108. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +0 -27
  109. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +0 -32
  110. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +0 -27
  111. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +0 -2
  112. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_load_csv.yml +0 -13
  113. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_csv.yml +0 -12
  114. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_store.yml +0 -12
  115. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_load_files.yml +0 -14
  116. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_send_files.yml +0 -18
  117. cosmotech_acceleration_library-1.1.0.dist-info/RECORD +0 -171
  118. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/entry_points.txt +0 -0
  119. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/licenses/LICENSE +0 -0
  120. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/top_level.txt +0 -0
@@ -13,12 +13,13 @@ for store operations.
13
13
  """
14
14
 
15
15
  from time import perf_counter
16
- import pyarrow
17
16
 
17
+ from cosmotech.orchestrator.utils.translate import T
18
+
19
+ from cosmotech.coal.postgresql.utils import PostgresUtils
18
20
  from cosmotech.coal.store.store import Store
21
+ from cosmotech.coal.utils.configuration import Configuration
19
22
  from cosmotech.coal.utils.logger import LOGGER
20
- from cosmotech.coal.utils.postgresql import send_pyarrow_table_to_postgresql
21
- from cosmotech.orchestrator.utils.translate import T
22
23
 
23
24
 
24
25
  def dump_store_to_postgresql(
@@ -32,6 +33,8 @@ def dump_store_to_postgresql(
32
33
  table_prefix: str = "Cosmotech_",
33
34
  replace: bool = True,
34
35
  force_encode: bool = False,
36
+ selected_tables: list[str] = [],
37
+ fk_id: str = None,
35
38
  ) -> None:
36
39
  """
37
40
  Dump Store data to a PostgreSQL database.
@@ -46,36 +49,74 @@ def dump_store_to_postgresql(
46
49
  postgres_password: PostgreSQL password
47
50
  table_prefix: Table prefix
48
51
  replace: Whether to replace existing tables
49
- force_encode: force password encoding
52
+ force_encode: force password encoding to percent encoding
53
+ selected_tables: list of tables to send
54
+ fk_id: foreign key id to add to all table on all rows
50
55
  """
51
- _s = Store(store_location=store_folder)
56
+ _c = Configuration(
57
+ {
58
+ "coal": {"store": store_folder},
59
+ "postgres": {
60
+ "host": postgres_host,
61
+ "port": postgres_port,
62
+ "db_name": postgres_db,
63
+ "db_schema": postgres_schema,
64
+ "user_name": postgres_user,
65
+ "user_password": postgres_password,
66
+ "password_encoding": force_encode,
67
+ "table_prefix": table_prefix,
68
+ },
69
+ }
70
+ )
71
+
72
+ dump_store_to_postgresql_from_conf(configuration=_c, replace=replace, selected_tables=selected_tables, fk_id=fk_id)
73
+
74
+
75
+ def dump_store_to_postgresql_from_conf(
76
+ configuration: Configuration,
77
+ replace: bool = True,
78
+ selected_tables: list[str] = [],
79
+ fk_id: str = None,
80
+ ) -> None:
81
+ """
82
+ Dump Store data to a PostgreSQL database.
83
+
84
+ Args:
85
+ configuration: coal Configuration
86
+ replace: Whether to replace existing tables
87
+ selected_tables: list of tables to send
88
+ fk_id: foreign key id to add to all table on all rows
89
+ """
90
+ _psql = PostgresUtils(configuration)
91
+ _s = Store(configuration=configuration)
52
92
 
53
93
  tables = list(_s.list_tables())
94
+ if selected_tables:
95
+ tables = [t for t in tables if t in selected_tables]
54
96
  if len(tables):
55
- LOGGER.info(T("coal.services.database.sending_data").format(table=f"{postgres_db}.{postgres_schema}"))
97
+ LOGGER.info(T("coal.services.database.sending_data").format(table=f"{_psql.db_name}.{_psql.db_schema}"))
56
98
  total_rows = 0
57
99
  _process_start = perf_counter()
58
100
  for table_name in tables:
59
101
  _s_time = perf_counter()
60
- target_table_name = f"{table_prefix}{table_name}"
102
+ target_table_name = f"{_psql.table_prefix}{table_name}"
61
103
  LOGGER.info(T("coal.services.database.table_entry").format(table=target_table_name))
62
104
  data = _s.get_table(table_name)
63
105
  if not len(data):
64
106
  LOGGER.info(T("coal.services.database.no_rows"))
65
107
  continue
108
+ if fk_id:
109
+ data = data.append_column("csm_run_id", [[fk_id] * data.num_rows])
66
110
  _dl_time = perf_counter()
67
- rows = send_pyarrow_table_to_postgresql(
111
+ rows = _psql.send_pyarrow_table_to_postgresql(
68
112
  data,
69
113
  target_table_name,
70
- postgres_host,
71
- postgres_port,
72
- postgres_db,
73
- postgres_schema,
74
- postgres_user,
75
- postgres_password,
76
114
  replace,
77
- force_encode,
78
115
  )
116
+ if fk_id and _psql.is_metadata_exists():
117
+ metadata_table = f"{_psql.table_prefix}RunnerMetadata"
118
+ _psql.add_fk_constraint(table_name, "csm_run_id", metadata_table, "last_csm_run_id")
119
+
79
120
  total_rows += rows
80
121
  _up_time = perf_counter()
81
122
  LOGGER.info(T("coal.services.database.row_count").format(count=rows))
@@ -0,0 +1,255 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ from typing import Optional
9
+ from urllib.parse import quote
10
+
11
+ import adbc_driver_manager
12
+ import pyarrow as pa
13
+ from adbc_driver_postgresql import dbapi
14
+ from cosmotech.orchestrator.utils.translate import T
15
+ from pyarrow import Table
16
+
17
+ from cosmotech.coal.utils.configuration import Configuration
18
+ from cosmotech.coal.utils.logger import LOGGER
19
+
20
+
21
class PostgresUtils:
    """Helper wrapping PostgreSQL access configured through a coal Configuration.

    All settings are read from the ``postgres`` section of the configuration
    (host, port, db_name, db_schema, user_name, user_password, plus the
    optional ``table_prefix`` and ``password_encoding`` entries).
    """

    def __init__(self, configuration: Configuration):
        # Keep only the "postgres" section; every property below reads from it.
        self._configuration = configuration.postgres

    @property
    def table_prefix(self) -> str:
        # Optional entry; defaults to the historical "Cosmotech_" prefix.
        if "table_prefix" in self._configuration:
            return self._configuration.table_prefix
        return "Cosmotech_"

    @property
    def db_name(self):
        return self._configuration.db_name

    @property
    def db_schema(self):
        return self._configuration.db_schema

    @property
    def host_uri(self):
        return self._configuration.host

    @property
    def host_port(self):
        return self._configuration.port

    @property
    def user_name(self):
        return self._configuration.user_name

    @property
    def user_password(self):
        return self._configuration.user_password

    @property
    def password_encoding(self) -> bool:
        # Optional entry; when true the password is percent-encoded in the URI.
        if "password_encoding" in self._configuration:
            return self._configuration.password_encoding
        return False

    @property
    def full_uri(self) -> str:
        """Build the ``postgresql://`` connection URI.

        The password is percent-encoded only when ``password_encoding`` is set
        (it may contain characters invalid in a URI).
        We don't log anything about the password for security.
        """
        encoded_password = self.user_password
        if self.password_encoding:
            encoded_password = quote(self.user_password, safe="")

        return (
            "postgresql://" + f"{self.user_name}"
            f":{encoded_password}"
            f"@{self.host_uri}"
            f":{self.host_port}"
            f"/{self.db_name}"
        )

    @property
    def metadata_table_name(self) -> str:
        # Conventional name of the runner metadata table.
        return f"{self.table_prefix}RunnerMetadata"

    def get_postgresql_table_schema(self, target_table_name: str) -> Optional[pa.Schema]:
        """
        Get the schema of an existing PostgreSQL table using SQL queries.

        Args:
            target_table_name: Name of the table

        Returns:
            PyArrow Schema if table exists, None otherwise
        """
        LOGGER.debug(
            T("coal.services.postgresql.getting_schema").format(
                postgres_schema=self.db_schema, target_table_name=target_table_name
            )
        )

        with dbapi.connect(self.full_uri) as conn:
            try:
                return conn.adbc_get_table_schema(
                    target_table_name,
                    db_schema_filter=self.db_schema,
                )
            except adbc_driver_manager.ProgrammingError:
                # ADBC raises ProgrammingError when the table is absent.
                LOGGER.warning(
                    T("coal.services.postgresql.table_not_found").format(
                        postgres_schema=self.db_schema, target_table_name=target_table_name
                    )
                )
                return None

    def send_pyarrow_table_to_postgresql(
        self,
        data: Table,
        target_table_name: str,
        replace: bool,
    ) -> int:
        """Ingest a PyArrow table into PostgreSQL and return the row count.

        Args:
            data: table to send
            target_table_name: destination table name
            replace: when True, drop/recreate the table; otherwise append,
                first adapting ``data`` to the existing table schema.

        Returns:
            Number of rows ingested.
        """
        LOGGER.debug(
            T("coal.services.postgresql.preparing_send").format(
                postgres_schema=self.db_schema, target_table_name=target_table_name
            )
        )
        LOGGER.debug(T("coal.services.postgresql.input_rows").format(rows=len(data)))

        # Get existing schema if table exists
        existing_schema = self.get_postgresql_table_schema(target_table_name)

        if existing_schema is not None:
            LOGGER.debug(T("coal.services.postgresql.found_existing_table").format(schema=existing_schema))
            if not replace:
                # Appending: data must match the columns/types already in place.
                LOGGER.debug(T("coal.services.postgresql.adapting_data"))
                data = adapt_table_to_schema(data, existing_schema)
            else:
                LOGGER.debug(T("coal.services.postgresql.replace_mode"))
        else:
            LOGGER.debug(T("coal.services.postgresql.no_existing_table"))

        # Proceed with ingestion
        total = 0

        LOGGER.debug(T("coal.services.postgresql.connecting"))
        with dbapi.connect(self.full_uri, autocommit=True) as conn:
            with conn.cursor() as curs:
                mode = "replace" if replace else "create_append"
                LOGGER.debug(T("coal.services.postgresql.ingesting_data").format(mode=mode))
                total += curs.adbc_ingest(target_table_name, data, mode, db_schema_name=self.db_schema)

        LOGGER.debug(T("coal.services.postgresql.ingestion_success").format(rows=total))
        return total

    def add_fk_constraint(
        self,
        from_table: str,
        from_col: str,
        to_table: str,
        to_col: str,
    ) -> None:
        """Add a foreign-key constraint between two tables of the schema.

        Args:
            from_table: referencing table (holds the FK column)
            from_col: referencing column
            to_table: referenced table
            to_col: referenced column
        """
        with dbapi.connect(self.full_uri, autocommit=True) as conn:
            with conn.cursor() as curs:
                # "ADD CONSTRAINT" is mandatory PostgreSQL syntax (the original
                # statement omitted ADD and was rejected by the server); the
                # referenced table is schema-qualified like the referencing one.
                sql_add_fk = f"""
                ALTER TABLE {self.db_schema}.{from_table}
                ADD CONSTRAINT metadata FOREIGN KEY ({from_col}) REFERENCES {self.db_schema}.{to_table}({to_col})
                """
                curs.execute(sql_add_fk)
            # No explicit commit: the connection is opened with autocommit=True.

    def is_metadata_exists(self) -> bool:
        """Return True if the runner metadata table exists in the schema."""
        with dbapi.connect(self.full_uri, autocommit=True) as conn:
            try:
                conn.adbc_get_table_schema(
                    self.metadata_table_name,
                    db_schema_filter=self.db_schema,
                )
                return True
            except adbc_driver_manager.ProgrammingError:
                return False
178
+
179
+
180
def adapt_table_to_schema(data: pa.Table, target_schema: pa.Schema) -> pa.Table:
    """
    Adapt a PyArrow table to match a target schema with detailed logging.

    Columns present in ``target_schema`` but absent from ``data`` are filled
    with nulls; extra columns are dropped; mismatched types are cast, falling
    back to a null column when the cast is impossible.

    Args:
        data: input table to reshape
        target_schema: schema the returned table must conform to

    Returns:
        A new table whose schema is exactly ``target_schema``.
    """
    LOGGER.debug(T("coal.services.postgresql.schema_adaptation_start").format(rows=len(data)))
    LOGGER.debug(T("coal.services.postgresql.original_schema").format(schema=data.schema))
    LOGGER.debug(T("coal.services.postgresql.target_schema").format(schema=target_schema))

    target_fields = {field.name: field.type for field in target_schema}
    new_columns = []

    # Track adaptations for summary
    added_columns = []
    dropped_columns = []
    type_conversions = []
    failed_conversions = []

    # Process each field in target schema
    for field_name, target_type in target_fields.items():
        if field_name in data.column_names:
            # Column exists - try to cast to target type
            col = data[field_name]
            original_type = col.type

            if original_type != target_type:
                LOGGER.debug(
                    T("coal.services.postgresql.casting_column").format(
                        field_name=field_name,
                        original_type=original_type,
                        target_type=target_type,
                    )
                )
                try:
                    # Use the column's own cast method rather than
                    # pa.compute.cast: it is equivalent and does not rely on
                    # the pyarrow.compute submodule having been imported.
                    new_col = col.cast(target_type)
                    new_columns.append(new_col)
                    type_conversions.append(f"{field_name}: {original_type} -> {target_type}")
                except (pa.ArrowInvalid, pa.ArrowNotImplementedError) as e:
                    # ArrowNotImplementedError covers casts that have no
                    # kernel at all (not just invalid values).
                    LOGGER.warning(
                        T("coal.services.postgresql.cast_failed").format(
                            field_name=field_name,
                            original_type=original_type,
                            target_type=target_type,
                            error=str(e),
                        )
                    )
                    new_columns.append(pa.nulls(len(data), type=target_type))
                    failed_conversions.append(f"{field_name}: {original_type} -> {target_type}")
            else:
                new_columns.append(col)
        else:
            # Column doesn't exist - add nulls
            LOGGER.debug(T("coal.services.postgresql.adding_missing_column").format(field_name=field_name))
            new_columns.append(pa.nulls(len(data), type=target_type))
            added_columns.append(field_name)

    # Log columns that will be dropped
    dropped_columns = [name for name in data.column_names if name not in target_fields]
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropping_columns").format(columns=dropped_columns))

    # Create new table
    adapted_table = pa.Table.from_arrays(new_columns, schema=target_schema)

    # Log summary of adaptations
    LOGGER.debug(T("coal.services.postgresql.adaptation_summary"))
    if added_columns:
        LOGGER.debug(T("coal.services.postgresql.added_columns").format(columns=added_columns))
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropped_columns").format(columns=dropped_columns))
    if type_conversions:
        LOGGER.debug(T("coal.services.postgresql.successful_conversions").format(conversions=type_conversions))
    if failed_conversions:
        LOGGER.debug(T("coal.services.postgresql.failed_conversions").format(conversions=failed_conversions))

    LOGGER.debug(T("coal.services.postgresql.final_schema").format(schema=adapted_table.schema))
    return adapted_table
@@ -12,15 +12,16 @@ This module provides functions for interacting with SingleStore databases
12
12
  for store operations.
13
13
  """
14
14
 
15
+ import csv
15
16
  import pathlib
16
17
  import time
17
- import csv
18
+
18
19
  import singlestoredb as s2
20
+ from cosmotech.orchestrator.utils.translate import T
19
21
 
20
22
  from cosmotech.coal.store.csv import store_csv_file
21
23
  from cosmotech.coal.store.store import Store
22
24
  from cosmotech.coal.utils.logger import LOGGER
23
- from cosmotech.orchestrator.utils.translate import T
24
25
 
25
26
 
26
27
  def _get_data(table_name: str, output_directory: str, cursor) -> None:
@@ -12,31 +12,34 @@ This module provides functions for working with the Store,
12
12
  including loading and converting data.
13
13
  """
14
14
 
15
- # Re-export the Store class
16
- from cosmotech.coal.store.store import Store
17
-
18
15
  # Re-export functions from the csv module
19
16
  from cosmotech.coal.store.csv import (
20
- store_csv_file,
21
17
  convert_store_table_to_csv,
18
+ store_csv_file,
22
19
  )
23
20
 
24
21
  # Re-export functions from the native_python module
25
22
  from cosmotech.coal.store.native_python import (
26
- store_pylist,
27
23
  convert_table_as_pylist,
24
+ store_pylist,
28
25
  )
29
-
30
- # Re-export functions from the pandas module (if available)
31
-
32
26
  from cosmotech.coal.store.pandas import (
33
- store_dataframe,
34
27
  convert_store_table_to_dataframe as convert_store_table_to_pandas_dataframe,
35
28
  )
36
-
37
- # Re-export functions from the pyarrow module (if available)
38
-
29
+ from cosmotech.coal.store.pandas import (
30
+ store_dataframe,
31
+ )
39
32
  from cosmotech.coal.store.pyarrow import (
40
- store_table,
41
33
  convert_store_table_to_dataframe as convert_store_table_to_pyarrow_table,
42
34
  )
35
+ from cosmotech.coal.store.pyarrow import (
36
+ store_table,
37
+ )
38
+
39
+ # Re-export the Store class
40
+ from cosmotech.coal.store.store import Store
41
+
42
+ # Re-export functions from the pandas module (if available)
43
+
44
+
45
+ # Re-export functions from the pyarrow module (if available)
File without changes
@@ -0,0 +1,74 @@
1
+ from io import BytesIO
2
+ from typing import Optional
3
+
4
+ import pyarrow.csv as pc
5
+ import pyarrow.parquet as pq
6
+ from cosmotech.orchestrator.utils.translate import T
7
+
8
+ from cosmotech.coal.aws import S3
9
+ from cosmotech.coal.store.output.channel_interface import (
10
+ ChannelInterface,
11
+ MissingChannelConfigError,
12
+ )
13
+ from cosmotech.coal.store.store import Store
14
+ from cosmotech.coal.utils.configuration import Configuration, Dotdict
15
+ from cosmotech.coal.utils.logger import LOGGER
16
+
17
+
18
class AwsChannel(ChannelInterface):
    """Output channel sending Store content to an S3 bucket.

    Supported output types (from the s3 configuration): ``sqlite`` (upload the
    raw store database file), ``csv`` and ``parquet`` (one object per table).
    """

    required_keys = {
        "coal": ["store"],
        "s3": ["access_key_id", "endpoint_url", "secret_access_key"],
    }
    requirement_string = required_keys

    def __init__(self, dct: Dotdict = None):
        super().__init__(dct)
        self._s3 = S3(self.configuration)

    def send(self, filter: Optional[list[str]] = None) -> bool:
        """Send the Store content to the bucket.

        Args:
            filter: optional list of table names to send; all tables when None.

        Returns:
            True once the upload(s) completed.

        Raises:
            ValueError: when the configured output type is unsupported.
        """
        _s = Store(configuration=self.configuration)

        if self._s3.output_type not in ("sqlite", "csv", "parquet"):
            LOGGER.error(T("coal.common.errors.data_invalid_output_type").format(output_type=self._s3.output_type))
            raise ValueError(T("coal.common.errors.data_invalid_output_type").format(output_type=self._s3.output_type))

        if self._s3.output_type == "sqlite":
            # Ship the whole store database as a single file.
            _file_path = _s._database_path
            _file_name = "db.sqlite"
            _uploaded_file_name = self.configuration.s3.bucket_prefix + _file_name
            LOGGER.info(
                T("coal.common.data_transfer.file_sent").format(file_path=_file_path, uploaded_name=_uploaded_file_name)
            )
            self._s3.upload_file(_file_path, _uploaded_file_name)
        else:
            tables = list(_s.list_tables())
            if filter:
                tables = [t for t in tables if t in filter]

            for table_name in tables:
                _data_stream = BytesIO()
                _file_name = None
                _data = _s.get_table(table_name)
                if not len(_data):
                    # Empty tables are skipped, not uploaded as empty objects.
                    LOGGER.info(T("coal.common.data_transfer.table_empty").format(table_name=table_name))
                    continue
                if self._s3.output_type == "csv":
                    _file_name = table_name + ".csv"
                    pc.write_csv(_data, _data_stream)
                elif self._s3.output_type == "parquet":
                    _file_name = table_name + ".parquet"
                    pq.write_table(_data, _data_stream)
                LOGGER.info(
                    T("coal.common.data_transfer.sending_table").format(
                        table_name=table_name, output_type=self._s3.output_type
                    )
                )
                self._s3.upload_data_stream(
                    data_stream=_data_stream,
                    file_name=_file_name,
                )
        # Declared -> bool: report success explicitly (the original returned
        # None, which reads as falsy to callers aggregating channel results).
        return True

    def delete(self):
        """Remove every object from the configured bucket/prefix."""
        self._s3.delete_objects()
@@ -0,0 +1,33 @@
1
+ from typing import Optional
2
+
3
+ from cosmotech.coal.azure.blob import dump_store_to_azure
4
+ from cosmotech.coal.store.output.channel_interface import (
5
+ ChannelInterface,
6
+ MissingChannelConfigError,
7
+ )
8
+ from cosmotech.coal.utils.configuration import Configuration, Dotdict
9
+
10
+
11
class AzureStorageChannel(ChannelInterface):
    """Output channel dumping the Store to an Azure Storage container."""

    required_keys = {
        "coal": ["store"],
        "azure": [
            "account_name",
            "container_name",
            "tenant_id",
            "client_id",
            "client_secret",
            "output_type",
            "file_prefix",
        ],
    }
    requirement_string = required_keys

    def send(self, filter: Optional[list[str]] = None) -> bool:
        """Send the Store tables to Azure Storage.

        Args:
            filter: optional list of table names to send; all tables when None.

        Returns:
            True once the dump completed (the original returned None despite
            the declared bool return, which reads as failure to aggregators).
        """
        dump_store_to_azure(
            self.configuration,
            selected_tables=filter,
        )
        return True

    def delete(self):
        # Deleting previously-sent blobs is intentionally not implemented
        # for the Azure channel.
        pass
@@ -0,0 +1,38 @@
1
+ from typing import Optional
2
+
3
+ from cosmotech.orchestrator.utils.translate import T
4
+
5
+ from cosmotech.coal.utils.configuration import Configuration, Dotdict
6
+
7
+
8
class ChannelInterface:
    """Base class for Store output channels.

    Subclasses declare ``required_keys`` (configuration sections mapped to the
    keys they need) and implement ``send``/``delete``. Construction fails with
    ``MissingChannelConfigError`` when the configuration is incomplete.
    """

    required_keys = {}
    requirement_string: str = T("coal.store.output.data_interface.requirements")

    def __init__(self, dct: Dotdict = None):
        self.configuration = Configuration(dct)
        if not self.is_available():
            raise MissingChannelConfigError(self)

    def send(self, filter: Optional[list[str]] = None) -> bool:
        # Must be provided by concrete channels.
        raise NotImplementedError()

    def delete(self) -> bool:
        # Must be provided by concrete channels.
        raise NotImplementedError()

    def is_available(self) -> bool:
        """Return True when every required key is present in the configuration."""
        try:
            for section, keys in self.required_keys.items():
                section_conf = self.configuration[section]
                if any(key not in section_conf for key in keys):
                    return False
        except KeyError:
            # A whole section is missing from the configuration.
            return False
        return True
31
+
32
+
33
class MissingChannelConfigError(Exception):
    """Raised when a channel's configuration is missing required keys."""

    def __init__(self, interface_class):
        # NOTE(review): despite its name, ``interface_class`` is a
        # ChannelInterface *instance* (channels raise
        # ``MissingChannelConfigError(self)``), hence the
        # ``__class__.__name__`` lookup below.
        self.message = T("coal.store.output.split.requirements").format(
            interface_name=interface_class.__class__.__name__, requirements=interface_class.requirement_string
        )
        super().__init__(self.message)
@@ -0,0 +1,61 @@
1
from typing import Optional

from cosmotech.orchestrator.utils.translate import T

from cosmotech.coal.store.output.aws_channel import AwsChannel
from cosmotech.coal.store.output.az_storage_channel import AzureStorageChannel
from cosmotech.coal.store.output.channel_interface import (
    ChannelInterface,
    MissingChannelConfigError,
)
from cosmotech.coal.store.output.postgres_channel import PostgresChannel
from cosmotech.coal.utils.configuration import Dotdict
from cosmotech.coal.utils.logger import LOGGER
11
+
12
+
13
class ChannelSpliter(ChannelInterface):
    """Composite channel fanning out send/delete to every configured output.

    The configuration must contain an ``outputs`` list; each entry selects a
    channel type ("s3", "az_storage" or "postgres") and carries its
    configuration. Misconfigured channels are skipped with a warning; at least
    one usable channel is required.
    """

    requirement_string: str = "(Requires any working interface)"
    available_interfaces: dict[str, ChannelInterface] = {
        "s3": AwsChannel,
        "az_storage": AzureStorageChannel,
        "postgres": PostgresChannel,
    }

    def __init__(self, dct: Dotdict = None):
        super().__init__(dct)
        # Instance-level list (the original declared a shared mutable class
        # attribute, a classic aliasing hazard).
        self.targets = list()
        if "outputs" not in self.configuration:
            raise AttributeError(T("coal.store.output.split.no_targets"))
        for output in self.configuration.outputs:
            channel = self.available_interfaces[output.type]
            try:
                # ChannelInterface.__init__ raises MissingChannelConfigError
                # before is_available() could ever be re-checked here, so the
                # construction itself must be guarded: the original's
                # post-construction is_available() check made the warning
                # branch unreachable and let the exception abort the spliter.
                self.targets.append(channel(output.conf))
            except MissingChannelConfigError:
                LOGGER.warning(
                    T("coal.store.output.split.requirements").format(
                        # channel is the class itself: use __name__
                        # (channel.__class__.__name__ names the metaclass).
                        interface_name=channel.__name__,
                        requirements=channel.requirement_string,
                    )
                )
        if not self.targets:
            raise AttributeError(T("coal.store.output.split.no_targets"))

    def send(self, filter: Optional[list[str]] = None) -> bool:
        """Send through every target; True if at least one succeeded.

        A failure is logged and swallowed when several targets exist, but
        re-raised when there is a single target (nothing else can succeed).
        """
        any_ok = False
        for i in self.targets:
            try:
                any_ok = i.send(filter=filter) or any_ok
            except Exception:
                LOGGER.error(T("coal.store.output.split.send.error").format(interface_name=i.__class__.__name__))
                if len(self.targets) < 2:
                    raise
        return any_ok

    def delete(self, filter: Optional[list[str]] = None) -> bool:
        """Delete through every target; mirrors the error policy of send()."""
        any_ok = False
        for i in self.targets:
            try:
                any_ok = i.delete() or any_ok
            except Exception:
                LOGGER.error(T("coal.store.output.split.delete.error").format(interface_name=i.__class__.__name__))
                if len(self.targets) < 2:
                    raise
        return any_ok
@@ -0,0 +1,37 @@
1
+ from typing import Optional
2
+
3
+ from cosmotech.coal.postgresql.runner import (
4
+ remove_runner_metadata_from_postgresql,
5
+ send_runner_metadata_to_postgresql,
6
+ )
7
+ from cosmotech.coal.postgresql.store import dump_store_to_postgresql_from_conf
8
+ from cosmotech.coal.store.output.channel_interface import ChannelInterface
9
+
10
+
11
class PostgresChannel(ChannelInterface):
    """Output channel pushing the Store and runner metadata to PostgreSQL."""

    required_keys = {
        "coal": ["store"],
        "cosmotech": ["organization_id", "workspace_id", "runner_id"],
        "postgres": [
            "host",
            "port",
            "db_name",
            "db_schema",
            "user_name",
            "user_password",
        ],
    }
    requirement_string = required_keys

    def send(self, filter: Optional[list[str]] = None) -> bool:
        """Send runner metadata, then the Store tables.

        The metadata row is sent first so its run id can be attached to every
        data row as a foreign key; data is appended (replace=False).

        Args:
            filter: optional list of table names to send; all tables when None.

        Returns:
            True once the dump completed (the original returned None despite
            the declared bool return, which reads as failure to aggregators).
        """
        run_id = send_runner_metadata_to_postgresql(self.configuration)
        dump_store_to_postgresql_from_conf(
            configuration=self.configuration,
            selected_tables=filter,
            fk_id=run_id,
            replace=False,
        )
        return True

    def delete(self):
        # removing metadata will trigger cascade delete on real data
        remove_runner_metadata_from_postgresql(self.configuration)