cosmotech-acceleration-library 2.0.0__py3-none-any.whl → 2.1.0rc1__py3-none-any.whl

Files changed (42)
  1. cosmotech/coal/__init__.py +1 -1
  2. cosmotech/coal/azure/__init__.py +5 -5
  3. cosmotech/coal/azure/adx/__init__.py +24 -10
  4. cosmotech/coal/azure/adx/ingestion.py +10 -14
  5. cosmotech/coal/azure/adx/query.py +1 -1
  6. cosmotech/coal/azure/adx/utils.py +2 -2
  7. cosmotech/coal/azure/blob.py +14 -20
  8. cosmotech/coal/cosmotech_api/apis/dataset.py +135 -16
  9. cosmotech/coal/cosmotech_api/apis/runner.py +23 -19
  10. cosmotech/coal/postgresql/runner.py +8 -11
  11. cosmotech/coal/postgresql/store.py +20 -25
  12. cosmotech/coal/postgresql/utils.py +2 -1
  13. cosmotech/coal/singlestore/store.py +3 -2
  14. cosmotech/coal/store/__init__.py +16 -13
  15. cosmotech/coal/store/output/aws_channel.py +12 -11
  16. cosmotech/coal/store/output/az_storage_channel.py +9 -18
  17. cosmotech/coal/store/output/channel_interface.py +15 -0
  18. cosmotech/coal/store/output/channel_spliter.py +11 -5
  19. cosmotech/coal/store/output/postgres_channel.py +7 -10
  20. cosmotech/coal/store/pandas.py +1 -1
  21. cosmotech/coal/store/pyarrow.py +2 -2
  22. cosmotech/coal/store/store.py +4 -7
  23. cosmotech/coal/utils/configuration.py +76 -48
  24. cosmotech/csm_data/commands/adx_send_data.py +1 -1
  25. cosmotech/csm_data/commands/adx_send_runnerdata.py +3 -2
  26. cosmotech/csm_data/commands/api/run_load_data.py +10 -8
  27. cosmotech/csm_data/commands/az_storage_upload.py +3 -2
  28. cosmotech/csm_data/commands/store/dump_to_azure.py +3 -2
  29. cosmotech/csm_data/commands/store/dump_to_postgresql.py +3 -2
  30. cosmotech/csm_data/commands/store/list_tables.py +3 -2
  31. cosmotech/csm_data/commands/store/load_csv_folder.py +10 -4
  32. cosmotech/csm_data/commands/store/load_from_singlestore.py +3 -2
  33. cosmotech/csm_data/commands/store/reset.py +8 -3
  34. cosmotech/csm_data/main.py +4 -4
  35. cosmotech/csm_data/utils/decorators.py +4 -3
  36. cosmotech/translation/coal/en-US/coal/services/dataset.yml +6 -0
  37. {cosmotech_acceleration_library-2.0.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/METADATA +26 -27
  38. {cosmotech_acceleration_library-2.0.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/RECORD +42 -42
  39. {cosmotech_acceleration_library-2.0.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/WHEEL +1 -1
  40. {cosmotech_acceleration_library-2.0.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/entry_points.txt +0 -0
  41. {cosmotech_acceleration_library-2.0.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/licenses/LICENSE +0 -0
  42. {cosmotech_acceleration_library-2.0.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/top_level.txt +0 -0

cosmotech/coal/__init__.py

@@ -5,4 +5,4 @@
 # etc., to any person is prohibited unless it has been previously and
 # specifically authorized by written means by Cosmo Tech.
 
-__version__ = "2.0.0"
+__version__ = "2.1.0-rc1"

cosmotech/coal/azure/__init__.py

@@ -11,13 +11,13 @@ Azure services integration module.
 This module provides functions for interacting with Azure services like Storage and ADX.
 """
 
+# Re-export blob functions for easier importing
+from cosmotech.coal.azure.blob import (
+    dump_store_to_azure,
+)
+
 # Re-export storage functions for easier importing
 from cosmotech.coal.azure.storage import (
     upload_file,
     upload_folder,
 )
-
-# Re-export blob functions for easier importing
-from cosmotech.coal.azure.blob import (
-    dump_store_to_azure,
-)

cosmotech/coal/azure/adx/__init__.py

@@ -5,22 +5,36 @@
 # etc., to any person is prohibited unless it has been previously and
 # specifically authorized by written means by Cosmo Tech.
 
-from cosmotech.coal.azure.adx.auth import create_kusto_client, create_ingest_client, initialize_clients
-from cosmotech.coal.azure.adx.query import run_query, run_command_query
+from cosmotech.coal.azure.adx.auth import (
+    create_ingest_client,
+    create_kusto_client,
+    initialize_clients,
+)
 from cosmotech.coal.azure.adx.ingestion import (
-    ingest_dataframe,
-    send_to_adx,
+    IngestionStatus,
     check_ingestion_status,
-    monitor_ingestion,
     handle_failures,
-    IngestionStatus,
+    ingest_dataframe,
+    monitor_ingestion,
+    send_to_adx,
 )
-from cosmotech.coal.azure.adx.tables import table_exists, create_table, check_and_create_table, _drop_by_tag
-from cosmotech.coal.azure.adx.utils import type_mapping, create_column_mapping
-from cosmotech.coal.azure.adx.store import send_pyarrow_table_to_adx, send_table_data, process_tables, send_store_to_adx
+from cosmotech.coal.azure.adx.query import run_command_query, run_query
 from cosmotech.coal.azure.adx.runner import (
-    prepare_csv_content,
     construct_create_query,
     insert_csv_files,
+    prepare_csv_content,
     send_runner_data,
 )
+from cosmotech.coal.azure.adx.store import (
+    process_tables,
+    send_pyarrow_table_to_adx,
+    send_store_to_adx,
+    send_table_data,
+)
+from cosmotech.coal.azure.adx.tables import (
+    _drop_by_tag,
+    check_and_create_table,
+    create_table,
+    table_exists,
+)
+from cosmotech.coal.azure.adx.utils import create_column_mapping, type_mapping

cosmotech/coal/azure/adx/ingestion.py

@@ -5,28 +5,24 @@
 # etc., to any person is prohibited unless it has been previously and
 # specifically authorized by written means by Cosmo Tech.
 
+import os
+import time
 from enum import Enum
-from typing import Dict
-from typing import Iterator
-from typing import List
-from typing import Optional
-from typing import Tuple
+from typing import Dict, Iterator, List, Optional, Tuple
 
-import os
 import pandas as pd
-import time
 import tqdm
 from azure.kusto.data import KustoClient
 from azure.kusto.data.data_format import DataFormat
-from azure.kusto.ingest import IngestionProperties
-from azure.kusto.ingest import QueuedIngestClient
-from azure.kusto.ingest import ReportLevel
-from azure.kusto.ingest.status import FailureMessage
-from azure.kusto.ingest.status import KustoIngestStatusQueues
-from azure.kusto.ingest.status import SuccessMessage
+from azure.kusto.ingest import IngestionProperties, QueuedIngestClient, ReportLevel
+from azure.kusto.ingest.status import (
+    FailureMessage,
+    KustoIngestStatusQueues,
+    SuccessMessage,
+)
 from cosmotech.orchestrator.utils.translate import T
 
-from cosmotech.coal.azure.adx.tables import create_table, _drop_by_tag
+from cosmotech.coal.azure.adx.tables import _drop_by_tag, create_table
 from cosmotech.coal.azure.adx.utils import type_mapping
 from cosmotech.coal.utils.logger import LOGGER
 

cosmotech/coal/azure/adx/query.py

@@ -7,9 +7,9 @@
 
 from azure.kusto.data import KustoClient
 from azure.kusto.data.response import KustoResponseDataSet
+from cosmotech.orchestrator.utils.translate import T
 
 from cosmotech.coal.utils.logger import LOGGER
-from cosmotech.orchestrator.utils.translate import T
 
 
 def run_query(client: KustoClient, database: str, query: str) -> KustoResponseDataSet:

cosmotech/coal/azure/adx/utils.py

@@ -5,13 +5,13 @@
 # etc., to any person is prohibited unless it has been previously and
 # specifically authorized by written means by Cosmo Tech.
 
-import dateutil.parser
 from typing import Any, Dict
 
+import dateutil.parser
 import pyarrow
+from cosmotech.orchestrator.utils.translate import T
 
 from cosmotech.coal.utils.logger import LOGGER
-from cosmotech.orchestrator.utils.translate import T
 
 
 def create_column_mapping(data: pyarrow.Table) -> Dict[str, str]:

cosmotech/coal/azure/blob.py

@@ -21,6 +21,7 @@ from azure.storage.blob import BlobServiceClient
 from cosmotech.orchestrator.utils.translate import T
 
 from cosmotech.coal.store.store import Store
+from cosmotech.coal.utils.configuration import Configuration
 from cosmotech.coal.utils.logger import LOGGER
 
 VALID_TYPES = (

@@ -31,42 +32,35 @@ VALID_TYPES = (
 
 
 def dump_store_to_azure(
-    store_folder: str,
-    account_name: str,
-    container_name: str,
-    tenant_id: str,
-    client_id: str,
-    client_secret: str,
-    output_type: str = "sqlite",
-    file_prefix: str = "",
+    configuration: Configuration = Configuration(),
     selected_tables: list[str] = [],
 ) -> None:
     """
     Dump Store data to Azure Blob Storage.
 
     Args:
-        store_folder: Folder containing the Store
-        account_name: Azure Storage account name
-        container_name: Azure Storage container name
-        tenant_id: Azure tenant ID
-        client_id: Azure client ID
-        client_secret: Azure client secret
-        output_type: Output file type (sqlite, csv, or parquet)
-        file_prefix: Prefix for uploaded files
+        configuration: Configuration utils class
+        selected_tables: List of tables name
 
     Raises:
        ValueError: If the output type is invalid
     """
-    _s = Store(store_location=store_folder)
+    _s = Store(configuration=configuration)
+    output_type = configuration.safe_get("azure.output_type", default="sqlite")
+    file_prefix = configuration.safe_get("azure.file_prefix", default="")
 
     if output_type not in VALID_TYPES:
         LOGGER.error(T("coal.common.validation.invalid_output_type").format(output_type=output_type))
         raise ValueError(T("coal.common.validation.invalid_output_type").format(output_type=output_type))
 
     container_client = BlobServiceClient(
-        account_url=f"https://{account_name}.blob.core.windows.net/",
-        credential=ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret),
-    ).get_container_client(container_name)
+        account_url=f"https://{configuration.azure.account_name}.blob.core.windows.net/",
+        credential=ClientSecretCredential(
+            tenant_id=configuration.azure.tenant_id,
+            client_id=configuration.azure.client_id,
+            client_secret=configuration.azure.client_secret,
+        ),
+    ).get_container_client(configuration.azure.container_name)
 
     def data_upload(data_stream: BytesIO, file_name: str):
         uploaded_file_name = file_prefix + file_name
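
In 2.1.0rc1, dump_store_to_azure reads its connection settings from a Configuration object instead of eight individual arguments. Below is a minimal, hedged sketch of a call under the new signature: the dict-based Configuration constructor mirrors the one visible in the postgresql/store.py hunk further down, and the key names under "azure" follow the attributes this hunk references; treat the exact constructor shape and key layout as assumptions, not a documented API.

# Hedged usage sketch; values are illustrative, keys follow the attributes
# referenced above (configuration.azure.account_name, safe_get("azure.output_type"), ...).
from cosmotech.coal.azure.blob import dump_store_to_azure
from cosmotech.coal.utils.configuration import Configuration

conf = Configuration(
    {
        "coal": {"store": "/tmp/store"},  # store location, per the store.py hunk below
        "azure": {
            "account_name": "myaccount",
            "container_name": "mycontainer",
            "tenant_id": "my-tenant-id",
            "client_id": "my-client-id",
            "client_secret": "my-client-secret",
            "output_type": "csv",      # read via safe_get, defaults to "sqlite"
            "file_prefix": "run-42/",  # read via safe_get, defaults to ""
        },
    }
)
dump_store_to_azure(configuration=conf, selected_tables=["customers"])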

cosmotech/coal/cosmotech_api/apis/dataset.py

@@ -35,30 +35,49 @@ class DatasetApi(BaseDatasetApi, Connection):
         LOGGER.debug(T("coal.cosmotech_api.initialization.dataset_api_initialized"))
 
     def download_dataset(self, dataset_id) -> Dataset:
+        LOGGER.debug(f"Downloading dataset {dataset_id}")
         dataset = self.get_dataset(
             organization_id=self.configuration.cosmotech.organization_id,
             workspace_id=self.configuration.cosmotech.workspace_id,
             dataset_id=dataset_id,
         )
+        # send dataset files under dataset id folder
+        destination = Path(self.configuration.cosmotech.dataset_absolute_path) / dataset_id
+        for part in dataset.parts:
+            self._download_part(dataset_id, part, destination)
+        return dataset
 
-        dataset_dir = self.configuration.cosmotech.dataset_absolute_path
-        dataset_dir_path = Path(dataset_dir) / dataset_id
+    def download_parameter(self, dataset_id) -> Dataset:
+        LOGGER.debug(f"Downloading dataset {dataset_id}")
+        dataset = self.get_dataset(
+            organization_id=self.configuration.cosmotech.organization_id,
+            workspace_id=self.configuration.cosmotech.workspace_id,
+            dataset_id=dataset_id,
+        )
+        # send parameters file under parameters_name folder
+        destination = Path(self.configuration.cosmotech.parameters_absolute_path) / dataset_id
         for part in dataset.parts:
-            part_file_path = dataset_dir_path / part.source_name
-            part_file_path.parent.mkdir(parents=True, exist_ok=True)
-            data_part = self.download_dataset_part(
-                organization_id=self.configuration.cosmotech.organization_id,
-                workspace_id=self.configuration.cosmotech.workspace_id,
-                dataset_id=dataset_id,
-                dataset_part_id=part.id,
-            )
-            with open(part_file_path, "wb") as binary_file:
-                binary_file.write(data_part)
-            LOGGER.debug(
-                T("coal.services.dataset.part_downloaded").format(part_name=part.source_name, file_path=part_file_path)
-            )
+            part_dst = destination / part.name
+            self._download_part(dataset_id, part, part_dst)
         return dataset
 
+    def _download_part(self, dataset_id, dataset_part, destination):
+        part_file_path = destination / dataset_part.source_name
+        part_file_path.parent.mkdir(parents=True, exist_ok=True)
+        data_part = self.download_dataset_part(
+            organization_id=self.configuration.cosmotech.organization_id,
+            workspace_id=self.configuration.cosmotech.workspace_id,
+            dataset_id=dataset_id,
+            dataset_part_id=dataset_part.id,
+        )
+        with open(part_file_path, "wb") as binary_file:
+            binary_file.write(data_part)
+        LOGGER.debug(
+            T("coal.services.dataset.part_downloaded").format(
+                part_name=dataset_part.source_name, file_path=part_file_path
+            )
+        )
+
     @staticmethod
     def path_to_parts(_path, part_type) -> list[tuple[str, Path, DatasetPartTypeEnum]]:
         if (_path := Path(_path)).is_dir():
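
The refactor splits the shared per-part download loop into _download_part and adds download_parameter alongside download_dataset. A hedged sketch of the resulting call shapes and on-disk layout (IDs are illustrative; both *_absolute_path values come from the Configuration):

api = DatasetApi(configuration)  # assumes a prepared Configuration, as in the surrounding code
# Parts land under <dataset_absolute_path>/<dataset_id>/<part.source_name>:
dataset = api.download_dataset("d-12345")
# Parts land under <parameters_absolute_path>/<dataset_id>/<part.name>/<part.source_name>:
parameters = api.download_parameter("d-67890")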

@@ -70,7 +89,21 @@ class DatasetApi(BaseDatasetApi, Connection):
         dataset_name: str,
         as_files: Optional[list[Union[Path, str]]] = (),
         as_db: Optional[list[Union[Path, str]]] = (),
+        tags: Optional[list[str]] = None,
+        additional_data: Optional[dict] = None,
     ) -> Dataset:
+        """Upload a new dataset with optional tags and additional data.
+
+        Args:
+            dataset_name: The name of the dataset to create
+            as_files: List of file paths to upload as FILE type parts
+            as_db: List of file paths to upload as DB type parts
+            tags: Optional list of tags to associate with the dataset
+            additional_data: Optional dictionary of additional metadata
+
+        Returns:
+            The created Dataset object
+        """
         _parts = list()
 
         for _f in as_files:

@@ -81,6 +114,8 @@ class DatasetApi(BaseDatasetApi, Connection):
 
         d_request = DatasetCreateRequest(
             name=dataset_name,
+            tags=tags,
+            additional_data=additional_data,
             parts=list(
                 DatasetPartCreateRequest(
                     name=_p_name,
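
The enclosing method's name is not visible in these hunks, so the upload_dataset identifier below is an assumption; what the diff does confirm are the two new optional parameters forwarded into DatasetCreateRequest. A hedged sketch:

dataset = api.upload_dataset(                   # method name assumed, not shown in the hunk
    dataset_name="customers",
    as_files=["data/customers.csv"],            # uploaded as FILE-type parts
    tags=["demo", "rc1"],                       # new optional field in 2.1.0rc1
    additional_data={"source": "crm-export"},   # new optional field in 2.1.0rc1
)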

@@ -92,12 +127,96 @@ class DatasetApi(BaseDatasetApi, Connection):
             ),
         )
 
+        _files = []
+        for _p in _parts:
+            with _p[1].open("rb") as _p_file:
+                _files.append((_p[0], _p_file.read()))
+
         d_ret = self.create_dataset(
             self.configuration.cosmotech.organization_id,
             self.configuration.cosmotech.workspace_id,
             d_request,
-            files=list((_p[0], _p[1].open("rb").read()) for _p in _parts),
+            files=_files,
         )
 
         LOGGER.info(T("coal.services.dataset.dataset_created").format(dataset_id=d_ret.id))
         return d_ret
+
+    def upload_dataset_parts(
+        self,
+        dataset_id: str,
+        as_files: Optional[list[Union[Path, str]]] = (),
+        as_db: Optional[list[Union[Path, str]]] = (),
+        replace_existing: bool = False,
+    ) -> Dataset:
+        """Upload parts to an existing dataset.
+
+        Args:
+            dataset_id: The ID of the existing dataset
+            as_files: List of file paths to upload as FILE type parts
+            as_db: List of file paths to upload as DB type parts
+            replace_existing: If True, replace existing parts with same name
+
+        Returns:
+            The updated Dataset object
+        """
+        # Get current dataset to check existing parts
+        current_dataset = self.get_dataset(
+            organization_id=self.configuration.cosmotech.organization_id,
+            workspace_id=self.configuration.cosmotech.workspace_id,
+            dataset_id=dataset_id,
+        )
+
+        # Build set of existing part names and their IDs for quick lookup
+        existing_parts = {part.source_name: part.id for part in (current_dataset.parts or [])}
+
+        # Collect parts to upload
+        _parts = list()
+        for _f in as_files:
+            _parts.extend(self.path_to_parts(_f, DatasetPartTypeEnum.FILE))
+        for _db in as_db:
+            _parts.extend(self.path_to_parts(_db, DatasetPartTypeEnum.DB))
+
+        # Process each part
+        for _p_name, _p_path, _type in _parts:
+            if _p_name in existing_parts:
+                if replace_existing:
+                    # Delete existing part before creating new one
+                    self.delete_dataset_part(
+                        organization_id=self.configuration.cosmotech.organization_id,
+                        workspace_id=self.configuration.cosmotech.workspace_id,
+                        dataset_id=dataset_id,
+                        dataset_part_id=existing_parts[_p_name],
+                    )
+                    LOGGER.info(T("coal.services.dataset.part_replaced").format(part_name=_p_name))
+                else:
+                    LOGGER.warning(T("coal.services.dataset.part_skipped").format(part_name=_p_name))
+                    continue
+
+            # Create new part
+            part_request = DatasetPartCreateRequest(
+                name=_p_name,
+                description=_p_name,
+                sourceName=_p_name,
+                type=_type,
+            )
+
+            with _p_path.open("rb") as _p_file:
+                self.create_dataset_part(
+                    organization_id=self.configuration.cosmotech.organization_id,
+                    workspace_id=self.configuration.cosmotech.workspace_id,
+                    dataset_id=dataset_id,
+                    dataset_part_create_request=part_request,
+                    file=(_p_name, _p_file.read()),
+                )
+            LOGGER.debug(T("coal.services.dataset.part_uploaded").format(part_name=_p_name))
+
+        # Return updated dataset
+        updated_dataset = self.get_dataset(
+            organization_id=self.configuration.cosmotech.organization_id,
+            workspace_id=self.configuration.cosmotech.workspace_id,
+            dataset_id=dataset_id,
+        )
+
+        LOGGER.info(T("coal.services.dataset.parts_uploaded").format(dataset_id=dataset_id))
+        return updated_dataset
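
upload_dataset_parts is new in 2.1.0rc1. A hedged usage sketch (IDs and paths are illustrative):

api = DatasetApi(configuration)
updated = api.upload_dataset_parts(
    dataset_id="d-12345",
    as_files=["data/customers.csv"],  # FILE-type parts
    as_db=["data/orders.csv"],        # DB-type parts
    replace_existing=True,            # delete-then-recreate parts whose source_name already exists
)
print([part.source_name for part in updated.parts])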

cosmotech/coal/cosmotech_api/apis/runner.py

@@ -29,43 +29,47 @@ class RunnerApi(BaseRunnerApi, Connection):
 
     def get_runner_metadata(
         self,
-        organization_id: str,
-        workspace_id: str,
-        runner_id: str,
+        runner_id: Optional[str] = None,
         include: Optional[list[str]] = None,
         exclude: Optional[list[str]] = None,
     ) -> dict[str, Any]:
-        runner = self.get_runner(organization_id, workspace_id, runner_id)
+        runner = self.get_runner(
+            self.configuration.cosmotech.organization_id,
+            self.configuration.cosmotech.workspace_id,
+            runner_id or self.configuration.cosmotech.runner_id,
+        )
 
         return runner.model_dump(by_alias=True, exclude_none=True, include=include, exclude=exclude, mode="json")
 
     def download_runner_data(
         self,
-        organization_id: str,
-        workspace_id: str,
-        runner_id: str,
-        parameter_folder: str,
-        dataset_folder: Optional[str] = None,
+        download_datasets: Optional[str] = None,
     ):
         LOGGER.info(T("coal.cosmotech_api.runner.starting_download"))
 
         # Get runner data
-        runner_data = self.get_runner(organization_id, workspace_id, runner_id)
+        runner = self.get_runner(
+            self.configuration.cosmotech.organization_id,
+            self.configuration.cosmotech.workspace_id,
+            self.configuration.cosmotech.runner_id,
+        )
 
         # Skip if no parameters found
-        if not runner_data.parameters_values:
+        if not runner.parameters_values:
             LOGGER.warning(T("coal.cosmotech_api.runner.no_parameters"))
         else:
             LOGGER.info(T("coal.cosmotech_api.runner.loaded_data"))
-            parameters = Parameters(runner_data)
-            parameters.write_parameters_to_json(parameter_folder)
+            parameters = Parameters(runner)
+            parameters.write_parameters_to_json(self.configuration.cosmotech.parameters_absolute_path)
 
-        # Download datasets if requested
-        if dataset_folder:
-            datasets_ids = runner_data.datasets.bases
+        if runner.datasets.parameter:
+            ds_api = DatasetApi(self.configuration)
+            ds_api.download_parameter(runner.datasets.parameter)
 
-            if datasets_ids:
-                LOGGER.info(T("coal.cosmotech_api.runner.downloading_datasets").format(count=len(datasets_ids)))
+        # Download datasets if requested
+        if download_datasets:
+            LOGGER.info(T("coal.cosmotech_api.runner.downloading_datasets").format(count=len(runner.datasets.bases)))
+            if runner.datasets.bases:
                 ds_api = DatasetApi(self.configuration)
-                for dataset_id in datasets_ids:
+                for dataset_id in runner.datasets.bases:
                     ds_api.download_dataset(dataset_id)
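
Both runner methods now resolve the organization, workspace, and runner IDs from the Connection's Configuration; the explicit ID and folder arguments are gone, and parameter paths come from the configuration as well. A hedged sketch of the new call shapes:

api = RunnerApi(configuration)                   # assumes a prepared Configuration
metadata = api.get_runner_metadata()             # runner_id falls back to configuration.cosmotech.runner_id
api.download_runner_data(download_datasets="1")  # any truthy value triggers the base-dataset download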

cosmotech/coal/postgresql/runner.py

@@ -38,8 +38,6 @@ def send_runner_metadata_to_postgresql(
     # Get runner metadata
     _runner_api = RunnerApi(configuration)
     runner = _runner_api.get_runner_metadata(
-        configuration.cosmotech.organization_id,
-        configuration.cosmotech.workspace_id,
         configuration.cosmotech.runner_id,
     )
 

@@ -51,7 +49,7 @@
     CREATE TABLE IF NOT EXISTS {schema_table} (
         id varchar(32) PRIMARY KEY,
         name varchar(256),
-        last_run_id varchar(32),
+        last_csm_run_id varchar(32),
         run_template_id varchar(32)
     );
     """

@@ -60,24 +58,25 @@ def send_runner_metadata_to_postgresql(
     conn.commit()
     LOGGER.info(T("coal.services.postgresql.metadata"))
     sql_upsert = f"""
-    INSERT INTO {schema_table} (id, name, last_run_id, run_template_id)
-    VALUES(%s, %s, %s, %s)
+    INSERT INTO {schema_table} (id, name, last_csm_run_id, run_template_id)
+    VALUES ($1, $2, $3, $4)
     ON CONFLICT (id)
     DO
-    UPDATE SET name = EXCLUDED.name, last_run_id = EXCLUDED.last_run_id;
+    UPDATE SET name = EXCLUDED.name, last_csm_run_id = EXCLUDED.last_csm_run_id;
     """
+    LOGGER.debug(runner)
     curs.execute(
         sql_upsert,
         (
             runner.get("id"),
             runner.get("name"),
-            runner.get("lastRunId"),
+            runner.get("lastRunInfo").get("lastRunId"),
             runner.get("runTemplateId"),
         ),
     )
     conn.commit()
     LOGGER.info(T("coal.services.postgresql.metadata_updated"))
-    return runner.get("lastRunId")
+    return runner.get("lastRunInfo").get("lastRunId")
 
 
 def remove_runner_metadata_from_postgresql(

@@ -97,8 +96,6 @@ def remove_runner_metadata_from_postgresql(
     # Get runner metadata
     _runner_api = RunnerApi(configuration)
     runner = _runner_api.get_runner_metadata(
-        configuration.cosmotech.organization_id,
-        configuration.cosmotech.workspace_id,
         configuration.cosmotech.runner_id,
     )
 

@@ -108,7 +105,7 @@ def remove_runner_metadata_from_postgresql(
     schema_table = f"{_psql.db_schema}.{_psql.table_prefix}RunnerMetadata"
     sql_delete_from_metatable = f"""
     DELETE FROM {schema_table}
-    WHERE last_run_id={runner.get("lastRunId")};
+    WHERE last_csm_run_id={runner.get("lastRunId")};
     """
     curs.execute(sql_delete_from_metatable)
     conn.commit()
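
These hunks rename the metadata column last_run_id to last_csm_run_id, so RunnerMetadata tables created by 2.0.0 no longer match the new statements. The diff ships no migration; a hedged, illustrative one-off rename in the same cursor style as the surrounding code (schema and prefix are hypothetical):

# Hypothetical migration, not part of the library; table/schema names illustrative.
curs.execute('ALTER TABLE my_schema."Cosmotech_RunnerMetadata" RENAME COLUMN last_run_id TO last_csm_run_id;')
conn.commit()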

cosmotech/coal/postgresql/store.py

@@ -53,24 +53,27 @@ def dump_store_to_postgresql(
     selected_tables: list of tables to send
     fk_id: foreign key id to add to all table on all rows
     """
-    _c = Configuration()
-    _c.postgres.host = postgres_host
-    _c.postgres.port = postgres_port
-    _c.postgres.db_name = postgres_db
-    _c.postgres.db_schema = postgres_schema
-    _c.postgres.user_name = postgres_user
-    _c.postgres.user_password = postgres_password
-    _c.postgres.password_encoding = force_encode
-    _c.postgres.table_prefix = table_prefix
-
-    dump_store_to_postgresql_from_conf(
-        configuration=_c, store_folder=store_folder, replace=replace, selected_tables=selected_tables, fk_id=fk_id
+    _c = Configuration(
+        {
+            "coal": {"store": store_folder},
+            "postgres": {
+                "host": postgres_host,
+                "port": postgres_port,
+                "db_name": postgres_db,
+                "db_schema": postgres_schema,
+                "user_name": postgres_user,
+                "user_password": postgres_password,
+                "password_encoding": force_encode,
+                "table_prefix": table_prefix,
+            },
+        }
     )
 
+    dump_store_to_postgresql_from_conf(configuration=_c, replace=replace, selected_tables=selected_tables, fk_id=fk_id)
+
 
 def dump_store_to_postgresql_from_conf(
     configuration: Configuration,
-    store_folder: str,
     replace: bool = True,
     selected_tables: list[str] = [],
     fk_id: str = None,

@@ -80,14 +83,12 @@ def dump_store_to_postgresql_from_conf(
 
     Args:
         configuration: coal Configuration
-        store_folder: Folder containing the Store
        replace: Whether to replace existing tables
        selected_tables: list of tables to send
        fk_id: foreign key id to add to all table on all rows
    """
    _psql = PostgresUtils(configuration)
-    print(_psql.send_pyarrow_table_to_postgresql)
-    _s = Store(store_location=store_folder)
+    _s = Store(configuration=configuration)
 
    tables = list(_s.list_tables())
    if selected_tables:

@@ -100,18 +101,12 @@
         _s_time = perf_counter()
         target_table_name = f"{_psql.table_prefix}{table_name}"
         LOGGER.info(T("coal.services.database.table_entry").format(table=target_table_name))
-        if fk_id:
-            _s.execute_query(
-                f"""
-                ALTER TABLE {_psql.table_prefix}{table_name}
-                ADD run_id TEXT NOT NULL
-                DEFAULT ({fk_id})
-                """
-            )
         data = _s.get_table(table_name)
         if not len(data):
             LOGGER.info(T("coal.services.database.no_rows"))
             continue
+        if fk_id:
+            data = data.append_column("csm_run_id", [[fk_id] * data.num_rows])
         _dl_time = perf_counter()
         rows = _psql.send_pyarrow_table_to_postgresql(
             data,

@@ -120,7 +115,7 @@
         )
         if fk_id and _psql.is_metadata_exists():
             metadata_table = f"{_psql.table_prefix}RunnerMetadata"
-            _psql.add_fk_constraint(table_name, "run_id", metadata_table, "last_run_id")
+            _psql.add_fk_constraint(table_name, "csm_run_id", metadata_table, "last_csm_run_id")
 
         total_rows += rows
         _up_time = perf_counter()
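
dump_store_to_postgresql is now a thin wrapper that packs its arguments into a nested-dict Configuration (with the store folder under "coal.store") and delegates to dump_store_to_postgresql_from_conf. A hedged sketch of calling the configuration-based entry point directly, mirroring the dict layout the wrapper builds (all values illustrative):

conf = Configuration(
    {
        "coal": {"store": "/tmp/store"},  # replaces the old store_folder argument
        "postgres": {
            "host": "localhost",
            "port": 5432,
            "db_name": "cosmotech",
            "db_schema": "public",
            "user_name": "csm",
            "user_password": "secret",
            "password_encoding": False,
            "table_prefix": "Cosmotech_",
        },
    }
)
# fk_id now appends a csm_run_id column to each PyArrow table instead of ALTERing the store:
dump_store_to_postgresql_from_conf(configuration=conf, replace=True, fk_id="run-123")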

cosmotech/coal/postgresql/utils.py

@@ -75,7 +75,8 @@ class PostgresUtils:
             f"/{self.db_name}"
         )
 
-    def metadata_table_name(self):
+    @property
+    def metadata_table_name(self) -> str:
         return f"{self.table_prefix}RunnerMetadata"
 
     def get_postgresql_table_schema(self, target_table_name: str) -> Optional[pa.Schema]:
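
metadata_table_name becomes a read-only property, so call sites drop the parentheses:

_psql = PostgresUtils(configuration)
name = _psql.metadata_table_name  # was: _psql.metadata_table_name()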

cosmotech/coal/singlestore/store.py

@@ -12,15 +12,16 @@ This module provides functions for interacting with SingleStore databases
 for store operations.
 """
 
+import csv
 import pathlib
 import time
-import csv
+
 import singlestoredb as s2
+from cosmotech.orchestrator.utils.translate import T
 
 from cosmotech.coal.store.csv import store_csv_file
 from cosmotech.coal.store.store import Store
 from cosmotech.coal.utils.logger import LOGGER
-from cosmotech.orchestrator.utils.translate import T
 
 
 def _get_data(table_name: str, output_directory: str, cursor) -> None: