cosmotech-acceleration-library 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. cosmotech/coal/__init__.py +8 -0
  2. cosmotech/coal/aws/__init__.py +23 -0
  3. cosmotech/coal/aws/s3.py +235 -0
  4. cosmotech/coal/azure/__init__.py +23 -0
  5. cosmotech/coal/azure/adx/__init__.py +26 -0
  6. cosmotech/coal/azure/adx/auth.py +125 -0
  7. cosmotech/coal/azure/adx/ingestion.py +329 -0
  8. cosmotech/coal/azure/adx/query.py +56 -0
  9. cosmotech/coal/azure/adx/runner.py +217 -0
  10. cosmotech/coal/azure/adx/store.py +255 -0
  11. cosmotech/coal/azure/adx/tables.py +118 -0
  12. cosmotech/coal/azure/adx/utils.py +71 -0
  13. cosmotech/coal/azure/blob.py +109 -0
  14. cosmotech/coal/azure/functions.py +72 -0
  15. cosmotech/coal/azure/storage.py +74 -0
  16. cosmotech/coal/cosmotech_api/__init__.py +36 -0
  17. cosmotech/coal/cosmotech_api/connection.py +96 -0
  18. cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
  19. cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
  20. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
  21. cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
  22. cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
  23. cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
  24. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
  25. cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
  26. cosmotech/coal/cosmotech_api/parameters.py +48 -0
  27. cosmotech/coal/cosmotech_api/run.py +25 -0
  28. cosmotech/coal/cosmotech_api/run_data.py +173 -0
  29. cosmotech/coal/cosmotech_api/run_template.py +108 -0
  30. cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
  31. cosmotech/coal/cosmotech_api/runner/data.py +38 -0
  32. cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
  33. cosmotech/coal/cosmotech_api/runner/download.py +146 -0
  34. cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
  35. cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
  36. cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
  37. cosmotech/coal/cosmotech_api/workspace.py +127 -0
  38. cosmotech/coal/csm/__init__.py +6 -0
  39. cosmotech/coal/csm/engine/__init__.py +47 -0
  40. cosmotech/coal/postgresql/__init__.py +22 -0
  41. cosmotech/coal/postgresql/runner.py +93 -0
  42. cosmotech/coal/postgresql/store.py +98 -0
  43. cosmotech/coal/singlestore/__init__.py +17 -0
  44. cosmotech/coal/singlestore/store.py +100 -0
  45. cosmotech/coal/store/__init__.py +42 -0
  46. cosmotech/coal/store/csv.py +44 -0
  47. cosmotech/coal/store/native_python.py +25 -0
  48. cosmotech/coal/store/pandas.py +26 -0
  49. cosmotech/coal/store/pyarrow.py +23 -0
  50. cosmotech/coal/store/store.py +79 -0
  51. cosmotech/coal/utils/__init__.py +18 -0
  52. cosmotech/coal/utils/api.py +68 -0
  53. cosmotech/coal/utils/logger.py +10 -0
  54. cosmotech/coal/utils/postgresql.py +236 -0
  55. cosmotech/csm_data/__init__.py +6 -0
  56. cosmotech/csm_data/commands/__init__.py +6 -0
  57. cosmotech/csm_data/commands/adx_send_data.py +92 -0
  58. cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
  59. cosmotech/csm_data/commands/api/__init__.py +6 -0
  60. cosmotech/csm_data/commands/api/api.py +50 -0
  61. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
  62. cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
  63. cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
  64. cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
  65. cosmotech/csm_data/commands/api/run_load_data.py +120 -0
  66. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
  67. cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
  68. cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
  69. cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
  70. cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
  71. cosmotech/csm_data/commands/az_storage_upload.py +76 -0
  72. cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
  73. cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
  74. cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
  75. cosmotech/csm_data/commands/store/__init__.py +6 -0
  76. cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
  77. cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
  78. cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
  79. cosmotech/csm_data/commands/store/list_tables.py +48 -0
  80. cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
  81. cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
  82. cosmotech/csm_data/commands/store/reset.py +31 -0
  83. cosmotech/csm_data/commands/store/store.py +37 -0
  84. cosmotech/csm_data/main.py +57 -0
  85. cosmotech/csm_data/utils/__init__.py +6 -0
  86. cosmotech/csm_data/utils/click.py +18 -0
  87. cosmotech/csm_data/utils/decorators.py +75 -0
  88. cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
  89. cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
  90. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
  91. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
  92. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
  93. cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
  94. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
  95. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
  96. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
  97. cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
  98. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
  99. cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
  100. cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
  101. cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
  102. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
  103. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
  104. cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
  105. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
  106. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
  107. cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
  108. cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
  109. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
  110. cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
  111. cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
  112. cosmotech/translation/coal/__init__.py +6 -0
  113. cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
  114. cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
  115. cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
  116. cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
  117. cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
  118. cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
  119. cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
  120. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
  121. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
  122. cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
  123. cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
  124. cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
  125. cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
  126. cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
  127. cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
  128. cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
  129. cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
  130. cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
  131. cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
  132. cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
  133. cosmotech/translation/coal/en-US/coal/web.yml +2 -0
  134. cosmotech/translation/csm_data/__init__.py +6 -0
  135. cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
  136. cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
  137. cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
  138. cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
  139. cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
  140. cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
  141. cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,79 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ import os
9
+ import pathlib
10
+
11
+ import pyarrow
12
+ from adbc_driver_sqlite import dbapi
13
+
14
+ from cosmotech.coal.utils.logger import LOGGER
15
+ from cosmotech.orchestrator.utils.translate import T
16
+
17
+
18
class Store:
    """Local table store backed by a SQLite database accessed through ADBC.

    The database file is ``<store_location>/.coal/store/db.sqlite``. Tables are
    written from and read back as Arrow data.
    """

    @staticmethod
    def sanitize_column(column_name: str) -> str:
        """Return ``column_name`` with every space replaced by an underscore."""
        return column_name.replace(" ", "_")

    def __init__(
        self,
        reset=False,
        store_location: pathlib.Path = None,
    ):
        """Prepare the store folder and, optionally, wipe the existing database.

        Args:
            reset: when true, delete the database file if it already exists.
            store_location: folder that will contain ``.coal/store``. When
                omitted, the ``CSM_PARAMETERS_ABSOLUTE_PATH`` environment
                variable is used, falling back to the current directory.
        """
        # The environment is read at call time (not at import time) so that a
        # variable set after the module was imported is still honoured.
        if store_location is None:
            store_location = os.environ.get("CSM_PARAMETERS_ABSOLUTE_PATH", ".")
        self.store_location = pathlib.Path(store_location) / ".coal/store"
        self.store_location.mkdir(parents=True, exist_ok=True)
        self._tables = dict()
        self._database_path = self.store_location / "db.sqlite"
        if reset:
            self.reset()
        self._database = str(self._database_path)

    def reset(self):
        """Delete the database file if it exists."""
        if self._database_path.exists():
            self._database_path.unlink()

    def get_table(self, table_name: str) -> pyarrow.Table:
        """Return the full content of ``table_name``.

        Raises:
            ValueError: if the table is not listed by ``list_tables``.
        """
        if not self.table_exists(table_name):
            raise ValueError(T("coal.errors.data.no_table").format(table_name=table_name))
        # Quote the identifier so names containing spaces, quotes or SQL
        # keywords are read correctly instead of breaking the statement.
        quoted_name = '"' + table_name.replace('"', '""') + '"'
        return self.execute_query(f"select * from {quoted_name}")

    def table_exists(self, table_name) -> bool:
        """Tell whether ``table_name`` is one of the tables of the store."""
        return table_name in self.list_tables()

    def get_table_schema(self, table_name: str) -> pyarrow.Schema:
        """Return the Arrow schema of ``table_name``.

        Raises:
            ValueError: if the table is not listed by ``list_tables``.
        """
        if not self.table_exists(table_name):
            raise ValueError(T("coal.errors.data.no_table").format(table_name=table_name))
        with dbapi.connect(self._database) as conn:
            return conn.adbc_get_table_schema(table_name)

    def add_table(self, table_name: str, data: pyarrow.Table = None, replace: bool = False):
        """Ingest ``data`` into ``table_name``.

        Args:
            table_name: name of the destination table.
            data: Arrow data to ingest. The parameter keeps a default only for
                signature compatibility; a value is required.
            replace: when true the table is replaced, otherwise it is created
                if needed and the rows are appended.

        Raises:
            TypeError: if ``data`` is not provided.
        """
        if data is None:
            raise TypeError("add_table() missing required argument: 'data'")
        with dbapi.connect(self._database, autocommit=True) as conn:
            with conn.cursor() as curs:
                rows = curs.adbc_ingest(table_name, data, "replace" if replace else "create_append")
                LOGGER.debug(T("coal.common.data_transfer.rows_inserted").format(rows=rows, table_name=table_name))

    def execute_query(self, sql_query: str) -> pyarrow.Table:
        """Run ``sql_query`` against the store and return the result as a table.

        The query is retried with a larger ``adbc.sqlite.query.batch_rows``
        option (1024 more rows each time) whenever it fails with ``OSError``.
        """
        batch_size = 1024
        batch_size_increment = 1024
        # NOTE(review): the retry is unbounded - an OSError that does not depend
        # on the batch size would make this loop forever. Confirm whether a cap
        # is acceptable before adding one.
        while True:
            try:
                with dbapi.connect(self._database, autocommit=True) as conn:
                    with conn.cursor() as curs:
                        curs.adbc_statement.set_options(**{"adbc.sqlite.query.batch_rows": str(batch_size)})
                        curs.execute(sql_query)
                        return curs.fetch_arrow_table()
            except OSError:
                batch_size += batch_size_increment

    def list_tables(self) -> list[str]:
        """Return the names of the tables found in the database.

        A real list is returned (matching the annotation), so the connection is
        closed before the caller starts consuming the names.
        """
        with dbapi.connect(self._database) as conn:
            objects = conn.adbc_get_objects(depth="all").read_all()
            # First catalog, first schema of the ADBC "get objects" result.
            tables = objects["catalog_db_schemas"][0][0]["db_schema_tables"]
            return [table["table_name"].as_py() for table in tables]
@@ -0,0 +1,18 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ from cosmotech.coal import __version__
9
+
10
+ WEB_DOCUMENTATION_ROOT = f"https://cosmo-tech.github.io/CosmoTech-Acceleration-Library/{__version__}/"
11
+
12
+
13
def strtobool(string: str) -> bool:
    """Convert a textual truth value to a boolean (case-insensitive).

    Accepted true values: ``y``, ``yes``, ``t``, ``true``, ``on``, ``1``.
    Accepted false values: ``n``, ``no``, ``f``, ``false``, ``off``, ``0``.

    Raises:
        ValueError: if ``string`` is none of the values above.
    """
    lowered = string.lower()
    if lowered in ("y", "yes", "t", "true", "on", "1"):
        return True
    if lowered in ("n", "no", "f", "false", "off", "0"):
        return False
    # The offending value is now wrapped in a balanced pair of quotes.
    raise ValueError(f'"{string}" is not a recognized truth value')
@@ -0,0 +1,68 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ import json
9
+ import pathlib
10
+ from typing import Optional
11
+
12
+ import cosmotech_api
13
+ import yaml
14
+ from cosmotech_api.api.solution_api import Solution
15
+ from cosmotech_api.api.solution_api import SolutionApi
16
+ from cosmotech_api.api.workspace_api import Workspace
17
+ from cosmotech_api.api.workspace_api import WorkspaceApi
18
+ from cosmotech_api.exceptions import ServiceException
19
+
20
+ from cosmotech.coal.cosmotech_api.connection import get_api_client
21
+ from cosmotech.coal.utils.logger import LOGGER
22
+ from cosmotech.orchestrator.utils.translate import T
23
+
24
+
25
def read_solution_file(solution_file) -> Optional[Solution]:
    """Build a ``Solution`` from a YAML (``.yaml``/``.yml``) or JSON file.

    Args:
        solution_file: path of the file describing the solution.

    Returns:
        The ``Solution`` built from the file content, or ``None`` (after
        logging an error) when the file suffix is not supported.
    """
    solution_path = pathlib.Path(solution_file)

    # Pick the parser from the file suffix.
    parsers = {
        ".yaml": yaml.safe_load,
        ".yml": yaml.safe_load,
        ".json": json.load,
    }
    parser = parsers.get(solution_path.suffix)
    if parser is None:
        LOGGER.error(T("coal.cosmotech_api.solution.invalid_file").format(file=solution_file))
        return None

    with solution_path.open() as stream:
        content = parser(stream)
    LOGGER.info(T("coal.cosmotech_api.solution.loaded").format(path=solution_path.absolute()))

    # The file content is expanded as keyword arguments of the model.
    solution = Solution(
        _configuration=cosmotech_api.Configuration(),
        _spec_property_naming=True,
        **content,
    )
    dumped = json.dumps(solution.to_dict(), indent=2, default=str)
    LOGGER.debug(T("coal.services.api.solution_debug").format(solution=dumped))
    return solution
46
+
47
+
48
def get_solution(organization_id, workspace_id) -> Optional[Solution]:
    """Fetch the solution attached to a workspace through the API.

    Args:
        organization_id: identifier of the organization owning the workspace.
        workspace_id: identifier of the workspace to look up.

    Returns:
        The ``Solution`` referenced by the workspace, or ``None`` (after
        logging an error) when the workspace lookup raises
        ``ServiceException``.
    """
    LOGGER.info(T("coal.cosmotech_api.solution.api_configured"))
    with get_api_client()[0] as client:
        workspace_api = WorkspaceApi(client)

        LOGGER.info(T("coal.cosmotech_api.solution.loading_workspace"))
        try:
            workspace: Workspace = workspace_api.find_workspace_by_id(
                organization_id=organization_id,
                workspace_id=workspace_id,
            )
        except ServiceException as error:
            message = T("coal.cosmotech_api.workspace.not_found").format(
                workspace_id=workspace_id, organization_id=organization_id
            )
            LOGGER.error(message)
            LOGGER.debug(error)
            return None

        # The workspace carries the identifier of its solution.
        solution_id = workspace.solution.solution_id
        solution_api = SolutionApi(client)
        solution: Solution = solution_api.find_solution_by_id(
            organization_id=organization_id,
            solution_id=solution_id,
        )
        return solution
@@ -0,0 +1,10 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ from cosmotech.orchestrator.utils.logger import get_logger
9
+
10
+ LOGGER = get_logger("csm.data")
@@ -0,0 +1,236 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ from typing import Optional
9
+ from urllib.parse import quote
10
+
11
+ import adbc_driver_manager
12
+ import pyarrow as pa
13
+ from adbc_driver_postgresql import dbapi
14
+ from cosmotech.orchestrator.utils.translate import T
15
+ from pyarrow import Table
16
+
17
+ from cosmotech.coal.utils.logger import LOGGER
18
+
19
+
20
def generate_postgresql_full_uri(
    postgres_host: str,
    postgres_port: str,
    postgres_db: str,
    postgres_user: str,
    postgres_password: str,
    force_encode: bool = False,
) -> str:
    """Assemble a ``postgresql://user:password@host:port/db`` connection URI.

    Args:
        postgres_host: PostgreSQL host
        postgres_port: PostgreSQL port
        postgres_db: PostgreSQL database name
        postgres_user: PostgreSQL username
        postgres_password: PostgreSQL password
        force_encode: when true, the password is fully percent-encoded;
            otherwise it is inserted as given.

    Returns:
        The connection URI.
    """
    # Nothing about the password is logged, for security.
    if force_encode:
        password_part = quote(postgres_password, safe="")
    else:
        password_part = postgres_password

    return f"postgresql://{postgres_user}:{password_part}@{postgres_host}:{postgres_port}/{postgres_db}"
41
+
42
+
43
def get_postgresql_table_schema(
    target_table_name: str,
    postgres_host: str,
    postgres_port: str,
    postgres_db: str,
    postgres_schema: str,
    postgres_user: str,
    postgres_password: str,
    force_encode: bool = False,
) -> Optional[pa.Schema]:
    """
    Look up the Arrow schema of an existing PostgreSQL table.

    Args:
        target_table_name: Name of the table
        postgres_host: PostgreSQL host
        postgres_port: PostgreSQL port
        postgres_db: PostgreSQL database name
        postgres_schema: PostgreSQL schema name
        postgres_user: PostgreSQL username
        postgres_password: PostgreSQL password
        force_encode: Forwarded to ``generate_postgresql_full_uri`` to
            percent-encode the password

    Returns:
        PyArrow Schema of the table, or None (after a warning) when the
        lookup raises ``adbc_driver_manager.ProgrammingError``
    """
    LOGGER.debug(
        T("coal.services.postgresql.getting_schema").format(
            postgres_schema=postgres_schema, target_table_name=target_table_name
        )
    )

    connection_uri = generate_postgresql_full_uri(
        postgres_host=postgres_host,
        postgres_port=postgres_port,
        postgres_db=postgres_db,
        postgres_user=postgres_user,
        postgres_password=postgres_password,
        force_encode=force_encode,
    )

    with dbapi.connect(connection_uri) as connection:
        try:
            table_schema = connection.adbc_get_table_schema(
                target_table_name,
                db_schema_filter=postgres_schema,
            )
        except adbc_driver_manager.ProgrammingError:
            not_found_message = T("coal.services.postgresql.table_not_found").format(
                postgres_schema=postgres_schema, target_table_name=target_table_name
            )
            LOGGER.warning(not_found_message)
            return None
        else:
            return table_schema
96
+
97
+
98
def adapt_table_to_schema(data: pa.Table, target_schema: pa.Schema) -> pa.Table:
    """
    Adapt a PyArrow table to match a target schema with detailed logging.

    For every field of ``target_schema``:
      - a column of ``data`` with the same name and type is reused as is;
      - a column with the same name but another type is cast to the target
        type; when the cast raises ``pa.ArrowInvalid`` a warning is logged and
        the column is replaced by nulls;
      - a field with no matching column is filled with nulls.
    Columns of ``data`` that are not in ``target_schema`` are dropped.

    Args:
        data: table to adapt
        target_schema: schema the returned table must follow

    Returns:
        A new table built with ``target_schema``
    """
    row_count = len(data)
    LOGGER.debug(T("coal.services.postgresql.schema_adaptation_start").format(rows=row_count))
    LOGGER.debug(T("coal.services.postgresql.original_schema").format(schema=data.schema))
    LOGGER.debug(T("coal.services.postgresql.target_schema").format(schema=target_schema))

    target_fields = {field.name: field.type for field in target_schema}
    # Computed once: ``column_names`` is rebuilt on every access.
    source_names = set(data.column_names)
    new_columns = []

    # Track adaptations for summary
    added_columns = []
    type_conversions = []
    failed_conversions = []

    # Process each field in target schema
    for field_name, target_type in target_fields.items():
        if field_name not in source_names:
            # Column doesn't exist - add nulls
            LOGGER.debug(T("coal.services.postgresql.adding_missing_column").format(field_name=field_name))
            new_columns.append(pa.nulls(row_count, type=target_type))
            added_columns.append(field_name)
            continue

        col = data[field_name]
        original_type = col.type
        if original_type == target_type:
            new_columns.append(col)
            continue

        # Column exists with another type - try to cast to target type
        LOGGER.debug(
            T("coal.services.postgresql.casting_column").format(
                field_name=field_name,
                original_type=original_type,
                target_type=target_type,
            )
        )
        conversion = f"{field_name}: {original_type} -> {target_type}"
        try:
            # Use the column's own ``cast`` method: ``pa.compute`` is only
            # available as an attribute once ``pyarrow.compute`` has been
            # imported somewhere, which this module does not do itself.
            new_col = col.cast(target_type)
        except pa.ArrowInvalid as e:
            LOGGER.warning(
                T("coal.services.postgresql.cast_failed").format(
                    field_name=field_name,
                    original_type=original_type,
                    target_type=target_type,
                    error=str(e),
                )
            )
            new_columns.append(pa.nulls(row_count, type=target_type))
            failed_conversions.append(conversion)
        else:
            new_columns.append(new_col)
            type_conversions.append(conversion)

    # Log columns that will be dropped
    dropped_columns = [name for name in data.column_names if name not in target_fields]
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropping_columns").format(columns=dropped_columns))

    # Create new table
    adapted_table = pa.Table.from_arrays(new_columns, schema=target_schema)

    # Log summary of adaptations
    LOGGER.debug(T("coal.services.postgresql.adaptation_summary"))
    if added_columns:
        LOGGER.debug(T("coal.services.postgresql.added_columns").format(columns=added_columns))
    if dropped_columns:
        LOGGER.debug(T("coal.services.postgresql.dropped_columns").format(columns=dropped_columns))
    if type_conversions:
        LOGGER.debug(T("coal.services.postgresql.successful_conversions").format(conversions=type_conversions))
    if failed_conversions:
        LOGGER.debug(T("coal.services.postgresql.failed_conversions").format(conversions=failed_conversions))

    LOGGER.debug(T("coal.services.postgresql.final_schema").format(schema=adapted_table.schema))
    return adapted_table
174
+
175
+
176
def send_pyarrow_table_to_postgresql(
    data: Table,
    target_table_name: str,
    postgres_host: str,
    postgres_port: str,
    postgres_db: str,
    postgres_schema: str,
    postgres_user: str,
    postgres_password: str,
    replace: bool,
    force_encode: bool = False,
) -> int:
    """
    Ingest a PyArrow table into a PostgreSQL table.

    When the target table already exists and ``replace`` is false, the data is
    first adapted to the existing table schema.

    Args:
        data: Table to send
        target_table_name: Name of the destination table
        postgres_host: PostgreSQL host
        postgres_port: PostgreSQL port
        postgres_db: PostgreSQL database name
        postgres_schema: PostgreSQL schema name
        postgres_user: PostgreSQL username
        postgres_password: PostgreSQL password
        replace: Replace the table instead of creating/appending to it
        force_encode: Forwarded to ``generate_postgresql_full_uri`` to
            percent-encode the password

    Returns:
        The value reported by the ingestion call
    """
    LOGGER.debug(
        T("coal.services.postgresql.preparing_send").format(
            postgres_schema=postgres_schema, target_table_name=target_table_name
        )
    )
    LOGGER.debug(T("coal.services.postgresql.input_rows").format(rows=len(data)))

    # Get existing schema if table exists
    current_schema = get_postgresql_table_schema(
        target_table_name,
        postgres_host,
        postgres_port,
        postgres_db,
        postgres_schema,
        postgres_user,
        postgres_password,
        force_encode,
    )

    if current_schema is None:
        LOGGER.debug(T("coal.services.postgresql.no_existing_table"))
    else:
        LOGGER.debug(T("coal.services.postgresql.found_existing_table").format(schema=current_schema))
        if replace:
            LOGGER.debug(T("coal.services.postgresql.replace_mode"))
        else:
            LOGGER.debug(T("coal.services.postgresql.adapting_data"))
            data = adapt_table_to_schema(data, current_schema)

    # Proceed with ingestion
    connection_uri = generate_postgresql_full_uri(
        postgres_host,
        postgres_port,
        postgres_db,
        postgres_user,
        postgres_password,
        force_encode,
    )
    ingestion_mode = "replace" if replace else "create_append"

    LOGGER.debug(T("coal.services.postgresql.connecting"))
    with dbapi.connect(connection_uri, autocommit=True) as connection:
        with connection.cursor() as cursor:
            LOGGER.debug(T("coal.services.postgresql.ingesting_data").format(mode=ingestion_mode))
            sent_rows = 0 + cursor.adbc_ingest(
                target_table_name,
                data,
                ingestion_mode,
                db_schema_name=postgres_schema,
            )

    LOGGER.debug(T("coal.services.postgresql.ingestion_success").format(rows=sent_rows))
    return sent_rows
@@ -0,0 +1,6 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
@@ -0,0 +1,6 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
@@ -0,0 +1,92 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+ from cosmotech.orchestrator.utils.translate import T
8
+
9
+ from cosmotech.csm_data.utils.click import click
10
+ from cosmotech.csm_data.utils.decorators import web_help, translate_help
11
+
12
+
13
@click.command()
@web_help("csm-data/adx-send-data")
@translate_help("csm-data.commands.storage.adx_send_data.description")
@click.option(
    "--adx-uri",
    envvar="AZURE_DATA_EXPLORER_RESOURCE_URI",
    show_envvar=True,
    required=True,
    metavar="URI",
    help=T("csm-data.commands.storage.adx_send_data.parameters.adx_uri"),
)
@click.option(
    "--adx-ingest-uri",
    envvar="AZURE_DATA_EXPLORER_RESOURCE_INGEST_URI",
    show_envvar=True,
    required=True,
    metavar="URI",
    help=T("csm-data.commands.storage.adx_send_data.parameters.adx_ingest_uri"),
)
@click.option(
    "--database-name",
    envvar="AZURE_DATA_EXPLORER_DATABASE_NAME",
    show_envvar=True,
    required=True,
    metavar="NAME",
    help=T("csm-data.commands.storage.adx_send_data.parameters.database_name"),
)
@click.option(
    "--wait/--no-wait",
    "wait",
    envvar="CSM_DATA_ADX_WAIT_INGESTION",
    show_envvar=True,
    default=False,
    show_default=True,
    help=T("csm-data.commands.storage.adx_send_data.parameters.waiting_ingestion"),
)
@click.option(
    "--tag",
    envvar="CSM_DATA_ADX_TAG",
    show_envvar=True,
    default=None,
    help=T("csm-data.commands.storage.adx_send_data.parameters.adx_tag"),
)
@click.option(
    "--store-folder",
    envvar="CSM_PARAMETERS_ABSOLUTE_PATH",
    help=T("csm-data.commands.storage.adx_send_data.parameters.store_folder"),
    metavar="PATH",
    type=str,
    show_envvar=True,
    required=True,
)
def adx_send_data(
    adx_uri: str,
    adx_ingest_uri: str,
    database_name: str,
    wait: bool,
    store_folder: str,
    tag: str = None,
):
    """
    Send data from the store to Azure Data Explorer.
    """
    # Imported inside the command so the ADX dependencies are only loaded when
    # the command actually runs.
    from cosmotech.coal.azure.adx.store import send_store_to_adx

    success = send_store_to_adx(
        adx_uri=adx_uri,
        adx_ingest_uri=adx_ingest_uri,
        database_name=database_name,
        wait=wait,
        tag=tag,
        store_location=store_folder,
    )

    if not success:
        # The exception must be raised: merely instantiating it let a failed
        # send finish as if it had succeeded.
        raise click.Abort()
89
+
90
+
91
+ if __name__ == "__main__":
92
+ adx_send_data()
@@ -0,0 +1,119 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.
7
+
8
+ from cosmotech.csm_data.utils.click import click
9
+ from cosmotech.csm_data.utils.decorators import web_help, translate_help
10
+ from cosmotech.orchestrator.utils.translate import T
11
+
12
+
13
@click.command()
@web_help("csm-data/adx-send-runner-data")
@translate_help("csm-data.commands.storage.adx_send_runnerdata.description")
@click.option(
    "--dataset-absolute-path",
    envvar="CSM_DATASET_ABSOLUTE_PATH",
    metavar="PATH",
    required=True,
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.dataset_absolute_path"),
)
@click.option(
    "--parameters-absolute-path",
    envvar="CSM_PARAMETERS_ABSOLUTE_PATH",
    metavar="PATH",
    required=True,
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.parameters_absolute_path"),
)
@click.option(
    "--runner-id",
    envvar="CSM_RUNNER_ID",
    metavar="UUID",
    required=True,
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.runner_id"),
)
@click.option(
    "--adx-uri",
    envvar="AZURE_DATA_EXPLORER_RESOURCE_URI",
    metavar="URI",
    required=True,
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.adx_uri"),
)
@click.option(
    "--adx-ingest-uri",
    envvar="AZURE_DATA_EXPLORER_RESOURCE_INGEST_URI",
    metavar="URI",
    required=True,
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.adx_ingest_uri"),
)
@click.option(
    "--database-name",
    envvar="AZURE_DATA_EXPLORER_DATABASE_NAME",
    metavar="NAME",
    required=True,
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.database_name"),
)
@click.option(
    "--send-parameters/--no-send-parameters",
    type=bool,
    envvar="CSM_SEND_DATAWAREHOUSE_PARAMETERS",
    default=False,
    show_default=True,
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.send_parameters"),
)
@click.option(
    "--send-datasets/--no-send-datasets",
    type=bool,
    envvar="CSM_SEND_DATAWAREHOUSE_DATASETS",
    default=False,
    show_default=True,
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.send_datasets"),
)
@click.option(
    "--wait/--no-wait",
    envvar="WAIT_FOR_INGESTION",
    default=False,
    show_default=True,
    show_envvar=True,
    help=T("csm-data.commands.storage.adx_send_runnerdata.parameters.wait"),
)
def adx_send_runnerdata(
    send_parameters: bool,
    send_datasets: bool,
    dataset_absolute_path: str,
    parameters_absolute_path: str,
    runner_id: str,
    adx_uri: str,
    adx_ingest_uri: str,
    database_name: str,
    wait: bool,
):
    # Command entry point: forwards every option, unchanged, to
    # ``send_runner_data``. The import is done here so it only happens when the
    # command is executed.
    from cosmotech.coal.azure.adx.runner import send_runner_data

    send_runner_data(
        runner_id=runner_id,
        dataset_absolute_path=dataset_absolute_path,
        parameters_absolute_path=parameters_absolute_path,
        database_name=database_name,
        adx_uri=adx_uri,
        adx_ingest_uri=adx_ingest_uri,
        send_datasets=send_datasets,
        send_parameters=send_parameters,
        wait=wait,
    )
116
+
117
+
118
+ if __name__ == "__main__":
119
+ adx_send_runnerdata()
@@ -0,0 +1,6 @@
1
+ # Copyright (C) - 2023 - 2025 - Cosmo Tech
2
+ # This document and all information contained herein is the exclusive property -
3
+ # including all intellectual property rights pertaining thereto - of Cosmo Tech.
4
+ # Any use, reproduction, translation, broadcasting, transmission, distribution,
5
+ # etc., to any person is prohibited unless it has been previously and
6
+ # specifically authorized by written means by Cosmo Tech.