cosmotech-acceleration-library 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosmotech/coal/__init__.py +8 -0
- cosmotech/coal/aws/__init__.py +23 -0
- cosmotech/coal/aws/s3.py +235 -0
- cosmotech/coal/azure/__init__.py +23 -0
- cosmotech/coal/azure/adx/__init__.py +26 -0
- cosmotech/coal/azure/adx/auth.py +125 -0
- cosmotech/coal/azure/adx/ingestion.py +329 -0
- cosmotech/coal/azure/adx/query.py +56 -0
- cosmotech/coal/azure/adx/runner.py +217 -0
- cosmotech/coal/azure/adx/store.py +255 -0
- cosmotech/coal/azure/adx/tables.py +118 -0
- cosmotech/coal/azure/adx/utils.py +71 -0
- cosmotech/coal/azure/blob.py +109 -0
- cosmotech/coal/azure/functions.py +72 -0
- cosmotech/coal/azure/storage.py +74 -0
- cosmotech/coal/cosmotech_api/__init__.py +36 -0
- cosmotech/coal/cosmotech_api/connection.py +96 -0
- cosmotech/coal/cosmotech_api/dataset/__init__.py +26 -0
- cosmotech/coal/cosmotech_api/dataset/converters.py +164 -0
- cosmotech/coal/cosmotech_api/dataset/download/__init__.py +19 -0
- cosmotech/coal/cosmotech_api/dataset/download/adt.py +119 -0
- cosmotech/coal/cosmotech_api/dataset/download/common.py +140 -0
- cosmotech/coal/cosmotech_api/dataset/download/file.py +216 -0
- cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +188 -0
- cosmotech/coal/cosmotech_api/dataset/utils.py +132 -0
- cosmotech/coal/cosmotech_api/parameters.py +48 -0
- cosmotech/coal/cosmotech_api/run.py +25 -0
- cosmotech/coal/cosmotech_api/run_data.py +173 -0
- cosmotech/coal/cosmotech_api/run_template.py +108 -0
- cosmotech/coal/cosmotech_api/runner/__init__.py +28 -0
- cosmotech/coal/cosmotech_api/runner/data.py +38 -0
- cosmotech/coal/cosmotech_api/runner/datasets.py +364 -0
- cosmotech/coal/cosmotech_api/runner/download.py +146 -0
- cosmotech/coal/cosmotech_api/runner/metadata.py +42 -0
- cosmotech/coal/cosmotech_api/runner/parameters.py +157 -0
- cosmotech/coal/cosmotech_api/twin_data_layer.py +512 -0
- cosmotech/coal/cosmotech_api/workspace.py +127 -0
- cosmotech/coal/csm/__init__.py +6 -0
- cosmotech/coal/csm/engine/__init__.py +47 -0
- cosmotech/coal/postgresql/__init__.py +22 -0
- cosmotech/coal/postgresql/runner.py +93 -0
- cosmotech/coal/postgresql/store.py +98 -0
- cosmotech/coal/singlestore/__init__.py +17 -0
- cosmotech/coal/singlestore/store.py +100 -0
- cosmotech/coal/store/__init__.py +42 -0
- cosmotech/coal/store/csv.py +44 -0
- cosmotech/coal/store/native_python.py +25 -0
- cosmotech/coal/store/pandas.py +26 -0
- cosmotech/coal/store/pyarrow.py +23 -0
- cosmotech/coal/store/store.py +79 -0
- cosmotech/coal/utils/__init__.py +18 -0
- cosmotech/coal/utils/api.py +68 -0
- cosmotech/coal/utils/logger.py +10 -0
- cosmotech/coal/utils/postgresql.py +236 -0
- cosmotech/csm_data/__init__.py +6 -0
- cosmotech/csm_data/commands/__init__.py +6 -0
- cosmotech/csm_data/commands/adx_send_data.py +92 -0
- cosmotech/csm_data/commands/adx_send_runnerdata.py +119 -0
- cosmotech/csm_data/commands/api/__init__.py +6 -0
- cosmotech/csm_data/commands/api/api.py +50 -0
- cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +119 -0
- cosmotech/csm_data/commands/api/rds_load_csv.py +90 -0
- cosmotech/csm_data/commands/api/rds_send_csv.py +74 -0
- cosmotech/csm_data/commands/api/rds_send_store.py +74 -0
- cosmotech/csm_data/commands/api/run_load_data.py +120 -0
- cosmotech/csm_data/commands/api/runtemplate_load_handler.py +66 -0
- cosmotech/csm_data/commands/api/tdl_load_files.py +76 -0
- cosmotech/csm_data/commands/api/tdl_send_files.py +82 -0
- cosmotech/csm_data/commands/api/wsf_load_file.py +66 -0
- cosmotech/csm_data/commands/api/wsf_send_file.py +68 -0
- cosmotech/csm_data/commands/az_storage_upload.py +76 -0
- cosmotech/csm_data/commands/s3_bucket_delete.py +107 -0
- cosmotech/csm_data/commands/s3_bucket_download.py +118 -0
- cosmotech/csm_data/commands/s3_bucket_upload.py +128 -0
- cosmotech/csm_data/commands/store/__init__.py +6 -0
- cosmotech/csm_data/commands/store/dump_to_azure.py +120 -0
- cosmotech/csm_data/commands/store/dump_to_postgresql.py +107 -0
- cosmotech/csm_data/commands/store/dump_to_s3.py +169 -0
- cosmotech/csm_data/commands/store/list_tables.py +48 -0
- cosmotech/csm_data/commands/store/load_csv_folder.py +43 -0
- cosmotech/csm_data/commands/store/load_from_singlestore.py +96 -0
- cosmotech/csm_data/commands/store/reset.py +31 -0
- cosmotech/csm_data/commands/store/store.py +37 -0
- cosmotech/csm_data/main.py +57 -0
- cosmotech/csm_data/utils/__init__.py +6 -0
- cosmotech/csm_data/utils/click.py +18 -0
- cosmotech/csm_data/utils/decorators.py +75 -0
- cosmotech/orchestrator_plugins/csm-data/__init__.py +11 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/postgres_send_runner_metadata.json +40 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/run_load_data.json +30 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +32 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +27 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/try_api_connection.json +9 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_load_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/api/wsf_send_file.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/adx_send_runnerdata.json +29 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/az_storage_upload.json +25 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_delete.json +31 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_download.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/main/s3_bucket_upload.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_azure.json +35 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_postgresql.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_dump_to_s3.json +36 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_list_tables.json +15 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_csv_folder.json +18 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_load_from_singlestore.json +34 -0
- cosmotech/orchestrator_plugins/csm-data/templates/store/store_reset.json +15 -0
- cosmotech/translation/coal/__init__.py +6 -0
- cosmotech/translation/coal/en-US/coal/common/data_transfer.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/errors.yml +9 -0
- cosmotech/translation/coal/en-US/coal/common/file_operations.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/progress.yml +6 -0
- cosmotech/translation/coal/en-US/coal/common/timing.yml +5 -0
- cosmotech/translation/coal/en-US/coal/common/validation.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/connection.yml +10 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +2 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_template.yml +8 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/runner.yml +16 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/solution.yml +5 -0
- cosmotech/translation/coal/en-US/coal/cosmotech_api/workspace.yml +7 -0
- cosmotech/translation/coal/en-US/coal/services/adx.yml +59 -0
- cosmotech/translation/coal/en-US/coal/services/api.yml +8 -0
- cosmotech/translation/coal/en-US/coal/services/azure_storage.yml +14 -0
- cosmotech/translation/coal/en-US/coal/services/database.yml +19 -0
- cosmotech/translation/coal/en-US/coal/services/dataset.yml +68 -0
- cosmotech/translation/coal/en-US/coal/services/postgresql.yml +28 -0
- cosmotech/translation/coal/en-US/coal/services/s3.yml +9 -0
- cosmotech/translation/coal/en-US/coal/solution.yml +3 -0
- cosmotech/translation/coal/en-US/coal/web.yml +2 -0
- cosmotech/translation/csm_data/__init__.py +6 -0
- cosmotech/translation/csm_data/en-US/csm-data.yml +434 -0
- cosmotech_acceleration_library-1.0.0.dist-info/METADATA +255 -0
- cosmotech_acceleration_library-1.0.0.dist-info/RECORD +141 -0
- cosmotech_acceleration_library-1.0.0.dist-info/WHEEL +5 -0
- cosmotech_acceleration_library-1.0.0.dist-info/entry_points.txt +2 -0
- cosmotech_acceleration_library-1.0.0.dist-info/licenses/LICENSE +17 -0
- cosmotech_acceleration_library-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
# Version string exposed by this package.
__version__ = "1.0.0"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
AWS services integration module.
|
|
10
|
+
|
|
11
|
+
This module provides functions for interacting with AWS services like S3.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
# Re-export S3 functions for easier importing
|
|
15
|
+
from cosmotech.coal.aws.s3 import (
|
|
16
|
+
create_s3_client,
|
|
17
|
+
create_s3_resource,
|
|
18
|
+
upload_file,
|
|
19
|
+
upload_folder,
|
|
20
|
+
download_files,
|
|
21
|
+
upload_data_stream,
|
|
22
|
+
delete_objects,
|
|
23
|
+
)
|
cosmotech/coal/aws/s3.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
S3 bucket operations module.
|
|
10
|
+
|
|
11
|
+
This module provides functions for interacting with S3 buckets, including
|
|
12
|
+
uploading, downloading, and deleting files.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import pathlib
|
|
16
|
+
from io import BytesIO
|
|
17
|
+
from typing import Optional, Dict, Any, List, Iterator
|
|
18
|
+
|
|
19
|
+
import boto3
|
|
20
|
+
|
|
21
|
+
from cosmotech.coal.utils.logger import LOGGER
|
|
22
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def create_s3_client(
    endpoint_url: str,
    access_id: str,
    secret_key: str,
    use_ssl: bool = True,
    ssl_cert_bundle: Optional[str] = None,
) -> boto3.client:
    """
    Build a boto3 S3 client for the given endpoint and credentials.

    Args:
        endpoint_url: URL of the S3 endpoint to connect to
        access_id: Access key ID, forwarded to boto3 as ``aws_access_key_id``
        secret_key: Secret access key, forwarded to boto3 as ``aws_secret_access_key``
        use_ssl: Forwarded to boto3 as ``use_ssl``
        ssl_cert_bundle: Optional path to a certificate bundle; when truthy it
            is forwarded to boto3 as ``verify``

    Returns:
        The object returned by ``boto3.client("s3", ...)``
    """
    # "verify" is only passed when a bundle was given; otherwise the keyword is
    # omitted entirely rather than being sent as None.
    verify_option = {"verify": ssl_cert_bundle} if ssl_cert_bundle else {}
    return boto3.client(
        "s3",
        use_ssl=use_ssl,
        endpoint_url=endpoint_url,
        aws_access_key_id=access_id,
        aws_secret_access_key=secret_key,
        **verify_option,
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def create_s3_resource(
    endpoint_url: str,
    access_id: str,
    secret_key: str,
    use_ssl: bool = True,
    ssl_cert_bundle: Optional[str] = None,
) -> boto3.resource:
    """
    Build a boto3 S3 resource for the given endpoint and credentials.

    Args:
        endpoint_url: URL of the S3 endpoint to connect to
        access_id: Access key ID, forwarded to boto3 as ``aws_access_key_id``
        secret_key: Secret access key, forwarded to boto3 as ``aws_secret_access_key``
        use_ssl: Forwarded to boto3 as ``use_ssl``
        ssl_cert_bundle: Optional path to a certificate bundle; when truthy it
            is forwarded to boto3 as ``verify``

    Returns:
        The object returned by ``boto3.resource("s3", ...)``
    """
    # "verify" is only passed when a bundle was given; otherwise the keyword is
    # omitted entirely rather than being sent as None.
    verify_option = {"verify": ssl_cert_bundle} if ssl_cert_bundle else {}
    return boto3.resource(
        "s3",
        use_ssl=use_ssl,
        endpoint_url=endpoint_url,
        aws_access_key_id=access_id,
        aws_secret_access_key=secret_key,
        **verify_option,
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def upload_file(
    file_path: pathlib.Path,
    bucket_name: str,
    s3_resource: boto3.resource,
    file_prefix: str = "",
) -> None:
    """
    Send one local file to an S3 bucket.

    The object key is ``file_prefix`` followed by the file's base name; any
    parent directories of ``file_path`` are not part of the key.

    Args:
        file_path: Path to the file to upload
        bucket_name: Name of the destination S3 bucket
        s3_resource: S3 resource object used to reach the bucket
        file_prefix: Prefix prepended to the file name to form the object key
    """
    target_key = file_prefix + file_path.name
    message = T("coal.common.data_transfer.file_sent").format(file_path=file_path, uploaded_name=target_key)
    LOGGER.info(message)
    destination_bucket = s3_resource.Bucket(bucket_name)
    destination_bucket.upload_file(str(file_path), target_key)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def upload_folder(
    source_folder: str,
    bucket_name: str,
    s3_resource: boto3.resource,
    file_prefix: str = "",
    recursive: bool = False,
) -> None:
    """
    Upload files from a folder to an S3 bucket.

    When ``source_folder`` is a directory, every regular file directly inside
    it (or anywhere below it when ``recursive`` is true) is uploaded under the
    key ``file_prefix`` + its path relative to ``source_folder``, using ``/``
    as separator. When ``source_folder`` is not a directory, it is uploaded
    through ``upload_file``.

    Args:
        source_folder: Path to the folder containing files to upload
        bucket_name: Name of the S3 bucket
        s3_resource: S3 resource object
        file_prefix: Prefix to add to the file names in the bucket
        recursive: Whether to recursively upload files from subdirectories

    Raises:
        FileNotFoundError: If ``source_folder`` does not exist
    """
    source_path = pathlib.Path(source_folder)
    if not source_path.exists():
        message = T("coal.common.file_operations.not_found").format(source_folder=source_folder)
        LOGGER.error(message)
        raise FileNotFoundError(message)

    if not source_path.is_dir():
        upload_file(source_path, bucket_name, s3_resource, file_prefix)
        return

    for _file_path in source_path.glob("**/*" if recursive else "*"):
        if not _file_path.is_file():
            continue
        # Build the key from path components instead of string prefixes:
        # string stripping would eat the leading dot of hidden files when the
        # source is "." and would keep OS-specific separators in the key.
        _file_name = _file_path.relative_to(source_path).as_posix()
        uploaded_file_name = file_prefix + _file_name
        LOGGER.info(
            T("coal.common.data_transfer.file_sent").format(file_path=_file_path, uploaded_name=uploaded_file_name)
        )
        s3_resource.Bucket(bucket_name).upload_file(str(_file_path), uploaded_file_name)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def download_files(
    target_folder: str,
    bucket_name: str,
    s3_resource: boto3.resource,
    file_prefix: Optional[str] = None,
) -> None:
    """
    Copy objects from an S3 bucket into a local folder.

    Objects whose key ends with ``/`` are skipped. When ``file_prefix`` ends
    with ``/``, that prefix is removed from the key to build the local path;
    otherwise the full key is used below ``target_folder``.

    Args:
        target_folder: Local folder receiving the files (created if missing)
        bucket_name: Name of the S3 bucket to read from
        s3_resource: S3 resource object used to reach the bucket
        file_prefix: Optional key prefix restricting which objects are fetched
    """
    bucket = s3_resource.Bucket(bucket_name)

    pathlib.Path(target_folder).mkdir(parents=True, exist_ok=True)

    if file_prefix:
        candidates = bucket.objects.filter(Prefix=file_prefix)
    else:
        candidates = bucket.objects.all()
    # Only a prefix ending with "/" is stripped from the local path.
    strip_prefix = bool(file_prefix) and file_prefix.endswith("/")

    for entry in candidates:
        key = str(entry.key)
        if key.endswith("/"):
            continue
        relative_name = key.removeprefix(file_prefix) if strip_prefix else key
        destination = f"{target_folder}/{relative_name}"
        # Keys may contain "/" separators, so the parent folders are created on demand.
        pathlib.Path(destination).parent.mkdir(parents=True, exist_ok=True)
        LOGGER.info(T("coal.services.azure_storage.downloading").format(path=key, output=destination))
        bucket.download_file(entry.key, destination)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def upload_data_stream(
    data_stream: BytesIO,
    bucket_name: str,
    s3_client: boto3.client,
    file_name: str,
    file_prefix: str = "",
) -> None:
    """
    Upload a data stream to an S3 bucket.

    The stream is rewound to its start before the upload, and its total size
    is logged. The object key is ``file_prefix`` + ``file_name``.

    Args:
        data_stream: BytesIO stream containing the data to upload
        bucket_name: Name of the S3 bucket
        s3_client: S3 client object
        file_name: Name of the file to create in the bucket
        file_prefix: Prefix to add to the file name in the bucket
    """
    from io import SEEK_END

    uploaded_file_name = file_prefix + file_name
    # seek() returns the new absolute position, so seeking to the end yields
    # the stream length without reading (and copying) the whole payload.
    size = data_stream.seek(0, SEEK_END)
    data_stream.seek(0)

    LOGGER.info(T("coal.common.data_transfer.sending_data").format(size=size))
    s3_client.upload_fileobj(data_stream, bucket_name, uploaded_file_name)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def delete_objects(
    bucket_name: str,
    s3_resource: boto3.resource,
    file_prefix: Optional[str] = None,
) -> None:
    """
    Delete objects from an S3 bucket, optionally filtered by prefix.

    The object whose key is exactly ``file_prefix`` is kept. Deletions are
    sent in batches so that buckets holding more keys than a single S3
    DeleteObjects request accepts can still be emptied.

    Args:
        bucket_name: Name of the S3 bucket
        s3_resource: S3 resource object
        file_prefix: Optional prefix to filter objects to delete
    """
    bucket = s3_resource.Bucket(bucket_name)

    if file_prefix:
        bucket_files = bucket.objects.filter(Prefix=file_prefix)
    else:
        bucket_files = bucket.objects.all()

    boto_objects = [{"Key": _file.key} for _file in bucket_files if _file.key != file_prefix]
    if not boto_objects:
        LOGGER.info(T("coal.services.azure_storage.no_objects"))
        return

    LOGGER.info(T("coal.services.azure_storage.deleting_objects").format(objects=boto_objects))
    # S3 DeleteObjects accepts at most 1000 keys per request; a larger list
    # must be split. With 1000 keys or fewer this is a single request.
    max_keys_per_request = 1000
    for start in range(0, len(boto_objects), max_keys_per_request):
        boto_delete_request = {"Objects": boto_objects[start : start + max_keys_per_request]}
        bucket.delete_objects(Delete=boto_delete_request)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
Azure services integration module.
|
|
10
|
+
|
|
11
|
+
This module provides functions for interacting with Azure services like Storage and ADX.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
# Re-export storage functions for easier importing
|
|
15
|
+
from cosmotech.coal.azure.storage import (
|
|
16
|
+
upload_file,
|
|
17
|
+
upload_folder,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Re-export blob functions for easier importing
|
|
21
|
+
from cosmotech.coal.azure.blob import (
|
|
22
|
+
dump_store_to_azure,
|
|
23
|
+
)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
from cosmotech.coal.azure.adx.auth import create_kusto_client, create_ingest_client, initialize_clients
|
|
9
|
+
from cosmotech.coal.azure.adx.query import run_query, run_command_query
|
|
10
|
+
from cosmotech.coal.azure.adx.ingestion import (
|
|
11
|
+
ingest_dataframe,
|
|
12
|
+
send_to_adx,
|
|
13
|
+
check_ingestion_status,
|
|
14
|
+
monitor_ingestion,
|
|
15
|
+
handle_failures,
|
|
16
|
+
IngestionStatus,
|
|
17
|
+
)
|
|
18
|
+
from cosmotech.coal.azure.adx.tables import table_exists, create_table, check_and_create_table, _drop_by_tag
|
|
19
|
+
from cosmotech.coal.azure.adx.utils import type_mapping, create_column_mapping
|
|
20
|
+
from cosmotech.coal.azure.adx.store import send_pyarrow_table_to_adx, send_table_data, process_tables, send_store_to_adx
|
|
21
|
+
from cosmotech.coal.azure.adx.runner import (
|
|
22
|
+
prepare_csv_content,
|
|
23
|
+
construct_create_query,
|
|
24
|
+
insert_csv_files,
|
|
25
|
+
send_runner_data,
|
|
26
|
+
)
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# Copyright (C) - 2023 - 2025 - Cosmo Tech
|
|
2
|
+
# This document and all information contained herein is the exclusive property -
|
|
3
|
+
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
|
|
4
|
+
# Any use, reproduction, translation, broadcasting, transmission, distribution,
|
|
5
|
+
# etc., to any person is prohibited unless it has been previously and
|
|
6
|
+
# specifically authorized by written means by Cosmo Tech.
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
from typing import Union, Optional, Tuple
|
|
10
|
+
|
|
11
|
+
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
|
|
12
|
+
from azure.kusto.ingest import QueuedIngestClient
|
|
13
|
+
|
|
14
|
+
from cosmotech.coal.utils.logger import LOGGER
|
|
15
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def create_kusto_client(
    cluster_url: str,
    client_id: Optional[str] = None,
    client_secret: Optional[str] = None,
    tenant_id: Optional[str] = None,
) -> KustoClient:
    """
    Build a KustoClient bound to the given ADX cluster.

    Application-key authentication is used when each credential is available,
    either from the argument or from the matching environment variable
    (AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_TENANT_ID). If a needed
    environment variable is missing, Azure CLI authentication is used instead.

    Args:
        cluster_url: The URL of the ADX cluster
        client_id: Azure client ID (falls back to AZURE_CLIENT_ID)
        client_secret: Azure client secret (falls back to AZURE_CLIENT_SECRET)
        tenant_id: Azure tenant ID (falls back to AZURE_TENANT_ID)

    Returns:
        KustoClient: A client built from the selected connection string
    """
    LOGGER.debug(T("coal.services.adx.creating_kusto_client").format(cluster_url=cluster_url))

    credential_sources = (
        (client_id, "AZURE_CLIENT_ID"),
        (client_secret, "AZURE_CLIENT_SECRET"),
        (tenant_id, "AZURE_TENANT_ID"),
    )
    try:
        # The environment is only consulted for falsy arguments; a missing
        # variable raises KeyError, which selects the CLI fallback below.
        app_id, app_secret, directory_id = [
            explicit or os.environ[variable] for explicit, variable in credential_sources
        ]
        connection = KustoConnectionStringBuilder.with_aad_application_key_authentication(
            cluster_url, app_id, app_secret, directory_id
        )
        LOGGER.debug(T("coal.services.adx.using_app_auth"))
    except KeyError:
        LOGGER.debug(T("coal.services.adx.using_cli_auth"))
        connection = KustoConnectionStringBuilder.with_az_cli_authentication(cluster_url)

    return KustoClient(connection)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def create_ingest_client(
    ingest_url: str,
    client_id: Optional[str] = None,
    client_secret: Optional[str] = None,
    tenant_id: Optional[str] = None,
) -> QueuedIngestClient:
    """
    Build a QueuedIngestClient bound to the given ADX ingestion endpoint.

    Application-key authentication is used when each credential is available,
    either from the argument or from the matching environment variable
    (AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_TENANT_ID). If a needed
    environment variable is missing, Azure CLI authentication is used instead.

    Args:
        ingest_url: The ingestion URL of the ADX cluster
        client_id: Azure client ID (falls back to AZURE_CLIENT_ID)
        client_secret: Azure client secret (falls back to AZURE_CLIENT_SECRET)
        tenant_id: Azure tenant ID (falls back to AZURE_TENANT_ID)

    Returns:
        QueuedIngestClient: A client built from the selected connection string
    """
    LOGGER.debug(T("coal.services.adx.creating_ingest_client").format(ingest_url=ingest_url))

    credential_sources = (
        (client_id, "AZURE_CLIENT_ID"),
        (client_secret, "AZURE_CLIENT_SECRET"),
        (tenant_id, "AZURE_TENANT_ID"),
    )
    try:
        # The environment is only consulted for falsy arguments; a missing
        # variable raises KeyError, which selects the CLI fallback below.
        app_id, app_secret, directory_id = [
            explicit or os.environ[variable] for explicit, variable in credential_sources
        ]
        connection = KustoConnectionStringBuilder.with_aad_application_key_authentication(
            ingest_url, app_id, app_secret, directory_id
        )
        LOGGER.debug(T("coal.services.adx.using_app_auth"))
    except KeyError:
        LOGGER.debug(T("coal.services.adx.using_cli_auth"))
        connection = KustoConnectionStringBuilder.with_az_cli_authentication(ingest_url)

    return QueuedIngestClient(connection)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def initialize_clients(adx_uri: str, adx_ingest_uri: str) -> Tuple[KustoClient, QueuedIngestClient]:
    """
    Create the query client and the ingest client for an ADX cluster.

    No explicit credentials are passed to either factory, so both use their
    default credential handling.

    Args:
        adx_uri: The Azure Data Explorer resource URI
        adx_ingest_uri: The Azure Data Explorer resource ingest URI

    Returns:
        tuple: (kusto_client, ingest_client)
    """
    LOGGER.debug(T("coal.services.adx.initializing_clients"))
    # Tuple items are evaluated left to right: query client first, then ingest client.
    return create_kusto_client(adx_uri), create_ingest_client(adx_ingest_uri)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_cluster_urls(cluster_name: str, cluster_region: str) -> Tuple[str, str]:
    """
    Derive the query and ingestion URLs of an ADX cluster.

    Both URLs share the host suffix
    ``<cluster_name>.<cluster_region>.kusto.windows.net``; the ingestion URL
    additionally prefixes the host with ``ingest-``.

    Args:
        cluster_name: The name of the ADX cluster
        cluster_region: The region of the ADX cluster

    Returns:
        tuple: (cluster_url, ingest_url)
    """
    message = T("coal.services.adx.generating_urls").format(
        cluster_name=cluster_name, cluster_region=cluster_region
    )
    LOGGER.debug(message)

    host_suffix = f"{cluster_name}.{cluster_region}.kusto.windows.net"
    return f"https://{host_suffix}", f"https://ingest-{host_suffix}"
|