cosmotech-acceleration-library 1.1.0__py3-none-any.whl → 2.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosmotech/coal/__init__.py +1 -1
- cosmotech/coal/aws/__init__.py +1 -9
- cosmotech/coal/aws/s3.py +181 -214
- cosmotech/coal/azure/__init__.py +5 -5
- cosmotech/coal/azure/adx/__init__.py +24 -10
- cosmotech/coal/azure/adx/auth.py +2 -2
- cosmotech/coal/azure/adx/ingestion.py +10 -14
- cosmotech/coal/azure/adx/query.py +1 -1
- cosmotech/coal/azure/adx/runner.py +13 -14
- cosmotech/coal/azure/adx/store.py +5 -86
- cosmotech/coal/azure/adx/tables.py +2 -2
- cosmotech/coal/azure/adx/utils.py +2 -2
- cosmotech/coal/azure/blob.py +20 -26
- cosmotech/coal/azure/storage.py +3 -3
- cosmotech/coal/cosmotech_api/__init__.py +0 -28
- cosmotech/coal/cosmotech_api/apis/__init__.py +14 -0
- cosmotech/coal/cosmotech_api/apis/dataset.py +222 -0
- cosmotech/coal/cosmotech_api/apis/meta.py +25 -0
- cosmotech/coal/cosmotech_api/apis/organization.py +24 -0
- cosmotech/coal/cosmotech_api/apis/run.py +38 -0
- cosmotech/coal/cosmotech_api/apis/runner.py +75 -0
- cosmotech/coal/cosmotech_api/apis/solution.py +23 -0
- cosmotech/coal/cosmotech_api/apis/workspace.py +108 -0
- cosmotech/coal/cosmotech_api/objects/__init__.py +9 -0
- cosmotech/coal/cosmotech_api/objects/connection.py +125 -0
- cosmotech/coal/cosmotech_api/objects/parameters.py +127 -0
- cosmotech/coal/postgresql/runner.py +58 -41
- cosmotech/coal/postgresql/store.py +56 -15
- cosmotech/coal/postgresql/utils.py +255 -0
- cosmotech/coal/singlestore/store.py +3 -2
- cosmotech/coal/store/__init__.py +16 -13
- cosmotech/coal/store/output/__init__.py +0 -0
- cosmotech/coal/store/output/aws_channel.py +74 -0
- cosmotech/coal/store/output/az_storage_channel.py +33 -0
- cosmotech/coal/store/output/channel_interface.py +38 -0
- cosmotech/coal/store/output/channel_spliter.py +61 -0
- cosmotech/coal/store/output/postgres_channel.py +37 -0
- cosmotech/coal/store/pandas.py +1 -1
- cosmotech/coal/store/pyarrow.py +2 -2
- cosmotech/coal/store/store.py +4 -7
- cosmotech/coal/utils/configuration.py +197 -0
- cosmotech/coal/utils/decorator.py +4 -7
- cosmotech/csm_data/commands/adx_send_data.py +1 -1
- cosmotech/csm_data/commands/adx_send_runnerdata.py +3 -2
- cosmotech/csm_data/commands/api/api.py +6 -19
- cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +20 -16
- cosmotech/csm_data/commands/api/run_load_data.py +15 -52
- cosmotech/csm_data/commands/api/wsf_load_file.py +13 -16
- cosmotech/csm_data/commands/api/wsf_send_file.py +11 -14
- cosmotech/csm_data/commands/az_storage_upload.py +3 -2
- cosmotech/csm_data/commands/s3_bucket_delete.py +16 -15
- cosmotech/csm_data/commands/s3_bucket_download.py +16 -16
- cosmotech/csm_data/commands/s3_bucket_upload.py +16 -14
- cosmotech/csm_data/commands/store/dump_to_azure.py +3 -2
- cosmotech/csm_data/commands/store/dump_to_postgresql.py +3 -2
- cosmotech/csm_data/commands/store/dump_to_s3.py +18 -16
- cosmotech/csm_data/commands/store/list_tables.py +3 -2
- cosmotech/csm_data/commands/store/load_csv_folder.py +10 -4
- cosmotech/csm_data/commands/store/load_from_singlestore.py +3 -2
- cosmotech/csm_data/commands/store/output.py +35 -0
- cosmotech/csm_data/commands/store/reset.py +8 -3
- cosmotech/csm_data/commands/store/store.py +3 -3
- cosmotech/csm_data/main.py +4 -4
- cosmotech/csm_data/utils/decorators.py +4 -3
- cosmotech/translation/coal/en-US/coal/cosmotech_api/initialization.yml +8 -0
- cosmotech/translation/coal/en-US/coal/services/dataset.yml +10 -14
- cosmotech/translation/coal/en-US/coal/store/output/data_interface.yml +1 -0
- cosmotech/translation/coal/en-US/coal/store/output/split.yml +6 -0
- cosmotech/translation/coal/en-US/coal/utils/configuration.yml +2 -0
- cosmotech/translation/csm_data/en-US/csm_data/commands/store/output.yml +7 -0
- {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/METADATA +29 -33
- cosmotech_acceleration_library-2.1.0rc1.dist-info/RECORD +153 -0
- {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/WHEEL +1 -1
- cosmotech/coal/azure/functions.py +0 -72
- cosmotech/coal/cosmotech_api/connection.py +0 -96
- cosmotech/coal/cosmotech_api/dataset/__init__.py +0 -26
- cosmotech/coal/cosmotech_api/dataset/converters.py +0 -164
- cosmotech/coal/cosmotech_api/dataset/download/__init__.py +0 -19
- cosmotech/coal/cosmotech_api/dataset/download/adt.py +0 -119
- cosmotech/coal/cosmotech_api/dataset/download/common.py +0 -140
- cosmotech/coal/cosmotech_api/dataset/download/file.py +0 -229
- cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +0 -185
- cosmotech/coal/cosmotech_api/dataset/upload.py +0 -41
- cosmotech/coal/cosmotech_api/dataset/utils.py +0 -132
- cosmotech/coal/cosmotech_api/parameters.py +0 -48
- cosmotech/coal/cosmotech_api/run.py +0 -25
- cosmotech/coal/cosmotech_api/run_data.py +0 -173
- cosmotech/coal/cosmotech_api/run_template.py +0 -108
- cosmotech/coal/cosmotech_api/runner/__init__.py +0 -28
- cosmotech/coal/cosmotech_api/runner/data.py +0 -38
- cosmotech/coal/cosmotech_api/runner/datasets.py +0 -416
- cosmotech/coal/cosmotech_api/runner/download.py +0 -135
- cosmotech/coal/cosmotech_api/runner/metadata.py +0 -42
- cosmotech/coal/cosmotech_api/runner/parameters.py +0 -157
- cosmotech/coal/cosmotech_api/twin_data_layer.py +0 -512
- cosmotech/coal/cosmotech_api/workspace.py +0 -127
- cosmotech/coal/utils/postgresql.py +0 -236
- cosmotech/coal/utils/semver.py +0 -6
- cosmotech/csm_data/commands/api/rds_load_csv.py +0 -90
- cosmotech/csm_data/commands/api/rds_send_csv.py +0 -74
- cosmotech/csm_data/commands/api/rds_send_store.py +0 -74
- cosmotech/csm_data/commands/api/runtemplate_load_handler.py +0 -66
- cosmotech/csm_data/commands/api/tdl_load_files.py +0 -76
- cosmotech/csm_data/commands/api/tdl_send_files.py +0 -82
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +0 -32
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +0 -27
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +0 -2
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_load_csv.yml +0 -13
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_csv.yml +0 -12
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_store.yml +0 -12
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_load_files.yml +0 -14
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_send_files.yml +0 -18
- cosmotech_acceleration_library-1.1.0.dist-info/RECORD +0 -171
- {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/entry_points.txt +0 -0
- {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/top_level.txt +0 -0
cosmotech/coal/__init__.py
CHANGED
cosmotech/coal/aws/__init__.py
CHANGED
|
@@ -12,12 +12,4 @@ This module provides functions for interacting with AWS services like S3.
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
# Re-export S3 functions for easier importing
|
|
15
|
-
from cosmotech.coal.aws.s3 import
|
|
16
|
-
create_s3_client,
|
|
17
|
-
create_s3_resource,
|
|
18
|
-
upload_file,
|
|
19
|
-
upload_folder,
|
|
20
|
-
download_files,
|
|
21
|
-
upload_data_stream,
|
|
22
|
-
delete_objects,
|
|
23
|
-
)
|
|
15
|
+
from cosmotech.coal.aws.s3 import S3
|
cosmotech/coal/aws/s3.py
CHANGED
|
@@ -6,230 +6,197 @@
|
|
|
6
6
|
# specifically authorized by written means by Cosmo Tech.
|
|
7
7
|
|
|
8
8
|
"""
|
|
9
|
-
|
|
9
|
+
s3 bucket operations module.
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
this module provides functions for interacting with S3 buckets, including
|
|
12
12
|
uploading, downloading, and deleting files.
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
import pathlib
|
|
16
16
|
from io import BytesIO
|
|
17
|
-
from typing import Optional, Dict, Any, List, Iterator
|
|
18
17
|
|
|
19
18
|
import boto3
|
|
19
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
20
20
|
|
|
21
|
+
from cosmotech.coal.utils.configuration import Configuration
|
|
21
22
|
from cosmotech.coal.utils.logger import LOGGER
|
|
22
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
"
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def upload_folder(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
_source_name = str(source_path)
|
|
133
|
-
for _file_path in source_path.glob("**/*" if recursive else "*"):
|
|
134
|
-
if _file_path.is_file():
|
|
135
|
-
_file_name = str(_file_path).removeprefix(_source_name).removeprefix("/")
|
|
136
|
-
uploaded_file_name = file_prefix + _file_name
|
|
137
|
-
LOGGER.info(
|
|
138
|
-
T("coal.common.data_transfer.file_sent").format(
|
|
139
|
-
file_path=_file_path, uploaded_name=uploaded_file_name
|
|
25
|
+
class S3:
|
|
26
|
+
|
|
27
|
+
def __init__(self, configuration: Configuration):
|
|
28
|
+
self._configuration = configuration.s3
|
|
29
|
+
|
|
30
|
+
@property
|
|
31
|
+
def file_prefix(self):
|
|
32
|
+
if "bucket_prefix" in self._configuration:
|
|
33
|
+
return self._configuration.bucket_prefix
|
|
34
|
+
return ""
|
|
35
|
+
|
|
36
|
+
@property
|
|
37
|
+
def use_ssl(self):
|
|
38
|
+
if "use_ssl" in self._configuration:
|
|
39
|
+
return self._configuration.use_ssl
|
|
40
|
+
return True
|
|
41
|
+
|
|
42
|
+
@property
|
|
43
|
+
def ssl_cert_bundle(self):
|
|
44
|
+
if "ssl_cert_bundle" in self._configuration:
|
|
45
|
+
return self._configuration.ssl_cert_bundle
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
@property
|
|
49
|
+
def access_key_id(self):
|
|
50
|
+
return self._configuration.access_key_id
|
|
51
|
+
|
|
52
|
+
@property
|
|
53
|
+
def endpoint_url(self):
|
|
54
|
+
return self._configuration.endpoint_url
|
|
55
|
+
|
|
56
|
+
@property
|
|
57
|
+
def bucket_name(self):
|
|
58
|
+
return self._configuration.bucket_name
|
|
59
|
+
|
|
60
|
+
@property
|
|
61
|
+
def secret_access_key(self):
|
|
62
|
+
return self._configuration.secret_access_key
|
|
63
|
+
|
|
64
|
+
@property
|
|
65
|
+
def output_type(self):
|
|
66
|
+
if "output_type" in self._configuration:
|
|
67
|
+
return self._configuration.output_type
|
|
68
|
+
return "csv"
|
|
69
|
+
|
|
70
|
+
@property
|
|
71
|
+
def client(self) -> boto3.client:
|
|
72
|
+
boto3_parameters = {
|
|
73
|
+
"use_ssl": self.use_ssl,
|
|
74
|
+
"endpoint_url": self.endpoint_url,
|
|
75
|
+
"aws_access_key_id": self.access_key_id,
|
|
76
|
+
"aws_secret_access_key": self.secret_access_key,
|
|
77
|
+
}
|
|
78
|
+
if self.ssl_cert_bundle:
|
|
79
|
+
boto3_parameters["verify"] = self.ssl_cert_bundle
|
|
80
|
+
|
|
81
|
+
return boto3.client("s3", **boto3_parameters)
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def resource(self) -> boto3.resource:
|
|
85
|
+
boto3_parameters = {
|
|
86
|
+
"use_ssl": self.use_ssl,
|
|
87
|
+
"endpoint_url": self.endpoint_url,
|
|
88
|
+
"aws_access_key_id": self.access_key_id,
|
|
89
|
+
"aws_secret_access_key": self.secret_access_key,
|
|
90
|
+
}
|
|
91
|
+
if self.ssl_cert_bundle:
|
|
92
|
+
boto3_parameters["verify"] = self.ssl_cert_bundle
|
|
93
|
+
|
|
94
|
+
return boto3.resource("s3", **boto3_parameters)
|
|
95
|
+
|
|
96
|
+
def upload_file(self, file_path: pathlib.Path) -> None:
|
|
97
|
+
"""
|
|
98
|
+
Upload a single file to an S3 bucket.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
file_path: Path to the file to upload
|
|
102
|
+
"""
|
|
103
|
+
uploaded_file_name = self.file_prefix + file_path.name
|
|
104
|
+
LOGGER.info(
|
|
105
|
+
T("coal.common.data_transfer.file_sent").format(file_path=file_path, uploaded_name=uploaded_file_name)
|
|
106
|
+
)
|
|
107
|
+
self.resource.Bucket(self.bucket_name).upload_file(str(file_path), uploaded_file_name)
|
|
108
|
+
|
|
109
|
+
def upload_folder(self, source_folder: str, recursive: bool = False) -> None:
|
|
110
|
+
"""
|
|
111
|
+
Upload files from a folder to an S3 bucket.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
source_folder: Path to the folder containing files to upload
|
|
115
|
+
recursive: Whether to recursively upload files from subdirectories
|
|
116
|
+
"""
|
|
117
|
+
source_path = pathlib.Path(source_folder)
|
|
118
|
+
if not source_path.exists():
|
|
119
|
+
LOGGER.error(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
|
|
120
|
+
raise FileNotFoundError(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
|
|
121
|
+
|
|
122
|
+
if source_path.is_dir():
|
|
123
|
+
_source_name = str(source_path)
|
|
124
|
+
for _file_path in source_path.glob("**/*" if recursive else "*"):
|
|
125
|
+
if _file_path.is_file():
|
|
126
|
+
_file_name = str(_file_path).removeprefix(_source_name).removeprefix("/")
|
|
127
|
+
uploaded_file_name = self.file_prefix + _file_name
|
|
128
|
+
LOGGER.info(
|
|
129
|
+
T("coal.common.data_transfer.file_sent").format(
|
|
130
|
+
file_path=_file_path, uploaded_name=uploaded_file_name
|
|
131
|
+
)
|
|
140
132
|
)
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
s3_resource: boto3.resource,
|
|
212
|
-
file_prefix: Optional[str] = None,
|
|
213
|
-
) -> None:
|
|
214
|
-
"""
|
|
215
|
-
Delete objects from an S3 bucket, optionally filtered by prefix.
|
|
216
|
-
|
|
217
|
-
Args:
|
|
218
|
-
bucket_name: Name of the S3 bucket
|
|
219
|
-
s3_resource: S3 resource object
|
|
220
|
-
file_prefix: Optional prefix to filter objects to delete
|
|
221
|
-
"""
|
|
222
|
-
bucket = s3_resource.Bucket(bucket_name)
|
|
223
|
-
|
|
224
|
-
if file_prefix:
|
|
225
|
-
bucket_files = bucket.objects.filter(Prefix=file_prefix)
|
|
226
|
-
else:
|
|
227
|
-
bucket_files = bucket.objects.all()
|
|
228
|
-
|
|
229
|
-
boto_objects = [{"Key": _file.key} for _file in bucket_files if _file.key != file_prefix]
|
|
230
|
-
if boto_objects:
|
|
231
|
-
LOGGER.info(T("coal.services.azure_storage.deleting_objects").format(objects=boto_objects))
|
|
232
|
-
boto_delete_request = {"Objects": boto_objects}
|
|
233
|
-
bucket.delete_objects(Delete=boto_delete_request)
|
|
234
|
-
else:
|
|
235
|
-
LOGGER.info(T("coal.services.azure_storage.no_objects"))
|
|
133
|
+
self.resource.Bucket(self.bucket_name).upload_file(str(_file_path), uploaded_file_name)
|
|
134
|
+
else:
|
|
135
|
+
self.upload_file(source_path)
|
|
136
|
+
|
|
137
|
+
def download_files(self, destination_folder: str) -> None:
|
|
138
|
+
"""
|
|
139
|
+
Download files from an S3 bucket to a local folder.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
destination_folder: Local folder to download files to
|
|
143
|
+
"""
|
|
144
|
+
bucket = self.resource.Bucket(self.bucket_name)
|
|
145
|
+
|
|
146
|
+
pathlib.Path(destination_folder).mkdir(parents=True, exist_ok=True)
|
|
147
|
+
remove_prefix = False
|
|
148
|
+
if self.file_prefix:
|
|
149
|
+
bucket_files = bucket.objects.filter(Prefix=self.file_prefix)
|
|
150
|
+
if self.file_prefix.endswith("/"):
|
|
151
|
+
remove_prefix = True
|
|
152
|
+
else:
|
|
153
|
+
bucket_files = bucket.objects.all()
|
|
154
|
+
for _file in bucket_files:
|
|
155
|
+
if not (path_name := str(_file.key)).endswith("/"):
|
|
156
|
+
target_file = path_name
|
|
157
|
+
if remove_prefix:
|
|
158
|
+
target_file = target_file.removeprefix(self.file_prefix)
|
|
159
|
+
output_file = f"{destination_folder}/{target_file}"
|
|
160
|
+
pathlib.Path(output_file).parent.mkdir(parents=True, exist_ok=True)
|
|
161
|
+
LOGGER.info(T("coal.services.azure_storage.downloading").format(path=path_name, output=output_file))
|
|
162
|
+
bucket.download_file(_file.key, output_file)
|
|
163
|
+
|
|
164
|
+
def upload_data_stream(self, data_stream: BytesIO, file_name: str) -> None:
|
|
165
|
+
"""
|
|
166
|
+
Upload a data stream to an S3 bucket.
|
|
167
|
+
|
|
168
|
+
Args:
|
|
169
|
+
data_stream: BytesIO stream containing the data to upload
|
|
170
|
+
file_name: Name of the file to create in the bucket
|
|
171
|
+
"""
|
|
172
|
+
uploaded_file_name = self.file_prefix + file_name
|
|
173
|
+
data_stream.seek(0)
|
|
174
|
+
size = len(data_stream.read())
|
|
175
|
+
data_stream.seek(0)
|
|
176
|
+
|
|
177
|
+
LOGGER.info(T("coal.common.data_transfer.sending_data").format(size=size))
|
|
178
|
+
self.client.upload_fileobj(data_stream, self.bucket_name, uploaded_file_name)
|
|
179
|
+
|
|
180
|
+
def delete_objects(self) -> None:
|
|
181
|
+
"""
|
|
182
|
+
Delete objects from an S3 bucket, optionally filtered by prefix.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
bucket_name: Name of the S3 bucket
|
|
186
|
+
s3_resource: S3 resource object
|
|
187
|
+
file_prefix: Optional prefix to filter objects to delete
|
|
188
|
+
"""
|
|
189
|
+
bucket = self.resource.Bucket(self.bucket_name)
|
|
190
|
+
|
|
191
|
+
if self.file_prefix:
|
|
192
|
+
bucket_files = bucket.objects.filter(Prefix=self.file_prefix)
|
|
193
|
+
else:
|
|
194
|
+
bucket_files = bucket.objects.all()
|
|
195
|
+
|
|
196
|
+
boto_objects = [{"Key": _file.key} for _file in bucket_files if _file.key != self.file_prefix]
|
|
197
|
+
if boto_objects:
|
|
198
|
+
LOGGER.info(T("coal.services.azure_storage.deleting_objects").format(objects=boto_objects))
|
|
199
|
+
boto_delete_request = {"Objects": boto_objects}
|
|
200
|
+
bucket.delete_objects(Delete=boto_delete_request)
|
|
201
|
+
else:
|
|
202
|
+
LOGGER.info(T("coal.services.azure_storage.no_objects"))
|
cosmotech/coal/azure/__init__.py
CHANGED
|
@@ -11,13 +11,13 @@ Azure services integration module.
|
|
|
11
11
|
This module provides functions for interacting with Azure services like Storage and ADX.
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
|
+
# Re-export blob functions for easier importing
|
|
15
|
+
from cosmotech.coal.azure.blob import (
|
|
16
|
+
dump_store_to_azure,
|
|
17
|
+
)
|
|
18
|
+
|
|
14
19
|
# Re-export storage functions for easier importing
|
|
15
20
|
from cosmotech.coal.azure.storage import (
|
|
16
21
|
upload_file,
|
|
17
22
|
upload_folder,
|
|
18
23
|
)
|
|
19
|
-
|
|
20
|
-
# Re-export blob functions for easier importing
|
|
21
|
-
from cosmotech.coal.azure.blob import (
|
|
22
|
-
dump_store_to_azure,
|
|
23
|
-
)
|
|
@@ -5,22 +5,36 @@
|
|
|
5
5
|
# etc., to any person is prohibited unless it has been previously and
|
|
6
6
|
# specifically authorized by written means by Cosmo Tech.
|
|
7
7
|
|
|
8
|
-
from cosmotech.coal.azure.adx.auth import
|
|
9
|
-
|
|
8
|
+
from cosmotech.coal.azure.adx.auth import (
|
|
9
|
+
create_ingest_client,
|
|
10
|
+
create_kusto_client,
|
|
11
|
+
initialize_clients,
|
|
12
|
+
)
|
|
10
13
|
from cosmotech.coal.azure.adx.ingestion import (
|
|
11
|
-
|
|
12
|
-
send_to_adx,
|
|
14
|
+
IngestionStatus,
|
|
13
15
|
check_ingestion_status,
|
|
14
|
-
monitor_ingestion,
|
|
15
16
|
handle_failures,
|
|
16
|
-
|
|
17
|
+
ingest_dataframe,
|
|
18
|
+
monitor_ingestion,
|
|
19
|
+
send_to_adx,
|
|
17
20
|
)
|
|
18
|
-
from cosmotech.coal.azure.adx.
|
|
19
|
-
from cosmotech.coal.azure.adx.utils import type_mapping, create_column_mapping
|
|
20
|
-
from cosmotech.coal.azure.adx.store import send_pyarrow_table_to_adx, send_table_data, process_tables, send_store_to_adx
|
|
21
|
+
from cosmotech.coal.azure.adx.query import run_command_query, run_query
|
|
21
22
|
from cosmotech.coal.azure.adx.runner import (
|
|
22
|
-
prepare_csv_content,
|
|
23
23
|
construct_create_query,
|
|
24
24
|
insert_csv_files,
|
|
25
|
+
prepare_csv_content,
|
|
25
26
|
send_runner_data,
|
|
26
27
|
)
|
|
28
|
+
from cosmotech.coal.azure.adx.store import (
|
|
29
|
+
process_tables,
|
|
30
|
+
send_pyarrow_table_to_adx,
|
|
31
|
+
send_store_to_adx,
|
|
32
|
+
send_table_data,
|
|
33
|
+
)
|
|
34
|
+
from cosmotech.coal.azure.adx.tables import (
|
|
35
|
+
_drop_by_tag,
|
|
36
|
+
check_and_create_table,
|
|
37
|
+
create_table,
|
|
38
|
+
table_exists,
|
|
39
|
+
)
|
|
40
|
+
from cosmotech.coal.azure.adx.utils import create_column_mapping, type_mapping
|
cosmotech/coal/azure/adx/auth.py
CHANGED
|
@@ -6,13 +6,13 @@
|
|
|
6
6
|
# specifically authorized by written means by Cosmo Tech.
|
|
7
7
|
|
|
8
8
|
import os
|
|
9
|
-
from typing import
|
|
9
|
+
from typing import Optional, Tuple
|
|
10
10
|
|
|
11
11
|
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
|
|
12
12
|
from azure.kusto.ingest import QueuedIngestClient
|
|
13
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
13
14
|
|
|
14
15
|
from cosmotech.coal.utils.logger import LOGGER
|
|
15
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def create_kusto_client(
|
|
@@ -5,28 +5,24 @@
|
|
|
5
5
|
# etc., to any person is prohibited unless it has been previously and
|
|
6
6
|
# specifically authorized by written means by Cosmo Tech.
|
|
7
7
|
|
|
8
|
+
import os
|
|
9
|
+
import time
|
|
8
10
|
from enum import Enum
|
|
9
|
-
from typing import Dict
|
|
10
|
-
from typing import Iterator
|
|
11
|
-
from typing import List
|
|
12
|
-
from typing import Optional
|
|
13
|
-
from typing import Tuple
|
|
11
|
+
from typing import Dict, Iterator, List, Optional, Tuple
|
|
14
12
|
|
|
15
|
-
import os
|
|
16
13
|
import pandas as pd
|
|
17
|
-
import time
|
|
18
14
|
import tqdm
|
|
19
15
|
from azure.kusto.data import KustoClient
|
|
20
16
|
from azure.kusto.data.data_format import DataFormat
|
|
21
|
-
from azure.kusto.ingest import IngestionProperties
|
|
22
|
-
from azure.kusto.ingest import
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
17
|
+
from azure.kusto.ingest import IngestionProperties, QueuedIngestClient, ReportLevel
|
|
18
|
+
from azure.kusto.ingest.status import (
|
|
19
|
+
FailureMessage,
|
|
20
|
+
KustoIngestStatusQueues,
|
|
21
|
+
SuccessMessage,
|
|
22
|
+
)
|
|
27
23
|
from cosmotech.orchestrator.utils.translate import T
|
|
28
24
|
|
|
29
|
-
from cosmotech.coal.azure.adx.tables import
|
|
25
|
+
from cosmotech.coal.azure.adx.tables import _drop_by_tag, create_table
|
|
30
26
|
from cosmotech.coal.azure.adx.utils import type_mapping
|
|
31
27
|
from cosmotech.coal.utils.logger import LOGGER
|
|
32
28
|
|
|
@@ -7,9 +7,9 @@
|
|
|
7
7
|
|
|
8
8
|
from azure.kusto.data import KustoClient
|
|
9
9
|
from azure.kusto.data.response import KustoResponseDataSet
|
|
10
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
10
11
|
|
|
11
12
|
from cosmotech.coal.utils.logger import LOGGER
|
|
12
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def run_query(client: KustoClient, database: str, query: str) -> KustoResponseDataSet:
|
|
@@ -13,25 +13,24 @@ This module provides functions for ingesting runner data into Azure Data Explore
|
|
|
13
13
|
|
|
14
14
|
import pathlib
|
|
15
15
|
import time
|
|
16
|
-
from
|
|
17
|
-
from typing import Dict, Any, List, Tuple, Optional
|
|
16
|
+
from typing import Any, Dict
|
|
18
17
|
|
|
19
18
|
from azure.kusto.data.response import KustoResponseDataSet
|
|
20
|
-
from azure.kusto.ingest import
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
19
|
+
from azure.kusto.ingest import (
|
|
20
|
+
ColumnMapping,
|
|
21
|
+
FileDescriptor,
|
|
22
|
+
IngestionMappingKind,
|
|
23
|
+
IngestionProperties,
|
|
24
|
+
IngestionResult,
|
|
25
|
+
QueuedIngestClient,
|
|
26
|
+
ReportLevel,
|
|
27
|
+
)
|
|
28
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
29
29
|
|
|
30
30
|
from cosmotech.coal.azure.adx.auth import initialize_clients
|
|
31
|
-
from cosmotech.coal.azure.adx.
|
|
32
|
-
from cosmotech.coal.azure.adx.
|
|
31
|
+
from cosmotech.coal.azure.adx.ingestion import IngestionStatus, check_ingestion_status
|
|
32
|
+
from cosmotech.coal.azure.adx.query import run_query
|
|
33
33
|
from cosmotech.coal.utils.logger import LOGGER
|
|
34
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
35
34
|
|
|
36
35
|
|
|
37
36
|
def prepare_csv_content(folder_path: str) -> Dict[str, Dict[str, Any]]:
|