cosmotech-acceleration-library 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosmotech/coal/__init__.py +1 -1
- cosmotech/coal/aws/__init__.py +1 -9
- cosmotech/coal/aws/s3.py +181 -214
- cosmotech/coal/azure/adx/auth.py +2 -2
- cosmotech/coal/azure/adx/runner.py +13 -14
- cosmotech/coal/azure/adx/store.py +5 -86
- cosmotech/coal/azure/adx/tables.py +2 -2
- cosmotech/coal/azure/blob.py +6 -6
- cosmotech/coal/azure/storage.py +3 -3
- cosmotech/coal/cosmotech_api/__init__.py +0 -24
- cosmotech/coal/cosmotech_api/apis/__init__.py +14 -0
- cosmotech/coal/cosmotech_api/apis/dataset.py +103 -0
- cosmotech/coal/cosmotech_api/apis/meta.py +25 -0
- cosmotech/coal/cosmotech_api/apis/organization.py +24 -0
- cosmotech/coal/cosmotech_api/apis/run.py +38 -0
- cosmotech/coal/cosmotech_api/apis/runner.py +71 -0
- cosmotech/coal/cosmotech_api/apis/solution.py +23 -0
- cosmotech/coal/cosmotech_api/apis/workspace.py +108 -0
- cosmotech/coal/cosmotech_api/objects/__init__.py +9 -0
- cosmotech/coal/cosmotech_api/objects/connection.py +125 -0
- cosmotech/coal/cosmotech_api/objects/parameters.py +127 -0
- cosmotech/coal/postgresql/runner.py +56 -36
- cosmotech/coal/postgresql/store.py +60 -14
- cosmotech/coal/postgresql/utils.py +254 -0
- cosmotech/coal/store/output/__init__.py +0 -0
- cosmotech/coal/store/output/aws_channel.py +73 -0
- cosmotech/coal/store/output/az_storage_channel.py +42 -0
- cosmotech/coal/store/output/channel_interface.py +23 -0
- cosmotech/coal/store/output/channel_spliter.py +55 -0
- cosmotech/coal/store/output/postgres_channel.py +40 -0
- cosmotech/coal/utils/configuration.py +169 -0
- cosmotech/coal/utils/decorator.py +22 -0
- cosmotech/csm_data/commands/api/api.py +6 -19
- cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +20 -16
- cosmotech/csm_data/commands/api/run_load_data.py +7 -46
- cosmotech/csm_data/commands/api/wsf_load_file.py +14 -15
- cosmotech/csm_data/commands/api/wsf_send_file.py +12 -13
- cosmotech/csm_data/commands/s3_bucket_delete.py +16 -15
- cosmotech/csm_data/commands/s3_bucket_download.py +16 -16
- cosmotech/csm_data/commands/s3_bucket_upload.py +16 -14
- cosmotech/csm_data/commands/store/dump_to_s3.py +18 -16
- cosmotech/csm_data/commands/store/output.py +35 -0
- cosmotech/csm_data/commands/store/store.py +3 -4
- cosmotech/translation/coal/en-US/coal/cosmotech_api/initialization.yml +8 -0
- cosmotech/translation/coal/en-US/coal/services/dataset.yml +4 -14
- cosmotech/translation/coal/en-US/coal/store/output/data_interface.yml +1 -0
- cosmotech/translation/coal/en-US/coal/store/output/split.yml +6 -0
- cosmotech/translation/coal/en-US/coal/utils/configuration.yml +2 -0
- cosmotech/translation/csm_data/en-US/csm_data/commands/store/output.yml +7 -0
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/METADATA +8 -9
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/RECORD +55 -71
- cosmotech/coal/azure/functions.py +0 -72
- cosmotech/coal/cosmotech_api/connection.py +0 -96
- cosmotech/coal/cosmotech_api/dataset/__init__.py +0 -26
- cosmotech/coal/cosmotech_api/dataset/converters.py +0 -164
- cosmotech/coal/cosmotech_api/dataset/download/__init__.py +0 -19
- cosmotech/coal/cosmotech_api/dataset/download/adt.py +0 -119
- cosmotech/coal/cosmotech_api/dataset/download/common.py +0 -140
- cosmotech/coal/cosmotech_api/dataset/download/file.py +0 -216
- cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +0 -188
- cosmotech/coal/cosmotech_api/dataset/utils.py +0 -132
- cosmotech/coal/cosmotech_api/parameters.py +0 -48
- cosmotech/coal/cosmotech_api/run.py +0 -25
- cosmotech/coal/cosmotech_api/run_data.py +0 -173
- cosmotech/coal/cosmotech_api/run_template.py +0 -108
- cosmotech/coal/cosmotech_api/runner/__init__.py +0 -28
- cosmotech/coal/cosmotech_api/runner/data.py +0 -38
- cosmotech/coal/cosmotech_api/runner/datasets.py +0 -364
- cosmotech/coal/cosmotech_api/runner/download.py +0 -146
- cosmotech/coal/cosmotech_api/runner/metadata.py +0 -42
- cosmotech/coal/cosmotech_api/runner/parameters.py +0 -157
- cosmotech/coal/cosmotech_api/twin_data_layer.py +0 -512
- cosmotech/coal/cosmotech_api/workspace.py +0 -127
- cosmotech/coal/utils/api.py +0 -68
- cosmotech/coal/utils/postgresql.py +0 -236
- cosmotech/csm_data/commands/api/rds_load_csv.py +0 -90
- cosmotech/csm_data/commands/api/rds_send_csv.py +0 -74
- cosmotech/csm_data/commands/api/rds_send_store.py +0 -74
- cosmotech/csm_data/commands/api/runtemplate_load_handler.py +0 -66
- cosmotech/csm_data/commands/api/tdl_load_files.py +0 -76
- cosmotech/csm_data/commands/api/tdl_send_files.py +0 -82
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +0 -27
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +0 -32
- cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +0 -27
- cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +0 -2
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_load_csv.yml +0 -13
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_csv.yml +0 -12
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_store.yml +0 -12
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_load_files.yml +0 -14
- cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_send_files.yml +0 -18
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/WHEEL +0 -0
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/entry_points.txt +0 -0
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {cosmotech_acceleration_library-1.0.1.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/top_level.txt +0 -0
cosmotech/coal/__init__.py
CHANGED
cosmotech/coal/aws/__init__.py
CHANGED
|
@@ -12,12 +12,4 @@ This module provides functions for interacting with AWS services like S3.
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
# Re-export S3 functions for easier importing
|
|
15
|
-
from cosmotech.coal.aws.s3 import
|
|
16
|
-
create_s3_client,
|
|
17
|
-
create_s3_resource,
|
|
18
|
-
upload_file,
|
|
19
|
-
upload_folder,
|
|
20
|
-
download_files,
|
|
21
|
-
upload_data_stream,
|
|
22
|
-
delete_objects,
|
|
23
|
-
)
|
|
15
|
+
from cosmotech.coal.aws.s3 import S3
|
cosmotech/coal/aws/s3.py
CHANGED
|
@@ -6,230 +6,197 @@
|
|
|
6
6
|
# specifically authorized by written means by Cosmo Tech.
|
|
7
7
|
|
|
8
8
|
"""
|
|
9
|
-
|
|
9
|
+
s3 bucket operations module.
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
this module provides functions for interacting with S3 buckets, including
|
|
12
12
|
uploading, downloading, and deleting files.
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
import pathlib
|
|
16
16
|
from io import BytesIO
|
|
17
|
-
from typing import Optional, Dict, Any, List, Iterator
|
|
18
17
|
|
|
19
18
|
import boto3
|
|
19
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
20
20
|
|
|
21
|
+
from cosmotech.coal.utils.configuration import Configuration
|
|
21
22
|
from cosmotech.coal.utils.logger import LOGGER
|
|
22
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
"
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def upload_folder(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
_source_name = str(source_path)
|
|
133
|
-
for _file_path in source_path.glob("**/*" if recursive else "*"):
|
|
134
|
-
if _file_path.is_file():
|
|
135
|
-
_file_name = str(_file_path).removeprefix(_source_name).removeprefix("/")
|
|
136
|
-
uploaded_file_name = file_prefix + _file_name
|
|
137
|
-
LOGGER.info(
|
|
138
|
-
T("coal.common.data_transfer.file_sent").format(
|
|
139
|
-
file_path=_file_path, uploaded_name=uploaded_file_name
|
|
25
|
+
class S3:
|
|
26
|
+
|
|
27
|
+
def __init__(self, configuration: Configuration):
|
|
28
|
+
self._configuration = configuration.s3
|
|
29
|
+
|
|
30
|
+
@property
|
|
31
|
+
def file_prefix(self):
|
|
32
|
+
if "bucket_prefix" in self._configuration:
|
|
33
|
+
return self._configuration.bucket_prefix
|
|
34
|
+
return ""
|
|
35
|
+
|
|
36
|
+
@property
|
|
37
|
+
def use_ssl(self):
|
|
38
|
+
if "use_ssl" in self._configuration:
|
|
39
|
+
return self._configuration.use_ssl
|
|
40
|
+
return True
|
|
41
|
+
|
|
42
|
+
@property
|
|
43
|
+
def ssl_cert_bundle(self):
|
|
44
|
+
if "ssl_cert_bundle" in self._configuration:
|
|
45
|
+
return self._configuration.ssl_cert_bundle
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
@property
|
|
49
|
+
def access_key_id(self):
|
|
50
|
+
return self._configuration.access_key_id
|
|
51
|
+
|
|
52
|
+
@property
|
|
53
|
+
def endpoint_url(self):
|
|
54
|
+
return self._configuration.endpoint_url
|
|
55
|
+
|
|
56
|
+
@property
|
|
57
|
+
def bucket_name(self):
|
|
58
|
+
return self._configuration.bucket_name
|
|
59
|
+
|
|
60
|
+
@property
|
|
61
|
+
def secret_access_key(self):
|
|
62
|
+
return self._configuration.secret_access_key
|
|
63
|
+
|
|
64
|
+
@property
|
|
65
|
+
def output_type(self):
|
|
66
|
+
if "output_type" in self._configuration:
|
|
67
|
+
return self._configuration.output_type
|
|
68
|
+
return "csv"
|
|
69
|
+
|
|
70
|
+
@property
|
|
71
|
+
def client(self) -> boto3.client:
|
|
72
|
+
boto3_parameters = {
|
|
73
|
+
"use_ssl": self.use_ssl,
|
|
74
|
+
"endpoint_url": self.endpoint_url,
|
|
75
|
+
"aws_access_key_id": self.access_key_id,
|
|
76
|
+
"aws_secret_access_key": self.secret_access_key,
|
|
77
|
+
}
|
|
78
|
+
if self.ssl_cert_bundle:
|
|
79
|
+
boto3_parameters["verify"] = self.ssl_cert_bundle
|
|
80
|
+
|
|
81
|
+
return boto3.client("s3", **boto3_parameters)
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def resource(self) -> boto3.resource:
|
|
85
|
+
boto3_parameters = {
|
|
86
|
+
"use_ssl": self.use_ssl,
|
|
87
|
+
"endpoint_url": self.endpoint_url,
|
|
88
|
+
"aws_access_key_id": self.access_key_id,
|
|
89
|
+
"aws_secret_access_key": self.secret_access_key,
|
|
90
|
+
}
|
|
91
|
+
if self.ssl_cert_bundle:
|
|
92
|
+
boto3_parameters["verify"] = self.ssl_cert_bundle
|
|
93
|
+
|
|
94
|
+
return boto3.resource("s3", **boto3_parameters)
|
|
95
|
+
|
|
96
|
+
def upload_file(self, file_path: pathlib.Path) -> None:
|
|
97
|
+
"""
|
|
98
|
+
Upload a single file to an S3 bucket.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
file_path: Path to the file to upload
|
|
102
|
+
"""
|
|
103
|
+
uploaded_file_name = self.file_prefix + file_path.name
|
|
104
|
+
LOGGER.info(
|
|
105
|
+
T("coal.common.data_transfer.file_sent").format(file_path=file_path, uploaded_name=uploaded_file_name)
|
|
106
|
+
)
|
|
107
|
+
self.resource.Bucket(self.bucket_name).upload_file(str(file_path), uploaded_file_name)
|
|
108
|
+
|
|
109
|
+
def upload_folder(self, source_folder: str, recursive: bool = False) -> None:
|
|
110
|
+
"""
|
|
111
|
+
Upload files from a folder to an S3 bucket.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
source_folder: Path to the folder containing files to upload
|
|
115
|
+
recursive: Whether to recursively upload files from subdirectories
|
|
116
|
+
"""
|
|
117
|
+
source_path = pathlib.Path(source_folder)
|
|
118
|
+
if not source_path.exists():
|
|
119
|
+
LOGGER.error(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
|
|
120
|
+
raise FileNotFoundError(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
|
|
121
|
+
|
|
122
|
+
if source_path.is_dir():
|
|
123
|
+
_source_name = str(source_path)
|
|
124
|
+
for _file_path in source_path.glob("**/*" if recursive else "*"):
|
|
125
|
+
if _file_path.is_file():
|
|
126
|
+
_file_name = str(_file_path).removeprefix(_source_name).removeprefix("/")
|
|
127
|
+
uploaded_file_name = self.file_prefix + _file_name
|
|
128
|
+
LOGGER.info(
|
|
129
|
+
T("coal.common.data_transfer.file_sent").format(
|
|
130
|
+
file_path=_file_path, uploaded_name=uploaded_file_name
|
|
131
|
+
)
|
|
140
132
|
)
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
s3_resource: boto3.resource,
|
|
212
|
-
file_prefix: Optional[str] = None,
|
|
213
|
-
) -> None:
|
|
214
|
-
"""
|
|
215
|
-
Delete objects from an S3 bucket, optionally filtered by prefix.
|
|
216
|
-
|
|
217
|
-
Args:
|
|
218
|
-
bucket_name: Name of the S3 bucket
|
|
219
|
-
s3_resource: S3 resource object
|
|
220
|
-
file_prefix: Optional prefix to filter objects to delete
|
|
221
|
-
"""
|
|
222
|
-
bucket = s3_resource.Bucket(bucket_name)
|
|
223
|
-
|
|
224
|
-
if file_prefix:
|
|
225
|
-
bucket_files = bucket.objects.filter(Prefix=file_prefix)
|
|
226
|
-
else:
|
|
227
|
-
bucket_files = bucket.objects.all()
|
|
228
|
-
|
|
229
|
-
boto_objects = [{"Key": _file.key} for _file in bucket_files if _file.key != file_prefix]
|
|
230
|
-
if boto_objects:
|
|
231
|
-
LOGGER.info(T("coal.services.azure_storage.deleting_objects").format(objects=boto_objects))
|
|
232
|
-
boto_delete_request = {"Objects": boto_objects}
|
|
233
|
-
bucket.delete_objects(Delete=boto_delete_request)
|
|
234
|
-
else:
|
|
235
|
-
LOGGER.info(T("coal.services.azure_storage.no_objects"))
|
|
133
|
+
self.resource.Bucket(self.bucket_name).upload_file(str(_file_path), uploaded_file_name)
|
|
134
|
+
else:
|
|
135
|
+
self.upload_file(source_path)
|
|
136
|
+
|
|
137
|
+
def download_files(self, destination_folder: str) -> None:
|
|
138
|
+
"""
|
|
139
|
+
Download files from an S3 bucket to a local folder.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
destination_folder: Local folder to download files to
|
|
143
|
+
"""
|
|
144
|
+
bucket = self.resource.Bucket(self.bucket_name)
|
|
145
|
+
|
|
146
|
+
pathlib.Path(destination_folder).mkdir(parents=True, exist_ok=True)
|
|
147
|
+
remove_prefix = False
|
|
148
|
+
if self.file_prefix:
|
|
149
|
+
bucket_files = bucket.objects.filter(Prefix=self.file_prefix)
|
|
150
|
+
if self.file_prefix.endswith("/"):
|
|
151
|
+
remove_prefix = True
|
|
152
|
+
else:
|
|
153
|
+
bucket_files = bucket.objects.all()
|
|
154
|
+
for _file in bucket_files:
|
|
155
|
+
if not (path_name := str(_file.key)).endswith("/"):
|
|
156
|
+
target_file = path_name
|
|
157
|
+
if remove_prefix:
|
|
158
|
+
target_file = target_file.removeprefix(self.file_prefix)
|
|
159
|
+
output_file = f"{destination_folder}/{target_file}"
|
|
160
|
+
pathlib.Path(output_file).parent.mkdir(parents=True, exist_ok=True)
|
|
161
|
+
LOGGER.info(T("coal.services.azure_storage.downloading").format(path=path_name, output=output_file))
|
|
162
|
+
bucket.download_file(_file.key, output_file)
|
|
163
|
+
|
|
164
|
+
def upload_data_stream(self, data_stream: BytesIO, file_name: str) -> None:
|
|
165
|
+
"""
|
|
166
|
+
Upload a data stream to an S3 bucket.
|
|
167
|
+
|
|
168
|
+
Args:
|
|
169
|
+
data_stream: BytesIO stream containing the data to upload
|
|
170
|
+
file_name: Name of the file to create in the bucket
|
|
171
|
+
"""
|
|
172
|
+
uploaded_file_name = self.file_prefix + file_name
|
|
173
|
+
data_stream.seek(0)
|
|
174
|
+
size = len(data_stream.read())
|
|
175
|
+
data_stream.seek(0)
|
|
176
|
+
|
|
177
|
+
LOGGER.info(T("coal.common.data_transfer.sending_data").format(size=size))
|
|
178
|
+
self.client.upload_fileobj(data_stream, self.bucket_name, uploaded_file_name)
|
|
179
|
+
|
|
180
|
+
def delete_objects(self) -> None:
|
|
181
|
+
"""
|
|
182
|
+
Delete objects from an S3 bucket, optionally filtered by prefix.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
bucket_name: Name of the S3 bucket
|
|
186
|
+
s3_resource: S3 resource object
|
|
187
|
+
file_prefix: Optional prefix to filter objects to delete
|
|
188
|
+
"""
|
|
189
|
+
bucket = self.resource.Bucket(self.bucket_name)
|
|
190
|
+
|
|
191
|
+
if self.file_prefix:
|
|
192
|
+
bucket_files = bucket.objects.filter(Prefix=self.file_prefix)
|
|
193
|
+
else:
|
|
194
|
+
bucket_files = bucket.objects.all()
|
|
195
|
+
|
|
196
|
+
boto_objects = [{"Key": _file.key} for _file in bucket_files if _file.key != self.file_prefix]
|
|
197
|
+
if boto_objects:
|
|
198
|
+
LOGGER.info(T("coal.services.azure_storage.deleting_objects").format(objects=boto_objects))
|
|
199
|
+
boto_delete_request = {"Objects": boto_objects}
|
|
200
|
+
bucket.delete_objects(Delete=boto_delete_request)
|
|
201
|
+
else:
|
|
202
|
+
LOGGER.info(T("coal.services.azure_storage.no_objects"))
|
cosmotech/coal/azure/adx/auth.py
CHANGED
|
@@ -6,13 +6,13 @@
|
|
|
6
6
|
# specifically authorized by written means by Cosmo Tech.
|
|
7
7
|
|
|
8
8
|
import os
|
|
9
|
-
from typing import
|
|
9
|
+
from typing import Optional, Tuple
|
|
10
10
|
|
|
11
11
|
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
|
|
12
12
|
from azure.kusto.ingest import QueuedIngestClient
|
|
13
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
13
14
|
|
|
14
15
|
from cosmotech.coal.utils.logger import LOGGER
|
|
15
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def create_kusto_client(
|
|
@@ -13,25 +13,24 @@ This module provides functions for ingesting runner data into Azure Data Explore
|
|
|
13
13
|
|
|
14
14
|
import pathlib
|
|
15
15
|
import time
|
|
16
|
-
from
|
|
17
|
-
from typing import Dict, Any, List, Tuple, Optional
|
|
16
|
+
from typing import Any, Dict
|
|
18
17
|
|
|
19
18
|
from azure.kusto.data.response import KustoResponseDataSet
|
|
20
|
-
from azure.kusto.ingest import
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
19
|
+
from azure.kusto.ingest import (
|
|
20
|
+
ColumnMapping,
|
|
21
|
+
FileDescriptor,
|
|
22
|
+
IngestionMappingKind,
|
|
23
|
+
IngestionProperties,
|
|
24
|
+
IngestionResult,
|
|
25
|
+
QueuedIngestClient,
|
|
26
|
+
ReportLevel,
|
|
27
|
+
)
|
|
28
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
29
29
|
|
|
30
30
|
from cosmotech.coal.azure.adx.auth import initialize_clients
|
|
31
|
-
from cosmotech.coal.azure.adx.
|
|
32
|
-
from cosmotech.coal.azure.adx.
|
|
31
|
+
from cosmotech.coal.azure.adx.ingestion import IngestionStatus, check_ingestion_status
|
|
32
|
+
from cosmotech.coal.azure.adx.query import run_query
|
|
33
33
|
from cosmotech.coal.utils.logger import LOGGER
|
|
34
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
35
34
|
|
|
36
35
|
|
|
37
36
|
def prepare_csv_content(folder_path: str) -> Dict[str, Dict[str, Any]]:
|
|
@@ -7,26 +7,22 @@
|
|
|
7
7
|
|
|
8
8
|
import os
|
|
9
9
|
import tempfile
|
|
10
|
+
import time
|
|
10
11
|
import uuid
|
|
11
|
-
from typing import
|
|
12
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
12
13
|
|
|
13
14
|
import pyarrow
|
|
14
15
|
import pyarrow.csv as pc
|
|
15
|
-
import time
|
|
16
16
|
from azure.kusto.data import KustoClient
|
|
17
17
|
from azure.kusto.data.data_format import DataFormat
|
|
18
|
-
from azure.kusto.ingest import IngestionProperties
|
|
19
|
-
from azure.kusto.ingest import QueuedIngestClient
|
|
20
|
-
from azure.kusto.ingest import ReportLevel
|
|
18
|
+
from azure.kusto.ingest import IngestionProperties, QueuedIngestClient, ReportLevel
|
|
21
19
|
from cosmotech.orchestrator.utils.translate import T
|
|
22
|
-
from time import perf_counter
|
|
23
20
|
|
|
24
|
-
from cosmotech.coal.azure.adx.tables import check_and_create_table, _drop_by_tag
|
|
25
21
|
from cosmotech.coal.azure.adx.auth import initialize_clients
|
|
26
|
-
from cosmotech.coal.azure.adx.ingestion import
|
|
22
|
+
from cosmotech.coal.azure.adx.ingestion import handle_failures, monitor_ingestion
|
|
23
|
+
from cosmotech.coal.azure.adx.tables import _drop_by_tag, check_and_create_table
|
|
27
24
|
from cosmotech.coal.store.store import Store
|
|
28
25
|
from cosmotech.coal.utils.logger import LOGGER
|
|
29
|
-
from cosmotech.coal.utils.postgresql import send_pyarrow_table_to_postgresql
|
|
30
26
|
|
|
31
27
|
|
|
32
28
|
def send_table_data(
|
|
@@ -176,80 +172,3 @@ def send_store_to_adx(
|
|
|
176
172
|
LOGGER.warning(T("coal.services.adx.dropping_data").format(operation_tag=operation_tag))
|
|
177
173
|
_drop_by_tag(kusto_client, database, operation_tag)
|
|
178
174
|
raise e
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
def dump_store_to_adx(
|
|
182
|
-
store_folder: str,
|
|
183
|
-
postgres_host: str,
|
|
184
|
-
postgres_port: int,
|
|
185
|
-
postgres_db: str,
|
|
186
|
-
postgres_schema: str,
|
|
187
|
-
postgres_user: str,
|
|
188
|
-
postgres_password: str,
|
|
189
|
-
table_prefix: str = "Cosmotech_",
|
|
190
|
-
replace: bool = True,
|
|
191
|
-
) -> None:
|
|
192
|
-
"""
|
|
193
|
-
Dump Store data to an Azure Data Explorer database.
|
|
194
|
-
|
|
195
|
-
Args:
|
|
196
|
-
store_folder: Folder containing the Store
|
|
197
|
-
postgres_host: PostgreSQL host
|
|
198
|
-
postgres_port: PostgreSQL port
|
|
199
|
-
postgres_db: PostgreSQL database name
|
|
200
|
-
postgres_schema: PostgreSQL schema
|
|
201
|
-
postgres_user: PostgreSQL username
|
|
202
|
-
postgres_password: PostgreSQL password
|
|
203
|
-
table_prefix: Table prefix
|
|
204
|
-
replace: Whether to replace existing tables
|
|
205
|
-
"""
|
|
206
|
-
_s = Store(store_location=store_folder)
|
|
207
|
-
|
|
208
|
-
tables = list(_s.list_tables())
|
|
209
|
-
if len(tables):
|
|
210
|
-
LOGGER.info(T("coal.services.database.sending_data").format(table=f"{postgres_db}.{postgres_schema}"))
|
|
211
|
-
total_rows = 0
|
|
212
|
-
_process_start = perf_counter()
|
|
213
|
-
for table_name in tables:
|
|
214
|
-
_s_time = perf_counter()
|
|
215
|
-
target_table_name = f"{table_prefix}{table_name}"
|
|
216
|
-
LOGGER.info(T("coal.services.database.table_entry").format(table=target_table_name))
|
|
217
|
-
data = _s.get_table(table_name)
|
|
218
|
-
if not len(data):
|
|
219
|
-
LOGGER.info(T("coal.services.database.no_rows"))
|
|
220
|
-
continue
|
|
221
|
-
_dl_time = perf_counter()
|
|
222
|
-
rows = send_pyarrow_table_to_postgresql(
|
|
223
|
-
data,
|
|
224
|
-
target_table_name,
|
|
225
|
-
postgres_host,
|
|
226
|
-
postgres_port,
|
|
227
|
-
postgres_db,
|
|
228
|
-
postgres_schema,
|
|
229
|
-
postgres_user,
|
|
230
|
-
postgres_password,
|
|
231
|
-
replace,
|
|
232
|
-
)
|
|
233
|
-
total_rows += rows
|
|
234
|
-
_up_time = perf_counter()
|
|
235
|
-
LOGGER.info(T("coal.services.database.row_count").format(count=rows))
|
|
236
|
-
LOGGER.debug(
|
|
237
|
-
T("coal.common.timing.operation_completed").format(
|
|
238
|
-
operation="Load from datastore", time=f"{_dl_time - _s_time:0.3}"
|
|
239
|
-
)
|
|
240
|
-
)
|
|
241
|
-
LOGGER.debug(
|
|
242
|
-
T("coal.common.timing.operation_completed").format(
|
|
243
|
-
operation="Send to postgresql", time=f"{_up_time - _dl_time:0.3}"
|
|
244
|
-
)
|
|
245
|
-
)
|
|
246
|
-
_process_end = perf_counter()
|
|
247
|
-
LOGGER.info(
|
|
248
|
-
T("coal.services.database.rows_fetched").format(
|
|
249
|
-
table="all tables",
|
|
250
|
-
count=total_rows,
|
|
251
|
-
time=f"{_process_end - _process_start:0.3}",
|
|
252
|
-
)
|
|
253
|
-
)
|
|
254
|
-
else:
|
|
255
|
-
LOGGER.info(T("coal.services.database.store_empty"))
|
|
@@ -5,13 +5,13 @@
|
|
|
5
5
|
# etc., to any person is prohibited unless it has been previously and
|
|
6
6
|
# specifically authorized by written means by Cosmo Tech.
|
|
7
7
|
|
|
8
|
-
from typing import Dict
|
|
8
|
+
from typing import Dict
|
|
9
9
|
|
|
10
10
|
import pyarrow
|
|
11
11
|
from azure.kusto.data import KustoClient
|
|
12
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
12
13
|
|
|
13
14
|
from cosmotech.coal.utils.logger import LOGGER
|
|
14
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def table_exists(client: KustoClient, database: str, table_name: str) -> bool:
|
cosmotech/coal/azure/blob.py
CHANGED
|
@@ -12,19 +12,16 @@ This module provides functions for interacting with Azure Blob Storage,
|
|
|
12
12
|
including uploading data from the Store.
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
|
-
import pathlib
|
|
16
15
|
from io import BytesIO
|
|
17
|
-
from typing import List, Optional
|
|
18
|
-
|
|
19
|
-
from azure.identity import ClientSecretCredential
|
|
20
|
-
from azure.storage.blob import BlobServiceClient
|
|
21
16
|
|
|
22
17
|
import pyarrow.csv as pc
|
|
23
18
|
import pyarrow.parquet as pq
|
|
19
|
+
from azure.identity import ClientSecretCredential
|
|
20
|
+
from azure.storage.blob import BlobServiceClient
|
|
21
|
+
from cosmotech.orchestrator.utils.translate import T
|
|
24
22
|
|
|
25
23
|
from cosmotech.coal.store.store import Store
|
|
26
24
|
from cosmotech.coal.utils.logger import LOGGER
|
|
27
|
-
from cosmotech.orchestrator.utils.translate import T
|
|
28
25
|
|
|
29
26
|
VALID_TYPES = (
|
|
30
27
|
"sqlite",
|
|
@@ -42,6 +39,7 @@ def dump_store_to_azure(
|
|
|
42
39
|
client_secret: str,
|
|
43
40
|
output_type: str = "sqlite",
|
|
44
41
|
file_prefix: str = "",
|
|
42
|
+
selected_tables: list[str] = [],
|
|
45
43
|
) -> None:
|
|
46
44
|
"""
|
|
47
45
|
Dump Store data to Azure Blob Storage.
|
|
@@ -90,6 +88,8 @@ def dump_store_to_azure(
|
|
|
90
88
|
container_client.upload_blob(name=_uploaded_file_name, data=data, overwrite=True)
|
|
91
89
|
else:
|
|
92
90
|
tables = list(_s.list_tables())
|
|
91
|
+
if selected_tables:
|
|
92
|
+
tables = [t for t in tables if t in selected_tables]
|
|
93
93
|
for table_name in tables:
|
|
94
94
|
_data_stream = BytesIO()
|
|
95
95
|
_file_name = None
|