digitalhub 0.13.4__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +3 -8
- digitalhub/context/api.py +1 -5
- digitalhub/context/builder.py +1 -5
- digitalhub/context/context.py +2 -9
- digitalhub/entities/_base/_base/entity.py +0 -15
- digitalhub/entities/_base/context/entity.py +1 -1
- digitalhub/entities/_base/entity/builder.py +5 -5
- digitalhub/entities/_base/entity/entity.py +0 -8
- digitalhub/entities/_base/executable/entity.py +169 -79
- digitalhub/entities/_base/material/entity.py +6 -22
- digitalhub/entities/_base/material/utils.py +1 -4
- digitalhub/entities/_base/runtime_entity/builder.py +53 -18
- digitalhub/entities/_base/unversioned/entity.py +1 -1
- digitalhub/entities/_base/versioned/entity.py +1 -1
- digitalhub/entities/_commons/enums.py +1 -31
- digitalhub/entities/_commons/utils.py +83 -21
- digitalhub/entities/_constructors/_resources.py +151 -0
- digitalhub/entities/{_base/entity/_constructors → _constructors}/name.py +18 -0
- digitalhub/entities/_processors/base/__init__.py +3 -0
- digitalhub/entities/_processors/{base.py → base/crud.py} +14 -226
- digitalhub/entities/_processors/base/import_export.py +123 -0
- digitalhub/entities/_processors/base/processor.py +302 -0
- digitalhub/entities/_processors/base/special_ops.py +108 -0
- digitalhub/entities/_processors/context/__init__.py +3 -0
- digitalhub/entities/_processors/context/crud.py +652 -0
- digitalhub/entities/_processors/context/import_export.py +242 -0
- digitalhub/entities/_processors/context/material.py +123 -0
- digitalhub/entities/_processors/context/processor.py +400 -0
- digitalhub/entities/_processors/context/special_ops.py +476 -0
- digitalhub/entities/_processors/processors.py +12 -0
- digitalhub/entities/_processors/utils.py +12 -11
- digitalhub/entities/artifact/crud.py +58 -22
- digitalhub/entities/artifact/utils.py +3 -3
- digitalhub/entities/dataitem/crud.py +63 -20
- digitalhub/entities/dataitem/table/entity.py +24 -22
- digitalhub/entities/dataitem/utils.py +15 -15
- digitalhub/entities/function/_base/entity.py +3 -3
- digitalhub/entities/function/crud.py +55 -24
- digitalhub/entities/model/_base/entity.py +62 -20
- digitalhub/entities/model/crud.py +58 -22
- digitalhub/entities/model/utils.py +3 -3
- digitalhub/entities/project/_base/entity.py +321 -152
- digitalhub/entities/project/crud.py +15 -23
- digitalhub/entities/run/_base/builder.py +0 -4
- digitalhub/entities/run/_base/entity.py +70 -63
- digitalhub/entities/run/crud.py +79 -26
- digitalhub/entities/secret/_base/entity.py +1 -5
- digitalhub/entities/secret/crud.py +29 -26
- digitalhub/entities/task/_base/builder.py +0 -4
- digitalhub/entities/task/_base/entity.py +5 -5
- digitalhub/entities/task/_base/models.py +13 -16
- digitalhub/entities/task/crud.py +61 -29
- digitalhub/entities/trigger/_base/entity.py +1 -5
- digitalhub/entities/trigger/crud.py +64 -24
- digitalhub/entities/workflow/_base/entity.py +3 -3
- digitalhub/entities/workflow/crud.py +55 -21
- digitalhub/factory/entity.py +283 -0
- digitalhub/factory/enums.py +18 -0
- digitalhub/factory/registry.py +197 -0
- digitalhub/factory/runtime.py +44 -0
- digitalhub/factory/utils.py +3 -54
- digitalhub/runtimes/_base.py +2 -2
- digitalhub/stores/client/_base/enums.py +39 -0
- digitalhub/stores/client/_base/key_builder.py +2 -2
- digitalhub/stores/client/_base/params_builder.py +48 -0
- digitalhub/stores/client/api.py +6 -10
- digitalhub/stores/client/builder.py +4 -4
- digitalhub/stores/client/dhcore/api_builder.py +2 -1
- digitalhub/stores/client/dhcore/client.py +85 -429
- digitalhub/stores/client/dhcore/configurator.py +109 -328
- digitalhub/stores/client/dhcore/enums.py +0 -16
- digitalhub/stores/client/dhcore/error_parser.py +0 -4
- digitalhub/stores/client/dhcore/header_manager.py +61 -0
- digitalhub/stores/client/dhcore/http_handler.py +133 -0
- digitalhub/stores/client/dhcore/params_builder.py +147 -134
- digitalhub/stores/client/dhcore/response_processor.py +102 -0
- digitalhub/stores/client/dhcore/utils.py +6 -72
- digitalhub/stores/client/local/api_builder.py +1 -1
- digitalhub/stores/client/local/client.py +79 -47
- digitalhub/stores/client/local/params_builder.py +18 -41
- digitalhub/stores/credentials/api.py +0 -4
- digitalhub/stores/credentials/configurator.py +2 -28
- digitalhub/stores/credentials/enums.py +3 -0
- digitalhub/stores/credentials/handler.py +0 -12
- digitalhub/stores/credentials/ini_module.py +0 -22
- digitalhub/stores/credentials/store.py +0 -4
- digitalhub/stores/data/_base/store.py +0 -16
- digitalhub/stores/data/builder.py +1 -5
- digitalhub/stores/data/local/store.py +0 -103
- digitalhub/stores/data/remote/store.py +0 -4
- digitalhub/stores/data/s3/configurator.py +60 -14
- digitalhub/stores/data/s3/store.py +49 -16
- digitalhub/stores/data/sql/configurator.py +0 -8
- digitalhub/stores/data/sql/store.py +21 -10
- digitalhub/stores/readers/data/factory.py +0 -8
- digitalhub/stores/readers/data/pandas/reader.py +0 -16
- digitalhub/utils/file_utils.py +0 -17
- digitalhub/utils/generic_utils.py +0 -12
- digitalhub/utils/git_utils.py +0 -8
- digitalhub/utils/io_utils.py +0 -12
- digitalhub/utils/store_utils.py +44 -0
- {digitalhub-0.13.4.dist-info → digitalhub-0.14.0.dist-info}/METADATA +3 -2
- {digitalhub-0.13.4.dist-info → digitalhub-0.14.0.dist-info}/RECORD +111 -95
- digitalhub/entities/_processors/context.py +0 -1450
- digitalhub/entities/task/_base/utils.py +0 -22
- digitalhub/factory/factory.py +0 -381
- digitalhub/stores/client/dhcore/models.py +0 -40
- digitalhub/stores/data/s3/utils.py +0 -78
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/__init__.py +0 -0
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/metadata.py +0 -0
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/spec.py +0 -0
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/status.py +0 -0
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/uuid.py +0 -0
- {digitalhub-0.13.4.dist-info → digitalhub-0.14.0.dist-info}/WHEEL +0 -0
- {digitalhub-0.13.4.dist-info → digitalhub-0.14.0.dist-info}/licenses/AUTHORS +0 -0
- {digitalhub-0.13.4.dist-info → digitalhub-0.14.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -114,10 +114,6 @@ class Store:
|
|
|
114
114
|
src : str
|
|
115
115
|
The source path.
|
|
116
116
|
|
|
117
|
-
Returns
|
|
118
|
-
-------
|
|
119
|
-
None
|
|
120
|
-
|
|
121
117
|
Raises
|
|
122
118
|
------
|
|
123
119
|
StoreError
|
|
@@ -135,10 +131,6 @@ class Store:
|
|
|
135
131
|
dst : str
|
|
136
132
|
The destination path.
|
|
137
133
|
|
|
138
|
-
Returns
|
|
139
|
-
-------
|
|
140
|
-
None
|
|
141
|
-
|
|
142
134
|
Raises
|
|
143
135
|
------
|
|
144
136
|
StoreError
|
|
@@ -158,10 +150,6 @@ class Store:
|
|
|
158
150
|
overwrite : bool
|
|
159
151
|
Specify if overwrite an existing file.
|
|
160
152
|
|
|
161
|
-
Returns
|
|
162
|
-
-------
|
|
163
|
-
None
|
|
164
|
-
|
|
165
153
|
Raises
|
|
166
154
|
------
|
|
167
155
|
StoreError
|
|
@@ -179,10 +167,6 @@ class Store:
|
|
|
179
167
|
----------
|
|
180
168
|
path : str | Path
|
|
181
169
|
The path to build.
|
|
182
|
-
|
|
183
|
-
Returns
|
|
184
|
-
-------
|
|
185
|
-
None
|
|
186
170
|
"""
|
|
187
171
|
if not isinstance(path, Path):
|
|
188
172
|
path = Path(path)
|
|
@@ -79,14 +79,10 @@ class StoreBuilder:
|
|
|
79
79
|
The unique identifier for the store type (e.g., 's3', 'sql').
|
|
80
80
|
store : Store
|
|
81
81
|
The store class to register for this type.
|
|
82
|
-
configurator : Configurator
|
|
82
|
+
configurator : Configurator
|
|
83
83
|
The configurator class for store configuration.
|
|
84
84
|
If None, the store will be instantiated without configuration.
|
|
85
85
|
|
|
86
|
-
Returns
|
|
87
|
-
-------
|
|
88
|
-
None
|
|
89
|
-
|
|
90
86
|
Raises
|
|
91
87
|
------
|
|
92
88
|
StoreError
|
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
|
-
import shutil
|
|
8
7
|
from pathlib import Path
|
|
9
8
|
from typing import Any
|
|
10
9
|
|
|
@@ -180,105 +179,3 @@ class LocalStore(Store):
|
|
|
180
179
|
reader = get_reader_by_object(df)
|
|
181
180
|
reader.write_df(df, dst, extension=extension, **kwargs)
|
|
182
181
|
return dst
|
|
183
|
-
|
|
184
|
-
##############################
|
|
185
|
-
# Private I/O methods
|
|
186
|
-
##############################
|
|
187
|
-
|
|
188
|
-
def _get_src_dst_files(self, src: Path, dst: Path) -> list[str]:
|
|
189
|
-
"""
|
|
190
|
-
Copy files from source to destination.
|
|
191
|
-
|
|
192
|
-
Parameters
|
|
193
|
-
----------
|
|
194
|
-
src : Path
|
|
195
|
-
The source path.
|
|
196
|
-
dst : Path
|
|
197
|
-
The destination path.
|
|
198
|
-
|
|
199
|
-
Returns
|
|
200
|
-
-------
|
|
201
|
-
list[str]
|
|
202
|
-
Returns the list of destination and source paths of the
|
|
203
|
-
copied files.
|
|
204
|
-
"""
|
|
205
|
-
return [self._get_src_dst_file(i, dst) for i in src.rglob("*") if i.is_file()]
|
|
206
|
-
|
|
207
|
-
def _get_src_dst_file(self, src: Path, dst: Path) -> str:
|
|
208
|
-
"""
|
|
209
|
-
Copy file from source to destination.
|
|
210
|
-
|
|
211
|
-
Parameters
|
|
212
|
-
----------
|
|
213
|
-
src : Path
|
|
214
|
-
The source path.
|
|
215
|
-
dst : Path
|
|
216
|
-
The destination path.
|
|
217
|
-
|
|
218
|
-
Returns
|
|
219
|
-
-------
|
|
220
|
-
str
|
|
221
|
-
"""
|
|
222
|
-
dst_pth = self._copy_file(src, dst, True)
|
|
223
|
-
return str(dst_pth), str(src)
|
|
224
|
-
|
|
225
|
-
def _copy_dir(self, src: Path, dst: Path, overwrite: bool) -> list[str]:
|
|
226
|
-
"""
|
|
227
|
-
Download file from source to destination.
|
|
228
|
-
|
|
229
|
-
Parameters
|
|
230
|
-
----------
|
|
231
|
-
src : Path
|
|
232
|
-
The source path.
|
|
233
|
-
dst : Path
|
|
234
|
-
The destination path.
|
|
235
|
-
|
|
236
|
-
Returns
|
|
237
|
-
-------
|
|
238
|
-
list[str]
|
|
239
|
-
"""
|
|
240
|
-
dst = self._rebuild_path(dst, src)
|
|
241
|
-
shutil.copytree(src, dst, dirs_exist_ok=overwrite)
|
|
242
|
-
return [str(i) for i in dst.rglob("*") if i.is_file()]
|
|
243
|
-
|
|
244
|
-
def _copy_file(self, src: Path, dst: Path, overwrite: bool) -> str:
|
|
245
|
-
"""
|
|
246
|
-
Copy file from source to destination.
|
|
247
|
-
|
|
248
|
-
Parameters
|
|
249
|
-
----------
|
|
250
|
-
src : Path
|
|
251
|
-
The source path.
|
|
252
|
-
dst : Path
|
|
253
|
-
The destination path.
|
|
254
|
-
|
|
255
|
-
Returns
|
|
256
|
-
-------
|
|
257
|
-
str
|
|
258
|
-
"""
|
|
259
|
-
dst = self._rebuild_path(dst, src)
|
|
260
|
-
self._check_overwrite(dst, overwrite)
|
|
261
|
-
return str(shutil.copy2(src, dst))
|
|
262
|
-
|
|
263
|
-
def _rebuild_path(self, dst: Path, src: Path) -> Path:
|
|
264
|
-
"""
|
|
265
|
-
Rebuild path.
|
|
266
|
-
|
|
267
|
-
Parameters
|
|
268
|
-
----------
|
|
269
|
-
dst : Path
|
|
270
|
-
The destination path.
|
|
271
|
-
src : Path
|
|
272
|
-
The source path.
|
|
273
|
-
|
|
274
|
-
Returns
|
|
275
|
-
-------
|
|
276
|
-
Path
|
|
277
|
-
The rebuilt path.
|
|
278
|
-
"""
|
|
279
|
-
if dst.is_dir():
|
|
280
|
-
if src.is_absolute():
|
|
281
|
-
raise StoreError("Source must be a relative path if the destination is a directory.")
|
|
282
|
-
dst = dst / src
|
|
283
|
-
self._build_path(dst)
|
|
284
|
-
return dst
|
|
@@ -46,10 +46,6 @@ class S3StoreConfigurator(Configurator):
|
|
|
46
46
|
def load_env_vars(self) -> None:
|
|
47
47
|
"""
|
|
48
48
|
Loads the credentials from the environment variables.
|
|
49
|
-
|
|
50
|
-
Returns
|
|
51
|
-
-------
|
|
52
|
-
None
|
|
53
49
|
"""
|
|
54
50
|
env_creds = self._creds_handler.load_from_env(self.keys)
|
|
55
51
|
self._creds_handler.set_credentials(self._env, env_creds)
|
|
@@ -57,10 +53,6 @@ class S3StoreConfigurator(Configurator):
|
|
|
57
53
|
def load_file_vars(self) -> None:
|
|
58
54
|
"""
|
|
59
55
|
Loads the credentials from a file.
|
|
60
|
-
|
|
61
|
-
Returns
|
|
62
|
-
-------
|
|
63
|
-
None
|
|
64
56
|
"""
|
|
65
57
|
file_creds = self._creds_handler.load_from_file(self.keys)
|
|
66
58
|
self._creds_handler.set_credentials(self._file, file_creds)
|
|
@@ -74,12 +66,23 @@ class S3StoreConfigurator(Configurator):
|
|
|
74
66
|
dict
|
|
75
67
|
Dictionary containing S3 credentials and configuration.
|
|
76
68
|
"""
|
|
77
|
-
creds = self.
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
69
|
+
creds = self.evaluate_credentials()
|
|
70
|
+
return self.get_creds_dict(creds)
|
|
71
|
+
|
|
72
|
+
def get_creds_dict(self, creds: dict) -> dict:
|
|
73
|
+
"""
|
|
74
|
+
Returns a dictionary containing the S3 credentials.
|
|
75
|
+
|
|
76
|
+
Parameters
|
|
77
|
+
----------
|
|
78
|
+
creds : dict
|
|
79
|
+
The credentials dictionary.
|
|
80
|
+
|
|
81
|
+
Returns
|
|
82
|
+
-------
|
|
83
|
+
dict
|
|
84
|
+
A dictionary containing the S3 credentials.
|
|
85
|
+
"""
|
|
83
86
|
return {
|
|
84
87
|
"endpoint_url": creds[CredsEnvVar.S3_ENDPOINT_URL.value],
|
|
85
88
|
"aws_access_key_id": creds[CredsEnvVar.S3_ACCESS_KEY_ID.value],
|
|
@@ -91,6 +94,49 @@ class S3StoreConfigurator(Configurator):
|
|
|
91
94
|
),
|
|
92
95
|
}
|
|
93
96
|
|
|
97
|
+
def evaluate_credentials(self) -> dict:
|
|
98
|
+
"""
|
|
99
|
+
Evaluates and returns the current valid credentials.
|
|
100
|
+
If the credentials are expired and were loaded from file,
|
|
101
|
+
it refreshes them.
|
|
102
|
+
|
|
103
|
+
Returns
|
|
104
|
+
-------
|
|
105
|
+
dict
|
|
106
|
+
The current valid credentials.
|
|
107
|
+
"""
|
|
108
|
+
creds = self.get_credentials(self._origin)
|
|
109
|
+
expired = creds[CredsEnvVar.S3_CREDENTIALS_EXPIRATION.value]
|
|
110
|
+
if self._origin == self._file and self._is_expired(expired):
|
|
111
|
+
refresh_token()
|
|
112
|
+
self.load_file_vars()
|
|
113
|
+
creds = self.get_credentials(self._origin)
|
|
114
|
+
return creds
|
|
115
|
+
|
|
116
|
+
def get_file_config(self) -> dict:
|
|
117
|
+
"""
|
|
118
|
+
Returns the credentials loaded from file.
|
|
119
|
+
|
|
120
|
+
Returns
|
|
121
|
+
-------
|
|
122
|
+
dict
|
|
123
|
+
The credentials loaded from file.
|
|
124
|
+
"""
|
|
125
|
+
creds = self.get_credentials(self._file)
|
|
126
|
+
return self.get_creds_dict(creds)
|
|
127
|
+
|
|
128
|
+
def get_env_config(self) -> dict:
|
|
129
|
+
"""
|
|
130
|
+
Returns the credentials loaded from environment variables.
|
|
131
|
+
|
|
132
|
+
Returns
|
|
133
|
+
-------
|
|
134
|
+
dict
|
|
135
|
+
The credentials loaded from environment variables.
|
|
136
|
+
"""
|
|
137
|
+
creds = self.get_credentials(self._env)
|
|
138
|
+
return self.get_creds_dict(creds)
|
|
139
|
+
|
|
94
140
|
@staticmethod
|
|
95
141
|
def _is_expired(timestamp: str | None) -> bool:
|
|
96
142
|
"""
|
|
@@ -16,7 +16,6 @@ from boto3.s3.transfer import TransferConfig
|
|
|
16
16
|
from botocore.exceptions import ClientError, NoCredentialsError
|
|
17
17
|
|
|
18
18
|
from digitalhub.stores.data._base.store import Store
|
|
19
|
-
from digitalhub.stores.data.s3.utils import get_bucket_name
|
|
20
19
|
from digitalhub.stores.readers.data.api import get_reader_by_object
|
|
21
20
|
from digitalhub.utils.exceptions import ConfigError, StoreError
|
|
22
21
|
from digitalhub.utils.file_utils import get_file_info_from_s3, get_file_mime_type
|
|
@@ -30,6 +29,8 @@ if typing.TYPE_CHECKING:
|
|
|
30
29
|
# Type aliases
|
|
31
30
|
S3Client = Type["botocore.client.S3"]
|
|
32
31
|
|
|
32
|
+
MULTIPART_THRESHOLD = 100 * 1024 * 1024
|
|
33
|
+
|
|
33
34
|
|
|
34
35
|
class S3Store(Store):
|
|
35
36
|
"""
|
|
@@ -332,10 +333,50 @@ class S3Store(Store):
|
|
|
332
333
|
str
|
|
333
334
|
The S3 path where the dataframe was saved.
|
|
334
335
|
"""
|
|
335
|
-
fileobj = BytesIO()
|
|
336
336
|
reader = get_reader_by_object(df)
|
|
337
|
-
|
|
338
|
-
|
|
337
|
+
with BytesIO() as fileobj:
|
|
338
|
+
reader.write_df(df, fileobj, extension=extension, **kwargs)
|
|
339
|
+
fileobj.seek(0)
|
|
340
|
+
return self.upload_fileobject(fileobj, dst)
|
|
341
|
+
|
|
342
|
+
##############################
|
|
343
|
+
# Wrapper methods
|
|
344
|
+
##############################
|
|
345
|
+
|
|
346
|
+
def get_s3_source(self, src: str, filename: Path) -> None:
|
|
347
|
+
"""
|
|
348
|
+
Download a file from S3 and save it to a local file.
|
|
349
|
+
|
|
350
|
+
Parameters
|
|
351
|
+
----------
|
|
352
|
+
src : str
|
|
353
|
+
S3 path of the object to be downloaded (e.g., 's3://bucket
|
|
354
|
+
filename : Path
|
|
355
|
+
Local path where the downloaded object will be saved.
|
|
356
|
+
"""
|
|
357
|
+
client, bucket = self._check_factory(src)
|
|
358
|
+
key = self._get_key(src)
|
|
359
|
+
self._download_file(key, filename, client, bucket)
|
|
360
|
+
|
|
361
|
+
def get_s3_client(self, file: bool = True) -> S3Client:
|
|
362
|
+
"""
|
|
363
|
+
Get an S3 client object.
|
|
364
|
+
|
|
365
|
+
Parameters
|
|
366
|
+
----------
|
|
367
|
+
file : bool
|
|
368
|
+
Whether to use file-based credentials. Default is True.
|
|
369
|
+
|
|
370
|
+
Returns
|
|
371
|
+
-------
|
|
372
|
+
S3Client
|
|
373
|
+
Returns a client object that interacts with the S3 storage service.
|
|
374
|
+
"""
|
|
375
|
+
if file:
|
|
376
|
+
cfg = self._configurator.get_file_config()
|
|
377
|
+
else:
|
|
378
|
+
cfg = self._configurator.get_env_config()
|
|
379
|
+
return self._get_client(cfg)
|
|
339
380
|
|
|
340
381
|
##############################
|
|
341
382
|
# Private I/O methods
|
|
@@ -533,10 +574,6 @@ class S3Store(Store):
|
|
|
533
574
|
The S3 client object.
|
|
534
575
|
bucket : str
|
|
535
576
|
The name of the S3 bucket.
|
|
536
|
-
|
|
537
|
-
Returns
|
|
538
|
-
-------
|
|
539
|
-
None
|
|
540
577
|
"""
|
|
541
578
|
extra_args = {}
|
|
542
579
|
mime_type = get_file_mime_type(src)
|
|
@@ -547,7 +584,7 @@ class S3Store(Store):
|
|
|
547
584
|
Bucket=bucket,
|
|
548
585
|
Key=key,
|
|
549
586
|
ExtraArgs=extra_args,
|
|
550
|
-
Config=TransferConfig(multipart_threshold=
|
|
587
|
+
Config=TransferConfig(multipart_threshold=MULTIPART_THRESHOLD),
|
|
551
588
|
)
|
|
552
589
|
|
|
553
590
|
@staticmethod
|
|
@@ -570,16 +607,12 @@ class S3Store(Store):
|
|
|
570
607
|
The S3 client object.
|
|
571
608
|
bucket : str
|
|
572
609
|
The name of the S3 bucket.
|
|
573
|
-
|
|
574
|
-
Returns
|
|
575
|
-
-------
|
|
576
|
-
None
|
|
577
610
|
"""
|
|
578
611
|
client.upload_fileobj(
|
|
579
612
|
Fileobj=fileobj,
|
|
580
613
|
Bucket=bucket,
|
|
581
614
|
Key=key,
|
|
582
|
-
Config=TransferConfig(multipart_threshold=
|
|
615
|
+
Config=TransferConfig(multipart_threshold=MULTIPART_THRESHOLD),
|
|
583
616
|
)
|
|
584
617
|
|
|
585
618
|
##############################
|
|
@@ -595,7 +628,7 @@ class S3Store(Store):
|
|
|
595
628
|
str
|
|
596
629
|
The name of the S3 bucket.
|
|
597
630
|
"""
|
|
598
|
-
return
|
|
631
|
+
return urlparse(root).netloc
|
|
599
632
|
|
|
600
633
|
def _get_client(self, cfg: dict) -> S3Client:
|
|
601
634
|
"""
|
|
@@ -621,7 +654,7 @@ class S3Store(Store):
|
|
|
621
654
|
----------
|
|
622
655
|
s3_path : str
|
|
623
656
|
Path to the S3 bucket (e.g., 's3://bucket/path').
|
|
624
|
-
retry : bool
|
|
657
|
+
retry : bool
|
|
625
658
|
Whether to retry the operation if a ConfigError is raised. Default is True.
|
|
626
659
|
|
|
627
660
|
Returns
|
|
@@ -56,10 +56,6 @@ class SqlStoreConfigurator(Configurator):
|
|
|
56
56
|
Retrieves SQL database connection credentials from the system
|
|
57
57
|
environment variables and stores them in the configurator's
|
|
58
58
|
credential handler for use in database connections.
|
|
59
|
-
|
|
60
|
-
Returns
|
|
61
|
-
-------
|
|
62
|
-
None
|
|
63
59
|
"""
|
|
64
60
|
env_creds = self._creds_handler.load_from_env(self.keys)
|
|
65
61
|
self._creds_handler.set_credentials(self._env, env_creds)
|
|
@@ -71,10 +67,6 @@ class SqlStoreConfigurator(Configurator):
|
|
|
71
67
|
Retrieves SQL database connection credentials from a
|
|
72
68
|
configuration file and stores them in the configurator's
|
|
73
69
|
credential handler for use in database connections.
|
|
74
|
-
|
|
75
|
-
Returns
|
|
76
|
-
-------
|
|
77
|
-
None
|
|
78
70
|
"""
|
|
79
71
|
file_creds = self._creds_handler.load_from_file(self.keys)
|
|
80
72
|
self._creds_handler.set_credentials(self._file, file_creds)
|
|
@@ -159,9 +159,9 @@ class SqlStore(Store):
|
|
|
159
159
|
path : SourcesOrListOfSources
|
|
160
160
|
The SQL URI path to read from in the format
|
|
161
161
|
'sql://database/schema/table'. Only single paths are supported.
|
|
162
|
-
file_format : str
|
|
162
|
+
file_format : str
|
|
163
163
|
File format specification (not used for SQL operations).
|
|
164
|
-
engine : str
|
|
164
|
+
engine : str
|
|
165
165
|
DataFrame engine to use (e.g., 'pandas', 'polars').
|
|
166
166
|
If None, uses the default engine.
|
|
167
167
|
**kwargs : dict
|
|
@@ -209,7 +209,7 @@ class SqlStore(Store):
|
|
|
209
209
|
path : str
|
|
210
210
|
The SQL URI path specifying the database connection
|
|
211
211
|
in the format 'sql://database/schema/table'.
|
|
212
|
-
engine : str
|
|
212
|
+
engine : str
|
|
213
213
|
DataFrame engine to use for result processing
|
|
214
214
|
(e.g., 'pandas', 'polars'). If None, uses the default.
|
|
215
215
|
|
|
@@ -238,7 +238,7 @@ class SqlStore(Store):
|
|
|
238
238
|
dst : str
|
|
239
239
|
The destination SQL URI in the format
|
|
240
240
|
'sql://database/schema/table' or 'sql://database/table'.
|
|
241
|
-
extension : str
|
|
241
|
+
extension : str
|
|
242
242
|
File extension parameter (not used for SQL operations).
|
|
243
243
|
**kwargs : dict
|
|
244
244
|
Additional keyword arguments passed to the DataFrame's
|
|
@@ -253,6 +253,21 @@ class SqlStore(Store):
|
|
|
253
253
|
table = self._get_table_name(dst)
|
|
254
254
|
return self._upload_table(df, schema, table, **kwargs)
|
|
255
255
|
|
|
256
|
+
##############################
|
|
257
|
+
# Wrapper methods
|
|
258
|
+
##############################
|
|
259
|
+
|
|
260
|
+
def get_engine(self, schema: str | None = None) -> Engine:
|
|
261
|
+
"""
|
|
262
|
+
Get a SQLAlchemy engine connected to the database.
|
|
263
|
+
|
|
264
|
+
Returns
|
|
265
|
+
-------
|
|
266
|
+
Engine
|
|
267
|
+
A SQLAlchemy engine instance connected to the database.
|
|
268
|
+
"""
|
|
269
|
+
return self._check_factory(schema=schema)
|
|
270
|
+
|
|
256
271
|
##############################
|
|
257
272
|
# Private I/O methods
|
|
258
273
|
##############################
|
|
@@ -359,7 +374,7 @@ class SqlStore(Store):
|
|
|
359
374
|
|
|
360
375
|
Parameters
|
|
361
376
|
----------
|
|
362
|
-
schema : str
|
|
377
|
+
schema : str
|
|
363
378
|
The database schema to set in the search path.
|
|
364
379
|
If provided, sets the PostgreSQL search_path option.
|
|
365
380
|
|
|
@@ -397,7 +412,7 @@ class SqlStore(Store):
|
|
|
397
412
|
retry : bool, default True
|
|
398
413
|
Whether to attempt a retry with different configuration
|
|
399
414
|
if the initial connection fails.
|
|
400
|
-
schema : str
|
|
415
|
+
schema : str
|
|
401
416
|
The database schema to configure in the engine.
|
|
402
417
|
|
|
403
418
|
Returns
|
|
@@ -511,10 +526,6 @@ class SqlStore(Store):
|
|
|
511
526
|
engine : Engine
|
|
512
527
|
The SQLAlchemy engine to test for connectivity.
|
|
513
528
|
|
|
514
|
-
Returns
|
|
515
|
-
-------
|
|
516
|
-
None
|
|
517
|
-
|
|
518
529
|
Raises
|
|
519
530
|
------
|
|
520
531
|
ConfigError
|
|
@@ -33,10 +33,6 @@ class ReaderFactory:
|
|
|
33
33
|
Reader name.
|
|
34
34
|
builder : DataframeReader
|
|
35
35
|
Builder object.
|
|
36
|
-
|
|
37
|
-
Returns
|
|
38
|
-
-------
|
|
39
|
-
None
|
|
40
36
|
"""
|
|
41
37
|
if self._engine_builders is None:
|
|
42
38
|
self._engine_builders = {}
|
|
@@ -104,10 +100,6 @@ class ReaderFactory:
|
|
|
104
100
|
----------
|
|
105
101
|
engine : str
|
|
106
102
|
Engine name.
|
|
107
|
-
|
|
108
|
-
Returns
|
|
109
|
-
-------
|
|
110
|
-
None
|
|
111
103
|
"""
|
|
112
104
|
if engine not in self._engine_builders:
|
|
113
105
|
raise BuilderError(f"Engine {engine} not found.")
|
|
@@ -104,10 +104,6 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
104
104
|
The destination of the dataframe.
|
|
105
105
|
**kwargs : dict
|
|
106
106
|
Keyword arguments.
|
|
107
|
-
|
|
108
|
-
Returns
|
|
109
|
-
-------
|
|
110
|
-
None
|
|
111
107
|
"""
|
|
112
108
|
if extension == FileExtensions.CSV.value:
|
|
113
109
|
return self.write_csv(df, dst, **kwargs)
|
|
@@ -128,10 +124,6 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
128
124
|
The destination of the dataframe.
|
|
129
125
|
**kwargs : dict
|
|
130
126
|
Keyword arguments.
|
|
131
|
-
|
|
132
|
-
Returns
|
|
133
|
-
-------
|
|
134
|
-
None
|
|
135
127
|
"""
|
|
136
128
|
if "index" not in kwargs:
|
|
137
129
|
kwargs["index"] = False
|
|
@@ -150,10 +142,6 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
150
142
|
The destination of the dataframe.
|
|
151
143
|
**kwargs : dict
|
|
152
144
|
Keyword arguments.
|
|
153
|
-
|
|
154
|
-
Returns
|
|
155
|
-
-------
|
|
156
|
-
None
|
|
157
145
|
"""
|
|
158
146
|
if "index" not in kwargs:
|
|
159
147
|
kwargs["index"] = False
|
|
@@ -176,10 +164,6 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
176
164
|
The destination schema.
|
|
177
165
|
**kwargs : dict
|
|
178
166
|
Keyword arguments.
|
|
179
|
-
|
|
180
|
-
Returns
|
|
181
|
-
-------
|
|
182
|
-
None
|
|
183
167
|
"""
|
|
184
168
|
if "index" not in kwargs:
|
|
185
169
|
kwargs["index"] = False
|
digitalhub/utils/file_utils.py
CHANGED
|
@@ -141,23 +141,6 @@ def get_last_modified(data_path: str) -> str:
|
|
|
141
141
|
return datetime.fromtimestamp(timestamp).astimezone().isoformat()
|
|
142
142
|
|
|
143
143
|
|
|
144
|
-
def get_s3_path(src_path: str) -> str:
|
|
145
|
-
"""
|
|
146
|
-
Get the S3 URI of a file path.
|
|
147
|
-
|
|
148
|
-
Parameters
|
|
149
|
-
----------
|
|
150
|
-
src_path : str
|
|
151
|
-
Path to the file.
|
|
152
|
-
|
|
153
|
-
Returns
|
|
154
|
-
-------
|
|
155
|
-
str
|
|
156
|
-
The S3 URI of the file.
|
|
157
|
-
"""
|
|
158
|
-
return Path(src_path).as_uri()
|
|
159
|
-
|
|
160
|
-
|
|
161
144
|
def get_file_info_from_local(path: str, src_path: str) -> None | dict:
|
|
162
145
|
"""
|
|
163
146
|
Get file info from a local path.
|
|
@@ -95,10 +95,6 @@ def requests_chunk_download(source: str, filename: Path) -> None:
|
|
|
95
95
|
URL to download the file from.
|
|
96
96
|
filename : Path
|
|
97
97
|
Path where to save the downloaded file.
|
|
98
|
-
|
|
99
|
-
Returns
|
|
100
|
-
-------
|
|
101
|
-
None
|
|
102
98
|
"""
|
|
103
99
|
with requests.get(source, stream=True) as r:
|
|
104
100
|
r.raise_for_status()
|
|
@@ -117,10 +113,6 @@ def extract_archive(path: Path, filename: Path) -> None:
|
|
|
117
113
|
Directory where to extract the archive.
|
|
118
114
|
filename : Path
|
|
119
115
|
Path to the zip archive file.
|
|
120
|
-
|
|
121
|
-
Returns
|
|
122
|
-
-------
|
|
123
|
-
None
|
|
124
116
|
"""
|
|
125
117
|
with ZipFile(filename, "r") as zip_file:
|
|
126
118
|
zip_file.extractall(path)
|
|
@@ -256,10 +248,6 @@ def carriage_return_warn(string: str) -> None:
|
|
|
256
248
|
----------
|
|
257
249
|
string : str
|
|
258
250
|
The string to check.
|
|
259
|
-
|
|
260
|
-
Returns
|
|
261
|
-
-------
|
|
262
|
-
None
|
|
263
251
|
"""
|
|
264
252
|
if "\r\n" in string:
|
|
265
253
|
warn("String contains a carriage return. It may not be parsed correctly from remote runtimes.")
|
digitalhub/utils/git_utils.py
CHANGED
|
@@ -47,10 +47,6 @@ def clone_repository(path: Path, url: str) -> None:
|
|
|
47
47
|
Path where to save the repository.
|
|
48
48
|
url : str
|
|
49
49
|
URL of the repository.
|
|
50
|
-
|
|
51
|
-
Returns
|
|
52
|
-
-------
|
|
53
|
-
None
|
|
54
50
|
"""
|
|
55
51
|
clean_path(path)
|
|
56
52
|
checkout_object = get_checkout_object(url)
|
|
@@ -85,10 +81,6 @@ def clean_path(path: Path) -> None:
|
|
|
85
81
|
----------
|
|
86
82
|
path : Path
|
|
87
83
|
Path to clean.
|
|
88
|
-
|
|
89
|
-
Returns
|
|
90
|
-
-------
|
|
91
|
-
None
|
|
92
84
|
"""
|
|
93
85
|
|
|
94
86
|
shutil.rmtree(path, ignore_errors=True)
|
digitalhub/utils/io_utils.py
CHANGED
|
@@ -23,10 +23,6 @@ def write_yaml(filepath: str | Path, obj: dict | list[dict]) -> None:
|
|
|
23
23
|
The YAML file path to write.
|
|
24
24
|
obj : dict or list of dict
|
|
25
25
|
The dict or list of dicts to write.
|
|
26
|
-
|
|
27
|
-
Returns
|
|
28
|
-
-------
|
|
29
|
-
None
|
|
30
26
|
"""
|
|
31
27
|
if isinstance(obj, list):
|
|
32
28
|
with open(filepath, "w", encoding="utf-8") as out_file:
|
|
@@ -46,10 +42,6 @@ def write_text(filepath: Path, text: str) -> None:
|
|
|
46
42
|
The file path to write.
|
|
47
43
|
text : str
|
|
48
44
|
The text to write.
|
|
49
|
-
|
|
50
|
-
Returns
|
|
51
|
-
-------
|
|
52
|
-
None
|
|
53
45
|
"""
|
|
54
46
|
filepath.write_text(text, encoding="utf-8")
|
|
55
47
|
|
|
@@ -80,10 +72,6 @@ class NoDatesSafeLoader(yaml.SafeLoader):
|
|
|
80
72
|
----------
|
|
81
73
|
tag_to_remove : str
|
|
82
74
|
The tag to remove.
|
|
83
|
-
|
|
84
|
-
Returns
|
|
85
|
-
-------
|
|
86
|
-
None
|
|
87
75
|
"""
|
|
88
76
|
if "yaml_implicit_resolvers" not in cls.__dict__:
|
|
89
77
|
cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()
|