digitalhub 0.8.0b0__py3-none-any.whl → 0.8.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +62 -94
- digitalhub/client/__init__.py +0 -0
- digitalhub/client/builder.py +105 -0
- digitalhub/client/objects/__init__.py +0 -0
- digitalhub/client/objects/base.py +56 -0
- digitalhub/client/objects/dhcore.py +681 -0
- digitalhub/client/objects/local.py +533 -0
- digitalhub/context/__init__.py +0 -0
- digitalhub/context/builder.py +178 -0
- digitalhub/context/context.py +136 -0
- digitalhub/datastores/__init__.py +0 -0
- digitalhub/datastores/builder.py +134 -0
- digitalhub/datastores/objects/__init__.py +0 -0
- digitalhub/datastores/objects/base.py +85 -0
- digitalhub/datastores/objects/local.py +42 -0
- digitalhub/datastores/objects/remote.py +23 -0
- digitalhub/datastores/objects/s3.py +38 -0
- digitalhub/datastores/objects/sql.py +60 -0
- digitalhub/entities/__init__.py +0 -0
- digitalhub/entities/_base/__init__.py +0 -0
- digitalhub/entities/_base/api.py +346 -0
- digitalhub/entities/_base/base.py +82 -0
- digitalhub/entities/_base/crud.py +610 -0
- digitalhub/entities/_base/entity/__init__.py +0 -0
- digitalhub/entities/_base/entity/base.py +132 -0
- digitalhub/entities/_base/entity/context.py +118 -0
- digitalhub/entities/_base/entity/executable.py +380 -0
- digitalhub/entities/_base/entity/material.py +214 -0
- digitalhub/entities/_base/entity/unversioned.py +87 -0
- digitalhub/entities/_base/entity/versioned.py +94 -0
- digitalhub/entities/_base/metadata.py +59 -0
- digitalhub/entities/_base/spec/__init__.py +0 -0
- digitalhub/entities/_base/spec/base.py +58 -0
- digitalhub/entities/_base/spec/material.py +22 -0
- digitalhub/entities/_base/state.py +31 -0
- digitalhub/entities/_base/status/__init__.py +0 -0
- digitalhub/entities/_base/status/base.py +32 -0
- digitalhub/entities/_base/status/material.py +49 -0
- digitalhub/entities/_builders/__init__.py +0 -0
- digitalhub/entities/_builders/metadata.py +60 -0
- digitalhub/entities/_builders/name.py +31 -0
- digitalhub/entities/_builders/spec.py +43 -0
- digitalhub/entities/_builders/status.py +62 -0
- digitalhub/entities/_builders/uuid.py +33 -0
- digitalhub/entities/artifact/__init__.py +0 -0
- digitalhub/entities/artifact/builder.py +133 -0
- digitalhub/entities/artifact/crud.py +358 -0
- digitalhub/entities/artifact/entity/__init__.py +0 -0
- digitalhub/entities/artifact/entity/_base.py +39 -0
- digitalhub/entities/artifact/entity/artifact.py +9 -0
- digitalhub/entities/artifact/spec.py +39 -0
- digitalhub/entities/artifact/status.py +15 -0
- digitalhub/entities/dataitem/__init__.py +0 -0
- digitalhub/entities/dataitem/builder.py +144 -0
- digitalhub/entities/dataitem/crud.py +395 -0
- digitalhub/entities/dataitem/entity/__init__.py +0 -0
- digitalhub/entities/dataitem/entity/_base.py +75 -0
- digitalhub/entities/dataitem/entity/dataitem.py +9 -0
- digitalhub/entities/dataitem/entity/iceberg.py +7 -0
- digitalhub/entities/dataitem/entity/table.py +125 -0
- digitalhub/entities/dataitem/models.py +62 -0
- digitalhub/entities/dataitem/spec.py +61 -0
- digitalhub/entities/dataitem/status.py +38 -0
- digitalhub/entities/entity_types.py +19 -0
- digitalhub/entities/function/__init__.py +0 -0
- digitalhub/entities/function/builder.py +86 -0
- digitalhub/entities/function/crud.py +305 -0
- digitalhub/entities/function/entity.py +101 -0
- digitalhub/entities/function/models.py +118 -0
- digitalhub/entities/function/spec.py +81 -0
- digitalhub/entities/function/status.py +9 -0
- digitalhub/entities/model/__init__.py +0 -0
- digitalhub/entities/model/builder.py +152 -0
- digitalhub/entities/model/crud.py +358 -0
- digitalhub/entities/model/entity/__init__.py +0 -0
- digitalhub/entities/model/entity/_base.py +34 -0
- digitalhub/entities/model/entity/huggingface.py +9 -0
- digitalhub/entities/model/entity/mlflow.py +90 -0
- digitalhub/entities/model/entity/model.py +9 -0
- digitalhub/entities/model/entity/sklearn.py +9 -0
- digitalhub/entities/model/models.py +26 -0
- digitalhub/entities/model/spec.py +146 -0
- digitalhub/entities/model/status.py +33 -0
- digitalhub/entities/project/__init__.py +0 -0
- digitalhub/entities/project/builder.py +82 -0
- digitalhub/entities/project/crud.py +350 -0
- digitalhub/entities/project/entity.py +2060 -0
- digitalhub/entities/project/spec.py +50 -0
- digitalhub/entities/project/status.py +9 -0
- digitalhub/entities/registries.py +48 -0
- digitalhub/entities/run/__init__.py +0 -0
- digitalhub/entities/run/builder.py +77 -0
- digitalhub/entities/run/crud.py +232 -0
- digitalhub/entities/run/entity.py +461 -0
- digitalhub/entities/run/spec.py +153 -0
- digitalhub/entities/run/status.py +114 -0
- digitalhub/entities/secret/__init__.py +0 -0
- digitalhub/entities/secret/builder.py +93 -0
- digitalhub/entities/secret/crud.py +294 -0
- digitalhub/entities/secret/entity.py +73 -0
- digitalhub/entities/secret/spec.py +35 -0
- digitalhub/entities/secret/status.py +9 -0
- digitalhub/entities/task/__init__.py +0 -0
- digitalhub/entities/task/builder.py +74 -0
- digitalhub/entities/task/crud.py +241 -0
- digitalhub/entities/task/entity.py +135 -0
- digitalhub/entities/task/models.py +199 -0
- digitalhub/entities/task/spec.py +51 -0
- digitalhub/entities/task/status.py +9 -0
- digitalhub/entities/utils.py +184 -0
- digitalhub/entities/workflow/__init__.py +0 -0
- digitalhub/entities/workflow/builder.py +91 -0
- digitalhub/entities/workflow/crud.py +304 -0
- digitalhub/entities/workflow/entity.py +77 -0
- digitalhub/entities/workflow/spec.py +15 -0
- digitalhub/entities/workflow/status.py +9 -0
- digitalhub/readers/__init__.py +0 -0
- digitalhub/readers/builder.py +54 -0
- digitalhub/readers/objects/__init__.py +0 -0
- digitalhub/readers/objects/base.py +70 -0
- digitalhub/readers/objects/pandas.py +207 -0
- digitalhub/readers/registry.py +15 -0
- digitalhub/registry/__init__.py +0 -0
- digitalhub/registry/models.py +87 -0
- digitalhub/registry/registry.py +74 -0
- digitalhub/registry/utils.py +150 -0
- digitalhub/runtimes/__init__.py +0 -0
- digitalhub/runtimes/base.py +164 -0
- digitalhub/runtimes/builder.py +53 -0
- digitalhub/runtimes/kind_registry.py +170 -0
- digitalhub/stores/__init__.py +0 -0
- digitalhub/stores/builder.py +257 -0
- digitalhub/stores/objects/__init__.py +0 -0
- digitalhub/stores/objects/base.py +189 -0
- digitalhub/stores/objects/local.py +230 -0
- digitalhub/stores/objects/remote.py +143 -0
- digitalhub/stores/objects/s3.py +563 -0
- digitalhub/stores/objects/sql.py +328 -0
- digitalhub/utils/__init__.py +0 -0
- digitalhub/utils/data_utils.py +127 -0
- digitalhub/utils/env_utils.py +123 -0
- digitalhub/utils/exceptions.py +55 -0
- digitalhub/utils/file_utils.py +204 -0
- digitalhub/utils/generic_utils.py +207 -0
- digitalhub/utils/git_utils.py +148 -0
- digitalhub/utils/io_utils.py +79 -0
- digitalhub/utils/logger.py +17 -0
- digitalhub/utils/uri_utils.py +56 -0
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/METADATA +27 -12
- digitalhub-0.8.0b2.dist-info/RECORD +161 -0
- test/test_crud_artifacts.py +1 -1
- test/test_crud_dataitems.py +1 -1
- test/test_crud_functions.py +1 -1
- test/test_crud_runs.py +1 -1
- test/test_crud_tasks.py +1 -1
- digitalhub-0.8.0b0.dist-info/RECORD +0 -14
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/LICENSE.txt +0 -0
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/WHEEL +0 -0
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,563 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Type
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
import boto3
|
|
9
|
+
import botocore.client # pylint: disable=unused-import
|
|
10
|
+
from botocore.exceptions import ClientError
|
|
11
|
+
|
|
12
|
+
from digitalhub.stores.objects.base import Store, StoreConfig
|
|
13
|
+
from digitalhub.utils.exceptions import StoreError
|
|
14
|
+
from digitalhub.utils.file_utils import get_file_info_from_s3, get_file_mime_type
|
|
15
|
+
|
|
16
|
+
# Type aliases
# Forward-reference alias for the boto3 S3 client: botocore generates the
# client class at runtime, so it cannot be imported and named directly.
S3Client = Type["botocore.client.S3"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class S3StoreConfig(StoreConfig):
    """
    S3 store configuration class.

    Holds the connection parameters (endpoint, credentials and bucket)
    used to build the boto3 S3 client.
    """

    endpoint_url: str
    """S3 endpoint URL."""

    aws_access_key_id: str
    """AWS access key ID."""

    aws_secret_access_key: str
    """AWS secret access key."""

    bucket_name: str
    """S3 bucket name."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class S3Store(Store):
    """
    S3 store class. It implements the Store interface and provides methods to fetch and persist
    artifacts on S3 based storage.
    """

    def __init__(self, name: str, store_type: str, config: S3StoreConfig) -> None:
        """
        Initialize the store.

        Parameters
        ----------
        name : str
            Store name.
        store_type : str
            Store type.
        config : S3StoreConfig
            S3 configuration (endpoint, credentials and bucket) used to
            build the boto3 client.
        """
        super().__init__(name, store_type)
        # Kept for _get_client()/_get_bucket(), which read it on every call.
        self.config = config

    ##############################
    # IO methods
    ##############################
|
|
51
|
+
|
|
52
|
+
def download(
|
|
53
|
+
self,
|
|
54
|
+
root: str,
|
|
55
|
+
dst: Path,
|
|
56
|
+
src: list[str],
|
|
57
|
+
overwrite: bool = False,
|
|
58
|
+
) -> str:
|
|
59
|
+
"""
|
|
60
|
+
Download artifacts from storage.
|
|
61
|
+
|
|
62
|
+
Parameters
|
|
63
|
+
----------
|
|
64
|
+
root : str
|
|
65
|
+
The root path of the artifact.
|
|
66
|
+
dst : str
|
|
67
|
+
The destination of the artifact on local filesystem.
|
|
68
|
+
src : list[str]
|
|
69
|
+
List of sources.
|
|
70
|
+
overwrite : bool
|
|
71
|
+
Specify if overwrite existing file(s).
|
|
72
|
+
|
|
73
|
+
Returns
|
|
74
|
+
-------
|
|
75
|
+
str
|
|
76
|
+
Destination path of the downloaded artifact.
|
|
77
|
+
"""
|
|
78
|
+
client, bucket = self._check_factory()
|
|
79
|
+
|
|
80
|
+
# Build destination directory
|
|
81
|
+
if dst.suffix == "":
|
|
82
|
+
dst.mkdir(parents=True, exist_ok=True)
|
|
83
|
+
else:
|
|
84
|
+
dst.parent.mkdir(parents=True, exist_ok=True)
|
|
85
|
+
|
|
86
|
+
# Handle src and tree destination
|
|
87
|
+
if self.is_partition(root):
|
|
88
|
+
if not src:
|
|
89
|
+
keys = self._list_objects(client, bucket, root)
|
|
90
|
+
strip_root = self._get_key(root)
|
|
91
|
+
trees = [k.removeprefix(strip_root) for k in keys]
|
|
92
|
+
else:
|
|
93
|
+
keys = self._build_key_from_root(root, src)
|
|
94
|
+
trees = [s for s in src]
|
|
95
|
+
else:
|
|
96
|
+
keys = [self._get_key(root)]
|
|
97
|
+
if not src:
|
|
98
|
+
trees = [Path(self._get_key(root)).name]
|
|
99
|
+
else:
|
|
100
|
+
trees = [s for s in src]
|
|
101
|
+
|
|
102
|
+
if len(keys) != len(trees):
|
|
103
|
+
raise StoreError("Keys and trees must have the same length.")
|
|
104
|
+
|
|
105
|
+
# Download files
|
|
106
|
+
for elements in zip(keys, trees):
|
|
107
|
+
key = elements[0]
|
|
108
|
+
tree = elements[1]
|
|
109
|
+
|
|
110
|
+
# Build destination path
|
|
111
|
+
if dst.suffix == "":
|
|
112
|
+
dst_pth = Path(dst, tree)
|
|
113
|
+
else:
|
|
114
|
+
dst_pth = dst
|
|
115
|
+
|
|
116
|
+
# Check if destination path already exists
|
|
117
|
+
self._check_overwrite(dst_pth, overwrite)
|
|
118
|
+
|
|
119
|
+
self._build_path(dst_pth.parent)
|
|
120
|
+
|
|
121
|
+
self._download_file(key, dst_pth, client, bucket)
|
|
122
|
+
|
|
123
|
+
if len(trees) == 1:
|
|
124
|
+
if dst.suffix == "":
|
|
125
|
+
return str(Path(dst, trees[0]))
|
|
126
|
+
return str(dst)
|
|
127
|
+
|
|
128
|
+
def upload(self, src: str | list[str], dst: str | None = None) -> list[tuple[str, str]]:
|
|
129
|
+
"""
|
|
130
|
+
Upload an artifact to storage.
|
|
131
|
+
|
|
132
|
+
Parameters
|
|
133
|
+
----------
|
|
134
|
+
src : str
|
|
135
|
+
List of sources.
|
|
136
|
+
dst : str
|
|
137
|
+
The destination of the artifact on storage.
|
|
138
|
+
|
|
139
|
+
Returns
|
|
140
|
+
-------
|
|
141
|
+
list[tuple[str, str]]
|
|
142
|
+
Returns the list of destination and source paths of the uploaded artifacts.
|
|
143
|
+
"""
|
|
144
|
+
|
|
145
|
+
# Destination handling
|
|
146
|
+
|
|
147
|
+
# If no destination is provided, build key from source
|
|
148
|
+
# Otherwise build key from destination
|
|
149
|
+
if dst is None:
|
|
150
|
+
raise StoreError(
|
|
151
|
+
"Destination must be provided. " + "If source is a list of files or a directory, "
|
|
152
|
+
"destination must be a partition, e.g. 's3://bucket/partition/', ",
|
|
153
|
+
"otherwise a destination key, e.g. 's3://bucket/key'",
|
|
154
|
+
)
|
|
155
|
+
else:
|
|
156
|
+
dst = self._get_key(dst)
|
|
157
|
+
|
|
158
|
+
# Source handling
|
|
159
|
+
if not isinstance(src, list):
|
|
160
|
+
self._check_local_src(src)
|
|
161
|
+
src_is_dir = Path(src).is_dir()
|
|
162
|
+
else:
|
|
163
|
+
for s in src:
|
|
164
|
+
self._check_local_src(s)
|
|
165
|
+
src_is_dir = False
|
|
166
|
+
if len(src) == 1:
|
|
167
|
+
src = src[0]
|
|
168
|
+
|
|
169
|
+
# If source is a directory, destination must be a partition
|
|
170
|
+
if (src_is_dir or isinstance(src, list)) and not dst.endswith("/"):
|
|
171
|
+
raise StoreError("Destination must be a partition if the source is a directory or a list of files.")
|
|
172
|
+
|
|
173
|
+
# Directory
|
|
174
|
+
if src_is_dir:
|
|
175
|
+
return self._upload_dir(src, dst)
|
|
176
|
+
|
|
177
|
+
# List of files
|
|
178
|
+
elif isinstance(src, list):
|
|
179
|
+
return self._upload_file_list(src, dst)
|
|
180
|
+
|
|
181
|
+
# Single file
|
|
182
|
+
return self._upload_single_file(src, dst)
|
|
183
|
+
|
|
184
|
+
def upload_fileobject(self, src: BytesIO, dst: str) -> str:
|
|
185
|
+
"""
|
|
186
|
+
Upload an BytesIO to S3 based storage.
|
|
187
|
+
|
|
188
|
+
Parameters
|
|
189
|
+
----------
|
|
190
|
+
src : BytesIO
|
|
191
|
+
The source object to be persisted.
|
|
192
|
+
dst : str
|
|
193
|
+
The destination partition for the artifact.
|
|
194
|
+
|
|
195
|
+
Returns
|
|
196
|
+
-------
|
|
197
|
+
str
|
|
198
|
+
S3 key of the uploaded artifact.
|
|
199
|
+
"""
|
|
200
|
+
client, bucket = self._check_factory()
|
|
201
|
+
self._upload_fileobject(src, dst, client, bucket)
|
|
202
|
+
return f"s3://{bucket}/{dst}"
|
|
203
|
+
|
|
204
|
+
def get_file_info(self, paths: list[tuple[str, str]]) -> list[dict]:
|
|
205
|
+
"""
|
|
206
|
+
Method to get file metadata.
|
|
207
|
+
|
|
208
|
+
Parameters
|
|
209
|
+
----------
|
|
210
|
+
paths : list
|
|
211
|
+
List of source paths.
|
|
212
|
+
|
|
213
|
+
Returns
|
|
214
|
+
-------
|
|
215
|
+
list[dict]
|
|
216
|
+
Returns files metadata.
|
|
217
|
+
"""
|
|
218
|
+
client, bucket = self._check_factory()
|
|
219
|
+
|
|
220
|
+
infos = []
|
|
221
|
+
for i in paths:
|
|
222
|
+
key, src_path = i
|
|
223
|
+
|
|
224
|
+
# Rebuild key in case here arrive an s3://bucket prefix
|
|
225
|
+
key = self._get_key(key)
|
|
226
|
+
|
|
227
|
+
# Get metadata
|
|
228
|
+
metadata = client.head_object(Bucket=bucket, Key=key)
|
|
229
|
+
|
|
230
|
+
# Get file info
|
|
231
|
+
info = get_file_info_from_s3(src_path, metadata)
|
|
232
|
+
infos.append(info)
|
|
233
|
+
|
|
234
|
+
return infos
|
|
235
|
+
|
|
236
|
+
##############################
|
|
237
|
+
# Private I/O methods
|
|
238
|
+
##############################
|
|
239
|
+
|
|
240
|
+
def _download_file(
|
|
241
|
+
self,
|
|
242
|
+
key: str,
|
|
243
|
+
dst_pth: Path,
|
|
244
|
+
client: S3Client,
|
|
245
|
+
bucket: str,
|
|
246
|
+
) -> list[str]:
|
|
247
|
+
"""
|
|
248
|
+
Download files from S3 partition.
|
|
249
|
+
|
|
250
|
+
Parameters
|
|
251
|
+
----------
|
|
252
|
+
keys : str
|
|
253
|
+
The list of keys to be downloaded.
|
|
254
|
+
dst_pth : str
|
|
255
|
+
The destination of the files on local filesystem.
|
|
256
|
+
client : S3Client
|
|
257
|
+
The S3 client object.
|
|
258
|
+
bucket : str
|
|
259
|
+
The name of the S3 bucket.
|
|
260
|
+
|
|
261
|
+
Returns
|
|
262
|
+
-------
|
|
263
|
+
list[str]
|
|
264
|
+
The list of paths of the downloaded files.
|
|
265
|
+
"""
|
|
266
|
+
# Download file
|
|
267
|
+
client.download_file(bucket, key, dst_pth)
|
|
268
|
+
|
|
269
|
+
def _upload_dir(self, src: str, dst: str) -> list[tuple[str, str]]:
|
|
270
|
+
"""
|
|
271
|
+
Upload directory to storage.
|
|
272
|
+
|
|
273
|
+
Parameters
|
|
274
|
+
----------
|
|
275
|
+
src : str
|
|
276
|
+
List of sources.
|
|
277
|
+
dst : str
|
|
278
|
+
The destination of the artifact on storage.
|
|
279
|
+
|
|
280
|
+
Returns
|
|
281
|
+
-------
|
|
282
|
+
list[tuple[str, str]]
|
|
283
|
+
Returns the list of destination and source paths of the uploaded artifacts.
|
|
284
|
+
"""
|
|
285
|
+
client, bucket = self._check_factory()
|
|
286
|
+
|
|
287
|
+
src_pth = Path(src)
|
|
288
|
+
files = [i for i in src_pth.rglob("*") if i.is_file()]
|
|
289
|
+
keys = []
|
|
290
|
+
for i in files:
|
|
291
|
+
if src_pth.is_absolute():
|
|
292
|
+
i = i.relative_to(src_pth)
|
|
293
|
+
keys.append(f"{dst}{i}")
|
|
294
|
+
|
|
295
|
+
# Upload files
|
|
296
|
+
paths = []
|
|
297
|
+
for i in zip(files, keys):
|
|
298
|
+
f, k = i
|
|
299
|
+
self._upload_file(f, k, client, bucket)
|
|
300
|
+
if src_pth.is_absolute():
|
|
301
|
+
f = f.relative_to(src_pth)
|
|
302
|
+
paths.append((k, str(f)))
|
|
303
|
+
return paths
|
|
304
|
+
|
|
305
|
+
def _upload_file_list(self, src: list[str], dst: str) -> list[tuple[str, str]]:
|
|
306
|
+
"""
|
|
307
|
+
Upload list of files to storage.
|
|
308
|
+
|
|
309
|
+
Parameters
|
|
310
|
+
----------
|
|
311
|
+
src : list
|
|
312
|
+
List of sources.
|
|
313
|
+
dst : str
|
|
314
|
+
The destination of the artifact on storage.
|
|
315
|
+
|
|
316
|
+
Returns
|
|
317
|
+
-------
|
|
318
|
+
list[tuple[str, str]]
|
|
319
|
+
Returns the list of destination and source paths of the uploaded artifacts.
|
|
320
|
+
"""
|
|
321
|
+
client, bucket = self._check_factory()
|
|
322
|
+
files = src
|
|
323
|
+
keys = []
|
|
324
|
+
for i in files:
|
|
325
|
+
keys.append(f"{dst}{Path(i).name}")
|
|
326
|
+
if len(set(keys)) != len(keys):
|
|
327
|
+
raise StoreError("Keys must be unique (Select files with different names, otherwise upload a directory).")
|
|
328
|
+
|
|
329
|
+
# Upload files
|
|
330
|
+
paths = []
|
|
331
|
+
for i in zip(files, keys):
|
|
332
|
+
f, k = i
|
|
333
|
+
self._upload_file(f, k, client, bucket)
|
|
334
|
+
paths.append((k, Path(f).name))
|
|
335
|
+
return paths
|
|
336
|
+
|
|
337
|
+
def _upload_single_file(self, src: str, dst: str) -> str:
|
|
338
|
+
"""
|
|
339
|
+
Upload a single file to storage.
|
|
340
|
+
|
|
341
|
+
Parameters
|
|
342
|
+
----------
|
|
343
|
+
src : str
|
|
344
|
+
List of sources.
|
|
345
|
+
dst : str
|
|
346
|
+
The destination of the artifact on storage.
|
|
347
|
+
|
|
348
|
+
Returns
|
|
349
|
+
-------
|
|
350
|
+
str
|
|
351
|
+
Returns the list of destination and source paths of the uploaded artifacts.
|
|
352
|
+
"""
|
|
353
|
+
client, bucket = self._check_factory()
|
|
354
|
+
|
|
355
|
+
if dst.endswith("/"):
|
|
356
|
+
dst = f"{dst.removeprefix('/')}{Path(src).name}"
|
|
357
|
+
|
|
358
|
+
# Upload file
|
|
359
|
+
self._upload_file(src, dst, client, bucket)
|
|
360
|
+
name = Path(self._get_key(dst)).name
|
|
361
|
+
return [(dst, name)]
|
|
362
|
+
|
|
363
|
+
@staticmethod
|
|
364
|
+
def _upload_file(src: str, key: str, client: S3Client, bucket: str) -> None:
|
|
365
|
+
"""
|
|
366
|
+
Upload a file to S3 based storage. The function checks if the
|
|
367
|
+
bucket is accessible.
|
|
368
|
+
|
|
369
|
+
Parameters
|
|
370
|
+
----------
|
|
371
|
+
src : str
|
|
372
|
+
The source path of the file on local filesystem.
|
|
373
|
+
key : str
|
|
374
|
+
The key of the file on S3 based storage.
|
|
375
|
+
client : S3Client
|
|
376
|
+
The S3 client object.
|
|
377
|
+
bucket : str
|
|
378
|
+
The name of the S3 bucket.
|
|
379
|
+
|
|
380
|
+
Returns
|
|
381
|
+
-------
|
|
382
|
+
None
|
|
383
|
+
"""
|
|
384
|
+
extra_args = {}
|
|
385
|
+
mime_type = get_file_mime_type(src)
|
|
386
|
+
if mime_type is not None:
|
|
387
|
+
extra_args["ContentType"] = mime_type
|
|
388
|
+
client.upload_file(Filename=src, Bucket=bucket, Key=key, ExtraArgs=extra_args)
|
|
389
|
+
|
|
390
|
+
@staticmethod
|
|
391
|
+
def _upload_fileobject(fileobj: BytesIO, key: str, client: S3Client, bucket: str) -> None:
|
|
392
|
+
"""
|
|
393
|
+
Upload a fileobject to S3 based storage. The function checks if the bucket is accessible.
|
|
394
|
+
|
|
395
|
+
Parameters
|
|
396
|
+
----------
|
|
397
|
+
fileobj : BytesIO
|
|
398
|
+
The fileobject to be uploaded.
|
|
399
|
+
key : str
|
|
400
|
+
The key of the file on S3 based storage.
|
|
401
|
+
client : S3Client
|
|
402
|
+
The S3 client object.
|
|
403
|
+
bucket : str
|
|
404
|
+
The name of the S3 bucket.
|
|
405
|
+
|
|
406
|
+
Returns
|
|
407
|
+
-------
|
|
408
|
+
None
|
|
409
|
+
"""
|
|
410
|
+
client.put_object(Bucket=bucket, Key=key, Body=fileobj.getvalue())
|
|
411
|
+
|
|
412
|
+
##############################
|
|
413
|
+
# Private helper methods
|
|
414
|
+
##############################
|
|
415
|
+
|
|
416
|
+
def _get_bucket(self) -> str:
|
|
417
|
+
"""
|
|
418
|
+
Get the name of the S3 bucket from the URI.
|
|
419
|
+
|
|
420
|
+
Returns
|
|
421
|
+
-------
|
|
422
|
+
str
|
|
423
|
+
The name of the S3 bucket.
|
|
424
|
+
"""
|
|
425
|
+
return str(self.config.bucket_name)
|
|
426
|
+
|
|
427
|
+
def _get_client(self) -> S3Client:
|
|
428
|
+
"""
|
|
429
|
+
Get an S3 client object.
|
|
430
|
+
|
|
431
|
+
Returns
|
|
432
|
+
-------
|
|
433
|
+
S3Client
|
|
434
|
+
Returns a client object that interacts with the S3 storage service.
|
|
435
|
+
"""
|
|
436
|
+
cfg = {
|
|
437
|
+
"endpoint_url": self.config.endpoint_url,
|
|
438
|
+
"aws_access_key_id": self.config.aws_access_key_id,
|
|
439
|
+
"aws_secret_access_key": self.config.aws_secret_access_key,
|
|
440
|
+
}
|
|
441
|
+
return boto3.client("s3", **cfg)
|
|
442
|
+
|
|
443
|
+
def _check_factory(self) -> tuple[S3Client, str]:
|
|
444
|
+
"""
|
|
445
|
+
Check if the S3 bucket is accessible by sending a head_bucket request.
|
|
446
|
+
|
|
447
|
+
Returns
|
|
448
|
+
-------
|
|
449
|
+
tuple[S3Client, str]
|
|
450
|
+
A tuple containing the S3 client object and the name of the S3 bucket.
|
|
451
|
+
"""
|
|
452
|
+
client = self._get_client()
|
|
453
|
+
bucket = self._get_bucket()
|
|
454
|
+
self._check_access_to_storage(client, bucket)
|
|
455
|
+
return client, bucket
|
|
456
|
+
|
|
457
|
+
def _check_access_to_storage(self, client: S3Client, bucket: str) -> None:
|
|
458
|
+
"""
|
|
459
|
+
Check if the S3 bucket is accessible by sending a head_bucket request.
|
|
460
|
+
|
|
461
|
+
Parameters
|
|
462
|
+
----------
|
|
463
|
+
client : S3Client
|
|
464
|
+
The S3 client object.
|
|
465
|
+
bucket : str
|
|
466
|
+
The name of the S3 bucket.
|
|
467
|
+
|
|
468
|
+
Returns
|
|
469
|
+
-------
|
|
470
|
+
None
|
|
471
|
+
|
|
472
|
+
Raises
|
|
473
|
+
------
|
|
474
|
+
ClientError:
|
|
475
|
+
If access to the specified bucket is not available.
|
|
476
|
+
"""
|
|
477
|
+
try:
|
|
478
|
+
client.head_bucket(Bucket=bucket)
|
|
479
|
+
except ClientError as e:
|
|
480
|
+
raise ClientError("No access to s3 bucket!") from e
|
|
481
|
+
|
|
482
|
+
@staticmethod
|
|
483
|
+
def _get_key(path: str) -> str:
|
|
484
|
+
"""
|
|
485
|
+
Build key.
|
|
486
|
+
|
|
487
|
+
Parameters
|
|
488
|
+
----------
|
|
489
|
+
path : str
|
|
490
|
+
The source path to get the key from.
|
|
491
|
+
|
|
492
|
+
Returns
|
|
493
|
+
-------
|
|
494
|
+
str
|
|
495
|
+
The key.
|
|
496
|
+
"""
|
|
497
|
+
key = urlparse(path).path.replace("\\", "/")
|
|
498
|
+
if key.startswith("/"):
|
|
499
|
+
key = key[1:]
|
|
500
|
+
return key
|
|
501
|
+
|
|
502
|
+
def _build_key_from_root(self, root: str, paths: list[str]) -> list[str]:
|
|
503
|
+
"""
|
|
504
|
+
Method to build object path.
|
|
505
|
+
|
|
506
|
+
Parameters
|
|
507
|
+
----------
|
|
508
|
+
root : str
|
|
509
|
+
The root of the object path.
|
|
510
|
+
paths : list[str]
|
|
511
|
+
The path to build.
|
|
512
|
+
|
|
513
|
+
Returns
|
|
514
|
+
-------
|
|
515
|
+
list[str]
|
|
516
|
+
List of keys.
|
|
517
|
+
"""
|
|
518
|
+
keys = []
|
|
519
|
+
for path in paths:
|
|
520
|
+
clean_path = self._get_key(path)
|
|
521
|
+
key = self._get_key(f"{root}{clean_path}")
|
|
522
|
+
keys.append(key)
|
|
523
|
+
return keys
|
|
524
|
+
|
|
525
|
+
def _list_objects(self, client: S3Client, bucket: str, partition: str) -> list[str]:
|
|
526
|
+
"""
|
|
527
|
+
List objects in a S3 partition.
|
|
528
|
+
|
|
529
|
+
Parameters
|
|
530
|
+
----------
|
|
531
|
+
client : S3Client
|
|
532
|
+
The S3 client object.
|
|
533
|
+
bucket : str
|
|
534
|
+
The name of the S3 bucket.
|
|
535
|
+
partition : str
|
|
536
|
+
The partition.
|
|
537
|
+
|
|
538
|
+
Returns
|
|
539
|
+
-------
|
|
540
|
+
list[str]
|
|
541
|
+
The list of keys under the partition.
|
|
542
|
+
"""
|
|
543
|
+
key = self._get_key(partition)
|
|
544
|
+
file_list = client.list_objects_v2(Bucket=bucket, Prefix=key).get("Contents", [])
|
|
545
|
+
return [f["Key"] for f in file_list]
|
|
546
|
+
|
|
547
|
+
@staticmethod
|
|
548
|
+
def is_partition(path: str) -> bool:
|
|
549
|
+
"""
|
|
550
|
+
Check if path is a directory or a partition.
|
|
551
|
+
|
|
552
|
+
Parameters
|
|
553
|
+
----------
|
|
554
|
+
path : str
|
|
555
|
+
The path to check.
|
|
556
|
+
|
|
557
|
+
Returns
|
|
558
|
+
-------
|
|
559
|
+
bool
|
|
560
|
+
"""
|
|
561
|
+
if path.endswith("/"):
|
|
562
|
+
return True
|
|
563
|
+
return False
|