digitalhub 0.8.0b15__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of digitalhub might be problematic.
- digitalhub/__init__.py +19 -2
- digitalhub/client/_base/api_builder.py +16 -0
- digitalhub/client/_base/client.py +67 -0
- digitalhub/client/_base/key_builder.py +52 -0
- digitalhub/client/api.py +2 -38
- digitalhub/client/dhcore/api_builder.py +100 -0
- digitalhub/client/dhcore/client.py +100 -48
- digitalhub/client/dhcore/enums.py +27 -0
- digitalhub/client/dhcore/env.py +4 -2
- digitalhub/client/dhcore/key_builder.py +58 -0
- digitalhub/client/dhcore/utils.py +17 -17
- digitalhub/client/local/api_builder.py +100 -0
- digitalhub/client/local/client.py +22 -0
- digitalhub/client/local/key_builder.py +58 -0
- digitalhub/context/api.py +3 -38
- digitalhub/context/builder.py +10 -23
- digitalhub/context/context.py +20 -92
- digitalhub/entities/_base/context/entity.py +30 -22
- digitalhub/entities/_base/entity/_constructors/metadata.py +12 -1
- digitalhub/entities/_base/entity/_constructors/name.py +1 -1
- digitalhub/entities/_base/entity/_constructors/spec.py +1 -1
- digitalhub/entities/_base/entity/_constructors/status.py +3 -2
- digitalhub/entities/_base/entity/_constructors/uuid.py +1 -1
- digitalhub/entities/_base/entity/builder.py +6 -1
- digitalhub/entities/_base/entity/entity.py +32 -10
- digitalhub/entities/_base/entity/metadata.py +22 -0
- digitalhub/entities/_base/entity/spec.py +7 -2
- digitalhub/entities/_base/executable/entity.py +8 -8
- digitalhub/entities/_base/material/entity.py +49 -17
- digitalhub/entities/_base/material/status.py +0 -31
- digitalhub/entities/_base/material/utils.py +106 -0
- digitalhub/entities/_base/project/entity.py +341 -0
- digitalhub/entities/_base/unversioned/entity.py +3 -24
- digitalhub/entities/_base/versioned/entity.py +2 -26
- digitalhub/entities/_commons/enums.py +103 -0
- digitalhub/entities/_commons/utils.py +83 -0
- digitalhub/entities/_operations/processor.py +1873 -0
- digitalhub/entities/artifact/_base/builder.py +1 -1
- digitalhub/entities/artifact/_base/entity.py +1 -1
- digitalhub/entities/artifact/artifact/builder.py +2 -1
- digitalhub/entities/artifact/crud.py +46 -29
- digitalhub/entities/artifact/utils.py +62 -0
- digitalhub/entities/dataitem/_base/builder.py +1 -1
- digitalhub/entities/dataitem/_base/entity.py +6 -6
- digitalhub/entities/dataitem/crud.py +50 -66
- digitalhub/entities/dataitem/dataitem/builder.py +2 -1
- digitalhub/entities/dataitem/iceberg/builder.py +2 -1
- digitalhub/entities/dataitem/table/builder.py +2 -1
- digitalhub/entities/dataitem/table/entity.py +5 -10
- digitalhub/entities/dataitem/table/models.py +4 -5
- digitalhub/entities/dataitem/utils.py +137 -0
- digitalhub/entities/function/_base/builder.py +1 -1
- digitalhub/entities/function/_base/entity.py +6 -2
- digitalhub/entities/function/crud.py +36 -17
- digitalhub/entities/model/_base/builder.py +1 -1
- digitalhub/entities/model/_base/entity.py +1 -1
- digitalhub/entities/model/crud.py +46 -29
- digitalhub/entities/model/huggingface/builder.py +2 -1
- digitalhub/entities/model/huggingface/spec.py +4 -2
- digitalhub/entities/model/mlflow/builder.py +2 -1
- digitalhub/entities/model/mlflow/models.py +17 -9
- digitalhub/entities/model/mlflow/spec.py +6 -1
- digitalhub/entities/model/mlflow/utils.py +4 -2
- digitalhub/entities/model/model/builder.py +2 -1
- digitalhub/entities/model/sklearn/builder.py +2 -1
- digitalhub/entities/model/utils.py +62 -0
- digitalhub/entities/project/_base/builder.py +2 -2
- digitalhub/entities/project/_base/entity.py +82 -272
- digitalhub/entities/project/crud.py +110 -89
- digitalhub/entities/project/utils.py +35 -0
- digitalhub/entities/run/_base/builder.py +3 -1
- digitalhub/entities/run/_base/entity.py +52 -54
- digitalhub/entities/run/_base/spec.py +15 -7
- digitalhub/entities/run/crud.py +35 -17
- digitalhub/entities/secret/_base/builder.py +2 -2
- digitalhub/entities/secret/_base/entity.py +4 -10
- digitalhub/entities/secret/crud.py +36 -21
- digitalhub/entities/task/_base/builder.py +14 -14
- digitalhub/entities/task/_base/entity.py +21 -14
- digitalhub/entities/task/_base/models.py +35 -6
- digitalhub/entities/task/_base/spec.py +50 -13
- digitalhub/entities/task/_base/utils.py +18 -0
- digitalhub/entities/task/crud.py +35 -15
- digitalhub/entities/workflow/_base/builder.py +1 -1
- digitalhub/entities/workflow/_base/entity.py +22 -6
- digitalhub/entities/workflow/crud.py +36 -17
- digitalhub/factory/utils.py +1 -1
- digitalhub/readers/_base/reader.py +2 -2
- digitalhub/readers/_commons/enums.py +13 -0
- digitalhub/readers/api.py +3 -2
- digitalhub/readers/factory.py +12 -6
- digitalhub/readers/pandas/reader.py +20 -8
- digitalhub/runtimes/_base.py +0 -7
- digitalhub/runtimes/enums.py +12 -0
- digitalhub/stores/_base/store.py +59 -11
- digitalhub/stores/builder.py +5 -5
- digitalhub/stores/local/store.py +43 -4
- digitalhub/stores/remote/store.py +31 -5
- digitalhub/stores/s3/store.py +136 -57
- digitalhub/stores/sql/store.py +122 -47
- digitalhub/utils/exceptions.py +6 -0
- digitalhub/utils/file_utils.py +60 -2
- digitalhub/utils/generic_utils.py +45 -4
- digitalhub/utils/io_utils.py +18 -0
- digitalhub/utils/s3_utils.py +17 -0
- digitalhub/utils/uri_utils.py +153 -15
- {digitalhub-0.8.0b15.dist-info → digitalhub-0.9.0.dist-info}/LICENSE.txt +1 -1
- {digitalhub-0.8.0b15.dist-info → digitalhub-0.9.0.dist-info}/METADATA +11 -11
- {digitalhub-0.8.0b15.dist-info → digitalhub-0.9.0.dist-info}/RECORD +117 -115
- {digitalhub-0.8.0b15.dist-info → digitalhub-0.9.0.dist-info}/WHEEL +1 -1
- test/local/instances/test_validate.py +55 -0
- test/testkfp.py +4 -1
- digitalhub/datastores/_base/datastore.py +0 -85
- digitalhub/datastores/api.py +0 -37
- digitalhub/datastores/builder.py +0 -110
- digitalhub/datastores/local/datastore.py +0 -50
- digitalhub/datastores/remote/__init__.py +0 -0
- digitalhub/datastores/remote/datastore.py +0 -31
- digitalhub/datastores/s3/__init__.py +0 -0
- digitalhub/datastores/s3/datastore.py +0 -46
- digitalhub/datastores/sql/__init__.py +0 -0
- digitalhub/datastores/sql/datastore.py +0 -68
- digitalhub/entities/_base/api_utils.py +0 -620
- digitalhub/entities/_base/crud.py +0 -468
- digitalhub/entities/function/_base/models.py +0 -118
- digitalhub/entities/utils/__init__.py +0 -0
- digitalhub/entities/utils/api.py +0 -346
- digitalhub/entities/utils/entity_types.py +0 -19
- digitalhub/entities/utils/state.py +0 -31
- digitalhub/entities/utils/utils.py +0 -202
- /digitalhub/{context → entities/_base/project}/__init__.py +0 -0
- /digitalhub/{datastores → entities/_commons}/__init__.py +0 -0
- /digitalhub/{datastores/_base → entities/_operations}/__init__.py +0 -0
- /digitalhub/{datastores/local → readers/_commons}/__init__.py +0 -0
- {digitalhub-0.8.0b15.dist-info → digitalhub-0.9.0.dist-info}/top_level.txt +0 -0
digitalhub/stores/local/store.py
CHANGED
@@ -2,7 +2,9 @@ from __future__ import annotations

 import shutil
 from pathlib import Path
+from typing import Any

+from digitalhub.readers.api import get_reader_by_object
 from digitalhub.stores._base.store import Store, StoreConfig
 from digitalhub.utils.exceptions import StoreError
 from digitalhub.utils.file_utils import get_file_info_from_local
@@ -28,7 +30,7 @@ class LocalStore(Store):
         self.config = config

     ##############################
-    #
+    # I/O methods
     ##############################

     def download(
@@ -59,7 +61,7 @@ class LocalStore(Store):
         """
         raise StoreError("Local store does not support download.")

-    def upload(self, src: str | list[str], dst: str
+    def upload(self, src: str | list[str], dst: str) -> list[tuple[str, str]]:
         """
         Upload an artifact to storage.

@@ -70,7 +72,11 @@ class LocalStore(Store):
         """
         raise StoreError("Local store does not support upload.")

-    def get_file_info(
+    def get_file_info(
+        self,
+        root: str,
+        paths: list[tuple[str, str]],
+    ) -> list[dict]:
         """
         Method to get file metadata.

@@ -189,7 +195,40 @@ class LocalStore(Store):
         return dst

     ##############################
-    #
+    # Datastore methods
+    ##############################
+
+    def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
+        """
+        Method to write a dataframe to a file. Kwargs are passed to df.to_parquet().
+        If destination is not provided, the dataframe is written to the default
+        store path with generated name.
+
+        Parameters
+        ----------
+        df : Any
+            The dataframe to write.
+        dst : str
+            The destination of the dataframe.
+        **kwargs : dict
+            Keyword arguments.
+
+        Returns
+        -------
+        str
+            Path of written dataframe.
+        """
+        self.store._check_local_dst(dst)
+        self._validate_extension(Path(dst).suffix.removeprefix("."))
+
+        # Write dataframe
+        reader = get_reader_by_object(df)
+        reader.write_df(df, dst, extension=extension, **kwargs)
+
+        return dst
+
+    ##############################
+    # Helper methods
     ##############################

     @staticmethod
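Reading the hunk above, LocalStore now also acts as a datastore: write_df validates a local destination and extension, picks a reader via get_reader_by_object, and delegates serialization. A minimal usage sketch follows; how the store instance is obtained and the parquet path used here are assumptions, only write_df itself comes from the diff.

# Illustrative only: calling the new LocalStore.write_df shown above.
# Obtaining the store (normally done by the stores builder) is out of scope.
import pandas as pd


def save_table_locally(local_store, df: pd.DataFrame) -> str:
    """Write df under /tmp/example using the store's datastore interface."""
    # write_df checks the local destination and its extension, then resolves
    # a reader from the dataframe type and delegates serialization to it.
    return local_store.write_df(df, "/tmp/example/table.parquet", extension="parquet")


df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
# path = save_table_locally(store, df)  # store: a LocalStore built elsewhere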
digitalhub/stores/remote/store.py
CHANGED

@@ -1,6 +1,7 @@
 from __future__ import annotations

 from pathlib import Path
+from typing import Any

 import requests

@@ -25,7 +26,7 @@ class RemoteStore(Store):
         self.config = config

     ##############################
-    #
+    # I/O methods
     ##############################

     def download(
@@ -68,7 +69,7 @@ class RemoteStore(Store):

         return self._download_file(root, dst, overwrite)

-    def upload(self, src: str | list[str], dst: str
+    def upload(self, src: str | list[str], dst: str) -> list[tuple[str, str]]:
         """
         Upload an artifact to storage.

@@ -79,19 +80,44 @@ class RemoteStore(Store):
         """
         raise StoreError("Remote HTTP store does not support upload.")

-    def get_file_info(
+    def get_file_info(
+        self,
+        root: str,
+        paths: list[tuple[str, str]],
+    ) -> list[dict]:
         """
         Get file information from HTTP(s) storage.

+        Parameters
+        ----------
+        paths : list[str]
+            List of source paths.
+
+        Returns
+        -------
+        list[dict]
+            Returns files metadata.
+        """
+        return []
+
+    ##############################
+    # Datastore methods
+    ##############################
+
+    def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
+        """
+        Method to write a dataframe to a file. Note that this method is not implemented
+        since the remote store is not meant to write dataframes.
+
         Raises
         ------
         NotImplementedError
             This method is not implemented.
         """
-        raise NotImplementedError("Remote store does not support
+        raise NotImplementedError("Remote store does not support write_df.")

     ##############################
-    #
+    # Helper methods
     ##############################

     @staticmethod
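RemoteStore gains the same write_df entry point for interface parity, but keeps HTTP(s) storage read-only by raising NotImplementedError. A small hedged helper, not part of digitalhub, shows how calling code might tolerate such read-only stores:

# Illustrative helper: treat stores whose write_df raises NotImplementedError
# (e.g. the remote HTTP store above) as read-only backends.
from typing import Any


def try_write(store: Any, df: Any, dst: str) -> str | None:
    """Return the written path, or None when the store cannot write dataframes."""
    try:
        return store.write_df(df, dst)
    except NotImplementedError:
        return None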
digitalhub/stores/s3/store.py
CHANGED
@@ -2,16 +2,18 @@ from __future__ import annotations

 from io import BytesIO
 from pathlib import Path
-from typing import Type
+from typing import Any, Type
 from urllib.parse import urlparse

 import boto3
 import botocore.client  # pylint: disable=unused-import
 from botocore.exceptions import ClientError

+from digitalhub.readers.api import get_reader_by_object
 from digitalhub.stores._base.store import Store, StoreConfig
 from digitalhub.utils.exceptions import StoreError
 from digitalhub.utils.file_utils import get_file_info_from_s3, get_file_mime_type
+from digitalhub.utils.s3_utils import get_bucket_name

 # Type aliases
 S3Client = Type["botocore.client.S3"]
@@ -46,7 +48,7 @@ class S3Store(Store):
         self.config = config

     ##############################
-    #
+    # I/O methods
     ##############################

     def download(
@@ -75,7 +77,7 @@ class S3Store(Store):
         str
             Destination path of the downloaded artifact.
         """
-        client, bucket = self._check_factory()
+        client, bucket = self._check_factory(root)

         # Build destination directory
         if dst.suffix == "":
@@ -125,14 +127,18 @@ class S3Store(Store):
             return str(Path(dst, trees[0]))
         return str(dst)

-    def upload(
+    def upload(
+        self,
+        src: str | list[str],
+        dst: str,
+    ) -> list[tuple[str, str]]:
         """
         Upload an artifact to storage.

         Parameters
         ----------
-        src : str
-
+        src : str | list[str]
+            Source(s).
         dst : str
             The destination of the artifact on storage.

@@ -141,22 +147,12 @@ class S3Store(Store):
         list[tuple[str, str]]
             Returns the list of destination and source paths of the uploaded artifacts.
         """
-
         # Destination handling
+        key = self._get_key(dst)

-        #
-
-        if
-            raise StoreError(
-                "Destination must be provided. " + "If source is a list of files or a directory, "
-                "destination must be a partition, e.g. 's3://bucket/partition/', ",
-                "otherwise a destination key, e.g. 's3://bucket/key'",
-            )
-        else:
-            dst = self._get_key(dst)
-
-        # Source handling
-        if not isinstance(src, list):
+        # Source handling (files list, dir or single file)
+        src_is_list = isinstance(src, list)
+        if not src_is_list:
             self._check_local_src(src)
             src_is_dir = Path(src).is_dir()
         else:
@@ -167,21 +163,31 @@ class S3Store(Store):
             src = src[0]

         # If source is a directory, destination must be a partition
-        if (src_is_dir or
-            raise StoreError(
+        if (src_is_dir or src_is_list) and not dst.endswith("/"):
+            raise StoreError(
+                "If source is a list of files or a directory, "
+                "destination must be a partition, e.g. 's3://bucket/partition/'"
+            )
+
+        # S3 client
+        client, bucket = self._check_factory(dst)

         # Directory
         if src_is_dir:
-            return self._upload_dir(src,
+            return self._upload_dir(src, key, client, bucket)

         # List of files
-        elif
-            return self._upload_file_list(src,
+        elif src_is_list:
+            return self._upload_file_list(src, key, client, bucket)

         # Single file
-        return self._upload_single_file(src,
+        return self._upload_single_file(src, key, client, bucket)

-    def upload_fileobject(
+    def upload_fileobject(
+        self,
+        src: BytesIO,
+        dst: str,
+    ) -> str:
         """
         Upload an BytesIO to S3 based storage.

@@ -190,18 +196,23 @@ class S3Store(Store):
         src : BytesIO
             The source object to be persisted.
         dst : str
-            The destination
+            The destination path of the artifact.

         Returns
         -------
         str
             S3 key of the uploaded artifact.
         """
-        client, bucket = self._check_factory()
-        self.
-
+        client, bucket = self._check_factory(dst)
+        key = self._get_key(dst)
+        self._upload_fileobject(src, key, client, bucket)
+        return f"s3://{bucket}/{key}"

-    def get_file_info(
+    def get_file_info(
+        self,
+        root: str,
+        paths: list[tuple[str, str]],
+    ) -> list[dict]:
         """
         Method to get file metadata.

@@ -215,7 +226,7 @@ class S3Store(Store):
         list[dict]
             Returns files metadata.
         """
-        client, bucket = self._check_factory()
+        client, bucket = self._check_factory(root)

         infos = []
         for i in paths:
@@ -266,7 +277,13 @@ class S3Store(Store):
         # Download file
         client.download_file(bucket, key, dst_pth)

-    def _upload_dir(
+    def _upload_dir(
+        self,
+        src: str,
+        dst: str,
+        client: S3Client,
+        bucket: str,
+    ) -> list[tuple[str, str]]:
         """
         Upload directory to storage.

@@ -276,33 +293,40 @@ class S3Store(Store):
             List of sources.
         dst : str
             The destination of the artifact on storage.
+        client : S3Client
+            The S3 client object.
+        bucket : str
+            The name of the S3 bucket.

         Returns
         -------
         list[tuple[str, str]]
             Returns the list of destination and source paths of the uploaded artifacts.
         """
-
-
+        # Get list of files
         src_pth = Path(src)
         files = [i for i in src_pth.rglob("*") if i.is_file()]
+
+        # Build keys
         keys = []
         for i in files:
-
-            i = i.relative_to(src_pth)
+            i = i.relative_to(src_pth)
             keys.append(f"{dst}{i}")

         # Upload files
         paths = []
-        for
-            f, k = i
+        for f, k in zip(files, keys):
             self._upload_file(f, k, client, bucket)
-
-            f = f.relative_to(src_pth)
-            paths.append((k, str(f)))
+            paths.append((k, str(f.relative_to(src_pth))))
         return paths

-    def _upload_file_list(
+    def _upload_file_list(
+        self,
+        src: list[str],
+        dst: str,
+        client: S3Client,
+        bucket: str,
+    ) -> list[tuple[str, str]]:
         """
         Upload list of files to storage.

@@ -312,13 +336,16 @@ class S3Store(Store):
             List of sources.
         dst : str
             The destination of the artifact on storage.
+        client : S3Client
+            The S3 client object.
+        bucket : str
+            The name of the S3 bucket.

         Returns
         -------
         list[tuple[str, str]]
             Returns the list of destination and source paths of the uploaded artifacts.
         """
-        client, bucket = self._check_factory()
         files = src
         keys = []
         for i in files:
@@ -328,13 +355,18 @@ class S3Store(Store):

         # Upload files
         paths = []
-        for
-            f, k = i
+        for f, k in zip(files, keys):
             self._upload_file(f, k, client, bucket)
             paths.append((k, Path(f).name))
         return paths

-    def _upload_single_file(
+    def _upload_single_file(
+        self,
+        src: str,
+        dst: str,
+        client: S3Client,
+        bucket: str,
+    ) -> str:
         """
         Upload a single file to storage.

@@ -344,14 +376,16 @@ class S3Store(Store):
             List of sources.
         dst : str
             The destination of the artifact on storage.
+        client : S3Client
+            The S3 client object.
+        bucket : str
+            The name of the S3 bucket.

         Returns
         -------
         str
             Returns the list of destination and source paths of the uploaded artifacts.
         """
-        client, bucket = self._check_factory()
-
         if dst.endswith("/"):
             dst = f"{dst.removeprefix('/')}{Path(src).name}"

@@ -361,7 +395,12 @@ class S3Store(Store):
         return [(dst, name)]

     @staticmethod
-    def _upload_file(
+    def _upload_file(
+        src: str,
+        key: str,
+        client: S3Client,
+        bucket: str,
+    ) -> None:
         """
         Upload a file to S3 based storage. The function checks if the
         bucket is accessible.
@@ -388,7 +427,12 @@ class S3Store(Store):
         client.upload_file(Filename=src, Bucket=bucket, Key=key, ExtraArgs=extra_args)

     @staticmethod
-    def _upload_fileobject(
+    def _upload_fileobject(
+        fileobj: BytesIO,
+        key: str,
+        client: S3Client,
+        bucket: str,
+    ) -> None:
         """
         Upload a fileobject to S3 based storage. The function checks if the bucket is accessible.

@@ -410,10 +454,45 @@ class S3Store(Store):
         client.put_object(Bucket=bucket, Key=key, Body=fileobj.getvalue())

     ##############################
-    #
+    # Datastore methods
+    ##############################
+
+    def write_df(
+        self,
+        df: Any,
+        dst: str,
+        extension: str | None = None,
+        **kwargs,
+    ) -> str:
+        """
+        Write a dataframe to S3 based storage. Kwargs are passed to df.to_parquet().
+
+        Parameters
+        ----------
+        df : Any
+            The dataframe.
+        dst : str
+            The destination path on S3 based storage.
+        extension : str
+            The extension of the file.
+        **kwargs : dict
+            Keyword arguments.
+
+        Returns
+        -------
+        str
+            The S3 path where the dataframe was saved.
+        """
+        fileobj = BytesIO()
+        reader = get_reader_by_object(df)
+        reader.write_df(df, fileobj, extension=extension, **kwargs)
+        return self.upload_fileobject(fileobj, dst)
+
+    ##############################
+    # Helper methods
     ##############################

-    def _get_bucket(self) -> str:
+    def _get_bucket(self, root: str) -> str:
         """
         Get the name of the S3 bucket from the URI.

@@ -422,7 +501,7 @@ class S3Store(Store):
         str
             The name of the S3 bucket.
         """
-        return
+        return get_bucket_name(root)

     def _get_client(self) -> S3Client:
         """
@@ -440,7 +519,7 @@ class S3Store(Store):
         }
         return boto3.client("s3", **cfg)

-    def _check_factory(self) -> tuple[S3Client, str]:
+    def _check_factory(self, root: str) -> tuple[S3Client, str]:
         """
         Check if the S3 bucket is accessible by sending a head_bucket request.

@@ -450,7 +529,7 @@ class S3Store(Store):
         A tuple containing the S3 client object and the name of the S3 bucket.
         """
         client = self._get_client()
-        bucket = self._get_bucket()
+        bucket = self._get_bucket(root)
         self._check_access_to_storage(client, bucket)
         return client, bucket

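The new S3Store.write_df serializes the dataframe into a BytesIO buffer and reuses upload_fileobject, which now resolves the bucket from the destination URI through _check_factory(dst) and get_bucket_name. A hedged restatement of that flow, with pandas standing in for the reader resolved by get_reader_by_object:

# Illustrative restatement of S3Store.write_df's flow, outside the class.
# df.to_parquet stands in for reader.write_df; credentials/config handling
# and the store instance itself are assumed to exist already.
from io import BytesIO

import pandas as pd


def write_df_to_s3(store, df: pd.DataFrame, dst: str) -> str:
    """Serialize df in memory and upload it to dst ("s3://bucket/key")."""
    buffer = BytesIO()
    df.to_parquet(buffer)  # the real code calls reader.write_df(df, fileobj, ...)
    return store.upload_fileobject(buffer, dst)  # returns "s3://{bucket}/{key}"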