digitalhub 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +19 -2
- digitalhub/client/_base/api_builder.py +16 -0
- digitalhub/client/_base/client.py +67 -0
- digitalhub/client/_base/key_builder.py +52 -0
- digitalhub/client/api.py +2 -38
- digitalhub/client/dhcore/api_builder.py +100 -0
- digitalhub/client/dhcore/client.py +81 -25
- digitalhub/client/dhcore/enums.py +27 -0
- digitalhub/client/dhcore/env.py +2 -2
- digitalhub/client/dhcore/key_builder.py +58 -0
- digitalhub/client/dhcore/utils.py +17 -17
- digitalhub/client/local/api_builder.py +100 -0
- digitalhub/client/local/client.py +22 -0
- digitalhub/client/local/key_builder.py +58 -0
- digitalhub/context/api.py +3 -38
- digitalhub/context/builder.py +10 -23
- digitalhub/context/context.py +20 -92
- digitalhub/entities/_base/context/entity.py +30 -22
- digitalhub/entities/_base/entity/_constructors/metadata.py +12 -1
- digitalhub/entities/_base/entity/_constructors/name.py +1 -1
- digitalhub/entities/_base/entity/_constructors/spec.py +1 -1
- digitalhub/entities/_base/entity/_constructors/status.py +3 -2
- digitalhub/entities/_base/entity/builder.py +6 -1
- digitalhub/entities/_base/entity/entity.py +32 -10
- digitalhub/entities/_base/entity/metadata.py +22 -0
- digitalhub/entities/_base/entity/spec.py +7 -2
- digitalhub/entities/_base/executable/entity.py +8 -8
- digitalhub/entities/_base/material/entity.py +49 -17
- digitalhub/entities/_base/material/status.py +0 -31
- digitalhub/entities/_base/material/utils.py +106 -0
- digitalhub/entities/_base/project/entity.py +341 -0
- digitalhub/entities/_base/unversioned/entity.py +3 -24
- digitalhub/entities/_base/versioned/entity.py +2 -26
- digitalhub/entities/_commons/enums.py +103 -0
- digitalhub/entities/_commons/utils.py +83 -0
- digitalhub/entities/_operations/processor.py +1873 -0
- digitalhub/entities/artifact/_base/builder.py +1 -1
- digitalhub/entities/artifact/_base/entity.py +1 -1
- digitalhub/entities/artifact/artifact/builder.py +2 -1
- digitalhub/entities/artifact/crud.py +46 -29
- digitalhub/entities/artifact/utils.py +62 -0
- digitalhub/entities/dataitem/_base/builder.py +1 -1
- digitalhub/entities/dataitem/_base/entity.py +6 -6
- digitalhub/entities/dataitem/crud.py +50 -66
- digitalhub/entities/dataitem/dataitem/builder.py +2 -1
- digitalhub/entities/dataitem/iceberg/builder.py +2 -1
- digitalhub/entities/dataitem/table/builder.py +2 -1
- digitalhub/entities/dataitem/table/entity.py +5 -10
- digitalhub/entities/dataitem/table/models.py +4 -5
- digitalhub/entities/dataitem/utils.py +137 -0
- digitalhub/entities/function/_base/builder.py +1 -1
- digitalhub/entities/function/_base/entity.py +6 -2
- digitalhub/entities/function/crud.py +36 -17
- digitalhub/entities/model/_base/builder.py +1 -1
- digitalhub/entities/model/_base/entity.py +1 -1
- digitalhub/entities/model/crud.py +46 -29
- digitalhub/entities/model/huggingface/builder.py +2 -1
- digitalhub/entities/model/huggingface/spec.py +4 -2
- digitalhub/entities/model/mlflow/builder.py +2 -1
- digitalhub/entities/model/mlflow/models.py +17 -9
- digitalhub/entities/model/mlflow/spec.py +6 -1
- digitalhub/entities/model/mlflow/utils.py +4 -2
- digitalhub/entities/model/model/builder.py +2 -1
- digitalhub/entities/model/sklearn/builder.py +2 -1
- digitalhub/entities/model/utils.py +62 -0
- digitalhub/entities/project/_base/builder.py +2 -2
- digitalhub/entities/project/_base/entity.py +82 -272
- digitalhub/entities/project/crud.py +110 -91
- digitalhub/entities/project/utils.py +35 -0
- digitalhub/entities/run/_base/builder.py +3 -1
- digitalhub/entities/run/_base/entity.py +52 -54
- digitalhub/entities/run/_base/spec.py +15 -7
- digitalhub/entities/run/crud.py +35 -17
- digitalhub/entities/secret/_base/builder.py +2 -2
- digitalhub/entities/secret/_base/entity.py +4 -10
- digitalhub/entities/secret/crud.py +36 -21
- digitalhub/entities/task/_base/builder.py +14 -14
- digitalhub/entities/task/_base/entity.py +21 -14
- digitalhub/entities/task/_base/models.py +35 -6
- digitalhub/entities/task/_base/spec.py +50 -13
- digitalhub/entities/task/_base/utils.py +18 -0
- digitalhub/entities/task/crud.py +35 -15
- digitalhub/entities/workflow/_base/builder.py +1 -1
- digitalhub/entities/workflow/_base/entity.py +22 -6
- digitalhub/entities/workflow/crud.py +36 -17
- digitalhub/factory/utils.py +1 -1
- digitalhub/readers/_base/reader.py +2 -2
- digitalhub/readers/_commons/enums.py +13 -0
- digitalhub/readers/api.py +3 -2
- digitalhub/readers/factory.py +12 -6
- digitalhub/readers/pandas/reader.py +20 -8
- digitalhub/runtimes/_base.py +0 -7
- digitalhub/runtimes/enums.py +12 -0
- digitalhub/stores/_base/store.py +59 -11
- digitalhub/stores/builder.py +5 -5
- digitalhub/stores/local/store.py +43 -4
- digitalhub/stores/remote/store.py +31 -5
- digitalhub/stores/s3/store.py +129 -48
- digitalhub/stores/sql/store.py +122 -47
- digitalhub/utils/exceptions.py +6 -0
- digitalhub/utils/file_utils.py +60 -2
- digitalhub/utils/generic_utils.py +45 -4
- digitalhub/utils/io_utils.py +18 -0
- digitalhub/utils/s3_utils.py +17 -0
- digitalhub/utils/uri_utils.py +153 -15
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/LICENSE.txt +1 -1
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/METADATA +3 -3
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/RECORD +116 -114
- test/local/instances/test_validate.py +55 -0
- test/testkfp.py +4 -1
- digitalhub/datastores/_base/datastore.py +0 -85
- digitalhub/datastores/api.py +0 -37
- digitalhub/datastores/builder.py +0 -110
- digitalhub/datastores/local/datastore.py +0 -50
- digitalhub/datastores/remote/__init__.py +0 -0
- digitalhub/datastores/remote/datastore.py +0 -31
- digitalhub/datastores/s3/__init__.py +0 -0
- digitalhub/datastores/s3/datastore.py +0 -46
- digitalhub/datastores/sql/__init__.py +0 -0
- digitalhub/datastores/sql/datastore.py +0 -68
- digitalhub/entities/_base/api_utils.py +0 -620
- digitalhub/entities/_base/crud.py +0 -468
- digitalhub/entities/function/_base/models.py +0 -118
- digitalhub/entities/utils/__init__.py +0 -0
- digitalhub/entities/utils/api.py +0 -346
- digitalhub/entities/utils/entity_types.py +0 -19
- digitalhub/entities/utils/state.py +0 -31
- digitalhub/entities/utils/utils.py +0 -202
- /digitalhub/{context → entities/_base/project}/__init__.py +0 -0
- /digitalhub/{datastores → entities/_commons}/__init__.py +0 -0
- /digitalhub/{datastores/_base → entities/_operations}/__init__.py +0 -0
- /digitalhub/{datastores/local → readers/_commons}/__init__.py +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/WHEEL +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/top_level.txt +0 -0
digitalhub/stores/local/store.py
CHANGED
|
@@ -2,7 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import shutil
|
|
4
4
|
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
5
6
|
|
|
7
|
+
from digitalhub.readers.api import get_reader_by_object
|
|
6
8
|
from digitalhub.stores._base.store import Store, StoreConfig
|
|
7
9
|
from digitalhub.utils.exceptions import StoreError
|
|
8
10
|
from digitalhub.utils.file_utils import get_file_info_from_local
|
|
@@ -28,7 +30,7 @@ class LocalStore(Store):
|
|
|
28
30
|
self.config = config
|
|
29
31
|
|
|
30
32
|
##############################
|
|
31
|
-
#
|
|
33
|
+
# I/O methods
|
|
32
34
|
##############################
|
|
33
35
|
|
|
34
36
|
def download(
|
|
@@ -59,7 +61,7 @@ class LocalStore(Store):
|
|
|
59
61
|
"""
|
|
60
62
|
raise StoreError("Local store does not support download.")
|
|
61
63
|
|
|
62
|
-
def upload(self, src: str | list[str], dst: str
|
|
64
|
+
def upload(self, src: str | list[str], dst: str) -> list[tuple[str, str]]:
|
|
63
65
|
"""
|
|
64
66
|
Upload an artifact to storage.
|
|
65
67
|
|
|
@@ -70,7 +72,11 @@ class LocalStore(Store):
|
|
|
70
72
|
"""
|
|
71
73
|
raise StoreError("Local store does not support upload.")
|
|
72
74
|
|
|
73
|
-
def get_file_info(
|
|
75
|
+
def get_file_info(
|
|
76
|
+
self,
|
|
77
|
+
root: str,
|
|
78
|
+
paths: list[tuple[str, str]],
|
|
79
|
+
) -> list[dict]:
|
|
74
80
|
"""
|
|
75
81
|
Method to get file metadata.
|
|
76
82
|
|
|
@@ -189,7 +195,40 @@ class LocalStore(Store):
|
|
|
189
195
|
return dst
|
|
190
196
|
|
|
191
197
|
##############################
|
|
192
|
-
#
|
|
198
|
+
# Datastore methods
|
|
199
|
+
##############################
|
|
200
|
+
|
|
201
|
+
def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
|
|
202
|
+
"""
|
|
203
|
+
Method to write a dataframe to a file. Kwargs are passed to df.to_parquet().
|
|
204
|
+
If destination is not provided, the dataframe is written to the default
|
|
205
|
+
store path with generated name.
|
|
206
|
+
|
|
207
|
+
Parameters
|
|
208
|
+
----------
|
|
209
|
+
df : Any
|
|
210
|
+
The dataframe to write.
|
|
211
|
+
dst : str
|
|
212
|
+
The destination of the dataframe.
|
|
213
|
+
**kwargs : dict
|
|
214
|
+
Keyword arguments.
|
|
215
|
+
|
|
216
|
+
Returns
|
|
217
|
+
-------
|
|
218
|
+
str
|
|
219
|
+
Path of written dataframe.
|
|
220
|
+
"""
|
|
221
|
+
self.store._check_local_dst(dst)
|
|
222
|
+
self._validate_extension(Path(dst).suffix.removeprefix("."))
|
|
223
|
+
|
|
224
|
+
# Write dataframe
|
|
225
|
+
reader = get_reader_by_object(df)
|
|
226
|
+
reader.write_df(df, dst, extension=extension, **kwargs)
|
|
227
|
+
|
|
228
|
+
return dst
|
|
229
|
+
|
|
230
|
+
##############################
|
|
231
|
+
# Helper methods
|
|
193
232
|
##############################
|
|
194
233
|
|
|
195
234
|
@staticmethod
|
|
digitalhub/stores/remote/store.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
6
|
import requests
|
|
6
7
|
|
|
@@ -25,7 +26,7 @@ class RemoteStore(Store):
|
|
|
25
26
|
self.config = config
|
|
26
27
|
|
|
27
28
|
##############################
|
|
28
|
-
#
|
|
29
|
+
# I/O methods
|
|
29
30
|
##############################
|
|
30
31
|
|
|
31
32
|
def download(
|
|
@@ -68,7 +69,7 @@ class RemoteStore(Store):
|
|
|
68
69
|
|
|
69
70
|
return self._download_file(root, dst, overwrite)
|
|
70
71
|
|
|
71
|
-
def upload(self, src: str | list[str], dst: str
|
|
72
|
+
def upload(self, src: str | list[str], dst: str) -> list[tuple[str, str]]:
|
|
72
73
|
"""
|
|
73
74
|
Upload an artifact to storage.
|
|
74
75
|
|
|
@@ -79,19 +80,44 @@ class RemoteStore(Store):
|
|
|
79
80
|
"""
|
|
80
81
|
raise StoreError("Remote HTTP store does not support upload.")
|
|
81
82
|
|
|
82
|
-
def get_file_info(
|
|
83
|
+
def get_file_info(
|
|
84
|
+
self,
|
|
85
|
+
root: str,
|
|
86
|
+
paths: list[tuple[str, str]],
|
|
87
|
+
) -> list[dict]:
|
|
83
88
|
"""
|
|
84
89
|
Get file information from HTTP(s) storage.
|
|
85
90
|
|
|
91
|
+
Parameters
|
|
92
|
+
----------
|
|
93
|
+
paths : list[str]
|
|
94
|
+
List of source paths.
|
|
95
|
+
|
|
96
|
+
Returns
|
|
97
|
+
-------
|
|
98
|
+
list[dict]
|
|
99
|
+
Returns files metadata.
|
|
100
|
+
"""
|
|
101
|
+
return []
|
|
102
|
+
|
|
103
|
+
##############################
|
|
104
|
+
# Datastore methods
|
|
105
|
+
##############################
|
|
106
|
+
|
|
107
|
+
def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
|
|
108
|
+
"""
|
|
109
|
+
Method to write a dataframe to a file. Note that this method is not implemented
|
|
110
|
+
since the remote store is not meant to write dataframes.
|
|
111
|
+
|
|
86
112
|
Raises
|
|
87
113
|
------
|
|
88
114
|
NotImplementedError
|
|
89
115
|
This method is not implemented.
|
|
90
116
|
"""
|
|
91
|
-
raise NotImplementedError("Remote store does not support
|
|
117
|
+
raise NotImplementedError("Remote store does not support write_df.")
|
|
92
118
|
|
|
93
119
|
##############################
|
|
94
|
-
#
|
|
120
|
+
# Helper methods
|
|
95
121
|
##############################
|
|
96
122
|
|
|
97
123
|
@staticmethod
|
digitalhub/stores/s3/store.py
CHANGED
|
@@ -2,16 +2,18 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from io import BytesIO
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Type
|
|
5
|
+
from typing import Any, Type
|
|
6
6
|
from urllib.parse import urlparse
|
|
7
7
|
|
|
8
8
|
import boto3
|
|
9
9
|
import botocore.client # pylint: disable=unused-import
|
|
10
10
|
from botocore.exceptions import ClientError
|
|
11
11
|
|
|
12
|
+
from digitalhub.readers.api import get_reader_by_object
|
|
12
13
|
from digitalhub.stores._base.store import Store, StoreConfig
|
|
13
14
|
from digitalhub.utils.exceptions import StoreError
|
|
14
15
|
from digitalhub.utils.file_utils import get_file_info_from_s3, get_file_mime_type
|
|
16
|
+
from digitalhub.utils.s3_utils import get_bucket_name
|
|
15
17
|
|
|
16
18
|
# Type aliases
|
|
17
19
|
S3Client = Type["botocore.client.S3"]
|
|
@@ -46,7 +48,7 @@ class S3Store(Store):
|
|
|
46
48
|
self.config = config
|
|
47
49
|
|
|
48
50
|
##############################
|
|
49
|
-
#
|
|
51
|
+
# I/O methods
|
|
50
52
|
##############################
|
|
51
53
|
|
|
52
54
|
def download(
|
|
@@ -75,7 +77,7 @@ class S3Store(Store):
|
|
|
75
77
|
str
|
|
76
78
|
Destination path of the downloaded artifact.
|
|
77
79
|
"""
|
|
78
|
-
client, bucket = self._check_factory()
|
|
80
|
+
client, bucket = self._check_factory(root)
|
|
79
81
|
|
|
80
82
|
# Build destination directory
|
|
81
83
|
if dst.suffix == "":
|
|
@@ -125,14 +127,18 @@ class S3Store(Store):
|
|
|
125
127
|
return str(Path(dst, trees[0]))
|
|
126
128
|
return str(dst)
|
|
127
129
|
|
|
128
|
-
def upload(
|
|
130
|
+
def upload(
|
|
131
|
+
self,
|
|
132
|
+
src: str | list[str],
|
|
133
|
+
dst: str,
|
|
134
|
+
) -> list[tuple[str, str]]:
|
|
129
135
|
"""
|
|
130
136
|
Upload an artifact to storage.
|
|
131
137
|
|
|
132
138
|
Parameters
|
|
133
139
|
----------
|
|
134
|
-
src : str
|
|
135
|
-
|
|
140
|
+
src : str | list[str]
|
|
141
|
+
Source(s).
|
|
136
142
|
dst : str
|
|
137
143
|
The destination of the artifact on storage.
|
|
138
144
|
|
|
@@ -141,22 +147,12 @@ class S3Store(Store):
|
|
|
141
147
|
list[tuple[str, str]]
|
|
142
148
|
Returns the list of destination and source paths of the uploaded artifacts.
|
|
143
149
|
"""
|
|
144
|
-
|
|
145
150
|
# Destination handling
|
|
151
|
+
key = self._get_key(dst)
|
|
146
152
|
|
|
147
|
-
#
|
|
148
|
-
|
|
149
|
-
if
|
|
150
|
-
raise StoreError(
|
|
151
|
-
"Destination must be provided. " + "If source is a list of files or a directory, "
|
|
152
|
-
"destination must be a partition, e.g. 's3://bucket/partition/', ",
|
|
153
|
-
"otherwise a destination key, e.g. 's3://bucket/key'",
|
|
154
|
-
)
|
|
155
|
-
else:
|
|
156
|
-
dst = self._get_key(dst)
|
|
157
|
-
|
|
158
|
-
# Source handling
|
|
159
|
-
if not isinstance(src, list):
|
|
153
|
+
# Source handling (files list, dir or single file)
|
|
154
|
+
src_is_list = isinstance(src, list)
|
|
155
|
+
if not src_is_list:
|
|
160
156
|
self._check_local_src(src)
|
|
161
157
|
src_is_dir = Path(src).is_dir()
|
|
162
158
|
else:
|
|
@@ -167,21 +163,31 @@ class S3Store(Store):
|
|
|
167
163
|
src = src[0]
|
|
168
164
|
|
|
169
165
|
# If source is a directory, destination must be a partition
|
|
170
|
-
if (src_is_dir or
|
|
171
|
-
raise StoreError(
|
|
166
|
+
if (src_is_dir or src_is_list) and not dst.endswith("/"):
|
|
167
|
+
raise StoreError(
|
|
168
|
+
"If source is a list of files or a directory, "
|
|
169
|
+
"destination must be a partition, e.g. 's3://bucket/partition/'"
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
# S3 client
|
|
173
|
+
client, bucket = self._check_factory(dst)
|
|
172
174
|
|
|
173
175
|
# Directory
|
|
174
176
|
if src_is_dir:
|
|
175
|
-
return self._upload_dir(src,
|
|
177
|
+
return self._upload_dir(src, key, client, bucket)
|
|
176
178
|
|
|
177
179
|
# List of files
|
|
178
|
-
elif
|
|
179
|
-
return self._upload_file_list(src,
|
|
180
|
+
elif src_is_list:
|
|
181
|
+
return self._upload_file_list(src, key, client, bucket)
|
|
180
182
|
|
|
181
183
|
# Single file
|
|
182
|
-
return self._upload_single_file(src,
|
|
184
|
+
return self._upload_single_file(src, key, client, bucket)
|
|
183
185
|
|
|
184
|
-
def upload_fileobject(
|
|
186
|
+
def upload_fileobject(
|
|
187
|
+
self,
|
|
188
|
+
src: BytesIO,
|
|
189
|
+
dst: str,
|
|
190
|
+
) -> str:
|
|
185
191
|
"""
|
|
186
192
|
Upload an BytesIO to S3 based storage.
|
|
187
193
|
|
|
@@ -190,18 +196,23 @@ class S3Store(Store):
|
|
|
190
196
|
src : BytesIO
|
|
191
197
|
The source object to be persisted.
|
|
192
198
|
dst : str
|
|
193
|
-
The destination
|
|
199
|
+
The destination path of the artifact.
|
|
194
200
|
|
|
195
201
|
Returns
|
|
196
202
|
-------
|
|
197
203
|
str
|
|
198
204
|
S3 key of the uploaded artifact.
|
|
199
205
|
"""
|
|
200
|
-
client, bucket = self._check_factory()
|
|
201
|
-
self.
|
|
202
|
-
|
|
206
|
+
client, bucket = self._check_factory(dst)
|
|
207
|
+
key = self._get_key(dst)
|
|
208
|
+
self._upload_fileobject(src, key, client, bucket)
|
|
209
|
+
return f"s3://{bucket}/{key}"
|
|
203
210
|
|
|
204
|
-
def get_file_info(
|
|
211
|
+
def get_file_info(
|
|
212
|
+
self,
|
|
213
|
+
root: str,
|
|
214
|
+
paths: list[tuple[str, str]],
|
|
215
|
+
) -> list[dict]:
|
|
205
216
|
"""
|
|
206
217
|
Method to get file metadata.
|
|
207
218
|
|
|
@@ -215,7 +226,7 @@ class S3Store(Store):
|
|
|
215
226
|
list[dict]
|
|
216
227
|
Returns files metadata.
|
|
217
228
|
"""
|
|
218
|
-
client, bucket = self._check_factory()
|
|
229
|
+
client, bucket = self._check_factory(root)
|
|
219
230
|
|
|
220
231
|
infos = []
|
|
221
232
|
for i in paths:
|
|
@@ -266,7 +277,13 @@ class S3Store(Store):
|
|
|
266
277
|
# Download file
|
|
267
278
|
client.download_file(bucket, key, dst_pth)
|
|
268
279
|
|
|
269
|
-
def _upload_dir(
|
|
280
|
+
def _upload_dir(
|
|
281
|
+
self,
|
|
282
|
+
src: str,
|
|
283
|
+
dst: str,
|
|
284
|
+
client: S3Client,
|
|
285
|
+
bucket: str,
|
|
286
|
+
) -> list[tuple[str, str]]:
|
|
270
287
|
"""
|
|
271
288
|
Upload directory to storage.
|
|
272
289
|
|
|
@@ -276,14 +293,16 @@ class S3Store(Store):
|
|
|
276
293
|
List of sources.
|
|
277
294
|
dst : str
|
|
278
295
|
The destination of the artifact on storage.
|
|
296
|
+
client : S3Client
|
|
297
|
+
The S3 client object.
|
|
298
|
+
bucket : str
|
|
299
|
+
The name of the S3 bucket.
|
|
279
300
|
|
|
280
301
|
Returns
|
|
281
302
|
-------
|
|
282
303
|
list[tuple[str, str]]
|
|
283
304
|
Returns the list of destination and source paths of the uploaded artifacts.
|
|
284
305
|
"""
|
|
285
|
-
client, bucket = self._check_factory()
|
|
286
|
-
|
|
287
306
|
# Get list of files
|
|
288
307
|
src_pth = Path(src)
|
|
289
308
|
files = [i for i in src_pth.rglob("*") if i.is_file()]
|
|
@@ -301,7 +320,13 @@ class S3Store(Store):
|
|
|
301
320
|
paths.append((k, str(f.relative_to(src_pth))))
|
|
302
321
|
return paths
|
|
303
322
|
|
|
304
|
-
def _upload_file_list(
|
|
323
|
+
def _upload_file_list(
|
|
324
|
+
self,
|
|
325
|
+
src: list[str],
|
|
326
|
+
dst: str,
|
|
327
|
+
client: S3Client,
|
|
328
|
+
bucket: str,
|
|
329
|
+
) -> list[tuple[str, str]]:
|
|
305
330
|
"""
|
|
306
331
|
Upload list of files to storage.
|
|
307
332
|
|
|
@@ -311,13 +336,16 @@ class S3Store(Store):
|
|
|
311
336
|
List of sources.
|
|
312
337
|
dst : str
|
|
313
338
|
The destination of the artifact on storage.
|
|
339
|
+
client : S3Client
|
|
340
|
+
The S3 client object.
|
|
341
|
+
bucket : str
|
|
342
|
+
The name of the S3 bucket.
|
|
314
343
|
|
|
315
344
|
Returns
|
|
316
345
|
-------
|
|
317
346
|
list[tuple[str, str]]
|
|
318
347
|
Returns the list of destination and source paths of the uploaded artifacts.
|
|
319
348
|
"""
|
|
320
|
-
client, bucket = self._check_factory()
|
|
321
349
|
files = src
|
|
322
350
|
keys = []
|
|
323
351
|
for i in files:
|
|
@@ -332,7 +360,13 @@ class S3Store(Store):
|
|
|
332
360
|
paths.append((k, Path(f).name))
|
|
333
361
|
return paths
|
|
334
362
|
|
|
335
|
-
def _upload_single_file(
|
|
363
|
+
def _upload_single_file(
|
|
364
|
+
self,
|
|
365
|
+
src: str,
|
|
366
|
+
dst: str,
|
|
367
|
+
client: S3Client,
|
|
368
|
+
bucket: str,
|
|
369
|
+
) -> str:
|
|
336
370
|
"""
|
|
337
371
|
Upload a single file to storage.
|
|
338
372
|
|
|
@@ -342,14 +376,16 @@ class S3Store(Store):
|
|
|
342
376
|
List of sources.
|
|
343
377
|
dst : str
|
|
344
378
|
The destination of the artifact on storage.
|
|
379
|
+
client : S3Client
|
|
380
|
+
The S3 client object.
|
|
381
|
+
bucket : str
|
|
382
|
+
The name of the S3 bucket.
|
|
345
383
|
|
|
346
384
|
Returns
|
|
347
385
|
-------
|
|
348
386
|
str
|
|
349
387
|
Returns the list of destination and source paths of the uploaded artifacts.
|
|
350
388
|
"""
|
|
351
|
-
client, bucket = self._check_factory()
|
|
352
|
-
|
|
353
389
|
if dst.endswith("/"):
|
|
354
390
|
dst = f"{dst.removeprefix('/')}{Path(src).name}"
|
|
355
391
|
|
|
@@ -359,7 +395,12 @@ class S3Store(Store):
|
|
|
359
395
|
return [(dst, name)]
|
|
360
396
|
|
|
361
397
|
@staticmethod
|
|
362
|
-
def _upload_file(
|
|
398
|
+
def _upload_file(
|
|
399
|
+
src: str,
|
|
400
|
+
key: str,
|
|
401
|
+
client: S3Client,
|
|
402
|
+
bucket: str,
|
|
403
|
+
) -> None:
|
|
363
404
|
"""
|
|
364
405
|
Upload a file to S3 based storage. The function checks if the
|
|
365
406
|
bucket is accessible.
|
|
@@ -386,7 +427,12 @@ class S3Store(Store):
|
|
|
386
427
|
client.upload_file(Filename=src, Bucket=bucket, Key=key, ExtraArgs=extra_args)
|
|
387
428
|
|
|
388
429
|
@staticmethod
|
|
389
|
-
def _upload_fileobject(
|
|
430
|
+
def _upload_fileobject(
|
|
431
|
+
fileobj: BytesIO,
|
|
432
|
+
key: str,
|
|
433
|
+
client: S3Client,
|
|
434
|
+
bucket: str,
|
|
435
|
+
) -> None:
|
|
390
436
|
"""
|
|
391
437
|
Upload a fileobject to S3 based storage. The function checks if the bucket is accessible.
|
|
392
438
|
|
|
@@ -408,10 +454,45 @@ class S3Store(Store):
|
|
|
408
454
|
client.put_object(Bucket=bucket, Key=key, Body=fileobj.getvalue())
|
|
409
455
|
|
|
410
456
|
##############################
|
|
411
|
-
#
|
|
457
|
+
# Datastore methods
|
|
458
|
+
##############################
|
|
459
|
+
|
|
460
|
+
def write_df(
|
|
461
|
+
self,
|
|
462
|
+
df: Any,
|
|
463
|
+
dst: str,
|
|
464
|
+
extension: str | None = None,
|
|
465
|
+
**kwargs,
|
|
466
|
+
) -> str:
|
|
467
|
+
"""
|
|
468
|
+
Write a dataframe to S3 based storage. Kwargs are passed to df.to_parquet().
|
|
469
|
+
|
|
470
|
+
Parameters
|
|
471
|
+
----------
|
|
472
|
+
df : Any
|
|
473
|
+
The dataframe.
|
|
474
|
+
dst : str
|
|
475
|
+
The destination path on S3 based storage.
|
|
476
|
+
extension : str
|
|
477
|
+
The extension of the file.
|
|
478
|
+
**kwargs : dict
|
|
479
|
+
Keyword arguments.
|
|
480
|
+
|
|
481
|
+
Returns
|
|
482
|
+
-------
|
|
483
|
+
str
|
|
484
|
+
The S3 path where the dataframe was saved.
|
|
485
|
+
"""
|
|
486
|
+
fileobj = BytesIO()
|
|
487
|
+
reader = get_reader_by_object(df)
|
|
488
|
+
reader.write_df(df, fileobj, extension=extension, **kwargs)
|
|
489
|
+
return self.upload_fileobject(fileobj, dst)
|
|
490
|
+
|
|
491
|
+
##############################
|
|
492
|
+
# Helper methods
|
|
412
493
|
##############################
|
|
413
494
|
|
|
414
|
-
def _get_bucket(self) -> str:
|
|
495
|
+
def _get_bucket(self, root: str) -> str:
|
|
415
496
|
"""
|
|
416
497
|
Get the name of the S3 bucket from the URI.
|
|
417
498
|
|
|
@@ -420,7 +501,7 @@ class S3Store(Store):
|
|
|
420
501
|
str
|
|
421
502
|
The name of the S3 bucket.
|
|
422
503
|
"""
|
|
423
|
-
return
|
|
504
|
+
return get_bucket_name(root)
|
|
424
505
|
|
|
425
506
|
def _get_client(self) -> S3Client:
|
|
426
507
|
"""
|
|
@@ -438,7 +519,7 @@ class S3Store(Store):
|
|
|
438
519
|
}
|
|
439
520
|
return boto3.client("s3", **cfg)
|
|
440
521
|
|
|
441
|
-
def _check_factory(self) -> tuple[S3Client, str]:
|
|
522
|
+
def _check_factory(self, root: str) -> tuple[S3Client, str]:
|
|
442
523
|
"""
|
|
443
524
|
Check if the S3 bucket is accessible by sending a head_bucket request.
|
|
444
525
|
|
|
@@ -448,7 +529,7 @@ class S3Store(Store):
|
|
|
448
529
|
A tuple containing the S3 client object and the name of the S3 bucket.
|
|
449
530
|
"""
|
|
450
531
|
client = self._get_client()
|
|
451
|
-
bucket = self._get_bucket()
|
|
532
|
+
bucket = self._get_bucket(root)
|
|
452
533
|
self._check_access_to_storage(client, bucket)
|
|
453
534
|
return client, bucket
|
|
454
535
|
|