digitalhub 0.8.1__py3-none-any.whl → 0.9.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- digitalhub/__init__.py +19 -2
- digitalhub/client/_base/api_builder.py +16 -0
- digitalhub/client/_base/client.py +31 -0
- digitalhub/client/api.py +2 -38
- digitalhub/client/dhcore/api_builder.py +100 -0
- digitalhub/client/dhcore/client.py +77 -24
- digitalhub/client/dhcore/enums.py +26 -0
- digitalhub/client/dhcore/env.py +2 -2
- digitalhub/client/dhcore/utils.py +17 -17
- digitalhub/client/local/api_builder.py +100 -0
- digitalhub/client/local/client.py +20 -0
- digitalhub/context/api.py +3 -38
- digitalhub/context/builder.py +10 -23
- digitalhub/context/context.py +20 -92
- digitalhub/entities/_base/context/entity.py +30 -22
- digitalhub/entities/_base/entity/_constructors/metadata.py +12 -1
- digitalhub/entities/_base/entity/_constructors/name.py +1 -1
- digitalhub/entities/_base/entity/_constructors/spec.py +1 -1
- digitalhub/entities/_base/entity/_constructors/status.py +3 -2
- digitalhub/entities/_base/entity/builder.py +6 -1
- digitalhub/entities/_base/entity/entity.py +30 -10
- digitalhub/entities/_base/entity/metadata.py +22 -0
- digitalhub/entities/_base/entity/spec.py +7 -2
- digitalhub/entities/_base/executable/entity.py +8 -8
- digitalhub/entities/_base/material/entity.py +48 -16
- digitalhub/entities/_base/material/status.py +0 -31
- digitalhub/entities/_base/material/utils.py +106 -0
- digitalhub/entities/_base/project/entity.py +341 -0
- digitalhub/entities/_base/unversioned/entity.py +1 -23
- digitalhub/entities/_base/versioned/entity.py +0 -25
- digitalhub/entities/_commons/enums.py +103 -0
- digitalhub/entities/_commons/utils.py +83 -0
- digitalhub/entities/_operations/processor.py +1747 -0
- digitalhub/entities/artifact/_base/builder.py +1 -1
- digitalhub/entities/artifact/_base/entity.py +1 -1
- digitalhub/entities/artifact/artifact/builder.py +2 -1
- digitalhub/entities/artifact/crud.py +46 -29
- digitalhub/entities/artifact/utils.py +62 -0
- digitalhub/entities/dataitem/_base/builder.py +1 -1
- digitalhub/entities/dataitem/_base/entity.py +6 -6
- digitalhub/entities/dataitem/crud.py +50 -66
- digitalhub/entities/dataitem/dataitem/builder.py +2 -1
- digitalhub/entities/dataitem/iceberg/builder.py +2 -1
- digitalhub/entities/dataitem/table/builder.py +2 -1
- digitalhub/entities/dataitem/table/entity.py +5 -10
- digitalhub/entities/dataitem/table/models.py +4 -5
- digitalhub/entities/dataitem/utils.py +137 -0
- digitalhub/entities/function/_base/builder.py +1 -1
- digitalhub/entities/function/_base/entity.py +5 -1
- digitalhub/entities/function/crud.py +36 -17
- digitalhub/entities/model/_base/builder.py +1 -1
- digitalhub/entities/model/_base/entity.py +1 -1
- digitalhub/entities/model/crud.py +46 -29
- digitalhub/entities/model/huggingface/builder.py +2 -1
- digitalhub/entities/model/huggingface/spec.py +4 -2
- digitalhub/entities/model/mlflow/builder.py +2 -1
- digitalhub/entities/model/mlflow/models.py +17 -9
- digitalhub/entities/model/mlflow/spec.py +6 -1
- digitalhub/entities/model/mlflow/utils.py +4 -2
- digitalhub/entities/model/model/builder.py +2 -1
- digitalhub/entities/model/sklearn/builder.py +2 -1
- digitalhub/entities/model/utils.py +62 -0
- digitalhub/entities/project/_base/builder.py +2 -2
- digitalhub/entities/project/_base/entity.py +82 -272
- digitalhub/entities/project/crud.py +110 -91
- digitalhub/entities/project/utils.py +35 -0
- digitalhub/entities/run/_base/builder.py +3 -1
- digitalhub/entities/run/_base/entity.py +52 -54
- digitalhub/entities/run/_base/spec.py +11 -7
- digitalhub/entities/run/crud.py +35 -17
- digitalhub/entities/secret/_base/builder.py +2 -2
- digitalhub/entities/secret/_base/entity.py +4 -10
- digitalhub/entities/secret/crud.py +36 -21
- digitalhub/entities/task/_base/builder.py +14 -14
- digitalhub/entities/task/_base/entity.py +6 -6
- digitalhub/entities/task/_base/models.py +29 -6
- digitalhub/entities/task/_base/spec.py +44 -13
- digitalhub/entities/task/_base/utils.py +18 -0
- digitalhub/entities/task/crud.py +35 -15
- digitalhub/entities/workflow/_base/builder.py +1 -1
- digitalhub/entities/workflow/_base/entity.py +14 -6
- digitalhub/entities/workflow/crud.py +36 -17
- digitalhub/factory/utils.py +1 -1
- digitalhub/readers/_base/reader.py +2 -2
- digitalhub/readers/_commons/enums.py +13 -0
- digitalhub/readers/api.py +3 -2
- digitalhub/readers/factory.py +12 -6
- digitalhub/readers/pandas/reader.py +20 -8
- digitalhub/runtimes/_base.py +0 -7
- digitalhub/stores/_base/store.py +53 -9
- digitalhub/stores/builder.py +5 -5
- digitalhub/stores/local/store.py +37 -2
- digitalhub/stores/remote/store.py +25 -3
- digitalhub/stores/s3/store.py +34 -7
- digitalhub/stores/sql/store.py +112 -45
- digitalhub/utils/exceptions.py +6 -0
- digitalhub/utils/file_utils.py +60 -2
- digitalhub/utils/generic_utils.py +45 -4
- digitalhub/utils/io_utils.py +18 -0
- digitalhub/utils/uri_utils.py +153 -15
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/METADATA +2 -2
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/RECORD +110 -113
- test/testkfp.py +4 -1
- digitalhub/datastores/_base/datastore.py +0 -85
- digitalhub/datastores/api.py +0 -37
- digitalhub/datastores/builder.py +0 -110
- digitalhub/datastores/local/datastore.py +0 -50
- digitalhub/datastores/remote/__init__.py +0 -0
- digitalhub/datastores/remote/datastore.py +0 -31
- digitalhub/datastores/s3/__init__.py +0 -0
- digitalhub/datastores/s3/datastore.py +0 -46
- digitalhub/datastores/sql/__init__.py +0 -0
- digitalhub/datastores/sql/datastore.py +0 -68
- digitalhub/entities/_base/api_utils.py +0 -620
- digitalhub/entities/_base/crud.py +0 -468
- digitalhub/entities/function/_base/models.py +0 -118
- digitalhub/entities/utils/__init__.py +0 -0
- digitalhub/entities/utils/api.py +0 -346
- digitalhub/entities/utils/entity_types.py +0 -19
- digitalhub/entities/utils/state.py +0 -31
- digitalhub/entities/utils/utils.py +0 -202
- /digitalhub/{context → entities/_base/project}/__init__.py +0 -0
- /digitalhub/{datastores → entities/_commons}/__init__.py +0 -0
- /digitalhub/{datastores/_base → entities/_operations}/__init__.py +0 -0
- /digitalhub/{datastores/local → readers/_commons}/__init__.py +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/LICENSE.txt +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/WHEEL +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/top_level.txt +0 -0
digitalhub/stores/s3/store.py
CHANGED
@@ -2,13 +2,14 @@ from __future__ import annotations
 
 from io import BytesIO
 from pathlib import Path
-from typing import Type
+from typing import Any, Type
 from urllib.parse import urlparse
 
 import boto3
 import botocore.client  # pylint: disable=unused-import
 from botocore.exceptions import ClientError
 
+from digitalhub.readers.api import get_reader_by_object
 from digitalhub.stores._base.store import Store, StoreConfig
 from digitalhub.utils.exceptions import StoreError
 from digitalhub.utils.file_utils import get_file_info_from_s3, get_file_mime_type
@@ -46,7 +47,7 @@ class S3Store(Store):
         self.config = config
 
     ##############################
-    #
+    # I/O methods
     ##############################
 
     def download(
@@ -141,16 +142,13 @@
         list[tuple[str, str]]
            Returns the list of destination and source paths of the uploaded artifacts.
        """
-
        # Destination handling
 
        # If no destination is provided, build key from source
        # Otherwise build key from destination
        if dst is None:
            raise StoreError(
-                "Destination must be provided. If source is a list of files or a directory, ",
-                "destination must be a partition, e.g. 's3://bucket/partition/', ",
-                "otherwise a destination key, e.g. 's3://bucket/key'",
+                "Destination must be provided. If source is a list of files or a directory, destination must be a partition, e.g. 's3://bucket/partition/' otherwise a destination key, e.g. 's3://bucket/key'"
            )
        else:
            dst = self._get_key(dst)
@@ -408,7 +406,36 @@
         client.put_object(Bucket=bucket, Key=key, Body=fileobj.getvalue())
 
     ##############################
-    #
+    # Datastore methods
+    ##############################
+
+    def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
+        """
+        Write a dataframe to S3 based storage. Kwargs are passed to df.to_parquet().
+
+        Parameters
+        ----------
+        df : Any
+            The dataframe.
+        dst : str
+            The destination path on S3 based storage.
+        **kwargs : dict
+            Keyword arguments.
+
+        Returns
+        -------
+        str
+            The S3 path where the dataframe was saved.
+        """
+        fileobj = BytesIO()
+        reader = get_reader_by_object(df)
+        reader.write_df(df, fileobj, extension=extension, **kwargs)
+
+        key = self._get_key(dst)
+        return self.upload_fileobject(fileobj, key)
+
+    ##############################
+    # Helper methods
     ##############################
 
     def _get_bucket(self) -> str:
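For context on the reader abstraction used above: the new S3Store.write_df serializes the dataframe into an in-memory buffer and uploads that buffer as a single object. A rough standalone sketch of the same flow with pandas and boto3 directly (the function name, bucket and key are illustrative, not part of the package):

from io import BytesIO

import boto3
import pandas as pd


def write_df_to_s3(df: pd.DataFrame, bucket: str, key: str) -> str:
    # Serialize the dataframe to an in-memory Parquet buffer,
    # roughly what reader.write_df(df, fileobj, ...) does for pandas.
    fileobj = BytesIO()
    df.to_parquet(fileobj)

    # Upload the buffer as a single object, as upload_fileobject() does above.
    boto3.client("s3").put_object(Bucket=bucket, Key=key, Body=fileobj.getvalue())
    return f"s3://{bucket}/{key}"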
digitalhub/stores/sql/store.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
 
 import pyarrow as pa
 import pyarrow.parquet as pq
@@ -9,6 +10,7 @@ from sqlalchemy.engine import Engine
 from sqlalchemy.engine.row import LegacyRow
 from sqlalchemy.exc import SQLAlchemyError
 
+from digitalhub.readers.api import get_reader_by_object
 from digitalhub.stores._base.store import Store, StoreConfig
 from digitalhub.utils.exceptions import StoreError
 
@@ -45,7 +47,7 @@ class SqlStore(Store):
         self.config = config
 
     ##############################
-    #
+    # I/O methods
     ##############################
 
     def download(
@@ -112,15 +114,118 @@
         """
        Get file information from SQL based storage.
 
-
-
-
-
+        Parameters
+        ----------
+        paths : list[str]
+            List of source paths.
+
+        Returns
+        -------
+        list[dict]
+            Returns files metadata.
        """
-
+        return []
 
     ##############################
-    # Private
+    # Private I/O methods
+    ##############################
+
+    def _download_table(self, schema: str, table: str, dst: str) -> str:
+        """
+        Download a table from SQL based storage.
+
+        Parameters
+        ----------
+        schema : str
+            The origin schema.
+        table : str
+            The origin table.
+        dst : str
+            The destination path.
+
+        Returns
+        -------
+        str
+            The destination path.
+        """
+        engine = self._check_factory(schema=schema)
+
+        # Read the table from the database
+        sa_table = Table(table, MetaData(), autoload_with=engine)
+        query = sa_table.select()
+        with engine.begin() as conn:
+            result: list[LegacyRow] = conn.execute(query).fetchall()
+
+        # Parse the result
+        data = self._parse_result(result)
+
+        # Convert the result to a pyarrow table and
+        # write the pyarrow table to a Parquet file
+        arrow_table = pa.Table.from_pydict(data)
+        pq.write_table(arrow_table, dst)
+
+        engine.dispose()
+
+        return dst
+
+    ##############################
+    # Datastore methods
+    ##############################
+
+    def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
+        """
+        Write a dataframe to a database. Kwargs are passed to df.to_sql().
+
+        Parameters
+        ----------
+        df : Any
+            The dataframe to write.
+        dst : str
+            The destination of the dataframe.
+        **kwargs : dict
+            Keyword arguments.
+
+        Returns
+        -------
+        str
+            Path of written dataframe.
+        """
+        schema = self._get_schema(dst)
+        table = self._get_table_name(dst)
+        return self._upload_table(df, schema, table, **kwargs)
+
+    ##############################
+    # Private Datastore methods
+    ##############################
+
+    def _upload_table(self, df: Any, schema: str, table: str, **kwargs) -> str:
+        """
+        Upload a table to SQL based storage.
+
+        Parameters
+        ----------
+        df : DataFrame
+            The dataframe.
+        schema : str
+            Destination schema.
+        table : str
+            Destination table.
+        **kwargs : dict
+            Keyword arguments.
+
+        Returns
+        -------
+        str
+            The SQL URI where the dataframe was saved.
+        """
+        reader = get_reader_by_object(df)
+        engine = self._check_factory()
+        reader.write_table(df, table, engine, schema, **kwargs)
+        engine.dispose()
+        return f"sql://{engine.url.database}/{schema}/{table}"
+
+    ##############################
+    # Helper methods
     ##############################
 
     def _get_connection_string(self) -> str:
@@ -265,44 +370,6 @@
         engine.dispose()
         raise StoreError("No access to db!")
 
-    def _download_table(self, schema: str, table: str, dst: str) -> str:
-        """
-        Download a table from SQL based storage.
-
-        Parameters
-        ----------
-        schema : str
-            The origin schema.
-        table : str
-            The origin table.
-        dst : str
-            The destination path.
-
-        Returns
-        -------
-        str
-            The destination path.
-        """
-        engine = self._check_factory(schema=schema)
-
-        # Read the table from the database
-        sa_table = Table(table, MetaData(), autoload_with=engine)
-        query = sa_table.select()
-        with engine.begin() as conn:
-            result: list[LegacyRow] = conn.execute(query).fetchall()
-
-        # Parse the result
-        data = self._parse_result(result)
-
-        # Convert the result to a pyarrow table and
-        # write the pyarrow table to a Parquet file
-        arrow_table = pa.Table.from_pydict(data)
-        pq.write_table(arrow_table, dst)
-
-        engine.dispose()
-
-        return dst
-
     @staticmethod
     def _parse_result(result: list[LegacyRow]) -> dict:
         """
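The SQL write path resolves the destination schema and table from the dst URI and delegates the actual insert to the reader's write_table. A minimal sketch of an equivalent flow with pandas and SQLAlchemy (connection string, schema and table names are placeholders, and if_exists="replace" is an assumption, not necessarily what the reader uses):

import pandas as pd
import sqlalchemy


def write_df_to_sql(df: pd.DataFrame, conn_str: str, schema: str, table: str) -> str:
    # Build the engine, write the table, then dispose of the engine,
    # mirroring SqlStore._upload_table() above.
    engine = sqlalchemy.create_engine(conn_str)
    df.to_sql(table, engine, schema=schema, if_exists="replace", index=False)
    engine.dispose()
    return f"sql://{engine.url.database}/{schema}/{table}"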
digitalhub/utils/exceptions.py
CHANGED
digitalhub/utils/file_utils.py
CHANGED
@@ -20,6 +20,9 @@ class FileInfo(BaseModel):
     hash: str = None
     last_modified: str = None
 
+    def to_dict(self):
+        return self.model_dump()
+
 
 def calculate_blob_hash(data_path: str) -> str:
     """
@@ -157,7 +160,7 @@ def get_file_info_from_local(path: str, src_path: str) -> None | dict:
             size=size,
             hash=hash,
             last_modified=last_modified,
-        ).
+        ).to_dict()
     except Exception:
         return None
 
@@ -199,6 +202,61 @@ def get_file_info_from_s3(path: str, metadata: dict) -> None | dict:
             size=size,
             hash=file_hash,
             last_modified=last_modified,
-        ).
+        ).to_dict()
     except Exception:
         return None
+
+
+def eval_zip_type(source: str) -> bool:
+    """
+    Evaluate zip type.
+
+    Parameters
+    ----------
+    source : str
+        Source.
+
+    Returns
+    -------
+    bool
+        True if path is zip.
+    """
+    extension = source.endswith(".zip")
+    mime_zip = get_file_mime_type(source) == "application/zip"
+    return extension or mime_zip
+
+
+def eval_text_type(source: str) -> bool:
+    """
+    Evaluate text type.
+
+    Parameters
+    ----------
+    source : str
+        Source.
+
+    Returns
+    -------
+    bool
+        True if path is text.
+    """
+    return get_file_mime_type(source) == "text/plain"
+
+
+def eval_py_type(source: str) -> bool:
+    """
+    Evaluate python type.
+
+    Parameters
+    ----------
+    source : str
+        Source.
+
+    Returns
+    -------
+    bool
+        True if path is python.
+    """
+    extension = source.endswith(".py")
+    mime_py = get_file_mime_type(source) == "text/x-python"
+    return extension or mime_py
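The new eval_* helpers combine an extension check with a MIME-type check. The same pattern using only the standard library, for illustration (the package's get_file_mime_type helper may be implemented differently):

import mimetypes


def looks_like_python_source(path: str) -> bool:
    # Extension-or-MIME check, analogous to eval_py_type() above.
    mime, _ = mimetypes.guess_type(path)
    return path.endswith(".py") or mime == "text/x-python"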
digitalhub/utils/generic_utils.py
CHANGED
@@ -1,14 +1,16 @@
 from __future__ import annotations
 
 import base64
+import importlib.util as imputil
 import json
 from datetime import datetime
+from enum import Enum
 from pathlib import Path
-from typing import Any
+from typing import Any, Callable
 from zipfile import ZipFile
 
 import numpy as np
-
+import requests
 from slugify import slugify
 
 from digitalhub.utils.io_utils import read_text
@@ -26,7 +28,7 @@ def get_timestamp() -> str:
     return datetime.now().astimezone().isoformat()
 
 
-def
+def decode_base64_string(string: str) -> str:
     """
     Decode a string from base64.
 
@@ -92,7 +94,7 @@ def requests_chunk_download(source: str, filename: Path) -> None:
     -------
     None
     """
-    with
+    with requests.get(source, stream=True) as r:
         r.raise_for_status()
         with filename.open("wb") as f:
             for chunk in r.iter_content(chunk_size=8192):
@@ -181,3 +183,42 @@ def slugify_string(filename: str) -> str:
         The sanitized filename.
     """
     return slugify(filename, max_length=255)
+
+
+def import_function(path: Path, handler: str) -> Callable:
+    """
+    Import a function from a module.
+
+    Parameters
+    ----------
+    path : Path
+        Path where the function source is located.
+    handler : str
+        Function name.
+
+    Returns
+    -------
+    Callable
+        Function.
+    """
+    spec = imputil.spec_from_file_location(path.stem, path)
+    mod = imputil.module_from_spec(spec)
+    spec.loader.exec_module(mod)
+    return getattr(mod, handler)
+
+
+def list_enum(enum: Enum) -> list:
+    """
+    Get all values of an enum.
+
+    Parameters
+    ----------
+    enum : Enum
+        Enum to get values from.
+
+    Returns
+    -------
+    list
+        List of enum values.
+    """
+    return [e.value for e in enum]
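A short, hedged usage sketch of the two new helpers (my_module.py and its handler function are hypothetical; SchemeCategory comes from the uri_utils changes further down):

from pathlib import Path

from digitalhub.utils.generic_utils import import_function, list_enum
from digitalhub.utils.uri_utils import SchemeCategory

# Load a function by name from a source file (hypothetical module on disk).
handler = import_function(Path("my_module.py"), "handler")

# Collect every value defined on an Enum.
print(list_enum(SchemeCategory))  # ["s3", "local", "remote", "sql", "git"]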
digitalhub/utils/io_utils.py
CHANGED
@@ -32,6 +32,24 @@ def write_yaml(filepath: str | Path, obj: dict | list[dict]) -> None:
         yaml.dump(obj, out_file, sort_keys=False)
 
 
+def write_text(filepath: Path, text: str) -> None:
+    """
+    Write text to a file.
+
+    Parameters
+    ----------
+    filepath : Path
+        The file path to write.
+    text : str
+        The text to write.
+
+    Returns
+    -------
+    None
+    """
+    filepath.write_text(text, encoding="utf-8")
+
+
 ##############################
 # Readers
 ##############################
digitalhub/utils/uri_utils.py
CHANGED
@@ -1,7 +1,78 @@
 from __future__ import annotations
 
+from enum import Enum
 from urllib.parse import urlparse
 
+from digitalhub.utils.generic_utils import list_enum
+
+
+class S3Schemes(Enum):
+    """
+    S3 schemes.
+    """
+
+    S3 = "s3"
+    S3A = "s3a"
+    S3N = "s3n"
+    ZIP_S3 = "zip+s3"
+
+
+class LocalSchemes(Enum):
+    """
+    Local schemes.
+    """
+
+    LOCAL = ""
+
+
+class InvalidLocalSchemes(Enum):
+    """
+    Local schemes.
+    """
+
+    FILE = "file"
+    LOCAL = "local"
+
+
+class RemoteSchemes(Enum):
+    """
+    Remote schemes.
+    """
+
+    HTTP = "http"
+    HTTPS = "https"
+
+
+class SqlSchemes(Enum):
+    """
+    Sql schemes.
+    """
+
+    SQL = "sql"
+    POSTGRESQL = "postgresql"
+
+
+class GitSchemes(Enum):
+    """
+    Git schemes.
+    """
+
+    GIT = "git"
+    GIT_HTTP = "git+http"
+    GIT_HTTPS = "git+https"
+
+
+class SchemeCategory(Enum):
+    """
+    Scheme types.
+    """
+
+    S3 = "s3"
+    LOCAL = "local"
+    REMOTE = "remote"
+    SQL = "sql"
+    GIT = "git"
+
 
 def map_uri_scheme(uri: str) -> str:
     """
@@ -23,22 +94,22 @@ def map_uri_scheme(uri: str) -> str:
         If the scheme is unknown.
     """
     scheme = urlparse(uri).scheme
-    if scheme in
-        return
-    if scheme in
-        raise ValueError("For local path, do not use any scheme")
-    if scheme in
-        return
-    if scheme in
-        return
-    if scheme in
-        return
-    if scheme in
-        return
+    if scheme in list_enum(LocalSchemes):
+        return SchemeCategory.LOCAL.value
+    if scheme in list_enum(InvalidLocalSchemes):
+        raise ValueError("For local path, do not use any scheme.")
+    if scheme in list_enum(RemoteSchemes):
+        return SchemeCategory.REMOTE.value
+    if scheme in list_enum(S3Schemes):
+        return SchemeCategory.S3.value
+    if scheme in list_enum(SqlSchemes):
+        return SchemeCategory.SQL.value
+    if scheme in list_enum(GitSchemes):
+        return SchemeCategory.GIT.value
     raise ValueError(f"Unknown scheme '{scheme}'!")
 
 
-def check_local_path(path: str) -> bool:
+def has_local_scheme(path: str) -> bool:
     """
     Check if path is local.
 
@@ -52,5 +123,72 @@ def check_local_path(path: str) -> bool:
     bool
         True if path is local.
     """
-
-
+    return map_uri_scheme(path) == SchemeCategory.LOCAL.value
+
+
+def has_remote_scheme(path: str) -> bool:
+    """
+    Check if path is remote.
+
+    Parameters
+    ----------
+    path : str
+        Path of some source.
+
+    Returns
+    -------
+    bool
+        True if path is remote.
+    """
+    return map_uri_scheme(path) == SchemeCategory.REMOTE.value
+
+
+def has_s3_scheme(path: str) -> bool:
+    """
+    Check if path is s3.
+
+    Parameters
+    ----------
+    path : str
+        Path of some source.
+
+    Returns
+    -------
+    bool
+        True if path is s3.
+    """
+    return map_uri_scheme(path) == SchemeCategory.S3.value
+
+
+def has_sql_scheme(path: str) -> bool:
+    """
+    Check if path is sql.
+
+    Parameters
+    ----------
+    path : str
+        Path of some source.
+
+    Returns
+    -------
+    bool
+        True if path is sql.
+    """
+    return map_uri_scheme(path) == SchemeCategory.SQL.value
+
+
+def has_git_scheme(path: str) -> bool:
+    """
+    Check if path is git.
+
+    Parameters
+    ----------
+    path : str
+        Path of some source.
+
+    Returns
+    -------
+    bool
+        True if path is git.
+    """
+    return map_uri_scheme(path) == SchemeCategory.GIT.value
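With the scheme enums in place, map_uri_scheme and the has_*_scheme helpers classify a URI by its scheme category. An illustrative check (bucket, database and repository names are made up):

from digitalhub.utils.uri_utils import has_git_scheme, has_s3_scheme, map_uri_scheme

print(map_uri_scheme("s3://my-bucket/data.parquet"))       # "s3"
print(map_uri_scheme("sql://db/schema/table"))             # "sql"
print(map_uri_scheme("data/local.csv"))                    # "local" (no scheme)
print(has_s3_scheme("zip+s3://my-bucket/archive.zip"))     # True
print(has_git_scheme("git+https://example.com/repo.git"))  # True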
{digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: digitalhub
-Version: 0.8.1
+Version: 0.9.0b0
 Summary: Python SDK for Digitalhub
 Author-email: Fondazione Bruno Kessler <dslab@fbk.eu>, Matteo Martini <mmartini@fbk.eu>
 License: Apache License
@@ -230,7 +230,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 Requires-Dist: boto3
-Requires-Dist: pydantic
+Requires-Dist: pydantic
 Requires-Dist: sqlalchemy<2
 Requires-Dist: pyarrow
 Requires-Dist: numpy<2