digitalhub 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +19 -2
- digitalhub/client/_base/api_builder.py +16 -0
- digitalhub/client/_base/client.py +67 -0
- digitalhub/client/_base/key_builder.py +52 -0
- digitalhub/client/api.py +2 -38
- digitalhub/client/dhcore/api_builder.py +100 -0
- digitalhub/client/dhcore/client.py +81 -25
- digitalhub/client/dhcore/enums.py +27 -0
- digitalhub/client/dhcore/env.py +2 -2
- digitalhub/client/dhcore/key_builder.py +58 -0
- digitalhub/client/dhcore/utils.py +17 -17
- digitalhub/client/local/api_builder.py +100 -0
- digitalhub/client/local/client.py +22 -0
- digitalhub/client/local/key_builder.py +58 -0
- digitalhub/context/api.py +3 -38
- digitalhub/context/builder.py +10 -23
- digitalhub/context/context.py +20 -92
- digitalhub/entities/_base/context/entity.py +30 -22
- digitalhub/entities/_base/entity/_constructors/metadata.py +12 -1
- digitalhub/entities/_base/entity/_constructors/name.py +1 -1
- digitalhub/entities/_base/entity/_constructors/spec.py +1 -1
- digitalhub/entities/_base/entity/_constructors/status.py +3 -2
- digitalhub/entities/_base/entity/builder.py +6 -1
- digitalhub/entities/_base/entity/entity.py +32 -10
- digitalhub/entities/_base/entity/metadata.py +22 -0
- digitalhub/entities/_base/entity/spec.py +7 -2
- digitalhub/entities/_base/executable/entity.py +8 -8
- digitalhub/entities/_base/material/entity.py +49 -17
- digitalhub/entities/_base/material/status.py +0 -31
- digitalhub/entities/_base/material/utils.py +106 -0
- digitalhub/entities/_base/project/entity.py +341 -0
- digitalhub/entities/_base/unversioned/entity.py +3 -24
- digitalhub/entities/_base/versioned/entity.py +2 -26
- digitalhub/entities/_commons/enums.py +103 -0
- digitalhub/entities/_commons/utils.py +83 -0
- digitalhub/entities/_operations/processor.py +1873 -0
- digitalhub/entities/artifact/_base/builder.py +1 -1
- digitalhub/entities/artifact/_base/entity.py +1 -1
- digitalhub/entities/artifact/artifact/builder.py +2 -1
- digitalhub/entities/artifact/crud.py +46 -29
- digitalhub/entities/artifact/utils.py +62 -0
- digitalhub/entities/dataitem/_base/builder.py +1 -1
- digitalhub/entities/dataitem/_base/entity.py +6 -6
- digitalhub/entities/dataitem/crud.py +50 -66
- digitalhub/entities/dataitem/dataitem/builder.py +2 -1
- digitalhub/entities/dataitem/iceberg/builder.py +2 -1
- digitalhub/entities/dataitem/table/builder.py +2 -1
- digitalhub/entities/dataitem/table/entity.py +5 -10
- digitalhub/entities/dataitem/table/models.py +4 -5
- digitalhub/entities/dataitem/utils.py +137 -0
- digitalhub/entities/function/_base/builder.py +1 -1
- digitalhub/entities/function/_base/entity.py +6 -2
- digitalhub/entities/function/crud.py +36 -17
- digitalhub/entities/model/_base/builder.py +1 -1
- digitalhub/entities/model/_base/entity.py +1 -1
- digitalhub/entities/model/crud.py +46 -29
- digitalhub/entities/model/huggingface/builder.py +2 -1
- digitalhub/entities/model/huggingface/spec.py +4 -2
- digitalhub/entities/model/mlflow/builder.py +2 -1
- digitalhub/entities/model/mlflow/models.py +17 -9
- digitalhub/entities/model/mlflow/spec.py +6 -1
- digitalhub/entities/model/mlflow/utils.py +4 -2
- digitalhub/entities/model/model/builder.py +2 -1
- digitalhub/entities/model/sklearn/builder.py +2 -1
- digitalhub/entities/model/utils.py +62 -0
- digitalhub/entities/project/_base/builder.py +2 -2
- digitalhub/entities/project/_base/entity.py +82 -272
- digitalhub/entities/project/crud.py +110 -91
- digitalhub/entities/project/utils.py +35 -0
- digitalhub/entities/run/_base/builder.py +3 -1
- digitalhub/entities/run/_base/entity.py +52 -54
- digitalhub/entities/run/_base/spec.py +15 -7
- digitalhub/entities/run/crud.py +35 -17
- digitalhub/entities/secret/_base/builder.py +2 -2
- digitalhub/entities/secret/_base/entity.py +4 -10
- digitalhub/entities/secret/crud.py +36 -21
- digitalhub/entities/task/_base/builder.py +14 -14
- digitalhub/entities/task/_base/entity.py +21 -14
- digitalhub/entities/task/_base/models.py +35 -6
- digitalhub/entities/task/_base/spec.py +50 -13
- digitalhub/entities/task/_base/utils.py +18 -0
- digitalhub/entities/task/crud.py +35 -15
- digitalhub/entities/workflow/_base/builder.py +1 -1
- digitalhub/entities/workflow/_base/entity.py +22 -6
- digitalhub/entities/workflow/crud.py +36 -17
- digitalhub/factory/utils.py +1 -1
- digitalhub/readers/_base/reader.py +2 -2
- digitalhub/readers/_commons/enums.py +13 -0
- digitalhub/readers/api.py +3 -2
- digitalhub/readers/factory.py +12 -6
- digitalhub/readers/pandas/reader.py +20 -8
- digitalhub/runtimes/_base.py +0 -7
- digitalhub/runtimes/enums.py +12 -0
- digitalhub/stores/_base/store.py +59 -11
- digitalhub/stores/builder.py +5 -5
- digitalhub/stores/local/store.py +43 -4
- digitalhub/stores/remote/store.py +31 -5
- digitalhub/stores/s3/store.py +129 -48
- digitalhub/stores/sql/store.py +122 -47
- digitalhub/utils/exceptions.py +6 -0
- digitalhub/utils/file_utils.py +60 -2
- digitalhub/utils/generic_utils.py +45 -4
- digitalhub/utils/io_utils.py +18 -0
- digitalhub/utils/s3_utils.py +17 -0
- digitalhub/utils/uri_utils.py +153 -15
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/LICENSE.txt +1 -1
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/METADATA +3 -3
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/RECORD +116 -114
- test/local/instances/test_validate.py +55 -0
- test/testkfp.py +4 -1
- digitalhub/datastores/_base/datastore.py +0 -85
- digitalhub/datastores/api.py +0 -37
- digitalhub/datastores/builder.py +0 -110
- digitalhub/datastores/local/datastore.py +0 -50
- digitalhub/datastores/remote/__init__.py +0 -0
- digitalhub/datastores/remote/datastore.py +0 -31
- digitalhub/datastores/s3/__init__.py +0 -0
- digitalhub/datastores/s3/datastore.py +0 -46
- digitalhub/datastores/sql/__init__.py +0 -0
- digitalhub/datastores/sql/datastore.py +0 -68
- digitalhub/entities/_base/api_utils.py +0 -620
- digitalhub/entities/_base/crud.py +0 -468
- digitalhub/entities/function/_base/models.py +0 -118
- digitalhub/entities/utils/__init__.py +0 -0
- digitalhub/entities/utils/api.py +0 -346
- digitalhub/entities/utils/entity_types.py +0 -19
- digitalhub/entities/utils/state.py +0 -31
- digitalhub/entities/utils/utils.py +0 -202
- /digitalhub/{context → entities/_base/project}/__init__.py +0 -0
- /digitalhub/{datastores → entities/_commons}/__init__.py +0 -0
- /digitalhub/{datastores/_base → entities/_operations}/__init__.py +0 -0
- /digitalhub/{datastores/local → readers/_commons}/__init__.py +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/WHEEL +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/top_level.txt +0 -0
digitalhub/stores/sql/store.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
6
|
import pyarrow as pa
|
|
6
7
|
import pyarrow.parquet as pq
|
|
@@ -9,6 +10,7 @@ from sqlalchemy.engine import Engine
|
|
|
9
10
|
from sqlalchemy.engine.row import LegacyRow
|
|
10
11
|
from sqlalchemy.exc import SQLAlchemyError
|
|
11
12
|
|
|
13
|
+
from digitalhub.readers.api import get_reader_by_object
|
|
12
14
|
from digitalhub.stores._base.store import Store, StoreConfig
|
|
13
15
|
from digitalhub.utils.exceptions import StoreError
|
|
14
16
|
|
|
@@ -45,7 +47,7 @@ class SqlStore(Store):
|
|
|
45
47
|
self.config = config
|
|
46
48
|
|
|
47
49
|
##############################
|
|
48
|
-
#
|
|
50
|
+
# I/O methods
|
|
49
51
|
##############################
|
|
50
52
|
|
|
51
53
|
def download(
|
|
@@ -97,7 +99,11 @@ class SqlStore(Store):
|
|
|
97
99
|
table = self._get_table_name(root)
|
|
98
100
|
return self._download_table(schema, table, str(dst))
|
|
99
101
|
|
|
100
|
-
def upload(
|
|
102
|
+
def upload(
|
|
103
|
+
self,
|
|
104
|
+
src: str | list[str],
|
|
105
|
+
dst: str,
|
|
106
|
+
) -> list[tuple[str, str]]:
|
|
101
107
|
"""
|
|
102
108
|
Upload an artifact to storage.
|
|
103
109
|
|
|
@@ -108,19 +114,126 @@ class SqlStore(Store):
|
|
|
108
114
|
"""
|
|
109
115
|
raise StoreError("SQL store does not support upload.")
|
|
110
116
|
|
|
111
|
-
def get_file_info(
|
|
117
|
+
def get_file_info(
|
|
118
|
+
self,
|
|
119
|
+
root: str,
|
|
120
|
+
paths: list[tuple[str, str]],
|
|
121
|
+
) -> list[dict]:
|
|
112
122
|
"""
|
|
113
123
|
Get file information from SQL based storage.
|
|
114
124
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
125
|
+
Parameters
|
|
126
|
+
----------
|
|
127
|
+
paths : list[str]
|
|
128
|
+
List of source paths.
|
|
129
|
+
|
|
130
|
+
Returns
|
|
131
|
+
-------
|
|
132
|
+
list[dict]
|
|
133
|
+
Returns files metadata.
|
|
134
|
+
"""
|
|
135
|
+
return []
|
|
136
|
+
|
|
137
|
+
##############################
|
|
138
|
+
# Private I/O methods
|
|
139
|
+
##############################
|
|
140
|
+
|
|
141
|
+
def _download_table(self, schema: str, table: str, dst: str) -> str:
|
|
142
|
+
"""
|
|
143
|
+
Download a table from SQL based storage.
|
|
144
|
+
|
|
145
|
+
Parameters
|
|
146
|
+
----------
|
|
147
|
+
schema : str
|
|
148
|
+
The origin schema.
|
|
149
|
+
table : str
|
|
150
|
+
The origin table.
|
|
151
|
+
dst : str
|
|
152
|
+
The destination path.
|
|
153
|
+
|
|
154
|
+
Returns
|
|
155
|
+
-------
|
|
156
|
+
str
|
|
157
|
+
The destination path.
|
|
158
|
+
"""
|
|
159
|
+
engine = self._check_factory(schema=schema)
|
|
160
|
+
|
|
161
|
+
# Read the table from the database
|
|
162
|
+
sa_table = Table(table, MetaData(), autoload_with=engine)
|
|
163
|
+
query = sa_table.select()
|
|
164
|
+
with engine.begin() as conn:
|
|
165
|
+
result: list[LegacyRow] = conn.execute(query).fetchall()
|
|
166
|
+
|
|
167
|
+
# Parse the result
|
|
168
|
+
data = self._parse_result(result)
|
|
169
|
+
|
|
170
|
+
# Convert the result to a pyarrow table and
|
|
171
|
+
# write the pyarrow table to a Parquet file
|
|
172
|
+
arrow_table = pa.Table.from_pydict(data)
|
|
173
|
+
pq.write_table(arrow_table, dst)
|
|
174
|
+
|
|
175
|
+
engine.dispose()
|
|
176
|
+
|
|
177
|
+
return dst
|
|
178
|
+
|
|
179
|
+
##############################
|
|
180
|
+
# Datastore methods
|
|
181
|
+
##############################
|
|
182
|
+
|
|
183
|
+
def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
|
|
184
|
+
"""
|
|
185
|
+
Write a dataframe to a database. Kwargs are passed to df.to_sql().
|
|
186
|
+
|
|
187
|
+
Parameters
|
|
188
|
+
----------
|
|
189
|
+
df : Any
|
|
190
|
+
The dataframe to write.
|
|
191
|
+
dst : str
|
|
192
|
+
The destination of the dataframe.
|
|
193
|
+
**kwargs : dict
|
|
194
|
+
Keyword arguments.
|
|
195
|
+
|
|
196
|
+
Returns
|
|
197
|
+
-------
|
|
198
|
+
str
|
|
199
|
+
Path of written dataframe.
|
|
200
|
+
"""
|
|
201
|
+
schema = self._get_schema(dst)
|
|
202
|
+
table = self._get_table_name(dst)
|
|
203
|
+
return self._upload_table(df, schema, table, **kwargs)
|
|
204
|
+
|
|
205
|
+
##############################
|
|
206
|
+
# Private Datastore methods
|
|
207
|
+
##############################
|
|
208
|
+
|
|
209
|
+
def _upload_table(self, df: Any, schema: str, table: str, **kwargs) -> str:
|
|
210
|
+
"""
|
|
211
|
+
Upload a table to SQL based storage.
|
|
212
|
+
|
|
213
|
+
Parameters
|
|
214
|
+
----------
|
|
215
|
+
df : DataFrame
|
|
216
|
+
The dataframe.
|
|
217
|
+
schema : str
|
|
218
|
+
Destination schema.
|
|
219
|
+
table : str
|
|
220
|
+
Destination table.
|
|
221
|
+
**kwargs : dict
|
|
222
|
+
Keyword arguments.
|
|
223
|
+
|
|
224
|
+
Returns
|
|
225
|
+
-------
|
|
226
|
+
str
|
|
227
|
+
The SQL URI where the dataframe was saved.
|
|
119
228
|
"""
|
|
120
|
-
|
|
229
|
+
reader = get_reader_by_object(df)
|
|
230
|
+
engine = self._check_factory()
|
|
231
|
+
reader.write_table(df, table, engine, schema, **kwargs)
|
|
232
|
+
engine.dispose()
|
|
233
|
+
return f"sql://{engine.url.database}/{schema}/{table}"
|
|
121
234
|
|
|
122
235
|
##############################
|
|
123
|
-
#
|
|
236
|
+
# Helper methods
|
|
124
237
|
##############################
|
|
125
238
|
|
|
126
239
|
def _get_connection_string(self) -> str:
|
|
@@ -265,44 +378,6 @@ class SqlStore(Store):
|
|
|
265
378
|
engine.dispose()
|
|
266
379
|
raise StoreError("No access to db!")
|
|
267
380
|
|
|
268
|
-
def _download_table(self, schema: str, table: str, dst: str) -> str:
|
|
269
|
-
"""
|
|
270
|
-
Download a table from SQL based storage.
|
|
271
|
-
|
|
272
|
-
Parameters
|
|
273
|
-
----------
|
|
274
|
-
schema : str
|
|
275
|
-
The origin schema.
|
|
276
|
-
table : str
|
|
277
|
-
The origin table.
|
|
278
|
-
dst : str
|
|
279
|
-
The destination path.
|
|
280
|
-
|
|
281
|
-
Returns
|
|
282
|
-
-------
|
|
283
|
-
str
|
|
284
|
-
The destination path.
|
|
285
|
-
"""
|
|
286
|
-
engine = self._check_factory(schema=schema)
|
|
287
|
-
|
|
288
|
-
# Read the table from the database
|
|
289
|
-
sa_table = Table(table, MetaData(), autoload_with=engine)
|
|
290
|
-
query = sa_table.select()
|
|
291
|
-
with engine.begin() as conn:
|
|
292
|
-
result: list[LegacyRow] = conn.execute(query).fetchall()
|
|
293
|
-
|
|
294
|
-
# Parse the result
|
|
295
|
-
data = self._parse_result(result)
|
|
296
|
-
|
|
297
|
-
# Convert the result to a pyarrow table and
|
|
298
|
-
# write the pyarrow table to a Parquet file
|
|
299
|
-
arrow_table = pa.Table.from_pydict(data)
|
|
300
|
-
pq.write_table(arrow_table, dst)
|
|
301
|
-
|
|
302
|
-
engine.dispose()
|
|
303
|
-
|
|
304
|
-
return dst
|
|
305
|
-
|
|
306
381
|
@staticmethod
|
|
307
382
|
def _parse_result(result: list[LegacyRow]) -> dict:
|
|
308
383
|
"""
|
digitalhub/utils/exceptions.py
CHANGED
digitalhub/utils/file_utils.py
CHANGED
|
@@ -20,6 +20,9 @@ class FileInfo(BaseModel):
|
|
|
20
20
|
hash: str = None
|
|
21
21
|
last_modified: str = None
|
|
22
22
|
|
|
23
|
+
def to_dict(self):
|
|
24
|
+
return self.model_dump()
|
|
25
|
+
|
|
23
26
|
|
|
24
27
|
def calculate_blob_hash(data_path: str) -> str:
|
|
25
28
|
"""
|
|
@@ -157,7 +160,7 @@ def get_file_info_from_local(path: str, src_path: str) -> None | dict:
|
|
|
157
160
|
size=size,
|
|
158
161
|
hash=hash,
|
|
159
162
|
last_modified=last_modified,
|
|
160
|
-
).
|
|
163
|
+
).to_dict()
|
|
161
164
|
except Exception:
|
|
162
165
|
return None
|
|
163
166
|
|
|
@@ -199,6 +202,61 @@ def get_file_info_from_s3(path: str, metadata: dict) -> None | dict:
|
|
|
199
202
|
size=size,
|
|
200
203
|
hash=file_hash,
|
|
201
204
|
last_modified=last_modified,
|
|
202
|
-
).
|
|
205
|
+
).to_dict()
|
|
203
206
|
except Exception:
|
|
204
207
|
return None
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def eval_zip_type(source: str) -> bool:
|
|
211
|
+
"""
|
|
212
|
+
Evaluate zip type.
|
|
213
|
+
|
|
214
|
+
Parameters
|
|
215
|
+
----------
|
|
216
|
+
source : str
|
|
217
|
+
Source.
|
|
218
|
+
|
|
219
|
+
Returns
|
|
220
|
+
-------
|
|
221
|
+
bool
|
|
222
|
+
True if path is zip.
|
|
223
|
+
"""
|
|
224
|
+
extension = source.endswith(".zip")
|
|
225
|
+
mime_zip = get_file_mime_type(source) == "application/zip"
|
|
226
|
+
return extension or mime_zip
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def eval_text_type(source: str) -> bool:
|
|
230
|
+
"""
|
|
231
|
+
Evaluate text type.
|
|
232
|
+
|
|
233
|
+
Parameters
|
|
234
|
+
----------
|
|
235
|
+
source : str
|
|
236
|
+
Source.
|
|
237
|
+
|
|
238
|
+
Returns
|
|
239
|
+
-------
|
|
240
|
+
bool
|
|
241
|
+
True if path is text.
|
|
242
|
+
"""
|
|
243
|
+
return get_file_mime_type(source) == "text/plain"
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def eval_py_type(source: str) -> bool:
|
|
247
|
+
"""
|
|
248
|
+
Evaluate python type.
|
|
249
|
+
|
|
250
|
+
Parameters
|
|
251
|
+
----------
|
|
252
|
+
source : str
|
|
253
|
+
Source.
|
|
254
|
+
|
|
255
|
+
Returns
|
|
256
|
+
-------
|
|
257
|
+
bool
|
|
258
|
+
True if path is python.
|
|
259
|
+
"""
|
|
260
|
+
extension = source.endswith(".py")
|
|
261
|
+
mime_py = get_file_mime_type(source) == "text/x-python"
|
|
262
|
+
return extension or mime_py
|
digitalhub/utils/generic_utils.py
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import base64
|
|
4
|
+
import importlib.util as imputil
|
|
4
5
|
import json
|
|
5
6
|
from datetime import datetime
|
|
7
|
+
from enum import Enum
|
|
6
8
|
from pathlib import Path
|
|
7
|
-
from typing import Any
|
|
9
|
+
from typing import Any, Callable
|
|
8
10
|
from zipfile import ZipFile
|
|
9
11
|
|
|
10
12
|
import numpy as np
|
|
11
|
-
|
|
13
|
+
import requests
|
|
12
14
|
from slugify import slugify
|
|
13
15
|
|
|
14
16
|
from digitalhub.utils.io_utils import read_text
|
|
@@ -26,7 +28,7 @@ def get_timestamp() -> str:
|
|
|
26
28
|
return datetime.now().astimezone().isoformat()
|
|
27
29
|
|
|
28
30
|
|
|
29
|
-
def
|
|
31
|
+
def decode_base64_string(string: str) -> str:
|
|
30
32
|
"""
|
|
31
33
|
Decode a string from base64.
|
|
32
34
|
|
|
@@ -92,7 +94,7 @@ def requests_chunk_download(source: str, filename: Path) -> None:
|
|
|
92
94
|
-------
|
|
93
95
|
None
|
|
94
96
|
"""
|
|
95
|
-
with
|
|
97
|
+
with requests.get(source, stream=True) as r:
|
|
96
98
|
r.raise_for_status()
|
|
97
99
|
with filename.open("wb") as f:
|
|
98
100
|
for chunk in r.iter_content(chunk_size=8192):
|
|
@@ -181,3 +183,42 @@ def slugify_string(filename: str) -> str:
|
|
|
181
183
|
The sanitized filename.
|
|
182
184
|
"""
|
|
183
185
|
return slugify(filename, max_length=255)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def import_function(path: Path, handler: str) -> Callable:
|
|
189
|
+
"""
|
|
190
|
+
Import a function from a module.
|
|
191
|
+
|
|
192
|
+
Parameters
|
|
193
|
+
----------
|
|
194
|
+
path : Path
|
|
195
|
+
Path where the function source is located.
|
|
196
|
+
handler : str
|
|
197
|
+
Function name.
|
|
198
|
+
|
|
199
|
+
Returns
|
|
200
|
+
-------
|
|
201
|
+
Callable
|
|
202
|
+
Function.
|
|
203
|
+
"""
|
|
204
|
+
spec = imputil.spec_from_file_location(path.stem, path)
|
|
205
|
+
mod = imputil.module_from_spec(spec)
|
|
206
|
+
spec.loader.exec_module(mod)
|
|
207
|
+
return getattr(mod, handler)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def list_enum(enum: Enum) -> list:
|
|
211
|
+
"""
|
|
212
|
+
Get all values of an enum.
|
|
213
|
+
|
|
214
|
+
Parameters
|
|
215
|
+
----------
|
|
216
|
+
enum : Enum
|
|
217
|
+
Enum to get values from.
|
|
218
|
+
|
|
219
|
+
Returns
|
|
220
|
+
-------
|
|
221
|
+
list
|
|
222
|
+
List of enum values.
|
|
223
|
+
"""
|
|
224
|
+
return [e.value for e in enum]
|
digitalhub/utils/io_utils.py
CHANGED
|
@@ -32,6 +32,24 @@ def write_yaml(filepath: str | Path, obj: dict | list[dict]) -> None:
|
|
|
32
32
|
yaml.dump(obj, out_file, sort_keys=False)
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
def write_text(filepath: Path, text: str) -> None:
|
|
36
|
+
"""
|
|
37
|
+
Write text to a file.
|
|
38
|
+
|
|
39
|
+
Parameters
|
|
40
|
+
----------
|
|
41
|
+
filepath : Path
|
|
42
|
+
The file path to write.
|
|
43
|
+
text : str
|
|
44
|
+
The text to write.
|
|
45
|
+
|
|
46
|
+
Returns
|
|
47
|
+
-------
|
|
48
|
+
None
|
|
49
|
+
"""
|
|
50
|
+
filepath.write_text(text, encoding="utf-8")
|
|
51
|
+
|
|
52
|
+
|
|
35
53
|
##############################
|
|
36
54
|
# Readers
|
|
37
55
|
##############################
|
digitalhub/utils/s3_utils.py
CHANGED
|
@@ -7,6 +7,23 @@ from urllib.parse import urlparse
|
|
|
7
7
|
from boto3 import client as boto3_client
|
|
8
8
|
|
|
9
9
|
|
|
10
|
+
def get_bucket_name(path: str) -> str:
|
|
11
|
+
"""
|
|
12
|
+
Get bucket name from path.
|
|
13
|
+
|
|
14
|
+
Parameters
|
|
15
|
+
----------
|
|
16
|
+
path : str
|
|
17
|
+
The source path to get the key from.
|
|
18
|
+
|
|
19
|
+
Returns
|
|
20
|
+
-------
|
|
21
|
+
str
|
|
22
|
+
The bucket name.
|
|
23
|
+
"""
|
|
24
|
+
return urlparse(path).netloc
|
|
25
|
+
|
|
26
|
+
|
|
10
27
|
def get_bucket_and_key(path: str) -> tuple[str, str]:
|
|
11
28
|
"""
|
|
12
29
|
Get bucket and key from path.
|
digitalhub/utils/uri_utils.py
CHANGED
|
@@ -1,7 +1,78 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from enum import Enum
|
|
3
4
|
from urllib.parse import urlparse
|
|
4
5
|
|
|
6
|
+
from digitalhub.utils.generic_utils import list_enum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class S3Schemes(Enum):
|
|
10
|
+
"""
|
|
11
|
+
S3 schemes.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
S3 = "s3"
|
|
15
|
+
S3A = "s3a"
|
|
16
|
+
S3N = "s3n"
|
|
17
|
+
ZIP_S3 = "zip+s3"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LocalSchemes(Enum):
|
|
21
|
+
"""
|
|
22
|
+
Local schemes.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
LOCAL = ""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class InvalidLocalSchemes(Enum):
|
|
29
|
+
"""
|
|
30
|
+
Local schemes.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
FILE = "file"
|
|
34
|
+
LOCAL = "local"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class RemoteSchemes(Enum):
|
|
38
|
+
"""
|
|
39
|
+
Remote schemes.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
HTTP = "http"
|
|
43
|
+
HTTPS = "https"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SqlSchemes(Enum):
|
|
47
|
+
"""
|
|
48
|
+
Sql schemes.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
SQL = "sql"
|
|
52
|
+
POSTGRESQL = "postgresql"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class GitSchemes(Enum):
|
|
56
|
+
"""
|
|
57
|
+
Git schemes.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
GIT = "git"
|
|
61
|
+
GIT_HTTP = "git+http"
|
|
62
|
+
GIT_HTTPS = "git+https"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class SchemeCategory(Enum):
|
|
66
|
+
"""
|
|
67
|
+
Scheme types.
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
S3 = "s3"
|
|
71
|
+
LOCAL = "local"
|
|
72
|
+
REMOTE = "remote"
|
|
73
|
+
SQL = "sql"
|
|
74
|
+
GIT = "git"
|
|
75
|
+
|
|
5
76
|
|
|
6
77
|
def map_uri_scheme(uri: str) -> str:
|
|
7
78
|
"""
|
|
@@ -23,22 +94,22 @@ def map_uri_scheme(uri: str) -> str:
|
|
|
23
94
|
If the scheme is unknown.
|
|
24
95
|
"""
|
|
25
96
|
scheme = urlparse(uri).scheme
|
|
26
|
-
if scheme in
|
|
27
|
-
return
|
|
28
|
-
if scheme in
|
|
29
|
-
raise ValueError("For local path, do not use any scheme")
|
|
30
|
-
if scheme in
|
|
31
|
-
return
|
|
32
|
-
if scheme in
|
|
33
|
-
return
|
|
34
|
-
if scheme in
|
|
35
|
-
return
|
|
36
|
-
if scheme in
|
|
37
|
-
return
|
|
97
|
+
if scheme in list_enum(LocalSchemes):
|
|
98
|
+
return SchemeCategory.LOCAL.value
|
|
99
|
+
if scheme in list_enum(InvalidLocalSchemes):
|
|
100
|
+
raise ValueError("For local path, do not use any scheme.")
|
|
101
|
+
if scheme in list_enum(RemoteSchemes):
|
|
102
|
+
return SchemeCategory.REMOTE.value
|
|
103
|
+
if scheme in list_enum(S3Schemes):
|
|
104
|
+
return SchemeCategory.S3.value
|
|
105
|
+
if scheme in list_enum(SqlSchemes):
|
|
106
|
+
return SchemeCategory.SQL.value
|
|
107
|
+
if scheme in list_enum(GitSchemes):
|
|
108
|
+
return SchemeCategory.GIT.value
|
|
38
109
|
raise ValueError(f"Unknown scheme '{scheme}'!")
|
|
39
110
|
|
|
40
111
|
|
|
41
|
-
def check_local_path(path: str) -> bool:
|
|
112
|
+
def has_local_scheme(path: str) -> bool:
|
|
42
113
|
"""
|
|
43
114
|
Check if path is local.
|
|
44
115
|
|
|
@@ -52,5 +123,72 @@ def check_local_path(path: str) -> bool:
|
|
|
52
123
|
bool
|
|
53
124
|
True if path is local.
|
|
54
125
|
"""
|
|
55
|
-
|
|
56
|
-
|
|
126
|
+
return map_uri_scheme(path) == SchemeCategory.LOCAL.value
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def has_remote_scheme(path: str) -> bool:
|
|
130
|
+
"""
|
|
131
|
+
Check if path is remote.
|
|
132
|
+
|
|
133
|
+
Parameters
|
|
134
|
+
----------
|
|
135
|
+
path : str
|
|
136
|
+
Path of some source.
|
|
137
|
+
|
|
138
|
+
Returns
|
|
139
|
+
-------
|
|
140
|
+
bool
|
|
141
|
+
True if path is remote.
|
|
142
|
+
"""
|
|
143
|
+
return map_uri_scheme(path) == SchemeCategory.REMOTE.value
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def has_s3_scheme(path: str) -> bool:
|
|
147
|
+
"""
|
|
148
|
+
Check if path is s3.
|
|
149
|
+
|
|
150
|
+
Parameters
|
|
151
|
+
----------
|
|
152
|
+
path : str
|
|
153
|
+
Path of some source.
|
|
154
|
+
|
|
155
|
+
Returns
|
|
156
|
+
-------
|
|
157
|
+
bool
|
|
158
|
+
True if path is s3.
|
|
159
|
+
"""
|
|
160
|
+
return map_uri_scheme(path) == SchemeCategory.S3.value
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def has_sql_scheme(path: str) -> bool:
|
|
164
|
+
"""
|
|
165
|
+
Check if path is sql.
|
|
166
|
+
|
|
167
|
+
Parameters
|
|
168
|
+
----------
|
|
169
|
+
path : str
|
|
170
|
+
Path of some source.
|
|
171
|
+
|
|
172
|
+
Returns
|
|
173
|
+
-------
|
|
174
|
+
bool
|
|
175
|
+
True if path is sql.
|
|
176
|
+
"""
|
|
177
|
+
return map_uri_scheme(path) == SchemeCategory.SQL.value
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def has_git_scheme(path: str) -> bool:
|
|
181
|
+
"""
|
|
182
|
+
Check if path is git.
|
|
183
|
+
|
|
184
|
+
Parameters
|
|
185
|
+
----------
|
|
186
|
+
path : str
|
|
187
|
+
Path of some source.
|
|
188
|
+
|
|
189
|
+
Returns
|
|
190
|
+
-------
|
|
191
|
+
bool
|
|
192
|
+
True if path is git.
|
|
193
|
+
"""
|
|
194
|
+
return map_uri_scheme(path) == SchemeCategory.GIT.value
|
{digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/LICENSE.txt
CHANGED
|
@@ -186,7 +186,7 @@ APPENDIX: How to apply the Apache License to your work.
|
|
|
186
186
|
same "printed page" as the copyright notice for easier
|
|
187
187
|
identification within third-party archives.
|
|
188
188
|
|
|
189
|
-
Copyright
|
|
189
|
+
Copyright 2024 DSLab, Fondazione Bruno Kessler
|
|
190
190
|
|
|
191
191
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
192
|
you may not use this file except in compliance with the License.
|
{digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: digitalhub
|
|
3
|
-
Version: 0.8.1
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Python SDK for Digitalhub
|
|
5
5
|
Author-email: Fondazione Bruno Kessler <dslab@fbk.eu>, Matteo Martini <mmartini@fbk.eu>
|
|
6
6
|
License: Apache License
|
|
@@ -191,7 +191,7 @@ License: Apache License
|
|
|
191
191
|
same "printed page" as the copyright notice for easier
|
|
192
192
|
identification within third-party archives.
|
|
193
193
|
|
|
194
|
-
Copyright
|
|
194
|
+
Copyright 2024 DSLab, Fondazione Bruno Kessler
|
|
195
195
|
|
|
196
196
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
197
197
|
you may not use this file except in compliance with the License.
|
|
@@ -230,7 +230,7 @@ Requires-Python: >=3.9
|
|
|
230
230
|
Description-Content-Type: text/markdown
|
|
231
231
|
License-File: LICENSE.txt
|
|
232
232
|
Requires-Dist: boto3
|
|
233
|
-
Requires-Dist: pydantic
|
|
233
|
+
Requires-Dist: pydantic
|
|
234
234
|
Requires-Dist: sqlalchemy<2
|
|
235
235
|
Requires-Dist: pyarrow
|
|
236
236
|
Requires-Dist: numpy<2
|