digitalhub-0.8.1-py3-none-any.whl → digitalhub-0.9.0b0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +19 -2
- digitalhub/client/_base/api_builder.py +16 -0
- digitalhub/client/_base/client.py +31 -0
- digitalhub/client/api.py +2 -38
- digitalhub/client/dhcore/api_builder.py +100 -0
- digitalhub/client/dhcore/client.py +77 -24
- digitalhub/client/dhcore/enums.py +26 -0
- digitalhub/client/dhcore/env.py +2 -2
- digitalhub/client/dhcore/utils.py +17 -17
- digitalhub/client/local/api_builder.py +100 -0
- digitalhub/client/local/client.py +20 -0
- digitalhub/context/api.py +3 -38
- digitalhub/context/builder.py +10 -23
- digitalhub/context/context.py +20 -92
- digitalhub/entities/_base/context/entity.py +30 -22
- digitalhub/entities/_base/entity/_constructors/metadata.py +12 -1
- digitalhub/entities/_base/entity/_constructors/name.py +1 -1
- digitalhub/entities/_base/entity/_constructors/spec.py +1 -1
- digitalhub/entities/_base/entity/_constructors/status.py +3 -2
- digitalhub/entities/_base/entity/builder.py +6 -1
- digitalhub/entities/_base/entity/entity.py +30 -10
- digitalhub/entities/_base/entity/metadata.py +22 -0
- digitalhub/entities/_base/entity/spec.py +7 -2
- digitalhub/entities/_base/executable/entity.py +8 -8
- digitalhub/entities/_base/material/entity.py +48 -16
- digitalhub/entities/_base/material/status.py +0 -31
- digitalhub/entities/_base/material/utils.py +106 -0
- digitalhub/entities/_base/project/entity.py +341 -0
- digitalhub/entities/_base/unversioned/entity.py +1 -23
- digitalhub/entities/_base/versioned/entity.py +0 -25
- digitalhub/entities/_commons/enums.py +103 -0
- digitalhub/entities/_commons/utils.py +83 -0
- digitalhub/entities/_operations/processor.py +1747 -0
- digitalhub/entities/artifact/_base/builder.py +1 -1
- digitalhub/entities/artifact/_base/entity.py +1 -1
- digitalhub/entities/artifact/artifact/builder.py +2 -1
- digitalhub/entities/artifact/crud.py +46 -29
- digitalhub/entities/artifact/utils.py +62 -0
- digitalhub/entities/dataitem/_base/builder.py +1 -1
- digitalhub/entities/dataitem/_base/entity.py +6 -6
- digitalhub/entities/dataitem/crud.py +50 -66
- digitalhub/entities/dataitem/dataitem/builder.py +2 -1
- digitalhub/entities/dataitem/iceberg/builder.py +2 -1
- digitalhub/entities/dataitem/table/builder.py +2 -1
- digitalhub/entities/dataitem/table/entity.py +5 -10
- digitalhub/entities/dataitem/table/models.py +4 -5
- digitalhub/entities/dataitem/utils.py +137 -0
- digitalhub/entities/function/_base/builder.py +1 -1
- digitalhub/entities/function/_base/entity.py +5 -1
- digitalhub/entities/function/crud.py +36 -17
- digitalhub/entities/model/_base/builder.py +1 -1
- digitalhub/entities/model/_base/entity.py +1 -1
- digitalhub/entities/model/crud.py +46 -29
- digitalhub/entities/model/huggingface/builder.py +2 -1
- digitalhub/entities/model/huggingface/spec.py +4 -2
- digitalhub/entities/model/mlflow/builder.py +2 -1
- digitalhub/entities/model/mlflow/models.py +17 -9
- digitalhub/entities/model/mlflow/spec.py +6 -1
- digitalhub/entities/model/mlflow/utils.py +4 -2
- digitalhub/entities/model/model/builder.py +2 -1
- digitalhub/entities/model/sklearn/builder.py +2 -1
- digitalhub/entities/model/utils.py +62 -0
- digitalhub/entities/project/_base/builder.py +2 -2
- digitalhub/entities/project/_base/entity.py +82 -272
- digitalhub/entities/project/crud.py +110 -91
- digitalhub/entities/project/utils.py +35 -0
- digitalhub/entities/run/_base/builder.py +3 -1
- digitalhub/entities/run/_base/entity.py +52 -54
- digitalhub/entities/run/_base/spec.py +11 -7
- digitalhub/entities/run/crud.py +35 -17
- digitalhub/entities/secret/_base/builder.py +2 -2
- digitalhub/entities/secret/_base/entity.py +4 -10
- digitalhub/entities/secret/crud.py +36 -21
- digitalhub/entities/task/_base/builder.py +14 -14
- digitalhub/entities/task/_base/entity.py +6 -6
- digitalhub/entities/task/_base/models.py +29 -6
- digitalhub/entities/task/_base/spec.py +44 -13
- digitalhub/entities/task/_base/utils.py +18 -0
- digitalhub/entities/task/crud.py +35 -15
- digitalhub/entities/workflow/_base/builder.py +1 -1
- digitalhub/entities/workflow/_base/entity.py +14 -6
- digitalhub/entities/workflow/crud.py +36 -17
- digitalhub/factory/utils.py +1 -1
- digitalhub/readers/_base/reader.py +2 -2
- digitalhub/readers/_commons/enums.py +13 -0
- digitalhub/readers/api.py +3 -2
- digitalhub/readers/factory.py +12 -6
- digitalhub/readers/pandas/reader.py +20 -8
- digitalhub/runtimes/_base.py +0 -7
- digitalhub/stores/_base/store.py +53 -9
- digitalhub/stores/builder.py +5 -5
- digitalhub/stores/local/store.py +37 -2
- digitalhub/stores/remote/store.py +25 -3
- digitalhub/stores/s3/store.py +34 -7
- digitalhub/stores/sql/store.py +112 -45
- digitalhub/utils/exceptions.py +6 -0
- digitalhub/utils/file_utils.py +60 -2
- digitalhub/utils/generic_utils.py +45 -4
- digitalhub/utils/io_utils.py +18 -0
- digitalhub/utils/uri_utils.py +153 -15
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/METADATA +2 -2
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/RECORD +110 -113
- test/testkfp.py +4 -1
- digitalhub/datastores/_base/datastore.py +0 -85
- digitalhub/datastores/api.py +0 -37
- digitalhub/datastores/builder.py +0 -110
- digitalhub/datastores/local/datastore.py +0 -50
- digitalhub/datastores/remote/__init__.py +0 -0
- digitalhub/datastores/remote/datastore.py +0 -31
- digitalhub/datastores/s3/__init__.py +0 -0
- digitalhub/datastores/s3/datastore.py +0 -46
- digitalhub/datastores/sql/__init__.py +0 -0
- digitalhub/datastores/sql/datastore.py +0 -68
- digitalhub/entities/_base/api_utils.py +0 -620
- digitalhub/entities/_base/crud.py +0 -468
- digitalhub/entities/function/_base/models.py +0 -118
- digitalhub/entities/utils/__init__.py +0 -0
- digitalhub/entities/utils/api.py +0 -346
- digitalhub/entities/utils/entity_types.py +0 -19
- digitalhub/entities/utils/state.py +0 -31
- digitalhub/entities/utils/utils.py +0 -202
- /digitalhub/{context → entities/_base/project}/__init__.py +0 -0
- /digitalhub/{datastores → entities/_commons}/__init__.py +0 -0
- /digitalhub/{datastores/_base → entities/_operations}/__init__.py +0 -0
- /digitalhub/{datastores/local → readers/_commons}/__init__.py +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/LICENSE.txt +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/WHEEL +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0b0.dist-info}/top_level.txt +0 -0
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
from digitalhub.entities._base.executable.entity import ExecutableEntity
|
|
6
|
-
from digitalhub.entities.
|
|
6
|
+
from digitalhub.entities._commons.enums import EntityTypes
|
|
7
7
|
from digitalhub.factory.api import get_run_kind, get_task_kind_from_action
|
|
8
8
|
from digitalhub.utils.exceptions import BackendError
|
|
9
9
|
|
|
@@ -41,7 +41,13 @@ class Workflow(ExecutableEntity):
|
|
|
41
41
|
# Workflow Methods
|
|
42
42
|
##############################
|
|
43
43
|
|
|
44
|
-
def run(
|
|
44
|
+
def run(
|
|
45
|
+
self,
|
|
46
|
+
action: str,
|
|
47
|
+
wait: bool = True,
|
|
48
|
+
log_info: bool = True,
|
|
49
|
+
**kwargs,
|
|
50
|
+
) -> Run:
|
|
45
51
|
"""
|
|
46
52
|
Run workflow.
|
|
47
53
|
|
|
@@ -49,6 +55,7 @@ class Workflow(ExecutableEntity):
|
|
|
49
55
|
----------
|
|
50
56
|
action : str
|
|
51
57
|
Action to execute.
|
|
58
|
+
|
|
52
59
|
**kwargs : dict
|
|
53
60
|
Keyword arguments passed to Run builder.
|
|
54
61
|
|
|
@@ -57,9 +64,6 @@ class Workflow(ExecutableEntity):
|
|
|
57
64
|
Run
|
|
58
65
|
Run instance.
|
|
59
66
|
"""
|
|
60
|
-
if action is None:
|
|
61
|
-
action = "pipeline"
|
|
62
|
-
|
|
63
67
|
# Get task and run kind
|
|
64
68
|
task_kind = get_task_kind_from_action(self.kind, action)
|
|
65
69
|
run_kind = get_run_kind(self.kind)
|
|
@@ -71,4 +75,8 @@ class Workflow(ExecutableEntity):
|
|
|
71
75
|
if self._context().local:
|
|
72
76
|
raise BackendError("Cannot run workflow with local backend.")
|
|
73
77
|
|
|
74
|
-
|
|
78
|
+
# Run task
|
|
79
|
+
run = task.run(run_kind, local_execution=False, **kwargs)
|
|
80
|
+
if wait:
|
|
81
|
+
return run.wait(log_info=log_info)
|
|
82
|
+
return run
|
|
@@ -2,15 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
|
-
from digitalhub.entities.
|
|
6
|
-
|
|
7
|
-
get_context_entity_versions,
|
|
8
|
-
get_versioned_entity,
|
|
9
|
-
import_executable_entity,
|
|
10
|
-
list_context_entities,
|
|
11
|
-
new_context_entity,
|
|
12
|
-
)
|
|
13
|
-
from digitalhub.entities.utils.entity_types import EntityTypes
|
|
5
|
+
from digitalhub.entities._commons.enums import EntityTypes
|
|
6
|
+
from digitalhub.entities._operations.processor import processor
|
|
14
7
|
|
|
15
8
|
if typing.TYPE_CHECKING:
|
|
16
9
|
from digitalhub.entities.workflow._base.entity import Workflow
|
|
@@ -62,7 +55,7 @@ def new_workflow(
|
|
|
62
55
|
>>> code_src="pipeline.py",
|
|
63
56
|
>>> handler="pipeline-handler")
|
|
64
57
|
"""
|
|
65
|
-
return
|
|
58
|
+
return processor.create_context_entity(
|
|
66
59
|
project=project,
|
|
67
60
|
name=name,
|
|
68
61
|
kind=kind,
|
|
@@ -109,7 +102,7 @@ def get_workflow(
|
|
|
109
102
|
>>> project="my-project",
|
|
110
103
|
>>> entity_id="my-workflow-id")
|
|
111
104
|
"""
|
|
112
|
-
return
|
|
105
|
+
return processor.read_context_entity(
|
|
113
106
|
identifier,
|
|
114
107
|
entity_type=ENTITY_TYPE,
|
|
115
108
|
project=project,
|
|
@@ -149,7 +142,7 @@ def get_workflow_versions(
|
|
|
149
142
|
>>> obj = get_workflow_versions("my-workflow-name"
|
|
150
143
|
>>> project="my-project")
|
|
151
144
|
"""
|
|
152
|
-
return
|
|
145
|
+
return processor.read_context_entity_versions(
|
|
153
146
|
identifier,
|
|
154
147
|
entity_type=ENTITY_TYPE,
|
|
155
148
|
project=project,
|
|
@@ -177,7 +170,7 @@ def list_workflows(project: str, **kwargs) -> list[Workflow]:
|
|
|
177
170
|
--------
|
|
178
171
|
>>> objs = list_workflows(project="my-project")
|
|
179
172
|
"""
|
|
180
|
-
return list_context_entities(
|
|
173
|
+
return processor.list_context_entities(
|
|
181
174
|
project=project,
|
|
182
175
|
entity_type=ENTITY_TYPE,
|
|
183
176
|
**kwargs,
|
|
@@ -186,7 +179,7 @@ def list_workflows(project: str, **kwargs) -> list[Workflow]:
|
|
|
186
179
|
|
|
187
180
|
def import_workflow(file: str) -> Workflow:
|
|
188
181
|
"""
|
|
189
|
-
Import object from a YAML file.
|
|
182
|
+
Import object from a YAML file and create a new object into the backend.
|
|
190
183
|
|
|
191
184
|
Parameters
|
|
192
185
|
----------
|
|
@@ -202,7 +195,28 @@ def import_workflow(file: str) -> Workflow:
|
|
|
202
195
|
--------
|
|
203
196
|
>>> obj = import_workflow("my-workflow.yaml")
|
|
204
197
|
"""
|
|
205
|
-
return import_executable_entity(file)
|
|
198
|
+
return processor.import_executable_entity(file)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def load_workflow(file: str) -> Workflow:
|
|
202
|
+
"""
|
|
203
|
+
Load object from a YAML file and update an existing object into the backend.
|
|
204
|
+
|
|
205
|
+
Parameters
|
|
206
|
+
----------
|
|
207
|
+
file : str
|
|
208
|
+
Path to YAML file.
|
|
209
|
+
|
|
210
|
+
Returns
|
|
211
|
+
-------
|
|
212
|
+
Workflow
|
|
213
|
+
Object instance.
|
|
214
|
+
|
|
215
|
+
Examples
|
|
216
|
+
--------
|
|
217
|
+
>>> obj = load_workflow("my-workflow.yaml")
|
|
218
|
+
"""
|
|
219
|
+
return processor.load_executable_entity(file)
|
|
206
220
|
|
|
207
221
|
|
|
208
222
|
def update_workflow(entity: Workflow) -> Workflow:
|
|
@@ -223,7 +237,12 @@ def update_workflow(entity: Workflow) -> Workflow:
|
|
|
223
237
|
--------
|
|
224
238
|
>>> obj = update_workflow(obj)
|
|
225
239
|
"""
|
|
226
|
-
return
|
|
240
|
+
return processor.update_context_entity(
|
|
241
|
+
project=entity.project,
|
|
242
|
+
entity_type=entity.ENTITY_TYPE,
|
|
243
|
+
entity_id=entity.id,
|
|
244
|
+
entity_dict=entity.to_dict(),
|
|
245
|
+
)
|
|
227
246
|
|
|
228
247
|
|
|
229
248
|
def delete_workflow(
|
|
@@ -267,7 +286,7 @@ def delete_workflow(
|
|
|
267
286
|
>>> project="my-project",
|
|
268
287
|
>>> delete_all_versions=True)
|
|
269
288
|
"""
|
|
270
|
-
return
|
|
289
|
+
return processor.delete_context_entity(
|
|
271
290
|
identifier=identifier,
|
|
272
291
|
entity_type=ENTITY_TYPE,
|
|
273
292
|
project=project,
|
digitalhub/factory/utils.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from abc import
|
|
3
|
+
from abc import abstractmethod
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
class DataframeReader
|
|
7
|
+
class DataframeReader:
|
|
8
8
|
"""
|
|
9
9
|
Dataframe reader abstract class.
|
|
10
10
|
"""
|
digitalhub/readers/api.py
CHANGED
|
@@ -4,6 +4,7 @@ import typing
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
from digitalhub.readers.factory import factory
|
|
7
|
+
from digitalhub.utils.exceptions import ReaderError
|
|
7
8
|
|
|
8
9
|
if typing.TYPE_CHECKING:
|
|
9
10
|
from digitalhub.readers._base.reader import DataframeReader
|
|
@@ -30,7 +31,7 @@ def get_reader_by_engine(engine: str | None = None) -> DataframeReader:
|
|
|
30
31
|
except KeyError:
|
|
31
32
|
engines = factory.list_supported_engines()
|
|
32
33
|
msg = f"Unsupported dataframe engine: '{engine}'. Supported engines: {engines}"
|
|
33
|
-
raise
|
|
34
|
+
raise ReaderError(msg)
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
def get_reader_by_object(obj: Any) -> DataframeReader:
|
|
@@ -53,7 +54,7 @@ def get_reader_by_object(obj: Any) -> DataframeReader:
|
|
|
53
54
|
except KeyError:
|
|
54
55
|
types = factory.list_supported_dataframes()
|
|
55
56
|
msg = f"Unsupported dataframe type: '{obj}'. Supported types: {types}"
|
|
56
|
-
raise
|
|
57
|
+
raise ReaderError(msg)
|
|
57
58
|
|
|
58
59
|
|
|
59
60
|
def get_supported_engines() -> list[str]:
|
digitalhub/readers/factory.py
CHANGED
|
@@ -2,6 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
|
+
from digitalhub.utils.exceptions import BuilderError
|
|
6
|
+
|
|
5
7
|
if typing.TYPE_CHECKING:
|
|
6
8
|
from digitalhub.readers._base.builder import ReaderBuilder
|
|
7
9
|
from digitalhub.readers._base.reader import DataframeReader
|
|
@@ -35,13 +37,13 @@ class ReaderFactory:
|
|
|
35
37
|
if self._engine_builders is None:
|
|
36
38
|
self._engine_builders = {}
|
|
37
39
|
if engine in self._engine_builders:
|
|
38
|
-
raise
|
|
40
|
+
raise BuilderError(f"Builder for engine '{engine}' already exists.")
|
|
39
41
|
self._engine_builders[engine] = builder
|
|
40
42
|
|
|
41
43
|
if self._dataframe_builders is None:
|
|
42
44
|
self._dataframe_builders = {}
|
|
43
45
|
if dataframe in self._dataframe_builders:
|
|
44
|
-
raise
|
|
46
|
+
raise BuilderError(f"Builder for dataframe '{dataframe}' already exists.")
|
|
45
47
|
self._dataframe_builders[dataframe] = builder
|
|
46
48
|
|
|
47
49
|
def build(self, engine: str | None = None, dataframe: str | None = None, **kwargs) -> DataframeReader:
|
|
@@ -63,7 +65,7 @@ class ReaderFactory:
|
|
|
63
65
|
Reader object.
|
|
64
66
|
"""
|
|
65
67
|
if (engine is None) == (dataframe is None):
|
|
66
|
-
raise
|
|
68
|
+
raise BuilderError("Either engine or dataframe must be provided.")
|
|
67
69
|
if engine is not None:
|
|
68
70
|
return self._engine_builders[engine].build(**kwargs)
|
|
69
71
|
return self._dataframe_builders[dataframe].build(**kwargs)
|
|
@@ -104,7 +106,7 @@ class ReaderFactory:
|
|
|
104
106
|
None
|
|
105
107
|
"""
|
|
106
108
|
if engine not in self._engine_builders:
|
|
107
|
-
raise
|
|
109
|
+
raise BuilderError(f"Engine {engine} not found.")
|
|
108
110
|
self._default = engine
|
|
109
111
|
|
|
110
112
|
def get_default(self) -> str:
|
|
@@ -117,7 +119,7 @@ class ReaderFactory:
|
|
|
117
119
|
Default engine.
|
|
118
120
|
"""
|
|
119
121
|
if self._default is None:
|
|
120
|
-
raise
|
|
122
|
+
raise BuilderError("No default engine set.")
|
|
121
123
|
return self._default
|
|
122
124
|
|
|
123
125
|
|
|
@@ -126,7 +128,11 @@ factory = ReaderFactory()
|
|
|
126
128
|
try:
|
|
127
129
|
from digitalhub.readers.pandas.builder import ReaderBuilderPandas
|
|
128
130
|
|
|
129
|
-
factory.add_builder(
|
|
131
|
+
factory.add_builder(
|
|
132
|
+
ReaderBuilderPandas.ENGINE,
|
|
133
|
+
ReaderBuilderPandas.DATAFRAME_CLASS,
|
|
134
|
+
ReaderBuilderPandas(),
|
|
135
|
+
)
|
|
130
136
|
factory.set_default(ReaderBuilderPandas.ENGINE)
|
|
131
137
|
|
|
132
138
|
except ImportError:
|
|
@@ -8,7 +8,9 @@ import pandas as pd
|
|
|
8
8
|
from pandas.errors import ParserError
|
|
9
9
|
|
|
10
10
|
from digitalhub.readers._base.reader import DataframeReader
|
|
11
|
+
from digitalhub.readers._commons.enums import Extensions
|
|
11
12
|
from digitalhub.utils.data_utils import build_data_preview, get_data_preview
|
|
13
|
+
from digitalhub.utils.exceptions import ReaderError
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
class DataframeReaderPandas(DataframeReader):
|
|
@@ -38,15 +40,17 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
38
40
|
pd.DataFrame
|
|
39
41
|
Pandas DataFrame.
|
|
40
42
|
"""
|
|
41
|
-
if extension ==
|
|
43
|
+
if extension == Extensions.CSV.value:
|
|
42
44
|
method = pd.read_csv
|
|
43
|
-
elif extension ==
|
|
45
|
+
elif extension == Extensions.PARQUET.value:
|
|
44
46
|
method = pd.read_parquet
|
|
45
|
-
elif extension ==
|
|
47
|
+
elif extension == Extensions.FILE.value:
|
|
46
48
|
try:
|
|
47
|
-
return self.read_df(path,
|
|
49
|
+
return self.read_df(path, Extensions.CSV.value, **kwargs)
|
|
48
50
|
except ParserError:
|
|
49
|
-
raise
|
|
51
|
+
raise ReaderError(f"Unable to read from {path}.")
|
|
52
|
+
else:
|
|
53
|
+
raise ReaderError(f"Unsupported extension '{extension}' for reading.")
|
|
50
54
|
|
|
51
55
|
if isinstance(path, list):
|
|
52
56
|
dfs = [method(p, **kwargs) for p in path]
|
|
@@ -57,7 +61,13 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
57
61
|
# Write methods
|
|
58
62
|
##############################
|
|
59
63
|
|
|
60
|
-
def write_df(
|
|
64
|
+
def write_df(
|
|
65
|
+
self,
|
|
66
|
+
df: pd.DataFrame,
|
|
67
|
+
dst: str | BytesIO,
|
|
68
|
+
extension: str | None = None,
|
|
69
|
+
**kwargs,
|
|
70
|
+
) -> None:
|
|
61
71
|
"""
|
|
62
72
|
Write DataFrame as parquet.
|
|
63
73
|
|
|
@@ -74,9 +84,11 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
74
84
|
-------
|
|
75
85
|
None
|
|
76
86
|
"""
|
|
77
|
-
if extension ==
|
|
87
|
+
if extension == Extensions.CSV.value:
|
|
78
88
|
return self.write_csv(df, dst, **kwargs)
|
|
79
|
-
|
|
89
|
+
elif extension == Extensions.PARQUET.value:
|
|
90
|
+
return self.write_parquet(df, dst, **kwargs)
|
|
91
|
+
raise ReaderError(f"Unsupported extension '{extension}' for writing.")
|
|
80
92
|
|
|
81
93
|
@staticmethod
|
|
82
94
|
def write_csv(df: pd.DataFrame, dst: str | BytesIO, **kwargs) -> None:
|
digitalhub/runtimes/_base.py
CHANGED
|
@@ -34,13 +34,6 @@ class Runtime:
|
|
|
34
34
|
Execute run task.
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
|
-
@staticmethod
|
|
38
|
-
@abstractmethod
|
|
39
|
-
def _get_executable(action: str) -> Callable:
|
|
40
|
-
"""
|
|
41
|
-
Get executable from action.
|
|
42
|
-
"""
|
|
43
|
-
|
|
44
37
|
##############################
|
|
45
38
|
# Private methods
|
|
46
39
|
##############################
|
digitalhub/stores/_base/store.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from abc import
|
|
3
|
+
from abc import abstractmethod
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from tempfile import mkdtemp
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Any
|
|
7
7
|
|
|
8
|
-
from pydantic import BaseModel
|
|
8
|
+
from pydantic import BaseModel, ConfigDict
|
|
9
9
|
|
|
10
|
+
from digitalhub.readers.api import get_reader_by_engine
|
|
10
11
|
from digitalhub.utils.exceptions import StoreError
|
|
11
|
-
from digitalhub.utils.uri_utils import
|
|
12
|
+
from digitalhub.utils.uri_utils import SchemeCategory, has_local_scheme
|
|
12
13
|
|
|
13
14
|
|
|
14
|
-
class Store
|
|
15
|
+
class Store:
|
|
15
16
|
"""
|
|
16
17
|
Store abstract class.
|
|
17
18
|
"""
|
|
@@ -35,7 +36,7 @@ class Store(metaclass=ABCMeta):
|
|
|
35
36
|
self.type = store_type
|
|
36
37
|
|
|
37
38
|
##############################
|
|
38
|
-
#
|
|
39
|
+
# I/O methods
|
|
39
40
|
##############################
|
|
40
41
|
|
|
41
42
|
@abstractmethod
|
|
@@ -62,6 +63,45 @@ class Store(metaclass=ABCMeta):
|
|
|
62
63
|
Method to get file metadata.
|
|
63
64
|
"""
|
|
64
65
|
|
|
66
|
+
##############################
|
|
67
|
+
# Datastore methods
|
|
68
|
+
##############################
|
|
69
|
+
|
|
70
|
+
@abstractmethod
|
|
71
|
+
def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
|
|
72
|
+
"""
|
|
73
|
+
Write DataFrame as parquet or csv.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
def read_df(
|
|
77
|
+
self,
|
|
78
|
+
path: str | list[str],
|
|
79
|
+
extension: str,
|
|
80
|
+
engine: str | None = None,
|
|
81
|
+
**kwargs,
|
|
82
|
+
) -> Any:
|
|
83
|
+
"""
|
|
84
|
+
Read DataFrame from path.
|
|
85
|
+
|
|
86
|
+
Parameters
|
|
87
|
+
----------
|
|
88
|
+
path : str | list[str]
|
|
89
|
+
Path(s) to read DataFrame from.
|
|
90
|
+
extension : str
|
|
91
|
+
Extension of the file.
|
|
92
|
+
engine : str
|
|
93
|
+
Dataframe engine (pandas, polars, etc.).
|
|
94
|
+
**kwargs : dict
|
|
95
|
+
Keyword arguments.
|
|
96
|
+
|
|
97
|
+
Returns
|
|
98
|
+
-------
|
|
99
|
+
Any
|
|
100
|
+
DataFrame.
|
|
101
|
+
"""
|
|
102
|
+
reader = get_reader_by_engine(engine)
|
|
103
|
+
return reader.read_df(path, extension, **kwargs)
|
|
104
|
+
|
|
65
105
|
##############################
|
|
66
106
|
# Helpers methods
|
|
67
107
|
##############################
|
|
@@ -84,7 +124,7 @@ class Store(metaclass=ABCMeta):
|
|
|
84
124
|
StoreError
|
|
85
125
|
If the source is not a local path.
|
|
86
126
|
"""
|
|
87
|
-
if
|
|
127
|
+
if not has_local_scheme(src):
|
|
88
128
|
raise StoreError(f"Source '{src}' is not a local path.")
|
|
89
129
|
|
|
90
130
|
def _check_local_dst(self, dst: str) -> None:
|
|
@@ -105,7 +145,7 @@ class Store(metaclass=ABCMeta):
|
|
|
105
145
|
StoreError
|
|
106
146
|
If the destination is not a local path.
|
|
107
147
|
"""
|
|
108
|
-
if
|
|
148
|
+
if not has_local_scheme(dst):
|
|
109
149
|
raise StoreError(f"Destination '{dst}' is not a local path.")
|
|
110
150
|
|
|
111
151
|
def _check_overwrite(self, dst: Path, overwrite: bool) -> None:
|
|
@@ -170,16 +210,20 @@ class StoreConfig(BaseModel):
|
|
|
170
210
|
Store configuration base class.
|
|
171
211
|
"""
|
|
172
212
|
|
|
213
|
+
model_config = ConfigDict(use_enum_values=True)
|
|
214
|
+
|
|
173
215
|
|
|
174
216
|
class StoreParameters(BaseModel):
|
|
175
217
|
"""
|
|
176
218
|
Store configuration class.
|
|
177
219
|
"""
|
|
178
220
|
|
|
221
|
+
model_config = ConfigDict(use_enum_values=True)
|
|
222
|
+
|
|
179
223
|
name: str
|
|
180
224
|
"""Store id."""
|
|
181
225
|
|
|
182
|
-
type:
|
|
226
|
+
type: SchemeCategory
|
|
183
227
|
"""Store type to instantiate."""
|
|
184
228
|
|
|
185
229
|
config: StoreConfig = None
|
digitalhub/stores/builder.py
CHANGED
|
@@ -11,7 +11,7 @@ from digitalhub.stores.remote.store import RemoteStore, RemoteStoreConfig
|
|
|
11
11
|
from digitalhub.stores.s3.store import S3Store, S3StoreConfig
|
|
12
12
|
from digitalhub.stores.sql.store import SqlStore, SQLStoreConfig
|
|
13
13
|
from digitalhub.utils.exceptions import StoreError
|
|
14
|
-
from digitalhub.utils.uri_utils import map_uri_scheme
|
|
14
|
+
from digitalhub.utils.uri_utils import SchemeCategory, map_uri_scheme
|
|
15
15
|
|
|
16
16
|
if typing.TYPE_CHECKING:
|
|
17
17
|
from digitalhub.stores._base.store import Store
|
|
@@ -167,7 +167,7 @@ def get_env_store_config(scheme: str) -> StoreParameters:
|
|
|
167
167
|
ValueError
|
|
168
168
|
If the scheme is not supported.
|
|
169
169
|
"""
|
|
170
|
-
if scheme ==
|
|
170
|
+
if scheme == SchemeCategory.S3.value:
|
|
171
171
|
return StoreParameters(
|
|
172
172
|
name="s3",
|
|
173
173
|
type="s3",
|
|
@@ -178,7 +178,7 @@ def get_env_store_config(scheme: str) -> StoreParameters:
|
|
|
178
178
|
bucket_name=os.getenv("S3_BUCKET_NAME"), # type: ignore
|
|
179
179
|
),
|
|
180
180
|
)
|
|
181
|
-
if scheme ==
|
|
181
|
+
if scheme == SchemeCategory.SQL.value:
|
|
182
182
|
return StoreParameters(
|
|
183
183
|
name="sql",
|
|
184
184
|
type="sql",
|
|
@@ -191,13 +191,13 @@ def get_env_store_config(scheme: str) -> StoreParameters:
|
|
|
191
191
|
pg_schema=os.getenv("POSTGRES_SCHEMA"), # type: ignore
|
|
192
192
|
),
|
|
193
193
|
)
|
|
194
|
-
if scheme ==
|
|
194
|
+
if scheme == SchemeCategory.REMOTE.value:
|
|
195
195
|
return StoreParameters(
|
|
196
196
|
name="remote",
|
|
197
197
|
type="remote",
|
|
198
198
|
config=RemoteStoreConfig(),
|
|
199
199
|
)
|
|
200
|
-
if scheme ==
|
|
200
|
+
if scheme == SchemeCategory.LOCAL.value:
|
|
201
201
|
return StoreParameters(
|
|
202
202
|
name="local",
|
|
203
203
|
type="local",
|
digitalhub/stores/local/store.py
CHANGED
|
@@ -2,7 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import shutil
|
|
4
4
|
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
5
6
|
|
|
7
|
+
from digitalhub.readers.api import get_reader_by_object
|
|
6
8
|
from digitalhub.stores._base.store import Store, StoreConfig
|
|
7
9
|
from digitalhub.utils.exceptions import StoreError
|
|
8
10
|
from digitalhub.utils.file_utils import get_file_info_from_local
|
|
@@ -28,7 +30,7 @@ class LocalStore(Store):
|
|
|
28
30
|
self.config = config
|
|
29
31
|
|
|
30
32
|
##############################
|
|
31
|
-
#
|
|
33
|
+
# I/O methods
|
|
32
34
|
##############################
|
|
33
35
|
|
|
34
36
|
def download(
|
|
@@ -189,7 +191,40 @@ class LocalStore(Store):
|
|
|
189
191
|
return dst
|
|
190
192
|
|
|
191
193
|
##############################
|
|
192
|
-
#
|
|
194
|
+
# Datastore methods
|
|
195
|
+
##############################
|
|
196
|
+
|
|
197
|
+
def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
|
|
198
|
+
"""
|
|
199
|
+
Method to write a dataframe to a file. Kwargs are passed to df.to_parquet().
|
|
200
|
+
If destination is not provided, the dataframe is written to the default
|
|
201
|
+
store path with generated name.
|
|
202
|
+
|
|
203
|
+
Parameters
|
|
204
|
+
----------
|
|
205
|
+
df : Any
|
|
206
|
+
The dataframe to write.
|
|
207
|
+
dst : str
|
|
208
|
+
The destination of the dataframe.
|
|
209
|
+
**kwargs : dict
|
|
210
|
+
Keyword arguments.
|
|
211
|
+
|
|
212
|
+
Returns
|
|
213
|
+
-------
|
|
214
|
+
str
|
|
215
|
+
Path of written dataframe.
|
|
216
|
+
"""
|
|
217
|
+
self.store._check_local_dst(dst)
|
|
218
|
+
self._validate_extension(Path(dst).suffix.removeprefix("."))
|
|
219
|
+
|
|
220
|
+
# Write dataframe
|
|
221
|
+
reader = get_reader_by_object(df)
|
|
222
|
+
reader.write_df(df, dst, extension=extension, **kwargs)
|
|
223
|
+
|
|
224
|
+
return dst
|
|
225
|
+
|
|
226
|
+
##############################
|
|
227
|
+
# Helper methods
|
|
193
228
|
##############################
|
|
194
229
|
|
|
195
230
|
@staticmethod
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
6
|
import requests
|
|
6
7
|
|
|
@@ -25,7 +26,7 @@ class RemoteStore(Store):
|
|
|
25
26
|
self.config = config
|
|
26
27
|
|
|
27
28
|
##############################
|
|
28
|
-
#
|
|
29
|
+
# I/O methods
|
|
29
30
|
##############################
|
|
30
31
|
|
|
31
32
|
def download(
|
|
@@ -83,15 +84,36 @@ class RemoteStore(Store):
|
|
|
83
84
|
"""
|
|
84
85
|
Get file information from HTTP(s) storage.
|
|
85
86
|
|
|
87
|
+
Parameters
|
|
88
|
+
----------
|
|
89
|
+
paths : list[str]
|
|
90
|
+
List of source paths.
|
|
91
|
+
|
|
92
|
+
Returns
|
|
93
|
+
-------
|
|
94
|
+
list[dict]
|
|
95
|
+
Returns files metadata.
|
|
96
|
+
"""
|
|
97
|
+
return []
|
|
98
|
+
|
|
99
|
+
##############################
|
|
100
|
+
# Datastore methods
|
|
101
|
+
##############################
|
|
102
|
+
|
|
103
|
+
def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
|
|
104
|
+
"""
|
|
105
|
+
Method to write a dataframe to a file. Note that this method is not implemented
|
|
106
|
+
since the remote store is not meant to write dataframes.
|
|
107
|
+
|
|
86
108
|
Raises
|
|
87
109
|
------
|
|
88
110
|
NotImplementedError
|
|
89
111
|
This method is not implemented.
|
|
90
112
|
"""
|
|
91
|
-
raise NotImplementedError("Remote store does not support
|
|
113
|
+
raise NotImplementedError("Remote store does not support write_df.")
|
|
92
114
|
|
|
93
115
|
##############################
|
|
94
|
-
#
|
|
116
|
+
# Helper methods
|
|
95
117
|
##############################
|
|
96
118
|
|
|
97
119
|
@staticmethod
|