digitalhub 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +19 -2
- digitalhub/client/_base/api_builder.py +16 -0
- digitalhub/client/_base/client.py +67 -0
- digitalhub/client/_base/key_builder.py +52 -0
- digitalhub/client/api.py +2 -38
- digitalhub/client/dhcore/api_builder.py +100 -0
- digitalhub/client/dhcore/client.py +81 -25
- digitalhub/client/dhcore/enums.py +27 -0
- digitalhub/client/dhcore/env.py +2 -2
- digitalhub/client/dhcore/key_builder.py +58 -0
- digitalhub/client/dhcore/utils.py +17 -17
- digitalhub/client/local/api_builder.py +100 -0
- digitalhub/client/local/client.py +22 -0
- digitalhub/client/local/key_builder.py +58 -0
- digitalhub/context/api.py +3 -38
- digitalhub/context/builder.py +10 -23
- digitalhub/context/context.py +20 -92
- digitalhub/entities/_base/context/entity.py +30 -22
- digitalhub/entities/_base/entity/_constructors/metadata.py +12 -1
- digitalhub/entities/_base/entity/_constructors/name.py +1 -1
- digitalhub/entities/_base/entity/_constructors/spec.py +1 -1
- digitalhub/entities/_base/entity/_constructors/status.py +3 -2
- digitalhub/entities/_base/entity/builder.py +6 -1
- digitalhub/entities/_base/entity/entity.py +32 -10
- digitalhub/entities/_base/entity/metadata.py +22 -0
- digitalhub/entities/_base/entity/spec.py +7 -2
- digitalhub/entities/_base/executable/entity.py +8 -8
- digitalhub/entities/_base/material/entity.py +49 -17
- digitalhub/entities/_base/material/status.py +0 -31
- digitalhub/entities/_base/material/utils.py +106 -0
- digitalhub/entities/_base/project/entity.py +341 -0
- digitalhub/entities/_base/unversioned/entity.py +3 -24
- digitalhub/entities/_base/versioned/entity.py +2 -26
- digitalhub/entities/_commons/enums.py +103 -0
- digitalhub/entities/_commons/utils.py +83 -0
- digitalhub/entities/_operations/processor.py +1873 -0
- digitalhub/entities/artifact/_base/builder.py +1 -1
- digitalhub/entities/artifact/_base/entity.py +1 -1
- digitalhub/entities/artifact/artifact/builder.py +2 -1
- digitalhub/entities/artifact/crud.py +46 -29
- digitalhub/entities/artifact/utils.py +62 -0
- digitalhub/entities/dataitem/_base/builder.py +1 -1
- digitalhub/entities/dataitem/_base/entity.py +6 -6
- digitalhub/entities/dataitem/crud.py +50 -66
- digitalhub/entities/dataitem/dataitem/builder.py +2 -1
- digitalhub/entities/dataitem/iceberg/builder.py +2 -1
- digitalhub/entities/dataitem/table/builder.py +2 -1
- digitalhub/entities/dataitem/table/entity.py +5 -10
- digitalhub/entities/dataitem/table/models.py +4 -5
- digitalhub/entities/dataitem/utils.py +137 -0
- digitalhub/entities/function/_base/builder.py +1 -1
- digitalhub/entities/function/_base/entity.py +6 -2
- digitalhub/entities/function/crud.py +36 -17
- digitalhub/entities/model/_base/builder.py +1 -1
- digitalhub/entities/model/_base/entity.py +1 -1
- digitalhub/entities/model/crud.py +46 -29
- digitalhub/entities/model/huggingface/builder.py +2 -1
- digitalhub/entities/model/huggingface/spec.py +4 -2
- digitalhub/entities/model/mlflow/builder.py +2 -1
- digitalhub/entities/model/mlflow/models.py +17 -9
- digitalhub/entities/model/mlflow/spec.py +6 -1
- digitalhub/entities/model/mlflow/utils.py +4 -2
- digitalhub/entities/model/model/builder.py +2 -1
- digitalhub/entities/model/sklearn/builder.py +2 -1
- digitalhub/entities/model/utils.py +62 -0
- digitalhub/entities/project/_base/builder.py +2 -2
- digitalhub/entities/project/_base/entity.py +82 -272
- digitalhub/entities/project/crud.py +110 -91
- digitalhub/entities/project/utils.py +35 -0
- digitalhub/entities/run/_base/builder.py +3 -1
- digitalhub/entities/run/_base/entity.py +52 -54
- digitalhub/entities/run/_base/spec.py +15 -7
- digitalhub/entities/run/crud.py +35 -17
- digitalhub/entities/secret/_base/builder.py +2 -2
- digitalhub/entities/secret/_base/entity.py +4 -10
- digitalhub/entities/secret/crud.py +36 -21
- digitalhub/entities/task/_base/builder.py +14 -14
- digitalhub/entities/task/_base/entity.py +21 -14
- digitalhub/entities/task/_base/models.py +35 -6
- digitalhub/entities/task/_base/spec.py +50 -13
- digitalhub/entities/task/_base/utils.py +18 -0
- digitalhub/entities/task/crud.py +35 -15
- digitalhub/entities/workflow/_base/builder.py +1 -1
- digitalhub/entities/workflow/_base/entity.py +22 -6
- digitalhub/entities/workflow/crud.py +36 -17
- digitalhub/factory/utils.py +1 -1
- digitalhub/readers/_base/reader.py +2 -2
- digitalhub/readers/_commons/enums.py +13 -0
- digitalhub/readers/api.py +3 -2
- digitalhub/readers/factory.py +12 -6
- digitalhub/readers/pandas/reader.py +20 -8
- digitalhub/runtimes/_base.py +0 -7
- digitalhub/runtimes/enums.py +12 -0
- digitalhub/stores/_base/store.py +59 -11
- digitalhub/stores/builder.py +5 -5
- digitalhub/stores/local/store.py +43 -4
- digitalhub/stores/remote/store.py +31 -5
- digitalhub/stores/s3/store.py +129 -48
- digitalhub/stores/sql/store.py +122 -47
- digitalhub/utils/exceptions.py +6 -0
- digitalhub/utils/file_utils.py +60 -2
- digitalhub/utils/generic_utils.py +45 -4
- digitalhub/utils/io_utils.py +18 -0
- digitalhub/utils/s3_utils.py +17 -0
- digitalhub/utils/uri_utils.py +153 -15
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/LICENSE.txt +1 -1
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/METADATA +3 -3
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/RECORD +116 -114
- test/local/instances/test_validate.py +55 -0
- test/testkfp.py +4 -1
- digitalhub/datastores/_base/datastore.py +0 -85
- digitalhub/datastores/api.py +0 -37
- digitalhub/datastores/builder.py +0 -110
- digitalhub/datastores/local/datastore.py +0 -50
- digitalhub/datastores/remote/__init__.py +0 -0
- digitalhub/datastores/remote/datastore.py +0 -31
- digitalhub/datastores/s3/__init__.py +0 -0
- digitalhub/datastores/s3/datastore.py +0 -46
- digitalhub/datastores/sql/__init__.py +0 -0
- digitalhub/datastores/sql/datastore.py +0 -68
- digitalhub/entities/_base/api_utils.py +0 -620
- digitalhub/entities/_base/crud.py +0 -468
- digitalhub/entities/function/_base/models.py +0 -118
- digitalhub/entities/utils/__init__.py +0 -0
- digitalhub/entities/utils/api.py +0 -346
- digitalhub/entities/utils/entity_types.py +0 -19
- digitalhub/entities/utils/state.py +0 -31
- digitalhub/entities/utils/utils.py +0 -202
- /digitalhub/{context → entities/_base/project}/__init__.py +0 -0
- /digitalhub/{datastores → entities/_commons}/__init__.py +0 -0
- /digitalhub/{datastores/_base → entities/_operations}/__init__.py +0 -0
- /digitalhub/{datastores/local → readers/_commons}/__init__.py +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/WHEEL +0 -0
- {digitalhub-0.8.1.dist-info → digitalhub-0.9.0.dist-info}/top_level.txt +0 -0
digitalhub/entities/task/crud.py
CHANGED
|
@@ -2,14 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
|
-
from digitalhub.entities.
|
|
6
|
-
|
|
7
|
-
get_unversioned_entity,
|
|
8
|
-
import_context_entity,
|
|
9
|
-
list_context_entities,
|
|
10
|
-
new_context_entity,
|
|
11
|
-
)
|
|
12
|
-
from digitalhub.entities.utils.entity_types import EntityTypes
|
|
5
|
+
from digitalhub.entities._commons.enums import EntityTypes
|
|
6
|
+
from digitalhub.entities._operations.processor import processor
|
|
13
7
|
from digitalhub.utils.exceptions import EntityError
|
|
14
8
|
|
|
15
9
|
if typing.TYPE_CHECKING:
|
|
@@ -56,7 +50,7 @@ def new_task(
|
|
|
56
50
|
>>> kind="python+job",
|
|
57
51
|
>>> function="function-string")
|
|
58
52
|
"""
|
|
59
|
-
return
|
|
53
|
+
return processor.create_context_entity(
|
|
60
54
|
project=project,
|
|
61
55
|
kind=kind,
|
|
62
56
|
uuid=uuid,
|
|
@@ -97,7 +91,7 @@ def get_task(
|
|
|
97
91
|
>>> obj = get_task("my-task-id"
|
|
98
92
|
>>> project="my-project")
|
|
99
93
|
"""
|
|
100
|
-
return
|
|
94
|
+
return processor.read_unversioned_entity(
|
|
101
95
|
identifier,
|
|
102
96
|
entity_type=ENTITY_TYPE,
|
|
103
97
|
project=project,
|
|
@@ -125,7 +119,7 @@ def list_tasks(project: str, **kwargs) -> list[Task]:
|
|
|
125
119
|
--------
|
|
126
120
|
>>> objs = list_tasks(project="my-project")
|
|
127
121
|
"""
|
|
128
|
-
return list_context_entities(
|
|
122
|
+
return processor.list_context_entities(
|
|
129
123
|
project=project,
|
|
130
124
|
entity_type=ENTITY_TYPE,
|
|
131
125
|
**kwargs,
|
|
@@ -134,7 +128,7 @@ def list_tasks(project: str, **kwargs) -> list[Task]:
|
|
|
134
128
|
|
|
135
129
|
def import_task(file: str) -> Task:
|
|
136
130
|
"""
|
|
137
|
-
|
|
131
|
+
Import object from a YAML file and create a new object into the backend.
|
|
138
132
|
|
|
139
133
|
Parameters
|
|
140
134
|
----------
|
|
@@ -150,7 +144,28 @@ def import_task(file: str) -> Task:
|
|
|
150
144
|
-------
|
|
151
145
|
>>> obj = import_task("my-task.yaml")
|
|
152
146
|
"""
|
|
153
|
-
return import_context_entity(file)
|
|
147
|
+
return processor.import_context_entity(file)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def load_task(file: str) -> Task:
|
|
151
|
+
"""
|
|
152
|
+
Load object from a YAML file and update an existing object into the backend.
|
|
153
|
+
|
|
154
|
+
Parameters
|
|
155
|
+
----------
|
|
156
|
+
file : str
|
|
157
|
+
Path to YAML file.
|
|
158
|
+
|
|
159
|
+
Returns
|
|
160
|
+
-------
|
|
161
|
+
Task
|
|
162
|
+
Object instance.
|
|
163
|
+
|
|
164
|
+
Examples
|
|
165
|
+
--------
|
|
166
|
+
>>> obj = load_task("my-task.yaml")
|
|
167
|
+
"""
|
|
168
|
+
return processor.load_context_entity(file)
|
|
154
169
|
|
|
155
170
|
|
|
156
171
|
def update_task(entity: Task) -> Task:
|
|
@@ -171,7 +186,12 @@ def update_task(entity: Task) -> Task:
|
|
|
171
186
|
--------
|
|
172
187
|
>>> obj = update_task(obj)
|
|
173
188
|
"""
|
|
174
|
-
return
|
|
189
|
+
return processor.update_context_entity(
|
|
190
|
+
project=entity.project,
|
|
191
|
+
entity_type=entity.ENTITY_TYPE,
|
|
192
|
+
entity_id=entity.id,
|
|
193
|
+
entity_dict=entity.to_dict(),
|
|
194
|
+
)
|
|
175
195
|
|
|
176
196
|
|
|
177
197
|
def delete_task(
|
|
@@ -217,7 +237,7 @@ def delete_task(
|
|
|
217
237
|
"""
|
|
218
238
|
if not identifier.startswith("store://"):
|
|
219
239
|
raise EntityError("Task has no name. Use key instead.")
|
|
220
|
-
return
|
|
240
|
+
return processor.delete_context_entity(
|
|
221
241
|
identifier=identifier,
|
|
222
242
|
entity_type=ENTITY_TYPE,
|
|
223
243
|
project=project,
|
|
@@ -4,7 +4,7 @@ import typing
|
|
|
4
4
|
|
|
5
5
|
from digitalhub.entities._base.runtime_entity.builder import RuntimeEntityBuilder
|
|
6
6
|
from digitalhub.entities._base.versioned.builder import VersionedBuilder
|
|
7
|
-
from digitalhub.entities.
|
|
7
|
+
from digitalhub.entities._commons.enums import EntityTypes
|
|
8
8
|
|
|
9
9
|
if typing.TYPE_CHECKING:
|
|
10
10
|
from digitalhub.entities.workflow._base.entity import Workflow
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
from digitalhub.entities._base.executable.entity import ExecutableEntity
|
|
6
|
-
from digitalhub.entities.
|
|
6
|
+
from digitalhub.entities._commons.enums import EntityTypes, Relationship
|
|
7
7
|
from digitalhub.factory.api import get_run_kind, get_task_kind_from_action
|
|
8
8
|
from digitalhub.utils.exceptions import BackendError
|
|
9
9
|
|
|
@@ -41,7 +41,13 @@ class Workflow(ExecutableEntity):
|
|
|
41
41
|
# Workflow Methods
|
|
42
42
|
##############################
|
|
43
43
|
|
|
44
|
-
def run(
|
|
44
|
+
def run(
|
|
45
|
+
self,
|
|
46
|
+
action: str,
|
|
47
|
+
wait: bool = False,
|
|
48
|
+
log_info: bool = True,
|
|
49
|
+
**kwargs,
|
|
50
|
+
) -> Run:
|
|
45
51
|
"""
|
|
46
52
|
Run workflow.
|
|
47
53
|
|
|
@@ -49,6 +55,10 @@ class Workflow(ExecutableEntity):
|
|
|
49
55
|
----------
|
|
50
56
|
action : str
|
|
51
57
|
Action to execute.
|
|
58
|
+
wait : bool
|
|
59
|
+
Flag to wait for execution to finish.
|
|
60
|
+
log_info : bool
|
|
61
|
+
Flag to log information while waiting.
|
|
52
62
|
**kwargs : dict
|
|
53
63
|
Keyword arguments passed to Run builder.
|
|
54
64
|
|
|
@@ -57,9 +67,6 @@ class Workflow(ExecutableEntity):
|
|
|
57
67
|
Run
|
|
58
68
|
Run instance.
|
|
59
69
|
"""
|
|
60
|
-
if action is None:
|
|
61
|
-
action = "pipeline"
|
|
62
|
-
|
|
63
70
|
# Get task and run kind
|
|
64
71
|
task_kind = get_task_kind_from_action(self.kind, action)
|
|
65
72
|
run_kind = get_run_kind(self.kind)
|
|
@@ -71,4 +78,13 @@ class Workflow(ExecutableEntity):
|
|
|
71
78
|
if self._context().local:
|
|
72
79
|
raise BackendError("Cannot run workflow with local backend.")
|
|
73
80
|
|
|
74
|
-
|
|
81
|
+
# Run task
|
|
82
|
+
run = task.run(run_kind, save=False, local_execution=False, **kwargs)
|
|
83
|
+
|
|
84
|
+
# Set as run's parent
|
|
85
|
+
run.add_relationship(Relationship.RUN_OF.value, self.key)
|
|
86
|
+
run.save()
|
|
87
|
+
|
|
88
|
+
if wait:
|
|
89
|
+
return run.wait(log_info=log_info)
|
|
90
|
+
return run
|
|
@@ -2,15 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
|
-
from digitalhub.entities.
|
|
6
|
-
|
|
7
|
-
get_context_entity_versions,
|
|
8
|
-
get_versioned_entity,
|
|
9
|
-
import_executable_entity,
|
|
10
|
-
list_context_entities,
|
|
11
|
-
new_context_entity,
|
|
12
|
-
)
|
|
13
|
-
from digitalhub.entities.utils.entity_types import EntityTypes
|
|
5
|
+
from digitalhub.entities._commons.enums import EntityTypes
|
|
6
|
+
from digitalhub.entities._operations.processor import processor
|
|
14
7
|
|
|
15
8
|
if typing.TYPE_CHECKING:
|
|
16
9
|
from digitalhub.entities.workflow._base.entity import Workflow
|
|
@@ -62,7 +55,7 @@ def new_workflow(
|
|
|
62
55
|
>>> code_src="pipeline.py",
|
|
63
56
|
>>> handler="pipeline-handler")
|
|
64
57
|
"""
|
|
65
|
-
return
|
|
58
|
+
return processor.create_context_entity(
|
|
66
59
|
project=project,
|
|
67
60
|
name=name,
|
|
68
61
|
kind=kind,
|
|
@@ -109,7 +102,7 @@ def get_workflow(
|
|
|
109
102
|
>>> project="my-project",
|
|
110
103
|
>>> entity_id="my-workflow-id")
|
|
111
104
|
"""
|
|
112
|
-
return
|
|
105
|
+
return processor.read_context_entity(
|
|
113
106
|
identifier,
|
|
114
107
|
entity_type=ENTITY_TYPE,
|
|
115
108
|
project=project,
|
|
@@ -149,7 +142,7 @@ def get_workflow_versions(
|
|
|
149
142
|
>>> obj = get_workflow_versions("my-workflow-name"
|
|
150
143
|
>>> project="my-project")
|
|
151
144
|
"""
|
|
152
|
-
return
|
|
145
|
+
return processor.read_context_entity_versions(
|
|
153
146
|
identifier,
|
|
154
147
|
entity_type=ENTITY_TYPE,
|
|
155
148
|
project=project,
|
|
@@ -177,7 +170,7 @@ def list_workflows(project: str, **kwargs) -> list[Workflow]:
|
|
|
177
170
|
--------
|
|
178
171
|
>>> objs = list_workflows(project="my-project")
|
|
179
172
|
"""
|
|
180
|
-
return list_context_entities(
|
|
173
|
+
return processor.list_context_entities(
|
|
181
174
|
project=project,
|
|
182
175
|
entity_type=ENTITY_TYPE,
|
|
183
176
|
**kwargs,
|
|
@@ -186,7 +179,7 @@ def list_workflows(project: str, **kwargs) -> list[Workflow]:
|
|
|
186
179
|
|
|
187
180
|
def import_workflow(file: str) -> Workflow:
|
|
188
181
|
"""
|
|
189
|
-
Import object from a YAML file.
|
|
182
|
+
Import object from a YAML file and create a new object into the backend.
|
|
190
183
|
|
|
191
184
|
Parameters
|
|
192
185
|
----------
|
|
@@ -202,7 +195,28 @@ def import_workflow(file: str) -> Workflow:
|
|
|
202
195
|
--------
|
|
203
196
|
>>> obj = import_workflow("my-workflow.yaml")
|
|
204
197
|
"""
|
|
205
|
-
return import_executable_entity(file)
|
|
198
|
+
return processor.import_executable_entity(file)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def load_workflow(file: str) -> Workflow:
|
|
202
|
+
"""
|
|
203
|
+
Load object from a YAML file and update an existing object into the backend.
|
|
204
|
+
|
|
205
|
+
Parameters
|
|
206
|
+
----------
|
|
207
|
+
file : str
|
|
208
|
+
Path to YAML file.
|
|
209
|
+
|
|
210
|
+
Returns
|
|
211
|
+
-------
|
|
212
|
+
Workflow
|
|
213
|
+
Object instance.
|
|
214
|
+
|
|
215
|
+
Examples
|
|
216
|
+
--------
|
|
217
|
+
>>> obj = load_workflow("my-workflow.yaml")
|
|
218
|
+
"""
|
|
219
|
+
return processor.load_executable_entity(file)
|
|
206
220
|
|
|
207
221
|
|
|
208
222
|
def update_workflow(entity: Workflow) -> Workflow:
|
|
@@ -223,7 +237,12 @@ def update_workflow(entity: Workflow) -> Workflow:
|
|
|
223
237
|
--------
|
|
224
238
|
>>> obj = update_workflow(obj)
|
|
225
239
|
"""
|
|
226
|
-
return
|
|
240
|
+
return processor.update_context_entity(
|
|
241
|
+
project=entity.project,
|
|
242
|
+
entity_type=entity.ENTITY_TYPE,
|
|
243
|
+
entity_id=entity.id,
|
|
244
|
+
entity_dict=entity.to_dict(),
|
|
245
|
+
)
|
|
227
246
|
|
|
228
247
|
|
|
229
248
|
def delete_workflow(
|
|
@@ -267,7 +286,7 @@ def delete_workflow(
|
|
|
267
286
|
>>> project="my-project",
|
|
268
287
|
>>> delete_all_versions=True)
|
|
269
288
|
"""
|
|
270
|
-
return
|
|
289
|
+
return processor.delete_context_entity(
|
|
271
290
|
identifier=identifier,
|
|
272
291
|
entity_type=ENTITY_TYPE,
|
|
273
292
|
project=project,
|
digitalhub/factory/utils.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from abc import
|
|
3
|
+
from abc import abstractmethod
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
class DataframeReader
|
|
7
|
+
class DataframeReader:
|
|
8
8
|
"""
|
|
9
9
|
Dataframe reader abstract class.
|
|
10
10
|
"""
|
digitalhub/readers/api.py
CHANGED
|
@@ -4,6 +4,7 @@ import typing
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
from digitalhub.readers.factory import factory
|
|
7
|
+
from digitalhub.utils.exceptions import ReaderError
|
|
7
8
|
|
|
8
9
|
if typing.TYPE_CHECKING:
|
|
9
10
|
from digitalhub.readers._base.reader import DataframeReader
|
|
@@ -30,7 +31,7 @@ def get_reader_by_engine(engine: str | None = None) -> DataframeReader:
|
|
|
30
31
|
except KeyError:
|
|
31
32
|
engines = factory.list_supported_engines()
|
|
32
33
|
msg = f"Unsupported dataframe engine: '{engine}'. Supported engines: {engines}"
|
|
33
|
-
raise
|
|
34
|
+
raise ReaderError(msg)
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
def get_reader_by_object(obj: Any) -> DataframeReader:
|
|
@@ -53,7 +54,7 @@ def get_reader_by_object(obj: Any) -> DataframeReader:
|
|
|
53
54
|
except KeyError:
|
|
54
55
|
types = factory.list_supported_dataframes()
|
|
55
56
|
msg = f"Unsupported dataframe type: '{obj}'. Supported types: {types}"
|
|
56
|
-
raise
|
|
57
|
+
raise ReaderError(msg)
|
|
57
58
|
|
|
58
59
|
|
|
59
60
|
def get_supported_engines() -> list[str]:
|
digitalhub/readers/factory.py
CHANGED
|
@@ -2,6 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
|
+
from digitalhub.utils.exceptions import BuilderError
|
|
6
|
+
|
|
5
7
|
if typing.TYPE_CHECKING:
|
|
6
8
|
from digitalhub.readers._base.builder import ReaderBuilder
|
|
7
9
|
from digitalhub.readers._base.reader import DataframeReader
|
|
@@ -35,13 +37,13 @@ class ReaderFactory:
|
|
|
35
37
|
if self._engine_builders is None:
|
|
36
38
|
self._engine_builders = {}
|
|
37
39
|
if engine in self._engine_builders:
|
|
38
|
-
raise
|
|
40
|
+
raise BuilderError(f"Builder for engine '{engine}' already exists.")
|
|
39
41
|
self._engine_builders[engine] = builder
|
|
40
42
|
|
|
41
43
|
if self._dataframe_builders is None:
|
|
42
44
|
self._dataframe_builders = {}
|
|
43
45
|
if dataframe in self._dataframe_builders:
|
|
44
|
-
raise
|
|
46
|
+
raise BuilderError(f"Builder for dataframe '{dataframe}' already exists.")
|
|
45
47
|
self._dataframe_builders[dataframe] = builder
|
|
46
48
|
|
|
47
49
|
def build(self, engine: str | None = None, dataframe: str | None = None, **kwargs) -> DataframeReader:
|
|
@@ -63,7 +65,7 @@ class ReaderFactory:
|
|
|
63
65
|
Reader object.
|
|
64
66
|
"""
|
|
65
67
|
if (engine is None) == (dataframe is None):
|
|
66
|
-
raise
|
|
68
|
+
raise BuilderError("Either engine or dataframe must be provided.")
|
|
67
69
|
if engine is not None:
|
|
68
70
|
return self._engine_builders[engine].build(**kwargs)
|
|
69
71
|
return self._dataframe_builders[dataframe].build(**kwargs)
|
|
@@ -104,7 +106,7 @@ class ReaderFactory:
|
|
|
104
106
|
None
|
|
105
107
|
"""
|
|
106
108
|
if engine not in self._engine_builders:
|
|
107
|
-
raise
|
|
109
|
+
raise BuilderError(f"Engine {engine} not found.")
|
|
108
110
|
self._default = engine
|
|
109
111
|
|
|
110
112
|
def get_default(self) -> str:
|
|
@@ -117,7 +119,7 @@ class ReaderFactory:
|
|
|
117
119
|
Default engine.
|
|
118
120
|
"""
|
|
119
121
|
if self._default is None:
|
|
120
|
-
raise
|
|
122
|
+
raise BuilderError("No default engine set.")
|
|
121
123
|
return self._default
|
|
122
124
|
|
|
123
125
|
|
|
@@ -126,7 +128,11 @@ factory = ReaderFactory()
|
|
|
126
128
|
try:
|
|
127
129
|
from digitalhub.readers.pandas.builder import ReaderBuilderPandas
|
|
128
130
|
|
|
129
|
-
factory.add_builder(
|
|
131
|
+
factory.add_builder(
|
|
132
|
+
ReaderBuilderPandas.ENGINE,
|
|
133
|
+
ReaderBuilderPandas.DATAFRAME_CLASS,
|
|
134
|
+
ReaderBuilderPandas(),
|
|
135
|
+
)
|
|
130
136
|
factory.set_default(ReaderBuilderPandas.ENGINE)
|
|
131
137
|
|
|
132
138
|
except ImportError:
|
|
@@ -8,7 +8,9 @@ import pandas as pd
|
|
|
8
8
|
from pandas.errors import ParserError
|
|
9
9
|
|
|
10
10
|
from digitalhub.readers._base.reader import DataframeReader
|
|
11
|
+
from digitalhub.readers._commons.enums import Extensions
|
|
11
12
|
from digitalhub.utils.data_utils import build_data_preview, get_data_preview
|
|
13
|
+
from digitalhub.utils.exceptions import ReaderError
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
class DataframeReaderPandas(DataframeReader):
|
|
@@ -38,15 +40,17 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
38
40
|
pd.DataFrame
|
|
39
41
|
Pandas DataFrame.
|
|
40
42
|
"""
|
|
41
|
-
if extension ==
|
|
43
|
+
if extension == Extensions.CSV.value:
|
|
42
44
|
method = pd.read_csv
|
|
43
|
-
elif extension ==
|
|
45
|
+
elif extension == Extensions.PARQUET.value:
|
|
44
46
|
method = pd.read_parquet
|
|
45
|
-
elif extension ==
|
|
47
|
+
elif extension == Extensions.FILE.value:
|
|
46
48
|
try:
|
|
47
|
-
return self.read_df(path,
|
|
49
|
+
return self.read_df(path, Extensions.CSV.value, **kwargs)
|
|
48
50
|
except ParserError:
|
|
49
|
-
raise
|
|
51
|
+
raise ReaderError(f"Unable to read from {path}.")
|
|
52
|
+
else:
|
|
53
|
+
raise ReaderError(f"Unsupported extension '{extension}' for reading.")
|
|
50
54
|
|
|
51
55
|
if isinstance(path, list):
|
|
52
56
|
dfs = [method(p, **kwargs) for p in path]
|
|
@@ -57,7 +61,13 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
57
61
|
# Write methods
|
|
58
62
|
##############################
|
|
59
63
|
|
|
60
|
-
def write_df(
|
|
64
|
+
def write_df(
|
|
65
|
+
self,
|
|
66
|
+
df: pd.DataFrame,
|
|
67
|
+
dst: str | BytesIO,
|
|
68
|
+
extension: str | None = None,
|
|
69
|
+
**kwargs,
|
|
70
|
+
) -> None:
|
|
61
71
|
"""
|
|
62
72
|
Write DataFrame as parquet.
|
|
63
73
|
|
|
@@ -74,9 +84,11 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
74
84
|
-------
|
|
75
85
|
None
|
|
76
86
|
"""
|
|
77
|
-
if extension ==
|
|
87
|
+
if extension == Extensions.CSV.value:
|
|
78
88
|
return self.write_csv(df, dst, **kwargs)
|
|
79
|
-
|
|
89
|
+
elif extension == Extensions.PARQUET.value:
|
|
90
|
+
return self.write_parquet(df, dst, **kwargs)
|
|
91
|
+
raise ReaderError(f"Unsupported extension '{extension}' for writing.")
|
|
80
92
|
|
|
81
93
|
@staticmethod
|
|
82
94
|
def write_csv(df: pd.DataFrame, dst: str | BytesIO, **kwargs) -> None:
|
digitalhub/runtimes/_base.py
CHANGED
|
@@ -34,13 +34,6 @@ class Runtime:
|
|
|
34
34
|
Execute run task.
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
|
-
@staticmethod
|
|
38
|
-
@abstractmethod
|
|
39
|
-
def _get_executable(action: str) -> Callable:
|
|
40
|
-
"""
|
|
41
|
-
Get executable from action.
|
|
42
|
-
"""
|
|
43
|
-
|
|
44
37
|
##############################
|
|
45
38
|
# Private methods
|
|
46
39
|
##############################
|
digitalhub/stores/_base/store.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from abc import
|
|
3
|
+
from abc import abstractmethod
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from tempfile import mkdtemp
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Any
|
|
7
7
|
|
|
8
|
-
from pydantic import BaseModel
|
|
8
|
+
from pydantic import BaseModel, ConfigDict
|
|
9
9
|
|
|
10
|
+
from digitalhub.readers.api import get_reader_by_engine
|
|
10
11
|
from digitalhub.utils.exceptions import StoreError
|
|
11
|
-
from digitalhub.utils.uri_utils import
|
|
12
|
+
from digitalhub.utils.uri_utils import SchemeCategory, has_local_scheme
|
|
12
13
|
|
|
13
14
|
|
|
14
|
-
class Store
|
|
15
|
+
class Store:
|
|
15
16
|
"""
|
|
16
17
|
Store abstract class.
|
|
17
18
|
"""
|
|
@@ -35,7 +36,7 @@ class Store(metaclass=ABCMeta):
|
|
|
35
36
|
self.type = store_type
|
|
36
37
|
|
|
37
38
|
##############################
|
|
38
|
-
#
|
|
39
|
+
# I/O methods
|
|
39
40
|
##############################
|
|
40
41
|
|
|
41
42
|
@abstractmethod
|
|
@@ -51,17 +52,60 @@ class Store(metaclass=ABCMeta):
|
|
|
51
52
|
"""
|
|
52
53
|
|
|
53
54
|
@abstractmethod
|
|
54
|
-
def upload(self, src: str | list[str], dst: str
|
|
55
|
+
def upload(self, src: str | list[str], dst: str) -> list[tuple[str, str]]:
|
|
55
56
|
"""
|
|
56
57
|
Method to upload artifact to storage.
|
|
57
58
|
"""
|
|
58
59
|
|
|
59
60
|
@abstractmethod
|
|
60
|
-
def get_file_info(
|
|
61
|
+
def get_file_info(
|
|
62
|
+
self,
|
|
63
|
+
root: str,
|
|
64
|
+
paths: list[tuple[str, str]],
|
|
65
|
+
) -> list[dict]:
|
|
61
66
|
"""
|
|
62
67
|
Method to get file metadata.
|
|
63
68
|
"""
|
|
64
69
|
|
|
70
|
+
##############################
|
|
71
|
+
# Datastore methods
|
|
72
|
+
##############################
|
|
73
|
+
|
|
74
|
+
@abstractmethod
|
|
75
|
+
def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
|
|
76
|
+
"""
|
|
77
|
+
Write DataFrame as parquet or csv.
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
def read_df(
|
|
81
|
+
self,
|
|
82
|
+
path: str | list[str],
|
|
83
|
+
extension: str,
|
|
84
|
+
engine: str | None = None,
|
|
85
|
+
**kwargs,
|
|
86
|
+
) -> Any:
|
|
87
|
+
"""
|
|
88
|
+
Read DataFrame from path.
|
|
89
|
+
|
|
90
|
+
Parameters
|
|
91
|
+
----------
|
|
92
|
+
path : str | list[str]
|
|
93
|
+
Path(s) to read DataFrame from.
|
|
94
|
+
extension : str
|
|
95
|
+
Extension of the file.
|
|
96
|
+
engine : str
|
|
97
|
+
Dataframe engine (pandas, polars, etc.).
|
|
98
|
+
**kwargs : dict
|
|
99
|
+
Keyword arguments.
|
|
100
|
+
|
|
101
|
+
Returns
|
|
102
|
+
-------
|
|
103
|
+
Any
|
|
104
|
+
DataFrame.
|
|
105
|
+
"""
|
|
106
|
+
reader = get_reader_by_engine(engine)
|
|
107
|
+
return reader.read_df(path, extension, **kwargs)
|
|
108
|
+
|
|
65
109
|
##############################
|
|
66
110
|
# Helpers methods
|
|
67
111
|
##############################
|
|
@@ -84,7 +128,7 @@ class Store(metaclass=ABCMeta):
|
|
|
84
128
|
StoreError
|
|
85
129
|
If the source is not a local path.
|
|
86
130
|
"""
|
|
87
|
-
if
|
|
131
|
+
if not has_local_scheme(src):
|
|
88
132
|
raise StoreError(f"Source '{src}' is not a local path.")
|
|
89
133
|
|
|
90
134
|
def _check_local_dst(self, dst: str) -> None:
|
|
@@ -105,7 +149,7 @@ class Store(metaclass=ABCMeta):
|
|
|
105
149
|
StoreError
|
|
106
150
|
If the destination is not a local path.
|
|
107
151
|
"""
|
|
108
|
-
if
|
|
152
|
+
if not has_local_scheme(dst):
|
|
109
153
|
raise StoreError(f"Destination '{dst}' is not a local path.")
|
|
110
154
|
|
|
111
155
|
def _check_overwrite(self, dst: Path, overwrite: bool) -> None:
|
|
@@ -170,16 +214,20 @@ class StoreConfig(BaseModel):
|
|
|
170
214
|
Store configuration base class.
|
|
171
215
|
"""
|
|
172
216
|
|
|
217
|
+
model_config = ConfigDict(use_enum_values=True)
|
|
218
|
+
|
|
173
219
|
|
|
174
220
|
class StoreParameters(BaseModel):
|
|
175
221
|
"""
|
|
176
222
|
Store configuration class.
|
|
177
223
|
"""
|
|
178
224
|
|
|
225
|
+
model_config = ConfigDict(use_enum_values=True)
|
|
226
|
+
|
|
179
227
|
name: str
|
|
180
228
|
"""Store id."""
|
|
181
229
|
|
|
182
|
-
type:
|
|
230
|
+
type: SchemeCategory
|
|
183
231
|
"""Store type to instantiate."""
|
|
184
232
|
|
|
185
233
|
config: StoreConfig = None
|
digitalhub/stores/builder.py
CHANGED
|
@@ -11,7 +11,7 @@ from digitalhub.stores.remote.store import RemoteStore, RemoteStoreConfig
|
|
|
11
11
|
from digitalhub.stores.s3.store import S3Store, S3StoreConfig
|
|
12
12
|
from digitalhub.stores.sql.store import SqlStore, SQLStoreConfig
|
|
13
13
|
from digitalhub.utils.exceptions import StoreError
|
|
14
|
-
from digitalhub.utils.uri_utils import map_uri_scheme
|
|
14
|
+
from digitalhub.utils.uri_utils import SchemeCategory, map_uri_scheme
|
|
15
15
|
|
|
16
16
|
if typing.TYPE_CHECKING:
|
|
17
17
|
from digitalhub.stores._base.store import Store
|
|
@@ -167,7 +167,7 @@ def get_env_store_config(scheme: str) -> StoreParameters:
|
|
|
167
167
|
ValueError
|
|
168
168
|
If the scheme is not supported.
|
|
169
169
|
"""
|
|
170
|
-
if scheme ==
|
|
170
|
+
if scheme == SchemeCategory.S3.value:
|
|
171
171
|
return StoreParameters(
|
|
172
172
|
name="s3",
|
|
173
173
|
type="s3",
|
|
@@ -178,7 +178,7 @@ def get_env_store_config(scheme: str) -> StoreParameters:
|
|
|
178
178
|
bucket_name=os.getenv("S3_BUCKET_NAME"), # type: ignore
|
|
179
179
|
),
|
|
180
180
|
)
|
|
181
|
-
if scheme ==
|
|
181
|
+
if scheme == SchemeCategory.SQL.value:
|
|
182
182
|
return StoreParameters(
|
|
183
183
|
name="sql",
|
|
184
184
|
type="sql",
|
|
@@ -191,13 +191,13 @@ def get_env_store_config(scheme: str) -> StoreParameters:
|
|
|
191
191
|
pg_schema=os.getenv("POSTGRES_SCHEMA"), # type: ignore
|
|
192
192
|
),
|
|
193
193
|
)
|
|
194
|
-
if scheme ==
|
|
194
|
+
if scheme == SchemeCategory.REMOTE.value:
|
|
195
195
|
return StoreParameters(
|
|
196
196
|
name="remote",
|
|
197
197
|
type="remote",
|
|
198
198
|
config=RemoteStoreConfig(),
|
|
199
199
|
)
|
|
200
|
-
if scheme ==
|
|
200
|
+
if scheme == SchemeCategory.LOCAL.value:
|
|
201
201
|
return StoreParameters(
|
|
202
202
|
name="local",
|
|
203
203
|
type="local",
|