digitalhub 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +2 -3
- digitalhub/client/_base/api_builder.py +1 -1
- digitalhub/client/_base/client.py +25 -2
- digitalhub/client/_base/params_builder.py +16 -0
- digitalhub/client/dhcore/api_builder.py +10 -4
- digitalhub/client/dhcore/client.py +30 -398
- digitalhub/client/dhcore/configurator.py +361 -0
- digitalhub/client/dhcore/error_parser.py +107 -0
- digitalhub/client/dhcore/models.py +13 -23
- digitalhub/client/dhcore/params_builder.py +178 -0
- digitalhub/client/dhcore/utils.py +4 -44
- digitalhub/client/local/api_builder.py +13 -18
- digitalhub/client/local/client.py +18 -2
- digitalhub/client/local/enums.py +11 -0
- digitalhub/client/local/params_builder.py +116 -0
- digitalhub/configurator/api.py +31 -0
- digitalhub/configurator/configurator.py +195 -0
- digitalhub/configurator/credentials_store.py +65 -0
- digitalhub/configurator/ini_module.py +74 -0
- digitalhub/entities/_base/_base/entity.py +2 -2
- digitalhub/entities/_base/context/entity.py +4 -4
- digitalhub/entities/_base/entity/builder.py +5 -5
- digitalhub/entities/_base/executable/entity.py +2 -2
- digitalhub/entities/_base/material/entity.py +12 -12
- digitalhub/entities/_base/material/status.py +1 -1
- digitalhub/entities/_base/material/utils.py +2 -2
- digitalhub/entities/_base/unversioned/entity.py +2 -2
- digitalhub/entities/_base/versioned/entity.py +2 -2
- digitalhub/entities/_commons/enums.py +2 -0
- digitalhub/entities/_commons/metrics.py +164 -0
- digitalhub/entities/_commons/types.py +5 -0
- digitalhub/entities/_commons/utils.py +2 -2
- digitalhub/entities/_processors/base.py +527 -0
- digitalhub/entities/{_operations/processor.py → _processors/context.py} +212 -837
- digitalhub/entities/_processors/utils.py +158 -0
- digitalhub/entities/artifact/artifact/spec.py +3 -1
- digitalhub/entities/artifact/crud.py +13 -12
- digitalhub/entities/artifact/utils.py +1 -1
- digitalhub/entities/builders.py +6 -18
- digitalhub/entities/dataitem/_base/entity.py +0 -41
- digitalhub/entities/dataitem/crud.py +27 -15
- digitalhub/entities/dataitem/table/entity.py +49 -35
- digitalhub/entities/dataitem/table/models.py +4 -3
- digitalhub/{utils/data_utils.py → entities/dataitem/table/utils.py} +46 -54
- digitalhub/entities/dataitem/utils.py +58 -10
- digitalhub/entities/function/crud.py +9 -9
- digitalhub/entities/model/_base/entity.py +120 -0
- digitalhub/entities/model/_base/spec.py +6 -17
- digitalhub/entities/model/_base/status.py +10 -0
- digitalhub/entities/model/crud.py +13 -12
- digitalhub/entities/model/huggingface/spec.py +9 -4
- digitalhub/entities/model/mlflow/models.py +2 -2
- digitalhub/entities/model/mlflow/spec.py +7 -7
- digitalhub/entities/model/mlflow/utils.py +44 -5
- digitalhub/entities/project/_base/entity.py +317 -9
- digitalhub/entities/project/_base/spec.py +8 -6
- digitalhub/entities/project/crud.py +12 -11
- digitalhub/entities/run/_base/entity.py +103 -6
- digitalhub/entities/run/_base/spec.py +4 -2
- digitalhub/entities/run/_base/status.py +12 -0
- digitalhub/entities/run/crud.py +8 -8
- digitalhub/entities/secret/_base/entity.py +3 -3
- digitalhub/entities/secret/_base/spec.py +4 -2
- digitalhub/entities/secret/crud.py +11 -9
- digitalhub/entities/task/_base/entity.py +4 -4
- digitalhub/entities/task/_base/models.py +51 -40
- digitalhub/entities/task/_base/spec.py +2 -0
- digitalhub/entities/task/_base/utils.py +2 -2
- digitalhub/entities/task/crud.py +12 -8
- digitalhub/entities/workflow/crud.py +9 -9
- digitalhub/factory/utils.py +9 -9
- digitalhub/readers/{_base → data/_base}/builder.py +1 -1
- digitalhub/readers/{_base → data/_base}/reader.py +16 -4
- digitalhub/readers/{api.py → data/api.py} +2 -2
- digitalhub/readers/{factory.py → data/factory.py} +3 -3
- digitalhub/readers/{pandas → data/pandas}/builder.py +2 -2
- digitalhub/readers/{pandas → data/pandas}/reader.py +110 -30
- digitalhub/readers/query/__init__.py +0 -0
- digitalhub/stores/_base/store.py +59 -69
- digitalhub/stores/api.py +8 -33
- digitalhub/stores/builder.py +44 -161
- digitalhub/stores/local/store.py +106 -89
- digitalhub/stores/remote/store.py +86 -11
- digitalhub/stores/s3/configurator.py +108 -0
- digitalhub/stores/s3/enums.py +17 -0
- digitalhub/stores/s3/models.py +21 -0
- digitalhub/stores/s3/store.py +154 -70
- digitalhub/{utils/s3_utils.py → stores/s3/utils.py} +7 -3
- digitalhub/stores/sql/configurator.py +88 -0
- digitalhub/stores/sql/enums.py +16 -0
- digitalhub/stores/sql/models.py +24 -0
- digitalhub/stores/sql/store.py +106 -85
- digitalhub/{readers/_commons → utils}/enums.py +5 -1
- digitalhub/utils/exceptions.py +6 -0
- digitalhub/utils/file_utils.py +8 -7
- digitalhub/utils/generic_utils.py +28 -15
- digitalhub/utils/git_utils.py +16 -9
- digitalhub/utils/types.py +5 -0
- digitalhub/utils/uri_utils.py +2 -2
- {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info}/METADATA +25 -31
- {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info}/RECORD +108 -99
- {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info}/WHEEL +1 -2
- digitalhub/client/dhcore/env.py +0 -23
- digitalhub/entities/_base/project/entity.py +0 -341
- digitalhub-0.9.1.dist-info/top_level.txt +0 -2
- test/local/CRUD/test_artifacts.py +0 -96
- test/local/CRUD/test_dataitems.py +0 -96
- test/local/CRUD/test_models.py +0 -95
- test/local/imports/test_imports.py +0 -66
- test/local/instances/test_validate.py +0 -55
- test/test_crud_functions.py +0 -109
- test/test_crud_runs.py +0 -86
- test/test_crud_tasks.py +0 -81
- test/testkfp.py +0 -37
- test/testkfp_pipeline.py +0 -22
- /digitalhub/{entities/_base/project → configurator}/__init__.py +0 -0
- /digitalhub/entities/{_operations → _processors}/__init__.py +0 -0
- /digitalhub/readers/{_base → data}/__init__.py +0 -0
- /digitalhub/readers/{_commons → data/_base}/__init__.py +0 -0
- /digitalhub/readers/{pandas → data/pandas}/__init__.py +0 -0
- {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info/licenses}/LICENSE.txt +0 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
from digitalhub.client.api import get_client
|
|
6
|
+
from digitalhub.context.api import get_context
|
|
7
|
+
from digitalhub.entities._commons.enums import ApiCategories, BackendOperations, EntityTypes
|
|
8
|
+
from digitalhub.entities._commons.utils import get_project_from_key, parse_entity_key
|
|
9
|
+
from digitalhub.factory.api import build_entity_from_dict
|
|
10
|
+
from digitalhub.utils.exceptions import ContextError, EntityError, EntityNotExistsError
|
|
11
|
+
|
|
12
|
+
if typing.TYPE_CHECKING:
|
|
13
|
+
from digitalhub.client._base.client import Client
|
|
14
|
+
from digitalhub.context.context import Context
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def parse_identifier(
    identifier: str,
    project: str | None = None,
    entity_type: str | None = None,
    entity_kind: str | None = None,
    entity_id: str | None = None,
) -> tuple[str, str, str | None, str | None, str | None]:
    """
    Resolve an entity identifier into its components.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name. Required when identifier is an entity name.
    entity_type : str
        Entity type. Required when identifier is an entity name.
    entity_kind : str
        Entity kind.
    entity_id : str
        Entity ID.

    Returns
    -------
    tuple[str, str, str | None, str | None, str | None]
        Project name, entity type, entity kind, entity name, entity ID.

    Raises
    ------
    ValueError
        If identifier is an entity name and project or entity type
        is not given.
    """
    # An entity key is handed over to the key parser as is.
    if identifier.startswith("store://"):
        return parse_entity_key(identifier)

    # A plain name carries no other information: the caller must
    # provide at least the project and the entity type.
    missing_required = project is None or entity_type is None
    if missing_required:
        raise ValueError("Project and entity type must be specified.")

    # The identifier itself is the entity name.
    return project, entity_type, entity_kind, identifier, entity_id
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_context_from_identifier(
    identifier: str,
    project: str | None = None,
) -> Context:
    """
    Get the context of the project an identifier refers to.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name. Required when identifier is an entity name;
        replaced by the project read from the key otherwise.

    Returns
    -------
    Context
        Context.

    Raises
    ------
    EntityError
        If identifier is an entity name and no project is given.
    """
    if identifier.startswith("store://"):
        # The key wins over any project passed explicitly.
        project = get_project_from_key(identifier)
    elif project is None:
        raise EntityError("Specify project if you do not specify entity key.")

    return get_context_from_project(project)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_context_from_project(
    project: str,
) -> Context:
    """
    Get the context of a project.

    The context is first looked up with get_context. If that raises
    ContextError, the lookup falls back to get_context_from_remote.

    Parameters
    ----------
    project : str
        Project name.

    Returns
    -------
    Context
        Context.
    """
    try:
        ctx = get_context(project)
    except ContextError:
        # Not available through get_context: fall back to the remote lookup.
        ctx = get_context_from_remote(project)
    return ctx
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_context_from_remote(
    project: str,
) -> Context:
    """
    Get context from remote.

    The project is read from the backend through the client, built
    with build_entity_from_dict, and its context is then looked up
    by project name.

    Parameters
    ----------
    project : str
        Project name.

    Returns
    -------
    Context
        Context.

    Raises
    ------
    ContextError
        If an EntityNotExistsError is raised while the project is
        read, built or looked up.
    """
    try:
        client = get_client()
        obj = _read_base_entity(client, EntityTypes.PROJECT.value, project)
        # The built entity is discarded on purpose; building the project is
        # presumably what makes its context available -- TODO confirm.
        build_entity_from_dict(obj)
        return get_context(project)
    except EntityNotExistsError as err:
        # Chain explicitly so the backend error stays visible as the cause.
        raise ContextError(f"Project '{project}' not found.") from err
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _read_base_entity(
    client: Client,
    entity_type: str,
    entity_name: str,
    **kwargs,
) -> dict:
    """
    Read a base-level object from the backend.

    Parameters
    ----------
    client : Client
        Client instance.
    entity_type : str
        Entity type.
    entity_name : str
        Entity name.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    dict
        Object returned by the client read call.
    """
    # Base category, read operation.
    category = ApiCategories.BASE.value
    operation = BackendOperations.READ.value

    endpoint = client.build_api(
        category,
        operation,
        entity_type=entity_type,
        entity_name=entity_name,
    )
    return client.read_object(endpoint, **kwargs)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
3
5
|
from digitalhub.entities.artifact._base.spec import ArtifactSpec, ArtifactValidator
|
|
4
6
|
|
|
5
7
|
|
|
@@ -23,5 +25,5 @@ class ArtifactValidatorArtifact(ArtifactValidator):
|
|
|
23
25
|
ArtifactValidatorArtifact validator.
|
|
24
26
|
"""
|
|
25
27
|
|
|
26
|
-
src_path: str = None
|
|
28
|
+
src_path: Optional[str] = None
|
|
27
29
|
"""Source path of the artifact."""
|
|
@@ -3,9 +3,10 @@ from __future__ import annotations
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
from digitalhub.entities._commons.enums import EntityTypes
|
|
6
|
-
from digitalhub.entities.
|
|
6
|
+
from digitalhub.entities._processors.context import context_processor
|
|
7
7
|
from digitalhub.entities.artifact._base.entity import Artifact
|
|
8
8
|
from digitalhub.entities.artifact.utils import eval_source, process_kwargs
|
|
9
|
+
from digitalhub.utils.types import SourcesOrListOfSources
|
|
9
10
|
|
|
10
11
|
if typing.TYPE_CHECKING:
|
|
11
12
|
from digitalhub.entities.artifact._base.entity import Artifact
|
|
@@ -61,7 +62,7 @@ def new_artifact(
|
|
|
61
62
|
>>> kind="artifact",
|
|
62
63
|
>>> path="s3://my-bucket/my-key")
|
|
63
64
|
"""
|
|
64
|
-
return
|
|
65
|
+
return context_processor.create_context_entity(
|
|
65
66
|
project=project,
|
|
66
67
|
name=name,
|
|
67
68
|
kind=kind,
|
|
@@ -78,7 +79,7 @@ def log_artifact(
|
|
|
78
79
|
project: str,
|
|
79
80
|
name: str,
|
|
80
81
|
kind: str,
|
|
81
|
-
source:
|
|
82
|
+
source: SourcesOrListOfSources,
|
|
82
83
|
path: str | None = None,
|
|
83
84
|
**kwargs,
|
|
84
85
|
) -> Artifact:
|
|
@@ -93,7 +94,7 @@ def log_artifact(
|
|
|
93
94
|
Object name.
|
|
94
95
|
kind : str
|
|
95
96
|
Kind of the object.
|
|
96
|
-
source :
|
|
97
|
+
source : SourcesOrListOfSources
|
|
97
98
|
Artifact location on local path.
|
|
98
99
|
path : str
|
|
99
100
|
Destination path of the artifact. If not provided, it's generated.
|
|
@@ -114,7 +115,7 @@ def log_artifact(
|
|
|
114
115
|
"""
|
|
115
116
|
eval_source(source)
|
|
116
117
|
kwargs = process_kwargs(project, name, source=source, path=path, **kwargs)
|
|
117
|
-
return
|
|
118
|
+
return context_processor.log_material_entity(
|
|
118
119
|
source=source,
|
|
119
120
|
project=project,
|
|
120
121
|
name=name,
|
|
@@ -158,7 +159,7 @@ def get_artifact(
|
|
|
158
159
|
>>> project="my-project",
|
|
159
160
|
>>> entity_id="my-artifact-id")
|
|
160
161
|
"""
|
|
161
|
-
return
|
|
162
|
+
return context_processor.read_context_entity(
|
|
162
163
|
identifier=identifier,
|
|
163
164
|
entity_type=ENTITY_TYPE,
|
|
164
165
|
project=project,
|
|
@@ -198,7 +199,7 @@ def get_artifact_versions(
|
|
|
198
199
|
>>> obj = get_artifact_versions("my-artifact-name",
|
|
199
200
|
>>> project="my-project")
|
|
200
201
|
"""
|
|
201
|
-
return
|
|
202
|
+
return context_processor.read_context_entity_versions(
|
|
202
203
|
identifier=identifier,
|
|
203
204
|
entity_type=ENTITY_TYPE,
|
|
204
205
|
project=project,
|
|
@@ -226,7 +227,7 @@ def list_artifacts(project: str, **kwargs) -> list[Artifact]:
|
|
|
226
227
|
--------
|
|
227
228
|
>>> objs = list_artifacts(project="my-project")
|
|
228
229
|
"""
|
|
229
|
-
return
|
|
230
|
+
return context_processor.list_context_entities(
|
|
230
231
|
project=project,
|
|
231
232
|
entity_type=ENTITY_TYPE,
|
|
232
233
|
**kwargs,
|
|
@@ -251,7 +252,7 @@ def import_artifact(file: str) -> Artifact:
|
|
|
251
252
|
--------
|
|
252
253
|
>>> obj = import_artifact("my-artifact.yaml")
|
|
253
254
|
"""
|
|
254
|
-
return
|
|
255
|
+
return context_processor.import_context_entity(file)
|
|
255
256
|
|
|
256
257
|
|
|
257
258
|
def load_artifact(file: str) -> Artifact:
|
|
@@ -272,7 +273,7 @@ def load_artifact(file: str) -> Artifact:
|
|
|
272
273
|
--------
|
|
273
274
|
>>> obj = load_artifact("my-artifact.yaml")
|
|
274
275
|
"""
|
|
275
|
-
return
|
|
276
|
+
return context_processor.load_context_entity(file)
|
|
276
277
|
|
|
277
278
|
|
|
278
279
|
def update_artifact(entity: Artifact) -> Artifact:
|
|
@@ -293,7 +294,7 @@ def update_artifact(entity: Artifact) -> Artifact:
|
|
|
293
294
|
--------
|
|
294
295
|
>>> obj = update_artifact(obj)
|
|
295
296
|
"""
|
|
296
|
-
return
|
|
297
|
+
return context_processor.update_context_entity(
|
|
297
298
|
project=entity.project,
|
|
298
299
|
entity_type=entity.ENTITY_TYPE,
|
|
299
300
|
entity_id=entity.id,
|
|
@@ -339,7 +340,7 @@ def delete_artifact(
|
|
|
339
340
|
>>> project="my-project",
|
|
340
341
|
>>> delete_all_versions=True)
|
|
341
342
|
"""
|
|
342
|
-
return
|
|
343
|
+
return context_processor.delete_context_entity(
|
|
343
344
|
identifier=identifier,
|
|
344
345
|
entity_type=ENTITY_TYPE,
|
|
345
346
|
project=project,
|
|
@@ -56,7 +56,7 @@ def process_kwargs(
|
|
|
56
56
|
if path is None:
|
|
57
57
|
uuid = build_uuid()
|
|
58
58
|
kwargs["uuid"] = uuid
|
|
59
|
-
kwargs["path"] = build_log_path_from_source(project, EntityTypes.
|
|
59
|
+
kwargs["path"] = build_log_path_from_source(project, EntityTypes.ARTIFACT.value, name, uuid, source)
|
|
60
60
|
else:
|
|
61
61
|
kwargs["path"] = path
|
|
62
62
|
return kwargs
|
digitalhub/entities/builders.py
CHANGED
|
@@ -7,7 +7,7 @@ from digitalhub.entities.model.mlflow.builder import ModelModelBuilder
|
|
|
7
7
|
from digitalhub.entities.project._base.builder import ProjectProjectBuilder
|
|
8
8
|
from digitalhub.entities.secret._base.builder import SecretSecretBuilder
|
|
9
9
|
|
|
10
|
-
entity_builders = (
|
|
10
|
+
entity_builders: tuple = (
|
|
11
11
|
(ProjectProjectBuilder.ENTITY_KIND, ProjectProjectBuilder),
|
|
12
12
|
(SecretSecretBuilder.ENTITY_KIND, SecretSecretBuilder),
|
|
13
13
|
(ArtifactArtifactBuilder.ENTITY_KIND, ArtifactArtifactBuilder),
|
|
@@ -17,17 +17,14 @@ entity_builders = (
|
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
##############################
|
|
20
|
-
#
|
|
20
|
+
# Add custom entities here
|
|
21
21
|
##############################
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
try:
|
|
25
25
|
from digitalhub.entities.dataitem.iceberg.builder import DataitemIcebergBuilder
|
|
26
26
|
|
|
27
|
-
entity_builders
|
|
28
|
-
*entity_builders,
|
|
29
|
-
(DataitemIcebergBuilder.ENTITY_KIND, DataitemIcebergBuilder),
|
|
30
|
-
)
|
|
27
|
+
entity_builders += ((DataitemIcebergBuilder.ENTITY_KIND, DataitemIcebergBuilder),)
|
|
31
28
|
except ImportError:
|
|
32
29
|
...
|
|
33
30
|
|
|
@@ -35,29 +32,20 @@ except ImportError:
|
|
|
35
32
|
try:
|
|
36
33
|
from digitalhub.entities.model.model.builder import ModelMlflowBuilder
|
|
37
34
|
|
|
38
|
-
entity_builders
|
|
39
|
-
*entity_builders,
|
|
40
|
-
(ModelMlflowBuilder.ENTITY_KIND, ModelMlflowBuilder),
|
|
41
|
-
)
|
|
35
|
+
entity_builders += ((ModelMlflowBuilder.ENTITY_KIND, ModelMlflowBuilder),)
|
|
42
36
|
except ImportError:
|
|
43
37
|
...
|
|
44
38
|
|
|
45
39
|
try:
|
|
46
40
|
from digitalhub.entities.model.sklearn.builder import ModelSklearnBuilder
|
|
47
41
|
|
|
48
|
-
entity_builders
|
|
49
|
-
*entity_builders,
|
|
50
|
-
(ModelSklearnBuilder.ENTITY_KIND, ModelSklearnBuilder),
|
|
51
|
-
)
|
|
42
|
+
entity_builders += ((ModelSklearnBuilder.ENTITY_KIND, ModelSklearnBuilder),)
|
|
52
43
|
except ImportError:
|
|
53
44
|
...
|
|
54
45
|
|
|
55
46
|
try:
|
|
56
47
|
from digitalhub.entities.model.huggingface.builder import ModelHuggingfaceBuilder
|
|
57
48
|
|
|
58
|
-
entity_builders
|
|
59
|
-
*entity_builders,
|
|
60
|
-
(ModelHuggingfaceBuilder.ENTITY_KIND, ModelHuggingfaceBuilder),
|
|
61
|
-
)
|
|
49
|
+
entity_builders += ((ModelHuggingfaceBuilder.ENTITY_KIND, ModelHuggingfaceBuilder),)
|
|
62
50
|
except ImportError:
|
|
63
51
|
...
|
|
@@ -1,13 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
|
-
from pathlib import Path
|
|
5
4
|
|
|
6
5
|
from digitalhub.entities._base.material.entity import MaterialEntity
|
|
7
6
|
from digitalhub.entities._commons.enums import EntityTypes
|
|
8
|
-
from digitalhub.readers._commons.enums import Extensions
|
|
9
|
-
from digitalhub.utils.exceptions import EntityError
|
|
10
|
-
from digitalhub.utils.uri_utils import has_sql_scheme
|
|
11
7
|
|
|
12
8
|
if typing.TYPE_CHECKING:
|
|
13
9
|
from digitalhub.entities._base.entity.metadata import Metadata
|
|
@@ -36,40 +32,3 @@ class Dataitem(MaterialEntity):
|
|
|
36
32
|
super().__init__(project, name, uuid, kind, metadata, spec, status, user)
|
|
37
33
|
self.spec: DataitemSpec
|
|
38
34
|
self.status: DataitemStatus
|
|
39
|
-
|
|
40
|
-
##############################
|
|
41
|
-
# Helper methods
|
|
42
|
-
##############################
|
|
43
|
-
|
|
44
|
-
@staticmethod
|
|
45
|
-
def _get_extension(path: str, file_format: str | None = None) -> str:
|
|
46
|
-
"""
|
|
47
|
-
Get extension of path.
|
|
48
|
-
|
|
49
|
-
Parameters
|
|
50
|
-
----------
|
|
51
|
-
path : str
|
|
52
|
-
Path to get extension from.
|
|
53
|
-
file_format : str
|
|
54
|
-
File format.
|
|
55
|
-
|
|
56
|
-
Returns
|
|
57
|
-
-------
|
|
58
|
-
str
|
|
59
|
-
File extension.
|
|
60
|
-
|
|
61
|
-
Raises
|
|
62
|
-
------
|
|
63
|
-
EntityError
|
|
64
|
-
If file format is not supported.
|
|
65
|
-
"""
|
|
66
|
-
if file_format is not None:
|
|
67
|
-
return file_format
|
|
68
|
-
|
|
69
|
-
if has_sql_scheme(path):
|
|
70
|
-
return Extensions.PARQUET.value
|
|
71
|
-
|
|
72
|
-
ext = Path(path).suffix[1:]
|
|
73
|
-
if ext is not None:
|
|
74
|
-
return ext
|
|
75
|
-
raise EntityError("Unknown file format. Only csv and parquet are supported.")
|
|
@@ -4,8 +4,9 @@ import typing
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
from digitalhub.entities._commons.enums import EntityTypes
|
|
7
|
-
from digitalhub.entities.
|
|
8
|
-
from digitalhub.entities.dataitem.utils import clean_tmp_path, eval_source, post_process, process_kwargs
|
|
7
|
+
from digitalhub.entities._processors.context import context_processor
|
|
8
|
+
from digitalhub.entities.dataitem.utils import clean_tmp_path, eval_data, eval_source, post_process, process_kwargs
|
|
9
|
+
from digitalhub.utils.types import SourcesOrListOfSources
|
|
9
10
|
|
|
10
11
|
if typing.TYPE_CHECKING:
|
|
11
12
|
from digitalhub.entities.dataitem._base.entity import Dataitem
|
|
@@ -61,7 +62,7 @@ def new_dataitem(
|
|
|
61
62
|
>>> kind="dataitem",
|
|
62
63
|
>>> path="s3://my-bucket/my-key")
|
|
63
64
|
"""
|
|
64
|
-
return
|
|
65
|
+
return context_processor.create_context_entity(
|
|
65
66
|
project=project,
|
|
66
67
|
name=name,
|
|
67
68
|
kind=kind,
|
|
@@ -78,9 +79,11 @@ def log_dataitem(
|
|
|
78
79
|
project: str,
|
|
79
80
|
name: str,
|
|
80
81
|
kind: str,
|
|
81
|
-
source:
|
|
82
|
+
source: SourcesOrListOfSources | None = None,
|
|
82
83
|
data: Any | None = None,
|
|
83
84
|
path: str | None = None,
|
|
85
|
+
file_format: str | None = None,
|
|
86
|
+
engine: str | None = "pandas",
|
|
84
87
|
**kwargs,
|
|
85
88
|
) -> Dataitem:
|
|
86
89
|
"""
|
|
@@ -94,12 +97,16 @@ def log_dataitem(
|
|
|
94
97
|
Object name.
|
|
95
98
|
kind : str
|
|
96
99
|
Kind of the object.
|
|
97
|
-
source :
|
|
100
|
+
source : SourcesOrListOfSources
|
|
98
101
|
Dataitem location on local path.
|
|
99
102
|
data : Any
|
|
100
103
|
Dataframe to log. Alternative to source.
|
|
101
104
|
path : str
|
|
102
105
|
Destination path of the dataitem. If not provided, it's generated.
|
|
106
|
+
file_format : str
|
|
107
|
+
Extension of the file.
|
|
108
|
+
engine : str
|
|
109
|
+
Dataframe engine (pandas, polars, etc.).
|
|
103
110
|
**kwargs : dict
|
|
104
111
|
New dataitem spec parameters.
|
|
105
112
|
|
|
@@ -115,17 +122,22 @@ def log_dataitem(
|
|
|
115
122
|
>>> kind="table",
|
|
116
123
|
>>> data=df)
|
|
117
124
|
"""
|
|
125
|
+
cleanup = False
|
|
126
|
+
if data is not None:
|
|
127
|
+
cleanup = True
|
|
128
|
+
|
|
118
129
|
source = eval_source(source, data, kind, name, project)
|
|
130
|
+
data = eval_data(project, kind, source, data, file_format, engine)
|
|
119
131
|
kwargs = process_kwargs(project, name, kind, source=source, data=data, path=path, **kwargs)
|
|
120
|
-
obj =
|
|
132
|
+
obj = context_processor.log_material_entity(
|
|
121
133
|
source=source,
|
|
122
134
|
project=project,
|
|
123
135
|
name=name,
|
|
124
136
|
kind=kind,
|
|
125
137
|
**kwargs,
|
|
126
138
|
)
|
|
127
|
-
|
|
128
|
-
|
|
139
|
+
obj = post_process(obj, data)
|
|
140
|
+
if cleanup:
|
|
129
141
|
clean_tmp_path(source)
|
|
130
142
|
return obj
|
|
131
143
|
|
|
@@ -165,7 +177,7 @@ def get_dataitem(
|
|
|
165
177
|
>>> project="my-project",
|
|
166
178
|
>>> entity_id="my-dataitem-id")
|
|
167
179
|
"""
|
|
168
|
-
return
|
|
180
|
+
return context_processor.read_context_entity(
|
|
169
181
|
identifier=identifier,
|
|
170
182
|
entity_type=ENTITY_TYPE,
|
|
171
183
|
project=project,
|
|
@@ -205,7 +217,7 @@ def get_dataitem_versions(
|
|
|
205
217
|
>>> objs = get_dataitem_versions("my-dataitem-name",
|
|
206
218
|
>>> project="my-project")
|
|
207
219
|
"""
|
|
208
|
-
return
|
|
220
|
+
return context_processor.read_context_entity_versions(
|
|
209
221
|
identifier=identifier,
|
|
210
222
|
entity_type=ENTITY_TYPE,
|
|
211
223
|
project=project,
|
|
@@ -233,7 +245,7 @@ def list_dataitems(project: str, **kwargs) -> list[Dataitem]:
|
|
|
233
245
|
--------
|
|
234
246
|
>>> objs = list_dataitems(project="my-project")
|
|
235
247
|
"""
|
|
236
|
-
return
|
|
248
|
+
return context_processor.list_context_entities(
|
|
237
249
|
project=project,
|
|
238
250
|
entity_type=ENTITY_TYPE,
|
|
239
251
|
**kwargs,
|
|
@@ -258,7 +270,7 @@ def import_dataitem(file: str) -> Dataitem:
|
|
|
258
270
|
--------
|
|
259
271
|
>>> obj = import_dataitem("my-dataitem.yaml")
|
|
260
272
|
"""
|
|
261
|
-
return
|
|
273
|
+
return context_processor.import_context_entity(file)
|
|
262
274
|
|
|
263
275
|
|
|
264
276
|
def load_dataitem(file: str) -> Dataitem:
|
|
@@ -279,7 +291,7 @@ def load_dataitem(file: str) -> Dataitem:
|
|
|
279
291
|
--------
|
|
280
292
|
>>> obj = load_dataitem("my-dataitem.yaml")
|
|
281
293
|
"""
|
|
282
|
-
return
|
|
294
|
+
return context_processor.load_context_entity(file)
|
|
283
295
|
|
|
284
296
|
|
|
285
297
|
def update_dataitem(entity: Dataitem) -> Dataitem:
|
|
@@ -300,7 +312,7 @@ def update_dataitem(entity: Dataitem) -> Dataitem:
|
|
|
300
312
|
--------
|
|
301
313
|
>>> obj = update_dataitem(obj)
|
|
302
314
|
"""
|
|
303
|
-
return
|
|
315
|
+
return context_processor.update_context_entity(
|
|
304
316
|
project=entity.project,
|
|
305
317
|
entity_type=entity.ENTITY_TYPE,
|
|
306
318
|
entity_id=entity.id,
|
|
@@ -346,7 +358,7 @@ def delete_dataitem(
|
|
|
346
358
|
>>> project="my-project",
|
|
347
359
|
>>> delete_all_versions=True)
|
|
348
360
|
"""
|
|
349
|
-
return
|
|
361
|
+
return context_processor.delete_context_entity(
|
|
350
362
|
identifier=identifier,
|
|
351
363
|
entity_type=ENTITY_TYPE,
|
|
352
364
|
project=project,
|
|
@@ -7,7 +7,7 @@ from typing import Any
|
|
|
7
7
|
|
|
8
8
|
from digitalhub.entities.dataitem._base.entity import Dataitem
|
|
9
9
|
from digitalhub.stores.api import get_store
|
|
10
|
-
from digitalhub.utils.uri_utils import
|
|
10
|
+
from digitalhub.utils.uri_utils import has_sql_scheme
|
|
11
11
|
|
|
12
12
|
if typing.TYPE_CHECKING:
|
|
13
13
|
from digitalhub.entities._base.entity.metadata import Metadata
|
|
@@ -36,19 +36,39 @@ class DataitemTable(Dataitem):
|
|
|
36
36
|
self.spec: DataitemSpecTable
|
|
37
37
|
self.status: DataitemStatusTable
|
|
38
38
|
|
|
39
|
+
self._query: str | None = None
|
|
40
|
+
|
|
41
|
+
def query(self, query: str) -> DataitemTable:
    """
    Set query to execute.

    The query is only stored on the object; it is not executed here.

    Parameters
    ----------
    query : str
        Query to execute.

    Returns
    -------
    DataitemTable
        Self object.

    Raises
    ------
    ValueError
        If the dataitem path does not have a SQL scheme.
    """
    # to remove in future
    if not has_sql_scheme(self.spec.path):
        # The two literals are concatenated into one message. A comma
        # between them would pass two arguments to ValueError and the
        # error would render as a tuple.
        raise ValueError(
            f"Dataitem path is not a SQL scheme: {self.spec.path}"
            " Query can be made only on a SQL scheme."
        )
    self._query = query
    return self
|
|
63
|
+
|
|
39
64
|
def as_df(
|
|
40
65
|
self,
|
|
41
66
|
file_format: str | None = None,
|
|
42
|
-
engine: str | None =
|
|
43
|
-
clean_tmp_path: bool = True,
|
|
67
|
+
engine: str | None = "pandas",
|
|
44
68
|
**kwargs,
|
|
45
69
|
) -> Any:
|
|
46
70
|
"""
|
|
47
71
|
Read dataitem file (csv or parquet) as a DataFrame from spec.path.
|
|
48
|
-
If the dataitem is not local, it will be downloaded to a temporary
|
|
49
|
-
folder named tmp_dir in the project context folder.
|
|
50
|
-
If clean_tmp_path is True, the temporary folder will be deleted after the
|
|
51
|
-
method is executed.
|
|
52
72
|
It's possible to pass additional arguments to this function. These
|
|
53
73
|
keyword arguments will be passed to the DataFrame reader function such as
|
|
54
74
|
pandas's read_csv or read_parquet.
|
|
@@ -56,11 +76,10 @@ class DataitemTable(Dataitem):
|
|
|
56
76
|
Parameters
|
|
57
77
|
----------
|
|
58
78
|
file_format : str
|
|
59
|
-
Format of the file.
|
|
79
|
+
Format of the file to read. By default, it will be inferred from
|
|
80
|
+
the extension of the file.
|
|
60
81
|
engine : str
|
|
61
82
|
Dataframe framework, by default pandas.
|
|
62
|
-
clean_tmp_path : bool
|
|
63
|
-
If True, the temporary folder will be deleted.
|
|
64
83
|
**kwargs : dict
|
|
65
84
|
Keyword arguments passed to the read_df function.
|
|
66
85
|
|
|
@@ -69,30 +88,20 @@ class DataitemTable(Dataitem):
|
|
|
69
88
|
Any
|
|
70
89
|
DataFrame.
|
|
71
90
|
"""
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
extension = self._get_extension(checker, file_format)
|
|
88
|
-
return get_store("").read_df(data_path, extension, engine, **kwargs)
|
|
89
|
-
|
|
90
|
-
except Exception as e:
|
|
91
|
-
raise e
|
|
92
|
-
|
|
93
|
-
finally:
|
|
94
|
-
# Delete tmp folder
|
|
95
|
-
self._clean_tmp_path(tmp_dir, clean_tmp_path)
|
|
91
|
+
if self._query is not None:
|
|
92
|
+
df = get_store(self.project, self.spec.path).query(
|
|
93
|
+
self._query,
|
|
94
|
+
self.spec.path,
|
|
95
|
+
engine,
|
|
96
|
+
)
|
|
97
|
+
self._query = None
|
|
98
|
+
return df
|
|
99
|
+
return get_store(self.project, self.spec.path).read_df(
|
|
100
|
+
self.spec.path,
|
|
101
|
+
file_format,
|
|
102
|
+
engine,
|
|
103
|
+
**kwargs,
|
|
104
|
+
)
|
|
96
105
|
|
|
97
106
|
def write_df(
|
|
98
107
|
self,
|
|
@@ -119,7 +128,12 @@ class DataitemTable(Dataitem):
|
|
|
119
128
|
str
|
|
120
129
|
Path to the written dataframe.
|
|
121
130
|
"""
|
|
122
|
-
return get_store(self.
|
|
131
|
+
return get_store(self.project, self.spec.path).write_df(
|
|
132
|
+
df,
|
|
133
|
+
self.spec.path,
|
|
134
|
+
extension=extension,
|
|
135
|
+
**kwargs,
|
|
136
|
+
)
|
|
123
137
|
|
|
124
138
|
@staticmethod
|
|
125
139
|
def _clean_tmp_path(pth: Path | None, clean: bool) -> None:
|