digitalhub 0.8.0b0__py3-none-any.whl → 0.8.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +62 -94
- digitalhub/client/__init__.py +0 -0
- digitalhub/client/builder.py +105 -0
- digitalhub/client/objects/__init__.py +0 -0
- digitalhub/client/objects/base.py +56 -0
- digitalhub/client/objects/dhcore.py +681 -0
- digitalhub/client/objects/local.py +533 -0
- digitalhub/context/__init__.py +0 -0
- digitalhub/context/builder.py +178 -0
- digitalhub/context/context.py +136 -0
- digitalhub/datastores/__init__.py +0 -0
- digitalhub/datastores/builder.py +134 -0
- digitalhub/datastores/objects/__init__.py +0 -0
- digitalhub/datastores/objects/base.py +85 -0
- digitalhub/datastores/objects/local.py +42 -0
- digitalhub/datastores/objects/remote.py +23 -0
- digitalhub/datastores/objects/s3.py +38 -0
- digitalhub/datastores/objects/sql.py +60 -0
- digitalhub/entities/__init__.py +0 -0
- digitalhub/entities/_base/__init__.py +0 -0
- digitalhub/entities/_base/api.py +346 -0
- digitalhub/entities/_base/base.py +82 -0
- digitalhub/entities/_base/crud.py +610 -0
- digitalhub/entities/_base/entity/__init__.py +0 -0
- digitalhub/entities/_base/entity/base.py +132 -0
- digitalhub/entities/_base/entity/context.py +118 -0
- digitalhub/entities/_base/entity/executable.py +380 -0
- digitalhub/entities/_base/entity/material.py +214 -0
- digitalhub/entities/_base/entity/unversioned.py +87 -0
- digitalhub/entities/_base/entity/versioned.py +94 -0
- digitalhub/entities/_base/metadata.py +59 -0
- digitalhub/entities/_base/spec/__init__.py +0 -0
- digitalhub/entities/_base/spec/base.py +58 -0
- digitalhub/entities/_base/spec/material.py +22 -0
- digitalhub/entities/_base/state.py +31 -0
- digitalhub/entities/_base/status/__init__.py +0 -0
- digitalhub/entities/_base/status/base.py +32 -0
- digitalhub/entities/_base/status/material.py +49 -0
- digitalhub/entities/_builders/__init__.py +0 -0
- digitalhub/entities/_builders/metadata.py +60 -0
- digitalhub/entities/_builders/name.py +31 -0
- digitalhub/entities/_builders/spec.py +43 -0
- digitalhub/entities/_builders/status.py +62 -0
- digitalhub/entities/_builders/uuid.py +33 -0
- digitalhub/entities/artifact/__init__.py +0 -0
- digitalhub/entities/artifact/builder.py +133 -0
- digitalhub/entities/artifact/crud.py +358 -0
- digitalhub/entities/artifact/entity/__init__.py +0 -0
- digitalhub/entities/artifact/entity/_base.py +39 -0
- digitalhub/entities/artifact/entity/artifact.py +9 -0
- digitalhub/entities/artifact/spec.py +39 -0
- digitalhub/entities/artifact/status.py +15 -0
- digitalhub/entities/dataitem/__init__.py +0 -0
- digitalhub/entities/dataitem/builder.py +144 -0
- digitalhub/entities/dataitem/crud.py +395 -0
- digitalhub/entities/dataitem/entity/__init__.py +0 -0
- digitalhub/entities/dataitem/entity/_base.py +75 -0
- digitalhub/entities/dataitem/entity/dataitem.py +9 -0
- digitalhub/entities/dataitem/entity/iceberg.py +7 -0
- digitalhub/entities/dataitem/entity/table.py +125 -0
- digitalhub/entities/dataitem/models.py +62 -0
- digitalhub/entities/dataitem/spec.py +61 -0
- digitalhub/entities/dataitem/status.py +38 -0
- digitalhub/entities/entity_types.py +19 -0
- digitalhub/entities/function/__init__.py +0 -0
- digitalhub/entities/function/builder.py +86 -0
- digitalhub/entities/function/crud.py +305 -0
- digitalhub/entities/function/entity.py +101 -0
- digitalhub/entities/function/models.py +118 -0
- digitalhub/entities/function/spec.py +81 -0
- digitalhub/entities/function/status.py +9 -0
- digitalhub/entities/model/__init__.py +0 -0
- digitalhub/entities/model/builder.py +152 -0
- digitalhub/entities/model/crud.py +358 -0
- digitalhub/entities/model/entity/__init__.py +0 -0
- digitalhub/entities/model/entity/_base.py +34 -0
- digitalhub/entities/model/entity/huggingface.py +9 -0
- digitalhub/entities/model/entity/mlflow.py +90 -0
- digitalhub/entities/model/entity/model.py +9 -0
- digitalhub/entities/model/entity/sklearn.py +9 -0
- digitalhub/entities/model/models.py +26 -0
- digitalhub/entities/model/spec.py +146 -0
- digitalhub/entities/model/status.py +33 -0
- digitalhub/entities/project/__init__.py +0 -0
- digitalhub/entities/project/builder.py +82 -0
- digitalhub/entities/project/crud.py +350 -0
- digitalhub/entities/project/entity.py +2060 -0
- digitalhub/entities/project/spec.py +50 -0
- digitalhub/entities/project/status.py +9 -0
- digitalhub/entities/registries.py +48 -0
- digitalhub/entities/run/__init__.py +0 -0
- digitalhub/entities/run/builder.py +77 -0
- digitalhub/entities/run/crud.py +232 -0
- digitalhub/entities/run/entity.py +461 -0
- digitalhub/entities/run/spec.py +153 -0
- digitalhub/entities/run/status.py +114 -0
- digitalhub/entities/secret/__init__.py +0 -0
- digitalhub/entities/secret/builder.py +93 -0
- digitalhub/entities/secret/crud.py +294 -0
- digitalhub/entities/secret/entity.py +73 -0
- digitalhub/entities/secret/spec.py +35 -0
- digitalhub/entities/secret/status.py +9 -0
- digitalhub/entities/task/__init__.py +0 -0
- digitalhub/entities/task/builder.py +74 -0
- digitalhub/entities/task/crud.py +241 -0
- digitalhub/entities/task/entity.py +135 -0
- digitalhub/entities/task/models.py +199 -0
- digitalhub/entities/task/spec.py +51 -0
- digitalhub/entities/task/status.py +9 -0
- digitalhub/entities/utils.py +184 -0
- digitalhub/entities/workflow/__init__.py +0 -0
- digitalhub/entities/workflow/builder.py +91 -0
- digitalhub/entities/workflow/crud.py +304 -0
- digitalhub/entities/workflow/entity.py +77 -0
- digitalhub/entities/workflow/spec.py +15 -0
- digitalhub/entities/workflow/status.py +9 -0
- digitalhub/readers/__init__.py +0 -0
- digitalhub/readers/builder.py +54 -0
- digitalhub/readers/objects/__init__.py +0 -0
- digitalhub/readers/objects/base.py +70 -0
- digitalhub/readers/objects/pandas.py +207 -0
- digitalhub/readers/registry.py +15 -0
- digitalhub/registry/__init__.py +0 -0
- digitalhub/registry/models.py +87 -0
- digitalhub/registry/registry.py +74 -0
- digitalhub/registry/utils.py +150 -0
- digitalhub/runtimes/__init__.py +0 -0
- digitalhub/runtimes/base.py +164 -0
- digitalhub/runtimes/builder.py +53 -0
- digitalhub/runtimes/kind_registry.py +170 -0
- digitalhub/stores/__init__.py +0 -0
- digitalhub/stores/builder.py +257 -0
- digitalhub/stores/objects/__init__.py +0 -0
- digitalhub/stores/objects/base.py +189 -0
- digitalhub/stores/objects/local.py +230 -0
- digitalhub/stores/objects/remote.py +143 -0
- digitalhub/stores/objects/s3.py +563 -0
- digitalhub/stores/objects/sql.py +328 -0
- digitalhub/utils/__init__.py +0 -0
- digitalhub/utils/data_utils.py +127 -0
- digitalhub/utils/env_utils.py +123 -0
- digitalhub/utils/exceptions.py +55 -0
- digitalhub/utils/file_utils.py +204 -0
- digitalhub/utils/generic_utils.py +207 -0
- digitalhub/utils/git_utils.py +148 -0
- digitalhub/utils/io_utils.py +79 -0
- digitalhub/utils/logger.py +17 -0
- digitalhub/utils/uri_utils.py +56 -0
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b1.dist-info}/METADATA +27 -12
- digitalhub-0.8.0b1.dist-info/RECORD +161 -0
- test/test_crud_artifacts.py +1 -1
- test/test_crud_dataitems.py +1 -1
- test/test_crud_functions.py +1 -1
- test/test_crud_runs.py +1 -1
- test/test_crud_tasks.py +1 -1
- digitalhub-0.8.0b0.dist-info/RECORD +0 -14
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b1.dist-info}/LICENSE.txt +0 -0
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b1.dist-info}/WHEEL +0 -0
- {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
from digitalhub.context.builder import check_context
|
|
9
|
+
from digitalhub.entities._base.crud import (
|
|
10
|
+
delete_entity_api_ctx,
|
|
11
|
+
list_entity_api_ctx,
|
|
12
|
+
read_entity_api_ctx,
|
|
13
|
+
read_entity_api_ctx_versions,
|
|
14
|
+
)
|
|
15
|
+
from digitalhub.entities._builders.uuid import build_uuid
|
|
16
|
+
from digitalhub.entities.dataitem.builder import dataitem_from_dict, dataitem_from_parameters
|
|
17
|
+
from digitalhub.entities.entity_types import EntityTypes
|
|
18
|
+
from digitalhub.entities.utils import build_log_path_from_filename, build_log_path_from_source, eval_local_source
|
|
19
|
+
from digitalhub.readers.builder import get_reader_by_object
|
|
20
|
+
from digitalhub.stores.builder import get_store
|
|
21
|
+
from digitalhub.utils.exceptions import EntityAlreadyExistsError
|
|
22
|
+
from digitalhub.utils.io_utils import read_yaml
|
|
23
|
+
|
|
24
|
+
if typing.TYPE_CHECKING:
|
|
25
|
+
from digitalhub.entities.dataitem.entity._base import Dataitem
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Entity type string for dataitems; passed as the entity type argument to the
# generic CRUD helpers and log-path builders used throughout this module.
ENTITY_TYPE = EntityTypes.DATAITEM.value
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def new_dataitem(
    project: str,
    name: str,
    kind: str,
    uuid: str | None = None,
    description: str | None = None,
    labels: list[str] | None = None,
    embedded: bool = True,
    path: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Build a dataitem from parameters and save it.

    The project context is checked with ``check_context`` before the
    object is built; the built object is then saved through its own
    ``save()`` method and returned.

    Parameters
    ----------
    project : str
        Project name.
    name : str
        Object name.
    kind : str
        Kind of the object.
    uuid : str
        ID of the object (UUID4, e.g. 40f25c4b-d26b-4221-b048-9527aff291e2).
    description : str
        Description of the object (human readable).
    labels : list[str]
        List of labels.
    embedded : bool
        Flag to determine if object spec must be embedded in project spec.
    path : str
        Object path on local file system or remote storage.
        It is also the destination path of upload() method.
    **kwargs : dict
        Spec keyword arguments, forwarded to the dataitem builder.

    Returns
    -------
    Dataitem
        Object instance.

    Examples
    --------
    >>> obj = new_dataitem(project="my-project",
    ...                    name="my-dataitem",
    ...                    kind="dataitem",
    ...                    path="s3://my-bucket/my-key")
    """
    check_context(project)

    dataitem = dataitem_from_parameters(
        project=project,
        name=name,
        kind=kind,
        uuid=uuid,
        description=description,
        labels=labels,
        embedded=embedded,
        path=path,
        **kwargs,
    )
    dataitem.save()

    return dataitem
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def log_dataitem(
    project: str,
    name: str,
    kind: str,
    source: list[str] | str | None = None,
    data: Any | None = None,
    extension: str | None = None,
    path: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Log a dataitem to the project.

    Exactly one of ``source`` and ``data`` must be given. With ``source``
    the dataitem is created through ``new_dataitem`` and the source is
    uploaded. With ``data`` the dataitem is built from parameters and,
    for kind "table", the dataframe is written to the dataitem path and
    schema, preview and file information are stored on the object.

    Parameters
    ----------
    project : str
        Project name.
    name : str
        Object name.
    kind : str
        Kind of the object.
    source : list[str] | str
        Dataitem location on local path.
    data : Any
        Dataframe to log. Alternative to source.
    extension : str
        Extension of the output dataframe.
    path : str
        Destination path of the dataitem. If not provided, it's generated.
    **kwargs : dict
        New dataitem spec parameters.

    Returns
    -------
    Dataitem
        Object instance.

    Raises
    ------
    ValueError
        If both or neither of ``source`` and ``data`` are provided.

    Examples
    --------
    >>> obj = log_dataitem(project="my-project",
    ...                    name="my-dataitem",
    ...                    kind="table",
    ...                    data=df)
    """
    # True when both arguments are None or both are set: exactly one is required.
    if (source is None) == (data is None):
        raise ValueError("You must provide source or data.")

    # Case where source is provided
    if source is not None:
        eval_local_source(source)

        if path is None:
            # The generated id is also forwarded to the builder through kwargs,
            # so the destination path and the entity share the same uuid.
            uuid = build_uuid()
            kwargs["uuid"] = uuid
            path = build_log_path_from_source(project, ENTITY_TYPE, name, uuid, source)

        obj = new_dataitem(project=project, name=name, kind=kind, path=path, **kwargs)
        obj.upload(source)

    # Case where data is provided
    else:
        if path is None:
            uuid = build_uuid()
            kwargs["uuid"] = uuid
            # The generated file name is always "data.parquet"; the `extension`
            # argument is not used when building the path.
            path = build_log_path_from_filename(project, ENTITY_TYPE, name, uuid, "data.parquet")

        # Built without saving (unlike the source branch, which uses new_dataitem).
        obj = dataitem_from_parameters(project=project, name=name, kind=kind, path=path, **kwargs)
        if kind == "table":
            dst = obj.write_df(df=data, extension=extension)
            reader = get_reader_by_object(data)
            obj.spec.schema = reader.get_schema(data)
            obj.status.preview = reader.get_preview(data)
            store = get_store(obj.spec.path)
            # Last component of the path part of the dataitem URI.
            src = Path(urlparse(obj.spec.path).path).name
            paths = [(dst, src)]
            infos = store.get_file_info(paths)
            obj.status.add_files_info(infos)
            # NOTE(review): the object is saved only in this "table" branch;
            # for other kinds it is returned unsaved — confirm this is intended.
            obj.save()

    return obj
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def get_dataitem(
    identifier: str,
    project: str | None = None,
    entity_id: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Read a dataitem from the backend and build its object.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name.
    entity_id : str
        Entity ID.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    Dataitem
        Object instance.

    Examples
    --------
    Using entity key:
    >>> obj = get_dataitem("store://my-dataitem-key")

    Using entity name:
    >>> obj = get_dataitem("my-dataitem-name",
    ...                    project="my-project",
    ...                    entity_id="my-dataitem-id")
    """
    raw_entity = read_entity_api_ctx(
        identifier,
        ENTITY_TYPE,
        project=project,
        entity_id=entity_id,
        **kwargs,
    )

    dataitem = dataitem_from_dict(raw_entity)
    # Load the files information on the freshly built object before returning it.
    dataitem._get_files_info()

    return dataitem
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def get_dataitem_versions(
    identifier: str,
    project: str | None = None,
    **kwargs,
) -> list[Dataitem]:
    """
    Read the versions of a dataitem from the backend.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    list[Dataitem]
        List of object instances.

    Examples
    --------
    Using entity key:
    >>> objs = get_dataitem_versions("store://my-dataitem-key")

    Using entity name:
    >>> objs = get_dataitem_versions("my-dataitem-name",
    ...                              project="my-project")
    """

    def _build(raw_version: dict) -> Dataitem:
        # Build one object and load its files information right away, so the
        # per-item order of operations stays build -> files info.
        version = dataitem_from_dict(raw_version)
        version._get_files_info()
        return version

    raw_versions = read_entity_api_ctx_versions(
        identifier,
        entity_type=ENTITY_TYPE,
        project=project,
        **kwargs,
    )
    return [_build(raw_version) for raw_version in raw_versions]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def list_dataitems(project: str, **kwargs) -> list[Dataitem]:
    """
    List all latest version objects from backend.

    Each entry returned by the backend is turned into an object and its
    files information is loaded before the next entry is processed.

    Parameters
    ----------
    project : str
        Project name.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    list[Dataitem]
        List of object instances.

    Examples
    --------
    >>> objs = list_dataitems(project="my-project")
    """
    raw_items = list_entity_api_ctx(
        project=project,
        entity_type=ENTITY_TYPE,
        **kwargs,
    )

    dataitems = []
    for raw_item in raw_items:
        dataitem = dataitem_from_dict(raw_item)
        dataitem._get_files_info()
        dataitems += [dataitem]
    return dataitems
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def import_dataitem(file: str) -> Dataitem:
    """
    Import object from a YAML file.

    The file is read with ``read_yaml``, turned into an object and saved.
    If saving reports that the entity already exists, the error is ignored
    and the object built from the file is returned anyway. Any other error
    raised while saving is propagated to the caller.

    Parameters
    ----------
    file : str
        Path to YAML file.

    Returns
    -------
    Dataitem
        Object instance.

    Examples
    --------
    >>> obj = import_dataitem("my-dataitem.yaml")
    """
    dict_obj: dict = read_yaml(file)
    obj = dataitem_from_dict(dict_obj)
    try:
        obj.save()
    except EntityAlreadyExistsError:
        # Importing an entity that already exists is tolerated on purpose.
        pass
    # Returned after the try statement rather than from a ``finally`` clause:
    # a ``return`` inside ``finally`` would discard every other exception
    # raised by save(), hiding real failures from the caller.
    return obj
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def update_dataitem(entity: Dataitem) -> Dataitem:
    """
    Update object. Note that object spec are immutable.

    The update is delegated to the entity's own ``save`` method, called
    with ``update=True``; whatever that call returns is returned as is.

    Parameters
    ----------
    entity : Dataitem
        Object to update.

    Returns
    -------
    Dataitem
        Entity updated.

    Examples
    --------
    >>> obj = update_dataitem(obj)
    """
    updated = entity.save(update=True)
    return updated
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def delete_dataitem(
    identifier: str,
    project: str | None = None,
    entity_id: str | None = None,
    delete_all_versions: bool = False,
    **kwargs,
) -> dict:
    """
    Delete object from backend.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name.
    entity_id : str
        Entity ID.
    delete_all_versions : bool
        Delete all versions of the named entity. If True, use entity
        name instead of entity key as identifier.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    dict
        Response from backend.

    Examples
    --------
    If delete_all_versions is False:
    >>> obj = delete_dataitem("store://my-dataitem-key")

    Otherwise:
    >>> obj = delete_dataitem("my-dataitem-name",
    ...                       project="my-project",
    ...                       delete_all_versions=True)
    """
    request = {
        "identifier": identifier,
        "entity_type": ENTITY_TYPE,
        "project": project,
        "entity_id": entity_id,
        "delete_all_versions": delete_all_versions,
    }
    return delete_entity_api_ctx(**request, **kwargs)
|
|
File without changes
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from digitalhub.entities._base.entity.material import MaterialEntity
|
|
7
|
+
from digitalhub.entities.entity_types import EntityTypes
|
|
8
|
+
from digitalhub.utils.exceptions import EntityError
|
|
9
|
+
from digitalhub.utils.uri_utils import map_uri_scheme
|
|
10
|
+
|
|
11
|
+
if typing.TYPE_CHECKING:
|
|
12
|
+
from digitalhub.entities._base.metadata import Metadata
|
|
13
|
+
from digitalhub.entities.dataitem.spec import DataitemSpec
|
|
14
|
+
from digitalhub.entities.dataitem.status import DataitemStatus
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Dataitem(MaterialEntity):
    """
    A class representing a dataitem.
    """

    ENTITY_TYPE = EntityTypes.DATAITEM.value

    def __init__(
        self,
        project: str,
        name: str,
        uuid: str,
        kind: str,
        metadata: Metadata,
        spec: DataitemSpec,
        status: DataitemStatus,
        user: str | None = None,
    ) -> None:
        """
        Initialize the dataitem by forwarding every argument to the parent
        class constructor.
        """
        super().__init__(project, name, uuid, kind, metadata, spec, status, user)
        # Annotation-only statements: they assign nothing and only declare
        # the dataitem-specific types of the attributes.
        self.spec: DataitemSpec
        self.status: DataitemStatus

    ##############################
    # Private helper methods
    ##############################

    @staticmethod
    def _get_extension(path: str, file_format: str | None = None) -> str:
        """
        Get extension of path.

        An explicit ``file_format`` is returned unchanged. Otherwise, a
        path whose mapped URI scheme is "sql" yields "parquet", and any
        other path yields its file suffix without the leading dot.

        Parameters
        ----------
        path : str
            Path to get extension from.
        file_format : str
            File format.

        Returns
        -------
        str
            File extension.

        Raises
        ------
        EntityError
            If no file format is given and the path has no extension.
        """
        if file_format is not None:
            return file_format

        scheme = map_uri_scheme(path)
        if scheme == "sql":
            return "parquet"

        # The slice of a suffix is always a string (possibly empty), never
        # None, so emptiness is what must be tested to detect a missing
        # extension.
        ext = Path(path).suffix[1:]
        if ext:
            return ext
        raise EntityError("Unknown file format. Only csv and parquet are supported.")
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from digitalhub.datastores.builder import get_datastore
|
|
8
|
+
from digitalhub.entities.dataitem.entity._base import Dataitem
|
|
9
|
+
from digitalhub.utils.uri_utils import check_local_path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataitemTable(Dataitem):

    """
    Table dataitem.
    """

    def as_df(
        self,
        file_format: str | None = None,
        engine: str | None = None,
        clean_tmp_path: bool = True,
        **kwargs,
    ) -> Any:
        """
        Read dataitem file (csv or parquet) as a DataFrame from spec.path.
        If the dataitem is not local, it will be downloaded to a temporary
        folder named tmp_data in the project context folder.
        If clean_tmp_path is True, the temporary folder will be deleted after
        the method is executed.
        It's possible to pass additional arguments to this function. These
        keyword arguments will be passed to the DataFrame reader function such
        as pandas's read_csv or read_parquet.

        Parameters
        ----------
        file_format : str
            Format of the file. (Supported csv and parquet).
        engine : str
            Dataframe framework, by default pandas.
        clean_tmp_path : bool
            If True, the temporary folder will be deleted.
        **kwargs : dict
            Keyword arguments passed to the read_df function.

        Returns
        -------
        Any
            DataFrame.
        """
        if engine is None:
            engine = "pandas"

        # Bound before the try block so that the finally clause can always
        # reference it, even if an error occurs before a temporary folder
        # has been chosen.
        tmp_dir: Path | None = None
        try:
            if check_local_path(self.spec.path):
                data_path = self.spec.path
            else:
                tmp_dir = self._context().root / "tmp_data"
                tmp_dir.mkdir(parents=True, exist_ok=True)
                data_path = self.download(destination=str(tmp_dir), overwrite=True)

            # The extension is inferred from a single file: the path itself,
            # or the first file found when the path is a directory.
            # An empty directory makes files[0] raise IndexError.
            if Path(data_path).is_dir():
                files = [str(i) for i in Path(data_path).rglob("*") if i.is_file()]
                checker = files[0]
            else:
                checker = data_path

            extension = self._get_extension(checker, file_format)
            datastore = get_datastore("")

            return datastore.read_df(data_path, extension, engine, **kwargs)

        finally:
            # Delete tmp folder; errors raised above propagate unchanged.
            self._clean_tmp_path(tmp_dir, clean_tmp_path)

    def write_df(
        self,
        df: Any,
        extension: str | None = None,
        **kwargs,
    ) -> str:
        """
        Write DataFrame as parquet/csv/table into dataitem spec.path.
        Keyword arguments will be passed to the DataFrame writer function
        such as pandas's to_csv or to_parquet.

        Parameters
        ----------
        df : Any
            DataFrame to write.
        extension : str
            Extension of the file.
        **kwargs : dict
            Keyword arguments passed to the write_df function.

        Returns
        -------
        str
            Path to the written dataframe.
        """
        datastore = get_datastore(self.spec.path)
        return datastore.write_df(df, self.spec.path, extension=extension, **kwargs)

    @staticmethod
    def _clean_tmp_path(pth: Path | None, clean: bool) -> None:
        """
        Clean temporary path.

        Parameters
        ----------
        pth : Path | None
            Path to clean. Nothing is done when it is None.
        clean : bool
            If True, the path will be cleaned.

        Returns
        -------
        None
        """
        if pth is not None and clean:
            # Removes the folder together with everything it contains.
            shutil.rmtree(pth)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FieldType(str, Enum):
    """
    Enumeration of the field types accepted in a table schema.

    Members are also plain strings, equal to their lowercase value.
    """

    # Scalar types
    STRING = "string"
    NUMBER = "number"
    INTEGER = "integer"
    BOOLEAN = "boolean"

    # Structured types
    OBJECT = "object"
    ARRAY = "array"

    # Date and time types
    DATE = "date"
    TIME = "time"
    DATETIME = "datetime"
    YEAR = "year"
    YEARMONTH = "yearmonth"
    DURATION = "duration"

    # Geographic types
    GEOPOINT = "geopoint"
    GEOJSON = "geojson"

    # Unconstrained type
    ANY = "any"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TableSchemaFieldEntry(BaseModel):
    """
    Table schema field entry model.

    Pydantic model for a single entry of a table schema ``fields`` list.
    Only ``name`` and ``type`` have no default; the remaining attributes
    default to None.
    """

    name: str
    """Field name."""

    # Declared with a trailing underscore; the pydantic alias makes the model
    # take this value from the "type" key.
    type_: FieldType = Field(alias="type")
    """Field type."""

    # NOTE(review): annotated as str but defaults to None (same for format_,
    # example and description below) — confirm the intended optionality.
    title: str = None
    """Field title."""

    # Declared with a trailing underscore; aliased to the "format" key.
    format_: str = Field(default=None, alias="format")
    """Field format."""

    example: str = None
    """Field example."""

    description: str = None
    """Field description."""

    class Config:
        # Pydantic option: fields hold the enum member's value rather than
        # the enum member itself.
        use_enum_values = True
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TableSchema(BaseModel):
    """
    Table schema model.

    Pydantic model holding the list of field entries of a table.
    """

    # One TableSchemaFieldEntry per field; the attribute has no default.
    fields: list[TableSchemaFieldEntry]
|