digitalhub 0.8.0b0__py3-none-any.whl → 0.8.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of digitalhub might be problematic. Click here for more details.

Files changed (159) hide show
  1. digitalhub/__init__.py +62 -94
  2. digitalhub/client/__init__.py +0 -0
  3. digitalhub/client/builder.py +105 -0
  4. digitalhub/client/objects/__init__.py +0 -0
  5. digitalhub/client/objects/base.py +56 -0
  6. digitalhub/client/objects/dhcore.py +681 -0
  7. digitalhub/client/objects/local.py +533 -0
  8. digitalhub/context/__init__.py +0 -0
  9. digitalhub/context/builder.py +178 -0
  10. digitalhub/context/context.py +136 -0
  11. digitalhub/datastores/__init__.py +0 -0
  12. digitalhub/datastores/builder.py +134 -0
  13. digitalhub/datastores/objects/__init__.py +0 -0
  14. digitalhub/datastores/objects/base.py +85 -0
  15. digitalhub/datastores/objects/local.py +42 -0
  16. digitalhub/datastores/objects/remote.py +23 -0
  17. digitalhub/datastores/objects/s3.py +38 -0
  18. digitalhub/datastores/objects/sql.py +60 -0
  19. digitalhub/entities/__init__.py +0 -0
  20. digitalhub/entities/_base/__init__.py +0 -0
  21. digitalhub/entities/_base/api.py +346 -0
  22. digitalhub/entities/_base/base.py +82 -0
  23. digitalhub/entities/_base/crud.py +610 -0
  24. digitalhub/entities/_base/entity/__init__.py +0 -0
  25. digitalhub/entities/_base/entity/base.py +132 -0
  26. digitalhub/entities/_base/entity/context.py +118 -0
  27. digitalhub/entities/_base/entity/executable.py +380 -0
  28. digitalhub/entities/_base/entity/material.py +214 -0
  29. digitalhub/entities/_base/entity/unversioned.py +87 -0
  30. digitalhub/entities/_base/entity/versioned.py +94 -0
  31. digitalhub/entities/_base/metadata.py +59 -0
  32. digitalhub/entities/_base/spec/__init__.py +0 -0
  33. digitalhub/entities/_base/spec/base.py +58 -0
  34. digitalhub/entities/_base/spec/material.py +22 -0
  35. digitalhub/entities/_base/state.py +31 -0
  36. digitalhub/entities/_base/status/__init__.py +0 -0
  37. digitalhub/entities/_base/status/base.py +32 -0
  38. digitalhub/entities/_base/status/material.py +49 -0
  39. digitalhub/entities/_builders/__init__.py +0 -0
  40. digitalhub/entities/_builders/metadata.py +60 -0
  41. digitalhub/entities/_builders/name.py +31 -0
  42. digitalhub/entities/_builders/spec.py +43 -0
  43. digitalhub/entities/_builders/status.py +62 -0
  44. digitalhub/entities/_builders/uuid.py +33 -0
  45. digitalhub/entities/artifact/__init__.py +0 -0
  46. digitalhub/entities/artifact/builder.py +133 -0
  47. digitalhub/entities/artifact/crud.py +358 -0
  48. digitalhub/entities/artifact/entity/__init__.py +0 -0
  49. digitalhub/entities/artifact/entity/_base.py +39 -0
  50. digitalhub/entities/artifact/entity/artifact.py +9 -0
  51. digitalhub/entities/artifact/spec.py +39 -0
  52. digitalhub/entities/artifact/status.py +15 -0
  53. digitalhub/entities/dataitem/__init__.py +0 -0
  54. digitalhub/entities/dataitem/builder.py +144 -0
  55. digitalhub/entities/dataitem/crud.py +395 -0
  56. digitalhub/entities/dataitem/entity/__init__.py +0 -0
  57. digitalhub/entities/dataitem/entity/_base.py +75 -0
  58. digitalhub/entities/dataitem/entity/dataitem.py +9 -0
  59. digitalhub/entities/dataitem/entity/iceberg.py +7 -0
  60. digitalhub/entities/dataitem/entity/table.py +125 -0
  61. digitalhub/entities/dataitem/models.py +62 -0
  62. digitalhub/entities/dataitem/spec.py +61 -0
  63. digitalhub/entities/dataitem/status.py +38 -0
  64. digitalhub/entities/entity_types.py +19 -0
  65. digitalhub/entities/function/__init__.py +0 -0
  66. digitalhub/entities/function/builder.py +86 -0
  67. digitalhub/entities/function/crud.py +305 -0
  68. digitalhub/entities/function/entity.py +101 -0
  69. digitalhub/entities/function/models.py +118 -0
  70. digitalhub/entities/function/spec.py +81 -0
  71. digitalhub/entities/function/status.py +9 -0
  72. digitalhub/entities/model/__init__.py +0 -0
  73. digitalhub/entities/model/builder.py +152 -0
  74. digitalhub/entities/model/crud.py +358 -0
  75. digitalhub/entities/model/entity/__init__.py +0 -0
  76. digitalhub/entities/model/entity/_base.py +34 -0
  77. digitalhub/entities/model/entity/huggingface.py +9 -0
  78. digitalhub/entities/model/entity/mlflow.py +90 -0
  79. digitalhub/entities/model/entity/model.py +9 -0
  80. digitalhub/entities/model/entity/sklearn.py +9 -0
  81. digitalhub/entities/model/models.py +26 -0
  82. digitalhub/entities/model/spec.py +146 -0
  83. digitalhub/entities/model/status.py +33 -0
  84. digitalhub/entities/project/__init__.py +0 -0
  85. digitalhub/entities/project/builder.py +82 -0
  86. digitalhub/entities/project/crud.py +350 -0
  87. digitalhub/entities/project/entity.py +2060 -0
  88. digitalhub/entities/project/spec.py +50 -0
  89. digitalhub/entities/project/status.py +9 -0
  90. digitalhub/entities/registries.py +48 -0
  91. digitalhub/entities/run/__init__.py +0 -0
  92. digitalhub/entities/run/builder.py +77 -0
  93. digitalhub/entities/run/crud.py +232 -0
  94. digitalhub/entities/run/entity.py +461 -0
  95. digitalhub/entities/run/spec.py +153 -0
  96. digitalhub/entities/run/status.py +114 -0
  97. digitalhub/entities/secret/__init__.py +0 -0
  98. digitalhub/entities/secret/builder.py +93 -0
  99. digitalhub/entities/secret/crud.py +294 -0
  100. digitalhub/entities/secret/entity.py +73 -0
  101. digitalhub/entities/secret/spec.py +35 -0
  102. digitalhub/entities/secret/status.py +9 -0
  103. digitalhub/entities/task/__init__.py +0 -0
  104. digitalhub/entities/task/builder.py +74 -0
  105. digitalhub/entities/task/crud.py +241 -0
  106. digitalhub/entities/task/entity.py +135 -0
  107. digitalhub/entities/task/models.py +199 -0
  108. digitalhub/entities/task/spec.py +51 -0
  109. digitalhub/entities/task/status.py +9 -0
  110. digitalhub/entities/utils.py +184 -0
  111. digitalhub/entities/workflow/__init__.py +0 -0
  112. digitalhub/entities/workflow/builder.py +91 -0
  113. digitalhub/entities/workflow/crud.py +304 -0
  114. digitalhub/entities/workflow/entity.py +77 -0
  115. digitalhub/entities/workflow/spec.py +15 -0
  116. digitalhub/entities/workflow/status.py +9 -0
  117. digitalhub/readers/__init__.py +0 -0
  118. digitalhub/readers/builder.py +54 -0
  119. digitalhub/readers/objects/__init__.py +0 -0
  120. digitalhub/readers/objects/base.py +70 -0
  121. digitalhub/readers/objects/pandas.py +207 -0
  122. digitalhub/readers/registry.py +15 -0
  123. digitalhub/registry/__init__.py +0 -0
  124. digitalhub/registry/models.py +87 -0
  125. digitalhub/registry/registry.py +74 -0
  126. digitalhub/registry/utils.py +150 -0
  127. digitalhub/runtimes/__init__.py +0 -0
  128. digitalhub/runtimes/base.py +164 -0
  129. digitalhub/runtimes/builder.py +53 -0
  130. digitalhub/runtimes/kind_registry.py +170 -0
  131. digitalhub/stores/__init__.py +0 -0
  132. digitalhub/stores/builder.py +257 -0
  133. digitalhub/stores/objects/__init__.py +0 -0
  134. digitalhub/stores/objects/base.py +189 -0
  135. digitalhub/stores/objects/local.py +230 -0
  136. digitalhub/stores/objects/remote.py +143 -0
  137. digitalhub/stores/objects/s3.py +563 -0
  138. digitalhub/stores/objects/sql.py +328 -0
  139. digitalhub/utils/__init__.py +0 -0
  140. digitalhub/utils/data_utils.py +127 -0
  141. digitalhub/utils/env_utils.py +123 -0
  142. digitalhub/utils/exceptions.py +55 -0
  143. digitalhub/utils/file_utils.py +204 -0
  144. digitalhub/utils/generic_utils.py +207 -0
  145. digitalhub/utils/git_utils.py +148 -0
  146. digitalhub/utils/io_utils.py +79 -0
  147. digitalhub/utils/logger.py +17 -0
  148. digitalhub/utils/uri_utils.py +56 -0
  149. {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/METADATA +27 -12
  150. digitalhub-0.8.0b2.dist-info/RECORD +161 -0
  151. test/test_crud_artifacts.py +1 -1
  152. test/test_crud_dataitems.py +1 -1
  153. test/test_crud_functions.py +1 -1
  154. test/test_crud_runs.py +1 -1
  155. test/test_crud_tasks.py +1 -1
  156. digitalhub-0.8.0b0.dist-info/RECORD +0 -14
  157. {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/LICENSE.txt +0 -0
  158. {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/WHEEL +0 -0
  159. {digitalhub-0.8.0b0.dist-info → digitalhub-0.8.0b2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,395 @@
1
+ from __future__ import annotations
2
+
3
+ import typing
4
+ from pathlib import Path
5
+ from typing import Any
6
+ from urllib.parse import urlparse
7
+
8
+ from digitalhub.context.builder import check_context
9
+ from digitalhub.entities._base.crud import (
10
+ delete_entity_api_ctx,
11
+ list_entity_api_ctx,
12
+ read_entity_api_ctx,
13
+ read_entity_api_ctx_versions,
14
+ )
15
+ from digitalhub.entities._builders.uuid import build_uuid
16
+ from digitalhub.entities.dataitem.builder import dataitem_from_dict, dataitem_from_parameters
17
+ from digitalhub.entities.entity_types import EntityTypes
18
+ from digitalhub.entities.utils import build_log_path_from_filename, build_log_path_from_source, eval_local_source
19
+ from digitalhub.readers.builder import get_reader_by_object
20
+ from digitalhub.stores.builder import get_store
21
+ from digitalhub.utils.exceptions import EntityAlreadyExistsError
22
+ from digitalhub.utils.io_utils import read_yaml
23
+
24
+ if typing.TYPE_CHECKING:
25
+ from digitalhub.entities.dataitem.entity._base import Dataitem
26
+
27
+
28
+ ENTITY_TYPE = EntityTypes.DATAITEM.value
29
+
30
+
31
def new_dataitem(
    project: str,
    name: str,
    kind: str,
    uuid: str | None = None,
    description: str | None = None,
    labels: list[str] | None = None,
    embedded: bool = True,
    path: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Build a dataitem from parameters and save it.

    Parameters
    ----------
    project : str
        Project name.
    name : str
        Object name.
    kind : str
        Kind of the object.
    uuid : str
        ID of the object (UUID4, e.g. 40f25c4b-d26b-4221-b048-9527aff291e2).
    description : str
        Description of the object (human readable).
    labels : list[str]
        List of labels.
    embedded : bool
        Flag to determine if object spec must be embedded in project spec.
    path : str
        Object path on local file system or remote storage.
        It is also the destination path of upload() method.
    **kwargs : dict
        Spec keyword arguments.

    Returns
    -------
    Dataitem
        The saved object instance.

    Examples
    --------
    >>> obj = new_dataitem(project="my-project",
    >>>                    name="my-dataitem",
    >>>                    kind="dataitem",
    >>>                    path="s3://my-bucket/my-key")
    """
    # The project context is checked before anything is built.
    check_context(project)

    dataitem = dataitem_from_parameters(
        project=project,
        name=name,
        kind=kind,
        uuid=uuid,
        description=description,
        labels=labels,
        embedded=embedded,
        path=path,
        **kwargs,
    )
    dataitem.save()
    return dataitem
92
+
93
+
94
def log_dataitem(
    project: str,
    name: str,
    kind: str,
    source: list[str] | str | None = None,
    data: Any | None = None,
    extension: str | None = None,
    path: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Log a dataitem to the project.

    Exactly one of ``source`` and ``data`` must be given. With ``source``
    the dataitem is created through new_dataitem() and the source is
    uploaded. With ``data`` the dataitem is built, the dataframe is
    written (only for kind "table") and the entity is saved.

    Parameters
    ----------
    project : str
        Project name.
    name : str
        Object name.
    kind : str
        Kind of the object.
    source : list[str] | str
        Dataitem location on local path.
    data : Any
        Dataframe to log. Alternative to source.
    extension : str
        Extension of the output dataframe.
    path : str
        Destination path of the dataitem. If not provided, it's generated
        together with a new uuid (stored in kwargs["uuid"]).
    **kwargs : dict
        New dataitem spec parameters.

    Returns
    -------
    Dataitem
        Object instance.

    Raises
    ------
    ValueError
        If neither or both of source and data are provided.

    Examples
    --------
    >>> obj = log_dataitem(project="my-project",
    >>>                    name="my-dataitem",
    >>>                    kind="table",
    >>>                    data=df)
    """
    # The two inputs are mutually exclusive: reject "none" and "both".
    if (source is None) == (data is None):
        raise ValueError("You must provide either source or data, but not both.")

    # Case where source is provided
    if source is not None:
        eval_local_source(source)

        if path is None:
            uuid = build_uuid()
            kwargs["uuid"] = uuid
            path = build_log_path_from_source(project, ENTITY_TYPE, name, uuid, source)

        obj = new_dataitem(project=project, name=name, kind=kind, path=path, **kwargs)
        obj.upload(source)
        return obj

    # Case where data is provided
    if path is None:
        uuid = build_uuid()
        kwargs["uuid"] = uuid
        path = build_log_path_from_filename(project, ENTITY_TYPE, name, uuid, "data.parquet")

    obj = dataitem_from_parameters(project=project, name=name, kind=kind, path=path, **kwargs)

    # NOTE(review): for kinds other than "table" the data argument is not
    # written anywhere; only the entity itself is saved.
    if kind == "table":
        dst = obj.write_df(df=data, extension=extension)

        # Schema and preview are derived from the in-memory dataframe.
        reader = get_reader_by_object(data)
        obj.spec.schema = reader.get_schema(data)
        obj.status.preview = reader.get_preview(data)

        # File info is collected for the written file, paired with the
        # file name taken from the spec path.
        store = get_store(obj.spec.path)
        src = Path(urlparse(obj.spec.path).path).name
        infos = store.get_file_info([(dst, src)])
        obj.status.add_files_info(infos)

    # Save regardless of kind so the returned entity is always persisted.
    obj.save()
    return obj
174
+
175
+
176
def get_dataitem(
    identifier: str,
    project: str | None = None,
    entity_id: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Read a dataitem from the backend and build the entity.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name.
    entity_id : str
        Entity ID.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    Dataitem
        Object instance.

    Examples
    --------
    Using entity key:
    >>> obj = get_dataitem("store://my-dataitem-key")

    Using entity name:
    >>> obj = get_dataitem("my-dataitem-name",
    >>>                    project="my-project",
    >>>                    entity_id="my-dataitem-id")
    """
    raw = read_entity_api_ctx(
        identifier,
        ENTITY_TYPE,
        project=project,
        entity_id=entity_id,
        **kwargs,
    )
    dataitem = dataitem_from_dict(raw)
    # Files info is fetched right after the entity is built.
    dataitem._get_files_info()
    return dataitem
221
+
222
+
223
def get_dataitem_versions(
    identifier: str,
    project: str | None = None,
    **kwargs,
) -> list[Dataitem]:
    """
    Read all versions of a dataitem from the backend.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    list[Dataitem]
        List of object instances, in the order returned by the API call.

    Examples
    --------
    Using entity key:
    >>> objs = get_dataitem_versions("store://my-dataitem-key")

    Using entity name:
    >>> objs = get_dataitem_versions("my-dataitem-name",
    >>>                              project="my-project")
    """
    raw_versions = read_entity_api_ctx_versions(
        identifier,
        entity_type=ENTITY_TYPE,
        project=project,
        **kwargs,
    )

    versions = []
    for raw in raw_versions:
        dataitem = dataitem_from_dict(raw)
        # Files info is fetched for every version before it is collected.
        dataitem._get_files_info()
        versions.append(dataitem)
    return versions
266
+
267
+
268
def list_dataitems(project: str, **kwargs) -> list[Dataitem]:
    """
    List all latest version dataitems of a project from the backend.

    Parameters
    ----------
    project : str
        Project name.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    list[Dataitem]
        List of object instances, in the order returned by the API call.

    Examples
    --------
    >>> objs = list_dataitems(project="my-project")
    """
    raw_items = list_entity_api_ctx(
        project=project,
        entity_type=ENTITY_TYPE,
        **kwargs,
    )

    dataitems = []
    for raw in raw_items:
        dataitem = dataitem_from_dict(raw)
        # Files info is fetched for every entity before it is collected.
        dataitem._get_files_info()
        dataitems.append(dataitem)
    return dataitems
299
+
300
+
301
def import_dataitem(file: str) -> Dataitem:
    """
    Import object from a YAML file.

    The entity is built from the file content and saved. If it already
    exists, the locally built object is returned without saving it again.
    Any other error raised while saving is propagated to the caller.

    Parameters
    ----------
    file : str
        Path to YAML file.

    Returns
    -------
    Dataitem
        Object instance.

    Examples
    --------
    >>> obj = import_dataitem("my-dataitem.yaml")
    """
    dict_obj: dict = read_yaml(file)
    obj = dataitem_from_dict(dict_obj)
    try:
        obj.save()
    except EntityAlreadyExistsError:
        # An existing entity is not an error for an import.
        pass
    # Returned after the try statement rather than from a `finally` clause:
    # a return inside `finally` would discard every in-flight exception,
    # not only EntityAlreadyExistsError.
    return obj
327
+
328
+
329
def update_dataitem(entity: Dataitem) -> Dataitem:
    """
    Save an existing object in update mode. Note that object spec are immutable.

    Parameters
    ----------
    entity : Dataitem
        Object to update.

    Returns
    -------
    Dataitem
        Whatever entity.save(update=True) returns (the updated entity).

    Examples
    --------
    >>> obj = update_dataitem(obj)
    """
    updated = entity.save(update=True)
    return updated
348
+
349
+
350
def delete_dataitem(
    identifier: str,
    project: str | None = None,
    entity_id: str | None = None,
    delete_all_versions: bool = False,
    **kwargs,
) -> dict:
    """
    Delete a dataitem from the backend.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name.
    entity_id : str
        Entity ID.
    delete_all_versions : bool
        Delete all versions of the named entity. If True, use entity name
        instead of entity key as identifier.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    dict
        Response from backend.

    Examples
    --------
    If delete_all_versions is False:
    >>> obj = delete_dataitem("store://my-dataitem-key")

    Otherwise:
    >>> obj = delete_dataitem("my-dataitem-name",
    >>>                       project="my-project",
    >>>                       delete_all_versions=True)
    """
    response = delete_entity_api_ctx(
        identifier=identifier,
        entity_type=ENTITY_TYPE,
        project=project,
        entity_id=entity_id,
        delete_all_versions=delete_all_versions,
        **kwargs,
    )
    return response
File without changes
@@ -0,0 +1,75 @@
1
+ from __future__ import annotations
2
+
3
+ import typing
4
+ from pathlib import Path
5
+
6
+ from digitalhub.entities._base.entity.material import MaterialEntity
7
+ from digitalhub.entities.entity_types import EntityTypes
8
+ from digitalhub.utils.exceptions import EntityError
9
+ from digitalhub.utils.uri_utils import map_uri_scheme
10
+
11
+ if typing.TYPE_CHECKING:
12
+ from digitalhub.entities._base.metadata import Metadata
13
+ from digitalhub.entities.dataitem.spec import DataitemSpec
14
+ from digitalhub.entities.dataitem.status import DataitemStatus
15
+
16
+
17
class Dataitem(MaterialEntity):
    """
    A class representing a dataitem.
    """

    ENTITY_TYPE = EntityTypes.DATAITEM.value

    def __init__(
        self,
        project: str,
        name: str,
        uuid: str,
        kind: str,
        metadata: Metadata,
        spec: DataitemSpec,
        status: DataitemStatus,
        user: str | None = None,
    ) -> None:
        """
        Forward all arguments unchanged to MaterialEntity.__init__.
        """
        super().__init__(project, name, uuid, kind, metadata, spec, status, user)
        # Annotation-only statements: they narrow the attribute types for
        # type checkers and assign nothing at runtime.
        self.spec: DataitemSpec
        self.status: DataitemStatus

    ##############################
    # Private helper methods
    ##############################

    @staticmethod
    def _get_extension(path: str, file_format: str | None = None) -> str:
        """
        Get extension of path.

        An explicit file_format always wins. Otherwise paths whose URI
        scheme maps to "sql" are treated as parquet, and any other path
        yields its suffix without the leading dot.

        Parameters
        ----------
        path : str
            Path to get extension from.
        file_format : str
            File format.

        Returns
        -------
        str
            File extension.

        Raises
        ------
        EntityError
            If no file format is given and the path has no extension.
        """
        if file_format is not None:
            return file_format

        scheme = map_uri_scheme(path)
        if scheme == "sql":
            return "parquet"

        # Path.suffix is "" when the path has no extension, so the slice is
        # an empty string, never None: test truthiness, not identity.
        ext = Path(path).suffix[1:]
        if ext:
            return ext
        raise EntityError("Unknown file format. Only csv and parquet are supported.")
@@ -0,0 +1,9 @@
1
+ from __future__ import annotations
2
+
3
+ from digitalhub.entities.dataitem.entity._base import Dataitem
4
+
5
+
6
class DataitemDataitem(Dataitem):
    """
    Dataitem dataitem.

    Concrete subclass of Dataitem that adds no attributes or methods
    of its own.
    """
@@ -0,0 +1,7 @@
1
+ from digitalhub.entities.dataitem.entity._base import Dataitem
2
+
3
+
4
class DataitemIceberg(Dataitem):
    """
    Iceberg dataitem.

    Concrete subclass of Dataitem that adds no attributes or methods
    of its own.
    """
@@ -0,0 +1,125 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from digitalhub.datastores.builder import get_datastore
8
+ from digitalhub.entities.dataitem.entity._base import Dataitem
9
+ from digitalhub.utils.uri_utils import check_local_path
10
+
11
+
12
class DataitemTable(Dataitem):
    """
    Table dataitem.
    """

    def as_df(
        self,
        file_format: str | None = None,
        engine: str | None = None,
        clean_tmp_path: bool = True,
        **kwargs,
    ) -> Any:
        """
        Read dataitem file (csv or parquet) as a DataFrame from spec.path.
        If the dataitem is not local, it will be downloaded to a temporary
        folder named tmp_data in the project context root folder.
        If clean_tmp_path is True, the temporary folder will be deleted after
        the method is executed, whether it succeeds or fails.
        It's possible to pass additional arguments to this function. These
        keyword arguments will be passed to the DataFrame reader function such
        as pandas's read_csv or read_parquet.

        Parameters
        ----------
        file_format : str
            Format of the file. (Supported csv and parquet).
        engine : str
            Dataframe framework, by default pandas.
        clean_tmp_path : bool
            If True, the temporary folder will be deleted.
        **kwargs : dict
            Keyword arguments passed to the read_df function.

        Returns
        -------
        Any
            DataFrame.
        """
        if engine is None:
            engine = "pandas"

        # Bound before the try block so the finally clause can always read
        # it, even when the local-path check or the download setup raises.
        tmp_dir = None
        try:
            if check_local_path(self.spec.path):
                data_path = self.spec.path
            else:
                tmp_dir = self._context().root / "tmp_data"
                tmp_dir.mkdir(parents=True, exist_ok=True)
                data_path = self.download(destination=str(tmp_dir), overwrite=True)

            # For a directory, the extension is taken from the first file
            # found recursively (an empty directory raises IndexError).
            if Path(data_path).is_dir():
                files = [str(i) for i in Path(data_path).rglob("*") if i.is_file()]
                checker = files[0]
            else:
                checker = data_path

            extension = self._get_extension(checker, file_format)
            # NOTE(review): an empty URI is passed here; presumably it selects
            # the local datastore since data_path is local by now - confirm.
            datastore = get_datastore("")

            return datastore.read_df(data_path, extension, engine, **kwargs)

        finally:
            # Delete tmp folder
            self._clean_tmp_path(tmp_dir, clean_tmp_path)

    def write_df(
        self,
        df: Any,
        extension: str | None = None,
        **kwargs,
    ) -> str:
        """
        Write DataFrame as parquet/csv/table into dataitem spec.path.
        Keyword arguments will be passed to the DataFrame writer function
        such as pandas's to_csv or to_parquet.

        Parameters
        ----------
        df : Any
            DataFrame to write.
        extension : str
            Extension of the file.
        **kwargs : dict
            Keyword arguments passed to the write_df function.

        Returns
        -------
        str
            Path to the written dataframe.
        """
        datastore = get_datastore(self.spec.path)
        return datastore.write_df(df, self.spec.path, extension=extension, **kwargs)

    @staticmethod
    def _clean_tmp_path(pth: Path | None, clean: bool) -> None:
        """
        Clean temporary path.

        Parameters
        ----------
        pth : Path | None
            Path to clean. Nothing is done when it is None.
        clean : bool
            If True, the path will be removed recursively.

        Returns
        -------
        None
        """
        if pth is not None and clean:
            shutil.rmtree(pth)
@@ -0,0 +1,62 @@
1
from __future__ import annotations

from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field
6
+
7
+
8
class FieldType(str, Enum):
    """
    Field type enum.

    Members also subclass str, so each member compares equal to its
    string value.
    """

    # NOTE(review): the member set looks like the Frictionless Table Schema
    # field types - confirm before adding or renaming members.
    STRING = "string"
    NUMBER = "number"
    INTEGER = "integer"
    BOOLEAN = "boolean"
    OBJECT = "object"
    ARRAY = "array"
    DATE = "date"
    TIME = "time"
    DATETIME = "datetime"
    YEAR = "year"
    YEARMONTH = "yearmonth"
    DURATION = "duration"
    GEOPOINT = "geopoint"
    GEOJSON = "geojson"
    ANY = "any"
28
+
29
+
30
class TableSchemaFieldEntry(BaseModel):
    """
    Table schema field entry model.

    Only name and type are required. The remaining fields default to None
    and are typed as Optional so that an explicit None is valid input.
    """

    name: str
    """Field name."""

    # Trailing underscore avoids shadowing the builtin; the alias is "type".
    type_: FieldType = Field(alias="type")
    """Field type."""

    title: Optional[str] = None
    """Field title."""

    # Trailing underscore avoids shadowing the builtin; the alias is "format".
    format_: Optional[str] = Field(default=None, alias="format")
    """Field format."""

    example: Optional[str] = None
    """Field example."""

    description: Optional[str] = None
    """Field description."""

    class Config:
        # Store the enum's plain value on the model instead of the member.
        use_enum_values = True
55
+
56
+
57
class TableSchema(BaseModel):
    """
    Table schema model.

    Wraps a list of TableSchemaFieldEntry items.
    """

    # Field entries of the schema (presumably one per table column).
    fields: list[TableSchemaFieldEntry]