digitalhub 0.10.0b2__py3-none-any.whl → 0.10.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of digitalhub might be problematic. Click here for more details.

@@ -5,7 +5,7 @@ from typing import Any
5
5
 
6
6
  from digitalhub.entities._commons.enums import EntityTypes
7
7
  from digitalhub.entities._operations.processor import processor
8
- from digitalhub.entities.dataitem.utils import clean_tmp_path, eval_source, post_process, process_kwargs
8
+ from digitalhub.entities.dataitem.utils import clean_tmp_path, eval_data, eval_source, post_process, process_kwargs
9
9
 
10
10
  if typing.TYPE_CHECKING:
11
11
  from digitalhub.entities.dataitem._base.entity import Dataitem
@@ -81,6 +81,8 @@ def log_dataitem(
81
81
  source: list[str] | str | None = None,
82
82
  data: Any | None = None,
83
83
  path: str | None = None,
84
+ file_format: str | None = None,
85
+ engine: str | None = "pandas",
84
86
  **kwargs,
85
87
  ) -> Dataitem:
86
88
  """
@@ -100,6 +102,10 @@ def log_dataitem(
100
102
  Dataframe to log. Alternative to source.
101
103
  path : str
102
104
  Destination path of the dataitem. If not provided, it's generated.
105
+ file_format : str
106
+ Extension of the file.
107
+ engine : str
108
+ Dataframe engine (pandas, polars, etc.).
103
109
  **kwargs : dict
104
110
  New dataitem spec parameters.
105
111
 
@@ -115,7 +121,12 @@ def log_dataitem(
115
121
  >>> kind="table",
116
122
  >>> data=df)
117
123
  """
124
+ cleanup = False
125
+ if data is not None:
126
+ cleanup = True
127
+
118
128
  source = eval_source(source, data, kind, name, project)
129
+ data = eval_data(project, kind, source, data, file_format, engine)
119
130
  kwargs = process_kwargs(project, name, kind, source=source, data=data, path=path, **kwargs)
120
131
  obj = processor.log_material_entity(
121
132
  source=source,
@@ -124,8 +135,8 @@ def log_dataitem(
124
135
  kind=kind,
125
136
  **kwargs,
126
137
  )
127
- if data is not None:
128
- obj = post_process(obj, data)
138
+ obj = post_process(obj, data)
139
+ if cleanup:
129
140
  clean_tmp_path(source)
130
141
  return obj
131
142
 
@@ -9,6 +9,7 @@ from digitalhub.entities._base.entity._constructors.uuid import build_uuid
9
9
  from digitalhub.entities._base.material.utils import build_log_path_from_source, eval_local_source
10
10
  from digitalhub.entities._commons.enums import EntityKinds, EntityTypes
11
11
  from digitalhub.readers.data.api import get_reader_by_object
12
+ from digitalhub.stores.api import get_store
12
13
  from digitalhub.utils.generic_utils import slugify_string
13
14
 
14
15
  if typing.TYPE_CHECKING:
@@ -54,6 +55,44 @@ def eval_source(
54
55
  raise NotImplementedError
55
56
 
56
57
 
58
+ def eval_data(
59
+ project: str,
60
+ kind: str,
61
+ source: str,
62
+ data: Any | None = None,
63
+ file_format: str | None = None,
64
+ engine: str | None = None,
65
+ ) -> Any:
66
+ """
67
+ Evaluate data is loaded.
68
+
69
+ Parameters
70
+ ----------
71
+ project : str
72
+ Project name.
73
+ source : str
74
+ Source(s).
75
+ data : Any
76
+ Dataframe to log. Alternative to source.
77
+ file_format : str
78
+ Extension of the file.
79
+ engine : str
80
+ Engine to use.
81
+
82
+ Returns
83
+ -------
84
+ None
85
+ """
86
+ if kind == EntityKinds.DATAITEM_TABLE.value:
87
+ if data is None:
88
+ return get_store(project, source).read_df(
89
+ source,
90
+ file_format=file_format,
91
+ engine=engine,
92
+ )
93
+ return data
94
+
95
+
57
96
  def process_kwargs(
58
97
  project: str,
59
98
  name: str,
@@ -59,7 +59,7 @@ class Store:
59
59
  def read_df(
60
60
  self,
61
61
  path: str | list[str],
62
- extension: str,
62
+ file_format: str | None = None,
63
63
  engine: str | None = None,
64
64
  **kwargs,
65
65
  ) -> Any:
@@ -1,7 +1,8 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: digitalhub
3
- Version: 0.10.0b2
3
+ Version: 0.10.0b3
4
4
  Summary: Python SDK for Digitalhub
5
+ Project-URL: Homepage, https://github.com/scc-digitalhub/digitalhub-sdk
5
6
  Author-email: Fondazione Bruno Kessler <dslab@fbk.eu>, Matteo Martini <mmartini@fbk.eu>
6
7
  License: Apache License
7
8
  Version 2.0, January 2004
@@ -219,48 +220,41 @@ License: Apache License
219
220
 
220
221
  (SIL OPEN FONT LICENSE Version 1.1) The Inter font family (https://github.com/rsms/inter)
221
222
  (SIL OPEN FONT LICENSE Version 1.1) The Fira Code font family (https://github.com/tonsky/FiraCode)
222
-
223
- Project-URL: Homepage, https://github.com/scc-digitalhub/digitalhub-sdk
223
+ License-File: LICENSE.txt
224
224
  Keywords: data,dataops,kubernetes
225
225
  Classifier: License :: OSI Approved :: Apache Software License
226
226
  Classifier: Programming Language :: Python :: 3.9
227
227
  Classifier: Programming Language :: Python :: 3.10
228
228
  Classifier: Programming Language :: Python :: 3.11
229
- Requires-Python: >=3.9
230
- Description-Content-Type: text/markdown
231
- License-File: LICENSE.txt
229
+ Requires-Python: <3.13,>=3.9
232
230
  Requires-Dist: boto3
233
- Requires-Dist: pydantic
234
- Requires-Dist: sqlalchemy
235
- Requires-Dist: pyarrow
231
+ Requires-Dist: gitpython>=3
236
232
  Requires-Dist: numpy<2
237
- Requires-Dist: requests
238
- Requires-Dist: PyYAML
239
- Requires-Dist: python-dotenv
240
- Requires-Dist: GitPython>=3
241
233
  Requires-Dist: psycopg2-binary
234
+ Requires-Dist: pyarrow
235
+ Requires-Dist: pydantic
236
+ Requires-Dist: python-dotenv
242
237
  Requires-Dist: python-slugify
238
+ Requires-Dist: pyyaml
239
+ Requires-Dist: requests
240
+ Requires-Dist: sqlalchemy
241
+ Provides-Extra: dev
242
+ Requires-Dist: bumpver; extra == 'dev'
243
+ Requires-Dist: jsonschema; extra == 'dev'
244
+ Requires-Dist: pytest; extra == 'dev'
245
+ Requires-Dist: pytest-cov; extra == 'dev'
243
246
  Provides-Extra: full
244
- Requires-Dist: pandas; extra == "full"
245
- Requires-Dist: mlflow; extra == "full"
246
- Requires-Dist: fsspec; extra == "full"
247
- Requires-Dist: s3fs; extra == "full"
248
- Provides-Extra: pandas
249
- Requires-Dist: pandas; extra == "pandas"
250
- Requires-Dist: fsspec; extra == "pandas"
251
- Requires-Dist: s3fs; extra == "pandas"
247
+ Requires-Dist: fsspec; extra == 'full'
248
+ Requires-Dist: mlflow; extra == 'full'
249
+ Requires-Dist: pandas; extra == 'full'
250
+ Requires-Dist: s3fs; extra == 'full'
252
251
  Provides-Extra: mlflow
253
- Requires-Dist: mlflow; extra == "mlflow"
254
- Provides-Extra: dev
255
- Requires-Dist: black; extra == "dev"
256
- Requires-Dist: pytest; extra == "dev"
257
- Requires-Dist: bumpver; extra == "dev"
258
- Requires-Dist: ruff; extra == "dev"
259
- Requires-Dist: moto; extra == "dev"
260
- Provides-Extra: docs
261
- Requires-Dist: Sphinx>=7; extra == "docs"
262
- Requires-Dist: pydata-sphinx-theme>=0.15; extra == "docs"
263
- Requires-Dist: numpydoc>=1.6; extra == "docs"
252
+ Requires-Dist: mlflow; extra == 'mlflow'
253
+ Provides-Extra: pandas
254
+ Requires-Dist: fsspec; extra == 'pandas'
255
+ Requires-Dist: pandas; extra == 'pandas'
256
+ Requires-Dist: s3fs; extra == 'pandas'
257
+ Description-Content-Type: text/markdown
264
258
 
265
259
  # Digitalhub Library
266
260
 
@@ -84,8 +84,8 @@ digitalhub/entities/artifact/artifact/entity.py,sha256=hj6UlJMWnWimqizhKGcKpvOpc
84
84
  digitalhub/entities/artifact/artifact/spec.py,sha256=iX6czlG6W_ZoW86PD2GPc_-xLk6Zb1Vt3fk8Lzjbp3c,593
85
85
  digitalhub/entities/artifact/artifact/status.py,sha256=x-lTgO2KkjwzlJnEhIfUtF9rzJ1DTIAd3-Hn6ZeLRqo,305
86
86
  digitalhub/entities/dataitem/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
- digitalhub/entities/dataitem/crud.py,sha256=-y3lTjEtISje6X_lo-rx9Aynf_WkvzYi9G6WY7QJvRA,8090
88
- digitalhub/entities/dataitem/utils.py,sha256=zCxUMyhEdL7jP4nK42tT1obYMkJTT2txo1lGHPwG1H4,3232
87
+ digitalhub/entities/dataitem/crud.py,sha256=Vel2zyJl8DbdJJoNBx1nEoFYy5e2Gi1AmJUsCE0nhZE,8418
88
+ digitalhub/entities/dataitem/utils.py,sha256=frrJrNnk4dJbhmBgqGWHpXqW4tpmD6JgGDhEo_LgGr4,4047
89
89
  digitalhub/entities/dataitem/_base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
90
  digitalhub/entities/dataitem/_base/builder.py,sha256=XZ5Ul-aBkx6ygpN9rhjNQOD-6jzJhulyM0WCD-f3big,2256
91
91
  digitalhub/entities/dataitem/_base/entity.py,sha256=J8G7Xm_AKETg02RtNHlUyM-bmvT__HKu1Npv4Od037A,945
@@ -207,7 +207,7 @@ digitalhub/stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
207
207
  digitalhub/stores/api.py,sha256=7bspqJaexE8PKnVDotpAX3mgLsgcCNhrImD1kNI4heQ,560
208
208
  digitalhub/stores/builder.py,sha256=6_19vEYnh3WYkgOY0h5xAjpFujMYR0oHF5PBDupsgnc,2314
209
209
  digitalhub/stores/_base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
210
- digitalhub/stores/_base/store.py,sha256=cMZVHJKmEaMfeMQ9mUU_d4WhtscBhipUCi0-QXY6Wvw,5132
210
+ digitalhub/stores/_base/store.py,sha256=8YzbQ7x1p1gx5H0WgVpqZsCmNTpezPT67tNIU3fAp_M,5148
211
211
  digitalhub/stores/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
212
212
  digitalhub/stores/local/store.py,sha256=cpQ1hfw5TZ_akDVS_4gT_rqWdDF1qNMDwHwuXt-iafs,7333
213
213
  digitalhub/stores/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -231,18 +231,7 @@ digitalhub/utils/git_utils.py,sha256=air8jn73FxzSWRxpvObcdOJBWcFOqb5A7D4ISwPEs7A
231
231
  digitalhub/utils/io_utils.py,sha256=8jD4Rp_b7LZEpY5JSMxVUowZsnifKnbGpHT5Hijx9-g,3299
232
232
  digitalhub/utils/logger.py,sha256=ml3ne6D8wuRdNZ4F6ywmvWotSxjmZWnmKgNiuHb4R5M,437
233
233
  digitalhub/utils/uri_utils.py,sha256=AKfvMtdt_0OY_cNtJY1t0YXrQ12OYGf-KYBF_2m0ULA,3821
234
- test/test_crud_functions.py,sha256=tQs_QBaPCuYVSBpbl-he5_6jr_tteCXVmohj1ZluNsA,2769
235
- test/test_crud_runs.py,sha256=lkssy15UPJKymgazmi5gG6RLxyTsG-tM_CpNCowD2gQ,2220
236
- test/test_crud_tasks.py,sha256=sIbY-Hq6C2N20hWHfbCMw9c-zpYS0m_UJGnPINR4Q6s,2111
237
- test/testkfp.py,sha256=fy__EywOCtucL5HUEKD23gwit1qu9SrCWFSjyeKxe68,1085
238
- test/testkfp_pipeline.py,sha256=WceFrCp-avHI7PcwIvnv7Kgs2xK3oQqU6sjaonGamg8,622
239
- test/local/CRUD/test_artifacts.py,sha256=Y3J_C7SDRSsQd2SGIZjPIOvyTL92B1sTFrUONG3YG0Y,2968
240
- test/local/CRUD/test_dataitems.py,sha256=LQqTzI59uwTGy4zoq8jL0yWVe2W9vXlatkgDU9aB6xg,2968
241
- test/local/CRUD/test_models.py,sha256=msosbZuRwIMbZtmi3ZaOva4TjQ4lrzkNu9AguIFhrSo,2929
242
- test/local/imports/test_imports.py,sha256=Z_RomI5EN_5KJUU1p7S1lLl7krTuEwB0L1cBtbXXoOA,1254
243
- test/local/instances/test_validate.py,sha256=bGPKRFR_Tb5nlzzmI_ty_6UVUvYGseE2-pkNVoGWeO0,1842
244
- digitalhub-0.10.0b2.dist-info/LICENSE.txt,sha256=qmrTTXPlgU0kSRlRVbjhlyGs1IXs2QPxo_Y-Mn06J0k,11589
245
- digitalhub-0.10.0b2.dist-info/METADATA,sha256=t4q9tWFbz_YUduJulntoZJoaXVpFjj4SoH0cSnDRgJk,15368
246
- digitalhub-0.10.0b2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
247
- digitalhub-0.10.0b2.dist-info/top_level.txt,sha256=ae9pDfCF27ZoaVAxuBKONMP0lm5P-N_I-e-no1WlvD8,16
248
- digitalhub-0.10.0b2.dist-info/RECORD,,
234
+ digitalhub-0.10.0b3.dist-info/METADATA,sha256=Xmttpbzf2euECdRnht5Q3sDjmJ7gAtHx_UHfBBQ7sqk,15173
235
+ digitalhub-0.10.0b3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
+ digitalhub-0.10.0b3.dist-info/licenses/LICENSE.txt,sha256=qmrTTXPlgU0kSRlRVbjhlyGs1IXs2QPxo_Y-Mn06J0k,11589
237
+ digitalhub-0.10.0b3.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1,2 +0,0 @@
1
- digitalhub
2
- test
@@ -1,96 +0,0 @@
1
- """
2
- Unit tests for the entity Artifact
3
- """
4
-
5
- import digitalhub as dh
6
- from digitalhub.entities.artifact._base.entity import Artifact
7
-
8
-
9
- class TestArtifactCRUD:
10
- def create_test_dicts(self):
11
- names = ["test1", "test2", "test3", "test4"]
12
- uuids = [
13
- "d150bcca-bb64-451d-8455-dff862254b95",
14
- "31acdd2d-0c41-428c-b68b-1b133da9e97b",
15
- "b4a3dfdc-b917-44c4-9a29-613dcf734244",
16
- "2618d9c4-cd61-440f-aebb-7e5761709f3b",
17
- ]
18
- paths = [
19
- "./data/test.csv",
20
- "s3://bucket/key.csv",
21
- "sql://database/schema/table",
22
- "https://url.com/file.csv",
23
- ]
24
- kind = ["artifact", "artifact", "artifact", "artifact"]
25
-
26
- dicts = []
27
- for i in range(len(names)):
28
- dicts.append({"name": names[i], "uuid": uuids[i], "path": paths[i], "kind": kind[i]})
29
-
30
- return dicts
31
-
32
- def test_create_delete(self):
33
- dicts = self.create_test_dicts()
34
- p = dh.get_or_create_project("test", local=True)
35
- # Create and delete artifacts
36
- for i in dicts:
37
- d = dh.new_artifact(p.name, **i)
38
- dh.delete_artifact(d.key)
39
- d = dh.new_artifact(p.name, **i)
40
- dh.delete_artifact(d.name, project=p.name, entity_id=d.id)
41
- d = p.new_artifact(**i)
42
- p.delete_artifact(d.key)
43
-
44
- assert dh.list_artifacts(p.name) == []
45
- dh.delete_project("test", local=True, clean_context=True)
46
-
47
- def test_list(self):
48
- dicts = self.create_test_dicts()
49
- p = dh.get_or_create_project("test", local=True)
50
-
51
- assert dh.list_artifacts(p.name) == []
52
-
53
- for i in dicts:
54
- dh.new_artifact(p.name, **i)
55
-
56
- # List artifacts
57
- l_obj = dh.list_artifacts(p.name)
58
- assert isinstance(l_obj, list)
59
- assert len(l_obj) == 4
60
- for i in l_obj:
61
- assert isinstance(i, Artifact)
62
-
63
- # delete listed objects
64
- for obj in l_obj:
65
- dh.delete_artifact(obj.key)
66
-
67
- assert len(dh.list_artifacts(p.name)) == 0
68
-
69
- dh.delete_project("test", clean_context=True, local=True)
70
-
71
- def test_get(self):
72
- dicts = self.create_test_dicts()
73
- p = dh.get_or_create_project("test", local=True)
74
-
75
- for i in dicts:
76
- o1 = dh.new_artifact(p.name, **i)
77
- assert isinstance(o1, Artifact)
78
-
79
- # Get by name and id
80
- o2 = dh.get_artifact(o1.name, project=p.name, entity_id=o1.id)
81
- assert isinstance(o2, Artifact)
82
- assert o1.id == o2.id
83
-
84
- # Get by key
85
- o3 = dh.get_artifact(o1.key)
86
- assert isinstance(o3, Artifact)
87
- assert o1.id == o3.id
88
-
89
- # delete listed objects
90
- l_obj = dh.list_artifacts(p.name)
91
- for obj in l_obj:
92
- dh.delete_artifact(obj.key)
93
-
94
- assert len(dh.list_artifacts(p.name)) == 0
95
-
96
- dh.delete_project("test", clean_context=True, local=True)
@@ -1,96 +0,0 @@
1
- """
2
- Unit tests for the entity Dataitem
3
- """
4
-
5
- import digitalhub as dh
6
- from digitalhub.entities.dataitem._base.entity import Dataitem
7
-
8
-
9
- class TestDataitemCRUD:
10
- def create_test_dicts(self):
11
- names = ["test1", "test2", "test3", "test4"]
12
- uuids = [
13
- "d150bcca-bb64-451d-8455-dff862254b95",
14
- "31acdd2d-0c41-428c-b68b-1b133da9e97b",
15
- "b4a3dfdc-b917-44c4-9a29-613dcf734244",
16
- "2618d9c4-cd61-440f-aebb-7e5761709f3b",
17
- ]
18
- paths = [
19
- "./data/test.csv",
20
- "s3://bucket/key.csv",
21
- "sql://database/schema/table",
22
- "https://url.com/file.csv",
23
- ]
24
- kind = ["dataitem", "dataitem", "dataitem", "dataitem"]
25
-
26
- dicts = []
27
- for i in range(len(names)):
28
- dicts.append({"name": names[i], "uuid": uuids[i], "path": paths[i], "kind": kind[i]})
29
-
30
- return dicts
31
-
32
- def test_create_delete(self):
33
- dicts = self.create_test_dicts()
34
- p = dh.get_or_create_project("test", local=True)
35
- # Create and delete dataitems
36
- for i in dicts:
37
- d = dh.new_dataitem(p.name, **i)
38
- dh.delete_dataitem(d.key)
39
- d = dh.new_dataitem(p.name, **i)
40
- dh.delete_dataitem(d.name, project=p.name, entity_id=d.id)
41
- d = p.new_dataitem(**i)
42
- p.delete_dataitem(d.key)
43
-
44
- assert dh.list_dataitems(p.name) == []
45
- dh.delete_project("test", local=True, clean_context=True)
46
-
47
- def test_list(self):
48
- dicts = self.create_test_dicts()
49
- p = dh.get_or_create_project("test", local=True)
50
-
51
- assert dh.list_dataitems(p.name) == []
52
-
53
- for i in dicts:
54
- dh.new_dataitem(p.name, **i)
55
-
56
- # List dataitems
57
- l_obj = dh.list_dataitems(p.name)
58
- assert isinstance(l_obj, list)
59
- assert len(l_obj) == 4
60
- for i in l_obj:
61
- assert isinstance(i, Dataitem)
62
-
63
- # delete listed objects
64
- for obj in l_obj:
65
- dh.delete_dataitem(obj.key)
66
-
67
- assert len(dh.list_dataitems(p.name)) == 0
68
-
69
- dh.delete_project("test", clean_context=True, local=True)
70
-
71
- def test_get(self):
72
- dicts = self.create_test_dicts()
73
- p = dh.get_or_create_project("test", local=True)
74
-
75
- for i in dicts:
76
- o1 = dh.new_dataitem(p.name, **i)
77
- assert isinstance(o1, Dataitem)
78
-
79
- # Get by name and id
80
- o2 = dh.get_dataitem(o1.name, project=p.name, entity_id=o1.id)
81
- assert isinstance(o2, Dataitem)
82
- assert o1.id == o2.id
83
-
84
- # Get by key
85
- o3 = dh.get_dataitem(o1.key)
86
- assert isinstance(o3, Dataitem)
87
- assert o1.id == o3.id
88
-
89
- # delete listed objects
90
- l_obj = dh.list_dataitems(p.name)
91
- for obj in l_obj:
92
- dh.delete_dataitem(obj.key)
93
-
94
- assert len(dh.list_dataitems(p.name)) == 0
95
-
96
- dh.delete_project("test", clean_context=True, local=True)
@@ -1,95 +0,0 @@
1
- """
2
- Unit tests for the entity Models
3
- """
4
-
5
- import digitalhub as dh
6
- from digitalhub.entities.model._base.entity import Model
7
-
8
-
9
- class TestModelCRUD:
10
- def create_test_dicts(self):
11
- names = ["test1", "test2", "test3", "test4"]
12
- uuids = [
13
- "d150bcca-bb64-451d-8455-dff862254b95",
14
- "31acdd2d-0c41-428c-b68b-1b133da9e97b",
15
- "b4a3dfdc-b917-44c4-9a29-613dcf734244",
16
- "2618d9c4-cd61-440f-aebb-7e5761709f3b",
17
- ]
18
- paths = [
19
- "./data/my_random_forest_model.pkl",
20
- "s3://bucket/model.pkl",
21
- "sql://database/schema/linear_regression_model.joblib",
22
- "https://url.com/bert_base_uncased.pt",
23
- ]
24
- kind = ["model", "model", "model", "model"]
25
-
26
- dicts = []
27
- for i in range(len(names)):
28
- dicts.append({"name": names[i], "uuid": uuids[i], "path": paths[i], "kind": kind[i]})
29
-
30
- return dicts
31
-
32
- def test_create_delete(self):
33
- dicts = self.create_test_dicts()
34
- p = dh.get_or_create_project("test", local=True)
35
- # Create and delete models
36
- for i in dicts:
37
- d = dh.new_model(p.name, **i)
38
- dh.delete_model(d.key)
39
- d = dh.new_model(p.name, **i)
40
- dh.delete_model(d.name, project=p.name, entity_id=d.id)
41
- d = p.new_model(**i)
42
- p.delete_model(d.key)
43
- assert dh.list_models(p.name) == []
44
- dh.delete_project("test", local=True, clean_context=True)
45
-
46
- def test_list(self):
47
- dicts = self.create_test_dicts()
48
- p = dh.get_or_create_project("test", local=True)
49
-
50
- assert dh.list_models(p.name) == []
51
-
52
- for i in dicts:
53
- dh.new_model(p.name, **i)
54
-
55
- # List models
56
- l_obj = dh.list_models(p.name)
57
- assert isinstance(l_obj, list)
58
- assert len(l_obj) == 4
59
- for i in l_obj:
60
- assert isinstance(i, Model)
61
-
62
- # delete listed objects
63
- for obj in l_obj:
64
- dh.delete_model(obj.key)
65
-
66
- assert len(dh.list_models(p.name)) == 0
67
-
68
- dh.delete_project("test", clean_context=True, local=True)
69
-
70
- def test_get(self):
71
- dicts = self.create_test_dicts()
72
- p = dh.get_or_create_project("test", local=True)
73
-
74
- for i in dicts:
75
- o1 = dh.new_model(p.name, **i)
76
- assert isinstance(o1, Model)
77
-
78
- # Get by name and id
79
- o2 = dh.get_model(o1.name, project=p.name, entity_id=o1.id)
80
- assert isinstance(o2, Model)
81
- assert o1.id == o2.id
82
-
83
- # Get by key
84
- o3 = dh.get_model(o1.key)
85
- assert isinstance(o3, Model)
86
- assert o1.id == o3.id
87
-
88
- # delete listed objects
89
- l_obj = dh.list_models(p.name)
90
- for obj in l_obj:
91
- dh.delete_model(obj.key)
92
-
93
- assert len(dh.list_models(p.name)) == 0
94
-
95
- dh.delete_project("test", clean_context=True, local=True)
@@ -1,65 +0,0 @@
1
- import digitalhub
2
-
3
- METHODS = [
4
- "new_artifact",
5
- "new_function",
6
- "new_project",
7
- "new_run",
8
- "new_secret",
9
- "new_task",
10
- "new_workflow",
11
- "new_dataitem",
12
- "new_model",
13
- "get_artifact",
14
- "get_function",
15
- "get_project",
16
- "get_run",
17
- "get_secret",
18
- "get_task",
19
- "get_workflow",
20
- "get_dataitem",
21
- "get_model",
22
- "import_artifact",
23
- "import_function",
24
- "import_project",
25
- "import_run",
26
- "import_secret",
27
- "import_task",
28
- "import_workflow",
29
- "import_dataitem",
30
- "import_model",
31
- "list_artifacts",
32
- "list_functions",
33
- "list_runs",
34
- "list_secrets",
35
- "list_tasks",
36
- "list_workflows",
37
- "list_dataitems",
38
- "list_models",
39
- "update_artifact",
40
- "update_function",
41
- "update_project",
42
- "update_run",
43
- "update_secret",
44
- "update_task",
45
- "update_workflow",
46
- "update_dataitem",
47
- "update_model",
48
- "delete_artifact",
49
- "delete_function",
50
- "delete_project",
51
- "delete_run",
52
- "delete_secret",
53
- "delete_task",
54
- "delete_workflow",
55
- "delete_dataitem",
56
- "delete_model",
57
- "set_dhcore_env",
58
- "load_project",
59
- "get_or_create_project",
60
- ]
61
-
62
-
63
- def test_imports():
64
- for i in METHODS:
65
- assert hasattr(digitalhub, i)
@@ -1,55 +0,0 @@
1
- import os
2
- import pytest
3
- from glob import glob
4
- from pathlib import Path
5
- import json
6
- from jsonschema import validate
7
- from digitalhub.factory.factory import factory
8
-
9
- entities_path = "test/local/instances/entities"
10
- schemas_path = "test/local/instances/schemas"
11
-
12
- # Build dict: kind -> path to schema file
13
- schemas = {}
14
- for path_to_schema in glob(f"{schemas_path}/**/*.json", recursive=True):
15
- kind = Path(path_to_schema).stem
16
- schemas[kind] = path_to_schema
17
-
18
- # Build dict: name of file to validate -> full path to file
19
- entity_paths = {}
20
- for path_to_file in glob(f"{entities_path}/**/*.json", recursive=True):
21
- file_name = os.path.basename(path_to_file)
22
-
23
- # If a file in a nested directory causes a name collision, use its full path as name
24
- if file_name in entity_paths:
25
- file_name = path_to_file
26
-
27
- entity_paths[file_name] = path_to_file
28
-
29
- # Build object from JSON file using factory
30
- def build_obj(entity_file_path):
31
- with open(entity_file_path) as f:
32
- entity = json.load(f)
33
-
34
- kind = entity["kind"]
35
- spec = entity["spec"]
36
-
37
- built = factory.build_spec(kind, **spec)
38
- return built.to_dict(), kind
39
-
40
- # Validate built object against its kind's schema
41
- def is_valid(built, kind):
42
- with open(schemas[kind]) as schema_file:
43
- schema = json.load(schema_file)
44
-
45
- validate(instance=built, schema=schema)
46
- return True
47
-
48
- # Tests that each JSON file contained in the specified path can successfully be
49
- # used to generate an object through the factory, and that each generated object,
50
- # when exported to dict, validates (through jsonschema) against its kind's schema.
51
- class TestValidate:
52
- @pytest.mark.parametrize('file_name', list(entity_paths.keys()))
53
- def test_validate(self, file_name):
54
- built, kind = build_obj(f"{entity_paths[file_name]}")
55
- assert is_valid(built, kind)
@@ -1,109 +0,0 @@
1
- from copy import deepcopy
2
-
3
- import dotenv
4
-
5
- import digitalhub
6
- from digitalhub.entities.function._base.entity import Function
7
-
8
- dotenv.load_dotenv()
9
-
10
-
11
- def add_param(kwargs) -> dict:
12
- if kwargs["kind"] == "dbt":
13
- kwargs["source"] = {"code": "test"}
14
-
15
- if kwargs["kind"] == "mlrun":
16
- kwargs["source"] = {"code": "test"}
17
-
18
- if kwargs["kind"] == "container":
19
- kwargs["image"] = "test"
20
-
21
- if kwargs["kind"] == "nefertem":
22
- pass
23
-
24
- return kwargs
25
-
26
-
27
- names = ["test1", "test2", "test3", "test4"]
28
- uuids = [
29
- "12a01efa-o44f-4991-b153-9a3c358b7bb0",
30
- "8e367f52-25bb-4df1-b9c9-a58045b377a0",
31
- "1678f9ab-a2e0-48ff-870a-2384o3fa1334",
32
- "adb746dd-4e81-4ff8-82de-4916624o17dc",
33
- ]
34
- kind = ["mlrun", "dbt", "container", "nefertem"]
35
-
36
- dicts = []
37
- for i in range(len(names)):
38
- dicts.append({"name": names[i], "uuid": uuids[i], "kind": kind[i]})
39
-
40
-
41
- digitalhub.delete_project("test")
42
-
43
- p = digitalhub.get_or_create_project("test")
44
-
45
- # Create and delete functions
46
- for i in dicts:
47
- i = add_param(i)
48
- d = digitalhub.new_function(p.name, **i)
49
- digitalhub.delete_function(p.name, entity_id=d.id)
50
- d = digitalhub.new_function(p.name, **i)
51
- digitalhub.delete_function(p.name, entity_name=d.name)
52
- d = p.new_function(**i)
53
- p.delete_function(entity_id=d.id)
54
-
55
- # Create multiple functions
56
- for i in dicts:
57
- i = add_param(i)
58
- digitalhub.new_function(p.name, **i)
59
-
60
- c = deepcopy(i)
61
- c.pop("uuid")
62
- digitalhub.new_function(p.name, **c)
63
- digitalhub.new_function(p.name, **c)
64
- digitalhub.new_function(p.name, **c)
65
- digitalhub.new_function(p.name, **c)
66
-
67
-
68
- # List functions
69
- l_obj = digitalhub.list_functions(p.name)
70
- assert isinstance(l_obj, list)
71
- assert len(l_obj) == 4
72
- for i in l_obj:
73
- assert isinstance(i, dict)
74
-
75
- for uuid in uuids:
76
- digitalhub.delete_function(p.name, entity_id=uuid)
77
-
78
- # Get functions test
79
- for i in dicts:
80
- i = add_param(i)
81
- o1 = digitalhub.new_function(p.name, **i)
82
- assert isinstance(o1, Function)
83
-
84
- # Get by id
85
- o2 = digitalhub.get_function(p.name, entity_id=o1.id)
86
- assert isinstance(o2, Function)
87
- assert o1.id == o2.id
88
-
89
- # Get by name
90
- o3 = digitalhub.get_function(p.name, entity_name=o1.name)
91
- assert isinstance(o3, Function)
92
- assert o1.id == o3.id
93
-
94
- # Get by name as latest
95
- c = deepcopy(i)
96
- c.pop("uuid")
97
- o4 = digitalhub.new_function(p.name, **c)
98
- o5 = digitalhub.get_function(p.name, entity_name=o1.name)
99
- assert isinstance(o5, Function)
100
- assert (o5.id != o1.id) and (o5.name == o1.name) and (o5.id == o4.id)
101
-
102
-
103
- # Delete functions, all versions
104
- for n in names:
105
- digitalhub.delete_function(p.name, entity_name=n, delete_all_versions=True)
106
- l_obj = digitalhub.list_functions(p.name)
107
- assert not l_obj
108
-
109
- digitalhub.delete_project("test")
test/test_crud_runs.py DELETED
@@ -1,86 +0,0 @@
1
- import dotenv
2
-
3
- import digitalhub
4
- from digitalhub.entities.run._base.entity import Run
5
-
6
- dotenv.load_dotenv()
7
-
8
-
9
- def add_param(kwargs) -> dict:
10
- if kwargs["kind"] == "mlrun+run":
11
- kwargs["task"] = t1._get_task_string()
12
-
13
- if kwargs["kind"] == "dbt+run":
14
- kwargs["task"] = t2._get_task_string()
15
-
16
- if kwargs["kind"] == "container+run":
17
- kwargs["task"] = t3._get_task_string()
18
-
19
- if kwargs["kind"] == "nefertem+run":
20
- kwargs["task"] = t4._get_task_string()
21
-
22
- kwargs["local_execution"] = True
23
-
24
- return kwargs
25
-
26
-
27
- names = ["test1", "test2", "test3", "test4"]
28
- uuids = [
29
- "12a01efa-o44f-4991-b153-9a3c358b7bb0",
30
- "8e367f52-25bb-4df1-b9c9-a58045b377a0",
31
- "1678f9ab-a2e0-48ff-870a-2384o3fa1334",
32
- "adb746dd-4e81-4ff8-82de-4916624o17dc",
33
- ]
34
- kind = ["mlrun+run", "dbt+run", "container+run", "nefertem+run"]
35
-
36
- dicts = []
37
- for i in range(len(names)):
38
- dicts.append({"name": names[i], "uuid": uuids[i], "kind": kind[i]})
39
-
40
- digitalhub.delete_project("test")
41
-
42
- p = digitalhub.get_or_create_project("test")
43
-
44
- f1 = p.new_function(name="t1", kind="mlrun", source={"code": "test"})
45
- t1 = f1.new_task(kind="mlrun+job")
46
- f2 = p.new_function(name="t2", kind="dbt", source={"code": "test"})
47
- t2 = f2.new_task(kind="dbt+transform")
48
- f3 = p.new_function(name="t3", kind="container", image="test")
49
- t3 = f3.new_task(kind="container+job")
50
- f4 = p.new_function(name="t4", kind="nefertem")
51
- t4 = f4.new_task(kind="nefertem+infer", framework="test")
52
-
53
-
54
- # Create and delete runs
55
- for i in dicts:
56
- i = add_param(i)
57
- d = digitalhub.new_run(p.name, **i)
58
- digitalhub.delete_run(p.name, entity_id=d.id)
59
-
60
- # Create multiple runs
61
- for i in dicts:
62
- i = add_param(i)
63
- digitalhub.new_run(p.name, **i)
64
-
65
- # List runs
66
- l_obj = digitalhub.list_runs(p.name)
67
- assert isinstance(l_obj, list)
68
- assert len(l_obj) == 4
69
- for i in l_obj:
70
- assert isinstance(i, dict)
71
-
72
- for uuid in uuids:
73
- digitalhub.delete_run(p.name, entity_id=uuid)
74
-
75
- # Get runs test
76
- for i in dicts:
77
- i = add_param(i)
78
- o1 = digitalhub.new_run(p.name, **i)
79
- assert isinstance(o1, Run)
80
-
81
- # Get by id
82
- o2 = digitalhub.get_run(p.name, entity_id=o1.id)
83
- assert isinstance(o2, Run)
84
- assert o1.id == o2.id
85
-
86
- digitalhub.delete_project("test")
test/test_crud_tasks.py DELETED
@@ -1,81 +0,0 @@
1
- import dotenv
2
-
3
- import digitalhub
4
- from digitalhub.entities.task._base.entity import Task
5
-
6
- dotenv.load_dotenv()
7
-
8
-
9
- def add_param(kwargs) -> dict:
10
- if kwargs["kind"] == "mlrun+job":
11
- kwargs["function"] = f1._get_function_string()
12
-
13
- if kwargs["kind"] == "dbt+transform":
14
- kwargs["function"] = f2._get_function_string()
15
-
16
- if kwargs["kind"] == "container+job":
17
- kwargs["function"] = f3._get_function_string()
18
-
19
- if kwargs["kind"] == "nefertem+infer":
20
- kwargs["function"] = f4._get_function_string()
21
- kwargs["framework"] = "test"
22
-
23
- return kwargs
24
-
25
-
26
- names = ["test1", "test2", "test3", "test4"]
27
- uuids = [
28
- "12a01efa-o44f-4991-b153-9a3c358b7bb0",
29
- "8e367f52-25bb-4df1-b9c9-a58045b377a0",
30
- "1678f9ab-a2e0-48ff-870a-2384o3fa1334",
31
- "adb746dd-4e81-4ff8-82de-4916624o17dc",
32
- ]
33
- kind = ["mlrun+job", "dbt+transform", "container+job", "nefertem+infer"]
34
-
35
- dicts = []
36
- for i in range(len(names)):
37
- dicts.append({"name": names[i], "uuid": uuids[i], "kind": kind[i]})
38
-
39
- digitalhub.delete_project("test")
40
-
41
- p = digitalhub.get_or_create_project("test")
42
-
43
- f1 = p.new_function(name="t1", kind="mlrun", source={"code": "test"})
44
- f2 = p.new_function(name="t2", kind="dbt", source={"code": "test"})
45
- f3 = p.new_function(name="t3", kind="container", image="test")
46
- f4 = p.new_function(name="t4", kind="nefertem")
47
-
48
-
49
- # Create and delete tasks
50
- for i in dicts:
51
- i = add_param(i)
52
- d = digitalhub.new_task(p.name, **i)
53
- digitalhub.delete_task(p.name, entity_id=d.id)
54
-
55
- # Create multiple tasks
56
- for i in dicts:
57
- i = add_param(i)
58
- digitalhub.new_task(p.name, **i)
59
-
60
- # List tasks
61
- l_obj = digitalhub.list_tasks(p.name)
62
- assert isinstance(l_obj, list)
63
- assert len(l_obj) == 4
64
- for i in l_obj:
65
- assert isinstance(i, dict)
66
-
67
- for uuid in uuids:
68
- digitalhub.delete_task(p.name, entity_id=uuid)
69
-
70
- # Get tasks test
71
- for i in dicts:
72
- i = add_param(i)
73
- o1 = digitalhub.new_task(p.name, **i)
74
- assert isinstance(o1, Task)
75
-
76
- # Get by id
77
- o2 = digitalhub.get_task(p.name, entity_id=o1.id)
78
- assert isinstance(o2, Task)
79
- assert o1.id == o2.id
80
-
81
- digitalhub.delete_project("test")
test/testkfp.py DELETED
@@ -1,37 +0,0 @@
1
- import os
2
- import time
3
-
4
- import digitalhub as dh
5
-
6
-
7
- def _is_finished(state: str):
8
- return state == "COMPLETED" or state == "ERROR" or state == "STOPPED"
9
-
10
-
11
- os.environ["DHCORE_ENDPOINT"] = "http://localhost:8080/"
12
- os.environ["DHCORE_WORKFLOW_IMAGE"] = "localhost:5000/dhcoreworkflow9:latest"
13
- os.environ["KFP_ENDPOINT"] = "http://localhost:8888/"
14
-
15
- # Get or create project
16
- project = dh.get_or_create_project("project-kfp2")
17
-
18
- url = "https://gist.githubusercontent.com/kevin336/acbb2271e66c10a5b73aacf82ca82784/raw/e38afe62e088394d61ed30884dd50a6826eee0a8/employees.csv"
19
-
20
- di = project.new_dataitem(name="employees", kind="table", path=url)
21
-
22
- function = project.get_function(entity_name="test-kfp")
23
- if function is None:
24
- function = project.new_function(
25
- name="test-kfp",
26
- kind="kfp",
27
- source={"source": "test/testkfp_pipeline.py"},
28
- handler="myhandler",
29
- )
30
-
31
-
32
- run = function.run("pipeline", parameters={"ref": di.key}, local_execution=True)
33
- while not _is_finished(run.status.state):
34
- time.sleep(5)
35
- run = run.refresh()
36
-
37
- print(str(run.status.to_dict()))
test/testkfp_pipeline.py DELETED
@@ -1,22 +0,0 @@
1
- from digitalhub_runtime_kfp.dsl import pipeline_context
2
-
3
-
4
- def myhandler(ref):
5
- with pipeline_context() as pc:
6
- s1 = pc.step(
7
- name="step1",
8
- function="function-dbt",
9
- action="transform",
10
- inputs=[{"employees": ref}],
11
- outputs=[{"output_table": "e60"}],
12
- )
13
-
14
- s2 = pc.step(
15
- name="step2",
16
- function="function-dbt",
17
- action="transform",
18
- inputs=[{"employees": s1.outputs["e60"]}],
19
- outputs=[{"output_table": "employees_pipeline"}],
20
- )
21
-
22
- return s2.outputs["employees_pipeline"]