digitalhub 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of digitalhub might be problematic. Click here for more details.

Files changed (121)
  1. digitalhub/__init__.py +2 -3
  2. digitalhub/client/_base/api_builder.py +1 -1
  3. digitalhub/client/_base/client.py +25 -2
  4. digitalhub/client/_base/params_builder.py +16 -0
  5. digitalhub/client/dhcore/api_builder.py +10 -4
  6. digitalhub/client/dhcore/client.py +30 -398
  7. digitalhub/client/dhcore/configurator.py +361 -0
  8. digitalhub/client/dhcore/error_parser.py +107 -0
  9. digitalhub/client/dhcore/models.py +13 -23
  10. digitalhub/client/dhcore/params_builder.py +178 -0
  11. digitalhub/client/dhcore/utils.py +4 -44
  12. digitalhub/client/local/api_builder.py +13 -18
  13. digitalhub/client/local/client.py +18 -2
  14. digitalhub/client/local/enums.py +11 -0
  15. digitalhub/client/local/params_builder.py +116 -0
  16. digitalhub/configurator/api.py +31 -0
  17. digitalhub/configurator/configurator.py +195 -0
  18. digitalhub/configurator/credentials_store.py +65 -0
  19. digitalhub/configurator/ini_module.py +74 -0
  20. digitalhub/entities/_base/_base/entity.py +2 -2
  21. digitalhub/entities/_base/context/entity.py +4 -4
  22. digitalhub/entities/_base/entity/builder.py +5 -5
  23. digitalhub/entities/_base/executable/entity.py +2 -2
  24. digitalhub/entities/_base/material/entity.py +12 -12
  25. digitalhub/entities/_base/material/status.py +1 -1
  26. digitalhub/entities/_base/material/utils.py +2 -2
  27. digitalhub/entities/_base/unversioned/entity.py +2 -2
  28. digitalhub/entities/_base/versioned/entity.py +2 -2
  29. digitalhub/entities/_commons/enums.py +2 -0
  30. digitalhub/entities/_commons/metrics.py +164 -0
  31. digitalhub/entities/_commons/types.py +5 -0
  32. digitalhub/entities/_commons/utils.py +2 -2
  33. digitalhub/entities/_processors/base.py +527 -0
  34. digitalhub/entities/{_operations/processor.py → _processors/context.py} +212 -837
  35. digitalhub/entities/_processors/utils.py +158 -0
  36. digitalhub/entities/artifact/artifact/spec.py +3 -1
  37. digitalhub/entities/artifact/crud.py +13 -12
  38. digitalhub/entities/artifact/utils.py +1 -1
  39. digitalhub/entities/builders.py +6 -18
  40. digitalhub/entities/dataitem/_base/entity.py +0 -41
  41. digitalhub/entities/dataitem/crud.py +27 -15
  42. digitalhub/entities/dataitem/table/entity.py +49 -35
  43. digitalhub/entities/dataitem/table/models.py +4 -3
  44. digitalhub/{utils/data_utils.py → entities/dataitem/table/utils.py} +46 -54
  45. digitalhub/entities/dataitem/utils.py +58 -10
  46. digitalhub/entities/function/crud.py +9 -9
  47. digitalhub/entities/model/_base/entity.py +120 -0
  48. digitalhub/entities/model/_base/spec.py +6 -17
  49. digitalhub/entities/model/_base/status.py +10 -0
  50. digitalhub/entities/model/crud.py +13 -12
  51. digitalhub/entities/model/huggingface/spec.py +9 -4
  52. digitalhub/entities/model/mlflow/models.py +2 -2
  53. digitalhub/entities/model/mlflow/spec.py +7 -7
  54. digitalhub/entities/model/mlflow/utils.py +44 -5
  55. digitalhub/entities/project/_base/entity.py +317 -9
  56. digitalhub/entities/project/_base/spec.py +8 -6
  57. digitalhub/entities/project/crud.py +12 -11
  58. digitalhub/entities/run/_base/entity.py +103 -6
  59. digitalhub/entities/run/_base/spec.py +4 -2
  60. digitalhub/entities/run/_base/status.py +12 -0
  61. digitalhub/entities/run/crud.py +8 -8
  62. digitalhub/entities/secret/_base/entity.py +3 -3
  63. digitalhub/entities/secret/_base/spec.py +4 -2
  64. digitalhub/entities/secret/crud.py +11 -9
  65. digitalhub/entities/task/_base/entity.py +4 -4
  66. digitalhub/entities/task/_base/models.py +51 -40
  67. digitalhub/entities/task/_base/spec.py +2 -0
  68. digitalhub/entities/task/_base/utils.py +2 -2
  69. digitalhub/entities/task/crud.py +12 -8
  70. digitalhub/entities/workflow/crud.py +9 -9
  71. digitalhub/factory/utils.py +9 -9
  72. digitalhub/readers/{_base → data/_base}/builder.py +1 -1
  73. digitalhub/readers/{_base → data/_base}/reader.py +16 -4
  74. digitalhub/readers/{api.py → data/api.py} +2 -2
  75. digitalhub/readers/{factory.py → data/factory.py} +3 -3
  76. digitalhub/readers/{pandas → data/pandas}/builder.py +2 -2
  77. digitalhub/readers/{pandas → data/pandas}/reader.py +110 -30
  78. digitalhub/readers/query/__init__.py +0 -0
  79. digitalhub/stores/_base/store.py +59 -69
  80. digitalhub/stores/api.py +8 -33
  81. digitalhub/stores/builder.py +44 -161
  82. digitalhub/stores/local/store.py +106 -89
  83. digitalhub/stores/remote/store.py +86 -11
  84. digitalhub/stores/s3/configurator.py +108 -0
  85. digitalhub/stores/s3/enums.py +17 -0
  86. digitalhub/stores/s3/models.py +21 -0
  87. digitalhub/stores/s3/store.py +154 -70
  88. digitalhub/{utils/s3_utils.py → stores/s3/utils.py} +7 -3
  89. digitalhub/stores/sql/configurator.py +88 -0
  90. digitalhub/stores/sql/enums.py +16 -0
  91. digitalhub/stores/sql/models.py +24 -0
  92. digitalhub/stores/sql/store.py +106 -85
  93. digitalhub/{readers/_commons → utils}/enums.py +5 -1
  94. digitalhub/utils/exceptions.py +6 -0
  95. digitalhub/utils/file_utils.py +8 -7
  96. digitalhub/utils/generic_utils.py +28 -15
  97. digitalhub/utils/git_utils.py +16 -9
  98. digitalhub/utils/types.py +5 -0
  99. digitalhub/utils/uri_utils.py +2 -2
  100. {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info}/METADATA +25 -31
  101. {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info}/RECORD +108 -99
  102. {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info}/WHEEL +1 -2
  103. digitalhub/client/dhcore/env.py +0 -23
  104. digitalhub/entities/_base/project/entity.py +0 -341
  105. digitalhub-0.9.1.dist-info/top_level.txt +0 -2
  106. test/local/CRUD/test_artifacts.py +0 -96
  107. test/local/CRUD/test_dataitems.py +0 -96
  108. test/local/CRUD/test_models.py +0 -95
  109. test/local/imports/test_imports.py +0 -66
  110. test/local/instances/test_validate.py +0 -55
  111. test/test_crud_functions.py +0 -109
  112. test/test_crud_runs.py +0 -86
  113. test/test_crud_tasks.py +0 -81
  114. test/testkfp.py +0 -37
  115. test/testkfp_pipeline.py +0 -22
  116. /digitalhub/{entities/_base/project → configurator}/__init__.py +0 -0
  117. /digitalhub/entities/{_operations → _processors}/__init__.py +0 -0
  118. /digitalhub/readers/{_base → data}/__init__.py +0 -0
  119. /digitalhub/readers/{_commons → data/_base}/__init__.py +0 -0
  120. /digitalhub/readers/{pandas → data/pandas}/__init__.py +0 -0
  121. {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info/licenses}/LICENSE.txt +0 -0
@@ -0,0 +1,158 @@
1
+ from __future__ import annotations
2
+
3
+ import typing
4
+
5
+ from digitalhub.client.api import get_client
6
+ from digitalhub.context.api import get_context
7
+ from digitalhub.entities._commons.enums import ApiCategories, BackendOperations, EntityTypes
8
+ from digitalhub.entities._commons.utils import get_project_from_key, parse_entity_key
9
+ from digitalhub.factory.api import build_entity_from_dict
10
+ from digitalhub.utils.exceptions import ContextError, EntityError, EntityNotExistsError
11
+
12
+ if typing.TYPE_CHECKING:
13
+ from digitalhub.client._base.client import Client
14
+ from digitalhub.context.context import Context
15
+
16
+
17
def parse_identifier(
    identifier: str,
    project: str | None = None,
    entity_type: str | None = None,
    entity_kind: str | None = None,
    entity_id: str | None = None,
) -> tuple[str, str, str | None, str | None, str | None]:
    """
    Resolve an entity identifier into its components.

    An identifier starting with ``store://`` is treated as a full entity
    key and handed to ``parse_entity_key``. Anything else is treated as a
    plain entity name, and the remaining components are taken from the
    other arguments.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name. Required when identifier is not an entity key.
    entity_type : str
        Entity type. Required when identifier is not an entity key.
    entity_kind : str
        Entity kind. Returned unchanged when identifier is an entity name.
    entity_id : str
        Entity ID. Returned unchanged when identifier is an entity name.

    Returns
    -------
    tuple[str, str, str | None, str | None, str | None]
        Project name, entity type, entity kind, entity name, entity ID.

    Raises
    ------
    ValueError
        If identifier is an entity name and project or entity type
        is missing.
    """
    # A full key is self-describing: let the key parser split it.
    if identifier.startswith("store://"):
        return parse_entity_key(identifier)

    # A bare name only identifies an entity together with its project and type.
    missing_coordinates = project is None or entity_type is None
    if missing_coordinates:
        raise ValueError("Project and entity type must be specified.")
    return project, entity_type, entity_kind, identifier, entity_id
48
+
49
+
50
def get_context_from_identifier(
    identifier: str,
    project: str | None = None,
) -> Context:
    """
    Get the context of the project an identifier belongs to.

    When the identifier is an entity key (store://...), the project is
    extracted from the key and the ``project`` argument is ignored.
    Otherwise the ``project`` argument is used as is.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name. Required when identifier is not an entity key.

    Returns
    -------
    Context
        Context.

    Raises
    ------
    EntityError
        If identifier is an entity name and no project is given.
    """
    if identifier.startswith("store://"):
        # The key embeds the project name; it overrides the argument.
        project = get_project_from_key(identifier)
    elif project is None:
        raise EntityError("Specify project if you do not specify entity key.")

    return get_context_from_project(project)
76
+
77
+
78
def get_context_from_project(
    project: str,
) -> Context:
    """
    Get the context of a project.

    The context is looked up with ``get_context`` first. If that raises
    a ContextError, the lookup falls back to ``get_context_from_remote``.

    Parameters
    ----------
    project : str
        Project name.

    Returns
    -------
    Context
        Context.
    """
    try:
        ctx = get_context(project)
    except ContextError:
        # Context lookup failed: fall back to the remote lookup.
        ctx = get_context_from_remote(project)
    return ctx
100
+
101
+
102
def get_context_from_remote(
    project: str,
) -> Context:
    """
    Get context from remote.

    Reads the project from the backend, builds the project entity from
    the returned dictionary and then returns the context looked up with
    ``get_context``.

    Parameters
    ----------
    project : str
        Project name.

    Returns
    -------
    Context
        Context.

    Raises
    ------
    ContextError
        If an EntityNotExistsError is raised while retrieving the project.
    """
    try:
        client = get_client()
        obj = _read_base_entity(client, EntityTypes.PROJECT.value, project)
        # The built entity is discarded on purpose; presumably building a
        # project registers its context as a side effect -- TODO confirm.
        build_entity_from_dict(obj)
        return get_context(project)
    except EntityNotExistsError as err:
        # Chain explicitly so the backend error stays attached as the cause.
        raise ContextError(f"Project '{project}' not found.") from err
125
+
126
+
127
def _read_base_entity(
    client: Client,
    entity_type: str,
    entity_name: str,
    **kwargs,
) -> dict:
    """
    Read a base entity from the backend.

    Parameters
    ----------
    client : Client
        Client instance used to build the API and perform the read.
    entity_type : str
        Entity type.
    entity_name : str
        Entity name.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    dict
        Object returned by the client's read call.
    """
    # Build the read API for the base category, then run the read.
    category = ApiCategories.BASE.value
    operation = BackendOperations.READ.value
    read_api = client.build_api(
        category,
        operation,
        entity_type=entity_type,
        entity_name=entity_name,
    )
    return client.read_object(read_api, **kwargs)
@@ -1,5 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from typing import Optional
4
+
3
5
  from digitalhub.entities.artifact._base.spec import ArtifactSpec, ArtifactValidator
4
6
 
5
7
 
@@ -23,5 +25,5 @@ class ArtifactValidatorArtifact(ArtifactValidator):
23
25
  ArtifactValidatorArtifact validator.
24
26
  """
25
27
 
26
- src_path: str = None
28
+ src_path: Optional[str] = None
27
29
  """Source path of the artifact."""
@@ -3,9 +3,10 @@ from __future__ import annotations
3
3
  import typing
4
4
 
5
5
  from digitalhub.entities._commons.enums import EntityTypes
6
- from digitalhub.entities._operations.processor import processor
6
+ from digitalhub.entities._processors.context import context_processor
7
7
  from digitalhub.entities.artifact._base.entity import Artifact
8
8
  from digitalhub.entities.artifact.utils import eval_source, process_kwargs
9
+ from digitalhub.utils.types import SourcesOrListOfSources
9
10
 
10
11
  if typing.TYPE_CHECKING:
11
12
  from digitalhub.entities.artifact._base.entity import Artifact
@@ -61,7 +62,7 @@ def new_artifact(
61
62
  >>> kind="artifact",
62
63
  >>> path="s3://my-bucket/my-key")
63
64
  """
64
- return processor.create_context_entity(
65
+ return context_processor.create_context_entity(
65
66
  project=project,
66
67
  name=name,
67
68
  kind=kind,
@@ -78,7 +79,7 @@ def log_artifact(
78
79
  project: str,
79
80
  name: str,
80
81
  kind: str,
81
- source: list[str] | str,
82
+ source: SourcesOrListOfSources,
82
83
  path: str | None = None,
83
84
  **kwargs,
84
85
  ) -> Artifact:
@@ -93,7 +94,7 @@ def log_artifact(
93
94
  Object name.
94
95
  kind : str
95
96
  Kind the object.
96
- source : str
97
+ source : SourcesOrListOfSources
97
98
  Artifact location on local path.
98
99
  path : str
99
100
  Destination path of the artifact. If not provided, it's generated.
@@ -114,7 +115,7 @@ def log_artifact(
114
115
  """
115
116
  eval_source(source)
116
117
  kwargs = process_kwargs(project, name, source=source, path=path, **kwargs)
117
- return processor.log_material_entity(
118
+ return context_processor.log_material_entity(
118
119
  source=source,
119
120
  project=project,
120
121
  name=name,
@@ -158,7 +159,7 @@ def get_artifact(
158
159
  >>> project="my-project",
159
160
  >>> entity_id="my-artifact-id")
160
161
  """
161
- return processor.read_material_entity(
162
+ return context_processor.read_context_entity(
162
163
  identifier=identifier,
163
164
  entity_type=ENTITY_TYPE,
164
165
  project=project,
@@ -198,7 +199,7 @@ def get_artifact_versions(
198
199
  >>> obj = get_artifact_versions("my-artifact-name"
199
200
  >>> project="my-project")
200
201
  """
201
- return processor.read_material_entity_versions(
202
+ return context_processor.read_context_entity_versions(
202
203
  identifier=identifier,
203
204
  entity_type=ENTITY_TYPE,
204
205
  project=project,
@@ -226,7 +227,7 @@ def list_artifacts(project: str, **kwargs) -> list[Artifact]:
226
227
  --------
227
228
  >>> objs = list_artifacts(project="my-project")
228
229
  """
229
- return processor.list_material_entities(
230
+ return context_processor.list_context_entities(
230
231
  project=project,
231
232
  entity_type=ENTITY_TYPE,
232
233
  **kwargs,
@@ -251,7 +252,7 @@ def import_artifact(file: str) -> Artifact:
251
252
  --------
252
253
  >>> obj = import_artifact("my-artifact.yaml")
253
254
  """
254
- return processor.import_context_entity(file)
255
+ return context_processor.import_context_entity(file)
255
256
 
256
257
 
257
258
  def load_artifact(file: str) -> Artifact:
@@ -272,7 +273,7 @@ def load_artifact(file: str) -> Artifact:
272
273
  --------
273
274
  >>> obj = load_artifact("my-artifact.yaml")
274
275
  """
275
- return processor.load_context_entity(file)
276
+ return context_processor.load_context_entity(file)
276
277
 
277
278
 
278
279
  def update_artifact(entity: Artifact) -> Artifact:
@@ -293,7 +294,7 @@ def update_artifact(entity: Artifact) -> Artifact:
293
294
  --------
294
295
  >>> obj = update_artifact(obj)
295
296
  """
296
- return processor.update_context_entity(
297
+ return context_processor.update_context_entity(
297
298
  project=entity.project,
298
299
  entity_type=entity.ENTITY_TYPE,
299
300
  entity_id=entity.id,
@@ -339,7 +340,7 @@ def delete_artifact(
339
340
  >>> project="my-project",
340
341
  >>> delete_all_versions=True)
341
342
  """
342
- return processor.delete_context_entity(
343
+ return context_processor.delete_context_entity(
343
344
  identifier=identifier,
344
345
  entity_type=ENTITY_TYPE,
345
346
  project=project,
@@ -56,7 +56,7 @@ def process_kwargs(
56
56
  if path is None:
57
57
  uuid = build_uuid()
58
58
  kwargs["uuid"] = uuid
59
- kwargs["path"] = build_log_path_from_source(project, EntityTypes.MODEL.value, name, uuid, source)
59
+ kwargs["path"] = build_log_path_from_source(project, EntityTypes.ARTIFACT.value, name, uuid, source)
60
60
  else:
61
61
  kwargs["path"] = path
62
62
  return kwargs
@@ -7,7 +7,7 @@ from digitalhub.entities.model.mlflow.builder import ModelModelBuilder
7
7
  from digitalhub.entities.project._base.builder import ProjectProjectBuilder
8
8
  from digitalhub.entities.secret._base.builder import SecretSecretBuilder
9
9
 
10
- entity_builders = (
10
+ entity_builders: tuple = (
11
11
  (ProjectProjectBuilder.ENTITY_KIND, ProjectProjectBuilder),
12
12
  (SecretSecretBuilder.ENTITY_KIND, SecretSecretBuilder),
13
13
  (ArtifactArtifactBuilder.ENTITY_KIND, ArtifactArtifactBuilder),
@@ -17,17 +17,14 @@ entity_builders = (
17
17
  )
18
18
 
19
19
  ##############################
20
- # Potential uninstalled entities
20
+ # Add custom entities here
21
21
  ##############################
22
22
 
23
23
 
24
24
  try:
25
25
  from digitalhub.entities.dataitem.iceberg.builder import DataitemIcebergBuilder
26
26
 
27
- entity_builders = (
28
- *entity_builders,
29
- (DataitemIcebergBuilder.ENTITY_KIND, DataitemIcebergBuilder),
30
- )
27
+ entity_builders += ((DataitemIcebergBuilder.ENTITY_KIND, DataitemIcebergBuilder),)
31
28
  except ImportError:
32
29
  ...
33
30
 
@@ -35,29 +32,20 @@ except ImportError:
35
32
  try:
36
33
  from digitalhub.entities.model.model.builder import ModelMlflowBuilder
37
34
 
38
- entity_builders = (
39
- *entity_builders,
40
- (ModelMlflowBuilder.ENTITY_KIND, ModelMlflowBuilder),
41
- )
35
+ entity_builders += ((ModelMlflowBuilder.ENTITY_KIND, ModelMlflowBuilder),)
42
36
  except ImportError:
43
37
  ...
44
38
 
45
39
  try:
46
40
  from digitalhub.entities.model.sklearn.builder import ModelSklearnBuilder
47
41
 
48
- entity_builders = (
49
- *entity_builders,
50
- (ModelSklearnBuilder.ENTITY_KIND, ModelSklearnBuilder),
51
- )
42
+ entity_builders += ((ModelSklearnBuilder.ENTITY_KIND, ModelSklearnBuilder),)
52
43
  except ImportError:
53
44
  ...
54
45
 
55
46
  try:
56
47
  from digitalhub.entities.model.huggingface.builder import ModelHuggingfaceBuilder
57
48
 
58
- entity_builders = (
59
- *entity_builders,
60
- (ModelHuggingfaceBuilder.ENTITY_KIND, ModelHuggingfaceBuilder),
61
- )
49
+ entity_builders += ((ModelHuggingfaceBuilder.ENTITY_KIND, ModelHuggingfaceBuilder),)
62
50
  except ImportError:
63
51
  ...
@@ -1,13 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import typing
4
- from pathlib import Path
5
4
 
6
5
  from digitalhub.entities._base.material.entity import MaterialEntity
7
6
  from digitalhub.entities._commons.enums import EntityTypes
8
- from digitalhub.readers._commons.enums import Extensions
9
- from digitalhub.utils.exceptions import EntityError
10
- from digitalhub.utils.uri_utils import has_sql_scheme
11
7
 
12
8
  if typing.TYPE_CHECKING:
13
9
  from digitalhub.entities._base.entity.metadata import Metadata
@@ -36,40 +32,3 @@ class Dataitem(MaterialEntity):
36
32
  super().__init__(project, name, uuid, kind, metadata, spec, status, user)
37
33
  self.spec: DataitemSpec
38
34
  self.status: DataitemStatus
39
-
40
- ##############################
41
- # Helper methods
42
- ##############################
43
-
44
- @staticmethod
45
- def _get_extension(path: str, file_format: str | None = None) -> str:
46
- """
47
- Get extension of path.
48
-
49
- Parameters
50
- ----------
51
- path : str
52
- Path to get extension from.
53
- file_format : str
54
- File format.
55
-
56
- Returns
57
- -------
58
- str
59
- File extension.
60
-
61
- Raises
62
- ------
63
- EntityError
64
- If file format is not supported.
65
- """
66
- if file_format is not None:
67
- return file_format
68
-
69
- if has_sql_scheme(path):
70
- return Extensions.PARQUET.value
71
-
72
- ext = Path(path).suffix[1:]
73
- if ext is not None:
74
- return ext
75
- raise EntityError("Unknown file format. Only csv and parquet are supported.")
@@ -4,8 +4,9 @@ import typing
4
4
  from typing import Any
5
5
 
6
6
  from digitalhub.entities._commons.enums import EntityTypes
7
- from digitalhub.entities._operations.processor import processor
8
- from digitalhub.entities.dataitem.utils import clean_tmp_path, eval_source, post_process, process_kwargs
7
+ from digitalhub.entities._processors.context import context_processor
8
+ from digitalhub.entities.dataitem.utils import clean_tmp_path, eval_data, eval_source, post_process, process_kwargs
9
+ from digitalhub.utils.types import SourcesOrListOfSources
9
10
 
10
11
  if typing.TYPE_CHECKING:
11
12
  from digitalhub.entities.dataitem._base.entity import Dataitem
@@ -61,7 +62,7 @@ def new_dataitem(
61
62
  >>> kind="dataitem",
62
63
  >>> path="s3://my-bucket/my-key")
63
64
  """
64
- return processor.create_context_entity(
65
+ return context_processor.create_context_entity(
65
66
  project=project,
66
67
  name=name,
67
68
  kind=kind,
@@ -78,9 +79,11 @@ def log_dataitem(
78
79
  project: str,
79
80
  name: str,
80
81
  kind: str,
81
- source: list[str] | str | None = None,
82
+ source: SourcesOrListOfSources | None = None,
82
83
  data: Any | None = None,
83
84
  path: str | None = None,
85
+ file_format: str | None = None,
86
+ engine: str | None = "pandas",
84
87
  **kwargs,
85
88
  ) -> Dataitem:
86
89
  """
@@ -94,12 +97,16 @@ def log_dataitem(
94
97
  Object name.
95
98
  kind : str
96
99
  Kind the object.
97
- source : str
100
+ source : SourcesOrListOfSources
98
101
  Dataitem location on local path.
99
102
  data : Any
100
103
  Dataframe to log. Alternative to source.
101
104
  path : str
102
105
  Destination path of the dataitem. If not provided, it's generated.
106
+ file_format : str
107
+ Extension of the file.
108
+ engine : str
109
+ Dataframe engine (pandas, polars, etc.).
103
110
  **kwargs : dict
104
111
  New dataitem spec parameters.
105
112
 
@@ -115,17 +122,22 @@ def log_dataitem(
115
122
  >>> kind="table",
116
123
  >>> data=df)
117
124
  """
125
+ cleanup = False
126
+ if data is not None:
127
+ cleanup = True
128
+
118
129
  source = eval_source(source, data, kind, name, project)
130
+ data = eval_data(project, kind, source, data, file_format, engine)
119
131
  kwargs = process_kwargs(project, name, kind, source=source, data=data, path=path, **kwargs)
120
- obj = processor.log_material_entity(
132
+ obj = context_processor.log_material_entity(
121
133
  source=source,
122
134
  project=project,
123
135
  name=name,
124
136
  kind=kind,
125
137
  **kwargs,
126
138
  )
127
- if data is not None:
128
- obj = post_process(obj, data)
139
+ obj = post_process(obj, data)
140
+ if cleanup:
129
141
  clean_tmp_path(source)
130
142
  return obj
131
143
 
@@ -165,7 +177,7 @@ def get_dataitem(
165
177
  >>> project="my-project",
166
178
  >>> entity_id="my-dataitem-id")
167
179
  """
168
- return processor.read_material_entity(
180
+ return context_processor.read_context_entity(
169
181
  identifier=identifier,
170
182
  entity_type=ENTITY_TYPE,
171
183
  project=project,
@@ -205,7 +217,7 @@ def get_dataitem_versions(
205
217
  >>> objs = get_dataitem_versions("my-dataitem-name",
206
218
  >>> project="my-project")
207
219
  """
208
- return processor.read_material_entity_versions(
220
+ return context_processor.read_context_entity_versions(
209
221
  identifier=identifier,
210
222
  entity_type=ENTITY_TYPE,
211
223
  project=project,
@@ -233,7 +245,7 @@ def list_dataitems(project: str, **kwargs) -> list[Dataitem]:
233
245
  --------
234
246
  >>> objs = list_dataitems(project="my-project")
235
247
  """
236
- return processor.list_material_entities(
248
+ return context_processor.list_context_entities(
237
249
  project=project,
238
250
  entity_type=ENTITY_TYPE,
239
251
  **kwargs,
@@ -258,7 +270,7 @@ def import_dataitem(file: str) -> Dataitem:
258
270
  --------
259
271
  >>> obj = import_dataitem("my-dataitem.yaml")
260
272
  """
261
- return processor.import_context_entity(file)
273
+ return context_processor.import_context_entity(file)
262
274
 
263
275
 
264
276
  def load_dataitem(file: str) -> Dataitem:
@@ -279,7 +291,7 @@ def load_dataitem(file: str) -> Dataitem:
279
291
  --------
280
292
  >>> obj = load_dataitem("my-dataitem.yaml")
281
293
  """
282
- return processor.load_context_entity(file)
294
+ return context_processor.load_context_entity(file)
283
295
 
284
296
 
285
297
  def update_dataitem(entity: Dataitem) -> Dataitem:
@@ -300,7 +312,7 @@ def update_dataitem(entity: Dataitem) -> Dataitem:
300
312
  --------
301
313
  >>> obj = update_dataitem(obj)
302
314
  """
303
- return processor.update_context_entity(
315
+ return context_processor.update_context_entity(
304
316
  project=entity.project,
305
317
  entity_type=entity.ENTITY_TYPE,
306
318
  entity_id=entity.id,
@@ -346,7 +358,7 @@ def delete_dataitem(
346
358
  >>> project="my-project",
347
359
  >>> delete_all_versions=True)
348
360
  """
349
- return processor.delete_context_entity(
361
+ return context_processor.delete_context_entity(
350
362
  identifier=identifier,
351
363
  entity_type=ENTITY_TYPE,
352
364
  project=project,
@@ -7,7 +7,7 @@ from typing import Any
7
7
 
8
8
  from digitalhub.entities.dataitem._base.entity import Dataitem
9
9
  from digitalhub.stores.api import get_store
10
- from digitalhub.utils.uri_utils import has_local_scheme
10
+ from digitalhub.utils.uri_utils import has_sql_scheme
11
11
 
12
12
  if typing.TYPE_CHECKING:
13
13
  from digitalhub.entities._base.entity.metadata import Metadata
@@ -36,19 +36,39 @@ class DataitemTable(Dataitem):
36
36
  self.spec: DataitemSpecTable
37
37
  self.status: DataitemStatusTable
38
38
 
39
+ self._query: str | None = None
40
+
41
def query(self, query: str) -> DataitemTable:
    """
    Set query to execute.

    The query is stored on the instance and the object itself is
    returned, so the call can be chained.

    Parameters
    ----------
    query : str
        Query to execute.

    Returns
    -------
    DataitemTable
        Self object.

    Raises
    ------
    ValueError
        If the dataitem path does not have a SQL scheme.
    """
    # to remove in future
    if not has_sql_scheme(self.spec.path):
        # Build a single message string: passing two positional arguments
        # to ValueError would render the message as a tuple.
        raise ValueError(
            f"Dataitem path is not a SQL scheme: {self.spec.path}"
            " Query can be made only on a SQL scheme."
        )
    self._query = query
    return self
63
+
39
64
  def as_df(
40
65
  self,
41
66
  file_format: str | None = None,
42
- engine: str | None = None,
43
- clean_tmp_path: bool = True,
67
+ engine: str | None = "pandas",
44
68
  **kwargs,
45
69
  ) -> Any:
46
70
  """
47
71
  Read dataitem file (csv or parquet) as a DataFrame from spec.path.
48
- If the dataitem is not local, it will be downloaded to a temporary
49
- folder named tmp_dir in the project context folder.
50
- If clean_tmp_path is True, the temporary folder will be deleted after the
51
- method is executed.
52
72
  It's possible to pass additional arguments to the this function. These
53
73
  keyword arguments will be passed to the DataFrame reader function such as
54
74
  pandas's read_csv or read_parquet.
@@ -56,11 +76,10 @@ class DataitemTable(Dataitem):
56
76
  Parameters
57
77
  ----------
58
78
  file_format : str
59
- Format of the file. (Supported csv and parquet).
79
+ Format of the file to read. By default, it will be inferred from
80
+ the extension of the file.
60
81
  engine : str
61
82
  Dataframe framework, by default pandas.
62
- clean_tmp_path : bool
63
- If True, the temporary folder will be deleted.
64
83
  **kwargs : dict
65
84
  Keyword arguments passed to the read_df function.
66
85
 
@@ -69,30 +88,20 @@ class DataitemTable(Dataitem):
69
88
  Any
70
89
  DataFrame.
71
90
  """
72
- try:
73
- if has_local_scheme(self.spec.path):
74
- tmp_dir = None
75
- data_path = self.spec.path
76
- else:
77
- tmp_dir = self._context().root / "tmp_data"
78
- tmp_dir.mkdir(parents=True, exist_ok=True)
79
- data_path = self.download(destination=str(tmp_dir), overwrite=True)
80
-
81
- if Path(data_path).is_dir():
82
- files = [str(i) for i in Path(data_path).rglob("*") if i.is_file()]
83
- checker = files[0]
84
- else:
85
- checker = data_path
86
-
87
- extension = self._get_extension(checker, file_format)
88
- return get_store("").read_df(data_path, extension, engine, **kwargs)
89
-
90
- except Exception as e:
91
- raise e
92
-
93
- finally:
94
- # Delete tmp folder
95
- self._clean_tmp_path(tmp_dir, clean_tmp_path)
91
+ if self._query is not None:
92
+ df = get_store(self.project, self.spec.path).query(
93
+ self._query,
94
+ self.spec.path,
95
+ engine,
96
+ )
97
+ self._query = None
98
+ return df
99
+ return get_store(self.project, self.spec.path).read_df(
100
+ self.spec.path,
101
+ file_format,
102
+ engine,
103
+ **kwargs,
104
+ )
96
105
 
97
106
  def write_df(
98
107
  self,
@@ -119,7 +128,12 @@ class DataitemTable(Dataitem):
119
128
  str
120
129
  Path to the written dataframe.
121
130
  """
122
- return get_store(self.spec.path).write_df(df, self.spec.path, extension=extension, **kwargs)
131
+ return get_store(self.project, self.spec.path).write_df(
132
+ df,
133
+ self.spec.path,
134
+ extension=extension,
135
+ **kwargs,
136
+ )
123
137
 
124
138
  @staticmethod
125
139
  def _clean_tmp_path(pth: Path | None, clean: bool) -> None: