digitalhub 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +2 -3
- digitalhub/client/_base/api_builder.py +1 -1
- digitalhub/client/_base/client.py +25 -2
- digitalhub/client/_base/params_builder.py +16 -0
- digitalhub/client/dhcore/api_builder.py +10 -4
- digitalhub/client/dhcore/client.py +30 -398
- digitalhub/client/dhcore/configurator.py +361 -0
- digitalhub/client/dhcore/error_parser.py +107 -0
- digitalhub/client/dhcore/models.py +13 -23
- digitalhub/client/dhcore/params_builder.py +178 -0
- digitalhub/client/dhcore/utils.py +4 -44
- digitalhub/client/local/api_builder.py +13 -18
- digitalhub/client/local/client.py +18 -2
- digitalhub/client/local/enums.py +11 -0
- digitalhub/client/local/params_builder.py +116 -0
- digitalhub/configurator/api.py +31 -0
- digitalhub/configurator/configurator.py +195 -0
- digitalhub/configurator/credentials_store.py +65 -0
- digitalhub/configurator/ini_module.py +74 -0
- digitalhub/entities/_base/_base/entity.py +2 -2
- digitalhub/entities/_base/context/entity.py +4 -4
- digitalhub/entities/_base/entity/builder.py +5 -5
- digitalhub/entities/_base/executable/entity.py +2 -2
- digitalhub/entities/_base/material/entity.py +12 -12
- digitalhub/entities/_base/material/status.py +1 -1
- digitalhub/entities/_base/material/utils.py +2 -2
- digitalhub/entities/_base/unversioned/entity.py +2 -2
- digitalhub/entities/_base/versioned/entity.py +2 -2
- digitalhub/entities/_commons/enums.py +2 -0
- digitalhub/entities/_commons/metrics.py +164 -0
- digitalhub/entities/_commons/types.py +5 -0
- digitalhub/entities/_commons/utils.py +2 -2
- digitalhub/entities/_processors/base.py +527 -0
- digitalhub/entities/{_operations/processor.py → _processors/context.py} +212 -837
- digitalhub/entities/_processors/utils.py +158 -0
- digitalhub/entities/artifact/artifact/spec.py +3 -1
- digitalhub/entities/artifact/crud.py +13 -12
- digitalhub/entities/artifact/utils.py +1 -1
- digitalhub/entities/builders.py +6 -18
- digitalhub/entities/dataitem/_base/entity.py +0 -41
- digitalhub/entities/dataitem/crud.py +27 -15
- digitalhub/entities/dataitem/table/entity.py +49 -35
- digitalhub/entities/dataitem/table/models.py +4 -3
- digitalhub/{utils/data_utils.py → entities/dataitem/table/utils.py} +46 -54
- digitalhub/entities/dataitem/utils.py +58 -10
- digitalhub/entities/function/crud.py +9 -9
- digitalhub/entities/model/_base/entity.py +120 -0
- digitalhub/entities/model/_base/spec.py +6 -17
- digitalhub/entities/model/_base/status.py +10 -0
- digitalhub/entities/model/crud.py +13 -12
- digitalhub/entities/model/huggingface/spec.py +9 -4
- digitalhub/entities/model/mlflow/models.py +2 -2
- digitalhub/entities/model/mlflow/spec.py +7 -7
- digitalhub/entities/model/mlflow/utils.py +44 -5
- digitalhub/entities/project/_base/entity.py +317 -9
- digitalhub/entities/project/_base/spec.py +8 -6
- digitalhub/entities/project/crud.py +12 -11
- digitalhub/entities/run/_base/entity.py +103 -6
- digitalhub/entities/run/_base/spec.py +4 -2
- digitalhub/entities/run/_base/status.py +12 -0
- digitalhub/entities/run/crud.py +8 -8
- digitalhub/entities/secret/_base/entity.py +3 -3
- digitalhub/entities/secret/_base/spec.py +4 -2
- digitalhub/entities/secret/crud.py +11 -9
- digitalhub/entities/task/_base/entity.py +4 -4
- digitalhub/entities/task/_base/models.py +51 -40
- digitalhub/entities/task/_base/spec.py +2 -0
- digitalhub/entities/task/_base/utils.py +2 -2
- digitalhub/entities/task/crud.py +12 -8
- digitalhub/entities/workflow/crud.py +9 -9
- digitalhub/factory/utils.py +9 -9
- digitalhub/readers/{_base → data/_base}/builder.py +1 -1
- digitalhub/readers/{_base → data/_base}/reader.py +16 -4
- digitalhub/readers/{api.py → data/api.py} +2 -2
- digitalhub/readers/{factory.py → data/factory.py} +3 -3
- digitalhub/readers/{pandas → data/pandas}/builder.py +2 -2
- digitalhub/readers/{pandas → data/pandas}/reader.py +110 -30
- digitalhub/readers/query/__init__.py +0 -0
- digitalhub/stores/_base/store.py +59 -69
- digitalhub/stores/api.py +8 -33
- digitalhub/stores/builder.py +44 -161
- digitalhub/stores/local/store.py +106 -89
- digitalhub/stores/remote/store.py +86 -11
- digitalhub/stores/s3/configurator.py +108 -0
- digitalhub/stores/s3/enums.py +17 -0
- digitalhub/stores/s3/models.py +21 -0
- digitalhub/stores/s3/store.py +154 -70
- digitalhub/{utils/s3_utils.py → stores/s3/utils.py} +7 -3
- digitalhub/stores/sql/configurator.py +88 -0
- digitalhub/stores/sql/enums.py +16 -0
- digitalhub/stores/sql/models.py +24 -0
- digitalhub/stores/sql/store.py +106 -85
- digitalhub/{readers/_commons → utils}/enums.py +5 -1
- digitalhub/utils/exceptions.py +6 -0
- digitalhub/utils/file_utils.py +8 -7
- digitalhub/utils/generic_utils.py +28 -15
- digitalhub/utils/git_utils.py +16 -9
- digitalhub/utils/types.py +5 -0
- digitalhub/utils/uri_utils.py +2 -2
- {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info}/METADATA +25 -31
- {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info}/RECORD +108 -99
- {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info}/WHEEL +1 -2
- digitalhub/client/dhcore/env.py +0 -23
- digitalhub/entities/_base/project/entity.py +0 -341
- digitalhub-0.9.1.dist-info/top_level.txt +0 -2
- test/local/CRUD/test_artifacts.py +0 -96
- test/local/CRUD/test_dataitems.py +0 -96
- test/local/CRUD/test_models.py +0 -95
- test/local/imports/test_imports.py +0 -66
- test/local/instances/test_validate.py +0 -55
- test/test_crud_functions.py +0 -109
- test/test_crud_runs.py +0 -86
- test/test_crud_tasks.py +0 -81
- test/testkfp.py +0 -37
- test/testkfp_pipeline.py +0 -22
- /digitalhub/{entities/_base/project → configurator}/__init__.py +0 -0
- /digitalhub/entities/{_operations → _processors}/__init__.py +0 -0
- /digitalhub/readers/{_base → data}/__init__.py +0 -0
- /digitalhub/readers/{_commons → data/_base}/__init__.py +0 -0
- /digitalhub/readers/{pandas → data/pandas}/__init__.py +0 -0
- {digitalhub-0.9.1.dist-info → digitalhub-0.10.0.dist-info/licenses}/LICENSE.txt +0 -0
digitalhub/entities/task/crud.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
from digitalhub.entities._commons.enums import EntityTypes
|
|
6
|
-
from digitalhub.entities.
|
|
6
|
+
from digitalhub.entities._processors.context import context_processor
|
|
7
7
|
from digitalhub.utils.exceptions import EntityError
|
|
8
8
|
|
|
9
9
|
if typing.TYPE_CHECKING:
|
|
@@ -19,6 +19,7 @@ def new_task(
|
|
|
19
19
|
uuid: str | None = None,
|
|
20
20
|
labels: list[str] | None = None,
|
|
21
21
|
function: str | None = None,
|
|
22
|
+
workflow: str | None = None,
|
|
22
23
|
**kwargs,
|
|
23
24
|
) -> Task:
|
|
24
25
|
"""
|
|
@@ -36,6 +37,8 @@ def new_task(
|
|
|
36
37
|
List of labels.
|
|
37
38
|
function : str
|
|
38
39
|
Name of the executable associated with the task.
|
|
40
|
+
workflow : str
|
|
41
|
+
Name of the workflow associated with the task.
|
|
39
42
|
**kwargs : dict
|
|
40
43
|
Spec keyword arguments.
|
|
41
44
|
|
|
@@ -50,12 +53,13 @@ def new_task(
|
|
|
50
53
|
>>> kind="python+job",
|
|
51
54
|
>>> function="function-string")
|
|
52
55
|
"""
|
|
53
|
-
return
|
|
56
|
+
return context_processor.create_context_entity(
|
|
54
57
|
project=project,
|
|
55
58
|
kind=kind,
|
|
56
59
|
uuid=uuid,
|
|
57
60
|
labels=labels,
|
|
58
61
|
function=function,
|
|
62
|
+
workflow=workflow,
|
|
59
63
|
**kwargs,
|
|
60
64
|
)
|
|
61
65
|
|
|
@@ -91,7 +95,7 @@ def get_task(
|
|
|
91
95
|
>>> obj = get_task("my-task-id"
|
|
92
96
|
>>> project="my-project")
|
|
93
97
|
"""
|
|
94
|
-
return
|
|
98
|
+
return context_processor.read_unversioned_entity(
|
|
95
99
|
identifier,
|
|
96
100
|
entity_type=ENTITY_TYPE,
|
|
97
101
|
project=project,
|
|
@@ -119,7 +123,7 @@ def list_tasks(project: str, **kwargs) -> list[Task]:
|
|
|
119
123
|
--------
|
|
120
124
|
>>> objs = list_tasks(project="my-project")
|
|
121
125
|
"""
|
|
122
|
-
return
|
|
126
|
+
return context_processor.list_context_entities(
|
|
123
127
|
project=project,
|
|
124
128
|
entity_type=ENTITY_TYPE,
|
|
125
129
|
**kwargs,
|
|
@@ -144,7 +148,7 @@ def import_task(file: str) -> Task:
|
|
|
144
148
|
-------
|
|
145
149
|
>>> obj = import_task("my-task.yaml")
|
|
146
150
|
"""
|
|
147
|
-
return
|
|
151
|
+
return context_processor.import_context_entity(file)
|
|
148
152
|
|
|
149
153
|
|
|
150
154
|
def load_task(file: str) -> Task:
|
|
@@ -165,7 +169,7 @@ def load_task(file: str) -> Task:
|
|
|
165
169
|
--------
|
|
166
170
|
>>> obj = load_task("my-task.yaml")
|
|
167
171
|
"""
|
|
168
|
-
return
|
|
172
|
+
return context_processor.load_context_entity(file)
|
|
169
173
|
|
|
170
174
|
|
|
171
175
|
def update_task(entity: Task) -> Task:
|
|
@@ -186,7 +190,7 @@ def update_task(entity: Task) -> Task:
|
|
|
186
190
|
--------
|
|
187
191
|
>>> obj = update_task(obj)
|
|
188
192
|
"""
|
|
189
|
-
return
|
|
193
|
+
return context_processor.update_context_entity(
|
|
190
194
|
project=entity.project,
|
|
191
195
|
entity_type=entity.ENTITY_TYPE,
|
|
192
196
|
entity_id=entity.id,
|
|
@@ -237,7 +241,7 @@ def delete_task(
|
|
|
237
241
|
"""
|
|
238
242
|
if not identifier.startswith("store://"):
|
|
239
243
|
raise EntityError("Task has no name. Use key instead.")
|
|
240
|
-
return
|
|
244
|
+
return context_processor.delete_context_entity(
|
|
241
245
|
identifier=identifier,
|
|
242
246
|
entity_type=ENTITY_TYPE,
|
|
243
247
|
project=project,
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
from digitalhub.entities._commons.enums import EntityTypes
|
|
6
|
-
from digitalhub.entities.
|
|
6
|
+
from digitalhub.entities._processors.context import context_processor
|
|
7
7
|
|
|
8
8
|
if typing.TYPE_CHECKING:
|
|
9
9
|
from digitalhub.entities.workflow._base.entity import Workflow
|
|
@@ -55,7 +55,7 @@ def new_workflow(
|
|
|
55
55
|
>>> code_src="pipeline.py",
|
|
56
56
|
>>> handler="pipeline-handler")
|
|
57
57
|
"""
|
|
58
|
-
return
|
|
58
|
+
return context_processor.create_context_entity(
|
|
59
59
|
project=project,
|
|
60
60
|
name=name,
|
|
61
61
|
kind=kind,
|
|
@@ -102,7 +102,7 @@ def get_workflow(
|
|
|
102
102
|
>>> project="my-project",
|
|
103
103
|
>>> entity_id="my-workflow-id")
|
|
104
104
|
"""
|
|
105
|
-
return
|
|
105
|
+
return context_processor.read_context_entity(
|
|
106
106
|
identifier,
|
|
107
107
|
entity_type=ENTITY_TYPE,
|
|
108
108
|
project=project,
|
|
@@ -142,7 +142,7 @@ def get_workflow_versions(
|
|
|
142
142
|
>>> obj = get_workflow_versions("my-workflow-name"
|
|
143
143
|
>>> project="my-project")
|
|
144
144
|
"""
|
|
145
|
-
return
|
|
145
|
+
return context_processor.read_context_entity_versions(
|
|
146
146
|
identifier,
|
|
147
147
|
entity_type=ENTITY_TYPE,
|
|
148
148
|
project=project,
|
|
@@ -170,7 +170,7 @@ def list_workflows(project: str, **kwargs) -> list[Workflow]:
|
|
|
170
170
|
--------
|
|
171
171
|
>>> objs = list_workflows(project="my-project")
|
|
172
172
|
"""
|
|
173
|
-
return
|
|
173
|
+
return context_processor.list_context_entities(
|
|
174
174
|
project=project,
|
|
175
175
|
entity_type=ENTITY_TYPE,
|
|
176
176
|
**kwargs,
|
|
@@ -195,7 +195,7 @@ def import_workflow(file: str) -> Workflow:
|
|
|
195
195
|
--------
|
|
196
196
|
>>> obj = import_workflow("my-workflow.yaml")
|
|
197
197
|
"""
|
|
198
|
-
return
|
|
198
|
+
return context_processor.import_executable_entity(file)
|
|
199
199
|
|
|
200
200
|
|
|
201
201
|
def load_workflow(file: str) -> Workflow:
|
|
@@ -216,7 +216,7 @@ def load_workflow(file: str) -> Workflow:
|
|
|
216
216
|
--------
|
|
217
217
|
>>> obj = load_workflow("my-workflow.yaml")
|
|
218
218
|
"""
|
|
219
|
-
return
|
|
219
|
+
return context_processor.load_executable_entity(file)
|
|
220
220
|
|
|
221
221
|
|
|
222
222
|
def update_workflow(entity: Workflow) -> Workflow:
|
|
@@ -237,7 +237,7 @@ def update_workflow(entity: Workflow) -> Workflow:
|
|
|
237
237
|
--------
|
|
238
238
|
>>> obj = update_workflow(obj)
|
|
239
239
|
"""
|
|
240
|
-
return
|
|
240
|
+
return context_processor.update_context_entity(
|
|
241
241
|
project=entity.project,
|
|
242
242
|
entity_type=entity.ENTITY_TYPE,
|
|
243
243
|
entity_id=entity.id,
|
|
@@ -286,7 +286,7 @@ def delete_workflow(
|
|
|
286
286
|
>>> project="my-project",
|
|
287
287
|
>>> delete_all_versions=True)
|
|
288
288
|
"""
|
|
289
|
-
return
|
|
289
|
+
return context_processor.delete_context_entity(
|
|
290
290
|
identifier=identifier,
|
|
291
291
|
entity_type=ENTITY_TYPE,
|
|
292
292
|
project=project,
|
digitalhub/factory/utils.py
CHANGED
|
@@ -25,10 +25,10 @@ def import_module(package: str) -> ModuleType:
|
|
|
25
25
|
"""
|
|
26
26
|
try:
|
|
27
27
|
return importlib.import_module(package)
|
|
28
|
-
except ModuleNotFoundError:
|
|
29
|
-
raise ModuleNotFoundError(f"Package {package} not found.")
|
|
28
|
+
except ModuleNotFoundError as e:
|
|
29
|
+
raise ModuleNotFoundError(f"Package {package} not found.") from e
|
|
30
30
|
except Exception as e:
|
|
31
|
-
raise e
|
|
31
|
+
raise RuntimeError(f"An error occurred while importing {package}.") from e
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
def list_runtimes() -> list[str]:
|
|
@@ -37,18 +37,18 @@ def list_runtimes() -> list[str]:
|
|
|
37
37
|
|
|
38
38
|
Returns
|
|
39
39
|
-------
|
|
40
|
-
list
|
|
40
|
+
list[str]
|
|
41
41
|
List of installed runtimes names.
|
|
42
42
|
"""
|
|
43
43
|
pattern = r"digitalhub_runtime_.*"
|
|
44
|
-
runtimes = []
|
|
44
|
+
runtimes: list[str] = []
|
|
45
45
|
try:
|
|
46
46
|
for _, name, _ in pkgutil.iter_modules():
|
|
47
47
|
if re.match(pattern, name):
|
|
48
48
|
runtimes.append(name)
|
|
49
49
|
return runtimes
|
|
50
|
-
except Exception:
|
|
51
|
-
raise RuntimeError("Error listing installed runtimes.")
|
|
50
|
+
except Exception as e:
|
|
51
|
+
raise RuntimeError("Error listing installed runtimes.") from e
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
def register_runtimes_entities() -> None:
|
|
@@ -86,5 +86,5 @@ def register_entities() -> None:
|
|
|
86
86
|
for entity_builder_tuple in entities_builders_list:
|
|
87
87
|
kind, builder = entity_builder_tuple
|
|
88
88
|
factory.add_entity_builder(kind, builder)
|
|
89
|
-
except Exception:
|
|
90
|
-
raise
|
|
89
|
+
except Exception as e:
|
|
90
|
+
raise RuntimeError("Error registering entities.") from e
|
|
@@ -6,7 +6,7 @@ from abc import abstractmethod
|
|
|
6
6
|
from digitalhub.utils.exceptions import BuilderError
|
|
7
7
|
|
|
8
8
|
if typing.TYPE_CHECKING:
|
|
9
|
-
from digitalhub.readers._base.reader import DataframeReader
|
|
9
|
+
from digitalhub.readers.data._base.reader import DataframeReader
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class ReaderBuilder:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from abc import abstractmethod
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import IO, Any
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class DataframeReader:
|
|
@@ -13,11 +13,16 @@ class DataframeReader:
|
|
|
13
13
|
# Read methods
|
|
14
14
|
##############################
|
|
15
15
|
|
|
16
|
-
@staticmethod
|
|
17
16
|
@abstractmethod
|
|
18
|
-
def read_df(
|
|
17
|
+
def read_df(self, path_or_buffer: str | IO, extension: str, **kwargs) -> Any:
|
|
18
|
+
"""
|
|
19
|
+
Read DataFrame from path or buffer.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
@abstractmethod
|
|
23
|
+
def read_table(self, *args, **kwargs) -> Any:
|
|
19
24
|
"""
|
|
20
|
-
Read
|
|
25
|
+
Read table from db.
|
|
21
26
|
"""
|
|
22
27
|
|
|
23
28
|
##############################
|
|
@@ -68,3 +73,10 @@ class DataframeReader:
|
|
|
68
73
|
"""
|
|
69
74
|
Get preview.
|
|
70
75
|
"""
|
|
76
|
+
|
|
77
|
+
@staticmethod
|
|
78
|
+
@abstractmethod
|
|
79
|
+
def concat_dfs(dfs: list[Any]) -> Any:
|
|
80
|
+
"""
|
|
81
|
+
Concatenate a list of DataFrames.
|
|
82
|
+
"""
|
|
@@ -3,11 +3,11 @@ from __future__ import annotations
|
|
|
3
3
|
import typing
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
from digitalhub.readers.factory import factory
|
|
6
|
+
from digitalhub.readers.data.factory import factory
|
|
7
7
|
from digitalhub.utils.exceptions import ReaderError
|
|
8
8
|
|
|
9
9
|
if typing.TYPE_CHECKING:
|
|
10
|
-
from digitalhub.readers._base.reader import DataframeReader
|
|
10
|
+
from digitalhub.readers.data._base.reader import DataframeReader
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def get_reader_by_engine(engine: str | None = None) -> DataframeReader:
|
|
@@ -5,8 +5,8 @@ import typing
|
|
|
5
5
|
from digitalhub.utils.exceptions import BuilderError
|
|
6
6
|
|
|
7
7
|
if typing.TYPE_CHECKING:
|
|
8
|
-
from digitalhub.readers._base.builder import ReaderBuilder
|
|
9
|
-
from digitalhub.readers._base.reader import DataframeReader
|
|
8
|
+
from digitalhub.readers.data._base.builder import ReaderBuilder
|
|
9
|
+
from digitalhub.readers.data._base.reader import DataframeReader
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class ReaderFactory:
|
|
@@ -126,7 +126,7 @@ class ReaderFactory:
|
|
|
126
126
|
factory = ReaderFactory()
|
|
127
127
|
|
|
128
128
|
try:
|
|
129
|
-
from digitalhub.readers.pandas.builder import ReaderBuilderPandas
|
|
129
|
+
from digitalhub.readers.data.pandas.builder import ReaderBuilderPandas
|
|
130
130
|
|
|
131
131
|
factory.add_builder(
|
|
132
132
|
ReaderBuilderPandas.ENGINE,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from digitalhub.readers._base.builder import ReaderBuilder
|
|
4
|
-
from digitalhub.readers.pandas.reader import DataframeReaderPandas
|
|
3
|
+
from digitalhub.readers.data._base.builder import ReaderBuilder
|
|
4
|
+
from digitalhub.readers.data.pandas.reader import DataframeReaderPandas
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class ReaderBuilderPandas(ReaderBuilder):
|
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
from io import BytesIO
|
|
4
|
-
from typing import Any
|
|
5
|
+
from typing import IO, Any
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
7
8
|
import pandas as pd
|
|
8
9
|
from pandas.errors import ParserError
|
|
9
10
|
|
|
10
|
-
from digitalhub.
|
|
11
|
-
from digitalhub.readers.
|
|
12
|
-
from digitalhub.utils.
|
|
11
|
+
from digitalhub.entities.dataitem.table.utils import check_preview_size, finalize_preview, prepare_data, prepare_preview
|
|
12
|
+
from digitalhub.readers.data._base.reader import DataframeReader
|
|
13
|
+
from digitalhub.utils.enums import FileExtensions
|
|
13
14
|
from digitalhub.utils.exceptions import ReaderError
|
|
15
|
+
from digitalhub.utils.generic_utils import CustomJsonEncoder
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class DataframeReaderPandas(DataframeReader):
|
|
@@ -22,14 +24,14 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
22
24
|
# Read methods
|
|
23
25
|
##############################
|
|
24
26
|
|
|
25
|
-
def read_df(self,
|
|
27
|
+
def read_df(self, path_or_buffer: str | IO, extension: str, **kwargs) -> pd.DataFrame:
|
|
26
28
|
"""
|
|
27
|
-
Read DataFrame from path.
|
|
29
|
+
Read DataFrame from path or buffer.
|
|
28
30
|
|
|
29
31
|
Parameters
|
|
30
32
|
----------
|
|
31
|
-
|
|
32
|
-
Path
|
|
33
|
+
path_or_buffer : str | IO
|
|
34
|
+
Path or buffer to read DataFrame from.
|
|
33
35
|
extension : str
|
|
34
36
|
Extension of the file.
|
|
35
37
|
**kwargs : dict
|
|
@@ -40,22 +42,41 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
40
42
|
pd.DataFrame
|
|
41
43
|
Pandas DataFrame.
|
|
42
44
|
"""
|
|
43
|
-
if extension ==
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
if extension == FileExtensions.CSV.value:
|
|
46
|
+
return pd.read_csv(path_or_buffer, **kwargs)
|
|
47
|
+
if extension == FileExtensions.PARQUET.value:
|
|
48
|
+
return pd.read_parquet(path_or_buffer, **kwargs)
|
|
49
|
+
if extension == FileExtensions.JSON.value:
|
|
50
|
+
return pd.read_json(path_or_buffer, **kwargs)
|
|
51
|
+
if extension in (FileExtensions.EXCEL.value, FileExtensions.EXCEL_OLD.value):
|
|
52
|
+
return pd.read_excel(path_or_buffer, **kwargs)
|
|
53
|
+
if extension in (FileExtensions.TXT.value, FileExtensions.FILE.value):
|
|
48
54
|
try:
|
|
49
|
-
return self.read_df(
|
|
55
|
+
return self.read_df(path_or_buffer, FileExtensions.CSV.value, **kwargs)
|
|
50
56
|
except ParserError:
|
|
51
|
-
raise ReaderError(f"Unable to read from {
|
|
57
|
+
raise ReaderError(f"Unable to read from {path_or_buffer}.")
|
|
52
58
|
else:
|
|
53
59
|
raise ReaderError(f"Unsupported extension '{extension}' for reading.")
|
|
54
60
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
61
|
+
def read_table(self, sql: str, engine: Any, **kwargs) -> pd.DataFrame:
|
|
62
|
+
"""
|
|
63
|
+
Read table from db.
|
|
64
|
+
|
|
65
|
+
Parameters
|
|
66
|
+
----------
|
|
67
|
+
sql : str
|
|
68
|
+
SQL query.
|
|
69
|
+
engine : Any
|
|
70
|
+
SQL Engine.
|
|
71
|
+
**kwargs
|
|
72
|
+
Keyword arguments.
|
|
73
|
+
|
|
74
|
+
Returns
|
|
75
|
+
-------
|
|
76
|
+
pd.DataFrame
|
|
77
|
+
Pandas DataFrame.
|
|
78
|
+
"""
|
|
79
|
+
return pd.read_sql(sql=sql, con=engine, **kwargs)
|
|
59
80
|
|
|
60
81
|
##############################
|
|
61
82
|
# Write methods
|
|
@@ -84,9 +105,9 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
84
105
|
-------
|
|
85
106
|
None
|
|
86
107
|
"""
|
|
87
|
-
if extension ==
|
|
108
|
+
if extension == FileExtensions.CSV.value:
|
|
88
109
|
return self.write_csv(df, dst, **kwargs)
|
|
89
|
-
|
|
110
|
+
if extension == FileExtensions.PARQUET.value:
|
|
90
111
|
return self.write_parquet(df, dst, **kwargs)
|
|
91
112
|
raise ReaderError(f"Unsupported extension '{extension}' for writing.")
|
|
92
113
|
|
|
@@ -131,7 +152,7 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
131
152
|
df.to_parquet(dst, index=False, **kwargs)
|
|
132
153
|
|
|
133
154
|
@staticmethod
|
|
134
|
-
def write_table(df: pd.DataFrame, table: str, engine: Any, schema: str, **kwargs) -> None:
|
|
155
|
+
def write_table(df: pd.DataFrame, table: str, engine: Any, schema: str | None = None, **kwargs) -> None:
|
|
135
156
|
"""
|
|
136
157
|
Write DataFrame as table.
|
|
137
158
|
|
|
@@ -142,7 +163,7 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
142
163
|
table : str
|
|
143
164
|
The destination table.
|
|
144
165
|
engine : Any
|
|
145
|
-
|
|
166
|
+
SQL Engine.
|
|
146
167
|
schema : str
|
|
147
168
|
The destination schema.
|
|
148
169
|
**kwargs : dict
|
|
@@ -158,6 +179,23 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
158
179
|
# Utils
|
|
159
180
|
##############################
|
|
160
181
|
|
|
182
|
+
@staticmethod
|
|
183
|
+
def concat_dfs(dfs: list[pd.DataFrame]) -> pd.DataFrame:
|
|
184
|
+
"""
|
|
185
|
+
Concatenate a list of DataFrames.
|
|
186
|
+
|
|
187
|
+
Parameters
|
|
188
|
+
----------
|
|
189
|
+
dfs : list[pd.DataFrame]
|
|
190
|
+
The DataFrames to concatenate.
|
|
191
|
+
|
|
192
|
+
Returns
|
|
193
|
+
-------
|
|
194
|
+
pd.DataFrame
|
|
195
|
+
The concatenated DataFrame.
|
|
196
|
+
"""
|
|
197
|
+
return pd.concat(dfs, ignore_index=True)
|
|
198
|
+
|
|
161
199
|
@staticmethod
|
|
162
200
|
def get_schema(df: pd.DataFrame) -> Any:
|
|
163
201
|
"""
|
|
@@ -196,7 +234,7 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
196
234
|
return schema
|
|
197
235
|
|
|
198
236
|
@staticmethod
|
|
199
|
-
def get_preview(df: pd.DataFrame) ->
|
|
237
|
+
def get_preview(df: pd.DataFrame) -> dict:
|
|
200
238
|
"""
|
|
201
239
|
Get preview.
|
|
202
240
|
|
|
@@ -211,9 +249,51 @@ class DataframeReaderPandas(DataframeReader):
|
|
|
211
249
|
The preview.
|
|
212
250
|
"""
|
|
213
251
|
columns = [str(col) for col, _ in df.dtypes.items()]
|
|
214
|
-
head = df.head(10)
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
preview =
|
|
218
|
-
|
|
219
|
-
|
|
252
|
+
head = df.head(10).replace({np.nan: None})
|
|
253
|
+
data = head.values.tolist()
|
|
254
|
+
prepared_data = prepare_data(data)
|
|
255
|
+
preview = prepare_preview(columns, prepared_data)
|
|
256
|
+
finalizes = finalize_preview(preview, df.shape[0])
|
|
257
|
+
serialized = _serialize_deserialize_preview(finalizes)
|
|
258
|
+
return check_preview_size(serialized)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class PandasJsonEncoder(CustomJsonEncoder):
|
|
262
|
+
"""
|
|
263
|
+
JSON pd.Timestamp to ISO format serializer.
|
|
264
|
+
"""
|
|
265
|
+
|
|
266
|
+
def default(self, obj: Any) -> Any:
|
|
267
|
+
"""
|
|
268
|
+
Pandas datetime to ISO format serializer.
|
|
269
|
+
|
|
270
|
+
Parameters
|
|
271
|
+
----------
|
|
272
|
+
obj : Any
|
|
273
|
+
The object to serialize.
|
|
274
|
+
|
|
275
|
+
Returns
|
|
276
|
+
-------
|
|
277
|
+
Any
|
|
278
|
+
The serialized object.
|
|
279
|
+
"""
|
|
280
|
+
if isinstance(obj, pd.Timestamp):
|
|
281
|
+
return obj.isoformat()
|
|
282
|
+
return super().default(obj)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _serialize_deserialize_preview(preview: dict) -> dict:
|
|
286
|
+
"""
|
|
287
|
+
Serialize and deserialize preview.
|
|
288
|
+
|
|
289
|
+
Parameters
|
|
290
|
+
----------
|
|
291
|
+
preview : dict
|
|
292
|
+
The preview.
|
|
293
|
+
|
|
294
|
+
Returns
|
|
295
|
+
-------
|
|
296
|
+
dict
|
|
297
|
+
The serialized preview.
|
|
298
|
+
"""
|
|
299
|
+
return json.loads(json.dumps(preview, cls=PandasJsonEncoder))
|
|
File without changes
|