digitalhub 0.10.0b0__py3-none-any.whl → 0.10.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic.
- digitalhub/client/dhcore/configurator.py +2 -2
- digitalhub/entities/artifact/utils.py +1 -1
- digitalhub/entities/dataitem/_base/entity.py +0 -41
- digitalhub/entities/dataitem/table/entity.py +49 -35
- digitalhub/entities/dataitem/utils.py +1 -1
- digitalhub/readers/{_base → data/_base}/builder.py +1 -1
- digitalhub/readers/{_base → data/_base}/reader.py +16 -4
- digitalhub/readers/{api.py → data/api.py} +2 -2
- digitalhub/readers/{factory.py → data/factory.py} +3 -3
- digitalhub/readers/data/pandas/__init__.py +0 -0
- digitalhub/readers/{pandas → data/pandas}/builder.py +2 -2
- digitalhub/readers/{pandas → data/pandas}/reader.py +56 -24
- digitalhub/readers/query/__init__.py +0 -0
- digitalhub/stores/_base/store.py +59 -22
- digitalhub/stores/local/store.py +101 -71
- digitalhub/stores/remote/store.py +81 -0
- digitalhub/stores/s3/configurator.py +3 -2
- digitalhub/stores/s3/enums.py +1 -1
- digitalhub/stores/s3/store.py +144 -41
- digitalhub/stores/sql/store.py +90 -30
- {digitalhub-0.10.0b0.dist-info → digitalhub-0.10.0b2.dist-info}/METADATA +5 -1
- {digitalhub-0.10.0b0.dist-info → digitalhub-0.10.0b2.dist-info}/RECORD +28 -26
- /digitalhub/readers/{_base → data}/__init__.py +0 -0
- /digitalhub/readers/{pandas → data/_base}/__init__.py +0 -0
- /digitalhub/readers/{pandas → data/pandas}/enums.py +0 -0
- {digitalhub-0.10.0b0.dist-info → digitalhub-0.10.0b2.dist-info}/LICENSE.txt +0 -0
- {digitalhub-0.10.0b0.dist-info → digitalhub-0.10.0b2.dist-info}/WHEEL +0 -0
- {digitalhub-0.10.0b0.dist-info → digitalhub-0.10.0b2.dist-info}/top_level.txt +0 -0
digitalhub/entities/artifact/utils.py
CHANGED
@@ -56,7 +56,7 @@ def process_kwargs(
     if path is None:
         uuid = build_uuid()
         kwargs["uuid"] = uuid
-        kwargs["path"] = build_log_path_from_source(project, EntityTypes.
+        kwargs["path"] = build_log_path_from_source(project, EntityTypes.ARTIFACT.value, name, uuid, source)
     else:
         kwargs["path"] = path
     return kwargs
digitalhub/entities/dataitem/_base/entity.py
CHANGED
@@ -1,13 +1,9 @@
 from __future__ import annotations
 
 import typing
-from pathlib import Path
 
 from digitalhub.entities._base.material.entity import MaterialEntity
 from digitalhub.entities._commons.enums import EntityTypes
-from digitalhub.entities.dataitem.utils import DEFAULT_EXTENSION
-from digitalhub.utils.exceptions import EntityError
-from digitalhub.utils.uri_utils import has_sql_scheme
 
 if typing.TYPE_CHECKING:
     from digitalhub.entities._base.entity.metadata import Metadata
@@ -36,40 +32,3 @@ class Dataitem(MaterialEntity):
         super().__init__(project, name, uuid, kind, metadata, spec, status, user)
         self.spec: DataitemSpec
         self.status: DataitemStatus
-
-    ##############################
-    # Helper methods
-    ##############################
-
-    @staticmethod
-    def _get_extension(path: str, file_format: str | None = None) -> str:
-        """
-        Get extension of path.
-
-        Parameters
-        ----------
-        path : str
-            Path to get extension from.
-        file_format : str
-            File format.
-
-        Returns
-        -------
-        str
-            File extension.
-
-        Raises
-        ------
-        EntityError
-            If file format is not supported.
-        """
-        if file_format is not None:
-            return file_format
-
-        if has_sql_scheme(path):
-            return DEFAULT_EXTENSION
-
-        ext = Path(path).suffix[1:]
-        if ext is not None:
-            return ext
-        raise EntityError("Unknown file format. Only csv and parquet are supported.")
digitalhub/entities/dataitem/table/entity.py
CHANGED
@@ -7,7 +7,7 @@ from typing import Any
 
 from digitalhub.entities.dataitem._base.entity import Dataitem
 from digitalhub.stores.api import get_store
-from digitalhub.utils.uri_utils import
+from digitalhub.utils.uri_utils import has_sql_scheme
 
 if typing.TYPE_CHECKING:
     from digitalhub.entities._base.entity.metadata import Metadata
@@ -36,19 +36,39 @@ class DataitemTable(Dataitem):
         self.spec: DataitemSpecTable
         self.status: DataitemStatusTable
 
+        self._query: str | None = None
+
+    def query(self, query: str) -> DataitemTable:
+        """
+        Set query to execute.
+
+        Parameters
+        ----------
+        query : str
+            Query to execute.
+
+        Returns
+        -------
+        DataitemTable
+            Self object.
+        """
+        # to remove in future
+        if not has_sql_scheme(self.spec.path):
+            raise ValueError(
+                f"Dataitem path is not a SQL scheme: {self.spec.path}",
+                " Query can be made only on a SQL scheme.",
+            )
+        self._query = query
+        return self
+
     def as_df(
         self,
         file_format: str | None = None,
-        engine: str | None =
-        clean_tmp_path: bool = True,
+        engine: str | None = "pandas",
         **kwargs,
     ) -> Any:
         """
         Read dataitem file (csv or parquet) as a DataFrame from spec.path.
-        If the dataitem is not local, it will be downloaded to a temporary
-        folder named tmp_dir in the project context folder.
-        If clean_tmp_path is True, the temporary folder will be deleted after the
-        method is executed.
         It's possible to pass additional arguments to the this function. These
         keyword arguments will be passed to the DataFrame reader function such as
         pandas's read_csv or read_parquet.
@@ -56,11 +76,10 @@ class DataitemTable(Dataitem):
         Parameters
         ----------
         file_format : str
-            Format of the file.
+            Format of the file to read. By default, it will be inferred from
+            the extension of the file.
         engine : str
             Dataframe framework, by default pandas.
-        clean_tmp_path : bool
-            If True, the temporary folder will be deleted.
         **kwargs : dict
             Keyword arguments passed to the read_df function.
 
@@ -69,30 +88,20 @@ class DataitemTable(Dataitem):
         Any
             DataFrame.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            extension = self._get_extension(checker, file_format)
-            return get_store(self.project, "").read_df(data_path, extension, engine, **kwargs)
-
-        except Exception as e:
-            raise e
-
-        finally:
-            # Delete tmp folder
-            self._clean_tmp_path(tmp_dir, clean_tmp_path)
+        if self._query is not None:
+            df = get_store(self.project, self.spec.path).query(
+                self._query,
+                self.spec.path,
+                engine,
+            )
+            self._query = None
+            return df
+        return get_store(self.project, self.spec.path).read_df(
+            self.spec.path,
+            file_format,
+            engine,
+            **kwargs,
+        )
 
     def write_df(
         self,
@@ -119,7 +128,12 @@ class DataitemTable(Dataitem):
         str
             Path to the written dataframe.
         """
-        return get_store(self.project, self.spec.path).write_df(
+        return get_store(self.project, self.spec.path).write_df(
+            df,
+            self.spec.path,
+            extension=extension,
+            **kwargs,
+        )
 
     @staticmethod
     def _clean_tmp_path(pth: Path | None, clean: bool) -> None:
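Taken together, these hunks remove the old temporary-download flow from as_df (the clean_tmp_path parameter and the tmp-folder cleanup are gone) and add query() for staging a SQL statement that the next as_df() call executes through the store. A minimal usage sketch, assuming di is an existing DataitemTable whose spec.path uses a SQL scheme (the variable name, table name, and URI layout below are illustrative, not from the package):

    # `di` is assumed to be a DataitemTable bound to a SQL-backed path (e.g. a sql:// URI).
    # New in this release: stage a query, then materialize it as a DataFrame.
    # query() raises ValueError when spec.path is not a SQL scheme.
    df = di.query("SELECT * FROM my_table WHERE amount > 100").as_df(engine="pandas")

    # Without a staged query, as_df() reads directly from the store backing spec.path;
    # file_format is inferred from the path extension when not given.
    df_all = di.as_df()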
digitalhub/entities/dataitem/utils.py
CHANGED
@@ -8,7 +8,7 @@ from digitalhub.context.api import get_context
 from digitalhub.entities._base.entity._constructors.uuid import build_uuid
 from digitalhub.entities._base.material.utils import build_log_path_from_source, eval_local_source
 from digitalhub.entities._commons.enums import EntityKinds, EntityTypes
-from digitalhub.readers.api import get_reader_by_object
+from digitalhub.readers.data.api import get_reader_by_object
 from digitalhub.utils.generic_utils import slugify_string
 
 if typing.TYPE_CHECKING:
digitalhub/readers/{_base → data/_base}/builder.py
RENAMED
@@ -6,7 +6,7 @@ from abc import abstractmethod
 from digitalhub.utils.exceptions import BuilderError
 
 if typing.TYPE_CHECKING:
-    from digitalhub.readers._base.reader import DataframeReader
+    from digitalhub.readers.data._base.reader import DataframeReader
 
 
 class ReaderBuilder:
digitalhub/readers/{_base → data/_base}/reader.py
RENAMED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from abc import abstractmethod
-from typing import Any
+from typing import IO, Any
 
 
 class DataframeReader:
@@ -13,11 +13,16 @@ class DataframeReader:
     # Read methods
     ##############################
 
-    @staticmethod
     @abstractmethod
-    def read_df(
+    def read_df(self, path_or_buffer: str | IO, extension: str, **kwargs) -> Any:
+        """
+        Read DataFrame from path or buffer.
+        """
+
+    @abstractmethod
+    def read_table(self, *args, **kwargs) -> Any:
         """
-        Read
+        Read table from db.
         """
 
     ##############################
@@ -68,3 +73,10 @@ class DataframeReader:
         """
         Get preview.
         """
+
+    @staticmethod
+    @abstractmethod
+    def concat_dfs(dfs: list[Any]) -> Any:
+        """
+        Concatenate a list of DataFrames.
+        """
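With the reader interface now split into read_df (files and buffers), read_table (databases), and the new concat_dfs helper, an alternative engine would plug in by subclassing DataframeReader from its relocated module. The class below is a hypothetical sketch, not part of the package, and omits the write and preview methods a complete reader also implements:

    from __future__ import annotations

    from typing import IO, Any

    import polars as pl  # hypothetical alternative engine

    from digitalhub.readers.data._base.reader import DataframeReader


    class DataframeReaderPolars(DataframeReader):
        """Illustrative reader for the reorganized interface (sketch only)."""

        def read_df(self, path_or_buffer: str | IO, extension: str, **kwargs) -> pl.DataFrame:
            # Dispatch on extension, mirroring the pandas reader.
            if extension == "csv":
                return pl.read_csv(path_or_buffer, **kwargs)
            if extension == "parquet":
                return pl.read_parquet(path_or_buffer, **kwargs)
            raise ValueError(f"Unsupported extension '{extension}'.")

        def read_table(self, sql: str, engine: Any, **kwargs) -> pl.DataFrame:
            # Polars can read a SQL query through an existing connection/engine.
            return pl.read_database(sql, connection=engine, **kwargs)

        @staticmethod
        def concat_dfs(dfs: list[pl.DataFrame]) -> pl.DataFrame:
            return pl.concat(dfs)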
digitalhub/readers/{api.py → data/api.py}
RENAMED
@@ -3,11 +3,11 @@ from __future__ import annotations
 import typing
 from typing import Any
 
-from digitalhub.readers.factory import factory
+from digitalhub.readers.data.factory import factory
 from digitalhub.utils.exceptions import ReaderError
 
 if typing.TYPE_CHECKING:
-    from digitalhub.readers._base.reader import DataframeReader
+    from digitalhub.readers.data._base.reader import DataframeReader
 
 
 def get_reader_by_engine(engine: str | None = None) -> DataframeReader:
digitalhub/readers/{factory.py → data/factory.py}
RENAMED
@@ -5,8 +5,8 @@ import typing
 from digitalhub.utils.exceptions import BuilderError
 
 if typing.TYPE_CHECKING:
-    from digitalhub.readers._base.builder import ReaderBuilder
-    from digitalhub.readers._base.reader import DataframeReader
+    from digitalhub.readers.data._base.builder import ReaderBuilder
+    from digitalhub.readers.data._base.reader import DataframeReader
 
 
 class ReaderFactory:
@@ -126,7 +126,7 @@ class ReaderFactory:
 factory = ReaderFactory()
 
 try:
-    from digitalhub.readers.pandas.builder import ReaderBuilderPandas
+    from digitalhub.readers.data.pandas.builder import ReaderBuilderPandas
 
     factory.add_builder(
         ReaderBuilderPandas.ENGINE,
digitalhub/readers/{pandas → data/pandas}/builder.py
RENAMED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
-from digitalhub.readers._base.builder import ReaderBuilder
-from digitalhub.readers.pandas.reader import DataframeReaderPandas
+from digitalhub.readers.data._base.builder import ReaderBuilder
+from digitalhub.readers.data.pandas.reader import DataframeReaderPandas
 
 
 class ReaderBuilderPandas(ReaderBuilder):
digitalhub/readers/{pandas → data/pandas}/reader.py
RENAMED
@@ -2,15 +2,15 @@ from __future__ import annotations
 
 import json
 from io import BytesIO
-from typing import Any
+from typing import IO, Any
 
 import numpy as np
 import pandas as pd
 from pandas.errors import ParserError
 
 from digitalhub.entities.dataitem.table.utils import check_preview_size, finalize_preview, prepare_data, prepare_preview
-from digitalhub.readers._base.reader import DataframeReader
-from digitalhub.readers.pandas.enums import Extensions
+from digitalhub.readers.data._base.reader import DataframeReader
+from digitalhub.readers.data.pandas.enums import Extensions
 from digitalhub.utils.exceptions import ReaderError
 from digitalhub.utils.generic_utils import CustomJsonEncoder
 
@@ -24,14 +24,14 @@ class DataframeReaderPandas(DataframeReader):
     # Read methods
     ##############################
 
-    def read_df(self,
+    def read_df(self, path_or_buffer: str | IO, extension: str, **kwargs) -> pd.DataFrame:
         """
-        Read DataFrame from path.
+        Read DataFrame from path or buffer.
 
         Parameters
         ----------
-
-        Path
+        path_or_buffer : str | IO
+            Path or buffer to read DataFrame from.
         extension : str
             Extension of the file.
         **kwargs : dict
@@ -43,25 +43,40 @@ class DataframeReaderPandas(DataframeReader):
             Pandas DataFrame.
         """
         if extension == Extensions.CSV.value:
-
-
-
-
-
-
-
-
+            return pd.read_csv(path_or_buffer, **kwargs)
+        if extension == Extensions.PARQUET.value:
+            return pd.read_parquet(path_or_buffer, **kwargs)
+        if extension == Extensions.JSON.value:
+            return pd.read_json(path_or_buffer, **kwargs)
+        if extension in (Extensions.EXCEL.value, Extensions.EXCEL_OLD.value):
+            return pd.read_excel(path_or_buffer, **kwargs)
+        if extension in (Extensions.TXT.value, Extensions.FILE.value):
            try:
-                return self.read_df(
+                return self.read_df(path_or_buffer, Extensions.CSV.value, **kwargs)
             except ParserError:
-                raise ReaderError(f"Unable to read from {
+                raise ReaderError(f"Unable to read from {path_or_buffer}.")
         else:
             raise ReaderError(f"Unsupported extension '{extension}' for reading.")
 
-
-
-
-
+    def read_table(self, sql: str, engine: Any, **kwargs) -> pd.DataFrame:
+        """
+        Read table from db.
+
+        Parameters
+        ----------
+        sql : str
+            SQL query.
+        engine : Any
+            SQL Engine.
+        **kwargs
+            Keyword arguments.
+
+        Returns
+        -------
+        pd.DataFrame
+            Pandas DataFrame.
+        """
+        return pd.read_sql(sql=sql, con=engine, **kwargs)
 
     ##############################
     # Write methods
@@ -92,7 +107,7 @@ class DataframeReaderPandas(DataframeReader):
         """
         if extension == Extensions.CSV.value:
             return self.write_csv(df, dst, **kwargs)
-
+        if extension == Extensions.PARQUET.value:
             return self.write_parquet(df, dst, **kwargs)
         raise ReaderError(f"Unsupported extension '{extension}' for writing.")
 
@@ -137,7 +152,7 @@ class DataframeReaderPandas(DataframeReader):
         df.to_parquet(dst, index=False, **kwargs)
 
     @staticmethod
-    def write_table(df: pd.DataFrame, table: str, engine: Any, schema: str, **kwargs) -> None:
+    def write_table(df: pd.DataFrame, table: str, engine: Any, schema: str | None = None, **kwargs) -> None:
         """
         Write DataFrame as table.
 
@@ -148,7 +163,7 @@ class DataframeReaderPandas(DataframeReader):
         table : str
             The destination table.
         engine : Any
-
+            SQL Engine.
         schema : str
             The destination schema.
         **kwargs : dict
@@ -164,6 +179,23 @@ class DataframeReaderPandas(DataframeReader):
     # Utils
     ##############################
 
+    @staticmethod
+    def concat_dfs(dfs: list[pd.DataFrame]) -> pd.DataFrame:
+        """
+        Concatenate a list of DataFrames.
+
+        Parameters
+        ----------
+        dfs : list[pd.DataFrame]
+            The DataFrames to concatenate.
+
+        Returns
+        -------
+        pd.DataFrame
+            The concatenated DataFrame.
+        """
+        return pd.concat(dfs, ignore_index=True)
+
     @staticmethod
     def get_schema(df: pd.DataFrame) -> Any:
         """
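The pandas reader now accepts an in-memory buffer as well as a path, which is what lets stores hand data over without first writing it to disk. A short sketch of the relocated entry point; it assumes the pandas engine is registered under the same "pandas" name that as_df uses by default and that the Extensions enum values match plain file suffixes such as "csv":

    from io import BytesIO

    from digitalhub.readers.data.api import get_reader_by_engine

    # Resolve the reader for the pandas engine and read a CSV from a buffer.
    reader = get_reader_by_engine("pandas")
    df = reader.read_df(BytesIO(b"a,b\n1,2\n3,4\n"), "csv")

    # concat_dfs was added in this release; presumably used by stores that
    # read a path resolving to several files.
    combined = reader.concat_dfs([df, df])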
digitalhub/stores/_base/store.py
CHANGED
@@ -1,14 +1,18 @@
 from __future__ import annotations
 
+import typing
 from abc import abstractmethod
 from pathlib import Path
 from tempfile import mkdtemp
 from typing import Any
 
-from digitalhub.readers.api import get_reader_by_engine
+from digitalhub.readers.data.api import get_reader_by_engine
 from digitalhub.utils.exceptions import StoreError
 from digitalhub.utils.uri_utils import has_local_scheme
 
+if typing.TYPE_CHECKING:
+    from digitalhub.readers.data._base.reader import DataframeReader
+
 
 class Store:
     """
@@ -52,11 +56,6 @@ class Store:
     ##############################
 
     @abstractmethod
-    def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
-        """
-        Write DataFrame as parquet or csv.
-        """
-
     def read_df(
         self,
         path: str | list[str],
@@ -66,25 +65,23 @@ class Store:
     ) -> Any:
         """
         Read DataFrame from path.
+        """
 
-
-
-
-
-
-
-
-
-
-            Keyword arguments.
+    @abstractmethod
+    def query(
+        self,
+        query: str,
+        engine: str | None = None,
+    ) -> Any:
+        """
+        Query data from database.
+        """
 
-
-
-
-
+    @abstractmethod
+    def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
+        """
+        Write DataFrame as parquet or csv.
         """
-        reader = get_reader_by_engine(engine)
-        return reader.read_df(path, extension, **kwargs)
 
     ##############################
     # Helpers methods
@@ -187,3 +184,43 @@ class Store:
         """
        tmpdir = mkdtemp()
        return Path(tmpdir)
+
+    @staticmethod
+    def _get_reader(engine: str | None = None) -> DataframeReader:
+        """
+        Get Dataframe reader.
+
+        Parameters
+        ----------
+        engine : str
+            Dataframe engine (pandas, polars, etc.).
+
+        Returns
+        -------
+        Any
+            Reader object.
+        """
+        return get_reader_by_engine(engine)
+
+    @staticmethod
+    def _get_extension(extension: str | None = None, path: str | None = None) -> str:
+        """
+        Get extension from path.
+
+        Parameters
+        ----------
+        extension : str
+            The extension to get.
+        path : str
+            The path to get the extension from.
+
+        Returns
+        -------
+        str
+            The extension.
+        """
+        if extension is not None:
+            return extension
+        if path is not None:
+            return Path(path).suffix.removeprefix(".")
+        raise ValueError("Extension or path must be provided.")