digitalhub-0.11.0b7-py3-none-any.whl → digitalhub-0.13.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of digitalhub might be problematic. Click here for more details.
- digitalhub/__init__.py +4 -1
- digitalhub/context/api.py +9 -5
- digitalhub/context/builder.py +7 -5
- digitalhub/context/context.py +13 -1
- digitalhub/entities/__init__.py +3 -0
- digitalhub/entities/_base/__init__.py +3 -0
- digitalhub/entities/_base/_base/__init__.py +3 -0
- digitalhub/entities/_base/_base/entity.py +4 -0
- digitalhub/entities/_base/context/__init__.py +3 -0
- digitalhub/entities/_base/context/entity.py +4 -0
- digitalhub/entities/_base/entity/__init__.py +3 -0
- digitalhub/entities/_base/entity/_constructors/__init__.py +3 -0
- digitalhub/entities/_base/entity/_constructors/metadata.py +4 -0
- digitalhub/entities/_base/entity/_constructors/name.py +4 -0
- digitalhub/entities/_base/entity/_constructors/spec.py +4 -0
- digitalhub/entities/_base/entity/_constructors/status.py +4 -0
- digitalhub/entities/_base/entity/_constructors/uuid.py +4 -0
- digitalhub/entities/_base/entity/builder.py +4 -0
- digitalhub/entities/_base/entity/entity.py +4 -0
- digitalhub/entities/_base/entity/metadata.py +4 -0
- digitalhub/entities/_base/entity/spec.py +4 -0
- digitalhub/entities/_base/entity/status.py +4 -0
- digitalhub/entities/_base/executable/__init__.py +3 -0
- digitalhub/entities/_base/executable/entity.py +109 -57
- digitalhub/entities/_base/material/__init__.py +3 -0
- digitalhub/entities/_base/material/entity.py +15 -18
- digitalhub/entities/_base/material/spec.py +4 -0
- digitalhub/entities/_base/material/status.py +4 -0
- digitalhub/entities/_base/material/utils.py +5 -1
- digitalhub/entities/_base/runtime_entity/__init__.py +3 -0
- digitalhub/entities/_base/runtime_entity/builder.py +4 -0
- digitalhub/entities/_base/unversioned/__init__.py +3 -0
- digitalhub/entities/_base/unversioned/builder.py +4 -0
- digitalhub/entities/_base/unversioned/entity.py +4 -0
- digitalhub/entities/_base/versioned/__init__.py +3 -0
- digitalhub/entities/_base/versioned/builder.py +4 -0
- digitalhub/entities/_base/versioned/entity.py +4 -0
- digitalhub/entities/_commons/__init__.py +3 -0
- digitalhub/entities/_commons/enums.py +4 -0
- digitalhub/entities/_commons/metrics.py +68 -30
- digitalhub/entities/_commons/utils.py +40 -9
- digitalhub/entities/_processors/__init__.py +3 -0
- digitalhub/entities/_processors/base.py +154 -79
- digitalhub/entities/_processors/context.py +370 -215
- digitalhub/entities/_processors/utils.py +78 -30
- digitalhub/entities/artifact/__init__.py +3 -0
- digitalhub/entities/artifact/_base/__init__.py +3 -0
- digitalhub/entities/artifact/_base/builder.py +4 -0
- digitalhub/entities/artifact/_base/entity.py +4 -0
- digitalhub/entities/artifact/_base/spec.py +4 -0
- digitalhub/entities/artifact/_base/status.py +4 -0
- digitalhub/entities/artifact/artifact/__init__.py +3 -0
- digitalhub/entities/artifact/artifact/builder.py +4 -0
- digitalhub/entities/artifact/artifact/entity.py +4 -0
- digitalhub/entities/artifact/artifact/spec.py +4 -0
- digitalhub/entities/artifact/artifact/status.py +4 -0
- digitalhub/entities/artifact/crud.py +8 -0
- digitalhub/entities/artifact/utils.py +32 -13
- digitalhub/entities/builders.py +4 -0
- digitalhub/entities/dataitem/__init__.py +3 -0
- digitalhub/entities/dataitem/_base/__init__.py +3 -0
- digitalhub/entities/dataitem/_base/builder.py +4 -0
- digitalhub/entities/dataitem/_base/entity.py +4 -0
- digitalhub/entities/dataitem/_base/spec.py +4 -0
- digitalhub/entities/dataitem/_base/status.py +4 -0
- digitalhub/entities/dataitem/crud.py +18 -2
- digitalhub/entities/dataitem/dataitem/__init__.py +3 -0
- digitalhub/entities/dataitem/dataitem/builder.py +4 -0
- digitalhub/entities/dataitem/dataitem/entity.py +4 -0
- digitalhub/entities/dataitem/dataitem/spec.py +4 -0
- digitalhub/entities/dataitem/dataitem/status.py +4 -0
- digitalhub/entities/dataitem/iceberg/__init__.py +3 -0
- digitalhub/entities/dataitem/iceberg/builder.py +4 -0
- digitalhub/entities/dataitem/iceberg/entity.py +4 -0
- digitalhub/entities/dataitem/iceberg/spec.py +4 -0
- digitalhub/entities/dataitem/iceberg/status.py +4 -0
- digitalhub/entities/dataitem/table/__init__.py +3 -0
- digitalhub/entities/dataitem/table/builder.py +4 -0
- digitalhub/entities/dataitem/table/entity.py +7 -3
- digitalhub/entities/dataitem/table/models.py +4 -0
- digitalhub/entities/dataitem/table/spec.py +4 -0
- digitalhub/entities/dataitem/table/status.py +4 -0
- digitalhub/entities/dataitem/table/utils.py +4 -0
- digitalhub/entities/dataitem/utils.py +88 -35
- digitalhub/entities/function/__init__.py +3 -0
- digitalhub/entities/function/_base/__init__.py +3 -0
- digitalhub/entities/function/_base/builder.py +4 -0
- digitalhub/entities/function/_base/entity.py +4 -0
- digitalhub/entities/function/_base/spec.py +4 -0
- digitalhub/entities/function/_base/status.py +4 -0
- digitalhub/entities/function/crud.py +4 -0
- digitalhub/entities/model/__init__.py +3 -0
- digitalhub/entities/model/_base/__init__.py +3 -0
- digitalhub/entities/model/_base/builder.py +4 -0
- digitalhub/entities/model/_base/entity.py +4 -0
- digitalhub/entities/model/_base/spec.py +4 -0
- digitalhub/entities/model/_base/status.py +4 -0
- digitalhub/entities/model/crud.py +8 -0
- digitalhub/entities/model/huggingface/__init__.py +3 -0
- digitalhub/entities/model/huggingface/builder.py +4 -0
- digitalhub/entities/model/huggingface/entity.py +4 -0
- digitalhub/entities/model/huggingface/spec.py +4 -0
- digitalhub/entities/model/huggingface/status.py +4 -0
- digitalhub/entities/model/mlflow/__init__.py +3 -0
- digitalhub/entities/model/mlflow/builder.py +4 -0
- digitalhub/entities/model/mlflow/entity.py +4 -0
- digitalhub/entities/model/mlflow/models.py +4 -0
- digitalhub/entities/model/mlflow/spec.py +4 -0
- digitalhub/entities/model/mlflow/status.py +4 -0
- digitalhub/entities/model/mlflow/utils.py +4 -0
- digitalhub/entities/model/model/__init__.py +3 -0
- digitalhub/entities/model/model/builder.py +4 -0
- digitalhub/entities/model/model/entity.py +4 -0
- digitalhub/entities/model/model/spec.py +4 -0
- digitalhub/entities/model/model/status.py +4 -0
- digitalhub/entities/model/sklearn/__init__.py +3 -0
- digitalhub/entities/model/sklearn/builder.py +4 -0
- digitalhub/entities/model/sklearn/entity.py +4 -0
- digitalhub/entities/model/sklearn/spec.py +4 -0
- digitalhub/entities/model/sklearn/status.py +4 -0
- digitalhub/entities/model/utils.py +32 -13
- digitalhub/entities/project/__init__.py +3 -0
- digitalhub/entities/project/_base/__init__.py +3 -0
- digitalhub/entities/project/_base/builder.py +4 -0
- digitalhub/entities/project/_base/entity.py +4 -2
- digitalhub/entities/project/_base/models.py +4 -0
- digitalhub/entities/project/_base/spec.py +4 -0
- digitalhub/entities/project/_base/status.py +4 -0
- digitalhub/entities/project/crud.py +4 -0
- digitalhub/entities/project/utils.py +4 -0
- digitalhub/entities/run/__init__.py +3 -0
- digitalhub/entities/run/_base/__init__.py +3 -0
- digitalhub/entities/run/_base/builder.py +4 -0
- digitalhub/entities/run/_base/entity.py +6 -2
- digitalhub/entities/run/_base/spec.py +4 -0
- digitalhub/entities/run/_base/status.py +4 -0
- digitalhub/entities/run/crud.py +4 -0
- digitalhub/entities/secret/__init__.py +3 -0
- digitalhub/entities/secret/_base/__init__.py +3 -0
- digitalhub/entities/secret/_base/builder.py +4 -0
- digitalhub/entities/secret/_base/entity.py +4 -0
- digitalhub/entities/secret/_base/spec.py +4 -0
- digitalhub/entities/secret/_base/status.py +4 -0
- digitalhub/entities/secret/crud.py +4 -0
- digitalhub/entities/task/__init__.py +3 -0
- digitalhub/entities/task/_base/__init__.py +3 -0
- digitalhub/entities/task/_base/builder.py +4 -0
- digitalhub/entities/task/_base/entity.py +4 -0
- digitalhub/entities/task/_base/models.py +16 -3
- digitalhub/entities/task/_base/spec.py +4 -0
- digitalhub/entities/task/_base/status.py +4 -0
- digitalhub/entities/task/_base/utils.py +4 -0
- digitalhub/entities/task/crud.py +4 -0
- digitalhub/entities/trigger/__init__.py +3 -0
- digitalhub/entities/trigger/_base/__init__.py +3 -0
- digitalhub/entities/trigger/_base/builder.py +4 -0
- digitalhub/entities/trigger/_base/entity.py +15 -0
- digitalhub/entities/trigger/_base/spec.py +4 -0
- digitalhub/entities/trigger/_base/status.py +4 -0
- digitalhub/entities/trigger/crud.py +4 -0
- digitalhub/entities/trigger/lifecycle/__init__.py +3 -0
- digitalhub/entities/trigger/lifecycle/builder.py +4 -0
- digitalhub/entities/trigger/lifecycle/entity.py +4 -0
- digitalhub/entities/trigger/lifecycle/spec.py +4 -0
- digitalhub/entities/trigger/lifecycle/status.py +4 -0
- digitalhub/entities/trigger/scheduler/__init__.py +3 -0
- digitalhub/entities/trigger/scheduler/builder.py +4 -0
- digitalhub/entities/trigger/scheduler/entity.py +4 -0
- digitalhub/entities/trigger/scheduler/spec.py +4 -0
- digitalhub/entities/trigger/scheduler/status.py +4 -0
- digitalhub/entities/workflow/__init__.py +3 -0
- digitalhub/entities/workflow/_base/__init__.py +3 -0
- digitalhub/entities/workflow/_base/builder.py +4 -0
- digitalhub/entities/workflow/_base/entity.py +4 -0
- digitalhub/entities/workflow/_base/spec.py +4 -0
- digitalhub/entities/workflow/_base/status.py +4 -0
- digitalhub/entities/workflow/crud.py +4 -0
- digitalhub/factory/__init__.py +3 -0
- digitalhub/factory/factory.py +29 -3
- digitalhub/factory/utils.py +15 -3
- digitalhub/runtimes/__init__.py +3 -0
- digitalhub/runtimes/_base.py +5 -1
- digitalhub/runtimes/builder.py +22 -1
- digitalhub/runtimes/enums.py +4 -0
- digitalhub/stores/__init__.py +3 -0
- digitalhub/stores/client/__init__.py +15 -0
- digitalhub/stores/client/_base/__init__.py +3 -0
- digitalhub/stores/client/_base/api_builder.py +18 -0
- digitalhub/stores/client/_base/client.py +97 -0
- digitalhub/stores/client/_base/key_builder.py +32 -0
- digitalhub/stores/client/_base/params_builder.py +18 -0
- digitalhub/stores/client/api.py +14 -5
- digitalhub/stores/client/builder.py +7 -1
- digitalhub/stores/client/dhcore/__init__.py +3 -0
- digitalhub/stores/client/dhcore/api_builder.py +21 -0
- digitalhub/stores/client/dhcore/client.py +329 -70
- digitalhub/stores/client/dhcore/configurator.py +489 -193
- digitalhub/stores/client/dhcore/enums.py +7 -0
- digitalhub/stores/client/dhcore/error_parser.py +39 -1
- digitalhub/stores/client/dhcore/key_builder.py +4 -0
- digitalhub/stores/client/dhcore/models.py +4 -0
- digitalhub/stores/client/dhcore/params_builder.py +117 -17
- digitalhub/stores/client/dhcore/utils.py +44 -22
- digitalhub/stores/client/local/__init__.py +3 -0
- digitalhub/stores/client/local/api_builder.py +21 -0
- digitalhub/stores/client/local/client.py +10 -8
- digitalhub/stores/client/local/enums.py +4 -0
- digitalhub/stores/client/local/key_builder.py +4 -0
- digitalhub/stores/client/local/params_builder.py +4 -0
- digitalhub/stores/credentials/__init__.py +3 -0
- digitalhub/stores/credentials/api.py +35 -0
- digitalhub/stores/credentials/configurator.py +210 -0
- digitalhub/stores/credentials/enums.py +68 -0
- digitalhub/stores/credentials/handler.py +176 -0
- digitalhub/stores/credentials/ini_module.py +164 -0
- digitalhub/stores/credentials/store.py +81 -0
- digitalhub/stores/data/__init__.py +3 -0
- digitalhub/stores/data/_base/__init__.py +3 -0
- digitalhub/stores/data/_base/store.py +31 -9
- digitalhub/stores/data/api.py +53 -9
- digitalhub/stores/data/builder.py +94 -41
- digitalhub/stores/data/enums.py +4 -0
- digitalhub/stores/data/local/__init__.py +3 -0
- digitalhub/stores/data/local/store.py +8 -7
- digitalhub/stores/data/remote/__init__.py +3 -0
- digitalhub/stores/data/remote/store.py +8 -7
- digitalhub/stores/data/s3/__init__.py +3 -0
- digitalhub/stores/data/s3/configurator.py +69 -80
- digitalhub/stores/data/s3/store.py +73 -81
- digitalhub/stores/data/s3/utils.py +14 -10
- digitalhub/stores/data/sql/__init__.py +3 -0
- digitalhub/stores/data/sql/configurator.py +80 -73
- digitalhub/stores/data/sql/store.py +195 -102
- digitalhub/stores/readers/__init__.py +3 -0
- digitalhub/stores/readers/data/__init__.py +3 -0
- digitalhub/stores/readers/data/_base/__init__.py +3 -0
- digitalhub/stores/readers/data/_base/builder.py +4 -0
- digitalhub/stores/readers/data/_base/reader.py +4 -0
- digitalhub/stores/readers/data/api.py +4 -0
- digitalhub/stores/readers/data/factory.py +4 -0
- digitalhub/stores/readers/data/pandas/__init__.py +3 -0
- digitalhub/stores/readers/data/pandas/builder.py +4 -0
- digitalhub/stores/readers/data/pandas/reader.py +4 -0
- digitalhub/stores/readers/query/__init__.py +3 -0
- digitalhub/utils/__init__.py +3 -0
- digitalhub/utils/enums.py +4 -0
- digitalhub/utils/exceptions.py +10 -0
- digitalhub/utils/file_utils.py +57 -30
- digitalhub/utils/generic_utils.py +45 -33
- digitalhub/utils/git_utils.py +28 -14
- digitalhub/utils/io_utils.py +23 -18
- digitalhub/utils/logger.py +4 -0
- digitalhub/utils/types.py +4 -0
- digitalhub/utils/uri_utils.py +35 -31
- digitalhub-0.13.0.dist-info/METADATA +301 -0
- digitalhub-0.13.0.dist-info/RECORD +259 -0
- digitalhub-0.13.0.dist-info/licenses/AUTHORS +5 -0
- digitalhub-0.13.0.dist-info/licenses/LICENSE +201 -0
- digitalhub/entities/_commons/types.py +0 -5
- digitalhub/stores/configurator/__init__.py +0 -0
- digitalhub/stores/configurator/api.py +0 -31
- digitalhub/stores/configurator/configurator.py +0 -198
- digitalhub/stores/configurator/credentials_store.py +0 -65
- digitalhub/stores/configurator/enums.py +0 -21
- digitalhub/stores/configurator/ini_module.py +0 -128
- digitalhub/stores/data/s3/enums.py +0 -16
- digitalhub/stores/data/sql/enums.py +0 -16
- digitalhub/stores/data/utils.py +0 -34
- digitalhub-0.11.0b7.dist-info/METADATA +0 -259
- digitalhub-0.11.0b7.dist-info/RECORD +0 -261
- digitalhub-0.11.0b7.dist-info/licenses/LICENSE.txt +0 -216
- {digitalhub-0.11.0b7.dist-info → digitalhub-0.13.0.dist-info}/WHEEL +0 -0
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: © 2025 DSLab - Fondazione Bruno Kessler
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
1
5
|
from __future__ import annotations
|
|
2
6
|
|
|
3
7
|
import typing
|
|
@@ -10,25 +14,36 @@ from sqlalchemy import MetaData, Table, create_engine, select
|
|
|
10
14
|
from sqlalchemy.engine import Engine
|
|
11
15
|
from sqlalchemy.exc import SQLAlchemyError
|
|
12
16
|
|
|
13
|
-
from digitalhub.stores.configurator.enums import CredsOrigin
|
|
14
17
|
from digitalhub.stores.data._base.store import Store
|
|
15
|
-
from digitalhub.stores.data.sql.configurator import SqlStoreConfigurator
|
|
16
18
|
from digitalhub.stores.readers.data.api import get_reader_by_object
|
|
17
|
-
from digitalhub.utils.exceptions import StoreError
|
|
19
|
+
from digitalhub.utils.exceptions import ConfigError, StoreError
|
|
18
20
|
from digitalhub.utils.types import SourcesOrListOfSources
|
|
19
21
|
|
|
20
22
|
if typing.TYPE_CHECKING:
|
|
21
23
|
from sqlalchemy.engine.row import Row
|
|
22
24
|
|
|
25
|
+
from digitalhub.stores.credentials.configurator import Configurator
|
|
26
|
+
from digitalhub.stores.data.sql.configurator import SqlStoreConfigurator
|
|
27
|
+
|
|
23
28
|
|
|
24
29
|
class SqlStore(Store):
|
|
25
30
|
"""
|
|
26
|
-
SQL store
|
|
27
|
-
|
|
31
|
+
SQL-based data store implementation for database operations.
|
|
32
|
+
|
|
33
|
+
Provides functionality for reading, writing, and managing data in SQL
|
|
34
|
+
databases. Implements the Store interface with SQL-specific operations
|
|
35
|
+
including table downloads, DataFrame operations, and query execution.
|
|
36
|
+
|
|
37
|
+
Attributes
|
|
38
|
+
----------
|
|
39
|
+
_configurator : SqlStoreConfigurator
|
|
40
|
+
The configurator instance for managing SQL database credentials
|
|
41
|
+
and connection parameters.
|
|
28
42
|
"""
|
|
29
43
|
|
|
30
|
-
def __init__(self) -> None:
|
|
31
|
-
|
|
44
|
+
def __init__(self, configurator: Configurator | None = None) -> None:
|
|
45
|
+
super().__init__(configurator)
|
|
46
|
+
self._configurator: SqlStoreConfigurator
|
|
32
47
|
|
|
33
48
|
##############################
|
|
34
49
|
# I/O methods
|
|
@@ -36,31 +51,40 @@ class SqlStore(Store):
|
|
|
36
51
|
|
|
37
52
|
def download(
|
|
38
53
|
self,
|
|
39
|
-
|
|
54
|
+
src: str,
|
|
40
55
|
dst: Path,
|
|
41
|
-
src: list[str],
|
|
42
56
|
overwrite: bool = False,
|
|
43
57
|
) -> str:
|
|
44
58
|
"""
|
|
45
|
-
Download
|
|
59
|
+
Download a SQL table as a Parquet file to local storage.
|
|
60
|
+
|
|
61
|
+
Retrieves data from a SQL table and saves it as a Parquet file
|
|
62
|
+
at the specified destination. The source path should be in the
|
|
63
|
+
format 'sql://database/schema/table'.
|
|
46
64
|
|
|
47
65
|
Parameters
|
|
48
66
|
----------
|
|
49
|
-
|
|
50
|
-
The
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
overwrite : bool
|
|
56
|
-
|
|
67
|
+
src : str
|
|
68
|
+
The SQL URI path of the table to download in the format
|
|
69
|
+
'sql://database/schema/table' or 'sql://database/table'.
|
|
70
|
+
dst : Path
|
|
71
|
+
The destination path on the local filesystem where the
|
|
72
|
+
Parquet file will be saved.
|
|
73
|
+
overwrite : bool, default False
|
|
74
|
+
Whether to overwrite existing files at the destination path.
|
|
57
75
|
|
|
58
76
|
Returns
|
|
59
77
|
-------
|
|
60
78
|
str
|
|
61
|
-
|
|
79
|
+
The absolute path of the downloaded Parquet file.
|
|
80
|
+
|
|
81
|
+
Raises
|
|
82
|
+
------
|
|
83
|
+
StoreError
|
|
84
|
+
If the destination path has an invalid extension or if
|
|
85
|
+
file operations fail.
|
|
62
86
|
"""
|
|
63
|
-
table_name = self._get_table_name(
|
|
87
|
+
table_name = self._get_table_name(src) + ".parquet"
|
|
64
88
|
# Case where dst is not provided
|
|
65
89
|
if dst is None:
|
|
66
90
|
dst = Path(self._build_temp("sql")) / table_name
|
|
@@ -79,8 +103,8 @@ class SqlStore(Store):
|
|
|
79
103
|
self._check_overwrite(dst, overwrite)
|
|
80
104
|
self._build_path(dst)
|
|
81
105
|
|
|
82
|
-
schema = self._get_schema(
|
|
83
|
-
table = self._get_table_name(
|
|
106
|
+
schema = self._get_schema(src)
|
|
107
|
+
table = self._get_table_name(src)
|
|
84
108
|
return self._download_table(schema, table, str(dst))
|
|
85
109
|
|
|
86
110
|
def upload(
|
|
@@ -89,12 +113,12 @@ class SqlStore(Store):
|
|
|
89
113
|
dst: str,
|
|
90
114
|
) -> list[tuple[str, str]]:
|
|
91
115
|
"""
|
|
92
|
-
Upload
|
|
116
|
+
Upload artifacts to SQL storage.
|
|
93
117
|
|
|
94
118
|
Raises
|
|
95
119
|
------
|
|
96
120
|
StoreError
|
|
97
|
-
|
|
121
|
+
Always raised as SQL store does not support direct upload.
|
|
98
122
|
"""
|
|
99
123
|
raise StoreError("SQL store does not support upload.")
|
|
100
124
|
|
|
@@ -104,17 +128,12 @@ class SqlStore(Store):
|
|
|
104
128
|
paths: list[tuple[str, str]],
|
|
105
129
|
) -> list[dict]:
|
|
106
130
|
"""
|
|
107
|
-
Get file information from SQL
|
|
108
|
-
|
|
109
|
-
Parameters
|
|
110
|
-
----------
|
|
111
|
-
paths : list[str]
|
|
112
|
-
List of source paths.
|
|
131
|
+
Get file metadata information from SQL storage.
|
|
113
132
|
|
|
114
133
|
Returns
|
|
115
134
|
-------
|
|
116
135
|
list[dict]
|
|
117
|
-
|
|
136
|
+
Empty list.
|
|
118
137
|
"""
|
|
119
138
|
return []
|
|
120
139
|
|
|
@@ -130,23 +149,33 @@ class SqlStore(Store):
|
|
|
130
149
|
**kwargs,
|
|
131
150
|
) -> Any:
|
|
132
151
|
"""
|
|
133
|
-
Read DataFrame from
|
|
152
|
+
Read a DataFrame from a SQL table.
|
|
153
|
+
|
|
154
|
+
Connects to the SQL database and reads data from the specified
|
|
155
|
+
table into a DataFrame using the specified engine (pandas, polars, etc.).
|
|
134
156
|
|
|
135
157
|
Parameters
|
|
136
158
|
----------
|
|
137
159
|
path : SourcesOrListOfSources
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
160
|
+
The SQL URI path to read from in the format
|
|
161
|
+
'sql://database/schema/table'. Only single paths are supported.
|
|
162
|
+
file_format : str, optional
|
|
163
|
+
File format specification (not used for SQL operations).
|
|
164
|
+
engine : str, optional
|
|
165
|
+
DataFrame engine to use (e.g., 'pandas', 'polars').
|
|
166
|
+
If None, uses the default engine.
|
|
143
167
|
**kwargs : dict
|
|
144
|
-
|
|
168
|
+
Additional keyword arguments passed to the reader.
|
|
145
169
|
|
|
146
170
|
Returns
|
|
147
171
|
-------
|
|
148
172
|
Any
|
|
149
|
-
DataFrame.
|
|
173
|
+
DataFrame object containing the table data.
|
|
174
|
+
|
|
175
|
+
Raises
|
|
176
|
+
------
|
|
177
|
+
StoreError
|
|
178
|
+
If a list of paths is provided (only single path supported).
|
|
150
179
|
"""
|
|
151
180
|
if isinstance(path, list):
|
|
152
181
|
raise StoreError("SQL store can only read a single DataFrame at a time.")
|
|
@@ -168,21 +197,26 @@ class SqlStore(Store):
|
|
|
168
197
|
engine: str | None = None,
|
|
169
198
|
) -> Any:
|
|
170
199
|
"""
|
|
171
|
-
|
|
200
|
+
Execute a custom SQL query and return results as a DataFrame.
|
|
201
|
+
|
|
202
|
+
Runs a SQL query against the database specified in the path
|
|
203
|
+
and returns the results using the specified DataFrame engine.
|
|
172
204
|
|
|
173
205
|
Parameters
|
|
174
206
|
----------
|
|
175
207
|
query : str
|
|
176
|
-
The query to execute.
|
|
208
|
+
The SQL query string to execute against the database.
|
|
177
209
|
path : str
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
210
|
+
The SQL URI path specifying the database connection
|
|
211
|
+
in the format 'sql://database/schema/table'.
|
|
212
|
+
engine : str, optional
|
|
213
|
+
DataFrame engine to use for result processing
|
|
214
|
+
(e.g., 'pandas', 'polars'). If None, uses the default.
|
|
181
215
|
|
|
182
216
|
Returns
|
|
183
217
|
-------
|
|
184
218
|
Any
|
|
185
|
-
DataFrame.
|
|
219
|
+
DataFrame object containing the query results.
|
|
186
220
|
"""
|
|
187
221
|
reader = self._get_reader(engine)
|
|
188
222
|
schema = self._get_schema(path)
|
|
@@ -191,21 +225,29 @@ class SqlStore(Store):
|
|
|
191
225
|
|
|
192
226
|
def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
|
|
193
227
|
"""
|
|
194
|
-
Write a
|
|
228
|
+
Write a DataFrame to a SQL database table.
|
|
229
|
+
|
|
230
|
+
Takes a DataFrame and writes it to the specified SQL table.
|
|
231
|
+
The destination should be in SQL URI format. Additional
|
|
232
|
+
parameters are passed to the underlying to_sql() method.
|
|
195
233
|
|
|
196
234
|
Parameters
|
|
197
235
|
----------
|
|
198
236
|
df : Any
|
|
199
|
-
The
|
|
237
|
+
The DataFrame object to write to the database.
|
|
200
238
|
dst : str
|
|
201
|
-
The destination
|
|
239
|
+
The destination SQL URI in the format
|
|
240
|
+
'sql://database/schema/table' or 'sql://database/table'.
|
|
241
|
+
extension : str, optional
|
|
242
|
+
File extension parameter (not used for SQL operations).
|
|
202
243
|
**kwargs : dict
|
|
203
|
-
|
|
244
|
+
Additional keyword arguments passed to the DataFrame's
|
|
245
|
+
to_sql() method for controlling write behavior.
|
|
204
246
|
|
|
205
247
|
Returns
|
|
206
248
|
-------
|
|
207
249
|
str
|
|
208
|
-
|
|
250
|
+
The SQL URI path where the DataFrame was written.
|
|
209
251
|
"""
|
|
210
252
|
schema = self._get_schema(dst)
|
|
211
253
|
table = self._get_table_name(dst)
|
|
@@ -217,21 +259,25 @@ class SqlStore(Store):
|
|
|
217
259
|
|
|
218
260
|
def _download_table(self, schema: str, table: str, dst: str) -> str:
|
|
219
261
|
"""
|
|
220
|
-
Download a table from SQL
|
|
262
|
+
Download a specific table from SQL database to Parquet file.
|
|
263
|
+
|
|
264
|
+
Internal method that handles the actual table download process.
|
|
265
|
+
Connects to the database, retrieves all data from the specified
|
|
266
|
+
table, and writes it to a Parquet file using PyArrow.
|
|
221
267
|
|
|
222
268
|
Parameters
|
|
223
269
|
----------
|
|
224
270
|
schema : str
|
|
225
|
-
The
|
|
271
|
+
The database schema name containing the table.
|
|
226
272
|
table : str
|
|
227
|
-
The
|
|
273
|
+
The name of the table to download.
|
|
228
274
|
dst : str
|
|
229
|
-
The
|
|
275
|
+
The local file path where the Parquet file will be saved.
|
|
230
276
|
|
|
231
277
|
Returns
|
|
232
278
|
-------
|
|
233
279
|
str
|
|
234
|
-
The destination path.
|
|
280
|
+
The destination file path of the created Parquet file.
|
|
235
281
|
"""
|
|
236
282
|
engine = self._check_factory(schema=schema)
|
|
237
283
|
|
|
@@ -255,23 +301,29 @@ class SqlStore(Store):
|
|
|
255
301
|
|
|
256
302
|
def _upload_table(self, df: Any, schema: str, table: str, **kwargs) -> str:
|
|
257
303
|
"""
|
|
258
|
-
Upload a
|
|
304
|
+
Upload a DataFrame to a SQL table.
|
|
305
|
+
|
|
306
|
+
Internal method that handles writing a DataFrame to a SQL database
|
|
307
|
+
table. Uses the appropriate reader based on the DataFrame type
|
|
308
|
+
and manages the database connection.
|
|
259
309
|
|
|
260
310
|
Parameters
|
|
261
311
|
----------
|
|
262
|
-
df :
|
|
263
|
-
The
|
|
312
|
+
df : Any
|
|
313
|
+
The DataFrame object to upload to the database.
|
|
264
314
|
schema : str
|
|
265
|
-
|
|
315
|
+
The target database schema name.
|
|
266
316
|
table : str
|
|
267
|
-
|
|
317
|
+
The target table name within the schema.
|
|
268
318
|
**kwargs : dict
|
|
269
|
-
|
|
319
|
+
Additional keyword arguments passed to the write operation,
|
|
320
|
+
such as if_exists, index, method, etc.
|
|
270
321
|
|
|
271
322
|
Returns
|
|
272
323
|
-------
|
|
273
324
|
str
|
|
274
|
-
The SQL URI where the
|
|
325
|
+
The SQL URI where the DataFrame was saved in the format
|
|
326
|
+
'sql://database/schema/table'.
|
|
275
327
|
"""
|
|
276
328
|
reader = get_reader_by_object(df)
|
|
277
329
|
engine = self._check_factory()
|
|
@@ -283,39 +335,45 @@ class SqlStore(Store):
|
|
|
283
335
|
# Helper methods
|
|
284
336
|
##############################
|
|
285
337
|
|
|
286
|
-
def _get_connection_string(self
|
|
338
|
+
def _get_connection_string(self) -> str:
|
|
287
339
|
"""
|
|
288
|
-
|
|
340
|
+
Retrieve the database connection string from the configurator.
|
|
289
341
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
origin : str
|
|
293
|
-
The origin of the credentials.
|
|
342
|
+
Gets the PostgreSQL connection string using the configured
|
|
343
|
+
database credentials (username, password, host, port, database).
|
|
294
344
|
|
|
295
345
|
Returns
|
|
296
346
|
-------
|
|
297
347
|
str
|
|
298
|
-
The connection string
|
|
348
|
+
The PostgreSQL connection string in the format
|
|
349
|
+
'postgresql://username:password@host:port/database'.
|
|
299
350
|
"""
|
|
300
|
-
return self._configurator.get_sql_conn_string(
|
|
351
|
+
return self._configurator.get_sql_conn_string()
|
|
301
352
|
|
|
302
|
-
def _get_engine(self,
|
|
353
|
+
def _get_engine(self, schema: str | None = None) -> Engine:
|
|
303
354
|
"""
|
|
304
|
-
Create engine from connection string.
|
|
355
|
+
Create a SQLAlchemy engine from the connection string.
|
|
356
|
+
|
|
357
|
+
Establishes a database engine using the configured connection
|
|
358
|
+
string with appropriate connection parameters and schema settings.
|
|
305
359
|
|
|
306
360
|
Parameters
|
|
307
361
|
----------
|
|
308
|
-
|
|
309
|
-
The
|
|
310
|
-
|
|
311
|
-
The schema.
|
|
362
|
+
schema : str, optional
|
|
363
|
+
The database schema to set in the search path.
|
|
364
|
+
If provided, sets the PostgreSQL search_path option.
|
|
312
365
|
|
|
313
366
|
Returns
|
|
314
367
|
-------
|
|
315
368
|
Engine
|
|
316
|
-
|
|
369
|
+
A configured SQLAlchemy engine instance.
|
|
370
|
+
|
|
371
|
+
Raises
|
|
372
|
+
------
|
|
373
|
+
StoreError
|
|
374
|
+
If the connection string is invalid or engine creation fails.
|
|
317
375
|
"""
|
|
318
|
-
connection_string = self._get_connection_string(
|
|
376
|
+
connection_string = self._get_connection_string()
|
|
319
377
|
if not isinstance(connection_string, str):
|
|
320
378
|
raise StoreError("Connection string must be a string.")
|
|
321
379
|
try:
|
|
@@ -326,42 +384,68 @@ class SqlStore(Store):
|
|
|
326
384
|
except Exception as ex:
|
|
327
385
|
raise StoreError(f"Something wrong with connection string. Arguments: {str(ex.args)}")
|
|
328
386
|
|
|
329
|
-
def _check_factory(self, schema: str | None = None) -> Engine:
|
|
387
|
+
def _check_factory(self, retry: bool = True, schema: str | None = None) -> Engine:
|
|
330
388
|
"""
|
|
331
|
-
|
|
389
|
+
Validate database accessibility and return a working engine.
|
|
390
|
+
|
|
391
|
+
Creates and tests a database engine, with retry capability if
|
|
392
|
+
the initial connection fails. Handles configuration changes
|
|
393
|
+
and ensures the database is accessible before returning.
|
|
332
394
|
|
|
333
395
|
Parameters
|
|
334
396
|
----------
|
|
335
|
-
|
|
336
|
-
|
|
397
|
+
retry : bool, default True
|
|
398
|
+
Whether to attempt a retry with different configuration
|
|
399
|
+
if the initial connection fails.
|
|
400
|
+
schema : str, optional
|
|
401
|
+
The database schema to configure in the engine.
|
|
337
402
|
|
|
338
403
|
Returns
|
|
339
404
|
-------
|
|
340
405
|
Engine
|
|
341
|
-
|
|
406
|
+
A validated SQLAlchemy engine with confirmed database access.
|
|
407
|
+
|
|
408
|
+
Raises
|
|
409
|
+
------
|
|
410
|
+
ConfigError
|
|
411
|
+
If database access fails and retry is exhausted or disabled.
|
|
342
412
|
"""
|
|
343
413
|
try:
|
|
344
|
-
engine = self._get_engine(
|
|
345
|
-
self._check_access_to_storage(engine)
|
|
346
|
-
except StoreError:
|
|
347
|
-
engine = self._get_engine(CredsOrigin.FILE.value, schema)
|
|
414
|
+
engine = self._get_engine(schema)
|
|
348
415
|
self._check_access_to_storage(engine)
|
|
349
|
-
|
|
416
|
+
return engine
|
|
417
|
+
except ConfigError as e:
|
|
418
|
+
if retry:
|
|
419
|
+
self._configurator.eval_change_origin()
|
|
420
|
+
return self._check_factory(retry=False, schema=schema)
|
|
421
|
+
raise e
|
|
350
422
|
|
|
351
423
|
@staticmethod
|
|
352
424
|
def _parse_path(path: str) -> dict:
|
|
353
425
|
"""
|
|
354
|
-
Parse
|
|
426
|
+
Parse a SQL URI path into its component parts.
|
|
427
|
+
|
|
428
|
+
Breaks down a SQL URI into database, schema, and table components.
|
|
429
|
+
Supports both full three-part paths and simplified two-part paths
|
|
430
|
+
(using 'public' as default schema).
|
|
355
431
|
|
|
356
432
|
Parameters
|
|
357
433
|
----------
|
|
358
434
|
path : str
|
|
359
|
-
The path
|
|
435
|
+
The SQL URI path to parse in the format
|
|
436
|
+
'sql://database/schema/table' or 'sql://database/table'.
|
|
360
437
|
|
|
361
438
|
Returns
|
|
362
439
|
-------
|
|
363
440
|
dict
|
|
364
|
-
|
|
441
|
+
Dictionary containing parsed components with keys:
|
|
442
|
+
'database', 'schema', and 'table'.
|
|
443
|
+
|
|
444
|
+
Raises
|
|
445
|
+
------
|
|
446
|
+
ValueError
|
|
447
|
+
If the path format is invalid or doesn't follow the
|
|
448
|
+
expected SQL URI structure.
|
|
365
449
|
"""
|
|
366
450
|
# Parse path
|
|
367
451
|
err_msg = "Invalid SQL path. Must be sql://<database>/<schema>/<table> or sql://<database>/<table>"
|
|
@@ -378,45 +462,54 @@ class SqlStore(Store):
|
|
|
378
462
|
|
|
379
463
|
def _get_schema(self, uri: str) -> str:
|
|
380
464
|
"""
|
|
381
|
-
|
|
465
|
+
Extract the schema name from a SQL URI.
|
|
466
|
+
|
|
467
|
+
Parses the SQL URI and returns the schema component.
|
|
468
|
+
Uses 'public' as the default schema if not specified in the URI.
|
|
382
469
|
|
|
383
470
|
Parameters
|
|
384
471
|
----------
|
|
385
472
|
uri : str
|
|
386
|
-
The URI.
|
|
473
|
+
The SQL URI to extract the schema from.
|
|
387
474
|
|
|
388
475
|
Returns
|
|
389
476
|
-------
|
|
390
477
|
str
|
|
391
|
-
The name
|
|
478
|
+
The schema name extracted from the URI.
|
|
392
479
|
"""
|
|
393
480
|
return str(self._parse_path(uri).get("schema"))
|
|
394
481
|
|
|
395
482
|
def _get_table_name(self, uri: str) -> str:
|
|
396
483
|
"""
|
|
397
|
-
|
|
484
|
+
Extract the table name from a SQL URI.
|
|
485
|
+
|
|
486
|
+
Parses the SQL URI and returns the table component,
|
|
487
|
+
which is always the last part of the URI path.
|
|
398
488
|
|
|
399
489
|
Parameters
|
|
400
490
|
----------
|
|
401
491
|
uri : str
|
|
402
|
-
The URI.
|
|
492
|
+
The SQL URI to extract the table name from.
|
|
403
493
|
|
|
404
494
|
Returns
|
|
405
495
|
-------
|
|
406
496
|
str
|
|
407
|
-
The name
|
|
497
|
+
The table name extracted from the URI.
|
|
408
498
|
"""
|
|
409
499
|
return str(self._parse_path(uri).get("table"))
|
|
410
500
|
|
|
411
501
|
@staticmethod
|
|
412
502
|
def _check_access_to_storage(engine: Engine) -> None:
|
|
413
503
|
"""
|
|
414
|
-
|
|
504
|
+
Verify database connectivity using the provided engine.
|
|
505
|
+
|
|
506
|
+
Tests the database connection by attempting to connect.
|
|
507
|
+
Properly disposes of the engine if connection fails.
|
|
415
508
|
|
|
416
509
|
Parameters
|
|
417
510
|
----------
|
|
418
511
|
engine : Engine
|
|
419
|
-
|
|
512
|
+
The SQLAlchemy engine to test for connectivity.
|
|
420
513
|
|
|
421
514
|
Returns
|
|
422
515
|
-------
|
|
@@ -424,11 +517,11 @@ class SqlStore(Store):
|
|
|
424
517
|
|
|
425
518
|
Raises
|
|
426
519
|
------
|
|
427
|
-
|
|
428
|
-
If
|
|
520
|
+
ConfigError
|
|
521
|
+
If database connection cannot be established.
|
|
429
522
|
"""
|
|
430
523
|
try:
|
|
431
524
|
engine.connect()
|
|
432
525
|
except SQLAlchemyError:
|
|
433
526
|
engine.dispose()
|
|
434
|
-
raise
|
|
527
|
+
raise ConfigError("No access to db!")
|
digitalhub/utils/__init__.py
CHANGED
digitalhub/utils/enums.py
CHANGED
digitalhub/utils/exceptions.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: © 2025 DSLab - Fondazione Bruno Kessler
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
1
5
|
from __future__ import annotations
|
|
2
6
|
|
|
3
7
|
|
|
@@ -77,3 +81,9 @@ class ClientError(Exception):
|
|
|
77
81
|
"""
|
|
78
82
|
Raised when errors are encountered on clients.
|
|
79
83
|
"""
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class ConfigError(Exception):
|
|
87
|
+
"""
|
|
88
|
+
Raised when errors are encountered on configs.
|
|
89
|
+
"""
|