digitalhub 0.13.0b3__py3-none-any.whl → 0.14.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- digitalhub/__init__.py +3 -8
- digitalhub/context/api.py +43 -6
- digitalhub/context/builder.py +1 -5
- digitalhub/context/context.py +28 -13
- digitalhub/entities/_base/_base/entity.py +0 -15
- digitalhub/entities/_base/context/entity.py +1 -4
- digitalhub/entities/_base/entity/builder.py +5 -5
- digitalhub/entities/_base/entity/entity.py +0 -8
- digitalhub/entities/_base/executable/entity.py +195 -87
- digitalhub/entities/_base/material/entity.py +11 -23
- digitalhub/entities/_base/material/utils.py +28 -4
- digitalhub/entities/_base/runtime_entity/builder.py +53 -18
- digitalhub/entities/_base/unversioned/entity.py +1 -1
- digitalhub/entities/_base/versioned/entity.py +1 -1
- digitalhub/entities/_commons/enums.py +1 -31
- digitalhub/entities/_commons/metrics.py +64 -30
- digitalhub/entities/_commons/utils.py +119 -30
- digitalhub/entities/_constructors/_resources.py +151 -0
- digitalhub/entities/{_base/entity/_constructors → _constructors}/name.py +18 -0
- digitalhub/entities/_processors/base/crud.py +381 -0
- digitalhub/entities/_processors/base/import_export.py +118 -0
- digitalhub/entities/_processors/base/processor.py +299 -0
- digitalhub/entities/_processors/base/special_ops.py +104 -0
- digitalhub/entities/_processors/context/crud.py +652 -0
- digitalhub/entities/_processors/context/import_export.py +242 -0
- digitalhub/entities/_processors/context/material.py +123 -0
- digitalhub/entities/_processors/context/processor.py +400 -0
- digitalhub/entities/_processors/context/special_ops.py +476 -0
- digitalhub/entities/_processors/processors.py +12 -0
- digitalhub/entities/_processors/utils.py +38 -102
- digitalhub/entities/artifact/crud.py +58 -22
- digitalhub/entities/artifact/utils.py +28 -13
- digitalhub/entities/builders.py +2 -0
- digitalhub/entities/dataitem/crud.py +63 -20
- digitalhub/entities/dataitem/table/entity.py +27 -22
- digitalhub/entities/dataitem/utils.py +82 -32
- digitalhub/entities/function/_base/entity.py +3 -6
- digitalhub/entities/function/crud.py +55 -24
- digitalhub/entities/model/_base/entity.py +62 -20
- digitalhub/entities/model/crud.py +59 -23
- digitalhub/entities/model/mlflow/utils.py +29 -20
- digitalhub/entities/model/utils.py +28 -13
- digitalhub/entities/project/_base/builder.py +0 -6
- digitalhub/entities/project/_base/entity.py +337 -164
- digitalhub/entities/project/_base/spec.py +4 -4
- digitalhub/entities/project/crud.py +28 -71
- digitalhub/entities/project/utils.py +7 -3
- digitalhub/entities/run/_base/builder.py +0 -4
- digitalhub/entities/run/_base/entity.py +70 -63
- digitalhub/entities/run/crud.py +79 -26
- digitalhub/entities/secret/_base/entity.py +1 -5
- digitalhub/entities/secret/crud.py +31 -28
- digitalhub/entities/task/_base/builder.py +0 -4
- digitalhub/entities/task/_base/entity.py +5 -5
- digitalhub/entities/task/_base/models.py +13 -16
- digitalhub/entities/task/crud.py +61 -29
- digitalhub/entities/trigger/_base/entity.py +1 -5
- digitalhub/entities/trigger/crud.py +89 -30
- digitalhub/entities/workflow/_base/entity.py +3 -8
- digitalhub/entities/workflow/crud.py +55 -24
- digitalhub/factory/entity.py +283 -0
- digitalhub/factory/enums.py +18 -0
- digitalhub/factory/registry.py +197 -0
- digitalhub/factory/runtime.py +44 -0
- digitalhub/factory/utils.py +3 -54
- digitalhub/runtimes/_base.py +2 -2
- digitalhub/stores/client/{dhcore/api_builder.py → api_builder.py} +3 -3
- digitalhub/stores/client/builder.py +19 -31
- digitalhub/stores/client/client.py +322 -0
- digitalhub/stores/client/configurator.py +408 -0
- digitalhub/stores/client/enums.py +50 -0
- digitalhub/stores/client/{dhcore/error_parser.py → error_parser.py} +0 -4
- digitalhub/stores/client/header_manager.py +61 -0
- digitalhub/stores/client/http_handler.py +152 -0
- digitalhub/stores/client/{_base/key_builder.py → key_builder.py} +14 -14
- digitalhub/stores/client/params_builder.py +330 -0
- digitalhub/stores/client/response_processor.py +102 -0
- digitalhub/stores/client/utils.py +35 -0
- digitalhub/stores/{credentials → configurator}/api.py +5 -9
- digitalhub/stores/configurator/configurator.py +123 -0
- digitalhub/stores/{credentials → configurator}/enums.py +27 -10
- digitalhub/stores/configurator/handler.py +213 -0
- digitalhub/stores/{credentials → configurator}/ini_module.py +31 -22
- digitalhub/stores/data/_base/store.py +0 -20
- digitalhub/stores/data/api.py +5 -7
- digitalhub/stores/data/builder.py +53 -27
- digitalhub/stores/data/local/store.py +0 -103
- digitalhub/stores/data/remote/store.py +0 -4
- digitalhub/stores/data/s3/configurator.py +39 -77
- digitalhub/stores/data/s3/store.py +57 -37
- digitalhub/stores/data/sql/configurator.py +66 -46
- digitalhub/stores/data/sql/store.py +171 -104
- digitalhub/stores/readers/data/factory.py +0 -8
- digitalhub/stores/readers/data/pandas/reader.py +9 -19
- digitalhub/utils/file_utils.py +0 -17
- digitalhub/utils/generic_utils.py +1 -14
- digitalhub/utils/git_utils.py +0 -8
- digitalhub/utils/io_utils.py +0 -12
- digitalhub/utils/store_utils.py +44 -0
- {digitalhub-0.13.0b3.dist-info → digitalhub-0.14.9.dist-info}/METADATA +5 -4
- {digitalhub-0.13.0b3.dist-info → digitalhub-0.14.9.dist-info}/RECORD +112 -113
- {digitalhub-0.13.0b3.dist-info → digitalhub-0.14.9.dist-info}/WHEEL +1 -1
- digitalhub/entities/_commons/types.py +0 -9
- digitalhub/entities/_processors/base.py +0 -531
- digitalhub/entities/_processors/context.py +0 -1299
- digitalhub/entities/task/_base/utils.py +0 -22
- digitalhub/factory/factory.py +0 -381
- digitalhub/stores/client/_base/api_builder.py +0 -34
- digitalhub/stores/client/_base/client.py +0 -243
- digitalhub/stores/client/_base/params_builder.py +0 -34
- digitalhub/stores/client/api.py +0 -36
- digitalhub/stores/client/dhcore/client.py +0 -613
- digitalhub/stores/client/dhcore/configurator.py +0 -675
- digitalhub/stores/client/dhcore/enums.py +0 -34
- digitalhub/stores/client/dhcore/key_builder.py +0 -62
- digitalhub/stores/client/dhcore/models.py +0 -40
- digitalhub/stores/client/dhcore/params_builder.py +0 -278
- digitalhub/stores/client/dhcore/utils.py +0 -94
- digitalhub/stores/client/local/api_builder.py +0 -116
- digitalhub/stores/client/local/client.py +0 -573
- digitalhub/stores/client/local/enums.py +0 -15
- digitalhub/stores/client/local/key_builder.py +0 -62
- digitalhub/stores/client/local/params_builder.py +0 -120
- digitalhub/stores/credentials/__init__.py +0 -3
- digitalhub/stores/credentials/configurator.py +0 -210
- digitalhub/stores/credentials/handler.py +0 -176
- digitalhub/stores/credentials/store.py +0 -81
- digitalhub/stores/data/enums.py +0 -15
- digitalhub/stores/data/s3/utils.py +0 -78
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/__init__.py +0 -0
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/metadata.py +0 -0
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/spec.py +0 -0
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/status.py +0 -0
- /digitalhub/entities/{_base/entity/_constructors → _constructors}/uuid.py +0 -0
- /digitalhub/{stores/client/_base → entities/_processors/base}/__init__.py +0 -0
- /digitalhub/{stores/client/dhcore → entities/_processors/context}/__init__.py +0 -0
- /digitalhub/stores/{client/local → configurator}/__init__.py +0 -0
- {digitalhub-0.13.0b3.dist-info → digitalhub-0.14.9.dist-info}/licenses/AUTHORS +0 -0
- {digitalhub-0.13.0b3.dist-info → digitalhub-0.14.9.dist-info}/licenses/LICENSE +0 -0

digitalhub/stores/data/sql/store.py
CHANGED

@@ -15,6 +15,7 @@ from sqlalchemy.engine import Engine
 from sqlalchemy.exc import SQLAlchemyError
 
 from digitalhub.stores.data._base.store import Store
+from digitalhub.stores.data.sql.configurator import SqlStoreConfigurator
 from digitalhub.stores.readers.data.api import get_reader_by_object
 from digitalhub.utils.exceptions import ConfigError, StoreError
 from digitalhub.utils.types import SourcesOrListOfSources
@@ -22,19 +23,28 @@ from digitalhub.utils.types import SourcesOrListOfSources
 if typing.TYPE_CHECKING:
     from sqlalchemy.engine.row import Row
 
-
-
+
+ENGINE_CONNECTION_TIMEOUT = 30
 
 
 class SqlStore(Store):
     """
-    SQL store
-
+    SQL-based data store implementation for database operations.
+
+    Provides functionality for reading, writing, and managing data in SQL
+    databases. Implements the Store interface with SQL-specific operations
+    including table downloads, DataFrame operations, and query execution.
+
+    Attributes
+    ----------
+    _configurator : SqlStoreConfigurator
+        The configurator instance for managing SQL database credentials
+        and connection parameters.
     """
 
-    def __init__(self
-        super().__init__(
-        self._configurator: SqlStoreConfigurator
+    def __init__(self) -> None:
+        super().__init__()
+        self._configurator: SqlStoreConfigurator = SqlStoreConfigurator()
 
     ##############################
     # I/O methods
@@ -47,21 +57,33 @@ class SqlStore(Store):
         overwrite: bool = False,
     ) -> str:
         """
-        Download
+        Download a SQL table as a Parquet file to local storage.
+
+        Retrieves data from a SQL table and saves it as a Parquet file
+        at the specified destination. The source path should be in the
+        format 'sql://database/schema/table'.
 
         Parameters
         ----------
         src : str
-
-
-
+            The SQL URI path of the table to download in the format
+            'sql://database/schema/table' or 'sql://database/table'.
+        dst : Path
+            The destination path on the local filesystem where the
+            Parquet file will be saved.
         overwrite : bool
-
+            Whether to overwrite existing files at the destination path.
 
         Returns
         -------
         str
-
+            The absolute path of the downloaded Parquet file.
+
+        Raises
+        ------
+        StoreError
+            If the destination path has an invalid extension or if
+            file operations fail.
         """
         table_name = self._get_table_name(src) + ".parquet"
         # Case where dst is not provided
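
The download() docstring above pins down the source format ('sql://database/schema/table') and the hunk shows the local file name being derived as _get_table_name(src) + ".parquet". A minimal standalone sketch of that naming step; sql_table_to_parquet_name is an illustrative helper written for this note, not a digitalhub function:

    from pathlib import Path
    from urllib.parse import urlparse


    def sql_table_to_parquet_name(src: str, dst_dir: Path) -> Path:
        # 'sql://database/schema/table' -> netloc='database', path='/schema/table'
        parsed = urlparse(src)
        table = parsed.path.rstrip("/").split("/")[-1]
        # Mirror the behaviour shown in the hunk: '<table>.parquet' under the destination
        return dst_dir / f"{table}.parquet"


    print(sql_table_to_parquet_name("sql://mydb/public/orders", Path("/tmp/data")))
    # /tmp/data/orders.parquet
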
@@ -92,12 +114,12 @@ class SqlStore(Store):
         dst: str,
     ) -> list[tuple[str, str]]:
         """
-        Upload
+        Upload artifacts to SQL storage.
 
         Raises
         ------
         StoreError
-
+            Always raised as SQL store does not support direct upload.
         """
         raise StoreError("SQL store does not support upload.")
 
@@ -107,17 +129,12 @@ class SqlStore(Store):
         paths: list[tuple[str, str]],
     ) -> list[dict]:
         """
-        Get file information from SQL
-
-        Parameters
-        ----------
-        paths : list[str]
-            List of source paths.
+        Get file metadata information from SQL storage.
 
         Returns
         -------
         list[dict]
-
+            Empty list.
         """
         return []
 
@@ -133,23 +150,33 @@ class SqlStore(Store):
         **kwargs,
     ) -> Any:
         """
-        Read DataFrame from
+        Read a DataFrame from a SQL table.
+
+        Connects to the SQL database and reads data from the specified
+        table into a DataFrame using the specified engine (pandas, polars, etc.).
 
         Parameters
         ----------
         path : SourcesOrListOfSources
-
+            The SQL URI path to read from in the format
+            'sql://database/schema/table'. Only single paths are supported.
         file_format : str
-
+            File format specification (not used for SQL operations).
         engine : str
-
+            DataFrame engine to use (e.g., 'pandas', 'polars').
+            If None, uses the default engine.
         **kwargs : dict
-
+            Additional keyword arguments passed to the reader.
 
         Returns
         -------
         Any
-            DataFrame.
+            DataFrame object containing the table data.
+
+        Raises
+        ------
+        StoreError
+            If a list of paths is provided (only single path supported).
         """
         if isinstance(path, list):
             raise StoreError("SQL store can only read a single DataFrame at a time.")
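
read_df() above reads exactly one table per call and hands it to the configured DataFrame engine. A rough pandas-only equivalent, assuming a plain SQLAlchemy connection string; the real method goes through digitalhub's reader registry and configurator rather than this read_sql_table_df helper:

    import pandas as pd
    from sqlalchemy import create_engine


    def read_sql_table_df(path: str | list[str], connection_string: str) -> pd.DataFrame:
        # Single path only, mirroring the guard in the hunk above
        if isinstance(path, list):
            raise ValueError("Only a single DataFrame can be read at a time.")
        # Assumes the full three-part form 'sql://database/schema/table'
        _, schema, table = path.removeprefix("sql://").split("/")
        engine = create_engine(connection_string)
        try:
            return pd.read_sql_table(table, engine, schema=schema)
        finally:
            engine.dispose()
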
@@ -171,21 +198,26 @@ class SqlStore(Store):
         engine: str | None = None,
     ) -> Any:
         """
-
+        Execute a custom SQL query and return results as a DataFrame.
+
+        Runs a SQL query against the database specified in the path
+        and returns the results using the specified DataFrame engine.
 
         Parameters
         ----------
         query : str
-            The query to execute.
+            The SQL query string to execute against the database.
         path : str
-
+            The SQL URI path specifying the database connection
+            in the format 'sql://database/schema/table'.
         engine : str
-
+            DataFrame engine to use for result processing
+            (e.g., 'pandas', 'polars'). If None, uses the default.
 
         Returns
         -------
         Any
-            DataFrame.
+            DataFrame object containing the query results.
         """
         reader = self._get_reader(engine)
         schema = self._get_schema(path)
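
query() above executes an arbitrary SQL statement and returns the result through the chosen DataFrame engine. A hedged pandas approximation (run_query is illustrative; digitalhub derives the connection from the sql:// path and its configurator instead of taking a connection string):

    import pandas as pd
    from sqlalchemy import create_engine, text


    def run_query(query: str, connection_string: str) -> pd.DataFrame:
        # Execute the statement and collect the result set into a DataFrame
        engine = create_engine(connection_string)
        try:
            with engine.connect() as conn:
                return pd.read_sql(text(query), conn)
        finally:
            engine.dispose()
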
@@ -194,47 +226,74 @@ class SqlStore(Store):
 
     def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
         """
-        Write a
+        Write a DataFrame to a SQL database table.
+
+        Takes a DataFrame and writes it to the specified SQL table.
+        The destination should be in SQL URI format. Additional
+        parameters are passed to the underlying to_sql() method.
 
         Parameters
         ----------
         df : Any
-            The
+            The DataFrame object to write to the database.
         dst : str
-            The destination
+            The destination SQL URI in the format
+            'sql://database/schema/table' or 'sql://database/table'.
+        extension : str
+            File extension parameter (not used for SQL operations).
         **kwargs : dict
-
+            Additional keyword arguments passed to the DataFrame's
+            to_sql() method for controlling write behavior.
 
         Returns
         -------
         str
-
+            The SQL URI path where the DataFrame was written.
         """
         schema = self._get_schema(dst)
         table = self._get_table_name(dst)
         return self._upload_table(df, schema, table, **kwargs)
 
+    ##############################
+    # Wrapper methods
+    ##############################
+
+    def get_engine(self, schema: str | None = None) -> Engine:
+        """
+        Get a SQLAlchemy engine connected to the database.
+
+        Returns
+        -------
+        Engine
+            A SQLAlchemy engine instance connected to the database.
+        """
+        return self._check_factory(schema=schema)
+
     ##############################
     # Private I/O methods
     ##############################
 
     def _download_table(self, schema: str, table: str, dst: str) -> str:
         """
-        Download a table from SQL
+        Download a specific table from SQL database to Parquet file.
+
+        Internal method that handles the actual table download process.
+        Connects to the database, retrieves all data from the specified
+        table, and writes it to a Parquet file using PyArrow.
 
         Parameters
         ----------
         schema : str
-            The
+            The database schema name containing the table.
         table : str
-            The
+            The name of the table to download.
         dst : str
-            The
+            The local file path where the Parquet file will be saved.
 
         Returns
         -------
         str
-            The destination path.
+            The destination file path of the created Parquet file.
         """
         engine = self._check_factory(schema=schema)
 
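
write_df() above delegates to _upload_table(), which ends in the DataFrame's to_sql() call and returns the destination as a sql:// URI. A sketch of that round trip with pandas; write_df_to_sql and its parameters are assumptions made for this example, not digitalhub API:

    import pandas as pd
    from sqlalchemy import create_engine


    def write_df_to_sql(df: pd.DataFrame, database: str, schema: str, table: str,
                        connection_string: str, **kwargs) -> str:
        engine = create_engine(connection_string)
        try:
            # kwargs such as if_exists="replace" or index=False flow straight to to_sql()
            df.to_sql(table, engine, schema=schema, **kwargs)
        finally:
            engine.dispose()
        # Report the destination in the documented URI format
        return f"sql://{database}/{schema}/{table}"


    # Example (placeholder connection string):
    # write_df_to_sql(df, "mydb", "public", "orders",
    #                 "postgresql+psycopg2://user:pwd@host:5432/mydb",
    #                 if_exists="replace", index=False)
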
@@ -258,23 +317,29 @@ class SqlStore(Store):
 
     def _upload_table(self, df: Any, schema: str, table: str, **kwargs) -> str:
         """
-        Upload a
+        Upload a DataFrame to a SQL table.
+
+        Internal method that handles writing a DataFrame to a SQL database
+        table. Uses the appropriate reader based on the DataFrame type
+        and manages the database connection.
 
         Parameters
         ----------
-        df :
-            The
+        df : Any
+            The DataFrame object to upload to the database.
         schema : str
-
+            The target database schema name.
         table : str
-
+            The target table name within the schema.
         **kwargs : dict
-
+            Additional keyword arguments passed to the write operation,
+            such as if_exists, index, method, etc.
 
         Returns
         -------
         str
-            The SQL URI where the
+            The SQL URI where the DataFrame was saved in the format
+            'sql://database/schema/table'.
         """
         reader = get_reader_by_object(df)
         engine = self._check_factory()
@@ -286,84 +351,81 @@ class SqlStore(Store):
     # Helper methods
     ##############################
 
-    def
+    def _get_engine(self, connection_string: str, schema: str | None = None) -> Engine:
         """
-
+        Create a SQLAlchemy engine from the connection string.
 
-
-
-        str
-            The connection string.
-        """
-        return self._configurator.get_sql_conn_string()
-
-    def _get_engine(self, origin: str, schema: str | None = None) -> Engine:
-        """
-        Create engine from connection string.
+        Establishes a database engine using the configured connection
+        string with appropriate connection parameters and schema settings.
 
         Parameters
         ----------
-
-            The
+        connection_string : str
+            The database connection string.
         schema : str
-            The schema.
+            The database schema to set in the search path.
+            If provided, sets the PostgreSQL search_path option.
 
         Returns
         -------
         Engine
-
+            A configured SQLAlchemy engine instance.
         """
-
-        if not
-
-
-
-
-                connect_args["options"] = f"-csearch_path={schema}"
-            return create_engine(connection_string, connect_args=connect_args)
-        except Exception as ex:
-            raise StoreError(f"Something wrong with connection string. Arguments: {str(ex.args)}")
-
-    def _check_factory(self, retry: bool = True, schema: str | None = None) -> Engine:
+        connect_args = {"connect_timeout": ENGINE_CONNECTION_TIMEOUT}
+        if schema is not None:
+            connect_args["options"] = f"-csearch_path={schema}"
+        return create_engine(connection_string, connect_args=connect_args)
+
+    def _check_factory(self, schema: str | None = None) -> Engine:
         """
-
+        Validate database accessibility and return a working engine.
 
         Parameters
         ----------
-        retry : bool
-            Whether to retry if the database is not accessible.
         schema : str
-            The schema.
+            The database schema to configure in the engine.
 
         Returns
         -------
         Engine
-
+            A validated SQLAlchemy engine with confirmed database access.
         """
+        connection_string = self._configurator.get_sql_conn_string()
+        engine = self._get_engine(connection_string, schema)
         try:
-            engine = self._get_engine(schema)
             self._check_access_to_storage(engine)
             return engine
         except ConfigError as e:
-            if
-                self.
-                return self._check_factory(retry=False, schema=schema)
+            if self._configurator.eval_retry():
+                return self._check_factory(schema=schema)
             raise e
 
     @staticmethod
     def _parse_path(path: str) -> dict:
         """
-        Parse
+        Parse a SQL URI path into its component parts.
+
+        Breaks down a SQL URI into database, schema, and table components.
+        Supports both full three-part paths and simplified two-part paths
+        (using 'public' as default schema).
 
         Parameters
         ----------
         path : str
-            The path
+            The SQL URI path to parse in the format
+            'sql://database/schema/table' or 'sql://database/table'.
 
         Returns
         -------
         dict
-
+            Dictionary containing parsed components with keys:
+            'database', 'schema', and 'table'.
+
+        Raises
+        ------
+        ValueError
+            If the path format is invalid or doesn't follow the
+            expected SQL URI structure.
         """
         # Parse path
         err_msg = "Invalid SQL path. Must be sql://<database>/<schema>/<table> or sql://<database>/<table>"
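
The reworked _get_engine() above applies the new ENGINE_CONNECTION_TIMEOUT constant and, when a schema is given, a PostgreSQL search_path option. The same call reduced to a standalone snippet (the connection string in the comment is a placeholder):

    from sqlalchemy import create_engine
    from sqlalchemy.engine import Engine

    ENGINE_CONNECTION_TIMEOUT = 30  # same constant the diff introduces at module level


    def make_engine(connection_string: str, schema: str | None = None) -> Engine:
        # connect_timeout and the -csearch_path option are passed to the DBAPI (e.g. psycopg2)
        connect_args: dict = {"connect_timeout": ENGINE_CONNECTION_TIMEOUT}
        if schema is not None:
            connect_args["options"] = f"-csearch_path={schema}"
        return create_engine(connection_string, connect_args=connect_args)


    # make_engine("postgresql+psycopg2://user:pwd@host:5432/mydb", schema="analytics")
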
@@ -380,54 +442,59 @@ class SqlStore(Store):
 
     def _get_schema(self, uri: str) -> str:
         """
-
+        Extract the schema name from a SQL URI.
+
+        Parses the SQL URI and returns the schema component.
+        Uses 'public' as the default schema if not specified in the URI.
 
         Parameters
         ----------
         uri : str
-            The URI.
+            The SQL URI to extract the schema from.
 
         Returns
         -------
         str
-            The name
+            The schema name extracted from the URI.
         """
         return str(self._parse_path(uri).get("schema"))
 
     def _get_table_name(self, uri: str) -> str:
         """
-
+        Extract the table name from a SQL URI.
+
+        Parses the SQL URI and returns the table component,
+        which is always the last part of the URI path.
 
         Parameters
         ----------
         uri : str
-            The URI.
+            The SQL URI to extract the table name from.
 
         Returns
         -------
         str
-            The name
+            The table name extracted from the URI.
         """
         return str(self._parse_path(uri).get("table"))
 
     @staticmethod
     def _check_access_to_storage(engine: Engine) -> None:
         """
-
+        Verify database connectivity using the provided engine.
+
+        Tests the database connection by attempting to connect.
+        Properly disposes of the engine if connection fails.
 
         Parameters
         ----------
         engine : Engine
-
-
-        Returns
-        -------
-        None
+            The SQLAlchemy engine to test for connectivity.
 
         Raises
         ------
-
-            If
+        ConfigError
+            If database connection cannot be established.
         """
         try:
             engine.connect()
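
_parse_path(), _get_schema() and _get_table_name() above split a sql:// URI into database, schema and table, falling back to 'public' for two-part paths. A standalone parser with the same documented behaviour; the exact validation inside digitalhub may differ:

    def parse_sql_path(path: str) -> dict:
        err = "Invalid SQL path. Must be sql://<database>/<schema>/<table> or sql://<database>/<table>"
        if not path.startswith("sql://"):
            raise ValueError(err)
        parts = path.removeprefix("sql://").split("/")
        if len(parts) == 2:
            database, table = parts
            schema = "public"  # default schema for the two-part form
        elif len(parts) == 3:
            database, schema, table = parts
        else:
            raise ValueError(err)
        return {"database": database, "schema": schema, "table": table}


    print(parse_sql_path("sql://mydb/orders"))
    # {'database': 'mydb', 'schema': 'public', 'table': 'orders'}
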
digitalhub/stores/readers/data/factory.py
CHANGED

@@ -33,10 +33,6 @@ class ReaderFactory:
             Reader name.
         builder : DataframeReader
             Builder object.
-
-        Returns
-        -------
-        None
         """
         if self._engine_builders is None:
             self._engine_builders = {}
@@ -104,10 +100,6 @@ class ReaderFactory:
         ----------
         engine : str
             Engine name.
-
-        Returns
-        -------
-        None
         """
         if engine not in self._engine_builders:
             raise BuilderError(f"Engine {engine} not found.")
digitalhub/stores/readers/data/pandas/reader.py
CHANGED

@@ -104,10 +104,6 @@ class DataframeReaderPandas(DataframeReader):
             The destination of the dataframe.
         **kwargs : dict
             Keyword arguments.
-
-        Returns
-        -------
-        None
         """
         if extension == FileExtensions.CSV.value:
             return self.write_csv(df, dst, **kwargs)
@@ -128,12 +124,10 @@ class DataframeReaderPandas(DataframeReader):
             The destination of the dataframe.
         **kwargs : dict
             Keyword arguments.
-
-        Returns
-        -------
-        None
         """
-
+        if "index" not in kwargs:
+            kwargs["index"] = False
+        df.to_csv(dst, **kwargs)
 
     @staticmethod
     def write_parquet(df: pd.DataFrame, dst: str | BytesIO, **kwargs) -> None:
@@ -148,12 +142,10 @@ class DataframeReaderPandas(DataframeReader):
             The destination of the dataframe.
         **kwargs : dict
             Keyword arguments.
-
-        Returns
-        -------
-        None
         """
-
+        if "index" not in kwargs:
+            kwargs["index"] = False
+        df.to_parquet(dst, **kwargs)
 
     @staticmethod
     def write_table(df: pd.DataFrame, table: str, engine: Any, schema: str | None = None, **kwargs) -> None:
@@ -172,12 +164,10 @@ class DataframeReaderPandas(DataframeReader):
             The destination schema.
         **kwargs : dict
             Keyword arguments.
-
-        Returns
-        -------
-        None
         """
-
+        if "index" not in kwargs:
+            kwargs["index"] = False
+        df.to_sql(table, engine, schema=schema, **kwargs)
 
     ##############################
     # Utils
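
The three pandas writers above (write_csv, write_parquet, write_table) now default index to False while still letting callers override it. The same pattern in isolation, using kwargs.setdefault as an equivalent idiom to the explicit check in the diff:

    import pandas as pd


    def write_csv(df: pd.DataFrame, dst: str, **kwargs) -> None:
        # Do not write the index column unless the caller asks for it
        kwargs.setdefault("index", False)
        df.to_csv(dst, **kwargs)


    df = pd.DataFrame({"a": [1, 2]})
    write_csv(df, "/tmp/a.csv")                  # no index column
    write_csv(df, "/tmp/a_idx.csv", index=True)  # caller override still wins
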
digitalhub/utils/file_utils.py
CHANGED

@@ -141,23 +141,6 @@ def get_last_modified(data_path: str) -> str:
     return datetime.fromtimestamp(timestamp).astimezone().isoformat()
 
 
-def get_s3_path(src_path: str) -> str:
-    """
-    Get the S3 URI of a file path.
-
-    Parameters
-    ----------
-    src_path : str
-        Path to the file.
-
-    Returns
-    -------
-    str
-        The S3 URI of the file.
-    """
-    return Path(src_path).as_uri()
-
-
 def get_file_info_from_local(path: str, src_path: str) -> None | dict:
     """
     Get file info from a local path.
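
For reference on the removed get_s3_path() helper: it wrapped Path.as_uri(), which turns an absolute local path into a file:// URI rather than an s3:// one, as a quick check shows:

    from pathlib import Path

    # Path.as_uri() converts an absolute local path to a file:// URI
    print(Path("/data/artifacts/model.pkl").as_uri())
    # file:///data/artifacts/model.pkl
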
digitalhub/utils/generic_utils.py
CHANGED

@@ -95,10 +95,6 @@ def requests_chunk_download(source: str, filename: Path) -> None:
         URL to download the file from.
     filename : Path
         Path where to save the downloaded file.
-
-    Returns
-    -------
-    None
     """
     with requests.get(source, stream=True) as r:
         r.raise_for_status()
@@ -117,10 +113,6 @@ def extract_archive(path: Path, filename: Path) -> None:
         Directory where to extract the archive.
     filename : Path
         Path to the zip archive file.
-
-    Returns
-    -------
-    None
     """
     with ZipFile(filename, "r") as zip_file:
         zip_file.extractall(path)
@@ -256,14 +248,9 @@ def carriage_return_warn(string: str) -> None:
     ----------
     string : str
         The string to check.
-
-    Returns
-    -------
-    None
     """
     if "\r\n" in string:
-        warn("String contains a carriage return. "
-             "It may not be parsed correctly from remote runtimes.")
+        warn("String contains a carriage return. It may not be parsed correctly from remote runtimes.")
 
 
 def read_source(path: str) -> str:
digitalhub/utils/git_utils.py
CHANGED

@@ -47,10 +47,6 @@ def clone_repository(path: Path, url: str) -> None:
         Path where to save the repository.
     url : str
         URL of the repository.
-
-    Returns
-    -------
-    None
     """
     clean_path(path)
     checkout_object = get_checkout_object(url)
@@ -85,10 +81,6 @@ def clean_path(path: Path) -> None:
     ----------
     path : Path
        Path to clean.
-
-    Returns
-    -------
-    None
     """
 
     shutil.rmtree(path, ignore_errors=True)