digitalhub 0.13.0b2__py3-none-any.whl → 0.13.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of digitalhub might be problematic.

Files changed (61)
  1. digitalhub/__init__.py +1 -1
  2. digitalhub/context/api.py +5 -5
  3. digitalhub/context/builder.py +3 -5
  4. digitalhub/context/context.py +9 -1
  5. digitalhub/entities/_base/material/entity.py +3 -3
  6. digitalhub/entities/_commons/metrics.py +64 -30
  7. digitalhub/entities/_commons/utils.py +36 -9
  8. digitalhub/entities/_processors/base.py +150 -79
  9. digitalhub/entities/_processors/context.py +363 -212
  10. digitalhub/entities/_processors/utils.py +74 -30
  11. digitalhub/entities/artifact/utils.py +28 -13
  12. digitalhub/entities/dataitem/crud.py +10 -2
  13. digitalhub/entities/dataitem/table/entity.py +3 -3
  14. digitalhub/entities/dataitem/utils.py +84 -35
  15. digitalhub/entities/model/utils.py +28 -13
  16. digitalhub/entities/task/_base/models.py +12 -3
  17. digitalhub/factory/factory.py +25 -3
  18. digitalhub/factory/utils.py +11 -3
  19. digitalhub/runtimes/_base.py +1 -1
  20. digitalhub/runtimes/builder.py +18 -1
  21. digitalhub/stores/client/__init__.py +12 -0
  22. digitalhub/stores/client/_base/api_builder.py +14 -0
  23. digitalhub/stores/client/_base/client.py +93 -0
  24. digitalhub/stores/client/_base/key_builder.py +28 -0
  25. digitalhub/stores/client/_base/params_builder.py +14 -0
  26. digitalhub/stores/client/api.py +10 -5
  27. digitalhub/stores/client/builder.py +3 -1
  28. digitalhub/stores/client/dhcore/api_builder.py +17 -0
  29. digitalhub/stores/client/dhcore/client.py +276 -58
  30. digitalhub/stores/client/dhcore/configurator.py +336 -141
  31. digitalhub/stores/client/dhcore/error_parser.py +35 -1
  32. digitalhub/stores/client/dhcore/params_builder.py +113 -17
  33. digitalhub/stores/client/dhcore/utils.py +32 -14
  34. digitalhub/stores/client/local/api_builder.py +17 -0
  35. digitalhub/stores/client/local/client.py +6 -8
  36. digitalhub/stores/credentials/api.py +8 -8
  37. digitalhub/stores/credentials/configurator.py +176 -3
  38. digitalhub/stores/credentials/enums.py +17 -3
  39. digitalhub/stores/credentials/handler.py +73 -45
  40. digitalhub/stores/credentials/ini_module.py +59 -27
  41. digitalhub/stores/credentials/store.py +33 -1
  42. digitalhub/stores/data/_base/store.py +8 -3
  43. digitalhub/stores/data/api.py +20 -16
  44. digitalhub/stores/data/builder.py +69 -13
  45. digitalhub/stores/data/s3/configurator.py +64 -23
  46. digitalhub/stores/data/s3/store.py +30 -27
  47. digitalhub/stores/data/s3/utils.py +9 -9
  48. digitalhub/stores/data/sql/configurator.py +76 -25
  49. digitalhub/stores/data/sql/store.py +180 -91
  50. digitalhub/utils/exceptions.py +6 -0
  51. digitalhub/utils/file_utils.py +53 -30
  52. digitalhub/utils/generic_utils.py +41 -33
  53. digitalhub/utils/git_utils.py +24 -14
  54. digitalhub/utils/io_utils.py +19 -18
  55. digitalhub/utils/uri_utils.py +31 -31
  56. {digitalhub-0.13.0b2.dist-info → digitalhub-0.13.0b4.dist-info}/METADATA +1 -1
  57. {digitalhub-0.13.0b2.dist-info → digitalhub-0.13.0b4.dist-info}/RECORD +60 -61
  58. digitalhub/entities/_commons/types.py +0 -9
  59. {digitalhub-0.13.0b2.dist-info → digitalhub-0.13.0b4.dist-info}/WHEEL +0 -0
  60. {digitalhub-0.13.0b2.dist-info → digitalhub-0.13.0b4.dist-info}/licenses/AUTHORS +0 -0
  61. {digitalhub-0.13.0b2.dist-info → digitalhub-0.13.0b4.dist-info}/licenses/LICENSE +0 -0
digitalhub/stores/data/sql/store.py

@@ -14,10 +14,9 @@ from sqlalchemy import MetaData, Table, create_engine, select
 from sqlalchemy.engine import Engine
 from sqlalchemy.exc import SQLAlchemyError
 
-from digitalhub.stores.credentials.enums import CredsOrigin
 from digitalhub.stores.data._base.store import Store
 from digitalhub.stores.readers.data.api import get_reader_by_object
-from digitalhub.utils.exceptions import StoreError
+from digitalhub.utils.exceptions import ConfigError, StoreError
 from digitalhub.utils.types import SourcesOrListOfSources
 
 if typing.TYPE_CHECKING:
@@ -29,8 +28,17 @@ if typing.TYPE_CHECKING:
 
 class SqlStore(Store):
     """
-    SQL store class. It implements the Store interface and provides methods to fetch and persist
-    artifacts on SQL based storage.
+    SQL-based data store implementation for database operations.
+
+    Provides functionality for reading, writing, and managing data in SQL
+    databases. Implements the Store interface with SQL-specific operations
+    including table downloads, DataFrame operations, and query execution.
+
+    Attributes
+    ----------
+    _configurator : SqlStoreConfigurator
+        The configurator instance for managing SQL database credentials
+        and connection parameters.
     """
 
     def __init__(self, configurator: Configurator | None = None) -> None:
@@ -48,21 +56,33 @@ class SqlStore(Store):
         overwrite: bool = False,
     ) -> str:
         """
-        Download artifacts from storage.
+        Download a SQL table as a Parquet file to local storage.
+
+        Retrieves data from a SQL table and saves it as a Parquet file
+        at the specified destination. The source path should be in the
+        format 'sql://database/schema/table'.
 
         Parameters
         ----------
         src : str
-            Path of the material entity.
-        dst : str
-            The destination of the material entity on local filesystem.
-        overwrite : bool
-            Specify if overwrite existing file(s).
+            The SQL URI path of the table to download in the format
+            'sql://database/schema/table' or 'sql://database/table'.
+        dst : Path
+            The destination path on the local filesystem where the
+            Parquet file will be saved.
+        overwrite : bool, default False
+            Whether to overwrite existing files at the destination path.
 
         Returns
         -------
         str
-            Destination path of the downloaded files.
+            The absolute path of the downloaded Parquet file.
+
+        Raises
+        ------
+        StoreError
+            If the destination path has an invalid extension or if
+            file operations fail.
         """
         table_name = self._get_table_name(src) + ".parquet"
         # Case where dst is not provided
@@ -93,12 +113,12 @@ class SqlStore(Store):
         dst: str,
     ) -> list[tuple[str, str]]:
         """
-        Upload an artifact to storage.
+        Upload artifacts to SQL storage.
 
         Raises
         ------
         StoreError
-            This method is not implemented.
+            Always raised as SQL store does not support direct upload.
         """
         raise StoreError("SQL store does not support upload.")
 
@@ -108,17 +128,12 @@ class SqlStore(Store):
         paths: list[tuple[str, str]],
     ) -> list[dict]:
         """
-        Get file information from SQL based storage.
-
-        Parameters
-        ----------
-        paths : list[str]
-            List of source paths.
+        Get file metadata information from SQL storage.
 
         Returns
         -------
         list[dict]
-            Returns files metadata.
+            Empty list.
         """
         return []
 
@@ -134,23 +149,33 @@ class SqlStore(Store):
         **kwargs,
     ) -> Any:
         """
-        Read DataFrame from path.
+        Read a DataFrame from a SQL table.
+
+        Connects to the SQL database and reads data from the specified
+        table into a DataFrame using the specified engine (pandas, polars, etc.).
 
         Parameters
         ----------
         path : SourcesOrListOfSources
-            Path(s) to read DataFrame from.
-        file_format : str
-            Extension of the file.
-        engine : str
-            Dataframe engine (pandas, polars, etc.).
+            The SQL URI path to read from in the format
+            'sql://database/schema/table'. Only single paths are supported.
+        file_format : str, optional
+            File format specification (not used for SQL operations).
+        engine : str, optional
+            DataFrame engine to use (e.g., 'pandas', 'polars').
+            If None, uses the default engine.
         **kwargs : dict
-            Keyword arguments.
+            Additional keyword arguments passed to the reader.
 
         Returns
        
         -------
         Any
-            DataFrame.
+            DataFrame object containing the table data.
+
+        Raises
+        ------
+        StoreError
+            If a list of paths is provided (only single path supported).
         """
         if isinstance(path, list):
             raise StoreError("SQL store can only read a single DataFrame at a time.")
@@ -172,21 +197,26 @@ class SqlStore(Store):
         engine: str | None = None,
     ) -> Any:
         """
-        Query data from database.
+        Execute a custom SQL query and return results as a DataFrame.
+
+        Runs a SQL query against the database specified in the path
+        and returns the results using the specified DataFrame engine.
 
         Parameters
         ----------
         query : str
-            The query to execute.
+            The SQL query string to execute against the database.
         path : str
-            Path to the database.
-        engine : str
-            Dataframe engine (pandas, polars, etc.).
+            The SQL URI path specifying the database connection
+            in the format 'sql://database/schema/table'.
+        engine : str, optional
+            DataFrame engine to use for result processing
+            (e.g., 'pandas', 'polars'). If None, uses the default.
 
         Returns
         -------
         Any
-            DataFrame.
+            DataFrame object containing the query results.
         """
         reader = self._get_reader(engine)
         schema = self._get_schema(path)
@@ -195,21 +225,29 @@ class SqlStore(Store):
 
     def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
         """
-        Write a dataframe to a database. Kwargs are passed to df.to_sql().
+        Write a DataFrame to a SQL database table.
+
+        Takes a DataFrame and writes it to the specified SQL table.
+        The destination should be in SQL URI format. Additional
+        parameters are passed to the underlying to_sql() method.
 
        Parameters
         ----------
         df : Any
-            The dataframe to write.
+            The DataFrame object to write to the database.
         dst : str
-            The destination of the dataframe.
+            The destination SQL URI in the format
+            'sql://database/schema/table' or 'sql://database/table'.
+        extension : str, optional
+            File extension parameter (not used for SQL operations).
         **kwargs : dict
-            Keyword arguments.
+            Additional keyword arguments passed to the DataFrame's
+            to_sql() method for controlling write behavior.
 
         Returns
         -------
         str
-            Path of written dataframe.
+            The SQL URI path where the DataFrame was written.
         """
         schema = self._get_schema(dst)
         table = self._get_table_name(dst)
@@ -221,21 +259,25 @@ class SqlStore(Store):
 
     def _download_table(self, schema: str, table: str, dst: str) -> str:
         """
-        Download a table from SQL based storage.
+        Download a specific table from SQL database to Parquet file.
+
+        Internal method that handles the actual table download process.
+        Connects to the database, retrieves all data from the specified
+        table, and writes it to a Parquet file using PyArrow.
 
         Parameters
         ----------
         schema : str
-            The origin schema.
+            The database schema name containing the table.
         table : str
-            The origin table.
+            The name of the table to download.
         dst : str
-            The destination path.
+            The local file path where the Parquet file will be saved.
 
         Returns
         -------
         str
-            The destination path.
+            The destination file path of the created Parquet file.
         """
         engine = self._check_factory(schema=schema)
 
@@ -259,23 +301,29 @@ class SqlStore(Store):
 
     def _upload_table(self, df: Any, schema: str, table: str, **kwargs) -> str:
         """
-        Upload a table to SQL based storage.
+        Upload a DataFrame to a SQL table.
+
+        Internal method that handles writing a DataFrame to a SQL database
+        table. Uses the appropriate reader based on the DataFrame type
+        and manages the database connection.
 
         Parameters
         ----------
-        df : DataFrame
-            The dataframe.
+        df : Any
+            The DataFrame object to upload to the database.
         schema : str
-            Destination schema.
+            The target database schema name.
         table : str
-            Destination table.
+            The target table name within the schema.
         **kwargs : dict
-            Keyword arguments.
+            Additional keyword arguments passed to the write operation,
+            such as if_exists, index, method, etc.
 
         Returns
         -------
         str
-            The SQL URI where the dataframe was saved.
+            The SQL URI where the DataFrame was saved in the format
+            'sql://database/schema/table'.
         """
         reader = get_reader_by_object(df)
         engine = self._check_factory()
@@ -287,39 +335,45 @@ class SqlStore(Store):
     # Helper methods
     ##############################
 
-    def _get_connection_string(self, origin: str) -> str:
+    def _get_connection_string(self) -> str:
         """
-        Get the connection string.
+        Retrieve the database connection string from the configurator.
 
-        Parameters
-        ----------
-        origin : str
-            The origin of the credentials.
+        Gets the PostgreSQL connection string using the configured
+        database credentials (username, password, host, port, database).
 
         Returns
         -------
         str
-            The connection string.
+            The PostgreSQL connection string in the format
+            'postgresql://username:password@host:port/database'.
         """
-        return self._configurator.get_sql_conn_string(origin)
+        return self._configurator.get_sql_conn_string()
 
-    def _get_engine(self, origin: str, schema: str | None = None) -> Engine:
+    def _get_engine(self, schema: str | None = None) -> Engine:
         """
-        Create engine from connection string.
+        Create a SQLAlchemy engine from the connection string.
+
+        Establishes a database engine using the configured connection
+        string with appropriate connection parameters and schema settings.
 
         Parameters
         ----------
-        origin : str
-            The origin of the credentials.
-        schema : str
-            The schema.
+        schema : str, optional
+            The database schema to set in the search path.
+            If provided, sets the PostgreSQL search_path option.
 
         Returns
         -------
         Engine
-            An SQLAlchemy engine.
+            A configured SQLAlchemy engine instance.
+
+        Raises
+        ------
+        StoreError
+            If the connection string is invalid or engine creation fails.
         """
-        connection_string = self._get_connection_string(origin)
+        connection_string = self._get_connection_string()
         if not isinstance(connection_string, str):
             raise StoreError("Connection string must be a string.")
         try:
@@ -330,42 +384,68 @@ class SqlStore(Store):
         except Exception as ex:
             raise StoreError(f"Something wrong with connection string. Arguments: {str(ex.args)}")
 
-    def _check_factory(self, schema: str | None = None) -> Engine:
+    def _check_factory(self, retry: bool = True, schema: str | None = None) -> Engine:
         """
-        Check if the database is accessible and return the engine.
+        Validate database accessibility and return a working engine.
+
+        Creates and tests a database engine, with retry capability if
+        the initial connection fails. Handles configuration changes
+        and ensures the database is accessible before returning.
 
         Parameters
         ----------
-        schema : str
-            The schema.
+        retry : bool, default True
+            Whether to attempt a retry with different configuration
+            if the initial connection fails.
+        schema : str, optional
+            The database schema to configure in the engine.
 
         Returns
         -------
         Engine
-            The database engine.
+            A validated SQLAlchemy engine with confirmed database access.
+
+        Raises
+        ------
+        ConfigError
+            If database access fails and retry is exhausted or disabled.
         """
         try:
-            engine = self._get_engine(CredsOrigin.ENV.value, schema)
-            self._check_access_to_storage(engine)
-        except StoreError:
-            engine = self._get_engine(CredsOrigin.FILE.value, schema)
+            engine = self._get_engine(schema)
             self._check_access_to_storage(engine)
-        return engine
+            return engine
+        except ConfigError as e:
+            if retry:
+                self._configurator.eval_change_origin()
+                return self._check_factory(retry=False, schema=schema)
+            raise e
 
     @staticmethod
     def _parse_path(path: str) -> dict:
         """
-        Parse the path and return the components.
+        Parse a SQL URI path into its component parts.
+
+        Breaks down a SQL URI into database, schema, and table components.
+        Supports both full three-part paths and simplified two-part paths
+        (using 'public' as default schema).
 
         Parameters
         ----------
         path : str
-            The path.
+            The SQL URI path to parse in the format
+            'sql://database/schema/table' or 'sql://database/table'.
 
         Returns
        
         -------
         dict
-            A dictionary containing the components of the path.
+            Dictionary containing parsed components with keys:
+            'database', 'schema', and 'table'.
+
+        Raises
+        ------
+        ValueError
+            If the path format is invalid or doesn't follow the
+            expected SQL URI structure.
         """
         # Parse path
         err_msg = "Invalid SQL path. Must be sql://<database>/<schema>/<table> or sql://<database>/<table>"
@@ -382,45 +462,54 @@ class SqlStore(Store):
 
     def _get_schema(self, uri: str) -> str:
         """
-        Get the name of the SQL schema from the URI.
+        Extract the schema name from a SQL URI.
+
+        Parses the SQL URI and returns the schema component.
+        Uses 'public' as the default schema if not specified in the URI.
 
         Parameters
        ----------
         uri : str
-            The URI.
+            The SQL URI to extract the schema from.
 
         Returns
         -------
         str
-            The name of the SQL schema.
+            The schema name extracted from the URI.
         """
         return str(self._parse_path(uri).get("schema"))
 
     def _get_table_name(self, uri: str) -> str:
         """
-        Get the name of the table from the URI.
+        Extract the table name from a SQL URI.
+
+        Parses the SQL URI and returns the table component,
+        which is always the last part of the URI path.
 
         Parameters
         ----------
         uri : str
-            The URI.
+            The SQL URI to extract the table name from.
 
         Returns
         -------
         str
-            The name of the table
+            The table name extracted from the URI.
         """
         return str(self._parse_path(uri).get("table"))
 
     @staticmethod
     def _check_access_to_storage(engine: Engine) -> None:
         """
-        Check if there is access to the storage.
+        Verify database connectivity using the provided engine.
+
+        Tests the database connection by attempting to connect.
+        Properly disposes of the engine if connection fails.
 
         Parameters
         ----------
         engine : Engine
-            An SQLAlchemy engine.
+            The SQLAlchemy engine to test for connectivity.
 
         Returns
         -------
@@ -428,11 +517,11 @@ class SqlStore(Store):
 
         Raises
         ------
-        StoreError
-            If there is no access to the storage.
+        ConfigError
+            If database connection cannot be established.
         """
         try:
            engine.connect()
         except SQLAlchemyError:
             engine.dispose()
-            raise StoreError("No access to db!")
+            raise ConfigError("No access to db!")
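
The rewritten docstrings above describe a small read/write surface keyed on sql:// URIs. The sketch below shows how those methods fit together; it is illustrative only, assumes the store's default configurator can already resolve SQL credentials (from the environment or an ini file), and uses made-up database, schema, and table names.

from pathlib import Path

import pandas as pd

from digitalhub.stores.data.sql.store import SqlStore

store = SqlStore()  # credentials are resolved by the default configurator
uri = "sql://mydb/public/customers"  # sql://<database>/<schema>/<table>

# Read a whole table into a DataFrame with the chosen engine.
df = store.read_df(uri, engine="pandas")

# Run an ad-hoc query against the same database.
top = store.query("SELECT * FROM customers LIMIT 10", uri, engine="pandas")

# Write a DataFrame back; extra kwargs are forwarded to to_sql().
out = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
written_uri = store.write_df(out, "sql://mydb/public/customers_copy", if_exists="replace")

# Dump the table to a local Parquet file.
local_path = store.download(uri, dst=Path("customers.parquet"), overwrite=True)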
digitalhub/utils/exceptions.py

@@ -81,3 +81,9 @@ class ClientError(Exception):
     """
     Raised when incontered errors on clients.
     """
+
+
+class ConfigError(Exception):
+    """
+    Raised when incontered errors on configs.
+    """