digitalhub-0.13.0b3-py3-none-any.whl → digitalhub-0.13.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of digitalhub might be problematic. (The original page linked to more details here; the link target is not preserved in this extract.)

@@ -10,8 +10,19 @@ from digitalhub.stores.credentials.enums import CredsEnvVar
10
10
 
11
11
  class SqlStoreConfigurator(Configurator):
12
12
  """
13
- Configure the store by getting the credentials from user
14
- provided config or from environment.
13
+ SQL store configuration manager for database connections.
14
+
15
+ Handles credential management and configuration for SQL database
16
+ connections. Loads credentials from environment variables or
17
+ configuration files and provides connection string generation
18
+ for database access.
19
+
20
+ Attributes
21
+ ----------
22
+ keys : list[str]
23
+ List of all supported credential keys for SQL connections.
24
+ required_keys : list[str]
25
+ List of mandatory credential keys that must be provided.
15
26
  """
16
27
 
17
28
  keys = [
@@ -40,31 +51,70 @@ class SqlStoreConfigurator(Configurator):
40
51
 
41
52
  def load_env_vars(self) -> None:
42
53
  """
43
- Load the credentials from the environment.
54
+ Load database credentials from environment variables.
55
+
56
+ Retrieves SQL database connection credentials from the system
57
+ environment variables and stores them in the configurator's
58
+ credential handler for use in database connections.
59
+
60
+ Returns
61
+ -------
62
+ None
44
63
  """
45
64
  env_creds = self._creds_handler.load_from_env(self.keys)
46
65
  self._creds_handler.set_credentials(self._env, env_creds)
47
66
 
48
67
  def load_file_vars(self) -> None:
49
68
  """
50
- Load the credentials from the file.
69
+ Load database credentials from configuration file.
70
+
71
+ Retrieves SQL database connection credentials from a
72
+ configuration file and stores them in the configurator's
73
+ credential handler for use in database connections.
74
+
75
+ Returns
76
+ -------
77
+ None
51
78
  """
52
79
  file_creds = self._creds_handler.load_from_file(self.keys)
53
80
  self._creds_handler.set_credentials(self._file, file_creds)
54
81
 
55
82
  def get_sql_conn_string(self) -> str:
56
83
  """
57
- Get the connection string from environment variables.
84
+ Generate PostgreSQL connection string from stored credentials.
85
+
86
+ Constructs a PostgreSQL connection string using the configured
87
+ database credentials including username, password, host, port,
88
+ and database name.
58
89
 
59
90
  Returns
60
91
  -------
61
92
  str
62
- The connection string.
93
+ A PostgreSQL connection string in the format:
94
+ 'postgresql://username:password@host:port/database'
63
95
  """
64
- creds = self.get_credentials(self._origin)
96
+ creds = self.get_sql_credentials()
65
97
  user = creds[CredsEnvVar.DB_USERNAME.value]
66
98
  password = creds[CredsEnvVar.DB_PASSWORD.value]
67
99
  host = creds[CredsEnvVar.DB_HOST.value]
68
100
  port = creds[CredsEnvVar.DB_PORT.value]
69
101
  database = creds[CredsEnvVar.DB_DATABASE.value]
70
102
  return f"postgresql://{user}:{password}@{host}:{port}/{database}"
103
+
104
+ def get_sql_credentials(self) -> dict:
105
+ """
106
+ Get all configured database credentials as a dictionary.
107
+
108
+ Retrieves all available database credentials from the configured
109
+ source and returns them as a dictionary with all credential keys
110
+ from self.keys mapped to their values.
111
+
112
+ Returns
113
+ -------
114
+ dict
115
+ Dictionary containing all credential key-value pairs from self.keys.
116
+ Keys correspond to database connection parameters such as
117
+ username, password, host, port, database, and platform.
118
+ """
119
+ creds = self.get_credentials(self._origin)
120
+ return {key: creds.get(key) for key in self.keys}
@@ -28,8 +28,17 @@ if typing.TYPE_CHECKING:
28
28
 
29
29
  class SqlStore(Store):
30
30
  """
31
- SQL store class. It implements the Store interface and provides methods to fetch and persist
32
- artifacts on SQL based storage.
31
+ SQL-based data store implementation for database operations.
32
+
33
+ Provides functionality for reading, writing, and managing data in SQL
34
+ databases. Implements the Store interface with SQL-specific operations
35
+ including table downloads, DataFrame operations, and query execution.
36
+
37
+ Attributes
38
+ ----------
39
+ _configurator : SqlStoreConfigurator
40
+ The configurator instance for managing SQL database credentials
41
+ and connection parameters.
33
42
  """
34
43
 
35
44
  def __init__(self, configurator: Configurator | None = None) -> None:
@@ -47,21 +56,33 @@ class SqlStore(Store):
47
56
  overwrite: bool = False,
48
57
  ) -> str:
49
58
  """
50
- Download artifacts from storage.
59
+ Download a SQL table as a Parquet file to local storage.
60
+
61
+ Retrieves data from a SQL table and saves it as a Parquet file
62
+ at the specified destination. The source path should be in the
63
+ format 'sql://database/schema/table'.
51
64
 
52
65
  Parameters
53
66
  ----------
54
67
  src : str
55
- Path of the material entity.
56
- dst : str
57
- The destination of the material entity on local filesystem.
58
- overwrite : bool
59
- Specify if overwrite existing file(s).
68
+ The SQL URI path of the table to download in the format
69
+ 'sql://database/schema/table' or 'sql://database/table'.
70
+ dst : Path
71
+ The destination path on the local filesystem where the
72
+ Parquet file will be saved.
73
+ overwrite : bool, default False
74
+ Whether to overwrite existing files at the destination path.
60
75
 
61
76
  Returns
62
77
  -------
63
78
  str
64
- Destination path of the downloaded files.
79
+ The absolute path of the downloaded Parquet file.
80
+
81
+ Raises
82
+ ------
83
+ StoreError
84
+ If the destination path has an invalid extension or if
85
+ file operations fail.
65
86
  """
66
87
  table_name = self._get_table_name(src) + ".parquet"
67
88
  # Case where dst is not provided
@@ -92,12 +113,12 @@ class SqlStore(Store):
92
113
  dst: str,
93
114
  ) -> list[tuple[str, str]]:
94
115
  """
95
- Upload an artifact to storage.
116
+ Upload artifacts to SQL storage.
96
117
 
97
118
  Raises
98
119
  ------
99
120
  StoreError
100
- This method is not implemented.
121
+ Always raised as SQL store does not support direct upload.
101
122
  """
102
123
  raise StoreError("SQL store does not support upload.")
103
124
 
@@ -107,17 +128,12 @@ class SqlStore(Store):
107
128
  paths: list[tuple[str, str]],
108
129
  ) -> list[dict]:
109
130
  """
110
- Get file information from SQL based storage.
111
-
112
- Parameters
113
- ----------
114
- paths : list[str]
115
- List of source paths.
131
+ Get file metadata information from SQL storage.
116
132
 
117
133
  Returns
118
134
  -------
119
135
  list[dict]
120
- Returns files metadata.
136
+ Empty list.
121
137
  """
122
138
  return []
123
139
 
@@ -133,23 +149,33 @@ class SqlStore(Store):
133
149
  **kwargs,
134
150
  ) -> Any:
135
151
  """
136
- Read DataFrame from path.
152
+ Read a DataFrame from a SQL table.
153
+
154
+ Connects to the SQL database and reads data from the specified
155
+ table into a DataFrame using the specified engine (pandas, polars, etc.).
137
156
 
138
157
  Parameters
139
158
  ----------
140
159
  path : SourcesOrListOfSources
141
- Path(s) to read DataFrame from.
142
- file_format : str
143
- Extension of the file.
144
- engine : str
145
- Dataframe engine (pandas, polars, etc.).
160
+ The SQL URI path to read from in the format
161
+ 'sql://database/schema/table'. Only single paths are supported.
162
+ file_format : str, optional
163
+ File format specification (not used for SQL operations).
164
+ engine : str, optional
165
+ DataFrame engine to use (e.g., 'pandas', 'polars').
166
+ If None, uses the default engine.
146
167
  **kwargs : dict
147
- Keyword arguments.
168
+ Additional keyword arguments passed to the reader.
148
169
 
149
170
  Returns
150
171
  -------
151
172
  Any
152
- DataFrame.
173
+ DataFrame object containing the table data.
174
+
175
+ Raises
176
+ ------
177
+ StoreError
178
+ If a list of paths is provided (only single path supported).
153
179
  """
154
180
  if isinstance(path, list):
155
181
  raise StoreError("SQL store can only read a single DataFrame at a time.")
@@ -171,21 +197,26 @@ class SqlStore(Store):
171
197
  engine: str | None = None,
172
198
  ) -> Any:
173
199
  """
174
- Query data from database.
200
+ Execute a custom SQL query and return results as a DataFrame.
201
+
202
+ Runs a SQL query against the database specified in the path
203
+ and returns the results using the specified DataFrame engine.
175
204
 
176
205
  Parameters
177
206
  ----------
178
207
  query : str
179
- The query to execute.
208
+ The SQL query string to execute against the database.
180
209
  path : str
181
- Path to the database.
182
- engine : str
183
- Dataframe engine (pandas, polars, etc.).
210
+ The SQL URI path specifying the database connection
211
+ in the format 'sql://database/schema/table'.
212
+ engine : str, optional
213
+ DataFrame engine to use for result processing
214
+ (e.g., 'pandas', 'polars'). If None, uses the default.
184
215
 
185
216
  Returns
186
217
  -------
187
218
  Any
188
- DataFrame.
219
+ DataFrame object containing the query results.
189
220
  """
190
221
  reader = self._get_reader(engine)
191
222
  schema = self._get_schema(path)
@@ -194,21 +225,29 @@ class SqlStore(Store):
194
225
 
195
226
  def write_df(self, df: Any, dst: str, extension: str | None = None, **kwargs) -> str:
196
227
  """
197
- Write a dataframe to a database. Kwargs are passed to df.to_sql().
228
+ Write a DataFrame to a SQL database table.
229
+
230
+ Takes a DataFrame and writes it to the specified SQL table.
231
+ The destination should be in SQL URI format. Additional
232
+ parameters are passed to the underlying to_sql() method.
198
233
 
199
234
  Parameters
200
235
  ----------
201
236
  df : Any
202
- The dataframe to write.
237
+ The DataFrame object to write to the database.
203
238
  dst : str
204
- The destination of the dataframe.
239
+ The destination SQL URI in the format
240
+ 'sql://database/schema/table' or 'sql://database/table'.
241
+ extension : str, optional
242
+ File extension parameter (not used for SQL operations).
205
243
  **kwargs : dict
206
- Keyword arguments.
244
+ Additional keyword arguments passed to the DataFrame's
245
+ to_sql() method for controlling write behavior.
207
246
 
208
247
  Returns
209
248
  -------
210
249
  str
211
- Path of written dataframe.
250
+ The SQL URI path where the DataFrame was written.
212
251
  """
213
252
  schema = self._get_schema(dst)
214
253
  table = self._get_table_name(dst)
@@ -220,21 +259,25 @@ class SqlStore(Store):
220
259
 
221
260
  def _download_table(self, schema: str, table: str, dst: str) -> str:
222
261
  """
223
- Download a table from SQL based storage.
262
+ Download a specific table from SQL database to Parquet file.
263
+
264
+ Internal method that handles the actual table download process.
265
+ Connects to the database, retrieves all data from the specified
266
+ table, and writes it to a Parquet file using PyArrow.
224
267
 
225
268
  Parameters
226
269
  ----------
227
270
  schema : str
228
- The origin schema.
271
+ The database schema name containing the table.
229
272
  table : str
230
- The origin table.
273
+ The name of the table to download.
231
274
  dst : str
232
- The destination path.
275
+ The local file path where the Parquet file will be saved.
233
276
 
234
277
  Returns
235
278
  -------
236
279
  str
237
- The destination path.
280
+ The destination file path of the created Parquet file.
238
281
  """
239
282
  engine = self._check_factory(schema=schema)
240
283
 
@@ -258,23 +301,29 @@ class SqlStore(Store):
258
301
 
259
302
  def _upload_table(self, df: Any, schema: str, table: str, **kwargs) -> str:
260
303
  """
261
- Upload a table to SQL based storage.
304
+ Upload a DataFrame to a SQL table.
305
+
306
+ Internal method that handles writing a DataFrame to a SQL database
307
+ table. Uses the appropriate reader based on the DataFrame type
308
+ and manages the database connection.
262
309
 
263
310
  Parameters
264
311
  ----------
265
- df : DataFrame
266
- The dataframe.
312
+ df : Any
313
+ The DataFrame object to upload to the database.
267
314
  schema : str
268
- Destination schema.
315
+ The target database schema name.
269
316
  table : str
270
- Destination table.
317
+ The target table name within the schema.
271
318
  **kwargs : dict
272
- Keyword arguments.
319
+ Additional keyword arguments passed to the write operation,
320
+ such as if_exists, index, method, etc.
273
321
 
274
322
  Returns
275
323
  -------
276
324
  str
277
- The SQL URI where the dataframe was saved.
325
+ The SQL URI where the DataFrame was saved in the format
326
+ 'sql://database/schema/table'.
278
327
  """
279
328
  reader = get_reader_by_object(df)
280
329
  engine = self._check_factory()
@@ -288,32 +337,43 @@ class SqlStore(Store):
288
337
 
289
338
  def _get_connection_string(self) -> str:
290
339
  """
291
- Get the connection string.
340
+ Retrieve the database connection string from the configurator.
341
+
342
+ Gets the PostgreSQL connection string using the configured
343
+ database credentials (username, password, host, port, database).
292
344
 
293
345
  Returns
294
346
  -------
295
347
  str
296
- The connection string.
348
+ The PostgreSQL connection string in the format
349
+ 'postgresql://username:password@host:port/database'.
297
350
  """
298
351
  return self._configurator.get_sql_conn_string()
299
352
 
300
- def _get_engine(self, origin: str, schema: str | None = None) -> Engine:
353
+ def _get_engine(self, schema: str | None = None) -> Engine:
301
354
  """
302
- Create engine from connection string.
355
+ Create a SQLAlchemy engine from the connection string.
356
+
357
+ Establishes a database engine using the configured connection
358
+ string with appropriate connection parameters and schema settings.
303
359
 
304
360
  Parameters
305
361
  ----------
306
- origin : str
307
- The origin of the credentials.
308
- schema : str
309
- The schema.
362
+ schema : str, optional
363
+ The database schema to set in the search path.
364
+ If provided, sets the PostgreSQL search_path option.
310
365
 
311
366
  Returns
312
367
  -------
313
368
  Engine
314
- An SQLAlchemy engine.
369
+ A configured SQLAlchemy engine instance.
370
+
371
+ Raises
372
+ ------
373
+ StoreError
374
+ If the connection string is invalid or engine creation fails.
315
375
  """
316
- connection_string = self._get_connection_string(origin)
376
+ connection_string = self._get_connection_string()
317
377
  if not isinstance(connection_string, str):
318
378
  raise StoreError("Connection string must be a string.")
319
379
  try:
@@ -326,19 +386,29 @@ class SqlStore(Store):
326
386
 
327
387
  def _check_factory(self, retry: bool = True, schema: str | None = None) -> Engine:
328
388
  """
329
- Check if the database is accessible and return the engine.
389
+ Validate database accessibility and return a working engine.
390
+
391
+ Creates and tests a database engine, with retry capability if
392
+ the initial connection fails. Handles configuration changes
393
+ and ensures the database is accessible before returning.
330
394
 
331
395
  Parameters
332
396
  ----------
333
- retry : bool
334
- Whether to retry if the database is not accessible.
335
- schema : str
336
- The schema.
397
+ retry : bool, default True
398
+ Whether to attempt a retry with different configuration
399
+ if the initial connection fails.
400
+ schema : str, optional
401
+ The database schema to configure in the engine.
337
402
 
338
403
  Returns
339
404
  -------
340
405
  Engine
341
- The database engine.
406
+ A validated SQLAlchemy engine with confirmed database access.
407
+
408
+ Raises
409
+ ------
410
+ ConfigError
411
+ If database access fails and retry is exhausted or disabled.
342
412
  """
343
413
  try:
344
414
  engine = self._get_engine(schema)
@@ -353,17 +423,29 @@ class SqlStore(Store):
353
423
  @staticmethod
354
424
  def _parse_path(path: str) -> dict:
355
425
  """
356
- Parse the path and return the components.
426
+ Parse a SQL URI path into its component parts.
427
+
428
+ Breaks down a SQL URI into database, schema, and table components.
429
+ Supports both full three-part paths and simplified two-part paths
430
+ (using 'public' as default schema).
357
431
 
358
432
  Parameters
359
433
  ----------
360
434
  path : str
361
- The path.
435
+ The SQL URI path to parse in the format
436
+ 'sql://database/schema/table' or 'sql://database/table'.
362
437
 
363
438
  Returns
364
439
  -------
365
440
  dict
366
- A dictionary containing the components of the path.
441
+ Dictionary containing parsed components with keys:
442
+ 'database', 'schema', and 'table'.
443
+
444
+ Raises
445
+ ------
446
+ ValueError
447
+ If the path format is invalid or doesn't follow the
448
+ expected SQL URI structure.
367
449
  """
368
450
  # Parse path
369
451
  err_msg = "Invalid SQL path. Must be sql://<database>/<schema>/<table> or sql://<database>/<table>"
@@ -380,45 +462,54 @@ class SqlStore(Store):
380
462
 
381
463
  def _get_schema(self, uri: str) -> str:
382
464
  """
383
- Get the name of the SQL schema from the URI.
465
+ Extract the schema name from a SQL URI.
466
+
467
+ Parses the SQL URI and returns the schema component.
468
+ Uses 'public' as the default schema if not specified in the URI.
384
469
 
385
470
  Parameters
386
471
  ----------
387
472
  uri : str
388
- The URI.
473
+ The SQL URI to extract the schema from.
389
474
 
390
475
  Returns
391
476
  -------
392
477
  str
393
- The name of the SQL schema.
478
+ The schema name extracted from the URI.
394
479
  """
395
480
  return str(self._parse_path(uri).get("schema"))
396
481
 
397
482
  def _get_table_name(self, uri: str) -> str:
398
483
  """
399
- Get the name of the table from the URI.
484
+ Extract the table name from a SQL URI.
485
+
486
+ Parses the SQL URI and returns the table component,
487
+ which is always the last part of the URI path.
400
488
 
401
489
  Parameters
402
490
  ----------
403
491
  uri : str
404
- The URI.
492
+ The SQL URI to extract the table name from.
405
493
 
406
494
  Returns
407
495
  -------
408
496
  str
409
- The name of the table
497
+ The table name extracted from the URI.
410
498
  """
411
499
  return str(self._parse_path(uri).get("table"))
412
500
 
413
501
  @staticmethod
414
502
  def _check_access_to_storage(engine: Engine) -> None:
415
503
  """
416
- Check if there is access to the storage.
504
+ Verify database connectivity using the provided engine.
505
+
506
+ Tests the database connection by attempting to connect.
507
+ Properly disposes of the engine if connection fails.
417
508
 
418
509
  Parameters
419
510
  ----------
420
511
  engine : Engine
421
- An SQLAlchemy engine.
512
+ The SQLAlchemy engine to test for connectivity.
422
513
 
423
514
  Returns
424
515
  -------
@@ -426,8 +517,8 @@ class SqlStore(Store):
426
517
 
427
518
  Raises
428
519
  ------
429
- StoreError
430
- If there is no access to the storage.
520
+ ConfigError
521
+ If database connection cannot be established.
431
522
  """
432
523
  try:
433
524
  engine.connect()
@@ -262,8 +262,7 @@ def carriage_return_warn(string: str) -> None:
262
262
  None
263
263
  """
264
264
  if "\r\n" in string:
265
- warn("String contains a carriage return. "
266
- "It may not be parsed correctly from remote runtimes.")
265
+ warn("String contains a carriage return. It may not be parsed correctly from remote runtimes.")
267
266
 
268
267
 
269
268
  def read_source(path: str) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: digitalhub
3
- Version: 0.13.0b3
3
+ Version: 0.13.1
4
4
  Summary: Python SDK for Digitalhub
5
5
  Project-URL: Homepage, https://github.com/scc-digitalhub/digitalhub-sdk
6
6
  Author-email: Fondazione Bruno Kessler <digitalhub@fbk.eu>, Matteo Martini <mmartini@fbk.eu>