digitalhub 0.9.1__py3-none-any.whl → 0.10.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of digitalhub might be problematic.

Files changed (67)
  1. digitalhub/__init__.py +2 -3
  2. digitalhub/client/_base/client.py +3 -2
  3. digitalhub/client/dhcore/api_builder.py +6 -1
  4. digitalhub/client/dhcore/client.py +27 -399
  5. digitalhub/client/dhcore/configurator.py +339 -0
  6. digitalhub/client/dhcore/error_parser.py +107 -0
  7. digitalhub/client/dhcore/models.py +13 -23
  8. digitalhub/client/dhcore/utils.py +4 -44
  9. digitalhub/client/local/api_builder.py +9 -17
  10. digitalhub/client/local/client.py +12 -2
  11. digitalhub/client/local/enums.py +11 -0
  12. digitalhub/configurator/api.py +31 -0
  13. digitalhub/configurator/configurator.py +194 -0
  14. digitalhub/configurator/credentials_store.py +65 -0
  15. digitalhub/configurator/ini_module.py +74 -0
  16. digitalhub/entities/_base/_base/entity.py +2 -2
  17. digitalhub/entities/_base/material/entity.py +19 -6
  18. digitalhub/entities/_base/material/utils.py +2 -2
  19. digitalhub/entities/_commons/enums.py +1 -0
  20. digitalhub/entities/_commons/models.py +9 -0
  21. digitalhub/entities/_commons/utils.py +25 -0
  22. digitalhub/entities/_operations/processor.py +103 -107
  23. digitalhub/entities/artifact/crud.py +3 -3
  24. digitalhub/entities/dataitem/_base/entity.py +2 -2
  25. digitalhub/entities/dataitem/crud.py +3 -3
  26. digitalhub/entities/dataitem/table/entity.py +2 -2
  27. digitalhub/{utils/data_utils.py → entities/dataitem/table/utils.py} +43 -51
  28. digitalhub/entities/dataitem/utils.py +6 -3
  29. digitalhub/entities/model/_base/entity.py +172 -0
  30. digitalhub/entities/model/_base/spec.py +0 -10
  31. digitalhub/entities/model/_base/status.py +10 -0
  32. digitalhub/entities/model/crud.py +3 -3
  33. digitalhub/entities/model/huggingface/spec.py +6 -3
  34. digitalhub/entities/model/mlflow/models.py +2 -2
  35. digitalhub/entities/model/mlflow/spec.py +1 -3
  36. digitalhub/entities/model/mlflow/utils.py +44 -5
  37. digitalhub/entities/run/_base/entity.py +149 -0
  38. digitalhub/entities/run/_base/status.py +12 -0
  39. digitalhub/entities/task/_base/spec.py +2 -0
  40. digitalhub/entities/task/crud.py +4 -0
  41. digitalhub/readers/{_commons → pandas}/enums.py +4 -0
  42. digitalhub/readers/pandas/reader.py +58 -10
  43. digitalhub/stores/_base/store.py +1 -49
  44. digitalhub/stores/api.py +8 -33
  45. digitalhub/stores/builder.py +44 -161
  46. digitalhub/stores/local/store.py +4 -18
  47. digitalhub/stores/remote/store.py +3 -10
  48. digitalhub/stores/s3/configurator.py +107 -0
  49. digitalhub/stores/s3/enums.py +17 -0
  50. digitalhub/stores/s3/models.py +21 -0
  51. digitalhub/stores/s3/store.py +8 -28
  52. digitalhub/{utils/s3_utils.py → stores/s3/utils.py} +7 -3
  53. digitalhub/stores/sql/configurator.py +88 -0
  54. digitalhub/stores/sql/enums.py +16 -0
  55. digitalhub/stores/sql/models.py +24 -0
  56. digitalhub/stores/sql/store.py +14 -57
  57. digitalhub/utils/exceptions.py +6 -0
  58. digitalhub/utils/generic_utils.py +9 -8
  59. digitalhub/utils/uri_utils.py +1 -1
  60. {digitalhub-0.9.1.dist-info → digitalhub-0.10.0b0.dist-info}/METADATA +5 -6
  61. {digitalhub-0.9.1.dist-info → digitalhub-0.10.0b0.dist-info}/RECORD +66 -53
  62. test/local/imports/test_imports.py +0 -1
  63. digitalhub/client/dhcore/env.py +0 -23
  64. /digitalhub/{readers/_commons → configurator}/__init__.py +0 -0
  65. {digitalhub-0.9.1.dist-info → digitalhub-0.10.0b0.dist-info}/LICENSE.txt +0 -0
  66. {digitalhub-0.9.1.dist-info → digitalhub-0.10.0b0.dist-info}/WHEEL +0 -0
  67. {digitalhub-0.9.1.dist-info → digitalhub-0.10.0b0.dist-info}/top_level.txt +0 -0

digitalhub/stores/s3/models.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+
+class S3StoreConfig(BaseModel):
+    """
+    S3 store configuration class.
+    """
+
+    endpoint_url: str
+    """S3 endpoint URL."""
+
+    aws_access_key_id: str
+    """AWS access key ID."""
+
+    aws_secret_access_key: str
+    """AWS secret access key."""
+
+    bucket_name: str
+    """S3 bucket name."""

digitalhub/stores/s3/store.py
@@ -10,42 +10,26 @@ import botocore.client # pylint: disable=unused-import
 from botocore.exceptions import ClientError
 
 from digitalhub.readers.api import get_reader_by_object
-from digitalhub.stores._base.store import Store, StoreConfig
+from digitalhub.stores._base.store import Store
+from digitalhub.stores.s3.configurator import S3StoreConfigurator
+from digitalhub.stores.s3.utils import get_bucket_name
 from digitalhub.utils.exceptions import StoreError
 from digitalhub.utils.file_utils import get_file_info_from_s3, get_file_mime_type
-from digitalhub.utils.s3_utils import get_bucket_name
 
 # Type aliases
 S3Client = Type["botocore.client.S3"]
 
 
-class S3StoreConfig(StoreConfig):
-    """
-    S3 store configuration class.
-    """
-
-    endpoint_url: str
-    """S3 endpoint URL."""
-
-    aws_access_key_id: str
-    """AWS access key ID."""
-
-    aws_secret_access_key: str
-    """AWS secret access key."""
-
-    bucket_name: str
-    """S3 bucket name."""
-
-
 class S3Store(Store):
     """
     S3 store class. It implements the Store interface and provides methods to fetch and persist
     artifacts on S3 based storage.
     """
 
-    def __init__(self, name: str, store_type: str, config: S3StoreConfig) -> None:
-        super().__init__(name, store_type)
-        self.config = config
+    def __init__(self, config: dict | None = None) -> None:
+        super().__init__()
+        self._configurator = S3StoreConfigurator()
+        self._configurator.configure(config)
 
     ##############################
     # I/O methods
@@ -512,11 +496,7 @@ class S3Store(Store):
         S3Client
             Returns a client object that interacts with the S3 storage service.
         """
-        cfg = {
-            "endpoint_url": self.config.endpoint_url,
-            "aws_access_key_id": self.config.aws_access_key_id,
-            "aws_secret_access_key": self.config.aws_secret_access_key,
-        }
+        cfg = self._configurator.get_s3_creds()
        return boto3.client("s3", **cfg)
 
     def _check_factory(self, root: str) -> tuple[S3Client, str]:
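
Based on the new constructor signature above, instantiation presumably moves from a named StoreConfig object to an optional plain dict handed to S3StoreConfigurator. A hedged sketch, assuming the configurator accepts the same keys as S3StoreConfig:

    from digitalhub.stores.s3.store import S3Store

    # Explicit configuration via dict (keys assumed to mirror S3StoreConfig);
    # calling S3Store() with no argument should defer to environment-based
    # credentials resolved by S3StoreConfigurator.
    store = S3Store(
        config={
            "endpoint_url": "https://minio.example.local:9000",  # placeholder
            "aws_access_key_id": "EXAMPLEKEY",                   # placeholder
            "aws_secret_access_key": "examplesecret",            # placeholder
            "bucket_name": "datalake",
        }
    )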

digitalhub/utils/s3_utils.py → digitalhub/stores/s3/utils.py
@@ -6,6 +6,10 @@ from urllib.parse import urlparse
 
 from boto3 import client as boto3_client
 
+from digitalhub.stores.s3.enums import S3StoreEnv
+
+DEFAULT_BUCKET = "datalake"
+
 
 def get_bucket_name(path: str) -> str:
     """
@@ -59,11 +63,11 @@ def get_s3_source(bucket: str, key: str, filename: Path) -> None:
     -------
     None
     """
-    s3 = boto3_client("s3", endpoint_url=os.getenv("S3_ENDPOINT_URL"))
+    s3 = boto3_client("s3", endpoint_url=os.getenv(S3StoreEnv.ENDPOINT_URL.value))
     s3.download_file(bucket, key, filename)
 
 
-def get_s3_bucket() -> str | None:
+def get_s3_bucket_from_env() -> str | None:
     """
     Function to get S3 bucket name.
 
@@ -72,4 +76,4 @@ def get_s3_bucket() -> str | None:
     str
         The S3 bucket name.
     """
-    return os.getenv("S3_BUCKET_NAME", "datalake")
+    return os.getenv(S3StoreEnv.BUCKET_NAME.value, DEFAULT_BUCKET)
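
A small sketch of the renamed helper's behaviour, assuming S3StoreEnv.BUCKET_NAME still resolves to the S3_BUCKET_NAME variable used before the rename (the S3 enum values are not shown in this diff):

    import os

    from digitalhub.stores.s3.utils import get_s3_bucket_from_env

    os.environ["S3_BUCKET_NAME"] = "my-bucket"   # assumed variable name
    print(get_s3_bucket_from_env())              # "my-bucket"

    del os.environ["S3_BUCKET_NAME"]
    print(get_s3_bucket_from_env())              # falls back to DEFAULT_BUCKET, "datalake"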

digitalhub/stores/sql/configurator.py
@@ -0,0 +1,88 @@
+from __future__ import annotations
+
+from digitalhub.configurator.configurator import configurator
+from digitalhub.stores.sql.enums import SqlStoreEnv
+from digitalhub.stores.sql.models import SqlStoreConfig
+from digitalhub.utils.exceptions import StoreError
+
+
+class SqlStoreConfigurator:
+    """
+    Configure the store by getting the credentials from user
+    provided config or from environment.
+    """
+
+    def __init__(self, config: dict | None = None) -> None:
+        self.configure(config)
+
+    ##############################
+    # Configuration methods
+    ##############################
+
+    def configure(self, config: dict | None = None) -> None:
+        """
+        Configure the store by getting the credentials from user
+        provided config or from environment.
+
+        Parameters
+        ----------
+        config : dict
+            Configuration dictionary.
+
+        Returns
+        -------
+        None
+        """
+        if config is None:
+            self._get_config()
+        else:
+            config: SqlStoreConfig = SqlStoreConfig(**config)
+            for pair in [
+                (SqlStoreEnv.USER.value, config.user),
+                (SqlStoreEnv.PASSWORD.value, config.password),
+                (SqlStoreEnv.HOST.value, config.host),
+                (SqlStoreEnv.PORT.value, config.port),
+                (SqlStoreEnv.DATABASE.value, config.database),
+            ]:
+                configurator.set_credential(*pair)
+
+    def get_sql_conn_string(self) -> str:
+        """
+        Get the connection string from environment variables.
+
+        Returns
+        -------
+        str
+            The connection string.
+        """
+        creds = configurator.get_all_cred()
+        try:
+            user = creds[SqlStoreEnv.USER.value]
+            password = creds[SqlStoreEnv.PASSWORD.value]
+            host = creds[SqlStoreEnv.HOST.value]
+            port = creds[SqlStoreEnv.PORT.value]
+            database = creds[SqlStoreEnv.DATABASE.value]
+            return f"postgresql://{user}:{password}@{host}:{port}/{database}"
+        except KeyError as e:
+            raise StoreError(f"Missing credentials for SQL store. {str(e)}")
+
+    def _get_config(self) -> None:
+        """
+        Get the credentials from environment variables.
+
+        Returns
+        -------
+        None
+        """
+        user = configurator.load_var(SqlStoreEnv.USER.value)
+        password = configurator.load_var(SqlStoreEnv.PASSWORD.value)
+        host = configurator.load_var(SqlStoreEnv.HOST.value)
+        port = configurator.load_var(SqlStoreEnv.PORT.value)
+        database = configurator.load_var(SqlStoreEnv.DATABASE.value)
+        if user is None or password is None or host is None or port is None or database is None:
+            raise StoreError("Missing credentials for SQL store.")
+        configurator.set_credential(SqlStoreEnv.USER.value, user)
+        configurator.set_credential(SqlStoreEnv.PASSWORD.value, password)
+        configurator.set_credential(SqlStoreEnv.HOST.value, host)
+        configurator.set_credential(SqlStoreEnv.PORT.value, port)
+        configurator.set_credential(SqlStoreEnv.DATABASE.value, database)
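
A usage sketch of the new configurator with an explicit config dict (placeholder credentials): the dict is validated through SqlStoreConfig and each value is pushed into the shared credentials configurator.

    from digitalhub.stores.sql.configurator import SqlStoreConfigurator

    sql_cfg = SqlStoreConfigurator(
        config={
            "host": "localhost",      # placeholder values
            "port": 5432,
            "user": "postgres",
            "password": "secret",
            "database": "digitalhub",
        }
    )
    print(sql_cfg.get_sql_conn_string())
    # postgresql://postgres:secret@localhost:5432/digitalhub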

digitalhub/stores/sql/enums.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from enum import Enum
+
+
+class SqlStoreEnv(Enum):
+    """
+    SqlStore environment
+    """
+
+    HOST = "DB_HOST"
+    PORT = "DB_PORT"
+    USER = "DB_USER"
+    PASSWORD = "DB_PASSWORD"
+    DATABASE = "DB_DATABASE"
+    PG_SCHEMA = "DB_SCHEMA"
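
The enum members name the variables the configurator falls back to when no config dict is supplied. A sketch of that environment-based path, assuming configurator.load_var resolves these names from the environment (or the new .ini credentials store), which is not shown in this diff:

    import os

    from digitalhub.stores.sql.configurator import SqlStoreConfigurator

    # Placeholder values; any missing variable raises StoreError.
    for var, value in {
        "DB_HOST": "localhost",
        "DB_PORT": "5432",
        "DB_USER": "postgres",
        "DB_PASSWORD": "secret",
        "DB_DATABASE": "digitalhub",
    }.items():
        os.environ[var] = value

    sql_cfg = SqlStoreConfigurator()  # no dict: credentials come from the environment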

digitalhub/stores/sql/models.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+
+class SqlStoreConfig(BaseModel):
+    """
+    SQL store configuration class.
+    """
+
+    host: str
+    """SQL host."""
+
+    port: int
+    """SQL port."""
+
+    user: str
+    """SQL user."""
+
+    password: str
+    """SQL password."""
+
+    database: str
+    """SQL database name."""

digitalhub/stores/sql/store.py
@@ -1,39 +1,22 @@
 from __future__ import annotations
 
+import typing
 from pathlib import Path
 from typing import Any
 
 import pyarrow as pa
 import pyarrow.parquet as pq
-from sqlalchemy import MetaData, Table, create_engine
+from sqlalchemy import MetaData, Table, create_engine, select
 from sqlalchemy.engine import Engine
-from sqlalchemy.engine.row import LegacyRow
 from sqlalchemy.exc import SQLAlchemyError
 
 from digitalhub.readers.api import get_reader_by_object
-from digitalhub.stores._base.store import Store, StoreConfig
+from digitalhub.stores._base.store import Store
+from digitalhub.stores.sql.configurator import SqlStoreConfigurator
 from digitalhub.utils.exceptions import StoreError
 
-
-class SQLStoreConfig(StoreConfig):
-    """
-    SQL store configuration class.
-    """
-
-    host: str
-    """SQL host."""
-
-    port: int
-    """SQL port."""
-
-    user: str
-    """SQL user."""
-
-    password: str
-    """SQL password."""
-
-    database: str
-    """SQL database name."""
+if typing.TYPE_CHECKING:
+    from sqlalchemy.engine.row import Row
 
 
 class SqlStore(Store):
@@ -42,9 +25,10 @@ class SqlStore(Store):
     artifacts on SQL based storage.
     """
 
-    def __init__(self, name: str, store_type: str, config: SQLStoreConfig) -> None:
-        super().__init__(name, store_type)
-        self.config = config
+    def __init__(self, config: dict | None = None) -> None:
+        super().__init__()
+        self._configurator = SqlStoreConfigurator()
+        self._configurator.configure(config)
 
     ##############################
     # I/O methods
@@ -160,12 +144,12 @@
 
         # Read the table from the database
         sa_table = Table(table, MetaData(), autoload_with=engine)
-        query = sa_table.select()
+        stm = select(sa_table)
         with engine.begin() as conn:
-            result: list[LegacyRow] = conn.execute(query).fetchall()
+            result: list[Row] = conn.execute(stm).fetchall()
 
         # Parse the result
-        data = self._parse_result(result)
+        data = {col: [row[idx] for row in result] for idx, col in enumerate(sa_table.columns.keys())}
 
         # Convert the result to a pyarrow table and
        # write the pyarrow table to a Parquet file
@@ -245,10 +229,7 @@
         str
             The connection string.
         """
-        return (
-            f"postgresql://{self.config.user}:{self.config.password}@"
-            f"{self.config.host}:{self.config.port}/{self.config.database}"
-        )
+        return self._configurator.get_sql_conn_string()
 
     def _get_engine(self, schema: str | None = None) -> Engine:
         """
@@ -377,27 +358,3 @@
         except SQLAlchemyError:
             engine.dispose()
             raise StoreError("No access to db!")
-
-    @staticmethod
-    def _parse_result(result: list[LegacyRow]) -> dict:
-        """
-        Convert a list of list of tuples to a dict.
-
-        Parameters
-        ----------
-        result : list[LegacyRow]
-            The data to convert.
-
-        Returns
-        -------
-        dict
-            The converted data.
-        """
-        data_list = [row.items() for row in result]
-        data = {}
-        for row in data_list:
-            for column_name, value in row:
-                if column_name not in data:
-                    data[column_name] = []
-                data[column_name].append(value)
-        return data
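
The removed _parse_result helper depended on LegacyRow.items(), which is gone in SQLAlchemy 2.x; the replacement builds the same column-wise dict with an inline comprehension over tuple-like Row objects. A self-contained illustration with stand-in data:

    # Stand-ins for sa_table.columns.keys() and the rows returned by fetchall().
    columns = ["id", "name"]
    result = [(1, "alpha"), (2, "beta")]

    data = {col: [row[idx] for row in result] for idx, col in enumerate(columns)}
    assert data == {"id": [1, 2], "name": ["alpha", "beta"]}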

digitalhub/utils/exceptions.py
@@ -71,3 +71,9 @@ class ReaderError(Exception):
     """
     Raised when incontered errors on readers.
     """
+
+
+class ClientError(Exception):
+    """
+    Raised when incontered errors on clients.
+    """

digitalhub/utils/generic_utils.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 import base64
 import importlib.util as imputil
 import json
-from datetime import datetime
+from datetime import date, datetime, time
 from enum import Enum
 from pathlib import Path
 from typing import Any, Callable
@@ -120,14 +120,14 @@ def extract_archive(path: Path, filename: Path) -> None:
         zip_file.extractall(path)
 
 
-class MyEncoder(json.JSONEncoder):
+class CustomJsonEncoder(json.JSONEncoder):
     """
-    Custom JSON encoder to handle numpy types.
+    Custom JSON encoder to handle json dumps.
     """
 
     def default(self, obj: Any) -> Any:
         """
-        Convert numpy types to json.
+        Convert an object to json.
 
         Parameters
         ----------
@@ -147,11 +147,12 @@ class MyEncoder(json.JSONEncoder):
             return float(obj)
         elif isinstance(obj, np.ndarray):
             return obj.tolist()
-        else:
-            return str(obj)
+        elif isinstance(obj, (datetime, date, time)):
+            return obj.isoformat()
+        return str(obj)
 
 
-def dict_to_json(struct: dict) -> str:
+def dump_json(struct: Any) -> str:
     """
     Convert a dict to json.
 
@@ -165,7 +166,7 @@ def dict_to_json(struct: dict) -> str:
     str
         The json string.
     """
-    return json.dumps(struct, cls=MyEncoder)
+    return json.dumps(struct, cls=CustomJsonEncoder).encode("utf-8")
 
 
 def slugify_string(filename: str) -> str:
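
A self-contained sketch of the encoder pattern this change extends (date/time handling via isoformat on top of the existing numpy branches); it mirrors, but is not copied from, the package code:

    import json
    from datetime import date, datetime, time

    class _SketchEncoder(json.JSONEncoder):
        # Stand-in encoder: date/time objects become ISO-8601 strings,
        # any other unknown object falls back to str().
        def default(self, obj):
            if isinstance(obj, (datetime, date, time)):
                return obj.isoformat()
            return str(obj)

    print(json.dumps({"created": datetime(2024, 1, 2, 3, 4, 5)}, cls=_SketchEncoder))
    # {"created": "2024-01-02T03:04:05"}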

digitalhub/utils/uri_utils.py
@@ -99,7 +99,7 @@ def map_uri_scheme(uri: str) -> str:
     if scheme in list_enum(LocalSchemes):
         return SchemeCategory.LOCAL.value
     if scheme in list_enum(InvalidLocalSchemes):
-        raise ValueError("For local uri, do not use any scheme.")
+        raise ValueError("For local URI, do not use any scheme.")
     if scheme in list_enum(RemoteSchemes):
         return SchemeCategory.REMOTE.value
     if scheme in list_enum(S3Schemes):

digitalhub-0.9.1.dist-info/METADATA → digitalhub-0.10.0b0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: digitalhub
-Version: 0.9.1
+Version: 0.10.0b0
 Summary: Python SDK for Digitalhub
 Author-email: Fondazione Bruno Kessler <dslab@fbk.eu>, Matteo Martini <mmartini@fbk.eu>
 License: Apache License
@@ -231,7 +231,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 Requires-Dist: boto3
 Requires-Dist: pydantic
-Requires-Dist: sqlalchemy<2
+Requires-Dist: sqlalchemy
 Requires-Dist: pyarrow
 Requires-Dist: numpy<2
 Requires-Dist: requests
@@ -241,10 +241,10 @@ Requires-Dist: GitPython>=3
 Requires-Dist: psycopg2-binary
 Requires-Dist: python-slugify
 Provides-Extra: full
-Requires-Dist: pandas<2.2,>=1.2; extra == "full"
+Requires-Dist: pandas; extra == "full"
 Requires-Dist: mlflow; extra == "full"
 Provides-Extra: pandas
-Requires-Dist: pandas<2.2,>=1.2; extra == "pandas"
+Requires-Dist: pandas; extra == "pandas"
 Provides-Extra: mlflow
 Requires-Dist: mlflow; extra == "mlflow"
 Provides-Extra: dev
@@ -260,7 +260,6 @@ Requires-Dist: numpydoc>=1.6; extra == "docs"
 
 # Digitalhub Library
 
-The Digitalhub SDK library is used to manage entities and executions in Digitalhub from Python.
-It comes with a suite of tools to help you manage your projects and executions. It exposes CRUD methods to create, read, update and delete entities, and objects methods to excute functions or workflows, collect or store execution results and data.
+The Digitalhub library is a python tool for managing projects, entities and executions in Digitalhub. It exposes CRUD methods to create, read, update and delete entities, tools to execute functions or workflows, collect or store execution results and data.
 
 A more detailed description of the library can be found in the [official documentation](https://scc-digitalhub.github.io/sdk-docs/).