digitalhub 0.9.2__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of digitalhub might be problematic. Click here for more details.

Files changed (121)
  1. digitalhub/__init__.py +2 -3
  2. digitalhub/client/_base/api_builder.py +1 -1
  3. digitalhub/client/_base/client.py +25 -2
  4. digitalhub/client/_base/params_builder.py +16 -0
  5. digitalhub/client/dhcore/api_builder.py +9 -3
  6. digitalhub/client/dhcore/client.py +30 -398
  7. digitalhub/client/dhcore/configurator.py +361 -0
  8. digitalhub/client/dhcore/error_parser.py +107 -0
  9. digitalhub/client/dhcore/models.py +13 -23
  10. digitalhub/client/dhcore/params_builder.py +178 -0
  11. digitalhub/client/dhcore/utils.py +4 -44
  12. digitalhub/client/local/api_builder.py +13 -18
  13. digitalhub/client/local/client.py +18 -2
  14. digitalhub/client/local/enums.py +11 -0
  15. digitalhub/client/local/params_builder.py +116 -0
  16. digitalhub/configurator/api.py +31 -0
  17. digitalhub/configurator/configurator.py +195 -0
  18. digitalhub/configurator/credentials_store.py +65 -0
  19. digitalhub/configurator/ini_module.py +74 -0
  20. digitalhub/entities/_base/_base/entity.py +2 -2
  21. digitalhub/entities/_base/context/entity.py +4 -4
  22. digitalhub/entities/_base/entity/builder.py +5 -5
  23. digitalhub/entities/_base/executable/entity.py +2 -2
  24. digitalhub/entities/_base/material/entity.py +12 -12
  25. digitalhub/entities/_base/material/status.py +1 -1
  26. digitalhub/entities/_base/material/utils.py +2 -2
  27. digitalhub/entities/_base/unversioned/entity.py +2 -2
  28. digitalhub/entities/_base/versioned/entity.py +2 -2
  29. digitalhub/entities/_commons/enums.py +2 -0
  30. digitalhub/entities/_commons/metrics.py +164 -0
  31. digitalhub/entities/_commons/types.py +5 -0
  32. digitalhub/entities/_commons/utils.py +2 -2
  33. digitalhub/entities/_processors/base.py +527 -0
  34. digitalhub/entities/{_operations/processor.py → _processors/context.py} +212 -837
  35. digitalhub/entities/_processors/utils.py +158 -0
  36. digitalhub/entities/artifact/artifact/spec.py +3 -1
  37. digitalhub/entities/artifact/crud.py +13 -12
  38. digitalhub/entities/artifact/utils.py +1 -1
  39. digitalhub/entities/builders.py +6 -18
  40. digitalhub/entities/dataitem/_base/entity.py +0 -41
  41. digitalhub/entities/dataitem/crud.py +27 -15
  42. digitalhub/entities/dataitem/table/entity.py +49 -35
  43. digitalhub/entities/dataitem/table/models.py +4 -3
  44. digitalhub/{utils/data_utils.py → entities/dataitem/table/utils.py} +46 -54
  45. digitalhub/entities/dataitem/utils.py +58 -10
  46. digitalhub/entities/function/crud.py +9 -9
  47. digitalhub/entities/model/_base/entity.py +120 -0
  48. digitalhub/entities/model/_base/spec.py +6 -17
  49. digitalhub/entities/model/_base/status.py +10 -0
  50. digitalhub/entities/model/crud.py +13 -12
  51. digitalhub/entities/model/huggingface/spec.py +9 -4
  52. digitalhub/entities/model/mlflow/models.py +2 -2
  53. digitalhub/entities/model/mlflow/spec.py +7 -7
  54. digitalhub/entities/model/mlflow/utils.py +44 -5
  55. digitalhub/entities/project/_base/entity.py +317 -9
  56. digitalhub/entities/project/_base/spec.py +8 -6
  57. digitalhub/entities/project/crud.py +12 -11
  58. digitalhub/entities/run/_base/entity.py +103 -6
  59. digitalhub/entities/run/_base/spec.py +4 -2
  60. digitalhub/entities/run/_base/status.py +12 -0
  61. digitalhub/entities/run/crud.py +8 -8
  62. digitalhub/entities/secret/_base/entity.py +3 -3
  63. digitalhub/entities/secret/_base/spec.py +4 -2
  64. digitalhub/entities/secret/crud.py +11 -9
  65. digitalhub/entities/task/_base/entity.py +4 -4
  66. digitalhub/entities/task/_base/models.py +51 -40
  67. digitalhub/entities/task/_base/spec.py +2 -0
  68. digitalhub/entities/task/_base/utils.py +2 -2
  69. digitalhub/entities/task/crud.py +12 -8
  70. digitalhub/entities/workflow/crud.py +9 -9
  71. digitalhub/factory/utils.py +9 -9
  72. digitalhub/readers/{_base → data/_base}/builder.py +1 -1
  73. digitalhub/readers/{_base → data/_base}/reader.py +16 -4
  74. digitalhub/readers/{api.py → data/api.py} +2 -2
  75. digitalhub/readers/{factory.py → data/factory.py} +3 -3
  76. digitalhub/readers/{pandas → data/pandas}/builder.py +2 -2
  77. digitalhub/readers/{pandas → data/pandas}/reader.py +110 -30
  78. digitalhub/readers/query/__init__.py +0 -0
  79. digitalhub/stores/_base/store.py +59 -69
  80. digitalhub/stores/api.py +8 -33
  81. digitalhub/stores/builder.py +44 -161
  82. digitalhub/stores/local/store.py +106 -89
  83. digitalhub/stores/remote/store.py +86 -11
  84. digitalhub/stores/s3/configurator.py +108 -0
  85. digitalhub/stores/s3/enums.py +17 -0
  86. digitalhub/stores/s3/models.py +21 -0
  87. digitalhub/stores/s3/store.py +154 -70
  88. digitalhub/{utils/s3_utils.py → stores/s3/utils.py} +7 -3
  89. digitalhub/stores/sql/configurator.py +88 -0
  90. digitalhub/stores/sql/enums.py +16 -0
  91. digitalhub/stores/sql/models.py +24 -0
  92. digitalhub/stores/sql/store.py +106 -85
  93. digitalhub/{readers/_commons → utils}/enums.py +5 -1
  94. digitalhub/utils/exceptions.py +6 -0
  95. digitalhub/utils/file_utils.py +8 -7
  96. digitalhub/utils/generic_utils.py +28 -15
  97. digitalhub/utils/git_utils.py +16 -9
  98. digitalhub/utils/types.py +5 -0
  99. digitalhub/utils/uri_utils.py +2 -2
  100. {digitalhub-0.9.2.dist-info → digitalhub-0.10.0.dist-info}/METADATA +25 -31
  101. {digitalhub-0.9.2.dist-info → digitalhub-0.10.0.dist-info}/RECORD +108 -99
  102. {digitalhub-0.9.2.dist-info → digitalhub-0.10.0.dist-info}/WHEEL +1 -2
  103. digitalhub/client/dhcore/env.py +0 -23
  104. digitalhub/entities/_base/project/entity.py +0 -341
  105. digitalhub-0.9.2.dist-info/top_level.txt +0 -2
  106. test/local/CRUD/test_artifacts.py +0 -96
  107. test/local/CRUD/test_dataitems.py +0 -96
  108. test/local/CRUD/test_models.py +0 -95
  109. test/local/imports/test_imports.py +0 -66
  110. test/local/instances/test_validate.py +0 -55
  111. test/test_crud_functions.py +0 -109
  112. test/test_crud_runs.py +0 -86
  113. test/test_crud_tasks.py +0 -81
  114. test/testkfp.py +0 -37
  115. test/testkfp_pipeline.py +0 -22
  116. /digitalhub/{entities/_base/project → configurator}/__init__.py +0 -0
  117. /digitalhub/entities/{_operations → _processors}/__init__.py +0 -0
  118. /digitalhub/readers/{_base → data}/__init__.py +0 -0
  119. /digitalhub/readers/{_commons → data/_base}/__init__.py +0 -0
  120. /digitalhub/readers/{pandas → data/pandas}/__init__.py +0 -0
  121. {digitalhub-0.9.2.dist-info → digitalhub-0.10.0.dist-info/licenses}/LICENSE.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import typing
3
4
  from io import BytesIO
4
5
  from pathlib import Path
5
6
  from typing import Any, Type
@@ -9,43 +10,31 @@ import boto3
9
10
  import botocore.client # pylint: disable=unused-import
10
11
  from botocore.exceptions import ClientError
11
12
 
12
- from digitalhub.readers.api import get_reader_by_object
13
- from digitalhub.stores._base.store import Store, StoreConfig
13
+ from digitalhub.readers.data.api import get_reader_by_object
14
+ from digitalhub.stores._base.store import Store
15
+ from digitalhub.stores.s3.configurator import S3StoreConfigurator
16
+ from digitalhub.stores.s3.utils import get_bucket_name
14
17
  from digitalhub.utils.exceptions import StoreError
15
18
  from digitalhub.utils.file_utils import get_file_info_from_s3, get_file_mime_type
16
- from digitalhub.utils.s3_utils import get_bucket_name
19
+ from digitalhub.utils.types import SourcesOrListOfSources
20
+
21
+ if typing.TYPE_CHECKING:
22
+ pass
17
23
 
18
24
  # Type aliases
19
25
  S3Client = Type["botocore.client.S3"]
20
26
 
21
27
 
22
- class S3StoreConfig(StoreConfig):
23
- """
24
- S3 store configuration class.
25
- """
26
-
27
- endpoint_url: str
28
- """S3 endpoint URL."""
29
-
30
- aws_access_key_id: str
31
- """AWS access key ID."""
32
-
33
- aws_secret_access_key: str
34
- """AWS secret access key."""
35
-
36
- bucket_name: str
37
- """S3 bucket name."""
38
-
39
-
40
28
  class S3Store(Store):
41
29
  """
42
30
  S3 store class. It implements the Store interface and provides methods to fetch and persist
43
31
  artifacts on S3 based storage.
44
32
  """
45
33
 
46
- def __init__(self, name: str, store_type: str, config: S3StoreConfig) -> None:
47
- super().__init__(name, store_type)
48
- self.config = config
34
+ def __init__(self, config: dict | None = None) -> None:
35
+ super().__init__()
36
+ self._configurator = S3StoreConfigurator()
37
+ self._configurator.configure(config)
49
38
 
50
39
  ##############################
51
40
  # I/O methods
@@ -129,7 +118,7 @@ class S3Store(Store):
129
118
 
130
119
  def upload(
131
120
  self,
132
- src: str | list[str],
121
+ src: SourcesOrListOfSources,
133
122
  dst: str,
134
123
  ) -> list[tuple[str, str]]:
135
124
  """
@@ -137,7 +126,7 @@ class S3Store(Store):
137
126
 
138
127
  Parameters
139
128
  ----------
140
- src : str | list[str]
129
+ src : SourcesOrListOfSources
141
130
  Source(s).
142
131
  dst : str
143
132
  The destination of the artifact on storage.
@@ -244,12 +233,121 @@ class S3Store(Store):
244
233
 
245
234
  return infos
246
235
 
236
+ ##############################
237
+ # Datastore methods
238
+ ##############################
239
+
240
+ def read_df(
241
+ self,
242
+ path: SourcesOrListOfSources,
243
+ file_format: str | None = None,
244
+ engine: str | None = None,
245
+ **kwargs,
246
+ ) -> Any:
247
+ """
248
+ Read DataFrame from path.
249
+
250
+ Parameters
251
+ ----------
252
+ path : SourcesOrListOfSources
253
+ Path(s) to read DataFrame from.
254
+ file_format : str
255
+ Extension of the file.
256
+ engine : str
257
+ Dataframe engine (pandas, polars, etc.).
258
+ **kwargs : dict
259
+ Keyword arguments.
260
+
261
+ Returns
262
+ -------
263
+ Any
264
+ DataFrame.
265
+ """
266
+ reader = self._get_reader(engine)
267
+
268
+ # Verify if partition or single file
269
+ if self.is_partition(path):
270
+ client, bucket = self._check_factory(path)
271
+ objects = self._list_objects(client, bucket, path)
272
+ keys = [self._get_key(o) for o in objects]
273
+
274
+ else:
275
+ if isinstance(path, list):
276
+ client, bucket = self._check_factory(path[0])
277
+ keys = [self._get_key(p) for p in path]
278
+ else:
279
+ client, bucket = self._check_factory(path)
280
+ keys = [self._get_key(path)]
281
+
282
+ dfs = []
283
+ for key in keys:
284
+ file_format = self._get_extension(file_format, key)
285
+ obj = self._download_fileobject(key, client, bucket)
286
+ dfs.append(reader.read_df(obj, extension=file_format, **kwargs))
287
+
288
+ if len(dfs) == 1:
289
+ return dfs[0]
290
+ return reader.concat_dfs(dfs)
291
+
292
+ def query(
293
+ self,
294
+ query: str,
295
+ engine: str | None = None,
296
+ ) -> Any:
297
+ """
298
+ Query data from database.
299
+
300
+ Parameters
301
+ ----------
302
+ query : str
303
+ The query to execute.
304
+ engine : str
305
+ Dataframe engine (pandas, polars, etc.).
306
+
307
+ Returns
308
+ -------
309
+ Any
310
+ DataFrame.
311
+ """
312
+ raise StoreError("S3 store does not support query.")
313
+
314
+ def write_df(
315
+ self,
316
+ df: Any,
317
+ dst: str,
318
+ extension: str | None = None,
319
+ **kwargs,
320
+ ) -> str:
321
+ """
322
+ Write a dataframe to S3 based storage. Kwargs are passed to df.to_parquet().
323
+
324
+ Parameters
325
+ ----------
326
+ df : Any
327
+ The dataframe.
328
+ dst : str
329
+ The destination path on S3 based storage.
330
+ extension : str
331
+ The extension of the file.
332
+ **kwargs : dict
333
+ Keyword arguments.
334
+
335
+ Returns
336
+ -------
337
+ str
338
+ The S3 path where the dataframe was saved.
339
+ """
340
+ fileobj = BytesIO()
341
+ reader = get_reader_by_object(df)
342
+ reader.write_df(df, fileobj, extension=extension, **kwargs)
343
+ return self.upload_fileobject(fileobj, dst)
344
+
247
345
  ##############################
248
346
  # Private I/O methods
249
347
  ##############################
250
348
 
349
+ @staticmethod
251
350
  def _download_file(
252
- self,
253
351
  key: str,
254
352
  dst_pth: Path,
255
353
  client: S3Client,
@@ -260,8 +358,8 @@ class S3Store(Store):
260
358
 
261
359
  Parameters
262
360
  ----------
263
- keys : str
264
- The list of keys to be downloaded.
361
+ key : str
362
+ The key to be downloaded.
265
363
  dst_pth : str
266
364
  The destination of the files on local filesystem.
267
365
  client : S3Client
@@ -274,9 +372,34 @@ class S3Store(Store):
274
372
  list[str]
275
373
  The list of paths of the downloaded files.
276
374
  """
277
- # Download file
278
375
  client.download_file(bucket, key, dst_pth)
279
376
 
377
+ @staticmethod
378
+ def _download_fileobject(
379
+ key: str,
380
+ client: S3Client,
381
+ bucket: str,
382
+ ) -> BytesIO:
383
+ """
384
+ Download fileobject from S3 partition.
385
+
386
+ Parameters
387
+ ----------
388
+ key : str
389
+ The key of the file.
390
+ client : S3Client
391
+ The S3 client object.
392
+ bucket : str
393
+ The name of the S3 bucket.
394
+
395
+ Returns
396
+ -------
397
+ BytesIO
398
+ The fileobject of the downloaded file.
399
+ """
400
+ obj = client.get_object(Bucket=bucket, Key=key)
401
+ return BytesIO(obj["Body"].read())
402
+
280
403
  def _upload_dir(
281
404
  self,
282
405
  src: str,
@@ -453,41 +576,6 @@ class S3Store(Store):
453
576
  """
454
577
  client.put_object(Bucket=bucket, Key=key, Body=fileobj.getvalue())
455
578
 
456
- ##############################
457
- # Datastore methods
458
- ##############################
459
-
460
- def write_df(
461
- self,
462
- df: Any,
463
- dst: str,
464
- extension: str | None = None,
465
- **kwargs,
466
- ) -> str:
467
- """
468
- Write a dataframe to S3 based storage. Kwargs are passed to df.to_parquet().
469
-
470
- Parameters
471
- ----------
472
- df : Any
473
- The dataframe.
474
- dst : str
475
- The destination path on S3 based storage.
476
- extension : str
477
- The extension of the file.
478
- **kwargs : dict
479
- Keyword arguments.
480
-
481
- Returns
482
- -------
483
- str
484
- The S3 path where the dataframe was saved.
485
- """
486
- fileobj = BytesIO()
487
- reader = get_reader_by_object(df)
488
- reader.write_df(df, fileobj, extension=extension, **kwargs)
489
- return self.upload_fileobject(fileobj, dst)
490
-
491
579
  ##############################
492
580
  # Helper methods
493
581
  ##############################
@@ -512,11 +600,7 @@ class S3Store(Store):
512
600
  S3Client
513
601
  Returns a client object that interacts with the S3 storage service.
514
602
  """
515
- cfg = {
516
- "endpoint_url": self.config.endpoint_url,
517
- "aws_access_key_id": self.config.aws_access_key_id,
518
- "aws_secret_access_key": self.config.aws_secret_access_key,
519
- }
603
+ cfg = self._configurator.get_boto3_client_config()
520
604
  return boto3.client("s3", **cfg)
521
605
 
522
606
  def _check_factory(self, root: str) -> tuple[S3Client, str]:
@@ -6,6 +6,10 @@ from urllib.parse import urlparse
6
6
 
7
7
  from boto3 import client as boto3_client
8
8
 
9
+ from digitalhub.stores.s3.enums import S3StoreEnv
10
+
11
+ DEFAULT_BUCKET = "datalake"
12
+
9
13
 
10
14
  def get_bucket_name(path: str) -> str:
11
15
  """
@@ -59,11 +63,11 @@ def get_s3_source(bucket: str, key: str, filename: Path) -> None:
59
63
  -------
60
64
  None
61
65
  """
62
- s3 = boto3_client("s3", endpoint_url=os.getenv("S3_ENDPOINT_URL"))
66
+ s3 = boto3_client("s3", endpoint_url=os.getenv(S3StoreEnv.ENDPOINT_URL.value))
63
67
  s3.download_file(bucket, key, filename)
64
68
 
65
69
 
66
- def get_s3_bucket() -> str | None:
70
+ def get_s3_bucket_from_env() -> str | None:
67
71
  """
68
72
  Function to get S3 bucket name.
69
73
 
@@ -72,4 +76,4 @@ def get_s3_bucket() -> str | None:
72
76
  str
73
77
  The S3 bucket name.
74
78
  """
75
- return os.getenv("S3_BUCKET_NAME", "datalake")
79
+ return os.getenv(S3StoreEnv.BUCKET_NAME.value, DEFAULT_BUCKET)
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+
3
+ from digitalhub.configurator.configurator import configurator
4
+ from digitalhub.stores.sql.enums import SqlStoreEnv
5
+ from digitalhub.stores.sql.models import SqlStoreConfig
6
+ from digitalhub.utils.exceptions import StoreError
7
+
8
+
9
+ class SqlStoreConfigurator:
10
+ """
11
+ Configure the store by getting the credentials from user
12
+ provided config or from environment.
13
+ """
14
+
15
+ def __init__(self, config: dict | None = None) -> None:
16
+ self.configure(config)
17
+
18
+ ##############################
19
+ # Configuration methods
20
+ ##############################
21
+
22
+ def configure(self, config: dict | None = None) -> None:
23
+ """
24
+ Configure the store by getting the credentials from user
25
+ provided config or from environment.
26
+
27
+ Parameters
28
+ ----------
29
+ config : dict
30
+ Configuration dictionary.
31
+
32
+ Returns
33
+ -------
34
+ None
35
+ """
36
+ if config is None:
37
+ self._get_config()
38
+ else:
39
+ config: SqlStoreConfig = SqlStoreConfig(**config)
40
+ for pair in [
41
+ (SqlStoreEnv.USERNAME.value, config.user),
42
+ (SqlStoreEnv.PASSWORD.value, config.password),
43
+ (SqlStoreEnv.HOST.value, config.host),
44
+ (SqlStoreEnv.PORT.value, config.port),
45
+ (SqlStoreEnv.DATABASE.value, config.database),
46
+ ]:
47
+ configurator.set_credential(*pair)
48
+
49
+ def get_sql_conn_string(self) -> str:
50
+ """
51
+ Get the connection string from environment variables.
52
+
53
+ Returns
54
+ -------
55
+ str
56
+ The connection string.
57
+ """
58
+ creds = configurator.get_all_credentials()
59
+ try:
60
+ user = creds[SqlStoreEnv.USERNAME.value]
61
+ password = creds[SqlStoreEnv.PASSWORD.value]
62
+ host = creds[SqlStoreEnv.HOST.value]
63
+ port = creds[SqlStoreEnv.PORT.value]
64
+ database = creds[SqlStoreEnv.DATABASE.value]
65
+ return f"postgresql://{user}:{password}@{host}:{port}/{database}"
66
+ except KeyError as e:
67
+ raise StoreError(f"Missing credentials for SQL store. {str(e)}")
68
+
69
+ def _get_config(self) -> None:
70
+ """
71
+ Get the credentials from environment variables.
72
+
73
+ Returns
74
+ -------
75
+ None
76
+ """
77
+ user = configurator.load_var(SqlStoreEnv.USERNAME.value)
78
+ password = configurator.load_var(SqlStoreEnv.PASSWORD.value)
79
+ host = configurator.load_var(SqlStoreEnv.HOST.value)
80
+ port = configurator.load_var(SqlStoreEnv.PORT.value)
81
+ database = configurator.load_var(SqlStoreEnv.DATABASE.value)
82
+ if user is None or password is None or host is None or port is None or database is None:
83
+ raise StoreError("Missing credentials for SQL store.")
84
+ configurator.set_credential(SqlStoreEnv.USERNAME.value, user)
85
+ configurator.set_credential(SqlStoreEnv.PASSWORD.value, password)
86
+ configurator.set_credential(SqlStoreEnv.HOST.value, host)
87
+ configurator.set_credential(SqlStoreEnv.PORT.value, port)
88
+ configurator.set_credential(SqlStoreEnv.DATABASE.value, database)
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+
5
+
6
+ class SqlStoreEnv(Enum):
7
+ """
8
+ SqlStore environment
9
+ """
10
+
11
+ HOST = "DB_HOST"
12
+ PORT = "DB_PORT"
13
+ USERNAME = "DB_USERNAME"
14
+ PASSWORD = "DB_PASSWORD"
15
+ DATABASE = "DB_DATABASE"
16
+ PG_SCHEMA = "DB_SCHEMA"
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
+ class SqlStoreConfig(BaseModel):
7
+ """
8
+ SQL store configuration class.
9
+ """
10
+
11
+ host: str
12
+ """SQL host."""
13
+
14
+ port: int
15
+ """SQL port."""
16
+
17
+ user: str
18
+ """SQL user."""
19
+
20
+ password: str
21
+ """SQL password."""
22
+
23
+ database: str
24
+ """SQL database name."""