deriva-ml 1.17.4__py3-none-any.whl → 1.17.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/__init__.py CHANGED
@@ -1,3 +1,5 @@
1
+ # We will be loading get_version from setuptools_scm and it will emit a UserWarning about it being deprecated.
2
+
1
3
  from importlib.metadata import PackageNotFoundError, version
2
4
  from typing import TYPE_CHECKING
3
5
 
deriva_ml/core/base.py CHANGED
@@ -187,6 +187,7 @@ class DerivaML(Dataset):
187
187
  logger_config = DEFAULT_LOGGER_OVERRIDES
188
188
  # allow for reconfiguration of module-specific logging levels
189
189
  [logging.getLogger(name).setLevel(level) for name, level in logger_config.items()]
190
+ logging.getLogger("root").setLevel(deriva_logging_level)
190
191
  logging.getLogger("bagit").setLevel(deriva_logging_level)
191
192
  logging.getLogger("bdbag").setLevel(deriva_logging_level)
192
193
 
@@ -1439,7 +1440,9 @@ class DerivaML(Dataset):
1439
1440
  # Create and return a new workflow object
1440
1441
  return Workflow(name=name, workflow_type=workflow_type, description=description)
1441
1442
 
1442
- def create_execution(self, configuration: ExecutionConfiguration, dry_run: bool = False) -> "Execution":
1443
+ def create_execution(
1444
+ self, configuration: ExecutionConfiguration, workflow: Workflow | RID | None = None, dry_run: bool = False
1445
+ ) -> "Execution":
1443
1446
  """Creates an execution environment.
1444
1447
 
1445
1448
  Given an execution configuration, initialize the local compute environment to prepare for executing an
@@ -1454,6 +1457,7 @@ class DerivaML(Dataset):
1454
1457
 
1455
1458
  Args:
1456
1459
  configuration: ExecutionConfiguration:
1460
+ workflow: Optional Workflow object or workflow RID to execute. When provided, it replaces the workflow in the ExecutionConfiguration.
1457
1461
  dry_run: Do not create an execution record or upload results.
1458
1462
 
1459
1463
  Returns:
@@ -1463,7 +1467,7 @@ class DerivaML(Dataset):
1463
1467
  from deriva_ml.execution.execution import Execution
1464
1468
 
1465
1469
  # Create and store an execution instance
1466
- self._execution = Execution(configuration, self, dry_run=dry_run)
1470
+ self._execution = Execution(configuration, self, workflow=workflow, dry_run=dry_run)
1467
1471
  return self._execution
1468
1472
 
1469
1473
  def restore_execution(self, execution_rid: RID | None = None) -> Execution:
deriva_ml/core/config.py CHANGED
@@ -1,3 +1,4 @@
1
+ import getpass
1
2
  import logging
2
3
  from pathlib import Path
3
4
  from typing import Any
@@ -49,8 +50,9 @@ class DerivaMLConfig(BaseModel):
49
50
 
50
51
  @staticmethod
51
52
  def compute_workdir(working_dir) -> Path:
52
- # Create a default working directory if none is provided
53
- working_dir = Path(working_dir) if working_dir else Path.home() / "deriva-ml"
53
+ # Create a default working directory if none is provided. If a working directory is provided, we add the
54
+ # user name to it to ensure that multiple users do not overwrite each other's work.
55
+ working_dir = (Path(working_dir) / getpass.getuser() if working_dir else Path.home()) / "deriva-ml"
54
56
  return working_dir.absolute()
55
57
 
56
58
 
@@ -1,7 +1,7 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
3
  # Safe imports - no circular dependencies
4
- from deriva_ml.execution.execution_configuration import ExecutionConfiguration, AssetRIDConfig
4
+ from deriva_ml.execution.execution_configuration import AssetRIDConfig, ExecutionConfiguration
5
5
  from deriva_ml.execution.workflow import Workflow
6
6
 
7
7
  if TYPE_CHECKING:
@@ -22,5 +22,5 @@ __all__ = [
22
22
  "Execution", # Lazy-loaded
23
23
  "ExecutionConfiguration",
24
24
  "Workflow",
25
- "AssetRIDConfig"
25
+ "AssetRIDConfig",
26
26
  ]
@@ -216,6 +216,7 @@ class Execution:
216
216
  self,
217
217
  configuration: ExecutionConfiguration,
218
218
  ml_object: DerivaML,
219
+ workflow: Workflow | RID | None = None,
219
220
  reload: RID | None = None,
220
221
  dry_run: bool = False,
221
222
  ):
@@ -227,6 +228,8 @@ class Execution:
227
228
  Args:
228
229
  configuration: Settings and parameters for the execution.
229
230
  ml_object: DerivaML instance managing the execution.
231
+ workflow: Optional workflow RID or Workflow object. If not specified, the workflow RID is taken from
232
+ the ExecutionConfiguration object
230
233
  reload: Optional RID of existing execution to reload.
231
234
  dry_run: If True, don't create catalog records or upload results.
232
235
 
@@ -234,7 +237,7 @@ class Execution:
234
237
  DerivaMLException: If initialization fails or configuration is invalid.
235
238
  """
236
239
 
237
- self.asset_paths: list[AssetFilePath] = []
240
+ self.asset_paths: dict[str, list[AssetFilePath]] = {}
238
241
  self.configuration = configuration
239
242
  self._ml_object = ml_object
240
243
  self._model = ml_object.model
@@ -253,6 +256,8 @@ class Execution:
253
256
  self._dry_run = dry_run
254
257
 
255
258
  # Make sure we have a good workflow.
259
+ if workflow:
260
+ self.configuration.workflow = workflow
256
261
  if isinstance(self.configuration.workflow, Workflow):
257
262
  self._ml_object.lookup_term(MLVocab.workflow_type, configuration.workflow.workflow_type)
258
263
  self.workflow_rid = (
@@ -26,9 +26,11 @@ import json
26
26
  import sys
27
27
  from dataclasses import dataclass
28
28
  from pathlib import Path
29
+ from typing import Any
29
30
 
30
31
  from hydra_zen import builds
31
- from pydantic import BaseModel, ConfigDict, Field
32
+ from omegaconf import DictConfig
33
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
32
34
 
33
35
  from deriva_ml.core.definitions import RID
34
36
  from deriva_ml.dataset.aux_classes import DatasetSpec
@@ -67,7 +69,7 @@ class ExecutionConfiguration(BaseModel):
67
69
 
68
70
  datasets: list[DatasetSpec] = []
69
71
  assets: list[RID] = []
70
- workflow: RID | Workflow
72
+ workflow: RID | Workflow | None = None
71
73
  description: str = ""
72
74
  argv: list[str] = Field(default_factory=lambda: sys.argv)
73
75
 
@@ -80,6 +82,10 @@ class ExecutionConfiguration(BaseModel):
80
82
  # config_list: DatasetList = value
81
83
  # value = config_list.datasets
82
84
  # return value
85
+ @field_validator("assets", mode="before")
86
+ @classmethod
87
+ def validate_assets(cls, value: Any) -> Any:
88
+ return [v.rid if isinstance(v, DictConfig) or isinstance(v, AssetRID) else v for v in value]
83
89
 
84
90
  @staticmethod
85
91
  def load_configuration(path: Path) -> ExecutionConfiguration:
@@ -3,6 +3,7 @@ import logging
3
3
  import os
4
4
  import subprocess
5
5
  import sys
6
+ import warnings
6
7
  from pathlib import Path
7
8
  from typing import Any
8
9
 
@@ -130,7 +131,6 @@ class Workflow(BaseModel):
130
131
  self.git_root = Workflow._get_git_root(path)
131
132
 
132
133
  self.version = Workflow.get_dynamic_version(root=str(self.git_root or Path.cwd()))
133
-
134
134
  self._logger = logging.getLogger("deriva_ml")
135
135
  return self
136
136
 
@@ -392,10 +392,19 @@ class Workflow(BaseModel):
392
392
 
393
393
  Works under uv / Python 3.10+ by forcing setuptools to use stdlib distutils.
394
394
  """
395
- # Ensure setuptools doesn't try to override stdlib distutils
395
+ # 1) Tell setuptools to use stdlib distutils (or no override) to avoid
396
+ # the '_distutils_hack' assertion you hit.
396
397
  os.environ.setdefault("SETUPTOOLS_USE_DISTUTILS", "stdlib")
397
398
 
398
- from setuptools_scm import get_version # imported *after* env var is set
399
+ warnings.filterwarnings(
400
+ "ignore",
401
+ category=UserWarning,
402
+ module="_distutils_hack",
403
+ )
404
+ try:
405
+ from setuptools_scm import get_version
406
+ except Exception as e: # ImportError or anything environment-specific
407
+ raise RuntimeError(f"setuptools_scm is not available: {e}") from e
399
408
 
400
409
  if root is None:
401
410
  # Adjust this to point at your repo root if needed
@@ -310,6 +310,7 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
310
310
  },
311
311
  {"Name": "File", "Description": "A file that is not managed by Hatrac"},
312
312
  {"Name": "Input_File", "Description": "A file input to an execution."},
313
+ {"Name": "Output_File", "Description": "A file output from an execution."},
313
314
  {"Name": "Model_File", "Description": "The ML model."},
314
315
  {
315
316
  "Name": "Notebook_Output",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.17.4
3
+ Version: 1.17.6
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -1,5 +1,5 @@
1
1
  deriva_ml/.DS_Store,sha256=gb-f5IXVed_gS5Be1Z6WxCYjrI_r5SdblvfFpIOY4ro,8196
2
- deriva_ml/__init__.py,sha256=YCG7P4PUtO_b-aIIYb4KhKHcfnb8Wz_YeAL-c0HiQlA,1775
2
+ deriva_ml/__init__.py,sha256=OqhpZEC9xDpQ1vKNQSb_b0W32LlNGnfAyQaIkOXJNAs,1887
3
3
  deriva_ml/bump_version.py,sha256=eN2G5G_OeiuFxhOdjjwfxD8Rmv6dFvzIm0y_1x4Mif4,4020
4
4
  deriva_ml/demo_catalog.py,sha256=FfXPlDfzy29K9g2Fr_KmYyRhmxP2eSaqm8_Xcji8fUM,15352
5
5
  deriva_ml/feature.py,sha256=6-aphkxdKjWa9oPSGFWxHcwAc_8hmWj-7I4M178YG5Y,8470
@@ -7,8 +7,8 @@ deriva_ml/install_kernel.py,sha256=b62XY0SLViYO_Zye5r1Pl9qhYZyu_fk4KAO8NS1pxgM,2
7
7
  deriva_ml/run_notebook.py,sha256=_pds1q3WcfWqhCBqKeznbwSv5n7OND8FkL6JQ2Jkfmc,8093
8
8
  deriva_ml/test.py,sha256=BqmQXR9IyQP9h8pWttk0dzyJod2CwcfYbSUZS-Q5r4k,4460
9
9
  deriva_ml/core/__init__.py,sha256=Ko8GsWc7K_eDFW0-GaNS6gOWYP8cWHWir-ChSQaHntE,856
10
- deriva_ml/core/base.py,sha256=zTOxrAonj59hKqaaMvsIbvBEHrCkpdIUZfGi4q_6qks,62554
11
- deriva_ml/core/config.py,sha256=dF4rOLFmbk1DEkQimqbiH4pC519nRZWpwKItARNMiZ4,2244
10
+ deriva_ml/core/base.py,sha256=uy2wgQ41yakoKy0faRcrg9Z-yQ_t2hFwC_mLkJn3sck,62813
11
+ deriva_ml/core/config.py,sha256=HKo_cTuPWbu-IMm5Nw57Wzn1B_Mhm7Wuiq6HC-Tmt2E,2423
12
12
  deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
13
13
  deriva_ml/core/definitions.py,sha256=uq_8uYFBVBVHS691Ri2kdQsN37z0GNYTaZskJIb_ocM,1385
14
14
  deriva_ml/core/enums.py,sha256=sSN4B4OynbB-AXwxRszoFr-KWIWIAfhVa06EzAEHwVc,7194
@@ -21,11 +21,11 @@ deriva_ml/dataset/dataset.py,sha256=d860WuCL0-Pz6TyRpGVzhpPWDMco01-I5LT4dZjYxsQ,
21
21
  deriva_ml/dataset/dataset_bag.py,sha256=ori3BuYVqfeHkVCjNSKuZh7oMdC6uufsszicpTPODiw,19944
22
22
  deriva_ml/dataset/history.py,sha256=FK5AYYz11p4E4FWMVg4r7UPWOD4eobrq3b3xMjWF59g,3197
23
23
  deriva_ml/dataset/upload.py,sha256=n1aXSbOx1hghCDxuF8yf03jZmOLMueXL-rSnQMrfHq0,16535
24
- deriva_ml/execution/__init__.py,sha256=5kKpPwQbxhmRn7Npz7DpavuCxYwCQaDdl4-6z62hbds,705
24
+ deriva_ml/execution/__init__.py,sha256=1ngO7avVUqSOS1o09OzS8jFp5mf-rrLxEBHzlIm5R28,706
25
25
  deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
26
- deriva_ml/execution/execution.py,sha256=pso488INQ9eZx_VO6XHv4N6e3CwclOtgTEDE-qyGnNg,46311
27
- deriva_ml/execution/execution_configuration.py,sha256=lftW9tAzpCiQw83vLT0IG5JjlL4K6Q-qHjkhVquh8-E,5384
28
- deriva_ml/execution/workflow.py,sha256=80mFNRd-4H_DOJUT00aO4k9OFYSj1UOQBJTGm_kTlf8,14439
26
+ deriva_ml/execution/execution.py,sha256=l2dAtalhKt042Yi6vjY9N0sJ49kx-wnxbgEGlYaY4jA,46604
27
+ deriva_ml/execution/execution_configuration.py,sha256=Bxfa_JLbJoEkimRgJ0mDbCKyaKnsSn8j7XON0UbiP4I,5684
28
+ deriva_ml/execution/workflow.py,sha256=wchHbL2_VrROvbNGtEQb4FYxOtQzlqmwR0VyfnadDvE,14795
29
29
  deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
30
  deriva_ml/model/catalog.py,sha256=O6_Ll4Uxg6DyxoBXT9P9CPTt9jx1guVTeX1L3KW1A5c,19645
31
31
  deriva_ml/model/database.py,sha256=BG5FSisl9tWTBnf5k9dNnijOIDyCUDeRhN_inkmIqTw,31132
@@ -33,13 +33,13 @@ deriva_ml/protocols/dataset.py,sha256=1TyaT--89Elcs-nCvVyJxUj4cDaLztZOuSOzzj1cBM
33
33
  deriva_ml/schema/__init__.py,sha256=yV-MfzCF3FA4OOz7mZwMM2q6-x1vgOJ057kUvikFF6E,130
34
34
  deriva_ml/schema/annotations.py,sha256=CMcRqYUlyW8iLCYp6sYJsncaRNtp4kFKoxcg-i-t-50,18302
35
35
  deriva_ml/schema/check_schema.py,sha256=6dadLYHPqRex6AYVClmsESI8WhC7-rb-XnGf2G298xw,3609
36
- deriva_ml/schema/create_schema.py,sha256=9qK9_8SRQT-DwcEwTGSkhi3j2NaoH5EVgthvV2kO-gg,13042
36
+ deriva_ml/schema/create_schema.py,sha256=jB6ZIlQVBMRTWOEnQN6XO6gxMNPdN3t4Q3VdDs8c6SE,13130
37
37
  deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
38
38
  deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
39
39
  deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
40
- deriva_ml-1.17.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
41
- deriva_ml-1.17.4.dist-info/METADATA,sha256=rCfFB1SBVMXGd4aNxpgelYzZQjyxg6HnB4idrmV3tKo,1272
42
- deriva_ml-1.17.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- deriva_ml-1.17.4.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
44
- deriva_ml-1.17.4.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
45
- deriva_ml-1.17.4.dist-info/RECORD,,
40
+ deriva_ml-1.17.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
41
+ deriva_ml-1.17.6.dist-info/METADATA,sha256=F-A3XQG1DZ_MyGCRZ-zijKA4kivzYNDKy0rh_bLL2UA,1272
42
+ deriva_ml-1.17.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ deriva_ml-1.17.6.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
44
+ deriva_ml-1.17.6.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
45
+ deriva_ml-1.17.6.dist-info/RECORD,,