deriva-ml 1.14.0__py3-none-any.whl → 1.14.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. deriva_ml/__init__.py +25 -30
  2. deriva_ml/core/__init__.py +39 -0
  3. deriva_ml/core/base.py +1489 -0
  4. deriva_ml/core/constants.py +36 -0
  5. deriva_ml/core/definitions.py +74 -0
  6. deriva_ml/core/enums.py +222 -0
  7. deriva_ml/core/ermrest.py +288 -0
  8. deriva_ml/core/exceptions.py +28 -0
  9. deriva_ml/core/filespec.py +116 -0
  10. deriva_ml/dataset/__init__.py +4 -0
  11. deriva_ml/{dataset_aux_classes.py → dataset/aux_classes.py} +16 -12
  12. deriva_ml/{dataset.py → dataset/dataset.py} +406 -428
  13. deriva_ml/{dataset_bag.py → dataset/dataset_bag.py} +137 -97
  14. deriva_ml/{history.py → dataset/history.py} +51 -33
  15. deriva_ml/{upload.py → dataset/upload.py} +48 -70
  16. deriva_ml/demo_catalog.py +233 -183
  17. deriva_ml/execution/environment.py +290 -0
  18. deriva_ml/{execution.py → execution/execution.py} +365 -252
  19. deriva_ml/execution/execution_configuration.py +163 -0
  20. deriva_ml/{execution_configuration.py → execution/workflow.py} +212 -224
  21. deriva_ml/feature.py +83 -46
  22. deriva_ml/model/__init__.py +0 -0
  23. deriva_ml/{deriva_model.py → model/catalog.py} +113 -132
  24. deriva_ml/{database_model.py → model/database.py} +52 -74
  25. deriva_ml/model/sql_mapper.py +44 -0
  26. deriva_ml/run_notebook.py +19 -11
  27. deriva_ml/schema/__init__.py +3 -0
  28. deriva_ml/{schema_setup → schema}/annotations.py +31 -22
  29. deriva_ml/schema/check_schema.py +104 -0
  30. deriva_ml/{schema_setup → schema}/create_schema.py +151 -104
  31. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  32. deriva_ml/schema/table_comments_utils.py +57 -0
  33. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/METADATA +5 -4
  34. deriva_ml-1.14.27.dist-info/RECORD +40 -0
  35. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/entry_points.txt +1 -0
  36. deriva_ml/deriva_definitions.py +0 -391
  37. deriva_ml/deriva_ml_base.py +0 -1046
  38. deriva_ml/execution_environment.py +0 -139
  39. deriva_ml/schema_setup/table_comments_utils.py +0 -56
  40. deriva_ml/test-files/execution-parameters.json +0 -1
  41. deriva_ml/test-files/notebook-parameters.json +0 -5
  42. deriva_ml/test_functions.py +0 -141
  43. deriva_ml/test_notebook.ipynb +0 -197
  44. deriva_ml-1.14.0.dist-info/RECORD +0 -31
  45. /deriva_ml/{schema_setup → execution}/__init__.py +0 -0
  46. /deriva_ml/{schema_setup → schema}/policy.json +0 -0
  47. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/WHEEL +0 -0
  48. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/licenses/LICENSE +0 -0
  49. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,163 @@
1
+ """Configuration management for DerivaML executions.
2
+
3
+ This module provides functionality for configuring and managing execution parameters in DerivaML.
4
+ It includes:
5
+
6
+ - ExecutionConfiguration class: Core class for execution settings
7
+ - Parameter validation: Handles JSON and file-based parameters
8
+ - Dataset specifications: Manages dataset versions and materialization
9
+ - Asset management: Tracks required input files
10
+
11
+ The module supports both direct parameter specification and JSON-based configuration files.
12
+
13
+ Typical usage example:
14
+ >>> config = ExecutionConfiguration(
15
+ ... workflow="analysis_workflow",
16
+ ... datasets=[DatasetSpec(rid="1-abc123", version="1.0.0")],
17
+ ... parameters={"threshold": 0.5},
18
+ ... description="Process sample data"
19
+ ... )
20
+ >>> execution = ml.create_execution(config)
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import sys
27
+ from pathlib import Path
28
+ from typing import Any
29
+
30
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
31
+
32
+ from deriva_ml.core.definitions import RID
33
+ from deriva_ml.dataset.aux_classes import DatasetSpec
34
+ from deriva_ml.execution.workflow import Workflow
35
+
36
+
37
class ExecutionConfiguration(BaseModel):
    """Configuration for a DerivaML execution.

    Defines the complete configuration for a computational or manual process in DerivaML,
    including required datasets, input assets, workflow definition, and parameters.

    Attributes:
        datasets (list[DatasetSpec]): Dataset specifications, each containing:
            - rid: Dataset Resource Identifier
            - version: Version to use
            - materialize: Whether to extract dataset contents
        assets (list[RID]): Resource Identifiers of required input assets.
        workflow (RID | Workflow): Workflow definition or its Resource Identifier.
        parameters (dict[str, Any] | Path): Execution parameters, either as:
            - Dictionary of parameter values
            - Path to JSON file containing parameters (loaded into a dictionary
              during validation)
        description (str): Description of execution purpose (supports Markdown).
        argv (list[str]): Command line arguments used to start execution. Defaults
            to a copy of ``sys.argv`` taken when the configuration is created.

    Example:
        >>> config = ExecutionConfiguration(
        ...     workflow=Workflow.create_workflow("analysis", "python_script"),
        ...     datasets=[
        ...         DatasetSpec(rid="1-abc123", version="1.0.0", materialize=True)
        ...     ],
        ...     parameters={"threshold": 0.5, "max_iterations": 100},
        ...     description="Process RNA sequence data"
        ... )
    """

    datasets: list[DatasetSpec] = []
    assets: list[RID] = []
    workflow: RID | Workflow
    parameters: dict[str, Any] | Path = {}
    description: str = ""
    # Copy sys.argv so that mutating config.argv never alters the interpreter's
    # own argument list (and vice versa).
    argv: list[str] = Field(default_factory=lambda: list(sys.argv))

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @field_validator("parameters", mode="before")
    @classmethod
    def validate_parameters(cls, value: Any) -> Any:
        """Validates and loads execution parameters.

        If value is a file path (``str`` or ``Path``), the file is read as UTF-8
        and parsed as JSON. Any other value is returned unchanged and left to the
        regular field validation.

        Args:
            value: Parameter value to validate, either:
                - Dictionary of parameters
                - Path to JSON file
                - String path to JSON file

        Returns:
            dict[str, Any]: Parameter dictionary loaded from the file, or the
            original value when it is not a path.

        Raises:
            ValueError: If the file is not valid JSON, or its top-level value is
                not a JSON object.
            FileNotFoundError: If parameter file doesn't exist.

        Example:
            >>> config = ExecutionConfiguration(workflow="1-abc123", parameters="params.json")
            >>> print(config.parameters)  # Contents of params.json as dict
        """
        if not isinstance(value, (str, Path)):
            return value

        # JSON text is UTF-8 by specification; do not depend on the locale default.
        with Path(value).open("r", encoding="utf-8") as f:
            loaded = json.load(f)

        # A top-level JSON string would otherwise be coerced to a Path by the
        # `dict | Path` annotation and silently accepted as "parameters".
        if not isinstance(loaded, dict):
            raise ValueError(
                f"Parameter file {value} must contain a JSON object, got {type(loaded).__name__}"
            )
        return loaded

    @field_validator("workflow", mode="before")
    @classmethod
    def validate_workflow(cls, value: Any) -> Any:
        """Validates workflow specification.

        Currently a pass-through hook: the value is returned unchanged and the
        field's type annotation performs the actual validation.

        Args:
            value: Workflow value to validate (RID or Workflow object).

        Returns:
            RID | Workflow: The workflow specification, unmodified.
        """
        return value

    @classmethod
    def load_configuration(cls, path: Path) -> ExecutionConfiguration:
        """Creates an ExecutionConfiguration from a JSON file.

        Reads the file as UTF-8, parses it as JSON, and validates the result
        against this model. When called on a subclass, an instance of that
        subclass is returned.

        Args:
            path: Path to JSON configuration file.

        Returns:
            ExecutionConfiguration: Loaded configuration instance.

        Raises:
            ValueError: If JSON file is invalid or missing required fields.
            FileNotFoundError: If configuration file doesn't exist.

        Example:
            >>> config = ExecutionConfiguration.load_configuration(Path("config.json"))
            >>> print(f"Workflow: {config.workflow}")
            >>> print(f"Datasets: {len(config.datasets)}")
        """
        with Path(path).open("r", encoding="utf-8") as fd:
            config = json.load(fd)
        return cls.model_validate(config)
145
+
146
+ # def download_execution_configuration(
147
+ # self, configuration_rid: RID
148
+ # ) -> ExecutionConfiguration:
149
+ # """Create an ExecutionConfiguration object from a catalog RID that points to a JSON representation of that
150
+ # configuration in hatrac
151
+ #
152
+ # Args:
153
+ # configuration_rid: RID of an entry in an asset table that refers to an execution configuration
154
+ #
155
+ # Returns:
156
+ # An ExecutionConfiguration object configured by the parameters in the configuration file.
157
+ # """
158
+ # AssertionError("Not Implemented")
159
+ # configuration = self.retrieve_rid(configuration_rid)
160
+ # with NamedTemporaryFile("w+", delete=False, suffix=".json") as dest_file:
161
+ # hs = HatracStore("https", self.host_name, self.credential)
162
+ # hs.get_obj(path=configuration["URL"], destfilename=dest_file.name)
163
+ # return ExecutionConfiguration.load_configuration(Path(dest_file.name))