deriva-ml 1.14.0__py3-none-any.whl → 1.14.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +25 -30
- deriva_ml/core/__init__.py +39 -0
- deriva_ml/core/base.py +1489 -0
- deriva_ml/core/constants.py +36 -0
- deriva_ml/core/definitions.py +74 -0
- deriva_ml/core/enums.py +222 -0
- deriva_ml/core/ermrest.py +288 -0
- deriva_ml/core/exceptions.py +28 -0
- deriva_ml/core/filespec.py +116 -0
- deriva_ml/dataset/__init__.py +4 -0
- deriva_ml/{dataset_aux_classes.py → dataset/aux_classes.py} +16 -12
- deriva_ml/{dataset.py → dataset/dataset.py} +405 -428
- deriva_ml/{dataset_bag.py → dataset/dataset_bag.py} +137 -97
- deriva_ml/{history.py → dataset/history.py} +51 -33
- deriva_ml/{upload.py → dataset/upload.py} +48 -70
- deriva_ml/demo_catalog.py +233 -183
- deriva_ml/execution/environment.py +290 -0
- deriva_ml/{execution.py → execution/execution.py} +365 -252
- deriva_ml/execution/execution_configuration.py +163 -0
- deriva_ml/{execution_configuration.py → execution/workflow.py} +206 -218
- deriva_ml/feature.py +83 -46
- deriva_ml/model/__init__.py +0 -0
- deriva_ml/{deriva_model.py → model/catalog.py} +113 -132
- deriva_ml/{database_model.py → model/database.py} +52 -74
- deriva_ml/model/sql_mapper.py +44 -0
- deriva_ml/run_notebook.py +19 -11
- deriva_ml/schema/__init__.py +3 -0
- deriva_ml/{schema_setup → schema}/annotations.py +31 -22
- deriva_ml/schema/check_schema.py +104 -0
- deriva_ml/{schema_setup → schema}/create_schema.py +151 -104
- deriva_ml/schema/deriva-ml-reference.json +8525 -0
- deriva_ml/schema/table_comments_utils.py +57 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.26.dist-info}/METADATA +5 -4
- deriva_ml-1.14.26.dist-info/RECORD +40 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.26.dist-info}/entry_points.txt +1 -0
- deriva_ml/deriva_definitions.py +0 -391
- deriva_ml/deriva_ml_base.py +0 -1046
- deriva_ml/execution_environment.py +0 -139
- deriva_ml/schema_setup/table_comments_utils.py +0 -56
- deriva_ml/test-files/execution-parameters.json +0 -1
- deriva_ml/test-files/notebook-parameters.json +0 -5
- deriva_ml/test_functions.py +0 -141
- deriva_ml/test_notebook.ipynb +0 -197
- deriva_ml-1.14.0.dist-info/RECORD +0 -31
- /deriva_ml/{schema_setup → execution}/__init__.py +0 -0
- /deriva_ml/{schema_setup → schema}/policy.json +0 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.26.dist-info}/WHEEL +0 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.26.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.26.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Configuration management for DerivaML executions.
|
|
2
|
+
|
|
3
|
+
This module provides functionality for configuring and managing execution parameters in DerivaML.
|
|
4
|
+
It includes:
|
|
5
|
+
|
|
6
|
+
- ExecutionConfiguration class: Core class for execution settings
|
|
7
|
+
- Parameter validation: Handles JSON and file-based parameters
|
|
8
|
+
- Dataset specifications: Manages dataset versions and materialization
|
|
9
|
+
- Asset management: Tracks required input files
|
|
10
|
+
|
|
11
|
+
The module supports both direct parameter specification and JSON-based configuration files.
|
|
12
|
+
|
|
13
|
+
Typical usage example:
|
|
14
|
+
>>> config = ExecutionConfiguration(
|
|
15
|
+
... workflow="analysis_workflow",
|
|
16
|
+
... datasets=[DatasetSpec(rid="1-abc123", version="1.0.0")],
|
|
17
|
+
... parameters={"threshold": 0.5},
|
|
18
|
+
... description="Process sample data"
|
|
19
|
+
... )
|
|
20
|
+
>>> execution = ml.create_execution(config)
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import sys
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
31
|
+
|
|
32
|
+
from deriva_ml.core.definitions import RID
|
|
33
|
+
from deriva_ml.dataset.aux_classes import DatasetSpec
|
|
34
|
+
from deriva_ml.execution.workflow import Workflow
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ExecutionConfiguration(BaseModel):
    """Configuration for a DerivaML execution.

    Defines the complete configuration for a computational or manual process in DerivaML,
    including required datasets, input assets, workflow definition, and parameters.

    Attributes:
        datasets (list[DatasetSpec]): Dataset specifications, each containing:
            - rid: Dataset Resource Identifier
            - version: Version to use
            - materialize: Whether to extract dataset contents
        assets (list[RID]): Resource Identifiers of required input assets.
        workflow (RID | Workflow): Workflow definition or its Resource Identifier.
        parameters (dict[str, Any] | Path): Execution parameters, either as:
            - Dictionary of parameter values
            - Path to JSON file containing parameters
        description (str): Description of execution purpose (supports Markdown).
        argv (list[str]): Command line arguments used to start execution. Defaults
            to a copy of ``sys.argv`` taken when the configuration is created.

    Example:
        >>> config = ExecutionConfiguration(
        ...     workflow=Workflow.create_workflow("analysis", "python_script"),
        ...     datasets=[
        ...         DatasetSpec(rid="1-abc123", version="1.0.0", materialize=True)
        ...     ],
        ...     parameters={"threshold": 0.5, "max_iterations": 100},
        ...     description="Process RNA sequence data"
        ... )
    """

    datasets: list[DatasetSpec] = []
    assets: list[RID] = []
    workflow: RID | Workflow
    parameters: dict[str, Any] | Path = {}
    description: str = ""
    # Copy sys.argv rather than returning the live list: otherwise mutating
    # config.argv would silently mutate the interpreter's sys.argv (and vice versa).
    argv: list[str] = Field(default_factory=lambda: list(sys.argv))

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @field_validator("parameters", mode="before")
    @classmethod
    def validate_parameters(cls, value: Any) -> Any:
        """Validates and loads execution parameters.

        If value is a file path (``str`` or ``Path``), the file is opened and parsed
        as JSON. Any other value is returned unchanged for normal field validation.

        Args:
            value: Parameter value to validate, either:
                - Dictionary of parameters
                - Path to JSON file
                - String path to JSON file

        Returns:
            dict[str, Any]: Parsed JSON content when a path was given, otherwise
            the value as passed in.

        Raises:
            ValueError: If JSON file is invalid or cannot be read.
            FileNotFoundError: If parameter file doesn't exist.

        Example:
            >>> config = ExecutionConfiguration(workflow="1-abc123", parameters="params.json")
            >>> print(config.parameters)  # Contents of params.json as dict
        """
        if isinstance(value, (str, Path)):
            # JSON is UTF-8 by specification; do not depend on the locale default.
            with Path(value).open("r", encoding="utf-8") as f:
                return json.load(f)
        return value

    @field_validator("workflow", mode="before")
    @classmethod
    def validate_workflow(cls, value: Any) -> Any:
        """Validates workflow specification.

        Currently a pass-through hook: the value is returned unchanged and the
        field's declared type performs the actual validation.

        Args:
            value: Workflow value to validate (RID or Workflow object).

        Returns:
            RID | Workflow: The workflow specification as given.
        """
        return value

    @classmethod
    def load_configuration(cls, path: Path) -> ExecutionConfiguration:
        """Creates an ExecutionConfiguration from a JSON file.

        Loads and parses a JSON configuration file and validates it into an
        instance of the class the method is called on, so subclasses get
        instances of their own type.

        Args:
            path: Path to JSON configuration file.

        Returns:
            ExecutionConfiguration: Loaded configuration instance.

        Raises:
            ValueError: If JSON file is invalid or missing required fields.
            FileNotFoundError: If configuration file doesn't exist.

        Example:
            >>> config = ExecutionConfiguration.load_configuration(Path("config.json"))
            >>> print(f"Workflow: {config.workflow}")
            >>> print(f"Datasets: {len(config.datasets)}")
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with Path(path).open(encoding="utf-8") as fd:
            config = json.load(fd)
        return cls.model_validate(config)
|
|
145
|
+
|
|
146
|
+
# def download_execution_configuration(
|
|
147
|
+
# self, configuration_rid: RID
|
|
148
|
+
# ) -> ExecutionConfiguration:
|
|
149
|
+
# """Create an ExecutionConfiguration object from a catalog RID that points to a JSON representation of that
|
|
150
|
+
# configuration in hatrac
|
|
151
|
+
#
|
|
152
|
+
# Args:
|
|
153
|
+
# configuration_rid: RID that should point to an asset table that refers to an execution configuration
|
|
154
|
+
#
|
|
155
|
+
# Returns:
|
|
156
|
+
# An ExecutionConfiguration object configured by the parameters in the configuration file.
|
|
157
|
+
# """
|
|
158
|
+
# AssertionError("Not Implemented")
|
|
159
|
+
# configuration = self.retrieve_rid(configuration_rid)
|
|
160
|
+
# with NamedTemporaryFile("w+", delete=False, suffix=".json") as dest_file:
|
|
161
|
+
# hs = HatracStore("https", self.host_name, self.credential)
|
|
162
|
+
# hs.get_obj(path=configuration["URL"], destfilename=dest_file.name)
|
|
163
|
+
# return ExecutionConfiguration.load_configuration(Path(dest_file.name))
|