deriva-ml 1.17.10__py3-none-any.whl → 1.17.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +43 -1
- deriva_ml/asset/__init__.py +17 -0
- deriva_ml/asset/asset.py +357 -0
- deriva_ml/asset/aux_classes.py +100 -0
- deriva_ml/bump_version.py +254 -11
- deriva_ml/catalog/__init__.py +21 -0
- deriva_ml/catalog/clone.py +1199 -0
- deriva_ml/catalog/localize.py +426 -0
- deriva_ml/core/__init__.py +29 -0
- deriva_ml/core/base.py +817 -1067
- deriva_ml/core/config.py +169 -21
- deriva_ml/core/constants.py +120 -19
- deriva_ml/core/definitions.py +123 -13
- deriva_ml/core/enums.py +47 -73
- deriva_ml/core/ermrest.py +226 -193
- deriva_ml/core/exceptions.py +297 -14
- deriva_ml/core/filespec.py +99 -28
- deriva_ml/core/logging_config.py +225 -0
- deriva_ml/core/mixins/__init__.py +42 -0
- deriva_ml/core/mixins/annotation.py +915 -0
- deriva_ml/core/mixins/asset.py +384 -0
- deriva_ml/core/mixins/dataset.py +237 -0
- deriva_ml/core/mixins/execution.py +408 -0
- deriva_ml/core/mixins/feature.py +365 -0
- deriva_ml/core/mixins/file.py +263 -0
- deriva_ml/core/mixins/path_builder.py +145 -0
- deriva_ml/core/mixins/rid_resolution.py +204 -0
- deriva_ml/core/mixins/vocabulary.py +400 -0
- deriva_ml/core/mixins/workflow.py +322 -0
- deriva_ml/core/validation.py +389 -0
- deriva_ml/dataset/__init__.py +2 -1
- deriva_ml/dataset/aux_classes.py +20 -4
- deriva_ml/dataset/catalog_graph.py +575 -0
- deriva_ml/dataset/dataset.py +1242 -1008
- deriva_ml/dataset/dataset_bag.py +1311 -182
- deriva_ml/dataset/history.py +27 -14
- deriva_ml/dataset/upload.py +225 -38
- deriva_ml/demo_catalog.py +126 -110
- deriva_ml/execution/__init__.py +46 -2
- deriva_ml/execution/base_config.py +639 -0
- deriva_ml/execution/execution.py +543 -242
- deriva_ml/execution/execution_configuration.py +26 -11
- deriva_ml/execution/execution_record.py +592 -0
- deriva_ml/execution/find_caller.py +298 -0
- deriva_ml/execution/model_protocol.py +175 -0
- deriva_ml/execution/multirun_config.py +153 -0
- deriva_ml/execution/runner.py +595 -0
- deriva_ml/execution/workflow.py +223 -34
- deriva_ml/experiment/__init__.py +8 -0
- deriva_ml/experiment/experiment.py +411 -0
- deriva_ml/feature.py +6 -1
- deriva_ml/install_kernel.py +143 -6
- deriva_ml/interfaces.py +862 -0
- deriva_ml/model/__init__.py +99 -0
- deriva_ml/model/annotations.py +1278 -0
- deriva_ml/model/catalog.py +286 -60
- deriva_ml/model/database.py +144 -649
- deriva_ml/model/deriva_ml_database.py +308 -0
- deriva_ml/model/handles.py +14 -0
- deriva_ml/run_model.py +319 -0
- deriva_ml/run_notebook.py +507 -38
- deriva_ml/schema/__init__.py +18 -2
- deriva_ml/schema/annotations.py +62 -33
- deriva_ml/schema/create_schema.py +169 -69
- deriva_ml/schema/validation.py +601 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -4
- deriva_ml-1.17.11.dist-info/RECORD +77 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +1 -0
- deriva_ml/protocols/dataset.py +0 -19
- deriva_ml/test.py +0 -94
- deriva_ml-1.17.10.dist-info/RECORD +0 -45
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
|
@@ -28,7 +28,6 @@ from dataclasses import dataclass
|
|
|
28
28
|
from pathlib import Path
|
|
29
29
|
from typing import Any
|
|
30
30
|
|
|
31
|
-
from hydra_zen import builds
|
|
32
31
|
from omegaconf import DictConfig
|
|
33
32
|
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
34
33
|
|
|
@@ -49,29 +48,33 @@ class ExecutionConfiguration(BaseModel):
|
|
|
49
48
|
- version: Version to use
|
|
50
49
|
- materialize: Whether to extract dataset contents
|
|
51
50
|
assets (list[RID]): Resource Identifiers of required input assets.
|
|
52
|
-
workflow (
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
- Path to JSON file containing parameters
|
|
51
|
+
workflow (Workflow | None): Workflow object defining the computational process.
|
|
52
|
+
Use ``ml.lookup_workflow(rid)`` or ``ml.lookup_workflow_by_url(url)`` to get
|
|
53
|
+
a Workflow object from a RID or URL.
|
|
56
54
|
description (str): Description of execution purpose (supports Markdown).
|
|
57
55
|
argv (list[str]): Command line arguments used to start execution.
|
|
56
|
+
config_choices (dict[str, str]): Hydra config group choices that were selected.
|
|
57
|
+
Maps group names to selected config names (e.g., {"model_config": "cifar10_quick"}).
|
|
58
|
+
Automatically populated by run_model() and get_notebook_configuration().
|
|
58
59
|
|
|
59
60
|
Example:
|
|
61
|
+
>>> # Look up workflow by RID or URL first
|
|
62
|
+
>>> workflow = ml.lookup_workflow("2-ABC1")
|
|
60
63
|
>>> config = ExecutionConfiguration(
|
|
61
|
-
... workflow=
|
|
64
|
+
... workflow=workflow,
|
|
62
65
|
... datasets=[
|
|
63
66
|
... DatasetSpec(rid="1-abc123", version="1.0.0", materialize=True)
|
|
64
67
|
... ],
|
|
65
|
-
... parameters={"threshold": 0.5, "max_iterations": 100},
|
|
66
68
|
... description="Process RNA sequence data"
|
|
67
69
|
... )
|
|
68
70
|
"""
|
|
69
71
|
|
|
70
72
|
datasets: list[DatasetSpec] = []
|
|
71
73
|
assets: list[RID] = []
|
|
72
|
-
workflow:
|
|
74
|
+
workflow: Workflow | None = None
|
|
73
75
|
description: str = ""
|
|
74
76
|
argv: list[str] = Field(default_factory=lambda: sys.argv)
|
|
77
|
+
config_choices: dict[str, str] = Field(default_factory=dict)
|
|
75
78
|
|
|
76
79
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
77
80
|
|
|
@@ -135,6 +138,21 @@ class ExecutionConfiguration(BaseModel):
|
|
|
135
138
|
|
|
136
139
|
@dataclass
|
|
137
140
|
class AssetRID(str):
|
|
141
|
+
"""A string subclass representing an asset Resource ID with optional description.
|
|
142
|
+
|
|
143
|
+
AssetRID extends str so it can be used directly wherever a string RID is expected,
|
|
144
|
+
while optionally carrying a description for documentation purposes.
|
|
145
|
+
|
|
146
|
+
Attributes:
|
|
147
|
+
rid: The Resource ID string identifying the asset in Deriva.
|
|
148
|
+
description: Optional human-readable description of the asset.
|
|
149
|
+
|
|
150
|
+
Example:
|
|
151
|
+
>>> asset = AssetRID("3RA", "Pretrained model weights")
|
|
152
|
+
>>> print(asset) # "3RA"
|
|
153
|
+
>>> print(asset.description) # "Pretrained model weights"
|
|
154
|
+
"""
|
|
155
|
+
|
|
138
156
|
rid: str
|
|
139
157
|
description: str = ""
|
|
140
158
|
|
|
@@ -142,6 +160,3 @@ class AssetRID(str):
|
|
|
142
160
|
obj = super().__new__(cls, rid)
|
|
143
161
|
obj.description = description
|
|
144
162
|
return obj
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
AssetRIDConfig = builds(AssetRID, populate_full_signature=True)
|
|
@@ -0,0 +1,592 @@
|
|
|
1
|
+
"""ExecutionRecord - Represents a catalog record for an execution.
|
|
2
|
+
|
|
3
|
+
This module provides the ExecutionRecord class which represents the state of an
|
|
4
|
+
execution record in the Deriva catalog. It provides getters and setters for
|
|
5
|
+
mutable properties that automatically sync changes to the catalog.
|
|
6
|
+
|
|
7
|
+
The ExecutionRecord is separate from the Execution class which manages the
|
|
8
|
+
execution lifecycle (start, stop, asset uploads, etc.). This separation allows
|
|
9
|
+
for lightweight lookups of execution records without initializing the full
|
|
10
|
+
execution environment.
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
Look up an execution record and update its description::
|
|
14
|
+
|
|
15
|
+
>>> record = ml.lookup_execution("2-ABC1")
|
|
16
|
+
>>> print(record.status)
|
|
17
|
+
Status.running
|
|
18
|
+
>>> record.description = "Updated analysis description"
|
|
19
|
+
>>> # The change is immediately written to the catalog
|
|
20
|
+
|
|
21
|
+
Query nested executions::
|
|
22
|
+
|
|
23
|
+
>>> children = record.list_nested_executions()
|
|
24
|
+
>>> for child in children:
|
|
25
|
+
... print(f"{child.execution_rid}: {child.status}")
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import logging
|
|
31
|
+
from datetime import datetime
|
|
32
|
+
from typing import TYPE_CHECKING, Any, Iterable
|
|
33
|
+
|
|
34
|
+
from pydantic import BaseModel, ConfigDict, PrivateAttr
|
|
35
|
+
|
|
36
|
+
from deriva_ml.core.definitions import RID, Status
|
|
37
|
+
from deriva_ml.core.exceptions import DerivaMLException
|
|
38
|
+
|
|
39
|
+
if TYPE_CHECKING:
|
|
40
|
+
from deriva_ml.asset.asset import Asset
|
|
41
|
+
from deriva_ml.dataset.dataset import Dataset
|
|
42
|
+
from deriva_ml.execution.workflow import Workflow
|
|
43
|
+
from deriva_ml.interfaces import DerivaMLCatalog
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ExecutionRecord(BaseModel):
    """Represents a catalog record for an execution.

    An ExecutionRecord provides access to the persistent state of an execution
    stored in the Deriva catalog. When bound to a writable catalog, its mutable
    properties (status, description) can be set and changes are automatically
    synced to the catalog.

    This class is separate from the Execution class which manages the execution
    lifecycle. Use ExecutionRecord for lightweight queries and updates to
    execution metadata. Use Execution for running computations with datasets
    and assets.

    Attributes:
        execution_rid (RID): Resource Identifier of the execution record.
        workflow (Workflow | None): The associated workflow object, bound to catalog.
        status (Status): Current execution status (Created, Running, Completed, Failed).
            Setting this property updates the catalog.
        description (str | None): Description of the execution. Setting this
            property updates the catalog.
        start_time (datetime | None): When the execution started (read-only).
        stop_time (datetime | None): When the execution completed (read-only).
        duration (str | None): Duration string from catalog (read-only).

    Example:
        Look up an execution and query its state::

            >>> record = ml.lookup_execution("2-ABC1")
            >>> print(f"Status: {record.status}")
            >>> print(f"Workflow: {record.workflow.name}")
            >>> print(f"Started: {record.start_time}")

        Update mutable properties::

            >>> record.status = Status.completed
            >>> record.description = "Analysis completed successfully"

        Query relationships::

            >>> # Get child executions
            >>> children = record.list_nested_executions()
            >>> # Get parent executions
            >>> parents = record.list_parent_executions()
            >>> # Get input datasets
            >>> datasets = record.list_input_datasets()

        Attempting to update on a read-only catalog raises an error::

            >>> snapshot = ml.catalog_snapshot("2023-01-15T10:30:00")
            >>> record = snapshot.lookup_execution("2-ABC1")
            >>> record.status = Status.completed  # Raises DerivaMLException
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    execution_rid: RID
    _workflow: "Workflow | None" = PrivateAttr(default=None)
    _status: Status = PrivateAttr(default=Status.created)
    _description: str | None = PrivateAttr(default=None)
    start_time: datetime | None = None
    stop_time: datetime | None = None
    duration: str | None = None

    _ml_instance: "DerivaMLCatalog | None" = PrivateAttr(default=None)
    # The logger is optional: it stays None unless passed in via ``_logger``.
    _logger: logging.Logger | None = PrivateAttr(default=None)

    def __init__(
        self,
        execution_rid: RID,
        workflow: "Workflow | None" = None,
        status: Status = Status.created,
        description: str | None = None,
        start_time: datetime | None = None,
        stop_time: datetime | None = None,
        duration: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize an ExecutionRecord.

        Args:
            execution_rid: Resource Identifier of the execution.
            workflow: The associated Workflow object (bound to catalog).
            status: Current execution status.
            description: Description of the execution.
            start_time: When the execution started.
            stop_time: When the execution completed.
            duration: Duration string.
            **kwargs: Additional arguments. Only ``_ml_instance`` and
                ``_logger`` are consumed (for internal use); any other
                keyword is ignored.
        """
        super().__init__(
            execution_rid=execution_rid,
            start_time=start_time,
            stop_time=stop_time,
            duration=duration,
        )
        # Private attributes are assigned directly so that the property
        # setters (which write to the catalog) are not triggered here.
        self._workflow = workflow
        self._status = status
        self._description = description
        if "_ml_instance" in kwargs:
            self._ml_instance = kwargs["_ml_instance"]
        if "_logger" in kwargs:
            self._logger = kwargs["_logger"]

    @property
    def workflow(self) -> "Workflow | None":
        """Get the associated workflow.

        Returns:
            The Workflow object, or None if no workflow is associated.
        """
        return self._workflow

    @property
    def workflow_rid(self) -> RID | None:
        """Get the RID of the associated workflow.

        Returns:
            The workflow RID, or None if no workflow is associated.
        """
        return self._workflow.rid if self._workflow is not None else None

    @property
    def status(self) -> Status:
        """Get the current execution status.

        Returns:
            Status: The current status (Created, Running, Completed, Failed, etc.).
        """
        return self._status

    @status.setter
    def status(self, value: Status) -> None:
        """Set the execution status.

        When bound to a catalog, the catalog record is updated first; the
        in-memory value only changes if that update did not raise. When the
        record is unbound, only the in-memory value changes.

        Args:
            value: The new status value.

        Raises:
            DerivaMLException: If the catalog is read-only (snapshot).
        """
        if self._ml_instance is not None:
            self._update_status_in_catalog(value)
        self._status = value

    @property
    def description(self) -> str | None:
        """Get the execution description.

        Returns:
            The description string, or None if not set.
        """
        return self._description

    @description.setter
    def description(self, value: str | None) -> None:
        """Set the execution description.

        When bound to a catalog, the catalog record is updated first; the
        in-memory value only changes if that update did not raise. When the
        record is unbound, only the in-memory value changes.

        Args:
            value: The new description value.

        Raises:
            DerivaMLException: If the catalog is read-only (snapshot).
        """
        if self._ml_instance is not None:
            self._update_description_in_catalog(value)
        self._description = value

    def _check_writable_catalog(self, operation: str) -> None:
        """Check that the catalog is writable and execution is registered.

        Args:
            operation: Description of the operation being attempted; it is
                interpolated into the error message.

        Raises:
            DerivaMLException: If the execution is not registered (no RID),
                if the record is not bound to a catalog, or if the catalog
                is read-only (a snapshot).
        """
        # deriva.core is resolved at call time rather than at module import.
        import importlib

        _deriva_core = importlib.import_module("deriva.core")
        ErmrestSnapshot = _deriva_core.ErmrestSnapshot

        if self.execution_rid is None:
            raise DerivaMLException(
                f"Cannot {operation}: Execution is not registered in the catalog (no RID)"
            )

        if self._ml_instance is None:
            raise DerivaMLException(
                f"Cannot {operation}: ExecutionRecord is not bound to a catalog"
            )

        if isinstance(self._ml_instance.catalog, ErmrestSnapshot):
            raise DerivaMLException(
                f"Cannot {operation} on a read-only catalog snapshot. "
                "Use a writable catalog connection instead."
            )

    def _update_status_in_catalog(self, new_status: Status, status_detail: str = "") -> None:
        """Update the status field in the catalog.

        Args:
            new_status: The new status value.
            status_detail: Optional detail message for the status. When empty,
                the ``Status_Detail`` column is left out of the update.

        Raises:
            DerivaMLException: If the catalog is read-only or not connected.
        """
        self._check_writable_catalog("update status")

        pb = self._ml_instance.pathBuilder()
        execution_path = pb.schemas[self._ml_instance.ml_schema].Execution
        update_data = {"RID": self.execution_rid, "Status": new_status.value}
        if status_detail:
            update_data["Status_Detail"] = status_detail
        execution_path.update([update_data])

    def _update_description_in_catalog(self, new_description: str | None) -> None:
        """Update the description field in the catalog.

        Args:
            new_description: The new description value.

        Raises:
            DerivaMLException: If the catalog is read-only or not connected.
        """
        self._check_writable_catalog("update description")

        pb = self._ml_instance.pathBuilder()
        execution_path = pb.schemas[self._ml_instance.ml_schema].Execution
        execution_path.update([{"RID": self.execution_rid, "Description": new_description}])

    def update_status(self, status: Status, status_detail: str = "") -> None:
        """Update execution status with an optional detail message.

        This method updates both the Status and Status_Detail columns in the
        catalog. Use this when you want to include a detail message, otherwise
        you can simply assign to the status property.

        Args:
            status: The new status value.
            status_detail: Optional detail message describing the status.

        Raises:
            DerivaMLException: If the catalog is read-only or not connected.

        Example:
            >>> record.update_status(Status.failed, "Network timeout during data transfer")
        """
        if self._ml_instance is not None:
            self._update_status_in_catalog(status, status_detail)
        self._status = status

    def is_nested(self) -> bool:
        """Check if this execution has any parent executions.

        Returns:
            True if this execution is nested under another execution.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> if record.is_nested():
            ...     print("This is a child execution")
        """
        # Only the first parent is needed; stopping there avoids building a
        # record (and looking up a workflow) for every remaining row.
        return next(iter(self.list_parent_executions()), None) is not None

    def is_parent(self) -> bool:
        """Check if this execution has any child executions.

        Returns:
            True if this execution has nested child executions.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> if record.is_parent():
            ...     print("This execution has children")
        """
        # Only the first child is needed; see is_nested().
        return next(iter(self.list_nested_executions()), None) is not None

    def _fetch_linked_execution_rows(self, match_column: str, link_column: str) -> list[dict[str, Any]]:
        """Fetch Execution rows linked to this record through Execution_Execution.

        Args:
            match_column: Execution_Execution column compared against this
                record's RID.
            link_column: Execution_Execution column joined to ``Execution.RID``
                to select the rows that are returned.

        Returns:
            The fetched entity rows, materialised as a list.
        """
        pb = self._ml_instance.pathBuilder()
        schema_path = pb.schemas[self._ml_instance.ml_schema]
        exec_exec_path = schema_path.Execution_Execution
        execution_path = schema_path.Execution
        return list(
            exec_exec_path
            .filter(getattr(exec_exec_path, match_column) == self.execution_rid)
            .link(execution_path, on=(getattr(exec_exec_path, link_column) == execution_path.RID))
            .entities()
            .fetch()
        )

    def _record_from_row(self, row: dict[str, Any]) -> "ExecutionRecord":
        """Build an ExecutionRecord, bound to this record's catalog, from an Execution row."""
        workflow_rid = row.get("Workflow")
        workflow = self._ml_instance.lookup_workflow(workflow_rid) if workflow_rid else None
        return ExecutionRecord(
            execution_rid=row["RID"],
            workflow=workflow,
            # ``or`` (rather than a .get default) also covers a row that
            # carries the key with a null value, which would otherwise reach
            # Status(None).
            status=Status(row.get("Status") or "Created"),
            description=row.get("Description"),
            _ml_instance=self._ml_instance,
            _logger=self._logger,
        )

    def list_nested_executions(
        self, recurse: bool = False, _visited: set[RID] | None = None
    ) -> Iterable["ExecutionRecord"]:
        """List child executions nested under this execution.

        This is a generator: the binding check and the catalog query run when
        iteration starts, not when the method is called.

        Args:
            recurse: If True, recursively list all descendants.
            _visited: Internal parameter to track visited nodes and prevent cycles.

        Returns:
            Iterable of ExecutionRecord objects for child executions.

        Raises:
            DerivaMLException: If not bound to a catalog (raised on first
                iteration).

        Example:
            >>> for child in record.list_nested_executions():
            ...     print(f"Child: {child.execution_rid}")
            >>> # Get all descendants
            >>> for desc in record.list_nested_executions(recurse=True):
            ...     print(f"Descendant: {desc.execution_rid}")
        """
        if self._ml_instance is None:
            raise DerivaMLException("ExecutionRecord is not bound to a catalog")

        # Track visited nodes to prevent infinite loops
        if _visited is None:
            _visited = set()
        if self.execution_rid in _visited:
            return
        _visited.add(self.execution_rid)

        # Execution column = parent, Nested_Execution = child
        rows = self._fetch_linked_execution_rows(
            match_column="Execution", link_column="Nested_Execution"
        )
        for row in rows:
            child = self._record_from_row(row)
            yield child
            if recurse:
                yield from child.list_nested_executions(recurse=True, _visited=_visited)

    def list_parent_executions(
        self, recurse: bool = False, _visited: set[RID] | None = None
    ) -> Iterable["ExecutionRecord"]:
        """List parent executions that this execution is nested under.

        This is a generator: the binding check and the catalog query run when
        iteration starts, not when the method is called.

        Args:
            recurse: If True, recursively list all ancestors.
            _visited: Internal parameter to track visited nodes and prevent cycles.

        Returns:
            Iterable of ExecutionRecord objects for parent executions.

        Raises:
            DerivaMLException: If not bound to a catalog (raised on first
                iteration).

        Example:
            >>> for parent in record.list_parent_executions():
            ...     print(f"Parent: {parent.execution_rid}")
        """
        if self._ml_instance is None:
            raise DerivaMLException("ExecutionRecord is not bound to a catalog")

        # Track visited nodes to prevent infinite loops
        if _visited is None:
            _visited = set()
        if self.execution_rid in _visited:
            return
        _visited.add(self.execution_rid)

        # Execution column = parent, Nested_Execution = child
        rows = self._fetch_linked_execution_rows(
            match_column="Nested_Execution", link_column="Execution"
        )
        for row in rows:
            parent = self._record_from_row(row)
            yield parent
            if recurse:
                yield from parent.list_parent_executions(recurse=True, _visited=_visited)

    def add_nested_execution(self, child: "ExecutionRecord | RID", sequence: int | None = None) -> None:
        """Add a child execution nested under this execution.

        Args:
            child: The child ExecutionRecord or its RID.
            sequence: Optional sequence number for ordering children. When
                None, the ``Sequence`` column is left out of the insert.

        Raises:
            DerivaMLException: If the catalog is read-only or not connected.

        Example:
            >>> parent_record.add_nested_execution(child_record)
            >>> # Or by RID
            >>> parent_record.add_nested_execution("3-XYZ9", sequence=1)
        """
        self._check_writable_catalog("add nested execution")

        child_rid = child.execution_rid if isinstance(child, ExecutionRecord) else child

        pb = self._ml_instance.pathBuilder()
        exec_exec_path = pb.schemas[self._ml_instance.ml_schema].Execution_Execution

        record = {
            "Execution": self.execution_rid,
            "Nested_Execution": child_rid,
        }
        if sequence is not None:
            record["Sequence"] = sequence

        exec_exec_path.insert([record])

    def list_input_datasets(self) -> list["Dataset"]:
        """List datasets that were input to this execution.

        Returns:
            List of Dataset objects that were used as inputs to this execution.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> for ds in record.list_input_datasets():
            ...     print(f"Dataset: {ds.dataset_rid} version {ds.current_version}")
        """
        if self._ml_instance is None:
            raise DerivaMLException("ExecutionRecord is not bound to a catalog")

        pb = self._ml_instance.pathBuilder()
        dataset_exec_path = pb.schemas[self._ml_instance.ml_schema].Dataset_Execution

        records = list(
            dataset_exec_path
            .filter(dataset_exec_path.Execution == self.execution_rid)
            .entities()
            .fetch()
        )

        # Rows without a Dataset value are skipped; the rest are resolved to
        # Dataset objects through the bound catalog.
        return [
            self._ml_instance.lookup_dataset(record["Dataset"])
            for record in records
            if record.get("Dataset")
        ]

    def list_assets(self, asset_role: str | None = None) -> list["Asset"]:
        """List assets associated with this execution.

        Every ``*_Execution`` association table in the domain schemas and the
        ML schema is queried. This is best-effort: a table that cannot be
        queried, or an asset that cannot be looked up, is skipped and reported
        at DEBUG level instead of failing the whole call.

        Args:
            asset_role: Optional filter for asset role ('Input' or 'Output').
                If None, returns all assets associated with this execution.

        Returns:
            List of Asset objects associated with this execution.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> # Get all input assets
            >>> for asset in record.list_assets(asset_role="Input"):
            ...     print(f"Input Asset: {asset.asset_rid} - {asset.filename}")
            >>> # Get all output assets
            >>> for asset in record.list_assets(asset_role="Output"):
            ...     print(f"Output Asset: {asset.asset_rid}")
        """
        from deriva_ml.asset.asset import Asset

        if self._ml_instance is None:
            raise DerivaMLException("ExecutionRecord is not bound to a catalog")

        suffix = "_Execution"
        # These tables end in "_Execution" but are not asset associations:
        # Dataset_Execution links datasets, and Execution_Execution links
        # parent and child executions.
        non_asset_tables = {"Dataset_Execution", "Execution_Execution"}
        log = self._logger if self._logger is not None else logging.getLogger(__name__)

        assets: list[Asset] = []
        pb = self._ml_instance.pathBuilder()
        schemas_to_search = [*self._ml_instance.domain_schemas, self._ml_instance.ml_schema]

        for schema_name in schemas_to_search:
            for table in self._ml_instance.model.model.schemas[schema_name].tables.values():
                if not table.name.endswith(suffix) or table.name in non_asset_tables:
                    continue

                # Strip only the trailing suffix, e.g. "Image_Execution" -> "Image",
                # "Execution_Asset_Execution" -> "Execution_Asset". A plain
                # str.replace would also remove interior "_Execution" segments.
                asset_table_name = table.name[: -len(suffix)]

                table_path = pb.schemas[schema_name].tables[table.name]
                try:
                    query = table_path.filter(table_path.Execution == self.execution_rid)
                    if asset_role:
                        query = query.filter(table_path.Asset_Role == asset_role)
                    records = list(query.entities().fetch())
                except Exception as exc:
                    # Table might not have the expected columns.
                    log.debug("Skipping table %s.%s in list_assets: %s", schema_name, table.name, exc)
                    continue

                for record in records:
                    asset_rid = record.get(asset_table_name)
                    if not asset_rid:
                        continue
                    try:
                        assets.append(self._ml_instance.lookup_asset(asset_rid))
                    except Exception as exc:
                        # Asset might not exist or be inaccessible.
                        log.debug("Skipping asset %s from %s: %s", asset_rid, table.name, exc)
        return assets

    def __str__(self) -> str:
        """Return a multi-line summary; unset or empty time fields are omitted."""
        lines = [
            f"ExecutionRecord(rid={self.execution_rid})",
            f"  workflow_rid: {self.workflow_rid}",
            f"  status: {self.status.value}",
            f"  description: {self.description}",
        ]
        if self.start_time:
            lines.append(f"  start_time: {self.start_time}")
        if self.stop_time:
            lines.append(f"  stop_time: {self.stop_time}")
        if self.duration:
            lines.append(f"  duration: {self.duration}")
        return "\n".join(lines)

    def __repr__(self) -> str:
        """Return repr of the execution record."""
        return f"ExecutionRecord(execution_rid={self.execution_rid!r}, status={self.status!r})"
|