deriva-ml 1.17.10__py3-none-any.whl → 1.17.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. deriva_ml/__init__.py +43 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +21 -0
  7. deriva_ml/catalog/clone.py +1199 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +817 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +126 -110
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +543 -242
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +223 -34
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -4
  67. deriva_ml-1.17.11.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +1 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.10.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,6 @@ from dataclasses import dataclass
28
28
  from pathlib import Path
29
29
  from typing import Any
30
30
 
31
- from hydra_zen import builds
32
31
  from omegaconf import DictConfig
33
32
  from pydantic import BaseModel, ConfigDict, Field, field_validator
34
33
 
@@ -49,29 +48,33 @@ class ExecutionConfiguration(BaseModel):
49
48
  - version: Version to use
50
49
  - materialize: Whether to extract dataset contents
51
50
  assets (list[RID]): Resource Identifiers of required input assets.
52
- workflow (RID | Workflow): Workflow definition or its Resource Identifier.
53
- parameters (dict[str, Any] | Path): Execution parameters, either as:
54
- - Dictionary of parameter values
55
- - Path to JSON file containing parameters
51
+ workflow (Workflow | None): Workflow object defining the computational process.
52
+ Use ``ml.lookup_workflow(rid)`` or ``ml.lookup_workflow_by_url(url)`` to get
53
+ a Workflow object from a RID or URL.
56
54
  description (str): Description of execution purpose (supports Markdown).
57
55
  argv (list[str]): Command line arguments used to start execution.
56
+ config_choices (dict[str, str]): Hydra config group choices that were selected.
57
+ Maps group names to selected config names (e.g., {"model_config": "cifar10_quick"}).
58
+ Automatically populated by run_model() and get_notebook_configuration().
58
59
 
59
60
  Example:
61
+ >>> # Look up workflow by RID or URL first
62
+ >>> workflow = ml.lookup_workflow("2-ABC1")
60
63
  >>> config = ExecutionConfiguration(
61
- ... workflow=Workflow.create_workflow("analysis", "python_script"),
64
+ ... workflow=workflow,
62
65
  ... datasets=[
63
66
  ... DatasetSpec(rid="1-abc123", version="1.0.0", materialize=True)
64
67
  ... ],
65
- ... parameters={"threshold": 0.5, "max_iterations": 100},
66
68
  ... description="Process RNA sequence data"
67
69
  ... )
68
70
  """
69
71
 
70
72
  datasets: list[DatasetSpec] = []
71
73
  assets: list[RID] = []
72
- workflow: RID | Workflow | None = None
74
+ workflow: Workflow | None = None
73
75
  description: str = ""
74
76
  argv: list[str] = Field(default_factory=lambda: sys.argv)
77
+ config_choices: dict[str, str] = Field(default_factory=dict)
75
78
 
76
79
  model_config = ConfigDict(arbitrary_types_allowed=True)
77
80
 
@@ -135,6 +138,21 @@ class ExecutionConfiguration(BaseModel):
135
138
 
136
139
  @dataclass
137
140
  class AssetRID(str):
141
+ """A string subclass representing an asset Resource ID with optional description.
142
+
143
+ AssetRID extends str so it can be used directly wherever a string RID is expected,
144
+ while optionally carrying a description for documentation purposes.
145
+
146
+ Attributes:
147
+ rid: The Resource ID string identifying the asset in Deriva.
148
+ description: Optional human-readable description of the asset.
149
+
150
+ Example:
151
+ >>> asset = AssetRID("3RA", "Pretrained model weights")
152
+ >>> print(asset) # "3RA"
153
+ >>> print(asset.description) # "Pretrained model weights"
154
+ """
155
+
138
156
  rid: str
139
157
  description: str = ""
140
158
 
@@ -142,6 +160,3 @@ class AssetRID(str):
142
160
  obj = super().__new__(cls, rid)
143
161
  obj.description = description
144
162
  return obj
145
-
146
-
147
- AssetRIDConfig = builds(AssetRID, populate_full_signature=True)
@@ -0,0 +1,592 @@
1
+ """ExecutionRecord - Represents a catalog record for an execution.
2
+
3
+ This module provides the ExecutionRecord class which represents the state of an
4
+ execution record in the Deriva catalog. It provides getters and setters for
5
+ mutable properties that automatically sync changes to the catalog.
6
+
7
+ The ExecutionRecord is separate from the Execution class which manages the
8
+ execution lifecycle (start, stop, asset uploads, etc.). This separation allows
9
+ for lightweight lookups of execution records without initializing the full
10
+ execution environment.
11
+
12
+ Example:
13
+ Look up an execution record and update its description::
14
+
15
+ >>> record = ml.lookup_execution("2-ABC1")
16
+ >>> print(record.status)
17
+ Status.running
18
+ >>> record.description = "Updated analysis description"
19
+ >>> # The change is immediately written to the catalog
20
+
21
+ Query nested executions::
22
+
23
+ >>> children = record.list_nested_executions()
24
+ >>> for child in children:
25
+ ... print(f"{child.execution_rid}: {child.status}")
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import logging
31
+ from datetime import datetime
32
+ from typing import TYPE_CHECKING, Any, Iterable
33
+
34
+ from pydantic import BaseModel, ConfigDict, PrivateAttr
35
+
36
+ from deriva_ml.core.definitions import RID, Status
37
+ from deriva_ml.core.exceptions import DerivaMLException
38
+
39
+ if TYPE_CHECKING:
40
+ from deriva_ml.asset.asset import Asset
41
+ from deriva_ml.dataset.dataset import Dataset
42
+ from deriva_ml.execution.workflow import Workflow
43
+ from deriva_ml.interfaces import DerivaMLCatalog
44
+
45
+
46
class ExecutionRecord(BaseModel):
    """Represents a catalog record for an execution.

    An ExecutionRecord provides access to the persistent state of an execution
    stored in the Deriva catalog. When bound to a writable catalog, its mutable
    properties (status, description) can be set and changes are automatically
    synced to the catalog. When the record is not bound to any catalog, setting
    those properties only updates the in-memory value.

    This class is separate from the Execution class which manages the execution
    lifecycle. Use ExecutionRecord for lightweight queries and updates to
    execution metadata. Use Execution for running computations with datasets
    and assets.

    Attributes:
        execution_rid (RID): Resource Identifier of the execution record.
        workflow (Workflow | None): The associated workflow object, bound to catalog.
        status (Status): Current execution status (Created, Running, Completed, Failed).
            Setting this property updates the catalog.
        description (str | None): Description of the execution. Setting this
            property updates the catalog.
        start_time (datetime | None): When the execution started (read-only).
        stop_time (datetime | None): When the execution completed (read-only).
        duration (str | None): Duration string from catalog (read-only).

    Example:
        Look up an execution and query its state::

            >>> record = ml.lookup_execution("2-ABC1")
            >>> print(f"Status: {record.status}")
            >>> print(f"Workflow: {record.workflow.name}")
            >>> print(f"Started: {record.start_time}")

        Update mutable properties::

            >>> record.status = Status.completed
            >>> record.description = "Analysis completed successfully"

        Query relationships::

            >>> # Get child executions
            >>> children = record.list_nested_executions()
            >>> # Get parent executions
            >>> parents = record.list_parent_executions()
            >>> # Get input datasets
            >>> datasets = record.list_input_datasets()

        Attempting to update on a read-only catalog raises an error::

            >>> snapshot = ml.catalog_snapshot("2023-01-15T10:30:00")
            >>> record = snapshot.lookup_execution("2-ABC1")
            >>> record.status = Status.completed  # Raises DerivaMLException
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Public, validated fields.
    execution_rid: RID
    # Mutable state is kept in private attributes and exposed through
    # properties so that assignments can be mirrored to the catalog.
    _workflow: "Workflow | None" = PrivateAttr(default=None)
    _status: Status = PrivateAttr(default=Status.created)
    _description: str | None = PrivateAttr(default=None)
    start_time: datetime | None = None
    stop_time: datetime | None = None
    duration: str | None = None

    # Catalog binding; None means the record is detached (in-memory only).
    _ml_instance: "DerivaMLCatalog | None" = PrivateAttr(default=None)
    # Optional logger supplied by the creator; see _get_logger() for the fallback.
    _logger: logging.Logger | None = PrivateAttr(default=None)

    def __init__(
        self,
        execution_rid: RID,
        workflow: "Workflow | None" = None,
        status: Status = Status.created,
        description: str | None = None,
        start_time: datetime | None = None,
        stop_time: datetime | None = None,
        duration: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize an ExecutionRecord.

        Args:
            execution_rid: Resource Identifier of the execution.
            workflow: The associated Workflow object (bound to catalog).
            status: Current execution status.
            description: Description of the execution.
            start_time: When the execution started.
            stop_time: When the execution completed.
            duration: Duration string.
            **kwargs: Additional arguments. Only ``_ml_instance`` and ``_logger``
                are consumed (internal use); any other keyword is ignored.
        """
        super().__init__(
            execution_rid=execution_rid,
            start_time=start_time,
            stop_time=stop_time,
            duration=duration,
        )
        # Private attributes are assigned directly so that construction never
        # triggers the catalog-writing property setters.
        self._workflow = workflow
        self._status = status
        self._description = description
        if "_ml_instance" in kwargs:
            self._ml_instance = kwargs["_ml_instance"]
        if "_logger" in kwargs:
            self._logger = kwargs["_logger"]

    def _get_logger(self) -> logging.Logger:
        """Return the injected logger, or this module's logger if none was given."""
        if self._logger is not None:
            return self._logger
        return logging.getLogger(__name__)

    @property
    def workflow(self) -> "Workflow | None":
        """Get the associated workflow.

        Returns:
            The Workflow object, or None if no workflow is associated.
        """
        return self._workflow

    @property
    def workflow_rid(self) -> RID | None:
        """Get the RID of the associated workflow.

        Returns:
            The workflow RID, or None if no workflow is associated.
        """
        return self._workflow.rid if self._workflow else None

    @property
    def status(self) -> Status:
        """Get the current execution status.

        Returns:
            Status: The current status (Created, Running, Completed, Failed, etc.).
        """
        return self._status

    @status.setter
    def status(self, value: Status) -> None:
        """Set the execution status.

        When bound to a catalog, this updates the catalog record before the
        in-memory value changes. When unbound, only the in-memory value changes.

        Args:
            value: The new status value.

        Raises:
            DerivaMLException: If the catalog is read-only (snapshot).
        """
        # Same code path as update_status() without a detail message.
        self.update_status(value)

    @property
    def description(self) -> str | None:
        """Get the execution description.

        Returns:
            The description string, or None if not set.
        """
        return self._description

    @description.setter
    def description(self, value: str | None) -> None:
        """Set the execution description.

        When bound to a catalog, this updates the catalog record before the
        in-memory value changes. When unbound, only the in-memory value changes.

        Args:
            value: The new description value.

        Raises:
            DerivaMLException: If the catalog is read-only (snapshot).
        """
        if self._ml_instance is not None:
            self._update_description_in_catalog(value)
        self._description = value

    def _check_writable_catalog(self, operation: str) -> None:
        """Check that the catalog is writable and execution is registered.

        Args:
            operation: Description of the operation being attempted; it is
                interpolated into the error message.

        Raises:
            DerivaMLException: If the execution is not registered (no RID),
                if the record is not bound to a catalog, or if the catalog
                is read-only (a snapshot).
        """
        # deriva.core is resolved at call time rather than at module import.
        import importlib

        _deriva_core = importlib.import_module("deriva.core")
        ErmrestSnapshot = _deriva_core.ErmrestSnapshot

        if self.execution_rid is None:
            raise DerivaMLException(
                f"Cannot {operation}: Execution is not registered in the catalog (no RID)"
            )

        if self._ml_instance is None:
            raise DerivaMLException(
                f"Cannot {operation}: ExecutionRecord is not bound to a catalog"
            )

        if isinstance(self._ml_instance.catalog, ErmrestSnapshot):
            raise DerivaMLException(
                f"Cannot {operation} on a read-only catalog snapshot. "
                "Use a writable catalog connection instead."
            )

    def _update_status_in_catalog(self, new_status: Status, status_detail: str = "") -> None:
        """Update the status field in the catalog.

        Args:
            new_status: The new status value.
            status_detail: Optional detail message for the status. An empty
                string leaves the Status_Detail column out of the update.

        Raises:
            DerivaMLException: If the catalog is read-only or not connected.
        """
        self._check_writable_catalog("update status")

        pb = self._ml_instance.pathBuilder()
        execution_path = pb.schemas[self._ml_instance.ml_schema].Execution
        update_data = {"RID": self.execution_rid, "Status": new_status.value}
        if status_detail:
            update_data["Status_Detail"] = status_detail
        execution_path.update([update_data])

    def _update_description_in_catalog(self, new_description: str | None) -> None:
        """Update the description field in the catalog.

        Args:
            new_description: The new description value.

        Raises:
            DerivaMLException: If the catalog is read-only or not connected.
        """
        self._check_writable_catalog("update description")

        pb = self._ml_instance.pathBuilder()
        execution_path = pb.schemas[self._ml_instance.ml_schema].Execution
        execution_path.update([{"RID": self.execution_rid, "Description": new_description}])

    def update_status(self, status: Status, status_detail: str = "") -> None:
        """Update execution status with an optional detail message.

        When bound to a catalog, this updates the Status column and, if a
        non-empty detail is given, the Status_Detail column. Use this when you
        want to include a detail message, otherwise you can simply assign to
        the status property. When the record is not bound to a catalog, only
        the in-memory status changes.

        Args:
            status: The new status value.
            status_detail: Optional detail message describing the status.

        Raises:
            DerivaMLException: If the bound catalog is read-only.

        Example:
            >>> record.update_status(Status.failed, "Network timeout during data transfer")
        """
        if self._ml_instance is not None:
            self._update_status_in_catalog(status, status_detail)
        # Only reached when the catalog write (if any) succeeded.
        self._status = status

    def is_nested(self) -> bool:
        """Check if this execution has any parent executions.

        Returns:
            True if this execution is nested under another execution.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> if record.is_nested():
            ...     print("This is a child execution")
        """
        # Stop at the first parent instead of materialising every record
        # (each yielded record may cost a workflow lookup).
        return any(True for _ in self.list_parent_executions())

    def is_parent(self) -> bool:
        """Check if this execution has any child executions.

        Returns:
            True if this execution has nested child executions.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> if record.is_parent():
            ...     print("This execution has children")
        """
        # Stop at the first child instead of materialising every record.
        return any(True for _ in self.list_nested_executions())

    def _fetch_linked_executions(self, match_column: str, link_column: str) -> list[dict[str, Any]]:
        """Fetch Execution rows related to this one through Execution_Execution.

        Args:
            match_column: Execution_Execution column compared against this
                record's RID.
            link_column: Execution_Execution column joined to Execution.RID;
                the rows returned are those Execution rows.

        Returns:
            The fetched Execution entities as a list.
        """
        pb = self._ml_instance.pathBuilder()
        ml_schema = self._ml_instance.ml_schema
        exec_exec_path = pb.schemas[ml_schema].Execution_Execution
        execution_path = pb.schemas[ml_schema].Execution

        return list(
            exec_exec_path
            .filter(getattr(exec_exec_path, match_column) == self.execution_rid)
            .link(execution_path, on=(getattr(exec_exec_path, link_column) == execution_path.RID))
            .entities()
            .fetch()
        )

    def _record_from_row(self, row: dict[str, Any]) -> "ExecutionRecord":
        """Build an ExecutionRecord, bound to the same catalog, from an Execution row."""
        workflow_rid = row.get("Workflow")
        workflow = self._ml_instance.lookup_workflow(workflow_rid) if workflow_rid else None

        # A present-but-null Status column must not be passed to Status();
        # treat it like a missing one.
        raw_status = row.get("Status")
        status = Status(raw_status) if raw_status else Status.created

        return ExecutionRecord(
            execution_rid=row["RID"],
            workflow=workflow,
            status=status,
            description=row.get("Description"),
            _ml_instance=self._ml_instance,
            _logger=self._logger,
        )

    def list_nested_executions(
        self, recurse: bool = False, _visited: set[RID] | None = None
    ) -> Iterable["ExecutionRecord"]:
        """List child executions nested under this execution.

        This is a generator: the catalog is queried, and errors are raised,
        when iteration starts rather than when the method is called.

        Args:
            recurse: If True, recursively list all descendants.
            _visited: Internal parameter to track visited nodes and prevent cycles.

        Returns:
            Iterable of ExecutionRecord objects for child executions.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> for child in record.list_nested_executions():
            ...     print(f"Child: {child.execution_rid}")
            >>> # Get all descendants
            >>> for desc in record.list_nested_executions(recurse=True):
            ...     print(f"Descendant: {desc.execution_rid}")
        """
        if self._ml_instance is None:
            raise DerivaMLException("ExecutionRecord is not bound to a catalog")

        # Track visited nodes to prevent infinite loops
        if _visited is None:
            _visited = set()
        if self.execution_rid in _visited:
            return
        _visited.add(self.execution_rid)

        # Execution column = parent, Nested_Execution = child
        for row in self._fetch_linked_executions(
            match_column="Execution", link_column="Nested_Execution"
        ):
            child = self._record_from_row(row)
            yield child
            if recurse:
                yield from child.list_nested_executions(recurse=True, _visited=_visited)

    def list_parent_executions(
        self, recurse: bool = False, _visited: set[RID] | None = None
    ) -> Iterable["ExecutionRecord"]:
        """List parent executions that this execution is nested under.

        This is a generator: the catalog is queried, and errors are raised,
        when iteration starts rather than when the method is called.

        Args:
            recurse: If True, recursively list all ancestors.
            _visited: Internal parameter to track visited nodes and prevent cycles.

        Returns:
            Iterable of ExecutionRecord objects for parent executions.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> for parent in record.list_parent_executions():
            ...     print(f"Parent: {parent.execution_rid}")
        """
        if self._ml_instance is None:
            raise DerivaMLException("ExecutionRecord is not bound to a catalog")

        # Track visited nodes to prevent infinite loops
        if _visited is None:
            _visited = set()
        if self.execution_rid in _visited:
            return
        _visited.add(self.execution_rid)

        # Execution column = parent, Nested_Execution = child
        for row in self._fetch_linked_executions(
            match_column="Nested_Execution", link_column="Execution"
        ):
            parent = self._record_from_row(row)
            yield parent
            if recurse:
                yield from parent.list_parent_executions(recurse=True, _visited=_visited)

    def add_nested_execution(self, child: "ExecutionRecord | RID", sequence: int | None = None) -> None:
        """Add a child execution nested under this execution.

        Args:
            child: The child ExecutionRecord or its RID.
            sequence: Optional sequence number for ordering children.

        Raises:
            DerivaMLException: If the catalog is read-only or not connected.

        Example:
            >>> parent_record.add_nested_execution(child_record)
            >>> # Or by RID
            >>> parent_record.add_nested_execution("3-XYZ9", sequence=1)
        """
        self._check_writable_catalog("add nested execution")

        child_rid = child.execution_rid if isinstance(child, ExecutionRecord) else child

        pb = self._ml_instance.pathBuilder()
        exec_exec_path = pb.schemas[self._ml_instance.ml_schema].Execution_Execution

        record = {
            "Execution": self.execution_rid,
            "Nested_Execution": child_rid,
        }
        if sequence is not None:
            record["Sequence"] = sequence

        exec_exec_path.insert([record])

    def list_input_datasets(self) -> list["Dataset"]:
        """List datasets that were input to this execution.

        Returns:
            List of Dataset objects that were used as inputs to this execution.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> for ds in record.list_input_datasets():
            ...     print(f"Dataset: {ds.dataset_rid} version {ds.current_version}")
        """
        if self._ml_instance is None:
            raise DerivaMLException("ExecutionRecord is not bound to a catalog")

        pb = self._ml_instance.pathBuilder()
        dataset_exec_path = pb.schemas[self._ml_instance.ml_schema].Dataset_Execution

        rows = list(
            dataset_exec_path
            .filter(dataset_exec_path.Execution == self.execution_rid)
            .entities()
            .fetch()
        )

        # Rows without a Dataset value are skipped.
        dataset_rids = [row.get("Dataset") for row in rows]
        return [self._ml_instance.lookup_dataset(rid) for rid in dataset_rids if rid]

    def list_assets(self, asset_role: str | None = None) -> list["Asset"]:
        """List assets associated with this execution.

        Every table whose name ends in ``_Execution`` (other than
        ``Dataset_Execution``) in the domain schemas and the ML schema is
        queried. This is best-effort: tables that cannot be queried and assets
        that cannot be looked up are skipped and reported at debug level.

        Args:
            asset_role: Optional filter for asset role ('Input' or 'Output').
                If None, returns all assets associated with this execution.

        Returns:
            List of Asset objects associated with this execution.

        Raises:
            DerivaMLException: If not bound to a catalog.

        Example:
            >>> # Get all input assets
            >>> for asset in record.list_assets(asset_role="Input"):
            ...     print(f"Input Asset: {asset.asset_rid} - {asset.filename}")
            >>> # Get all output assets
            >>> for asset in record.list_assets(asset_role="Output"):
            ...     print(f"Output Asset: {asset.asset_rid}")
        """
        from deriva_ml.asset.asset import Asset

        if self._ml_instance is None:
            raise DerivaMLException("ExecutionRecord is not bound to a catalog")

        log = self._get_logger()
        suffix = "_Execution"
        assets: list[Asset] = []
        schemas_to_search = [*self._ml_instance.domain_schemas, self._ml_instance.ml_schema]
        # One path builder serves every table queried below.
        pb = self._ml_instance.pathBuilder()

        for schema_name in schemas_to_search:
            for table in self._ml_instance.model.model.schemas[schema_name].tables.values():
                if not table.name.endswith(suffix) or table.name == "Dataset_Execution":
                    continue

                # Strip only the trailing suffix to get the asset column name,
                # e.g. "Image_Execution" -> "Image",
                # "Execution_Asset_Execution" -> "Execution_Asset".
                asset_table_name = table.name.removesuffix(suffix)
                table_path = pb.schemas[schema_name].tables[table.name]

                try:
                    query = table_path.filter(table_path.Execution == self.execution_rid)
                    if asset_role:
                        query = query.filter(table_path.Asset_Role == asset_role)
                    rows = list(query.entities().fetch())
                except Exception as exc:
                    # Table might not have the expected columns.
                    log.debug("Skipping table %s.%s: %s", schema_name, table.name, exc)
                    continue

                for row in rows:
                    asset_rid = row.get(asset_table_name)
                    if not asset_rid:
                        continue
                    try:
                        assets.append(self._ml_instance.lookup_asset(asset_rid))
                    except Exception as exc:
                        # Asset might not exist or be inaccessible.
                        log.debug("Skipping asset %s: %s", asset_rid, exc)
        return assets

    def __str__(self) -> str:
        """Return a multi-line, human-readable summary of the execution record."""
        lines = [
            f"ExecutionRecord(rid={self.execution_rid})",
            f"  workflow_rid: {self.workflow_rid}",
            f"  status: {self.status.value}",
            f"  description: {self.description}",
        ]
        # Timing fields are only shown when they have a value.
        if self.start_time:
            lines.append(f"  start_time: {self.start_time}")
        if self.stop_time:
            lines.append(f"  stop_time: {self.stop_time}")
        if self.duration:
            lines.append(f"  duration: {self.duration}")
        return "\n".join(lines)

    def __repr__(self) -> str:
        """Return repr of the execution record."""
        return f"ExecutionRecord(execution_rid={self.execution_rid!r}, status={self.status!r})"