deriva-ml 1.17.10__py3-none-any.whl → 1.17.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +43 -1
- deriva_ml/asset/__init__.py +17 -0
- deriva_ml/asset/asset.py +357 -0
- deriva_ml/asset/aux_classes.py +100 -0
- deriva_ml/bump_version.py +254 -11
- deriva_ml/catalog/__init__.py +21 -0
- deriva_ml/catalog/clone.py +1199 -0
- deriva_ml/catalog/localize.py +426 -0
- deriva_ml/core/__init__.py +29 -0
- deriva_ml/core/base.py +817 -1067
- deriva_ml/core/config.py +169 -21
- deriva_ml/core/constants.py +120 -19
- deriva_ml/core/definitions.py +123 -13
- deriva_ml/core/enums.py +47 -73
- deriva_ml/core/ermrest.py +226 -193
- deriva_ml/core/exceptions.py +297 -14
- deriva_ml/core/filespec.py +99 -28
- deriva_ml/core/logging_config.py +225 -0
- deriva_ml/core/mixins/__init__.py +42 -0
- deriva_ml/core/mixins/annotation.py +915 -0
- deriva_ml/core/mixins/asset.py +384 -0
- deriva_ml/core/mixins/dataset.py +237 -0
- deriva_ml/core/mixins/execution.py +408 -0
- deriva_ml/core/mixins/feature.py +365 -0
- deriva_ml/core/mixins/file.py +263 -0
- deriva_ml/core/mixins/path_builder.py +145 -0
- deriva_ml/core/mixins/rid_resolution.py +204 -0
- deriva_ml/core/mixins/vocabulary.py +400 -0
- deriva_ml/core/mixins/workflow.py +322 -0
- deriva_ml/core/validation.py +389 -0
- deriva_ml/dataset/__init__.py +2 -1
- deriva_ml/dataset/aux_classes.py +20 -4
- deriva_ml/dataset/catalog_graph.py +575 -0
- deriva_ml/dataset/dataset.py +1242 -1008
- deriva_ml/dataset/dataset_bag.py +1311 -182
- deriva_ml/dataset/history.py +27 -14
- deriva_ml/dataset/upload.py +225 -38
- deriva_ml/demo_catalog.py +126 -110
- deriva_ml/execution/__init__.py +46 -2
- deriva_ml/execution/base_config.py +639 -0
- deriva_ml/execution/execution.py +543 -242
- deriva_ml/execution/execution_configuration.py +26 -11
- deriva_ml/execution/execution_record.py +592 -0
- deriva_ml/execution/find_caller.py +298 -0
- deriva_ml/execution/model_protocol.py +175 -0
- deriva_ml/execution/multirun_config.py +153 -0
- deriva_ml/execution/runner.py +595 -0
- deriva_ml/execution/workflow.py +223 -34
- deriva_ml/experiment/__init__.py +8 -0
- deriva_ml/experiment/experiment.py +411 -0
- deriva_ml/feature.py +6 -1
- deriva_ml/install_kernel.py +143 -6
- deriva_ml/interfaces.py +862 -0
- deriva_ml/model/__init__.py +99 -0
- deriva_ml/model/annotations.py +1278 -0
- deriva_ml/model/catalog.py +286 -60
- deriva_ml/model/database.py +144 -649
- deriva_ml/model/deriva_ml_database.py +308 -0
- deriva_ml/model/handles.py +14 -0
- deriva_ml/run_model.py +319 -0
- deriva_ml/run_notebook.py +507 -38
- deriva_ml/schema/__init__.py +18 -2
- deriva_ml/schema/annotations.py +62 -33
- deriva_ml/schema/create_schema.py +169 -69
- deriva_ml/schema/validation.py +601 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -4
- deriva_ml-1.17.11.dist-info/RECORD +77 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +1 -0
- deriva_ml/protocols/dataset.py +0 -19
- deriva_ml/test.py +0 -94
- deriva_ml-1.17.10.dist-info/RECORD +0 -45
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
deriva_ml/__init__.py
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# We will be loading get_version from setuptools_scm and it will emit a UserWarning about it being deprecated.
|
|
2
2
|
|
|
3
|
+
# IMPORTANT: Import deriva package first to prevent shadowing by local 'deriva.py' files.
|
|
4
|
+
# This ensures 'deriva' is cached in sys.modules before any other imports that might
|
|
5
|
+
# add directories containing a 'deriva.py' file to sys.path.
|
|
6
|
+
import deriva.core # noqa: F401
|
|
7
|
+
|
|
3
8
|
from importlib.metadata import PackageNotFoundError, version
|
|
4
9
|
from typing import TYPE_CHECKING
|
|
5
10
|
|
|
@@ -20,6 +25,8 @@ from deriva_ml.core.definitions import (
|
|
|
20
25
|
MLAsset,
|
|
21
26
|
MLVocab,
|
|
22
27
|
TableDefinition,
|
|
28
|
+
UploadCallback,
|
|
29
|
+
UploadProgress,
|
|
23
30
|
UploadState,
|
|
24
31
|
)
|
|
25
32
|
from deriva_ml.core.exceptions import (
|
|
@@ -34,7 +41,7 @@ if TYPE_CHECKING:
|
|
|
34
41
|
|
|
35
42
|
|
|
36
43
|
# Lazy import function for runtime usage
|
|
37
|
-
def __getattr__(name):
|
|
44
|
+
def __getattr__(name: str) -> type:
|
|
38
45
|
"""Lazy import to avoid circular dependencies."""
|
|
39
46
|
if name == "DerivaML":
|
|
40
47
|
from deriva_ml.core.base import DerivaML
|
|
@@ -44,12 +51,45 @@ def __getattr__(name):
|
|
|
44
51
|
from deriva_ml.execution.execution import Execution
|
|
45
52
|
|
|
46
53
|
return Execution
|
|
54
|
+
elif name == "Asset":
|
|
55
|
+
from deriva_ml.asset.asset import Asset
|
|
56
|
+
|
|
57
|
+
return Asset
|
|
58
|
+
elif name == "AssetFilePath":
|
|
59
|
+
from deriva_ml.asset.aux_classes import AssetFilePath
|
|
60
|
+
|
|
61
|
+
return AssetFilePath
|
|
62
|
+
elif name == "AssetSpec":
|
|
63
|
+
from deriva_ml.asset.aux_classes import AssetSpec
|
|
64
|
+
|
|
65
|
+
return AssetSpec
|
|
66
|
+
elif name == "FeatureValueRecord":
|
|
67
|
+
from deriva_ml.dataset.dataset_bag import FeatureValueRecord
|
|
68
|
+
|
|
69
|
+
return FeatureValueRecord
|
|
70
|
+
elif name == "SchemaValidationReport":
|
|
71
|
+
from deriva_ml.schema.validation import SchemaValidationReport
|
|
72
|
+
|
|
73
|
+
return SchemaValidationReport
|
|
74
|
+
elif name == "validate_ml_schema":
|
|
75
|
+
from deriva_ml.schema.validation import validate_ml_schema
|
|
76
|
+
|
|
77
|
+
return validate_ml_schema
|
|
47
78
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
48
79
|
|
|
49
80
|
|
|
50
81
|
__all__ = [
|
|
51
82
|
"DerivaML", # Lazy-loaded
|
|
52
83
|
"DerivaMLConfig",
|
|
84
|
+
# Asset classes (lazy-loaded)
|
|
85
|
+
"Asset",
|
|
86
|
+
"AssetFilePath",
|
|
87
|
+
"AssetSpec",
|
|
88
|
+
# Feature value record for restructure_assets
|
|
89
|
+
"FeatureValueRecord",
|
|
90
|
+
# Schema validation (lazy-loaded)
|
|
91
|
+
"SchemaValidationReport",
|
|
92
|
+
"validate_ml_schema",
|
|
53
93
|
# Exceptions
|
|
54
94
|
"DerivaMLException",
|
|
55
95
|
"DerivaMLInvalidTerm",
|
|
@@ -69,6 +109,8 @@ __all__ = [
|
|
|
69
109
|
"MLAsset",
|
|
70
110
|
"MLVocab",
|
|
71
111
|
"TableDefinition",
|
|
112
|
+
"UploadCallback",
|
|
113
|
+
"UploadProgress",
|
|
72
114
|
"UploadState",
|
|
73
115
|
]
|
|
74
116
|
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Asset management module for DerivaML.
|
|
2
|
+
|
|
3
|
+
This module provides classes for managing assets (files) in a Deriva catalog:
|
|
4
|
+
|
|
5
|
+
- Asset: Live catalog access to asset records
|
|
6
|
+
- AssetFilePath: Extended Path for staging files during execution
|
|
7
|
+
- AssetSpec: Specification for asset references in configurations
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .asset import Asset
|
|
11
|
+
from .aux_classes import AssetFilePath, AssetSpec
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"Asset",
|
|
15
|
+
"AssetFilePath",
|
|
16
|
+
"AssetSpec",
|
|
17
|
+
]
|
deriva_ml/asset/asset.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
"""Asset management for DerivaML.
|
|
2
|
+
|
|
3
|
+
This module provides the Asset class for managing assets in a Deriva catalog.
|
|
4
|
+
An asset represents a file-based record (image, model, data file, etc.) with
|
|
5
|
+
associated metadata, types, and provenance tracking.
|
|
6
|
+
|
|
7
|
+
The Asset class parallels the Dataset class, providing:
|
|
8
|
+
- Catalog-backed entity access via RID
|
|
9
|
+
- Type management (add/remove asset types)
|
|
10
|
+
- Provenance tracking (which executions created/used the asset)
|
|
11
|
+
- Feature discovery (features defined on the asset or its referenced tables)
|
|
12
|
+
- Download capability for offline access
|
|
13
|
+
|
|
14
|
+
Typical usage:
|
|
15
|
+
>>> # Look up an existing asset
|
|
16
|
+
>>> asset = ml.lookup_asset("3JSE")
|
|
17
|
+
>>> print(f"Asset: {asset.filename} ({asset.asset_table})")
|
|
18
|
+
>>> print(f"Types: {asset.asset_types}")
|
|
19
|
+
|
|
20
|
+
>>> # Find the execution that created this asset
|
|
21
|
+
>>> executions = asset.list_executions(asset_role="Output")
|
|
22
|
+
>>> creator = executions[0] if executions else None
|
|
23
|
+
|
|
24
|
+
>>> # Download for offline use
|
|
25
|
+
>>> local_path = asset.download(Path("/tmp/assets"))
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import logging
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import TYPE_CHECKING, Any
|
|
33
|
+
|
|
34
|
+
from pydantic import ConfigDict, SkipValidation, validate_call
|
|
35
|
+
|
|
36
|
+
from deriva_ml.core.definitions import RID
|
|
37
|
+
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from deriva_ml.execution.execution import Execution
|
|
40
|
+
from deriva_ml.execution.execution_record import ExecutionRecord
|
|
41
|
+
from deriva_ml.feature import Feature, FeatureRecord
|
|
42
|
+
from deriva_ml.interfaces import DerivaMLCatalog
|
|
43
|
+
|
|
44
|
+
# Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
|
|
45
|
+
import importlib
|
|
46
|
+
|
|
47
|
+
_ermrest_model = importlib.import_module("deriva.core.ermrest_model")
|
|
48
|
+
Table = _ermrest_model.Table
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Asset:
|
|
52
|
+
"""Manages asset operations in a Deriva catalog.
|
|
53
|
+
|
|
54
|
+
The Asset class provides functionality for accessing and managing assets
|
|
55
|
+
in a Deriva catalog. It handles metadata, type associations, and provenance.
|
|
56
|
+
|
|
57
|
+
An Asset is a file-based record in an asset table (Image, Model, etc.)
|
|
58
|
+
with associated metadata, controlled vocabulary types, and execution tracking.
|
|
59
|
+
|
|
60
|
+
The class provides a consistent interface parallel to Dataset, allowing
|
|
61
|
+
code to work uniformly with both data collections and individual assets.
|
|
62
|
+
|
|
63
|
+
Attributes:
|
|
64
|
+
asset_rid (RID): The unique Resource Identifier for this asset.
|
|
65
|
+
asset_table (str): Name of the asset table containing this asset.
|
|
66
|
+
filename (str): Original filename of the asset.
|
|
67
|
+
url (str): URL to access the asset file.
|
|
68
|
+
length (int): Size of the asset file in bytes.
|
|
69
|
+
md5 (str): MD5 checksum of the asset file.
|
|
70
|
+
asset_types (list[str]): List of vocabulary terms describing the asset type.
|
|
71
|
+
description (str): Human-readable description of the asset.
|
|
72
|
+
_ml_instance (DerivaMLCatalog): Reference to the catalog containing this asset.
|
|
73
|
+
|
|
74
|
+
Example:
|
|
75
|
+
>>> # Look up an existing asset
|
|
76
|
+
>>> asset = ml.lookup_asset("3JSE")
|
|
77
|
+
>>> print(f"File: {asset.filename}, Size: {asset.length} bytes")
|
|
78
|
+
>>> print(f"Types: {asset.asset_types}")
|
|
79
|
+
|
|
80
|
+
>>> # Find executions that used this asset
|
|
81
|
+
>>> for exe in asset.list_executions():
|
|
82
|
+
... print(f"Execution {exe.execution_rid}: {exe.configuration.description}")
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
def __init__(
|
|
86
|
+
self,
|
|
87
|
+
catalog: "DerivaMLCatalog",
|
|
88
|
+
asset_rid: RID,
|
|
89
|
+
asset_table: str,
|
|
90
|
+
filename: str = "",
|
|
91
|
+
url: str = "",
|
|
92
|
+
length: int = 0,
|
|
93
|
+
md5: str = "",
|
|
94
|
+
description: str = "",
|
|
95
|
+
asset_types: list[str] | None = None,
|
|
96
|
+
execution_rid: RID | None = None,
|
|
97
|
+
):
|
|
98
|
+
"""Initialize an Asset object from an existing asset in the catalog.
|
|
99
|
+
|
|
100
|
+
This constructor wraps an existing asset record. To create a new asset
|
|
101
|
+
in the catalog, use Execution.asset_file_path() and upload_execution_outputs().
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
catalog: The DerivaMLCatalog instance containing this asset.
|
|
105
|
+
asset_rid: The RID of the existing asset record.
|
|
106
|
+
asset_table: Name of the asset table (e.g., "Image", "Model").
|
|
107
|
+
filename: Original filename of the asset.
|
|
108
|
+
url: URL to access the asset file.
|
|
109
|
+
length: Size of the asset file in bytes.
|
|
110
|
+
md5: MD5 checksum of the asset file.
|
|
111
|
+
description: Human-readable description.
|
|
112
|
+
asset_types: List of asset type vocabulary terms.
|
|
113
|
+
execution_rid: RID of the execution that created this asset (if known).
|
|
114
|
+
|
|
115
|
+
Example:
|
|
116
|
+
>>> # Usually created via ml.lookup_asset()
|
|
117
|
+
>>> asset = ml.lookup_asset("3JSE")
|
|
118
|
+
"""
|
|
119
|
+
self._logger = logging.getLogger("deriva_ml")
|
|
120
|
+
self._ml_instance = catalog
|
|
121
|
+
self.asset_rid = asset_rid
|
|
122
|
+
self.asset_table = asset_table
|
|
123
|
+
self.filename = filename
|
|
124
|
+
self.url = url
|
|
125
|
+
self.length = length
|
|
126
|
+
self.md5 = md5
|
|
127
|
+
self.description = description
|
|
128
|
+
self._asset_types = asset_types or []
|
|
129
|
+
self._execution_rid = execution_rid
|
|
130
|
+
|
|
131
|
+
def __repr__(self) -> str:
|
|
132
|
+
"""Return a string representation of the Asset for debugging."""
|
|
133
|
+
return (
|
|
134
|
+
f"<deriva_ml.Asset at {hex(id(self))}: rid='{self.asset_rid}', "
|
|
135
|
+
f"table='{self.asset_table}', file='{self.filename}', types={self._asset_types}>"
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
@property
|
|
139
|
+
def asset_types(self) -> list[str]:
|
|
140
|
+
"""Get the asset types for this asset.
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
List of asset type vocabulary terms.
|
|
144
|
+
"""
|
|
145
|
+
if not self._asset_types:
|
|
146
|
+
self._load_asset_types()
|
|
147
|
+
return self._asset_types
|
|
148
|
+
|
|
149
|
+
def _load_asset_types(self) -> None:
|
|
150
|
+
"""Load asset types from the catalog."""
|
|
151
|
+
# Find the asset type association table
|
|
152
|
+
asset_table_obj = self._ml_instance.model.name_to_table(self.asset_table)
|
|
153
|
+
try:
|
|
154
|
+
type_assoc_table, asset_fk, _ = self._ml_instance.model.find_association(
|
|
155
|
+
asset_table_obj, "Asset_Type"
|
|
156
|
+
)
|
|
157
|
+
except Exception:
|
|
158
|
+
# No type association for this asset table
|
|
159
|
+
self._asset_types = []
|
|
160
|
+
return
|
|
161
|
+
|
|
162
|
+
pb = self._ml_instance.pathBuilder()
|
|
163
|
+
type_path = pb.schemas[type_assoc_table.schema.name].tables[type_assoc_table.name]
|
|
164
|
+
|
|
165
|
+
types = list(
|
|
166
|
+
type_path.filter(type_path.columns[asset_fk] == self.asset_rid)
|
|
167
|
+
.attributes(type_path.Asset_Type)
|
|
168
|
+
.fetch()
|
|
169
|
+
)
|
|
170
|
+
self._asset_types = [t["Asset_Type"] for t in types]
|
|
171
|
+
|
|
172
|
+
@property
|
|
173
|
+
def execution_rid(self) -> RID | None:
|
|
174
|
+
"""Get the RID of the execution that created this asset.
|
|
175
|
+
|
|
176
|
+
Returns:
|
|
177
|
+
RID of the creating execution, or None if not tracked.
|
|
178
|
+
"""
|
|
179
|
+
if self._execution_rid is None:
|
|
180
|
+
# Try to find the execution that created this asset (Output role)
|
|
181
|
+
executions = self.list_executions(asset_role="Output")
|
|
182
|
+
if executions:
|
|
183
|
+
self._execution_rid = executions[0].execution_rid
|
|
184
|
+
return self._execution_rid
|
|
185
|
+
|
|
186
|
+
def list_executions(self, asset_role: str | None = None) -> list["ExecutionRecord"]:
|
|
187
|
+
"""List all executions associated with this asset.
|
|
188
|
+
|
|
189
|
+
Returns all executions that created or used this asset, along with
|
|
190
|
+
the role (Input/Output) in each execution.
|
|
191
|
+
|
|
192
|
+
Args:
|
|
193
|
+
asset_role: Optional filter for asset role ('Input' or 'Output').
|
|
194
|
+
If None, returns all associations.
|
|
195
|
+
|
|
196
|
+
Returns:
|
|
197
|
+
List of ExecutionRecord objects for the executions associated
|
|
198
|
+
with this asset.
|
|
199
|
+
|
|
200
|
+
Example:
|
|
201
|
+
>>> # Find the execution that created this asset
|
|
202
|
+
>>> creators = asset.list_executions(asset_role="Output")
|
|
203
|
+
>>> if creators:
|
|
204
|
+
... print(f"Created by execution {creators[0].execution_rid}")
|
|
205
|
+
|
|
206
|
+
>>> # Find all executions that used this asset as input
|
|
207
|
+
>>> users = asset.list_executions(asset_role="Input")
|
|
208
|
+
"""
|
|
209
|
+
return self._ml_instance.list_asset_executions(self.asset_rid, asset_role=asset_role)
|
|
210
|
+
|
|
211
|
+
def find_features(self) -> list["Feature"]:
|
|
212
|
+
"""Find all features defined on this asset's table.
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
List of Feature objects defined on this asset's table.
|
|
216
|
+
|
|
217
|
+
Example:
|
|
218
|
+
>>> features = asset.find_features()
|
|
219
|
+
>>> for f in features:
|
|
220
|
+
... print(f"Feature: {f.feature_name}")
|
|
221
|
+
"""
|
|
222
|
+
return self._ml_instance.find_features(self.asset_table)
|
|
223
|
+
|
|
224
|
+
def list_feature_values(self, feature_name: str) -> list["FeatureRecord"]:
|
|
225
|
+
"""Get feature values for this specific asset.
|
|
226
|
+
|
|
227
|
+
Args:
|
|
228
|
+
feature_name: Name of the feature to query.
|
|
229
|
+
|
|
230
|
+
Returns:
|
|
231
|
+
List of FeatureRecord instances for this asset. Each record has:
|
|
232
|
+
- Execution: RID of the execution that created this feature value
|
|
233
|
+
- Feature_Name: Name of the feature
|
|
234
|
+
- All feature-specific columns as typed attributes
|
|
235
|
+
- model_dump() method to convert back to a dictionary
|
|
236
|
+
|
|
237
|
+
Example:
|
|
238
|
+
>>> values = asset.list_feature_values("quality_score")
|
|
239
|
+
>>> for v in values:
|
|
240
|
+
... print(f"Score: {v.Score}, Execution: {v.Execution}")
|
|
241
|
+
>>> # Or convert to dict:
|
|
242
|
+
>>> dicts = [v.model_dump() for v in values]
|
|
243
|
+
"""
|
|
244
|
+
return list(self._ml_instance.list_feature_values(self.asset_table, feature_name))
|
|
245
|
+
|
|
246
|
+
def add_asset_type(self, type_name: str) -> None:
|
|
247
|
+
"""Add an asset type to this asset.
|
|
248
|
+
|
|
249
|
+
Args:
|
|
250
|
+
type_name: Name of the asset type vocabulary term to add.
|
|
251
|
+
|
|
252
|
+
Example:
|
|
253
|
+
>>> asset.add_asset_type("Training_Data")
|
|
254
|
+
"""
|
|
255
|
+
asset_table_obj = self._ml_instance.model.name_to_table(self.asset_table)
|
|
256
|
+
type_assoc_table, asset_fk, _ = self._ml_instance.model.find_association(
|
|
257
|
+
asset_table_obj, "Asset_Type"
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
pb = self._ml_instance.pathBuilder()
|
|
261
|
+
type_path = pb.schemas[type_assoc_table.schema.name].tables[type_assoc_table.name]
|
|
262
|
+
|
|
263
|
+
# Insert the association
|
|
264
|
+
type_path.insert([{asset_fk: self.asset_rid, "Asset_Type": type_name}])
|
|
265
|
+
|
|
266
|
+
# Update local cache
|
|
267
|
+
if type_name not in self._asset_types:
|
|
268
|
+
self._asset_types.append(type_name)
|
|
269
|
+
|
|
270
|
+
def remove_asset_type(self, type_name: str) -> None:
|
|
271
|
+
"""Remove an asset type from this asset.
|
|
272
|
+
|
|
273
|
+
Args:
|
|
274
|
+
type_name: Name of the asset type vocabulary term to remove.
|
|
275
|
+
|
|
276
|
+
Example:
|
|
277
|
+
>>> asset.remove_asset_type("Temporary")
|
|
278
|
+
"""
|
|
279
|
+
asset_table_obj = self._ml_instance.model.name_to_table(self.asset_table)
|
|
280
|
+
type_assoc_table, asset_fk, _ = self._ml_instance.model.find_association(
|
|
281
|
+
asset_table_obj, "Asset_Type"
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
pb = self._ml_instance.pathBuilder()
|
|
285
|
+
type_path = pb.schemas[type_assoc_table.schema.name].tables[type_assoc_table.name]
|
|
286
|
+
|
|
287
|
+
# Delete the association
|
|
288
|
+
type_path.filter(
|
|
289
|
+
(type_path.columns[asset_fk] == self.asset_rid) &
|
|
290
|
+
(type_path.Asset_Type == type_name)
|
|
291
|
+
).delete()
|
|
292
|
+
|
|
293
|
+
# Update local cache
|
|
294
|
+
if type_name in self._asset_types:
|
|
295
|
+
self._asset_types.remove(type_name)
|
|
296
|
+
|
|
297
|
+
def download(self, dest_dir: Path, update_catalog: bool = False) -> Path:
|
|
298
|
+
"""Download the asset file to a local directory.
|
|
299
|
+
|
|
300
|
+
Args:
|
|
301
|
+
dest_dir: Directory to download the asset to.
|
|
302
|
+
update_catalog: If True and called within an execution context,
|
|
303
|
+
track this asset as an input to the current execution.
|
|
304
|
+
|
|
305
|
+
Returns:
|
|
306
|
+
Path to the downloaded file.
|
|
307
|
+
|
|
308
|
+
Example:
|
|
309
|
+
>>> local_path = asset.download(Path("/tmp/assets"))
|
|
310
|
+
>>> print(f"Downloaded to: {local_path}")
|
|
311
|
+
"""
|
|
312
|
+
from deriva_ml.execution.execution import Execution
|
|
313
|
+
|
|
314
|
+
# Use hatrac to download the file
|
|
315
|
+
dest_dir = Path(dest_dir)
|
|
316
|
+
dest_dir.mkdir(parents=True, exist_ok=True)
|
|
317
|
+
|
|
318
|
+
dest_path = dest_dir / self.filename
|
|
319
|
+
self._ml_instance.hatrac.get_obj(self.url, destfilename=str(dest_path))
|
|
320
|
+
|
|
321
|
+
return dest_path
|
|
322
|
+
|
|
323
|
+
def get_metadata(self) -> dict[str, Any]:
|
|
324
|
+
"""Get all metadata for this asset from the catalog.
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
Dictionary of all columns/values for this asset record.
|
|
328
|
+
|
|
329
|
+
Example:
|
|
330
|
+
>>> metadata = asset.get_metadata()
|
|
331
|
+
>>> print(f"Created: {metadata.get('RCT')}")
|
|
332
|
+
"""
|
|
333
|
+
pb = self._ml_instance.pathBuilder()
|
|
334
|
+
asset_path = pb.schemas[self._ml_instance.model.name_to_table(self.asset_table).schema.name].tables[self.asset_table]
|
|
335
|
+
|
|
336
|
+
records = list(asset_path.filter(asset_path.RID == self.asset_rid).entities().fetch())
|
|
337
|
+
return records[0] if records else {}
|
|
338
|
+
|
|
339
|
+
def get_chaise_url(self) -> str:
|
|
340
|
+
"""Get the Chaise URL for viewing this asset in the web interface.
|
|
341
|
+
|
|
342
|
+
Returns:
|
|
343
|
+
URL to view this asset in Chaise.
|
|
344
|
+
|
|
345
|
+
Example:
|
|
346
|
+
>>> url = asset.get_chaise_url()
|
|
347
|
+
>>> print(f"View at: {url}")
|
|
348
|
+
"""
|
|
349
|
+
table_obj = self._ml_instance.model.name_to_table(self.asset_table)
|
|
350
|
+
schema_name = table_obj.schema.name
|
|
351
|
+
catalog_id = self._ml_instance.catalog_id
|
|
352
|
+
hostname = self._ml_instance.host_name
|
|
353
|
+
|
|
354
|
+
return (
|
|
355
|
+
f"https://{hostname}/chaise/record/#{catalog_id}/"
|
|
356
|
+
f"{schema_name}:{self.asset_table}/RID={self.asset_rid}"
|
|
357
|
+
)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Auxiliary classes for asset management in DerivaML.
|
|
2
|
+
|
|
3
|
+
This module defines helper classes for asset operations including:
|
|
4
|
+
- AssetFilePath: Extended Path for in-flight asset staging
|
|
5
|
+
- AssetSpec: Specification for asset references in configurations
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, ConfigDict, model_validator
|
|
12
|
+
|
|
13
|
+
from deriva_ml.core.definitions import RID
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AssetFilePath(Path):
|
|
17
|
+
"""Extended Path class for managing asset files during execution.
|
|
18
|
+
|
|
19
|
+
Represents a file path with additional metadata about its role as an asset
|
|
20
|
+
in the catalog. This class extends the standard Path class to include
|
|
21
|
+
information about the asset's catalog representation and type.
|
|
22
|
+
|
|
23
|
+
This is primarily used during execution for staging files before upload
|
|
24
|
+
or after download. For catalog-backed asset operations, use the Asset class.
|
|
25
|
+
|
|
26
|
+
Attributes:
|
|
27
|
+
asset_table: Name of the asset table in the catalog (e.g., "Image", "Model").
|
|
28
|
+
file_name: Name of the local file containing the asset.
|
|
29
|
+
asset_metadata: Additional columns beyond URL, Length, and checksum.
|
|
30
|
+
asset_types: Terms from the Asset_Type controlled vocabulary.
|
|
31
|
+
asset_rid: Resource Identifier if uploaded to an asset table.
|
|
32
|
+
|
|
33
|
+
Example:
|
|
34
|
+
>>> path = AssetFilePath(
|
|
35
|
+
... "/path/to/file.txt",
|
|
36
|
+
... asset_table="Execution_Asset",
|
|
37
|
+
... file_name="results.txt",
|
|
38
|
+
... asset_metadata={"version": "1.0"},
|
|
39
|
+
... asset_types=["Model_File"]
|
|
40
|
+
... )
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
def __init__(
|
|
44
|
+
self,
|
|
45
|
+
asset_path: str | Path,
|
|
46
|
+
asset_table: str,
|
|
47
|
+
file_name: str,
|
|
48
|
+
asset_metadata: dict[str, Any],
|
|
49
|
+
asset_types: list[str] | str,
|
|
50
|
+
asset_rid: RID | None = None,
|
|
51
|
+
):
|
|
52
|
+
"""Initialize an AssetFilePath instance.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
asset_path: Local path to the asset file.
|
|
56
|
+
asset_table: Name of the asset table in the catalog.
|
|
57
|
+
file_name: Name of the local file.
|
|
58
|
+
asset_metadata: Additional metadata columns.
|
|
59
|
+
asset_types: One or more asset type terms.
|
|
60
|
+
asset_rid: Optional Resource Identifier if already in catalog.
|
|
61
|
+
"""
|
|
62
|
+
super().__init__(asset_path)
|
|
63
|
+
self.asset_table = asset_table
|
|
64
|
+
self.file_name = file_name
|
|
65
|
+
self.asset_metadata = asset_metadata
|
|
66
|
+
self.asset_types = asset_types if isinstance(asset_types, list) else [asset_types]
|
|
67
|
+
self.asset_rid = asset_rid
|
|
68
|
+
|
|
69
|
+
# Backward compatibility alias
|
|
70
|
+
@property
|
|
71
|
+
def asset_name(self) -> str:
|
|
72
|
+
"""Alias for asset_table (backward compatibility)."""
|
|
73
|
+
return self.asset_table
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class AssetSpec(BaseModel):
    """Specification for an asset in execution configurations.

    Used to reference assets as inputs to executions, similar to how
    DatasetSpec is used for datasets.  A bare RID string is accepted wherever
    an AssetSpec is validated and is treated as ``{"rid": <string>}``.

    Attributes:
        rid: Resource Identifier of the asset.
        asset_role: Role of the asset ("Input" or "Output"). Defaults to "Input".

    Example:
        >>> spec = AssetSpec(rid="3JSE")
        >>> spec = AssetSpec(rid="3JSE", asset_role="Input")
    """

    # RID of the referenced asset.
    rid: RID
    # Role the asset plays in the execution; not restricted here beyond ``str``.
    asset_role: str = "Input"

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode="before")
    @classmethod
    def _check_bare_rid(cls, data: Any) -> Any:
        """Allow a bare RID string as shorthand for ``{"rid": ...}``.

        Args:
            data: Raw input handed to model validation.

        Returns:
            ``{"rid": data}`` when ``data`` is a string; otherwise ``data``
            unchanged, whatever its type.
        """
        if isinstance(data, str):
            return {"rid": data}
        return data
|