deriva-ml 1.17.9__py3-none-any.whl → 1.17.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. deriva_ml/__init__.py +43 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +21 -0
  7. deriva_ml/catalog/clone.py +1199 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +817 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +186 -105
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +545 -244
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +224 -35
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -5
  67. deriva_ml-1.17.11.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +2 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.9.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
deriva_ml/__init__.py CHANGED
@@ -1,5 +1,10 @@
1
1
  # We will be loading get_version from setuptools_scm and it will emit a UserWarning about it being deprecated.
2
2
 
3
+ # IMPORTANT: Import deriva package first to prevent shadowing by local 'deriva.py' files.
4
+ # This ensures 'deriva' is cached in sys.modules before any other imports that might
5
+ # add directories containing a 'deriva.py' file to sys.path.
6
+ import deriva.core # noqa: F401
7
+
3
8
  from importlib.metadata import PackageNotFoundError, version
4
9
  from typing import TYPE_CHECKING
5
10
 
@@ -20,6 +25,8 @@ from deriva_ml.core.definitions import (
20
25
  MLAsset,
21
26
  MLVocab,
22
27
  TableDefinition,
28
+ UploadCallback,
29
+ UploadProgress,
23
30
  UploadState,
24
31
  )
25
32
  from deriva_ml.core.exceptions import (
@@ -34,7 +41,7 @@ if TYPE_CHECKING:
34
41
 
35
42
 
36
43
  # Lazy import function for runtime usage
37
- def __getattr__(name):
44
+ def __getattr__(name: str) -> type:
38
45
  """Lazy import to avoid circular dependencies."""
39
46
  if name == "DerivaML":
40
47
  from deriva_ml.core.base import DerivaML
@@ -44,12 +51,45 @@ def __getattr__(name):
44
51
  from deriva_ml.execution.execution import Execution
45
52
 
46
53
  return Execution
54
+ elif name == "Asset":
55
+ from deriva_ml.asset.asset import Asset
56
+
57
+ return Asset
58
+ elif name == "AssetFilePath":
59
+ from deriva_ml.asset.aux_classes import AssetFilePath
60
+
61
+ return AssetFilePath
62
+ elif name == "AssetSpec":
63
+ from deriva_ml.asset.aux_classes import AssetSpec
64
+
65
+ return AssetSpec
66
+ elif name == "FeatureValueRecord":
67
+ from deriva_ml.dataset.dataset_bag import FeatureValueRecord
68
+
69
+ return FeatureValueRecord
70
+ elif name == "SchemaValidationReport":
71
+ from deriva_ml.schema.validation import SchemaValidationReport
72
+
73
+ return SchemaValidationReport
74
+ elif name == "validate_ml_schema":
75
+ from deriva_ml.schema.validation import validate_ml_schema
76
+
77
+ return validate_ml_schema
47
78
  raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
48
79
 
49
80
 
50
81
  __all__ = [
51
82
  "DerivaML", # Lazy-loaded
52
83
  "DerivaMLConfig",
84
+ # Asset classes (lazy-loaded)
85
+ "Asset",
86
+ "AssetFilePath",
87
+ "AssetSpec",
88
+ # Feature value record for restructure_assets
89
+ "FeatureValueRecord",
90
+ # Schema validation (lazy-loaded)
91
+ "SchemaValidationReport",
92
+ "validate_ml_schema",
53
93
  # Exceptions
54
94
  "DerivaMLException",
55
95
  "DerivaMLInvalidTerm",
@@ -69,6 +109,8 @@ __all__ = [
69
109
  "MLAsset",
70
110
  "MLVocab",
71
111
  "TableDefinition",
112
+ "UploadCallback",
113
+ "UploadProgress",
72
114
  "UploadState",
73
115
  ]
74
116
 
@@ -0,0 +1,17 @@
1
+ """Asset management module for DerivaML.
2
+
3
+ This module provides classes for managing assets (files) in a Deriva catalog:
4
+
5
+ - Asset: Live catalog access to asset records
6
+ - AssetFilePath: Extended Path for staging files during execution
7
+ - AssetSpec: Specification for asset references in configurations
8
+ """
9
+
10
+ from .asset import Asset
11
+ from .aux_classes import AssetFilePath, AssetSpec
12
+
13
+ __all__ = [
14
+ "Asset",
15
+ "AssetFilePath",
16
+ "AssetSpec",
17
+ ]
@@ -0,0 +1,357 @@
1
+ """Asset management for DerivaML.
2
+
3
+ This module provides the Asset class for managing assets in a Deriva catalog.
4
+ An asset represents a file-based record (image, model, data file, etc.) with
5
+ associated metadata, types, and provenance tracking.
6
+
7
+ The Asset class parallels the Dataset class, providing:
8
+ - Catalog-backed entity access via RID
9
+ - Type management (add/remove asset types)
10
+ - Provenance tracking (which executions created/used the asset)
11
+ - Feature discovery (features defined on the asset or its referenced tables)
12
+ - Download capability for offline access
13
+
14
+ Typical usage:
15
+ >>> # Look up an existing asset
16
+ >>> asset = ml.lookup_asset("3JSE")
17
+ >>> print(f"Asset: {asset.filename} ({asset.asset_table})")
18
+ >>> print(f"Types: {asset.asset_types}")
19
+
20
+ >>> # Find the execution that created this asset
21
+ >>> executions = asset.list_executions(asset_role="Output")
22
+ >>> creator = executions[0] if executions else None
23
+
24
+ >>> # Download for offline use
25
+ >>> local_path = asset.download(Path("/tmp/assets"))
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import logging
31
+ from pathlib import Path
32
+ from typing import TYPE_CHECKING, Any
33
+
34
+ from pydantic import ConfigDict, SkipValidation, validate_call
35
+
36
+ from deriva_ml.core.definitions import RID
37
+
38
+ if TYPE_CHECKING:
39
+ from deriva_ml.execution.execution import Execution
40
+ from deriva_ml.execution.execution_record import ExecutionRecord
41
+ from deriva_ml.feature import Feature, FeatureRecord
42
+ from deriva_ml.interfaces import DerivaMLCatalog
43
+
44
+ # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
45
+ import importlib
46
+
47
+ _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
48
+ Table = _ermrest_model.Table
49
+
50
+
51
class Asset:
    """Catalog-backed handle for a single asset record.

    An asset is a file-based record in an asset table (e.g. "Image", "Model").
    This class wraps one such record and exposes its basic file metadata,
    its ``Asset_Type`` vocabulary terms, the executions associated with it,
    features defined on its table, and a download helper.

    All catalog access is delegated to the ``catalog`` object passed to the
    constructor (``model``, ``pathBuilder()``, ``hatrac``, ``find_features``,
    ``list_feature_values``, ``list_asset_executions``, ``catalog_id`` and
    ``host_name`` are the members used here).

    Attributes:
        asset_rid (RID): Resource Identifier of this asset.
        asset_table (str): Name of the asset table containing this asset.
        filename (str): Filename of the asset.
        url (str): URL used to fetch the asset file.
        length (int): Size of the asset file in bytes.
        md5 (str): MD5 checksum of the asset file.
        description (str): Human-readable description of the asset.
        asset_types (list[str]): Asset type terms (lazily loaded, cached).
        execution_rid (RID | None): Execution that produced the asset
            (lazily resolved).

    Example:
        >>> # Look up an existing asset
        >>> asset = ml.lookup_asset("3JSE")
        >>> print(f"File: {asset.filename}, Size: {asset.length} bytes")
        >>> print(f"Types: {asset.asset_types}")

        >>> # Find executions that used this asset
        >>> for exe in asset.list_executions():
        ...     print(f"Execution {exe.execution_rid}: {exe.configuration.description}")
    """

    def __init__(
        self,
        catalog: "DerivaMLCatalog",
        asset_rid: RID,
        asset_table: str,
        filename: str = "",
        url: str = "",
        length: int = 0,
        md5: str = "",
        description: str = "",
        asset_types: list[str] | None = None,
        execution_rid: RID | None = None,
    ):
        """Wrap an existing asset record.

        This constructor does not create anything in the catalog; it only
        stores the values it is given.

        Args:
            catalog: Catalog object used for every catalog operation.
            asset_rid: RID of the existing asset record.
            asset_table: Name of the asset table (e.g., "Image", "Model").
            filename: Filename of the asset.
            url: URL to access the asset file.
            length: Size of the asset file in bytes.
            md5: MD5 checksum of the asset file.
            description: Human-readable description.
            asset_types: Known asset type terms. ``None`` or an empty list
                means "not known yet"; the terms are then fetched from the
                catalog on first access of ``asset_types``.
            execution_rid: RID of the execution that created this asset,
                if already known.

        Example:
            >>> # Usually created via ml.lookup_asset()
            >>> asset = ml.lookup_asset("3JSE")
        """
        self._logger = logging.getLogger("deriva_ml")
        self._ml_instance = catalog
        self.asset_rid = asset_rid
        self.asset_table = asset_table
        self.filename = filename
        self.url = url
        self.length = length
        self.md5 = md5
        self.description = description
        # Copy so later cache updates never mutate the caller's list.
        self._asset_types = list(asset_types) if asset_types else []
        # Track "loaded" explicitly instead of relying on list truthiness:
        # an asset with no types must not trigger a catalog query on every
        # access, and a partially filled cache must not pass for a full one.
        self._asset_types_loaded = bool(self._asset_types)
        self._execution_rid = execution_rid

    def __repr__(self) -> str:
        """Return a debugging representation (uses the cache, never queries)."""
        return (
            f"<deriva_ml.Asset at {hex(id(self))}: rid='{self.asset_rid}', "
            f"table='{self.asset_table}', file='{self.filename}', types={self._asset_types}>"
        )

    @property
    def asset_types(self) -> list[str]:
        """Asset type terms for this asset.

        The terms are fetched from the catalog the first time they are
        needed and cached afterwards, including the "no types" result.

        Returns:
            List of asset type vocabulary terms.
        """
        if not self._asset_types_loaded:
            self._load_asset_types()
        return self._asset_types

    def _association_path(self, assoc_table: Any) -> Any:
        """Return the path-builder table object for an association table."""
        pb = self._ml_instance.pathBuilder()
        return pb.schemas[assoc_table.schema.name].tables[assoc_table.name]

    def _load_asset_types(self) -> None:
        """Fetch this asset's type terms from the catalog into the cache."""
        asset_table_obj = self._ml_instance.model.name_to_table(self.asset_table)
        try:
            type_assoc_table, asset_fk, _ = self._ml_instance.model.find_association(
                asset_table_obj, "Asset_Type"
            )
        except Exception as exc:
            # Deliberate best effort: a failed association lookup is treated
            # as "this asset table has no Asset_Type association".
            self._logger.debug(
                "No Asset_Type association found for table %s: %s", self.asset_table, exc
            )
            self._asset_types = []
            self._asset_types_loaded = True
            return

        type_path = self._association_path(type_assoc_table)
        rows = (
            type_path.filter(type_path.columns[asset_fk] == self.asset_rid)
            .attributes(type_path.Asset_Type)
            .fetch()
        )
        self._asset_types = [row["Asset_Type"] for row in rows]
        # Only mark as loaded once the query has succeeded, so a failed
        # fetch is retried on the next access.
        self._asset_types_loaded = True

    @property
    def execution_rid(self) -> RID | None:
        """RID of the execution that created this asset.

        If no RID was supplied at construction, the first execution returned
        by ``list_executions(asset_role="Output")`` is used and remembered.

        Returns:
            RID of the creating execution, or None if none was found.
        """
        if self._execution_rid is None:
            executions = self.list_executions(asset_role="Output")
            if executions:
                self._execution_rid = executions[0].execution_rid
        return self._execution_rid

    def list_executions(self, asset_role: str | None = None) -> list["ExecutionRecord"]:
        """List executions associated with this asset.

        Delegates to ``catalog.list_asset_executions`` with this asset's RID.

        Args:
            asset_role: Optional filter for asset role ('Input' or 'Output').
                If None, no role filter is passed on.

        Returns:
            List of ExecutionRecord objects as returned by the catalog.

        Example:
            >>> # Find the execution that created this asset
            >>> creators = asset.list_executions(asset_role="Output")
            >>> if creators:
            ...     print(f"Created by execution {creators[0].execution_rid}")

            >>> # Find all executions that used this asset as input
            >>> users = asset.list_executions(asset_role="Input")
        """
        return self._ml_instance.list_asset_executions(self.asset_rid, asset_role=asset_role)

    def find_features(self) -> list["Feature"]:
        """Find all features defined on this asset's table.

        Returns:
            List of Feature objects as returned by ``catalog.find_features``
            for this asset's table.

        Example:
            >>> features = asset.find_features()
            >>> for f in features:
            ...     print(f"Feature: {f.feature_name}")
        """
        return self._ml_instance.find_features(self.asset_table)

    def list_feature_values(self, feature_name: str) -> list["FeatureRecord"]:
        """Get values of a feature defined on this asset's table.

        NOTE(review): the underlying call receives only the table name and
        the feature name, not this asset's RID. Whether the result is
        restricted to this specific asset cannot be confirmed from this
        method; verify against ``catalog.list_feature_values``.

        Args:
            feature_name: Name of the feature to query.

        Returns:
            List of the records yielded by ``catalog.list_feature_values``.

        Example:
            >>> values = asset.list_feature_values("quality_score")
            >>> for v in values:
            ...     print(f"Score: {v.Score}, Execution: {v.Execution}")
            >>> # Or convert to dict:
            >>> dicts = [v.model_dump() for v in values]
        """
        return list(self._ml_instance.list_feature_values(self.asset_table, feature_name))

    def add_asset_type(self, type_name: str) -> None:
        """Associate an asset type term with this asset in the catalog.

        Args:
            type_name: Name of the asset type vocabulary term to add.

        Example:
            >>> asset.add_asset_type("Training_Data")
        """
        asset_table_obj = self._ml_instance.model.name_to_table(self.asset_table)
        type_assoc_table, asset_fk, _ = self._ml_instance.model.find_association(
            asset_table_obj, "Asset_Type"
        )
        type_path = self._association_path(type_assoc_table)

        # Insert the association row.
        type_path.insert([{asset_fk: self.asset_rid, "Asset_Type": type_name}])

        # Only patch a cache that is already complete. If the types were never
        # loaded, leave the cache unloaded so the next access fetches the full
        # list (which then includes the row inserted above).
        if self._asset_types_loaded and type_name not in self._asset_types:
            self._asset_types.append(type_name)

    def remove_asset_type(self, type_name: str) -> None:
        """Remove an asset type term association from this asset.

        Args:
            type_name: Name of the asset type vocabulary term to remove.

        Example:
            >>> asset.remove_asset_type("Temporary")
        """
        asset_table_obj = self._ml_instance.model.name_to_table(self.asset_table)
        type_assoc_table, asset_fk, _ = self._ml_instance.model.find_association(
            asset_table_obj, "Asset_Type"
        )
        type_path = self._association_path(type_assoc_table)

        # Delete the association row for this (asset, type) pair.
        type_path.filter(
            (type_path.columns[asset_fk] == self.asset_rid)
            & (type_path.Asset_Type == type_name)
        ).delete()

        # Keep the local cache in step (a no-op when nothing is cached).
        if type_name in self._asset_types:
            self._asset_types.remove(type_name)

    def download(self, dest_dir: Path, update_catalog: bool = False) -> Path:
        """Download the asset file into a local directory.

        The destination directory is created if needed and the file is
        fetched from ``self.url`` via ``catalog.hatrac.get_obj`` into
        ``dest_dir / self.filename``.

        Args:
            dest_dir: Directory to download the asset to.
            update_catalog: Accepted for interface compatibility.
                NOTE(review): this method currently does not act on the
                flag; no execution tracking is performed here.

        Returns:
            Path to the downloaded file.

        Example:
            >>> local_path = asset.download(Path("/tmp/assets"))
            >>> print(f"Downloaded to: {local_path}")
        """
        dest_dir = Path(dest_dir)
        dest_dir.mkdir(parents=True, exist_ok=True)

        dest_path = dest_dir / self.filename
        self._ml_instance.hatrac.get_obj(self.url, destfilename=str(dest_path))

        return dest_path

    def get_metadata(self) -> dict[str, Any]:
        """Fetch the full catalog record for this asset.

        Returns:
            The first entity whose RID equals this asset's RID, or an empty
            dict when no such record is returned.

        Example:
            >>> metadata = asset.get_metadata()
            >>> print(f"Created: {metadata.get('RCT')}")
        """
        pb = self._ml_instance.pathBuilder()
        table_obj = self._ml_instance.model.name_to_table(self.asset_table)
        asset_path = pb.schemas[table_obj.schema.name].tables[self.asset_table]

        records = list(asset_path.filter(asset_path.RID == self.asset_rid).entities().fetch())
        return records[0] if records else {}

    def get_chaise_url(self) -> str:
        """Build the Chaise record URL for this asset.

        Returns:
            URL of the form
            ``https://<host>/chaise/record/#<catalog>/<schema>:<table>/RID=<rid>``.

        Example:
            >>> url = asset.get_chaise_url()
            >>> print(f"View at: {url}")
        """
        table_obj = self._ml_instance.model.name_to_table(self.asset_table)
        schema_name = table_obj.schema.name
        catalog_id = self._ml_instance.catalog_id
        hostname = self._ml_instance.host_name

        return (
            f"https://{hostname}/chaise/record/#{catalog_id}/"
            f"{schema_name}:{self.asset_table}/RID={self.asset_rid}"
        )
@@ -0,0 +1,100 @@
1
+ """Auxiliary classes for asset management in DerivaML.
2
+
3
+ This module defines helper classes for asset operations including:
4
+ - AssetFilePath: Extended Path for in-flight asset staging
5
+ - AssetSpec: Specification for asset references in configurations
6
+ """
7
+
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from pydantic import BaseModel, ConfigDict, model_validator
12
+
13
+ from deriva_ml.core.definitions import RID
14
+
15
+
16
class AssetFilePath(Path):
    """Path to a local asset file, annotated with catalog information.

    Behaves like a regular ``Path`` pointing at ``asset_path`` and carries
    extra attributes describing how the file maps to an asset record.
    Paths derived from an instance (``.parent``, ``/``, ``resolve()``, ...)
    are plain ``Path`` objects and do not carry the asset attributes.

    Attributes:
        asset_table: Name of the asset table in the catalog (e.g., "Image", "Model").
        file_name: Name of the local file containing the asset.
        asset_metadata: Additional metadata columns for the asset.
        asset_types: Asset type terms; always stored as a list.
        asset_rid: Resource Identifier, or None if not provided.

    Example:
        >>> path = AssetFilePath(
        ...     "/path/to/file.txt",
        ...     asset_table="Execution_Asset",
        ...     file_name="results.txt",
        ...     asset_metadata={"version": "1.0"},
        ...     asset_types=["Model_File"]
        ... )
    """

    def __init__(
        self,
        asset_path: str | Path,
        asset_table: str,
        file_name: str,
        asset_metadata: dict[str, Any],
        asset_types: list[str] | str,
        asset_rid: RID | None = None,
    ):
        """Initialize an AssetFilePath instance.

        Args:
            asset_path: Local path to the asset file.
            asset_table: Name of the asset table in the catalog.
            file_name: Name of the local file.
            asset_metadata: Additional metadata columns.
            asset_types: One asset type term, or a list of terms. A single
                non-list value is wrapped in a one-element list.
            asset_rid: Optional Resource Identifier if already in catalog.
        """
        super().__init__(asset_path)
        self.asset_table = asset_table
        self.file_name = file_name
        self.asset_metadata = asset_metadata
        self.asset_types = asset_types if isinstance(asset_types, list) else [asset_types]
        self.asset_rid = asset_rid

    def with_segments(self, *pathsegments: str | Path) -> Path:
        """Create derived paths as plain ``Path`` objects.

        pathlib (Python 3.12+) builds every derived path through this hook,
        by default calling ``type(self)(*pathsegments)``. That cannot work
        here because this class's constructor requires the asset arguments,
        so derived paths are returned as ordinary ``Path`` instances instead.

        Args:
            *pathsegments: Path segments for the new path.

        Returns:
            A plain ``Path`` built from ``pathsegments``.
        """
        return Path(*pathsegments)

    # Backward compatibility alias
    @property
    def asset_name(self) -> str:
        """Alias for asset_table (backward compatibility)."""
        return self.asset_table
74
+
75
+
76
class AssetSpec(BaseModel):
    """Specification for an asset in execution configurations.

    Identifies an asset by RID together with the role it plays. A bare RID
    string is accepted wherever an ``AssetSpec`` is validated and is
    expanded to ``{"rid": <string>}``.

    Attributes:
        rid: Resource Identifier of the asset.
        asset_role: Role of the asset ("Input" or "Output"). Defaults to "Input".

    Example:
        >>> spec = AssetSpec(rid="3JSE")
        >>> spec = AssetSpec(rid="3JSE", asset_role="Input")
    """

    # Resource Identifier of the referenced asset.
    rid: RID
    # Role of the asset; stored as a plain string and not restricted here.
    asset_role: str = "Input"

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode="before")
    @classmethod
    def _check_bare_rid(cls, data: Any) -> Any:
        """Allow a bare RID string as shorthand for ``{"rid": ...}``.

        Args:
            data: Raw input handed to the model before field validation.

        Returns:
            ``{"rid": data}`` when ``data`` is a string; otherwise ``data``
            unchanged, whatever its type.
        """
        if isinstance(data, str):
            return {"rid": data}
        return data