deriva-ml 1.14.0__py3-none-any.whl → 1.14.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. deriva_ml/__init__.py +25 -30
  2. deriva_ml/core/__init__.py +39 -0
  3. deriva_ml/core/base.py +1489 -0
  4. deriva_ml/core/constants.py +36 -0
  5. deriva_ml/core/definitions.py +74 -0
  6. deriva_ml/core/enums.py +222 -0
  7. deriva_ml/core/ermrest.py +288 -0
  8. deriva_ml/core/exceptions.py +28 -0
  9. deriva_ml/core/filespec.py +116 -0
  10. deriva_ml/dataset/__init__.py +4 -0
  11. deriva_ml/{dataset_aux_classes.py → dataset/aux_classes.py} +16 -12
  12. deriva_ml/{dataset.py → dataset/dataset.py} +406 -428
  13. deriva_ml/{dataset_bag.py → dataset/dataset_bag.py} +137 -97
  14. deriva_ml/{history.py → dataset/history.py} +51 -33
  15. deriva_ml/{upload.py → dataset/upload.py} +48 -70
  16. deriva_ml/demo_catalog.py +233 -183
  17. deriva_ml/execution/environment.py +290 -0
  18. deriva_ml/{execution.py → execution/execution.py} +365 -252
  19. deriva_ml/execution/execution_configuration.py +163 -0
  20. deriva_ml/{execution_configuration.py → execution/workflow.py} +212 -224
  21. deriva_ml/feature.py +83 -46
  22. deriva_ml/model/__init__.py +0 -0
  23. deriva_ml/{deriva_model.py → model/catalog.py} +113 -132
  24. deriva_ml/{database_model.py → model/database.py} +52 -74
  25. deriva_ml/model/sql_mapper.py +44 -0
  26. deriva_ml/run_notebook.py +19 -11
  27. deriva_ml/schema/__init__.py +3 -0
  28. deriva_ml/{schema_setup → schema}/annotations.py +31 -22
  29. deriva_ml/schema/check_schema.py +104 -0
  30. deriva_ml/{schema_setup → schema}/create_schema.py +151 -104
  31. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  32. deriva_ml/schema/table_comments_utils.py +57 -0
  33. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/METADATA +5 -4
  34. deriva_ml-1.14.27.dist-info/RECORD +40 -0
  35. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/entry_points.txt +1 -0
  36. deriva_ml/deriva_definitions.py +0 -391
  37. deriva_ml/deriva_ml_base.py +0 -1046
  38. deriva_ml/execution_environment.py +0 -139
  39. deriva_ml/schema_setup/table_comments_utils.py +0 -56
  40. deriva_ml/test-files/execution-parameters.json +0 -1
  41. deriva_ml/test-files/notebook-parameters.json +0 -5
  42. deriva_ml/test_functions.py +0 -141
  43. deriva_ml/test_notebook.ipynb +0 -197
  44. deriva_ml-1.14.0.dist-info/RECORD +0 -31
  45. /deriva_ml/{schema_setup → execution}/__init__.py +0 -0
  46. /deriva_ml/{schema_setup → schema}/policy.json +0 -0
  47. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/WHEEL +0 -0
  48. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/licenses/LICENSE +0 -0
  49. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/top_level.txt +0 -0
deriva_ml/feature.py CHANGED
@@ -1,27 +1,48 @@
1
- """
2
- This module provides the implementation of the Feature capability in deriva-ml
1
+ """Feature implementation for deriva-ml.
2
+
3
+ This module provides classes for defining and managing features in deriva-ml. Features represent measurable
4
+ properties or characteristics that can be associated with records in a table. The module includes:
5
+
6
+ - Feature: Main class for defining and managing features
7
+ - FeatureRecord: Base class for feature records using pydantic models
8
+
9
+ Typical usage example:
10
+ >>> feature = Feature(association_result, model)
11
+ >>> FeatureClass = feature.feature_record_class()
12
+ >>> record = FeatureClass(value="high", confidence=0.95)
3
13
  """
4
14
 
5
- from deriva.core.ermrest_model import FindAssociationResult, Column
6
15
  from pathlib import Path
7
- from pydantic import BaseModel, create_model
8
- from typing import Optional, Type, ClassVar, TYPE_CHECKING
9
16
  from types import UnionType
17
+ from typing import TYPE_CHECKING, ClassVar, Optional, Type
18
+
19
+ from deriva.core.ermrest_model import Column, FindAssociationResult
20
+ from pydantic import BaseModel, create_model
10
21
 
11
22
  if TYPE_CHECKING:
12
- from .deriva_model import DerivaModel
23
+ from model.catalog import DerivaModel
13
24
 
14
25
 
15
26
  class FeatureRecord(BaseModel):
16
- """Base class for feature records. Feature records are pydantic models which are dynamically generated and
17
- describe all the columns of a feature.
27
+ """Base class for dynamically generated feature record models.
18
28
 
19
- Attributes:
20
- Execution (str):
21
- Feature_Name (str):
22
- feature:
23
- Returns:
29
+ This class serves as the base for pydantic models that represent feature records. Each feature record
30
+ contains the values and metadata associated with a feature instance.
24
31
 
32
+ Attributes:
33
+ Execution (Optional[str]): RID of the execution that created this feature record.
34
+ Feature_Name (str): Name of the feature this record belongs to.
35
+ feature (ClassVar[Optional[Feature]]): Reference to the Feature object that created this record.
36
+
37
+ Example:
38
+ >>> class GeneFeature(FeatureRecord):
39
+ ... value: str
40
+ ... confidence: float
41
+ >>> record = GeneFeature(
42
+ ... Feature_Name="expression",
43
+ ... value="high",
44
+ ... confidence=0.95
45
+ ... )
25
46
  """
26
47
 
27
48
  # model_dump of this feature should be compatible with feature table columns.
@@ -34,53 +55,61 @@ class FeatureRecord(BaseModel):
34
55
 
35
56
  @classmethod
36
57
  def feature_columns(cls) -> set[Column]:
37
- """
58
+ """Returns all columns specific to this feature.
38
59
 
39
60
  Returns:
40
- A set of feature column names.
41
-
61
+ set[Column]: Set of feature-specific columns, excluding system and relationship columns.
42
62
  """
43
63
  return cls.feature.feature_columns
44
64
 
45
65
  @classmethod
46
66
  def asset_columns(cls) -> set[Column]:
47
- """
48
-
49
- Args:
67
+ """Returns columns that reference asset tables.
50
68
 
51
69
  Returns:
52
- A set of asset column names.
53
-
70
+ set[Column]: Set of columns that contain references to asset tables.
54
71
  """
55
72
  return cls.feature.asset_columns
56
73
 
57
74
  @classmethod
58
75
  def term_columns(cls) -> set[Column]:
59
- """
60
-
61
- Args:
76
+ """Returns columns that reference vocabulary terms.
62
77
 
63
78
  Returns:
64
- :return: set of term column names.
65
-
79
+ set[Column]: Set of columns that contain references to controlled vocabulary terms.
66
80
  """
67
81
  return cls.feature.term_columns
68
82
 
69
83
  @classmethod
70
84
  def value_columns(cls) -> set[Column]:
71
- """
72
-
73
- Args:
85
+ """Returns columns that contain direct values.
74
86
 
75
87
  Returns:
76
- A set of value column names.
77
-
88
+ set[Column]: Set of columns containing direct values (not references to assets or terms).
78
89
  """
79
90
  return cls.feature.value_columns
80
91
 
81
92
 
82
93
  class Feature:
83
- """Wrapper for results of Table.find_associations()"""
94
+ """Manages feature definitions and their relationships in the catalog.
95
+
96
+ A Feature represents a measurable property or characteristic that can be associated with records in a table.
97
+ Features can include asset references, controlled vocabulary terms, and custom metadata fields.
98
+
99
+ Attributes:
100
+ feature_table: Table containing the feature implementation.
101
+ target_table: Table that the feature is associated with.
102
+ feature_name: Name of the feature (from Feature_Name column default).
103
+ feature_columns: Set of columns specific to this feature.
104
+ asset_columns: Set of columns referencing asset tables.
105
+ term_columns: Set of columns referencing vocabulary tables.
106
+ value_columns: Set of columns containing direct values.
107
+
108
+ Example:
109
+ >>> feature = Feature(association_result, model)
110
+ >>> print(f"Feature {feature.feature_name} on {feature.target_table.name}")
111
+ >>> print("Asset columns:", [c.name for c in feature.asset_columns])
112
+ """
84
113
 
85
114
  def __init__(self, atable: FindAssociationResult, model: "DerivaModel") -> None:
86
115
  self.feature_table = atable.table
@@ -98,9 +127,7 @@ class Feature:
98
127
  self.target_table.name,
99
128
  "Execution",
100
129
  }
101
- self.feature_columns = {
102
- c for c in self.feature_table.columns if c.name not in skip_columns
103
- }
130
+ self.feature_columns = {c for c in self.feature_table.columns if c.name not in skip_columns}
104
131
 
105
132
  assoc_fkeys = {atable.self_fkey} | atable.other_fkeys
106
133
 
@@ -117,9 +144,7 @@ class Feature:
117
144
  if fk not in assoc_fkeys and self._model.is_vocabulary(fk.pk_table)
118
145
  }
119
146
 
120
- self.value_columns = self.feature_columns - (
121
- self.asset_columns | self.term_columns
122
- )
147
+ self.value_columns = self.feature_columns - (self.asset_columns | self.term_columns)
123
148
 
124
149
  def feature_record_class(self) -> type[FeatureRecord]:
125
150
  """Create a pydantic model for entries into the specified feature table
@@ -129,14 +154,25 @@ class Feature:
129
154
  """
130
155
 
131
156
  def map_type(c: Column) -> UnionType | Type[str] | Type[int] | Type[float]:
132
- """Map a deriva type into a pydantic model type.
157
+ """Maps a Deriva column type to a Python/pydantic type.
158
+
159
+ Converts ERMrest column types to appropriate Python types for use in pydantic models.
160
+ Special handling is provided for asset columns which can accept either strings or Path objects.
133
161
 
134
162
  Args:
135
- c: column to be mapped
136
- c: Column:
163
+ c: ERMrest column to map to a Python type.
137
164
 
138
165
  Returns:
139
- A pydantic model type
166
+ UnionType | Type[str] | Type[int] | Type[float]: Appropriate Python type for the column:
167
+ - str | Path for asset columns
168
+ - str for text columns
169
+ - int for integer columns
170
+ - float for floating point columns
171
+ - str for all other types
172
+
173
+ Example:
174
+ >>> col = Column(name="score", type="float4")
175
+ >>> typ = map_type(col) # Returns float
140
176
  """
141
177
  if c.name in {c.name for c in self.asset_columns}:
142
178
  return str | Path
@@ -168,7 +204,10 @@ class Feature:
168
204
  ), # Set default value for Feature_Name
169
205
  self.target_table.name: (str, ...),
170
206
  }
171
- docstring = f"Class to capture fields in a feature {self.feature_name} on table {self.target_table}. Feature columns include:\n"
207
+ docstring = (
208
+ f"Class to capture fields in a feature {self.feature_name} on table {self.target_table}. "
209
+ "Feature columns include:\n"
210
+ )
172
211
  docstring += "\n".join([f" {c.name}" for c in self.feature_columns])
173
212
 
174
213
  model = create_model(
@@ -177,9 +216,7 @@ class Feature:
177
216
  __doc__=docstring,
178
217
  **feature_columns,
179
218
  )
180
- model.feature = (
181
- self # Set value of class variable within the feature class definition.
182
- )
219
+ model.feature = self # Set value of class variable within the feature class definition.
183
220
 
184
221
  return model
185
222
 
File without changes
deriva_ml/{deriva_model.py → model/catalog.py} CHANGED
@@ -1,27 +1,55 @@
1
1
  """
2
- `deriva_ml_base.py` is the core module for the Deriva ML project. This module implements the DerivaML class, which is
3
- the primary interface to the Deriva based catalogs. The module also implements the Feature and Vocabulary functions
4
- in the DerivaML.
5
-
6
- DerivaML and its associated classes all depend on a catalog that implements a `deriva-ml` schema with tables and
7
- relationships that follow a specific data model.
2
+ Model management for Deriva ML catalogs.
8
3
 
4
+ This module provides the DerivaModel class which augments the standard Deriva model class with
5
+ ML-specific functionality. It handles schema management, feature definitions, and asset tracking.
9
6
  """
10
7
 
11
- from deriva.core.ermrest_model import Table, Column, Model, FindAssociationResult
8
+ from __future__ import annotations
9
+
10
+ # Standard library imports
11
+ from collections import Counter
12
+ from typing import Any, Callable, Final, Iterable, NewType, TypeAlias
13
+
12
14
  from deriva.core.ermrest_catalog import ErmrestCatalog
13
- from .feature import Feature
14
15
 
15
- from .deriva_definitions import (
16
- DerivaMLException,
16
+ # Deriva imports
17
+ from deriva.core.ermrest_model import Column, FindAssociationResult, Model, Schema, Table
18
+
19
+ # Third-party imports
20
+ from pydantic import ConfigDict, validate_call
21
+
22
+ from deriva_ml.core.definitions import (
17
23
  ML_SCHEMA,
18
- DerivaSystemColumns,
24
+ DerivaAssetColumns,
19
25
  TableDefinition,
20
26
  )
27
+ from deriva_ml.core.exceptions import DerivaMLException, DerivaMLTableTypeError
21
28
 
22
- from collections import Counter
23
- from pydantic import validate_call, ConfigDict
24
- from typing import Iterable, Optional, Any
29
+ # Local imports
30
+ from deriva_ml.feature import Feature
31
+
32
+ try:
33
+ from icecream import ic
34
+ except ImportError: # Graceful fallback if IceCream isn't installed.
35
+ ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa
36
+
37
+
38
+ # Define common types:
39
+ TableInput: TypeAlias = str | Table
40
+ SchemaDict: TypeAlias = dict[str, Schema]
41
+ FeatureList: TypeAlias = Iterable[Feature]
42
+ SchemaName = NewType("SchemaName", str)
43
+ ColumnSet: TypeAlias = set[Column]
44
+ AssociationResult: TypeAlias = FindAssociationResult
45
+ TableSet: TypeAlias = set[Table]
46
+ PathList: TypeAlias = list[list[Table]]
47
+
48
+ # Define constants:
49
+ VOCAB_COLUMNS: Final[set[str]] = {"NAME", "URI", "SYNONYMS", "DESCRIPTION", "ID"}
50
+ ASSET_COLUMNS: Final[set[str]] = {"Filename", "URL", "Length", "MD5", "Description"}
51
+
52
+ FilterPredicate = Callable[[Table], bool]
25
53
 
26
54
 
27
55
  class DerivaModel:
@@ -30,9 +58,8 @@ class DerivaModel:
30
58
  This class provides a number of DerivaML specific methods that augment the interface in the deriva model class.
31
59
 
32
60
  Attributes:
33
- domain_schema: Schema name for domain specific tables and relationships.
61
+ domain_schema: Schema name for domain-specific tables and relationships.
34
62
  model: ERMRest model for the catalog.
35
- schemas: ERMRest model for the catalog.
36
63
  catalog: ERMRest catalog for the model
37
64
  hostname: Hostname of the ERMRest server, or "localhost" if the model is not backed by an ErmrestCatalog.
38
65
  ml_schema: The ML schema for the catalog.
@@ -41,7 +68,10 @@ class DerivaModel:
41
68
  """
42
69
 
43
70
  def __init__(
44
- self, model: Model, ml_schema: str = ML_SCHEMA, domain_schema: str = ""
71
+ self,
72
+ model: Model,
73
+ ml_schema: str = ML_SCHEMA,
74
+ domain_schema: str | None = None,
45
75
  ):
46
76
  """Create and initialize a DerivaML instance.
47
77
 
@@ -49,64 +79,66 @@ class DerivaModel:
49
79
  This class is intended to be used as a base class on which domain-specific interfaces are built.
50
80
 
51
81
  Args:
82
+ model: The ERMRest model for the catalog.
83
+ ml_schema: The ML schema name.
84
+ domain_schema: The domain schema name.
52
85
  """
53
86
  self.model = model
54
87
  self.configuration = None
55
88
  self.catalog: ErmrestCatalog = self.model.catalog
56
- self.hostname = (
57
- self.catalog.deriva_server.server
58
- if isinstance(self.catalog, ErmrestCatalog)
59
- else "localhost"
60
- )
61
- self.schemas = self.model.schemas
89
+ self.hostname = self.catalog.deriva_server.server if isinstance(self.catalog, ErmrestCatalog) else "localhost"
62
90
 
63
91
  self.ml_schema = ml_schema
64
- builtin_schemas = ["public", self.ml_schema, "www", "WWW"]
65
- try:
66
- self.domain_schema = (
67
- domain_schema
68
- or [
69
- s for s in self.model.schemas.keys() if s not in builtin_schemas
70
- ].pop()
71
- )
72
- except IndexError:
73
- # No domain schema defined.
92
+ builtin_schemas = ("public", self.ml_schema, "www", "WWW")
93
+ if domain_schema:
74
94
  self.domain_schema = domain_schema
95
+ else:
96
+ if len(user_schemas := {k for k in self.model.schemas.keys()} - set(builtin_schemas)) == 1:
97
+ self.domain_schema = user_schemas.pop()
98
+ else:
99
+ raise DerivaMLException(f"Ambiguous domain schema: {user_schemas}")
100
+
101
+ def refresh_model(self) -> None:
102
+ self.model = self.catalog.getCatalogModel()
103
+
104
+ @property
105
+ def schemas(self) -> dict[str, Schema]:
106
+ return self.model.schemas
75
107
 
76
108
  @property
77
109
  def chaise_config(self) -> dict[str, Any]:
78
110
  """Return the chaise configuration."""
79
111
  return self.model.chaise_config
80
112
 
81
- def __getattr__(self, name):
113
+ def __getattr__(self, name: str) -> Any:
82
114
  # Called only if `name` is not found in Manager. Delegate attributes to model class.
83
115
  return getattr(self.model, name)
84
116
 
85
- def name_to_table(self, table: str | Table) -> Table:
117
+ def name_to_table(self, table: TableInput) -> Table:
86
118
  """Return the table object corresponding to the given table name.
87
119
 
88
120
  If the table name appears in more than one schema, return the first one you find.
89
121
 
90
122
  Args:
91
123
  table: An ERMRest table object or a string that is the name of the table.
92
- table: str | Table:
93
124
 
94
125
  Returns:
95
126
  Table object.
96
127
  """
97
128
  if isinstance(table, Table):
98
129
  return table
99
- for s in self.model.schemas.values():
130
+ if table in (s := self.model.schemas[self.domain_schema].tables):
131
+ return s[table]
132
+ for s in [self.model.schemas[sname] for sname in [self.domain_schema, self.ml_schema, "WWW"]]:
100
133
  if table in s.tables.keys():
101
134
  return s.tables[table]
102
135
  raise DerivaMLException(f"The table {table} doesn't exist.")
103
136
 
104
- def is_vocabulary(self, table_name: str | Table) -> bool:
137
+ def is_vocabulary(self, table_name: TableInput) -> bool:
105
138
  """Check if a given table is a controlled vocabulary table.
106
139
 
107
140
  Args:
108
141
  table_name: An ERMRest table object or the name of the table.
109
- table_name: str | Table:
110
142
 
111
143
  Returns:
112
144
  Table object if the table is a controlled vocabulary, False otherwise.
@@ -126,7 +158,7 @@ class DerivaModel:
126
158
  pure: bool = True,
127
159
  min_arity: int = 2,
128
160
  max_arity: int = 2,
129
- ) -> bool | set | int:
161
+ ) -> bool | set[str] | int:
130
162
  """Check the specified table to see if it is an association table.
131
163
 
132
164
  Args:
@@ -140,12 +172,10 @@ class DerivaModel:
140
172
 
141
173
  """
142
174
  table = self.name_to_table(table_name)
143
- return table.is_association(
144
- unqualified=unqualified, pure=pure, min_arity=min_arity, max_arity=max_arity
145
- )
175
+ return table.is_association(unqualified=unqualified, pure=pure, min_arity=min_arity, max_arity=max_arity)
146
176
 
147
- def find_association(self, table1: Table | str, table2: Table | str) -> Table:
148
- """Given two tables, return an association table that connects the two.
177
+ def find_association(self, table1: Table | str, table2: Table | str) -> tuple[Table, Column, Column]:
178
+ """Given two tables, return an association table that connects the two and the names of the two columns used to link them.
149
179
 
150
180
  Raises:
151
181
  DerivaML exception if there is either not an association table or more than one association table.
@@ -154,22 +184,21 @@ class DerivaModel:
154
184
  table2 = self.name_to_table(table2)
155
185
 
156
186
  tables = [
157
- a.table
187
+ (a.table, a.self_fkey.columns[0].name, other_key.columns[0].name)
158
188
  for a in table1.find_associations(pure=False)
159
- if a.other_fkeys.pop().pk_table == table2
189
+ if len(a.other_fkeys) == 1 and (other_key := a.other_fkeys.pop()).pk_table == table2
160
190
  ]
191
+
161
192
  if len(tables) == 1:
162
193
  return tables[0]
163
194
  elif len(tables) == 0:
164
- raise DerivaMLException(
165
- f"No association tables found between {table1.name} and {table2.name}."
166
- )
195
+ raise DerivaMLException(f"No association tables found between {table1.name} and {table2.name}.")
167
196
  else:
168
197
  raise DerivaMLException(
169
198
  f"There are {len(tables)} association tables between {table1.name} and {table2.name}."
170
199
  )
171
200
 
172
- def is_asset(self, table_name: str | Table) -> bool:
201
+ def is_asset(self, table_name: TableInput) -> bool:
173
202
  """True if the specified table is an asset table.
174
203
 
175
204
  Args:
@@ -185,24 +214,14 @@ class DerivaModel:
185
214
 
186
215
  def find_assets(self, with_metadata: bool = False) -> list[Table]:
187
216
  """Return the list of asset tables in the current model"""
188
- return [
189
- t
190
- for s in self.model.schemas.values()
191
- for t in s.tables.values()
192
- if self.is_asset(t)
193
- ]
217
+ return [t for s in self.model.schemas.values() for t in s.tables.values() if self.is_asset(t)]
194
218
 
195
219
  def find_vocabularies(self) -> list[Table]:
196
220
  """Return a list of all the controlled vocabulary tables in the model."""
197
- return [
198
- t
199
- for s in self.model.schemas.values()
200
- for t in s.tables.values()
201
- if self.is_vocabulary(t)
202
- ]
221
+ return [t for s in self.model.schemas.values() for t in s.tables.values() if self.is_vocabulary(t)]
203
222
 
204
223
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
205
- def find_features(self, table: Table | str) -> Iterable[Feature]:
224
+ def find_features(self, table: TableInput) -> Iterable[Feature]:
206
225
  """List the features defined on the specified table.
207
226
 
208
227
  Args:
@@ -215,15 +234,13 @@ class DerivaModel:
215
234
  table = self.name_to_table(table)
216
235
 
217
236
  def is_feature(a: FindAssociationResult) -> bool:
218
- """
237
+ """Check if association represents a feature.
219
238
 
220
239
  Args:
221
- a: FindAssociationResult:
222
-
240
+ a: Association result to check
223
241
  Returns:
224
-
242
+ bool: True if association represents a feature
225
243
  """
226
- # return {'Feature_Name', 'Execution'}.issubset({c.name for c in a.table.columns})
227
244
  return {
228
245
  "Feature_Name",
229
246
  "Execution",
@@ -231,12 +248,10 @@ class DerivaModel:
231
248
  }.issubset({c.name for c in a.table.columns})
232
249
 
233
250
  return [
234
- Feature(a, self)
235
- for a in table.find_associations(min_arity=3, max_arity=3, pure=False)
236
- if is_feature(a)
251
+ Feature(a, self) for a in table.find_associations(min_arity=3, max_arity=3, pure=False) if is_feature(a)
237
252
  ]
238
253
 
239
- def lookup_feature(self, table: str | Table, feature_name: str) -> Feature:
254
+ def lookup_feature(self, table: TableInput, feature_name: str) -> Feature:
240
255
  """Lookup the named feature associated with the provided table.
241
256
 
242
257
  Args:
@@ -252,31 +267,20 @@ class DerivaModel:
252
267
  """
253
268
  table = self.name_to_table(table)
254
269
  try:
255
- return [
256
- f for f in self.find_features(table) if f.feature_name == feature_name
257
- ][0]
270
+ return [f for f in self.find_features(table) if f.feature_name == feature_name][0]
258
271
  except IndexError:
259
- raise DerivaMLException(
260
- f"Feature {table.name}:{feature_name} doesn't exist."
261
- )
272
+ raise DerivaMLException(f"Feature {table.name}:{feature_name} doesn't exist.")
262
273
 
263
274
  def asset_metadata(self, table: str | Table) -> set[str]:
264
275
  """Return the metadata columns for an asset table."""
265
276
 
266
277
  table = self.name_to_table(table)
267
- asset_columns = {
268
- "Filename",
269
- "URL",
270
- "Length",
271
- "MD5",
272
- "Description",
273
- }.union(set(DerivaSystemColumns))
274
278
 
275
279
  if not self.is_asset(table):
276
- raise DerivaMLException(f"{table.name} is not an asset table.")
277
- return {c.name for c in table.columns} - asset_columns
280
+ raise DerivaMLTableTypeError("asset table", table.name)
281
+ return {c.name for c in table.columns} - DerivaAssetColumns
278
282
 
279
- def apply(self):
283
+ def apply(self) -> None:
280
284
  """Call ERMRestModel.apply"""
281
285
  if self.catalog == "file-system":
282
286
  raise DerivaMLException("Cannot apply() to non-catalog model.")
@@ -284,45 +288,38 @@ class DerivaModel:
284
288
  self.model.apply()
285
289
 
286
290
  def _table_relationship(
287
- self, table1: Table | str, table2: Table | str
291
+ self,
292
+ table1: TableInput,
293
+ table2: TableInput,
288
294
  ) -> tuple[Column, Column]:
289
295
  """Return columns used to relate two tables."""
290
296
  table1 = self.name_to_table(table1)
291
297
  table2 = self.name_to_table(table2)
292
298
  relationships = [
293
- (fk.foreign_key_columns[0], fk.referenced_columns[0])
294
- for fk in table1.foreign_keys
295
- if fk.pk_table == table2
299
+ (fk.foreign_key_columns[0], fk.referenced_columns[0]) for fk in table1.foreign_keys if fk.pk_table == table2
296
300
  ]
297
301
  relationships.extend(
298
- [
299
- (fk.referenced_columns[0], fk.foreign_key_columns[0])
300
- for fk in table1.referenced_by
301
- if fk.table == table2
302
- ]
302
+ [(fk.referenced_columns[0], fk.foreign_key_columns[0]) for fk in table1.referenced_by if fk.table == table2]
303
303
  )
304
304
  if len(relationships) != 1:
305
- raise DerivaMLException(
306
- f"Ambiguous linkage between {table1.name} and {table2.name}"
307
- )
305
+ raise DerivaMLException(f"Ambiguous linkage between {table1.name} and {table2.name}")
308
306
  return relationships[0]
309
307
 
310
308
  def _schema_to_paths(
311
309
  self,
312
- root: Table = None,
313
- path: Optional[list[Table]] = None,
310
+ root: Table | None = None,
311
+ path: list[Table] | None = None,
314
312
  ) -> list[list[Table]]:
315
- """Recursively walk over the domain schema graph and extend the current path.
316
-
317
- Walk a schema graph and return a list all the paths through the graph.
313
+ """Return a list of paths through the schema graph.
318
314
 
319
315
  Args:
320
- path: Source path so far
316
+ root: The root table to start from.
317
+ path: The current path being built.
321
318
 
322
319
  Returns:
323
- A list of all the paths through the graph. Each path is a list of tables.
324
-
320
+ A list of paths through the schema graph.
325
321
  """
322
+ path = path or []
326
323
 
327
324
  root = root or self.model.schemas[self.ml_schema].tables["Dataset"]
328
325
  path = path.copy() if path else []
@@ -332,21 +329,11 @@ class DerivaModel:
332
329
 
333
330
  def find_arcs(table: Table) -> set[Table]:
334
331
  """Given a table, return the set of domain- and ML-schema tables linked to it by foreign keys in either direction."""
335
- arc_list = [fk.pk_table for fk in table.foreign_keys] + [
336
- fk.table for fk in table.referenced_by
337
- ]
338
- arc_list = [
339
- t
340
- for t in arc_list
341
- if t.schema.name in {self.domain_schema, self.ml_schema}
342
- ]
332
+ arc_list = [fk.pk_table for fk in table.foreign_keys] + [fk.table for fk in table.referenced_by]
333
+ arc_list = [t for t in arc_list if t.schema.name in {self.domain_schema, self.ml_schema}]
343
334
  domain_tables = [t for t in arc_list if t.schema.name == self.domain_schema]
344
- if multiple_columns := [
345
- c for c, cnt in Counter(domain_tables).items() if cnt > 1
346
- ]:
347
- raise DerivaMLException(
348
- f"Ambiguous relationship in {table.name} {multiple_columns}"
349
- )
335
+ if multiple_columns := [c for c, cnt in Counter(domain_tables).items() if cnt > 1]:
336
+ raise DerivaMLException(f"Ambiguous relationship in {table.name} {multiple_columns}")
350
337
  return set(arc_list)
351
338
 
352
339
  def is_nested_dataset_loopback(n1: Table, n2: Table) -> bool:
@@ -354,9 +341,7 @@ class DerivaModel:
354
341
  # If we have node_name <- node_name_dataset-> Dataset then we are looping
355
342
  # back around to a new dataset element
356
343
  dataset_table = self.model.schemas[self.ml_schema].tables["Dataset"]
357
- assoc_table = [
358
- a for a in dataset_table.find_associations() if a.table == n2
359
- ]
344
+ assoc_table = [a for a in dataset_table.find_associations() if a.table == n2]
360
345
  return len(assoc_table) == 1 and n1 != dataset_table
361
346
 
362
347
  # Don't follow vocabulary terms back to their use.
@@ -372,9 +357,7 @@ class DerivaModel:
372
357
  if is_nested_dataset_loopback(root, child):
373
358
  continue
374
359
  if child in path:
375
- raise DerivaMLException(
376
- f"Cycle in schema path: {child.name} path:{[p.name for p in path]}"
377
- )
360
+ raise DerivaMLException(f"Cycle in schema path: {child.name} path:{[p.name for p in path]}")
378
361
 
379
362
  paths.extend(self._schema_to_paths(child, path))
380
363
  return paths
@@ -382,6 +365,4 @@ class DerivaModel:
382
365
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
383
366
  def create_table(self, table_def: TableDefinition) -> Table:
384
367
  """Create a new table from TableDefinition."""
385
- return self.model.schemas[self.domain_schema].create_table(
386
- table_def.model_dump()
387
- )
368
+ return self.model.schemas[self.domain_schema].create_table(table_def.model_dump())