deriva-ml 1.17.9__py3-none-any.whl → 1.17.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +43 -1
- deriva_ml/asset/__init__.py +17 -0
- deriva_ml/asset/asset.py +357 -0
- deriva_ml/asset/aux_classes.py +100 -0
- deriva_ml/bump_version.py +254 -11
- deriva_ml/catalog/__init__.py +21 -0
- deriva_ml/catalog/clone.py +1199 -0
- deriva_ml/catalog/localize.py +426 -0
- deriva_ml/core/__init__.py +29 -0
- deriva_ml/core/base.py +817 -1067
- deriva_ml/core/config.py +169 -21
- deriva_ml/core/constants.py +120 -19
- deriva_ml/core/definitions.py +123 -13
- deriva_ml/core/enums.py +47 -73
- deriva_ml/core/ermrest.py +226 -193
- deriva_ml/core/exceptions.py +297 -14
- deriva_ml/core/filespec.py +99 -28
- deriva_ml/core/logging_config.py +225 -0
- deriva_ml/core/mixins/__init__.py +42 -0
- deriva_ml/core/mixins/annotation.py +915 -0
- deriva_ml/core/mixins/asset.py +384 -0
- deriva_ml/core/mixins/dataset.py +237 -0
- deriva_ml/core/mixins/execution.py +408 -0
- deriva_ml/core/mixins/feature.py +365 -0
- deriva_ml/core/mixins/file.py +263 -0
- deriva_ml/core/mixins/path_builder.py +145 -0
- deriva_ml/core/mixins/rid_resolution.py +204 -0
- deriva_ml/core/mixins/vocabulary.py +400 -0
- deriva_ml/core/mixins/workflow.py +322 -0
- deriva_ml/core/validation.py +389 -0
- deriva_ml/dataset/__init__.py +2 -1
- deriva_ml/dataset/aux_classes.py +20 -4
- deriva_ml/dataset/catalog_graph.py +575 -0
- deriva_ml/dataset/dataset.py +1242 -1008
- deriva_ml/dataset/dataset_bag.py +1311 -182
- deriva_ml/dataset/history.py +27 -14
- deriva_ml/dataset/upload.py +225 -38
- deriva_ml/demo_catalog.py +186 -105
- deriva_ml/execution/__init__.py +46 -2
- deriva_ml/execution/base_config.py +639 -0
- deriva_ml/execution/execution.py +545 -244
- deriva_ml/execution/execution_configuration.py +26 -11
- deriva_ml/execution/execution_record.py +592 -0
- deriva_ml/execution/find_caller.py +298 -0
- deriva_ml/execution/model_protocol.py +175 -0
- deriva_ml/execution/multirun_config.py +153 -0
- deriva_ml/execution/runner.py +595 -0
- deriva_ml/execution/workflow.py +224 -35
- deriva_ml/experiment/__init__.py +8 -0
- deriva_ml/experiment/experiment.py +411 -0
- deriva_ml/feature.py +6 -1
- deriva_ml/install_kernel.py +143 -6
- deriva_ml/interfaces.py +862 -0
- deriva_ml/model/__init__.py +99 -0
- deriva_ml/model/annotations.py +1278 -0
- deriva_ml/model/catalog.py +286 -60
- deriva_ml/model/database.py +144 -649
- deriva_ml/model/deriva_ml_database.py +308 -0
- deriva_ml/model/handles.py +14 -0
- deriva_ml/run_model.py +319 -0
- deriva_ml/run_notebook.py +507 -38
- deriva_ml/schema/__init__.py +18 -2
- deriva_ml/schema/annotations.py +62 -33
- deriva_ml/schema/create_schema.py +169 -69
- deriva_ml/schema/validation.py +601 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -5
- deriva_ml-1.17.11.dist-info/RECORD +77 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +2 -0
- deriva_ml/protocols/dataset.py +0 -19
- deriva_ml/test.py +0 -94
- deriva_ml-1.17.9.dist-info/RECORD +0 -45
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
"""Vocabulary management mixin for DerivaML.
|
|
2
|
+
|
|
3
|
+
This module provides the VocabularyMixin class which handles vocabulary
|
|
4
|
+
term operations including adding, looking up, and listing terms in
|
|
5
|
+
controlled vocabulary tables.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable
|
|
11
|
+
|
|
12
|
+
# Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
|
|
13
|
+
import importlib
|
|
14
|
+
_datapath = importlib.import_module("deriva.core.datapath")
|
|
15
|
+
_ermrest_model = importlib.import_module("deriva.core.ermrest_model")
|
|
16
|
+
DataPathException = _datapath.DataPathException
|
|
17
|
+
Table = _ermrest_model.Table
|
|
18
|
+
|
|
19
|
+
from pydantic import ConfigDict, validate_call
|
|
20
|
+
|
|
21
|
+
from deriva_ml.core.definitions import MLVocab, VocabularyTerm, VocabularyTermHandle
|
|
22
|
+
from deriva_ml.core.exceptions import (
|
|
23
|
+
DerivaMLException,
|
|
24
|
+
DerivaMLInvalidTerm,
|
|
25
|
+
DerivaMLTableTypeError,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from deriva_ml.model.catalog import DerivaModel
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Type alias for the vocabulary cache structure
|
|
33
|
+
# Maps (schema_name, table_name) -> {term_name -> VocabularyTermHandle, synonym -> VocabularyTermHandle}
|
|
34
|
+
VocabCache = dict[tuple[str, str], dict[str, VocabularyTermHandle]]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class VocabularyMixin:
|
|
38
|
+
"""Mixin providing vocabulary/term management operations.
|
|
39
|
+
|
|
40
|
+
This mixin requires the host class to have:
|
|
41
|
+
- model: DerivaModel instance
|
|
42
|
+
- pathBuilder(): method returning catalog path builder
|
|
43
|
+
|
|
44
|
+
Methods:
|
|
45
|
+
add_term: Add a new term to a vocabulary table
|
|
46
|
+
lookup_term: Find a term by name or synonym
|
|
47
|
+
list_vocabulary_terms: List all terms in a vocabulary table
|
|
48
|
+
clear_vocabulary_cache: Clear the vocabulary term cache
delete_term: Delete a term from a vocabulary table
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
# Type hints for IDE support - actual attributes/methods from host class
|
|
52
|
+
model: "DerivaModel"
|
|
53
|
+
pathBuilder: Callable[[], Any]
|
|
54
|
+
|
|
55
|
+
# Vocabulary term cache: maps (schema, table) -> {name_or_synonym -> VocabularyTermHandle}
|
|
56
|
+
_vocab_cache: VocabCache
|
|
57
|
+
|
|
58
|
+
def _get_vocab_cache(self) -> VocabCache:
    """Return this instance's vocabulary cache, creating it on first access.

    The cache lives on the instance as ``_vocab_cache``. The class only
    declares the annotation for that attribute, so the first call on a
    given instance creates an empty dict and attaches it.

    Returns:
        The per-instance cache dict. The same object is returned on every
        call, so callers may mutate it in place.
    """
    try:
        return self._vocab_cache
    except AttributeError:
        # No cache has been attached to this instance yet.
        self._vocab_cache = {}
        return self._vocab_cache
|
|
63
|
+
|
|
64
|
+
def clear_vocabulary_cache(self, table: str | Table | None = None) -> None:
    """Drop cached vocabulary terms, for one table or for all of them.

    Args:
        table: Vocabulary table (name or Table object) whose cached terms
            should be discarded. When None, every cached table is discarded.
            The table is resolved through ``self.model.name_to_table``;
            clearing a table that was never cached is a no-op.
    """
    cached_tables = self._get_vocab_cache()
    if table is None:
        cached_tables.clear()
        return

    resolved = self.model.name_to_table(table)
    # Cache keys are (schema name, table name) pairs.
    cached_tables.pop((resolved.schema.name, resolved.name), None)
|
|
78
|
+
|
|
79
|
+
def _populate_vocab_cache(self, schema_name: str, table_name: str) -> dict[str, VocabularyTermHandle]:
    """Load every term of one vocabulary table and store the lookup in the cache.

    All rows of the table are fetched through the path builder and wrapped in
    VocabularyTermHandle objects. Each handle is indexed under its primary
    name and under each of its synonyms; when two keys collide, the entry
    written later wins. Any lookup previously cached for this table is
    replaced.

    Args:
        schema_name: Schema containing the vocabulary table.
        table_name: Vocabulary table name.

    Returns:
        Dictionary mapping term names and synonyms to VocabularyTermHandle
        objects (the same dict that is stored in the cache).
    """
    vocab_cache = self._get_vocab_cache()

    # One full-table fetch from the server.
    rows = self.pathBuilder().schemas[schema_name].tables[table_name].entities().fetch()

    by_label: dict[str, VocabularyTermHandle] = {}
    for row in rows:
        handle = VocabularyTermHandle(ml=self, table=table_name, **row)
        by_label[handle.name] = handle
        # A term without synonyms (None or empty) contributes only its name.
        for alias in handle.synonyms or ():
            by_label[alias] = handle

    vocab_cache[(schema_name, table_name)] = by_label
    return by_label
|
|
103
|
+
|
|
104
|
+
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def add_term(
    self,
    table: str | Table,
    term_name: str,
    description: str,
    synonyms: list[str] | None = None,
    exists_ok: bool = True,
) -> VocabularyTermHandle:
    """Adds a term to a vocabulary table.

    Inserts a row with the given name, description and synonyms into the
    vocabulary table. If the insert fails with a DataPathException, the term
    is assumed to exist already: the existing term is looked up and returned
    when ``exists_ok`` is True, otherwise an error is raised.

    Args:
        table: Vocabulary table to add term to (name or Table object).
        term_name: Primary name of the term (must be unique within vocabulary).
        description: Explanation of term's meaning and usage.
        synonyms: Alternative names for the term. None is treated as no synonyms.
        exists_ok: If True, return the existing term if found. If False, raise error.

    Returns:
        VocabularyTermHandle: Object representing the created or existing term, with
            methods to modify it in the catalog.

    Raises:
        DerivaMLTableTypeError: If the table is not a vocabulary table.
        DerivaMLInvalidTerm: If the insert fails and exists_ok=False, or if the
            insert fails and the follow-up lookup cannot find the term.

    Note:
        Every DataPathException raised by the insert is interpreted as
        "term already exists"; the original failure is not inspected.

    Examples:
        Add a new tissue type:
            >>> term = ml.add_term(
            ...     table="tissue_types",
            ...     term_name="epithelial",
            ...     description="Epithelial tissue type",
            ...     synonyms=["epithelium"]
            ... )
            >>> # Modify the term
            >>> term.description = "Updated description"
            >>> term.synonyms = ("epithelium", "epithelial_tissue")

        Attempt to add an existing term:
            >>> term = ml.add_term("tissue_types", "epithelial", "...", exists_ok=True)
    """
    if not synonyms:
        synonyms = []

    # Resolve the table and make sure it really is a vocabulary.
    vocab_table = self.model.name_to_table(table)
    path_builder = self.pathBuilder()
    if not self.model.is_vocabulary(vocab_table):
        raise DerivaMLTableTypeError("vocabulary", vocab_table.name)

    vocab_schema = vocab_table.schema.name
    vocab_name = vocab_table.name
    new_row = {
        "Name": term_name,
        "Description": description,
        "Synonyms": synonyms,
    }

    try:
        # ID and URI are left to the server's column defaults.
        inserted = path_builder.schemas[vocab_schema].tables[vocab_name].insert(
            [new_row],
            defaults={"ID", "URI"},
        )
        created = VocabularyTermHandle(ml=self, table=vocab_name, **inserted[0])
        # The cached lookup for this table is now stale.
        self.clear_vocabulary_cache(vocab_table)
        return created
    except DataPathException:
        # Insert rejected: treated as "term already exists".
        if exists_ok:
            return self.lookup_term(vocab_table, term_name)
        raise DerivaMLInvalidTerm(vocab_table.name, term_name, msg="term already exists")
|
|
181
|
+
|
|
182
|
+
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def lookup_term(self, table: str | Table, term_name: str) -> VocabularyTermHandle:
    """Finds a term in a vocabulary table.

    Searches for a term in the specified vocabulary table, matching either the primary name
    or any of its synonyms. Results are cached for performance - subsequent lookups in the
    same vocabulary table are served from cache. On a cache miss (the table is not cached
    yet, or the cached table does not contain ``term_name``) the table is re-fetched once
    from the server, so terms added since the cache was built are still found.

    Args:
        table: Vocabulary table to search in (name or Table object).
        term_name: Name or synonym of the term to find.

    Returns:
        VocabularyTermHandle: The matching vocabulary term, with methods to modify it.

    Raises:
        DerivaMLException: If the table is not a vocabulary table.
        DerivaMLInvalidTerm: If no term with that name or synonym exists in the table.

    Examples:
        Look up by primary name:
            >>> term = ml.lookup_term("tissue_types", "epithelial")
            >>> print(term.description)

        Look up by synonym:
            >>> term = ml.lookup_term("tissue_types", "epithelium")

        Modify the term:
            >>> term = ml.lookup_term("tissue_types", "epithelial")
            >>> term.description = "Updated description"
            >>> term.synonyms = ("epithelium", "epithelial_tissue")
    """
    # Get and validate vocabulary table reference
    vocab_table = self.model.name_to_table(table)
    if not self.model.is_vocabulary(vocab_table):
        raise DerivaMLException(f"The table {table} is not a controlled vocabulary")

    schema_name, table_name = vocab_table.schema.name, vocab_table.name

    # Fast path: this vocabulary is cached and already knows the name/synonym.
    cached_terms = self._get_vocab_cache().get((schema_name, table_name))
    if cached_terms is not None and term_name in cached_terms:
        return cached_terms[term_name]

    # Cache miss: either the vocabulary was never loaded or the term was added
    # after it was cached. A single full refresh covers both primary names and
    # synonyms. The previous extra name-only server query was redundant (the
    # handle was always taken from the refreshed cache) and could surface a
    # bare KeyError if the term vanished between the two requests.
    refreshed_terms = self._populate_vocab_cache(schema_name, table_name)
    if term_name in refreshed_terms:
        return refreshed_terms[term_name]

    # Term not found
    raise DerivaMLInvalidTerm(table_name, term_name)
|
|
256
|
+
|
|
257
|
+
def _server_lookup_term(
    self, schema_name: str, table_name: str, term_name: str
) -> VocabularyTermHandle | None:
    """Query the server for a single term by its primary name.

    Issues one filtered request (``Name == term_name``) against the
    vocabulary table. Synonyms are not consulted by this query.

    Args:
        schema_name: Schema containing the vocabulary table.
        table_name: Vocabulary table name.
        term_name: Primary name of the term to find.

    Returns:
        VocabularyTermHandle built from the first matching row, or None when
        the query returns no rows.
    """
    vocab_path = self.pathBuilder().schemas[schema_name].tables[table_name]

    # Filtering happens server-side; only matching rows are transferred.
    name_matches = vocab_path.filter(vocab_path.Name == term_name)
    rows = list(name_matches.entities().fetch())
    if not rows:
        return None
    return VocabularyTermHandle(ml=self, table=table_name, **rows[0])
|
|
281
|
+
|
|
282
|
+
def list_vocabulary_terms(self, table: str | Table) -> list[VocabularyTerm]:
    """Lists all terms in a vocabulary table.

    Fetches every row of the controlled vocabulary table from the catalog and
    converts each row into a VocabularyTerm. This method does not use the
    vocabulary cache.

    Args:
        table: Vocabulary table to list terms from (name or Table object).
            An MLVocab member is also accepted; its ``value`` is used as the
            table name.

    Returns:
        list[VocabularyTerm]: List of vocabulary terms with their metadata.

    Raises:
        DerivaMLException: If the table is not a vocabulary table.

    Examples:
        >>> terms = ml.list_vocabulary_terms("tissue_types")
        >>> for term in terms:
        ...     print(f"{term.name}: {term.description}")
        ...     if term.synonyms:
        ...         print(f"  Synonyms: {', '.join(term.synonyms)}")
    """
    path_builder = self.pathBuilder()

    # Unwrap enum members to their plain table name before resolving.
    if isinstance(table, MLVocab):
        table = table.value
    table = self.model.name_to_table(table)

    if not self.model.is_vocabulary(table):
        raise DerivaMLException(f"The table {table} is not a controlled vocabulary")

    vocab_path = path_builder.schemas[table.schema.name].tables[table.name]
    rows = vocab_path.entities().fetch()
    return [VocabularyTerm(**row) for row in rows]
|
|
313
|
+
|
|
314
|
+
def _update_term_synonyms(self, table: str | Table, term_name: str, synonyms: list[str]) -> None:
    """Internal: Replace the synonyms of a vocabulary term in the catalog.

    Called by VocabularyTermHandle.synonyms setter.

    The term is resolved with ``lookup_term`` to obtain its RID, the row is
    updated by RID, and the cached lookup for the table is discarded.

    Args:
        table: Vocabulary table containing the term.
        term_name: Name of the term to update (resolved via ``lookup_term``).
        synonyms: New list of synonyms (replaces all existing).
    """
    # The RID identifies the row to update.
    handle = self.lookup_term(table, term_name)

    resolved = self.model.name_to_table(table)
    vocab_path = self.pathBuilder().schemas[resolved.schema.name].tables[resolved.name]
    changes = {"RID": handle.rid, "Synonyms": synonyms}
    vocab_path.update([changes])

    # Cached handles for this table still carry the old synonyms.
    self.clear_vocabulary_cache(table)
|
|
335
|
+
|
|
336
|
+
def _update_term_description(self, table: str | Table, term_name: str, description: str) -> None:
    """Internal: Replace the description of a vocabulary term in the catalog.

    Called by VocabularyTermHandle.description setter.

    The term is resolved with ``lookup_term`` to obtain its RID, the row is
    updated by RID, and the cached lookup for the table is discarded.

    Args:
        table: Vocabulary table containing the term.
        term_name: Name of the term to update (resolved via ``lookup_term``).
        description: New description for the term.
    """
    # The RID identifies the row to update.
    handle = self.lookup_term(table, term_name)

    resolved = self.model.name_to_table(table)
    vocab_path = self.pathBuilder().schemas[resolved.schema.name].tables[resolved.name]
    changes = {"RID": handle.rid, "Description": description}
    vocab_path.update([changes])

    # Cached handles for this table still carry the old description.
    self.clear_vocabulary_cache(table)
|
|
357
|
+
|
|
358
|
+
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def delete_term(self, table: str | Table, term_name: str) -> None:
    """Delete a term from a vocabulary table.

    Removes a term from the vocabulary. Before deleting, every association
    table reported by the vocabulary table's ``find_associations()`` is
    queried for rows that reference the term; if any are found the delete is
    refused (e.g., datasets using this dataset type, assets using this asset
    type).

    Args:
        table: Vocabulary table containing the term (name or Table object).
        term_name: Name of the term to delete (resolved via ``lookup_term``).

    Raises:
        DerivaMLInvalidTerm: If the term doesn't exist in the vocabulary.
        DerivaMLException: If the term is currently in use by other records,
            or if the table is not a vocabulary table.

    Example:
        >>> ml.delete_term("Dataset_Type", "Obsolete_Type")
    """
    # lookup_term validates both the table and the term's existence.
    handle = self.lookup_term(table, term_name)
    vocab_table = self.model.name_to_table(table)

    linked_tables = list(vocab_table.find_associations())
    path_builder = self.pathBuilder()

    # NOTE(review): relies on each association result exposing `.schema` and
    # `.name` - confirm against the deriva-py version in use.
    for assoc in linked_tables:
        assoc_path = path_builder.schemas[assoc.schema.name].tables[assoc.name]
        # The referencing column is looked up by the vocabulary table's name.
        term_column = getattr(assoc_path, vocab_table.name)
        referencing_rows = list(assoc_path.filter(term_column == handle.name).entities().fetch())
        count = len(referencing_rows)
        if not count:
            continue
        raise DerivaMLException(
            f"Cannot delete term '{term_name}' from {vocab_table.name}: "
            f"it is referenced by {count} record(s) in {assoc.name}"
        )

    # Nothing references the term: remove the row by RID.
    vocab_path = path_builder.schemas[vocab_table.schema.name].tables[vocab_table.name]
    vocab_path.filter(vocab_path.RID == handle.rid).delete()

    # The cached lookup for this table still contains the deleted term.
    self.clear_vocabulary_cache(table)
|