deriva-ml 1.17.9__py3-none-any.whl → 1.17.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +43 -1
- deriva_ml/asset/__init__.py +17 -0
- deriva_ml/asset/asset.py +357 -0
- deriva_ml/asset/aux_classes.py +100 -0
- deriva_ml/bump_version.py +254 -11
- deriva_ml/catalog/__init__.py +21 -0
- deriva_ml/catalog/clone.py +1199 -0
- deriva_ml/catalog/localize.py +426 -0
- deriva_ml/core/__init__.py +29 -0
- deriva_ml/core/base.py +817 -1067
- deriva_ml/core/config.py +169 -21
- deriva_ml/core/constants.py +120 -19
- deriva_ml/core/definitions.py +123 -13
- deriva_ml/core/enums.py +47 -73
- deriva_ml/core/ermrest.py +226 -193
- deriva_ml/core/exceptions.py +297 -14
- deriva_ml/core/filespec.py +99 -28
- deriva_ml/core/logging_config.py +225 -0
- deriva_ml/core/mixins/__init__.py +42 -0
- deriva_ml/core/mixins/annotation.py +915 -0
- deriva_ml/core/mixins/asset.py +384 -0
- deriva_ml/core/mixins/dataset.py +237 -0
- deriva_ml/core/mixins/execution.py +408 -0
- deriva_ml/core/mixins/feature.py +365 -0
- deriva_ml/core/mixins/file.py +263 -0
- deriva_ml/core/mixins/path_builder.py +145 -0
- deriva_ml/core/mixins/rid_resolution.py +204 -0
- deriva_ml/core/mixins/vocabulary.py +400 -0
- deriva_ml/core/mixins/workflow.py +322 -0
- deriva_ml/core/validation.py +389 -0
- deriva_ml/dataset/__init__.py +2 -1
- deriva_ml/dataset/aux_classes.py +20 -4
- deriva_ml/dataset/catalog_graph.py +575 -0
- deriva_ml/dataset/dataset.py +1242 -1008
- deriva_ml/dataset/dataset_bag.py +1311 -182
- deriva_ml/dataset/history.py +27 -14
- deriva_ml/dataset/upload.py +225 -38
- deriva_ml/demo_catalog.py +186 -105
- deriva_ml/execution/__init__.py +46 -2
- deriva_ml/execution/base_config.py +639 -0
- deriva_ml/execution/execution.py +545 -244
- deriva_ml/execution/execution_configuration.py +26 -11
- deriva_ml/execution/execution_record.py +592 -0
- deriva_ml/execution/find_caller.py +298 -0
- deriva_ml/execution/model_protocol.py +175 -0
- deriva_ml/execution/multirun_config.py +153 -0
- deriva_ml/execution/runner.py +595 -0
- deriva_ml/execution/workflow.py +224 -35
- deriva_ml/experiment/__init__.py +8 -0
- deriva_ml/experiment/experiment.py +411 -0
- deriva_ml/feature.py +6 -1
- deriva_ml/install_kernel.py +143 -6
- deriva_ml/interfaces.py +862 -0
- deriva_ml/model/__init__.py +99 -0
- deriva_ml/model/annotations.py +1278 -0
- deriva_ml/model/catalog.py +286 -60
- deriva_ml/model/database.py +144 -649
- deriva_ml/model/deriva_ml_database.py +308 -0
- deriva_ml/model/handles.py +14 -0
- deriva_ml/run_model.py +319 -0
- deriva_ml/run_notebook.py +507 -38
- deriva_ml/schema/__init__.py +18 -2
- deriva_ml/schema/annotations.py +62 -33
- deriva_ml/schema/create_schema.py +169 -69
- deriva_ml/schema/validation.py +601 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -5
- deriva_ml-1.17.11.dist-info/RECORD +77 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +2 -0
- deriva_ml/protocols/dataset.py +0 -19
- deriva_ml/test.py +0 -94
- deriva_ml-1.17.9.dist-info/RECORD +0 -45
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
deriva_ml/core/enums.py
CHANGED
|
@@ -7,7 +7,7 @@ Classes:
|
|
|
7
7
|
BaseStrEnum: Base class for string-based enums.
|
|
8
8
|
UploadState: States for file upload operations.
|
|
9
9
|
Status: Execution status values.
|
|
10
|
-
BuiltinTypes:
|
|
10
|
+
BuiltinTypes: Alias for BuiltinType from deriva.core.typed.
|
|
11
11
|
MLVocab: Controlled vocabulary types.
|
|
12
12
|
MLAsset: Asset type identifiers.
|
|
13
13
|
ExecMetadataType: Execution metadata type identifiers.
|
|
@@ -16,7 +16,16 @@ Classes:
|
|
|
16
16
|
|
|
17
17
|
from enum import Enum
|
|
18
18
|
|
|
19
|
-
from deriva.core.ermrest_model import builtin_types
|
|
19
|
+
# Import BuiltinType from deriva.core.typed
|
|
20
|
+
from deriva.core.typed import BuiltinType
|
|
21
|
+
|
|
22
|
+
# Backwards compatibility alias - DerivaML uses plural form
|
|
23
|
+
BuiltinTypes = BuiltinType
|
|
24
|
+
"""Alias for BuiltinType from deriva.core.typed.
|
|
25
|
+
|
|
26
|
+
This maintains backwards compatibility with existing DerivaML code that uses
|
|
27
|
+
the plural form 'BuiltinTypes'. New code should use BuiltinType directly.
|
|
28
|
+
"""
|
|
20
29
|
|
|
21
30
|
|
|
22
31
|
class BaseStrEnum(str, Enum):
|
|
@@ -85,78 +94,19 @@ class Status(BaseStrEnum):
|
|
|
85
94
|
failed = "Failed"
|
|
86
95
|
|
|
87
96
|
|
|
88
|
-
class BuiltinTypes(Enum):
|
|
89
|
-
"""ERMrest built-in data types.
|
|
90
|
-
|
|
91
|
-
Maps ERMrest's built-in data types to their type names. These types are used for defining
|
|
92
|
-
column types in tables and for type validation.
|
|
93
|
-
|
|
94
|
-
Attributes:
|
|
95
|
-
text (str): Text/string type.
|
|
96
|
-
int2 (str): 16-bit integer.
|
|
97
|
-
jsonb (str): Binary JSON.
|
|
98
|
-
float8 (str): 64-bit float.
|
|
99
|
-
timestamp (str): Timestamp without timezone.
|
|
100
|
-
int8 (str): 64-bit integer.
|
|
101
|
-
boolean (str): Boolean type.
|
|
102
|
-
json (str): JSON type.
|
|
103
|
-
float4 (str): 32-bit float.
|
|
104
|
-
int4 (str): 32-bit integer.
|
|
105
|
-
timestamptz (str): Timestamp with timezone.
|
|
106
|
-
date (str): Date type.
|
|
107
|
-
ermrest_rid (str): Resource identifier.
|
|
108
|
-
ermrest_rcb (str): Record created by.
|
|
109
|
-
ermrest_rmb (str): Record modified by.
|
|
110
|
-
ermrest_rct (str): Record creation time.
|
|
111
|
-
ermrest_rmt (str): Record modification time.
|
|
112
|
-
markdown (str): Markdown text.
|
|
113
|
-
longtext (str): Long text.
|
|
114
|
-
ermrest_curie (str): Compact URI.
|
|
115
|
-
ermrest_uri (str): URI type.
|
|
116
|
-
color_rgb_hex (str): RGB color in hex.
|
|
117
|
-
serial2 (str): 16-bit auto-incrementing.
|
|
118
|
-
serial4 (str): 32-bit auto-incrementing.
|
|
119
|
-
serial8 (str): 64-bit auto-incrementing.
|
|
120
|
-
"""
|
|
121
|
-
|
|
122
|
-
text = builtin_types.text.typename
|
|
123
|
-
int2 = builtin_types.int2.typename
|
|
124
|
-
jsonb = builtin_types.json.typename
|
|
125
|
-
float8 = builtin_types.float8.typename
|
|
126
|
-
timestamp = builtin_types.timestamp.typename
|
|
127
|
-
int8 = builtin_types.int8.typename
|
|
128
|
-
boolean = builtin_types.boolean.typename
|
|
129
|
-
json = builtin_types.json.typename
|
|
130
|
-
float4 = builtin_types.float4.typename
|
|
131
|
-
int4 = builtin_types.int4.typename
|
|
132
|
-
timestamptz = builtin_types.timestamptz.typename
|
|
133
|
-
date = builtin_types.date.typename
|
|
134
|
-
ermrest_rid = builtin_types.ermrest_rid.typename
|
|
135
|
-
ermrest_rcb = builtin_types.ermrest_rcb.typename
|
|
136
|
-
ermrest_rmb = builtin_types.ermrest_rmb.typename
|
|
137
|
-
ermrest_rct = builtin_types.ermrest_rct.typename
|
|
138
|
-
ermrest_rmt = builtin_types.ermrest_rmt.typename
|
|
139
|
-
markdown = builtin_types.markdown.typename
|
|
140
|
-
longtext = builtin_types.longtext.typename
|
|
141
|
-
ermrest_curie = builtin_types.ermrest_curie.typename
|
|
142
|
-
ermrest_uri = builtin_types.ermrest_uri.typename
|
|
143
|
-
color_rgb_hex = builtin_types.color_rgb_hex.typename
|
|
144
|
-
serial2 = builtin_types.serial2.typename
|
|
145
|
-
serial4 = builtin_types.serial4.typename
|
|
146
|
-
serial8 = builtin_types.serial8.typename
|
|
147
|
-
|
|
148
|
-
|
|
149
97
|
class MLVocab(BaseStrEnum):
|
|
150
|
-
"""Controlled vocabulary
|
|
98
|
+
"""Controlled vocabulary table identifiers.
|
|
151
99
|
|
|
152
|
-
Defines the names of controlled vocabulary tables used in DerivaML
|
|
153
|
-
|
|
100
|
+
Defines the names of controlled vocabulary tables used in DerivaML. These tables
|
|
101
|
+
store standardized terms with descriptions and synonyms for consistent data
|
|
102
|
+
classification across the catalog.
|
|
154
103
|
|
|
155
104
|
Attributes:
|
|
156
|
-
dataset_type (str): Dataset classification vocabulary.
|
|
157
|
-
workflow_type (str): Workflow classification vocabulary.
|
|
158
|
-
asset_type (str): Asset classification vocabulary.
|
|
159
|
-
asset_role (str): Asset role
|
|
105
|
+
dataset_type (str): Dataset classification vocabulary (e.g., "Training", "Test").
|
|
106
|
+
workflow_type (str): Workflow classification vocabulary (e.g., "Python", "Notebook").
|
|
107
|
+
asset_type (str): Asset/file type classification vocabulary (e.g., "Image", "CSV").
|
|
108
|
+
asset_role (str): Asset role vocabulary for execution relationships (e.g., "Input", "Output").
|
|
109
|
+
feature_name (str): Feature name vocabulary for ML feature definitions.
|
|
160
110
|
"""
|
|
161
111
|
|
|
162
112
|
dataset_type = "Dataset_Type"
|
|
@@ -181,11 +131,29 @@ class MLAsset(BaseStrEnum):
|
|
|
181
131
|
|
|
182
132
|
|
|
183
133
|
class MLTable(BaseStrEnum):
|
|
134
|
+
"""Core ML schema table identifiers.
|
|
135
|
+
|
|
136
|
+
Defines the names of the core tables in the deriva-ml schema. These tables
|
|
137
|
+
form the backbone of the ML workflow tracking system.
|
|
138
|
+
|
|
139
|
+
Attributes:
|
|
140
|
+
dataset (str): Dataset table for versioned data collections.
|
|
141
|
+
workflow (str): Workflow table for computational pipeline definitions.
|
|
142
|
+
file (str): File table for tracking individual files.
|
|
143
|
+
asset (str): Asset table for domain-specific file types.
|
|
144
|
+
execution (str): Execution table for workflow run tracking.
|
|
145
|
+
execution_execution (str): Execution_Execution table for nested executions.
|
|
146
|
+
dataset_version (str): Dataset_Version table for version history.
|
|
147
|
+
execution_metadata (str): Execution_Metadata table for run metadata.
|
|
148
|
+
execution_asset (str): Execution_Asset table for run outputs.
|
|
149
|
+
"""
|
|
150
|
+
|
|
184
151
|
dataset = "Dataset"
|
|
185
152
|
workflow = "Workflow"
|
|
186
153
|
file = "File"
|
|
187
154
|
asset = "Asset"
|
|
188
155
|
execution = "Execution"
|
|
156
|
+
execution_execution = "Execution_Execution"
|
|
189
157
|
dataset_version = "Dataset_Version"
|
|
190
158
|
execution_metadata = "Execution_Metadata"
|
|
191
159
|
execution_asset = "Execution_Asset"
|
|
@@ -197,23 +165,29 @@ class ExecMetadataType(BaseStrEnum):
|
|
|
197
165
|
Defines the types of metadata that can be associated with an execution.
|
|
198
166
|
|
|
199
167
|
Attributes:
|
|
200
|
-
execution_config (str):
|
|
168
|
+
execution_config (str): General execution configuration data.
|
|
201
169
|
runtime_env (str): Runtime environment information.
|
|
170
|
+
hydra_config (str): Hydra YAML configuration files (config.yaml, overrides.yaml).
|
|
171
|
+
deriva_config (str): DerivaML execution configuration (configuration.json).
|
|
202
172
|
"""
|
|
203
173
|
|
|
204
174
|
execution_config = "Execution_Config"
|
|
205
175
|
runtime_env = "Runtime_Env"
|
|
176
|
+
hydra_config = "Hydra_Config"
|
|
177
|
+
deriva_config = "Deriva_Config"
|
|
206
178
|
|
|
207
179
|
|
|
208
180
|
class ExecAssetType(BaseStrEnum):
|
|
209
181
|
"""Execution asset type identifiers.
|
|
210
182
|
|
|
211
|
-
Defines the types of assets that can be produced during an execution.
|
|
183
|
+
Defines the types of assets that can be produced or consumed during an execution.
|
|
184
|
+
These types are used to categorize files associated with workflow runs.
|
|
212
185
|
|
|
213
186
|
Attributes:
|
|
214
|
-
input_file (str): Input file
|
|
187
|
+
input_file (str): Input file consumed by the execution.
|
|
215
188
|
output_file (str): Output file produced by the execution.
|
|
216
189
|
notebook_output (str): Jupyter notebook output from the execution.
|
|
190
|
+
model_file (str): Machine learning model file (e.g., .pkl, .h5, .pt).
|
|
217
191
|
"""
|
|
218
192
|
|
|
219
193
|
input_file = "Input_File"
|
deriva_ml/core/ermrest.py
CHANGED
|
@@ -1,34 +1,71 @@
|
|
|
1
1
|
"""ERMrest data models for DerivaML.
|
|
2
2
|
|
|
3
|
-
This module provides
|
|
3
|
+
This module provides models that represent ERMrest catalog structures. These models are used
|
|
4
4
|
throughout DerivaML for defining and manipulating catalog elements like tables, columns, and keys.
|
|
5
5
|
|
|
6
|
+
The core definition classes (ColumnDef, KeyDef, ForeignKeyDef, TableDef) are now provided by
|
|
7
|
+
`deriva.core.typed` and re-exported here for backwards compatibility.
|
|
8
|
+
|
|
6
9
|
Classes:
|
|
7
10
|
FileUploadState: Tracks the state of file uploads.
|
|
8
11
|
VocabularyTerm: Represents terms in controlled vocabularies.
|
|
9
|
-
ColumnDefinition:
|
|
10
|
-
KeyDefinition:
|
|
11
|
-
ForeignKeyDefinition:
|
|
12
|
-
TableDefinition:
|
|
12
|
+
ColumnDefinition: Alias for ColumnDef from deriva.core.typed.
|
|
13
|
+
KeyDefinition: Alias for KeyDef from deriva.core.typed.
|
|
14
|
+
ForeignKeyDefinition: Alias for ForeignKeyDef from deriva.core.typed.
|
|
15
|
+
TableDefinition: Alias for TableDef from deriva.core.typed.
|
|
13
16
|
"""
|
|
14
17
|
|
|
15
18
|
from __future__ import annotations
|
|
16
19
|
|
|
17
20
|
import warnings
|
|
18
|
-
from
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from typing import Any, Protocol
|
|
19
23
|
|
|
20
|
-
import deriva.core.ermrest_model as em
|
|
21
|
-
from deriva.core.ermrest_model import builtin_types
|
|
22
24
|
from pydantic import (
|
|
23
25
|
BaseModel,
|
|
24
26
|
Field,
|
|
27
|
+
PrivateAttr,
|
|
25
28
|
computed_field,
|
|
26
|
-
field_validator,
|
|
27
|
-
model_serializer,
|
|
28
29
|
)
|
|
29
30
|
|
|
30
31
|
from .constants import RID
|
|
31
|
-
from .enums import BuiltinTypes, UploadState
|
|
32
|
+
from .enums import UploadState
|
|
33
|
+
|
|
34
|
+
# Import and re-export typed definitions from deriva.core.typed
|
|
35
|
+
from deriva.core.typed import (
|
|
36
|
+
ColumnDef,
|
|
37
|
+
KeyDef,
|
|
38
|
+
ForeignKeyDef,
|
|
39
|
+
TableDef,
|
|
40
|
+
VocabularyTableDef,
|
|
41
|
+
AssetTableDef,
|
|
42
|
+
AssociationTableDef,
|
|
43
|
+
SchemaDef,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Re-export all typed classes for convenience
|
|
47
|
+
__all__ = [
|
|
48
|
+
# New typed definitions from deriva.core.typed
|
|
49
|
+
"ColumnDef",
|
|
50
|
+
"KeyDef",
|
|
51
|
+
"ForeignKeyDef",
|
|
52
|
+
"TableDef",
|
|
53
|
+
"VocabularyTableDef",
|
|
54
|
+
"AssetTableDef",
|
|
55
|
+
"AssociationTableDef",
|
|
56
|
+
"SchemaDef",
|
|
57
|
+
# Legacy aliases for backwards compatibility
|
|
58
|
+
"ColumnDefinition",
|
|
59
|
+
"KeyDefinition",
|
|
60
|
+
"ForeignKeyDefinition",
|
|
61
|
+
"TableDefinition",
|
|
62
|
+
# DerivaML-specific classes
|
|
63
|
+
"FileUploadState",
|
|
64
|
+
"UploadProgress",
|
|
65
|
+
"UploadCallback",
|
|
66
|
+
"VocabularyTerm",
|
|
67
|
+
"VocabularyTermHandle",
|
|
68
|
+
]
|
|
32
69
|
|
|
33
70
|
# Pydantic warnings suppression
|
|
34
71
|
warnings.filterwarnings("ignore", message='Field name "schema"', category=Warning, module="pydantic")
|
|
@@ -40,6 +77,46 @@ warnings.filterwarnings(
|
|
|
40
77
|
)
|
|
41
78
|
|
|
42
79
|
|
|
80
|
+
# =============================================================================
|
|
81
|
+
# Compatibility Aliases
|
|
82
|
+
# =============================================================================
|
|
83
|
+
# These aliases maintain backwards compatibility with existing DerivaML code
|
|
84
|
+
# that uses the old Pydantic-based class names.
|
|
85
|
+
|
|
86
|
+
ColumnDefinition = ColumnDef
|
|
87
|
+
"""Alias for ColumnDef from deriva.core.typed.
|
|
88
|
+
|
|
89
|
+
This maintains backwards compatibility with existing DerivaML code.
|
|
90
|
+
New code should use ColumnDef directly.
|
|
91
|
+
"""
|
|
92
|
+
|
|
93
|
+
KeyDefinition = KeyDef
|
|
94
|
+
"""Alias for KeyDef from deriva.core.typed.
|
|
95
|
+
|
|
96
|
+
This maintains backwards compatibility with existing DerivaML code.
|
|
97
|
+
New code should use KeyDef directly.
|
|
98
|
+
"""
|
|
99
|
+
|
|
100
|
+
ForeignKeyDefinition = ForeignKeyDef
|
|
101
|
+
"""Alias for ForeignKeyDef from deriva.core.typed.
|
|
102
|
+
|
|
103
|
+
This maintains backwards compatibility with existing DerivaML code.
|
|
104
|
+
New code should use ForeignKeyDef directly.
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
TableDefinition = TableDef
|
|
108
|
+
"""Alias for TableDef from deriva.core.typed.
|
|
109
|
+
|
|
110
|
+
This maintains backwards compatibility with existing DerivaML code.
|
|
111
|
+
New code should use TableDef directly.
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# =============================================================================
|
|
116
|
+
# DerivaML-Specific Classes
|
|
117
|
+
# =============================================================================
|
|
118
|
+
|
|
119
|
+
|
|
43
120
|
class FileUploadState(BaseModel):
|
|
44
121
|
"""Tracks the state and result of a file upload operation.
|
|
45
122
|
|
|
@@ -47,7 +124,6 @@ class FileUploadState(BaseModel):
|
|
|
47
124
|
state (UploadState): Current state of the upload (success, failed, etc.).
|
|
48
125
|
status (str): Detailed status message.
|
|
49
126
|
result (Any): Upload result data, if any.
|
|
50
|
-
rid (RID | None): Resource identifier of the uploaded file, if successful.
|
|
51
127
|
"""
|
|
52
128
|
state: UploadState
|
|
53
129
|
status: str
|
|
@@ -59,6 +135,53 @@ class FileUploadState(BaseModel):
|
|
|
59
135
|
return self.result and self.result["RID"]
|
|
60
136
|
|
|
61
137
|
|
|
138
|
+
@dataclass
|
|
139
|
+
class UploadProgress:
|
|
140
|
+
"""Progress information for file uploads.
|
|
141
|
+
|
|
142
|
+
This dataclass is passed to upload callbacks to report progress during
|
|
143
|
+
file upload operations.
|
|
144
|
+
|
|
145
|
+
Attributes:
|
|
146
|
+
file_path: Path to the file being uploaded.
|
|
147
|
+
file_name: Name of the file being uploaded.
|
|
148
|
+
bytes_completed: Number of bytes uploaded so far.
|
|
149
|
+
bytes_total: Total number of bytes to upload.
|
|
150
|
+
percent_complete: Percentage of upload completed (0-100).
|
|
151
|
+
phase: Current phase of the upload operation.
|
|
152
|
+
message: Human-readable status message.
|
|
153
|
+
"""
|
|
154
|
+
file_path: str = ""
|
|
155
|
+
file_name: str = ""
|
|
156
|
+
bytes_completed: int = 0
|
|
157
|
+
bytes_total: int = 0
|
|
158
|
+
percent_complete: float = 0.0
|
|
159
|
+
phase: str = ""
|
|
160
|
+
message: str = ""
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class UploadCallback(Protocol):
|
|
164
|
+
"""Protocol for upload progress callbacks.
|
|
165
|
+
|
|
166
|
+
Implement this protocol to receive progress updates during file uploads.
|
|
167
|
+
The callback is invoked with an UploadProgress object containing current
|
|
168
|
+
upload state information.
|
|
169
|
+
|
|
170
|
+
Example:
|
|
171
|
+
>>> def my_callback(progress: UploadProgress) -> None:
|
|
172
|
+
... print(f"Uploading {progress.file_name}: {progress.percent_complete:.1f}%")
|
|
173
|
+
...
|
|
174
|
+
>>> execution.upload_execution_outputs(progress_callback=my_callback)
|
|
175
|
+
"""
|
|
176
|
+
def __call__(self, progress: UploadProgress) -> None:
|
|
177
|
+
"""Called with upload progress information.
|
|
178
|
+
|
|
179
|
+
Args:
|
|
180
|
+
progress: Current upload progress state.
|
|
181
|
+
"""
|
|
182
|
+
...
|
|
183
|
+
|
|
184
|
+
|
|
62
185
|
class VocabularyTerm(BaseModel):
|
|
63
186
|
"""Represents a term in a controlled vocabulary.
|
|
64
187
|
|
|
@@ -83,206 +206,116 @@ class VocabularyTerm(BaseModel):
|
|
|
83
206
|
... RID="1-abc123"
|
|
84
207
|
... )
|
|
85
208
|
"""
|
|
86
|
-
|
|
87
|
-
|
|
209
|
+
_name: str = PrivateAttr()
|
|
210
|
+
_synonyms: list[str] | None = PrivateAttr()
|
|
211
|
+
_description: str = PrivateAttr()
|
|
88
212
|
id: str = Field(alias="ID")
|
|
89
213
|
uri: str = Field(alias="URI")
|
|
90
|
-
description: str = Field(alias="Description")
|
|
91
214
|
rid: str = Field(alias="RID")
|
|
92
215
|
|
|
216
|
+
def __init__(self, **data):
|
|
217
|
+
# Extract fields that will be private attrs before calling super
|
|
218
|
+
name = data.pop("Name", None) or data.pop("name", None)
|
|
219
|
+
synonyms = data.pop("Synonyms", None) or data.pop("synonyms", None)
|
|
220
|
+
description = data.pop("Description", None) or data.pop("description", None)
|
|
221
|
+
super().__init__(**data)
|
|
222
|
+
self._name = name
|
|
223
|
+
self._synonyms = synonyms
|
|
224
|
+
self._description = description
|
|
225
|
+
|
|
226
|
+
@property
|
|
227
|
+
def name(self) -> str:
|
|
228
|
+
"""Primary name of the term."""
|
|
229
|
+
return self._name
|
|
230
|
+
|
|
231
|
+
@property
|
|
232
|
+
def synonyms(self) -> tuple[str, ...]:
|
|
233
|
+
"""Alternative names for the term (immutable)."""
|
|
234
|
+
return tuple(self._synonyms or [])
|
|
235
|
+
|
|
236
|
+
@property
|
|
237
|
+
def description(self) -> str:
|
|
238
|
+
"""Explanation of the term's meaning."""
|
|
239
|
+
return self._description
|
|
240
|
+
|
|
93
241
|
class Config:
|
|
94
242
|
extra = "ignore"
|
|
95
243
|
|
|
96
244
|
|
|
97
|
-
class ColumnDefinition(BaseModel):
|
|
98
|
-
"""
|
|
245
|
+
class VocabularyTermHandle(VocabularyTerm):
|
|
246
|
+
"""A VocabularyTerm with methods to modify it in the catalog.
|
|
99
247
|
|
|
100
|
-
|
|
101
|
-
|
|
248
|
+
This class extends VocabularyTerm to provide mutable access to vocabulary
|
|
249
|
+
terms. Changes made through property setters are persisted to the catalog.
|
|
102
250
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
nullok (bool): Whether NULL values are allowed. Defaults to True.
|
|
107
|
-
default (Any): Default value for the column.
|
|
108
|
-
comment (str | None): Description of the column's purpose.
|
|
109
|
-
acls (dict): Access control lists.
|
|
110
|
-
acl_bindings (dict): Dynamic access control bindings.
|
|
111
|
-
annotations (dict): Additional metadata annotations.
|
|
112
|
-
|
|
113
|
-
Example:
|
|
114
|
-
>>> col = ColumnDefinition(
|
|
115
|
-
... name="score",
|
|
116
|
-
... type=BuiltinTypes.float4,
|
|
117
|
-
... nullok=False,
|
|
118
|
-
... comment="Confidence score between 0 and 1"
|
|
119
|
-
... )
|
|
120
|
-
"""
|
|
121
|
-
name: str
|
|
122
|
-
type: BuiltinTypes
|
|
123
|
-
nullok: bool = True
|
|
124
|
-
default: Any = None
|
|
125
|
-
comment: str | None = None
|
|
126
|
-
acls: dict = Field(default_factory=dict)
|
|
127
|
-
acl_bindings: dict = Field(default_factory=dict)
|
|
128
|
-
annotations: dict = Field(default_factory=dict)
|
|
129
|
-
|
|
130
|
-
@field_validator("type", mode="before")
|
|
131
|
-
@classmethod
|
|
132
|
-
def extract_type_name(cls, value: Any) -> Any:
|
|
133
|
-
if isinstance(value, dict):
|
|
134
|
-
return BuiltinTypes(value["typename"])
|
|
135
|
-
else:
|
|
136
|
-
return value
|
|
137
|
-
|
|
138
|
-
@model_serializer()
|
|
139
|
-
def serialize_column_definition(self):
|
|
140
|
-
return em.Column.define(
|
|
141
|
-
self.name,
|
|
142
|
-
builtin_types[self.type.value],
|
|
143
|
-
nullok=self.nullok,
|
|
144
|
-
default=self.default,
|
|
145
|
-
comment=self.comment,
|
|
146
|
-
acls=self.acls,
|
|
147
|
-
acl_bindings=self.acl_bindings,
|
|
148
|
-
annotations=self.annotations,
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
class KeyDefinition(BaseModel):
|
|
153
|
-
"""Defines a key constraint in an ERMrest table.
|
|
154
|
-
|
|
155
|
-
Provides a Pydantic model for defining primary keys and unique constraints.
|
|
156
|
-
Maps to deriva_py's Key.define functionality.
|
|
251
|
+
The `synonyms` property returns a tuple (immutable) to prevent accidental
|
|
252
|
+
modification without catalog update. To modify synonyms, assign a new
|
|
253
|
+
tuple/list to the property.
|
|
157
254
|
|
|
158
255
|
Attributes:
|
|
159
|
-
|
|
160
|
-
constraint_names (Iterable[str]): Names for the key constraints.
|
|
161
|
-
comment (str | None): Description of the key's purpose.
|
|
162
|
-
annotations (dict): Additional metadata annotations.
|
|
256
|
+
Inherits all attributes from VocabularyTerm.
|
|
163
257
|
|
|
164
258
|
Example:
|
|
165
|
-
>>>
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
... )
|
|
259
|
+
>>> term = ml.lookup_term("Dataset_Type", "Training")
|
|
260
|
+
>>> term.description = "Data used for model training"
|
|
261
|
+
>>> term.synonyms = ("Train", "TrainingData")
|
|
262
|
+
>>> term.delete()
|
|
170
263
|
"""
|
|
171
|
-
colnames: Iterable[str]
|
|
172
|
-
constraint_names: Iterable[str]
|
|
173
|
-
comment: str | None = None
|
|
174
|
-
annotations: dict = Field(default_factory=dict)
|
|
175
|
-
|
|
176
|
-
@model_serializer()
|
|
177
|
-
def serialize_key_definition(self):
|
|
178
|
-
return em.Key.define(
|
|
179
|
-
colnames=self.colnames,
|
|
180
|
-
constraint_names=self.constraint_names,
|
|
181
|
-
comment=self.comment,
|
|
182
|
-
annotations=self.annotations,
|
|
183
|
-
)
|
|
184
264
|
|
|
265
|
+
_ml: Any = PrivateAttr()
|
|
266
|
+
_table: str = PrivateAttr()
|
|
185
267
|
|
|
186
|
-
|
|
187
|
-
|
|
268
|
+
def __init__(self, ml: Any, table: str, **data):
|
|
269
|
+
"""Initialize a VocabularyTermHandle.
|
|
188
270
|
|
|
189
|
-
|
|
190
|
-
|
|
271
|
+
Args:
|
|
272
|
+
ml: DerivaML instance for catalog operations.
|
|
273
|
+
table: Name of the vocabulary table containing this term.
|
|
274
|
+
**data: Term data (Name, Synonyms, Description, ID, URI, RID).
|
|
275
|
+
"""
|
|
276
|
+
super().__init__(**data)
|
|
277
|
+
self._ml = ml
|
|
278
|
+
self._table = table
|
|
191
279
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
pk_colnames (Iterable[str]): Names of columns in the referenced table.
|
|
197
|
-
constraint_names (Iterable[str]): Names for the foreign key constraints.
|
|
198
|
-
on_update (str): Action on update of referenced row. Defaults to "NO ACTION".
|
|
199
|
-
on_delete (str): Action on delete of referenced row. Defaults to "NO ACTION".
|
|
200
|
-
comment (str | None): Description of the relationship.
|
|
201
|
-
acls (dict): Access control lists.
|
|
202
|
-
acl_bindings (dict): Dynamic access control bindings.
|
|
203
|
-
annotations (dict): Additional metadata annotations.
|
|
280
|
+
@property
|
|
281
|
+
def description(self) -> str:
|
|
282
|
+
"""Explanation of the term's meaning."""
|
|
283
|
+
return self._description
|
|
204
284
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
... pk_sname="core",
|
|
209
|
-
... pk_tname="dataset",
|
|
210
|
-
... pk_colnames=["id"],
|
|
211
|
-
... on_delete="CASCADE"
|
|
212
|
-
... )
|
|
213
|
-
"""
|
|
214
|
-
colnames: Iterable[str]
|
|
215
|
-
pk_sname: str
|
|
216
|
-
pk_tname: str
|
|
217
|
-
pk_colnames: Iterable[str]
|
|
218
|
-
constraint_names: Iterable[str] = Field(default_factory=list)
|
|
219
|
-
on_update: str = "NO ACTION"
|
|
220
|
-
on_delete: str = "NO ACTION"
|
|
221
|
-
comment: str | None = None
|
|
222
|
-
acls: dict[str, Any] = Field(default_factory=dict)
|
|
223
|
-
acl_bindings: dict[str, Any] = Field(default_factory=dict)
|
|
224
|
-
annotations: dict[str, Any] = Field(default_factory=dict)
|
|
225
|
-
|
|
226
|
-
@model_serializer()
|
|
227
|
-
def serialize_fk_definition(self):
|
|
228
|
-
return em.ForeignKey.define(
|
|
229
|
-
fk_colnames=self.colnames,
|
|
230
|
-
pk_sname=self.pk_sname,
|
|
231
|
-
pk_tname=self.pk_tname,
|
|
232
|
-
pk_colnames=self.pk_colnames,
|
|
233
|
-
on_update=self.on_update,
|
|
234
|
-
on_delete=self.on_delete,
|
|
235
|
-
comment=self.comment,
|
|
236
|
-
acls=self.acls,
|
|
237
|
-
acl_bindings=self.acl_bindings,
|
|
238
|
-
annotations=self.annotations,
|
|
239
|
-
)
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
class TableDefinition(BaseModel):
|
|
243
|
-
"""Defines a complete table structure in ERMrest.
|
|
244
|
-
|
|
245
|
-
Provides a Pydantic model for defining tables with their columns, keys, and relationships.
|
|
246
|
-
Maps to deriva_py's Table.define functionality.
|
|
285
|
+
@description.setter
|
|
286
|
+
def description(self, value: str) -> None:
|
|
287
|
+
"""Update the term's description in the catalog.
|
|
247
288
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
comment (str | None): Description of the table's purpose.
|
|
254
|
-
acls (dict): Access control lists.
|
|
255
|
-
acl_bindings (dict): Dynamic access control bindings.
|
|
256
|
-
annotations (dict): Additional metadata annotations.
|
|
289
|
+
Args:
|
|
290
|
+
value: New description for the term.
|
|
291
|
+
"""
|
|
292
|
+
self._ml._update_term_description(self._table, self.name, value)
|
|
293
|
+
self._description = value
|
|
257
294
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
def
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
acls=self.acls,
|
|
286
|
-
acl_bindings=self.acl_bindings,
|
|
287
|
-
annotations=self.annotations,
|
|
288
|
-
)
|
|
295
|
+
@property
|
|
296
|
+
def synonyms(self) -> tuple[str, ...]:
|
|
297
|
+
"""Alternative names for the term (immutable).
|
|
298
|
+
|
|
299
|
+
Returns a tuple to prevent accidental modification without catalog update.
|
|
300
|
+
To modify synonyms, assign a new tuple/list to this property.
|
|
301
|
+
"""
|
|
302
|
+
return tuple(self._synonyms or [])
|
|
303
|
+
|
|
304
|
+
@synonyms.setter
|
|
305
|
+
def synonyms(self, value: list[str] | tuple[str, ...]) -> None:
|
|
306
|
+
"""Replace all synonyms for this term in the catalog.
|
|
307
|
+
|
|
308
|
+
Args:
|
|
309
|
+
value: New list of synonyms (replaces all existing synonyms).
|
|
310
|
+
"""
|
|
311
|
+
new_synonyms = list(value)
|
|
312
|
+
self._ml._update_term_synonyms(self._table, self.name, new_synonyms)
|
|
313
|
+
self._synonyms = new_synonyms
|
|
314
|
+
|
|
315
|
+
def delete(self) -> None:
|
|
316
|
+
"""Delete this term from the vocabulary.
|
|
317
|
+
|
|
318
|
+
Raises:
|
|
319
|
+
DerivaMLException: If the term is currently in use by other records.
|
|
320
|
+
"""
|
|
321
|
+
self._ml.delete_term(self._table, self.name)
|