deriva-ml 1.14.0__py3-none-any.whl → 1.14.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +25 -30
- deriva_ml/core/__init__.py +39 -0
- deriva_ml/core/base.py +1489 -0
- deriva_ml/core/constants.py +36 -0
- deriva_ml/core/definitions.py +74 -0
- deriva_ml/core/enums.py +222 -0
- deriva_ml/core/ermrest.py +288 -0
- deriva_ml/core/exceptions.py +28 -0
- deriva_ml/core/filespec.py +116 -0
- deriva_ml/dataset/__init__.py +4 -0
- deriva_ml/{dataset_aux_classes.py → dataset/aux_classes.py} +16 -12
- deriva_ml/{dataset.py → dataset/dataset.py} +406 -428
- deriva_ml/{dataset_bag.py → dataset/dataset_bag.py} +137 -97
- deriva_ml/{history.py → dataset/history.py} +51 -33
- deriva_ml/{upload.py → dataset/upload.py} +48 -70
- deriva_ml/demo_catalog.py +233 -183
- deriva_ml/execution/environment.py +290 -0
- deriva_ml/{execution.py → execution/execution.py} +365 -252
- deriva_ml/execution/execution_configuration.py +163 -0
- deriva_ml/{execution_configuration.py → execution/workflow.py} +212 -224
- deriva_ml/feature.py +83 -46
- deriva_ml/model/__init__.py +0 -0
- deriva_ml/{deriva_model.py → model/catalog.py} +113 -132
- deriva_ml/{database_model.py → model/database.py} +52 -74
- deriva_ml/model/sql_mapper.py +44 -0
- deriva_ml/run_notebook.py +19 -11
- deriva_ml/schema/__init__.py +3 -0
- deriva_ml/{schema_setup → schema}/annotations.py +31 -22
- deriva_ml/schema/check_schema.py +104 -0
- deriva_ml/{schema_setup → schema}/create_schema.py +151 -104
- deriva_ml/schema/deriva-ml-reference.json +8525 -0
- deriva_ml/schema/table_comments_utils.py +57 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/METADATA +5 -4
- deriva_ml-1.14.27.dist-info/RECORD +40 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/entry_points.txt +1 -0
- deriva_ml/deriva_definitions.py +0 -391
- deriva_ml/deriva_ml_base.py +0 -1046
- deriva_ml/execution_environment.py +0 -139
- deriva_ml/schema_setup/table_comments_utils.py +0 -56
- deriva_ml/test-files/execution-parameters.json +0 -1
- deriva_ml/test-files/notebook-parameters.json +0 -5
- deriva_ml/test_functions.py +0 -141
- deriva_ml/test_notebook.ipynb +0 -197
- deriva_ml-1.14.0.dist-info/RECORD +0 -31
- /deriva_ml/{schema_setup → execution}/__init__.py +0 -0
- /deriva_ml/{schema_setup → schema}/policy.json +0 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/WHEEL +0 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Constants used throughout the DerivaML package.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import NewType, TypeAlias
|
|
8
|
+
|
|
9
|
+
from pydantic import constr
|
|
10
|
+
|
|
11
|
+
# Schema name
ML_SCHEMA = "deriva-ml"

# Special RID for dry runs
DRY_RUN_RID = "0000"

# Regular expression parts for RIDs.
# One identifier token: 1-4 characters drawn from uppercase letters and digits,
# optionally followed by one or more "-XXXX" groups of exactly four such characters.
# Both the RID and the optional "@snapshot" suffix use this same token.
_RID_TOKEN = r"(?:[A-Z\d]{1,4}|[A-Z\d]{1,4}(?:-[A-Z\d]{4})+)"
rid_part = "(?P<rid>" + _RID_TOKEN + ")"
snapshot_part = "(?:@(?P<snapshot>" + _RID_TOKEN + "))?"
# Anchored pattern: a RID, optionally followed by "@" and a snapshot identifier.
rid_regex = "^" + rid_part + snapshot_part + "$"
|
|
21
|
+
|
|
22
|
+
# RID type definitions.
# Constrained string type built from rid_regex via pydantic's constr.
BaseRIDString = constr(pattern=rid_regex)
# Annotation alias built from the same pattern constraint as BaseRIDString.
RIDType: TypeAlias = constr(pattern=rid_regex)
# Distinct type name for static type checkers, layered over BaseRIDString.
RID = NewType("RID", BaseRIDString)
|
|
27
|
+
|
|
28
|
+
# System columns in Deriva
DerivaSystemColumns = ["RID", "RCT", "RMT", "RCB", "RMB"]
# Asset columns: the asset-specific column names plus every system column.
DerivaAssetColumns = {"Filename", "URL", "Length", "MD5", "Description", *DerivaSystemColumns}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared definitions that are used in different DerivaML modules.
|
|
3
|
+
This module re-exports all symbols from the core submodules for backwards compatibility.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
# Re-export constants
|
|
9
|
+
from deriva_ml.core.constants import (
|
|
10
|
+
DRY_RUN_RID,
|
|
11
|
+
ML_SCHEMA,
|
|
12
|
+
RID,
|
|
13
|
+
DerivaAssetColumns,
|
|
14
|
+
DerivaSystemColumns,
|
|
15
|
+
rid_part,
|
|
16
|
+
rid_regex,
|
|
17
|
+
snapshot_part,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Re-export enums
|
|
21
|
+
from deriva_ml.core.enums import (
|
|
22
|
+
BaseStrEnum,
|
|
23
|
+
BuiltinTypes,
|
|
24
|
+
ExecAssetType,
|
|
25
|
+
ExecMetadataType,
|
|
26
|
+
MLAsset,
|
|
27
|
+
MLTable,
|
|
28
|
+
MLVocab,
|
|
29
|
+
Status,
|
|
30
|
+
UploadState,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Re-export models
|
|
34
|
+
from deriva_ml.core.ermrest import (
|
|
35
|
+
ColumnDefinition,
|
|
36
|
+
FileUploadState,
|
|
37
|
+
ForeignKeyDefinition,
|
|
38
|
+
KeyDefinition,
|
|
39
|
+
TableDefinition,
|
|
40
|
+
VocabularyTerm,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
# Re-export file specification model
|
|
44
|
+
from deriva_ml.core.filespec import FileSpec
|
|
45
|
+
|
|
46
|
+
__all__ = [
|
|
47
|
+
# Constants
|
|
48
|
+
"ML_SCHEMA",
|
|
49
|
+
"DRY_RUN_RID",
|
|
50
|
+
"rid_part",
|
|
51
|
+
"snapshot_part",
|
|
52
|
+
"rid_regex",
|
|
53
|
+
"DerivaSystemColumns",
|
|
54
|
+
"DerivaAssetColumns",
|
|
55
|
+
"RID",
|
|
56
|
+
# Enums
|
|
57
|
+
"BaseStrEnum",
|
|
58
|
+
"UploadState",
|
|
59
|
+
"Status",
|
|
60
|
+
"BuiltinTypes",
|
|
61
|
+
"MLVocab",
|
|
62
|
+
"MLTable",
|
|
63
|
+
"MLAsset",
|
|
64
|
+
"ExecMetadataType",
|
|
65
|
+
"ExecAssetType",
|
|
66
|
+
# Models
|
|
67
|
+
"FileUploadState",
|
|
68
|
+
"FileSpec",
|
|
69
|
+
"VocabularyTerm",
|
|
70
|
+
"ColumnDefinition",
|
|
71
|
+
"KeyDefinition",
|
|
72
|
+
"ForeignKeyDefinition",
|
|
73
|
+
"TableDefinition",
|
|
74
|
+
]
|
deriva_ml/core/enums.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""Enumeration classes for DerivaML.
|
|
2
|
+
|
|
3
|
+
This module provides enumeration classes used throughout DerivaML for representing states, statuses,
|
|
4
|
+
types, and vocabularies. Each enum class represents a specific set of constants used in the system.
|
|
5
|
+
|
|
6
|
+
Classes:
|
|
7
|
+
BaseStrEnum: Base class for string-based enums.
|
|
8
|
+
UploadState: States for file upload operations.
|
|
9
|
+
Status: Execution status values.
|
|
10
|
+
BuiltinTypes: ERMrest built-in data types.
|
|
11
|
+
MLVocab: Controlled vocabulary types.
|
|
12
|
+
MLAsset: Asset type identifiers.
MLTable: Table name identifiers.
|
|
13
|
+
ExecMetadataType: Execution metadata type identifiers.
|
|
14
|
+
ExecAssetType: Execution asset type identifiers.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from enum import Enum
|
|
18
|
+
|
|
19
|
+
from deriva.core.ermrest_model import builtin_types
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class BaseStrEnum(str, Enum):
    """Common base for enumerations whose members are also plain strings.

    Inheriting from both ``str`` and ``Enum`` means every member can be used
    wherever a string is expected while still belonging to a closed set of values.

    Example:
        >>> class MyEnum(BaseStrEnum):
        ...     VALUE = "value"
        >>> isinstance(MyEnum.VALUE, str)
        True
        >>> isinstance(MyEnum.VALUE, Enum)
        True
    """
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class UploadState(Enum):
    """Integer-valued states that a file upload operation can be in."""

    success = 0  # Upload completed successfully.
    failed = 1  # Upload failed.
    pending = 2  # Upload is queued.
    running = 3  # Upload is in progress.
    paused = 4  # Upload is temporarily paused.
    aborted = 5  # Upload was aborted.
    cancelled = 6  # Upload was cancelled.
    timeout = 7  # Upload timed out.
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Status(BaseStrEnum):
    """String-valued states an execution moves through during its lifecycle."""

    initializing = "Initializing"  # Initial setup is in progress.
    created = "Created"  # Execution record has been created.
    pending = "Pending"  # Execution is queued.
    running = "Running"  # Execution is in progress.
    aborted = "Aborted"  # Execution was manually stopped.
    completed = "Completed"  # Execution finished successfully.
    failed = "Failed"  # Execution encountered an error.
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class BuiltinTypes(Enum):
    """ERMrest built-in data types.

    Each member's value is the ``typename`` of the like-named entry in
    ``deriva.core.ermrest_model.builtin_types``. Members are used to declare
    column types and to look the ERMrest type back up by name.

    Attributes:
        text: Text/string type.
        int2: 16-bit integer.
        jsonb: Binary JSON.
        float8: 64-bit float.
        timestamp: Timestamp without timezone.
        int8: 64-bit integer.
        boolean: Boolean type.
        json: JSON type.
        float4: 32-bit float.
        int4: 32-bit integer.
        timestamptz: Timestamp with timezone.
        date: Date type.
        ermrest_rid: Resource identifier.
        ermrest_rcb: Record created by.
        ermrest_rmb: Record modified by.
        ermrest_rct: Record creation time.
        ermrest_rmt: Record modification time.
        markdown: Markdown text.
        longtext: Long text.
        ermrest_curie: Compact URI.
        ermrest_uri: URI type.
        color_rgb_hex: RGB color in hex.
        serial2: 16-bit auto-incrementing.
        serial4: 32-bit auto-incrementing.
        serial8: 64-bit auto-incrementing.
    """

    text = builtin_types.text.typename
    int2 = builtin_types.int2.typename
    # jsonb needs its own typename: two members with the same value would make
    # Enum treat ``json`` as an alias of ``jsonb`` and lose the "jsonb" lookup.
    jsonb = builtin_types.jsonb.typename
    float8 = builtin_types.float8.typename
    timestamp = builtin_types.timestamp.typename
    int8 = builtin_types.int8.typename
    boolean = builtin_types.boolean.typename
    json = builtin_types.json.typename
    float4 = builtin_types.float4.typename
    int4 = builtin_types.int4.typename
    timestamptz = builtin_types.timestamptz.typename
    date = builtin_types.date.typename
    ermrest_rid = builtin_types.ermrest_rid.typename
    ermrest_rcb = builtin_types.ermrest_rcb.typename
    ermrest_rmb = builtin_types.ermrest_rmb.typename
    ermrest_rct = builtin_types.ermrest_rct.typename
    ermrest_rmt = builtin_types.ermrest_rmt.typename
    markdown = builtin_types.markdown.typename
    longtext = builtin_types.longtext.typename
    ermrest_curie = builtin_types.ermrest_curie.typename
    ermrest_uri = builtin_types.ermrest_uri.typename
    color_rgb_hex = builtin_types.color_rgb_hex.typename
    serial2 = builtin_types.serial2.typename
    serial4 = builtin_types.serial4.typename
    serial8 = builtin_types.serial8.typename
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class MLVocab(BaseStrEnum):
    """Names of the controlled vocabulary tables used by DerivaML."""

    dataset_type = "Dataset_Type"  # Dataset classification vocabulary.
    workflow_type = "Workflow_Type"  # Workflow classification vocabulary.
    asset_type = "Asset_Type"  # Asset classification vocabulary.
    asset_role = "Asset_Role"  # Asset role classification vocabulary.
    feature_name = "Feature_Name"  # Feature name vocabulary.
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class MLAsset(BaseStrEnum):
    """Identifiers for the kinds of assets that can be associated with executions."""

    execution_metadata = "Execution_Metadata"  # Metadata about an execution.
    execution_asset = "Execution_Asset"  # Asset produced by an execution.
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class MLTable(BaseStrEnum):
    """String identifiers for DerivaML tables.

    NOTE(review): the values look like catalog table names; confirm against the schema.
    """

    dataset = "Dataset"
    workflow = "Workflow"
    file = "File"
    asset = "Asset"
    execution = "Execution"
    dataset_version = "Dataset_Version"
    execution_metadata = "Execution_Metadata"
    execution_asset = "Execution_Asset"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class ExecMetadataType(BaseStrEnum):
    """Identifiers for the kinds of metadata that can be attached to an execution."""

    execution_config = "Execution_Config"  # Execution configuration data.
    runtime_env = "Runtime_Env"  # Runtime environment information.
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class ExecAssetType(BaseStrEnum):
    """Identifiers for the kinds of assets associated with an execution."""

    input_file = "Input_File"  # Input file used by the execution.
    output_file = "Output_File"  # Output file produced by the execution.
    notebook_output = "Notebook_Output"  # Jupyter notebook output from the execution.
    model_file = "Model_File"  # Model file (presumably a trained model artifact; confirm).
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""ERMrest data models for DerivaML.
|
|
2
|
+
|
|
3
|
+
This module provides Pydantic models that represent ERMrest catalog structures. These models are used
|
|
4
|
+
throughout DerivaML for defining and manipulating catalog elements like tables, columns, and keys.
|
|
5
|
+
|
|
6
|
+
Classes:
|
|
7
|
+
FileUploadState: Tracks the state of file uploads.
|
|
8
|
+
VocabularyTerm: Represents terms in controlled vocabularies.
|
|
9
|
+
ColumnDefinition: Defines columns in tables.
|
|
10
|
+
KeyDefinition: Defines primary and unique keys.
|
|
11
|
+
ForeignKeyDefinition: Defines foreign key relationships.
|
|
12
|
+
TableDefinition: Defines complete table structures.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import warnings
|
|
18
|
+
from typing import Any, Iterable
|
|
19
|
+
|
|
20
|
+
import deriva.core.ermrest_model as em
|
|
21
|
+
from deriva.core.ermrest_model import builtin_types
|
|
22
|
+
from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    computed_field,
    field_validator,
    model_serializer,
)
|
|
29
|
+
|
|
30
|
+
from .constants import RID
|
|
31
|
+
from .enums import BuiltinTypes, UploadState
|
|
32
|
+
|
|
33
|
+
# Pydantic warnings suppression.
# Register one "ignore" filter per message pattern, in this order, for warnings
# raised from modules whose name starts with "pydantic".
for _message in ('Field name "schema"', "fields may not start with an underscore"):
    warnings.filterwarnings("ignore", message=_message, category=Warning, module="pydantic")
del _message  # keep the loop variable out of the module namespace
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class FileUploadState(BaseModel):
    """Tracks the state and result of a file upload operation.

    Attributes:
        state (UploadState): Current state of the upload (success, failed, etc.).
        status (str): Detailed status message.
        result (Any): Upload result data, if any.
        rid (RID | None): Value of ``result["RID"]``, or None when there is no result.
    """

    state: UploadState
    status: str
    result: Any

    @computed_field
    @property
    def rid(self) -> RID | None:
        """Return the "RID" entry of the upload result, or None if the result is empty.

        Raises:
            KeyError: If a non-empty mapping result has no "RID" entry.
        """
        # An empty or missing result carries no RID; return None explicitly
        # rather than leaking the falsy result object itself to callers.
        if not self.result:
            return None
        return self.result["RID"]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class VocabularyTerm(BaseModel):
    """Represents a term in a controlled vocabulary.

    A vocabulary term is a standardized entry in a controlled vocabulary table. Each term has
    a primary name, optional synonyms, and identifiers for cross-referencing. Fields are
    populated from the capitalized aliases shown in the example; unknown keys are ignored.

    Attributes:
        name (str): Primary name of the term.
        synonyms (list[str] | None): Alternative names for the term.
        id (str): CURIE (Compact URI) identifier.
        uri (str): Full URI for the term.
        description (str): Explanation of the term's meaning.
        rid (str): Resource identifier in the catalog.

    Example:
        >>> term = VocabularyTerm(
        ...     Name="epithelial",
        ...     Synonyms=["epithelium"],
        ...     ID="tissue:0001",
        ...     URI="http://example.org/tissue/0001",
        ...     Description="Epithelial tissue type",
        ...     RID="1-abc123"
        ... )
    """

    # Pydantic v2 configuration; replaces the deprecated class-based ``Config``.
    model_config = ConfigDict(extra="ignore")

    name: str = Field(alias="Name")
    synonyms: list[str] | None = Field(alias="Synonyms")
    id: str = Field(alias="ID")
    uri: str = Field(alias="URI")
    description: str = Field(alias="Description")
    rid: str = Field(alias="RID")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class ColumnDefinition(BaseModel):
    """Pydantic description of one column of an ERMrest table.

    Serializing the model (``model_dump``) yields the result of deriva-py's
    ``Column.define`` called with these fields.

    Attributes:
        name (str): Name of the column.
        type (BuiltinTypes): ERMrest data type for the column. A dict with a
            ``"typename"`` key is also accepted and converted.
        nullok (bool): Whether NULL values are allowed. Defaults to True.
        default (Any): Default value for the column.
        comment (str | None): Description of the column's purpose.
        acls (dict): Access control lists.
        acl_bindings (dict): Dynamic access control bindings.
        annotations (dict): Additional metadata annotations.

    Example:
        >>> col = ColumnDefinition(
        ...     name="score",
        ...     type=BuiltinTypes.float4,
        ...     nullok=False,
        ...     comment="Confidence score between 0 and 1"
        ... )
    """

    name: str
    type: BuiltinTypes
    nullok: bool = True
    default: Any = None
    comment: str | None = None
    acls: dict = Field(default_factory=dict)
    acl_bindings: dict = Field(default_factory=dict)
    annotations: dict = Field(default_factory=dict)

    @field_validator("type", mode="before")
    @classmethod
    def extract_type_name(cls, value: Any) -> Any:
        """Convert a ``{"typename": ...}`` dict to a BuiltinTypes member; pass anything else through."""
        return BuiltinTypes(value["typename"]) if isinstance(value, dict) else value

    @model_serializer()
    def serialize_column_definition(self):
        """Serialize as the column definition produced by ``em.Column.define``."""
        ermrest_type = builtin_types[self.type.value]
        options = {
            "nullok": self.nullok,
            "default": self.default,
            "comment": self.comment,
            "acls": self.acls,
            "acl_bindings": self.acl_bindings,
            "annotations": self.annotations,
        }
        return em.Column.define(self.name, ermrest_type, **options)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class KeyDefinition(BaseModel):
    """Pydantic description of a key constraint on an ERMrest table.

    Serializing the model (``model_dump``) yields the result of deriva-py's
    ``Key.define`` called with these fields.

    Attributes:
        colnames (Iterable[str]): Names of columns that form the key.
        constraint_names (Iterable[str]): Names for the key constraints.
        comment (str | None): Description of the key's purpose.
        annotations (dict): Additional metadata annotations.

    Example:
        >>> key = KeyDefinition(
        ...     colnames=["id", "version"],
        ...     constraint_names=["unique_id_version"],
        ...     comment="Unique identifier with version"
        ... )
    """

    # NOTE(review): pydantic v2 may validate Iterable[...] fields lazily as
    # single-use iterators; confirm that serializing one instance twice is safe.
    colnames: Iterable[str]
    constraint_names: Iterable[str]
    comment: str | None = None
    annotations: dict = Field(default_factory=dict)

    @model_serializer()
    def serialize_key_definition(self):
        """Serialize as the key definition produced by ``em.Key.define``."""
        define_args = {
            "colnames": self.colnames,
            "constraint_names": self.constraint_names,
            "comment": self.comment,
            "annotations": self.annotations,
        }
        return em.Key.define(**define_args)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class ForeignKeyDefinition(BaseModel):
    """Defines a foreign key relationship between tables.

    Provides a Pydantic model for defining foreign key constraints with referential actions
    and metadata. Serializing the model (``model_dump``) yields the result of deriva-py's
    ``ForeignKey.define`` called with these fields.

    Attributes:
        colnames (Iterable[str]): Names of columns in the referencing table.
        pk_sname (str): Schema name of the referenced table.
        pk_tname (str): Name of the referenced table.
        pk_colnames (Iterable[str]): Names of columns in the referenced table.
        constraint_names (Iterable[str]): Names for the foreign key constraints.
        on_update (str): Action on update of referenced row. Defaults to "NO ACTION".
        on_delete (str): Action on delete of referenced row. Defaults to "NO ACTION".
        comment (str | None): Description of the relationship.
        acls (dict): Access control lists.
        acl_bindings (dict): Dynamic access control bindings.
        annotations (dict): Additional metadata annotations.

    Example:
        >>> fk = ForeignKeyDefinition(
        ...     colnames=["dataset_id"],
        ...     pk_sname="core",
        ...     pk_tname="dataset",
        ...     pk_colnames=["id"],
        ...     on_delete="CASCADE"
        ... )
    """

    # NOTE(review): pydantic v2 may validate Iterable[...] fields lazily as
    # single-use iterators; confirm that serializing one instance twice is safe.
    colnames: Iterable[str]
    pk_sname: str
    pk_tname: str
    pk_colnames: Iterable[str]
    constraint_names: Iterable[str] = Field(default_factory=list)
    on_update: str = "NO ACTION"
    on_delete: str = "NO ACTION"
    comment: str | None = None
    acls: dict[str, Any] = Field(default_factory=dict)
    acl_bindings: dict[str, Any] = Field(default_factory=dict)
    annotations: dict[str, Any] = Field(default_factory=dict)

    @model_serializer()
    def serialize_fk_definition(self):
        """Serialize as the foreign key definition produced by ``em.ForeignKey.define``."""
        return em.ForeignKey.define(
            fk_colnames=self.colnames,
            pk_sname=self.pk_sname,
            pk_tname=self.pk_tname,
            pk_colnames=self.pk_colnames,
            # Forward the declared constraint names so caller-supplied values
            # reach the definition; the default is an empty list.
            constraint_names=self.constraint_names,
            on_update=self.on_update,
            on_delete=self.on_delete,
            comment=self.comment,
            acls=self.acls,
            acl_bindings=self.acl_bindings,
            annotations=self.annotations,
        )
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class TableDefinition(BaseModel):
    """Pydantic description of a complete ERMrest table.

    Serializing the model (``model_dump``) yields the result of deriva-py's
    ``Table.define``, with each nested column, key, and foreign key definition
    serialized through its own ``model_dump``.

    Attributes:
        name (str): Name of the table.
        column_defs (Iterable[ColumnDefinition]): Column definitions.
        key_defs (Iterable[KeyDefinition]): Key constraint definitions.
        fkey_defs (Iterable[ForeignKeyDefinition]): Foreign key relationship definitions.
        comment (str | None): Description of the table's purpose.
        acls (dict): Access control lists.
        acl_bindings (dict): Dynamic access control bindings.
        annotations (dict): Additional metadata annotations.

    Example:
        >>> table = TableDefinition(
        ...     name="experiment",
        ...     column_defs=[
        ...         ColumnDefinition(name="id", type=BuiltinTypes.text),
        ...         ColumnDefinition(name="date", type=BuiltinTypes.date)
        ...     ],
        ...     comment="Experimental data records"
        ... )
    """

    name: str
    # NOTE(review): pydantic v2 may validate Iterable[...] fields lazily as
    # single-use iterators; confirm that serializing one instance twice is safe.
    column_defs: Iterable[ColumnDefinition]
    key_defs: Iterable[KeyDefinition] = Field(default_factory=list)
    fkey_defs: Iterable[ForeignKeyDefinition] = Field(default_factory=list)
    comment: str | None = None
    acls: dict = Field(default_factory=dict)
    acl_bindings: dict = Field(default_factory=dict)
    annotations: dict = Field(default_factory=dict)

    @model_serializer()
    def serialize_table_definition(self):
        """Serialize as the table definition produced by ``em.Table.define``."""
        # Dump nested definitions in the same order as the original call:
        # columns, then keys, then foreign keys.
        columns = [column.model_dump() for column in self.column_defs]
        keys = [key.model_dump() for key in self.key_defs]
        foreign_keys = [fkey.model_dump() for fkey in self.fkey_defs]
        return em.Table.define(
            tname=self.name,
            column_defs=columns,
            key_defs=keys,
            fkey_defs=foreign_keys,
            comment=self.comment,
            acls=self.acls,
            acl_bindings=self.acl_bindings,
            annotations=self.annotations,
        )
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom exceptions used throughout the DerivaML package.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DerivaMLException(Exception):
    """Base exception for errors raised by the DerivaML package.

    Args:
        msg (str): Optional message for the exception. It is passed to
            ``Exception`` and also kept on the instance as ``_msg``.
    """

    def __init__(self, msg=""):
        self._msg = msg
        super().__init__(msg)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DerivaMLInvalidTerm(DerivaMLException):
    """Raised for an invalid term in a DerivaML controlled vocabulary."""

    def __init__(self, vocabulary, term: str, msg: str = "Term doesn't exist"):
        """Compose the error text from the vocabulary, the offending term, and a detail message.

        Args:
            vocabulary: Vocabulary the term was looked up in; interpolated into the message.
            term: The term that was rejected.
            msg: Detail appended to the message. Defaults to "Term doesn't exist".
        """
        message = f"Invalid term {term} in vocabulary {vocabulary}: {msg}."
        super().__init__(message)
|
|
23
|
+
|
|
24
|
+
class DerivaMLTableTypeError(DerivaMLException):
    """Raised when a table is not of the expected type."""

    def __init__(self, table_type, table: str):
        """Compose the error text from the table and the type it was expected to have.

        Args:
            table_type: Expected table type; interpolated into the message.
            table: Name of the table that failed the check.
        """
        message = f"Table {table} is not of type {table_type}."
        super().__init__(message)
|