deriva-ml 1.14.0__py3-none-any.whl → 1.14.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. deriva_ml/__init__.py +25 -30
  2. deriva_ml/core/__init__.py +39 -0
  3. deriva_ml/core/base.py +1489 -0
  4. deriva_ml/core/constants.py +36 -0
  5. deriva_ml/core/definitions.py +74 -0
  6. deriva_ml/core/enums.py +222 -0
  7. deriva_ml/core/ermrest.py +288 -0
  8. deriva_ml/core/exceptions.py +28 -0
  9. deriva_ml/core/filespec.py +116 -0
  10. deriva_ml/dataset/__init__.py +4 -0
  11. deriva_ml/{dataset_aux_classes.py → dataset/aux_classes.py} +16 -12
  12. deriva_ml/{dataset.py → dataset/dataset.py} +406 -428
  13. deriva_ml/{dataset_bag.py → dataset/dataset_bag.py} +137 -97
  14. deriva_ml/{history.py → dataset/history.py} +51 -33
  15. deriva_ml/{upload.py → dataset/upload.py} +48 -70
  16. deriva_ml/demo_catalog.py +233 -183
  17. deriva_ml/execution/environment.py +290 -0
  18. deriva_ml/{execution.py → execution/execution.py} +365 -252
  19. deriva_ml/execution/execution_configuration.py +163 -0
  20. deriva_ml/{execution_configuration.py → execution/workflow.py} +212 -224
  21. deriva_ml/feature.py +83 -46
  22. deriva_ml/model/__init__.py +0 -0
  23. deriva_ml/{deriva_model.py → model/catalog.py} +113 -132
  24. deriva_ml/{database_model.py → model/database.py} +52 -74
  25. deriva_ml/model/sql_mapper.py +44 -0
  26. deriva_ml/run_notebook.py +19 -11
  27. deriva_ml/schema/__init__.py +3 -0
  28. deriva_ml/{schema_setup → schema}/annotations.py +31 -22
  29. deriva_ml/schema/check_schema.py +104 -0
  30. deriva_ml/{schema_setup → schema}/create_schema.py +151 -104
  31. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  32. deriva_ml/schema/table_comments_utils.py +57 -0
  33. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/METADATA +5 -4
  34. deriva_ml-1.14.27.dist-info/RECORD +40 -0
  35. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/entry_points.txt +1 -0
  36. deriva_ml/deriva_definitions.py +0 -391
  37. deriva_ml/deriva_ml_base.py +0 -1046
  38. deriva_ml/execution_environment.py +0 -139
  39. deriva_ml/schema_setup/table_comments_utils.py +0 -56
  40. deriva_ml/test-files/execution-parameters.json +0 -1
  41. deriva_ml/test-files/notebook-parameters.json +0 -5
  42. deriva_ml/test_functions.py +0 -141
  43. deriva_ml/test_notebook.ipynb +0 -197
  44. deriva_ml-1.14.0.dist-info/RECORD +0 -31
  45. /deriva_ml/{schema_setup → execution}/__init__.py +0 -0
  46. /deriva_ml/{schema_setup → schema}/policy.json +0 -0
  47. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/WHEEL +0 -0
  48. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/licenses/LICENSE +0 -0
  49. {deriva_ml-1.14.0.dist-info → deriva_ml-1.14.27.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,36 @@
1
+ """
2
+ Constants used throughout the DerivaML package.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import NewType, TypeAlias
8
+
9
+ from pydantic import constr
10
+
11
+ # Schema name
12
+ ML_SCHEMA = "deriva-ml"
13
+
14
+ # Special RID for dry runs
15
+ DRY_RUN_RID = "0000"
16
+
17
+ # Regular expression parts for RIDs
18
+ rid_part = r"(?P<rid>(?:[A-Z\d]{1,4}|[A-Z\d]{1,4}(?:-[A-Z\d]{4})+))"
19
+ snapshot_part = r"(?:@(?P<snapshot>(?:[A-Z\d]{1,4}|[A-Z\d]{1,4}(?:-[A-Z\d]{4})+)))?"
20
+ rid_regex = f"^{rid_part}{snapshot_part}$"
21
+
22
+ # RID type definition
23
+ BaseRIDString = constr(pattern=rid_regex)
24
+ # RID = TypeVar("RID", bound=BaseRIDString)
25
+ RIDType: TypeAlias = constr(pattern=rid_regex)
26
+ RID = NewType("RID", BaseRIDString)
27
+
28
+ # System columns in Deriva
29
+ DerivaSystemColumns = ["RID", "RCT", "RMT", "RCB", "RMB"]
30
+ DerivaAssetColumns = {
31
+ "Filename",
32
+ "URL",
33
+ "Length",
34
+ "MD5",
35
+ "Description",
36
+ }.union(set(DerivaSystemColumns))
@@ -0,0 +1,74 @@
1
+ """
2
+ Shared definitions that are used in different DerivaML modules.
3
+ This module re-exports the commonly used constants, enums, and models from the core submodules for backwards compatibility.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ # Re-export constants
9
+ from deriva_ml.core.constants import (
10
+ DRY_RUN_RID,
11
+ ML_SCHEMA,
12
+ RID,
13
+ DerivaAssetColumns,
14
+ DerivaSystemColumns,
15
+ rid_part,
16
+ rid_regex,
17
+ snapshot_part,
18
+ )
19
+
20
+ # Re-export enums
21
+ from deriva_ml.core.enums import (
22
+ BaseStrEnum,
23
+ BuiltinTypes,
24
+ ExecAssetType,
25
+ ExecMetadataType,
26
+ MLAsset,
27
+ MLTable,
28
+ MLVocab,
29
+ Status,
30
+ UploadState,
31
+ )
32
+
33
+ # Re-export models
34
+ from deriva_ml.core.ermrest import (
35
+ ColumnDefinition,
36
+ FileUploadState,
37
+ ForeignKeyDefinition,
38
+ KeyDefinition,
39
+ TableDefinition,
40
+ VocabularyTerm,
41
+ )
42
+
43
+ # Re-export file specification model
44
+ from deriva_ml.core.filespec import FileSpec
45
+
46
+ __all__ = [
47
+ # Constants
48
+ "ML_SCHEMA",
49
+ "DRY_RUN_RID",
50
+ "rid_part",
51
+ "snapshot_part",
52
+ "rid_regex",
53
+ "DerivaSystemColumns",
54
+ "DerivaAssetColumns",
55
+ "RID",
56
+ # Enums
57
+ "BaseStrEnum",
58
+ "UploadState",
59
+ "Status",
60
+ "BuiltinTypes",
61
+ "MLVocab",
62
+ "MLTable",
63
+ "MLAsset",
64
+ "ExecMetadataType",
65
+ "ExecAssetType",
66
+ # Models
67
+ "FileUploadState",
68
+ "FileSpec",
69
+ "VocabularyTerm",
70
+ "ColumnDefinition",
71
+ "KeyDefinition",
72
+ "ForeignKeyDefinition",
73
+ "TableDefinition",
74
+ ]
@@ -0,0 +1,222 @@
1
+ """Enumeration classes for DerivaML.
2
+
3
+ This module provides enumeration classes used throughout DerivaML for representing states, statuses,
4
+ types, and vocabularies. Each enum class represents a specific set of constants used in the system.
5
+
6
+ Classes:
7
+ BaseStrEnum: Base class for string-based enums.
8
+ UploadState: States for file upload operations.
9
+ Status: Execution status values.
10
+ BuiltinTypes: ERMrest built-in data types.
11
+ MLVocab: Controlled vocabulary types.
12
+ MLAsset: Asset type identifiers.
13
+ ExecMetadataType: Execution metadata type identifiers.
14
+ ExecAssetType: Execution asset type identifiers.
15
+ """
16
+
17
+ from enum import Enum
18
+
19
+ from deriva.core.ermrest_model import builtin_types
20
+
21
+
22
+ class BaseStrEnum(str, Enum):
23
+ """Base class for string-based enumerations.
24
+
25
+ Extends both str and Enum to create string enums that are both string-like and enumerated.
26
+ This provides type safety while maintaining string compatibility.
27
+
28
+ Example:
29
+ >>> class MyEnum(BaseStrEnum):
30
+ ... VALUE = "value"
31
+ >>> isinstance(MyEnum.VALUE, str) # True
32
+ >>> isinstance(MyEnum.VALUE, Enum) # True
33
+ """
34
+
35
+ pass
36
+
37
+
38
+ class UploadState(Enum):
39
+ """File upload operation states.
40
+
41
+ Represents the various states a file upload operation can be in, from initiation to completion.
42
+
43
+ Attributes:
44
+ success (int): Upload completed successfully.
45
+ failed (int): Upload failed.
46
+ pending (int): Upload is queued.
47
+ running (int): Upload is in progress.
48
+ paused (int): Upload is temporarily paused.
49
+ aborted (int): Upload was aborted.
50
+ cancelled (int): Upload was cancelled.
51
+ timeout (int): Upload timed out.
52
+ """
53
+
54
+ success = 0
55
+ failed = 1
56
+ pending = 2
57
+ running = 3
58
+ paused = 4
59
+ aborted = 5
60
+ cancelled = 6
61
+ timeout = 7
62
+
63
+
64
+ class Status(BaseStrEnum):
65
+ """Execution status values.
66
+
67
+ Represents the various states an execution can be in throughout its lifecycle.
68
+
69
+ Attributes:
70
+ initializing (str): Initial setup is in progress.
71
+ created (str): Execution record has been created.
72
+ pending (str): Execution is queued.
73
+ running (str): Execution is in progress.
74
+ aborted (str): Execution was manually stopped.
75
+ completed (str): Execution finished successfully.
76
+ failed (str): Execution encountered an error.
77
+ """
78
+
79
+ initializing = "Initializing"
80
+ created = "Created"
81
+ pending = "Pending"
82
+ running = "Running"
83
+ aborted = "Aborted"
84
+ completed = "Completed"
85
+ failed = "Failed"
86
+
87
+
88
+ class BuiltinTypes(Enum):
89
+ """ERMrest built-in data types.
90
+
91
+ Maps ERMrest's built-in data types to their type names. These types are used for defining
92
+ column types in tables and for type validation.
93
+
94
+ Attributes:
95
+ text (str): Text/string type.
96
+ int2 (str): 16-bit integer.
97
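+ jsonb (str): Binary JSON. NOTE(review): assigned builtin_types.json.typename below, so it shares its value with json (enum alias) -- confirm intended.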
98
+ float8 (str): 64-bit float.
99
+ timestamp (str): Timestamp without timezone.
100
+ int8 (str): 64-bit integer.
101
+ boolean (str): Boolean type.
102
+ json (str): JSON type.
103
+ float4 (str): 32-bit float.
104
+ int4 (str): 32-bit integer.
105
+ timestamptz (str): Timestamp with timezone.
106
+ date (str): Date type.
107
+ ermrest_rid (str): Resource identifier.
108
+ ermrest_rcb (str): Record created by.
109
+ ermrest_rmb (str): Record modified by.
110
+ ermrest_rct (str): Record creation time.
111
+ ermrest_rmt (str): Record modification time.
112
+ markdown (str): Markdown text.
113
+ longtext (str): Long text.
114
+ ermrest_curie (str): Compact URI.
115
+ ermrest_uri (str): URI type.
116
+ color_rgb_hex (str): RGB color in hex.
117
+ serial2 (str): 16-bit auto-incrementing.
118
+ serial4 (str): 32-bit auto-incrementing.
119
+ serial8 (str): 64-bit auto-incrementing.
120
+ """
121
+
122
+ text = builtin_types.text.typename
123
+ int2 = builtin_types.int2.typename
124
+ jsonb = builtin_types.json.typename
125
+ float8 = builtin_types.float8.typename
126
+ timestamp = builtin_types.timestamp.typename
127
+ int8 = builtin_types.int8.typename
128
+ boolean = builtin_types.boolean.typename
129
+ json = builtin_types.json.typename
130
+ float4 = builtin_types.float4.typename
131
+ int4 = builtin_types.int4.typename
132
+ timestamptz = builtin_types.timestamptz.typename
133
+ date = builtin_types.date.typename
134
+ ermrest_rid = builtin_types.ermrest_rid.typename
135
+ ermrest_rcb = builtin_types.ermrest_rcb.typename
136
+ ermrest_rmb = builtin_types.ermrest_rmb.typename
137
+ ermrest_rct = builtin_types.ermrest_rct.typename
138
+ ermrest_rmt = builtin_types.ermrest_rmt.typename
139
+ markdown = builtin_types.markdown.typename
140
+ longtext = builtin_types.longtext.typename
141
+ ermrest_curie = builtin_types.ermrest_curie.typename
142
+ ermrest_uri = builtin_types.ermrest_uri.typename
143
+ color_rgb_hex = builtin_types.color_rgb_hex.typename
144
+ serial2 = builtin_types.serial2.typename
145
+ serial4 = builtin_types.serial4.typename
146
+ serial8 = builtin_types.serial8.typename
147
+
148
+
149
+ class MLVocab(BaseStrEnum):
150
+ """Controlled vocabulary type identifiers.
151
+
152
+ Defines the names of controlled vocabulary tables used in DerivaML for various types
153
+ of entities and attributes.
154
+
155
+ Attributes:
156
+ dataset_type (str): Dataset classification vocabulary.
157
+ workflow_type (str): Workflow classification vocabulary.
158
+ asset_type (str): Asset classification vocabulary.
159
+ asset_role (str): Asset role classification vocabulary.
160
+ """
161
+
162
+ dataset_type = "Dataset_Type"
163
+ workflow_type = "Workflow_Type"
164
+ asset_type = "Asset_Type"
165
+ asset_role = "Asset_Role"
166
+ feature_name = "Feature_Name"
167
+
168
+
169
+ class MLAsset(BaseStrEnum):
170
+ """Asset type identifiers.
171
+
172
+ Defines the types of assets that can be associated with executions.
173
+
174
+ Attributes:
175
+ execution_metadata (str): Metadata about an execution.
176
+ execution_asset (str): Asset produced by an execution.
177
+ """
178
+
179
+ execution_metadata = "Execution_Metadata"
180
+ execution_asset = "Execution_Asset"
181
+
182
+
183
+ class MLTable(BaseStrEnum):
184
+ dataset = "Dataset"
185
+ workflow = "Workflow"
186
+ file = "File"
187
+ asset = "Asset"
188
+ execution = "Execution"
189
+ dataset_version = "Dataset_Version"
190
+ execution_metadata = "Execution_Metadata"
191
+ execution_asset = "Execution_Asset"
192
+
193
+
194
+ class ExecMetadataType(BaseStrEnum):
195
+ """Execution metadata type identifiers.
196
+
197
+ Defines the types of metadata that can be associated with an execution.
198
+
199
+ Attributes:
200
+ execution_config (str): Execution configuration data.
201
+ runtime_env (str): Runtime environment information.
202
+ """
203
+
204
+ execution_config = "Execution_Config"
205
+ runtime_env = "Runtime_Env"
206
+
207
+
208
+ class ExecAssetType(BaseStrEnum):
209
+ """Execution asset type identifiers.
210
+
211
+ Defines the types of assets that can be produced during an execution.
212
+
213
+ Attributes:
214
+ input_file (str): Input file used by the execution.
215
+ output_file (str): Output file produced by the execution.
216
+ notebook_output (str): Jupyter notebook output from the execution.
217
+ """
218
+
219
+ input_file = "Input_File"
220
+ output_file = "Output_File"
221
+ notebook_output = "Notebook_Output"
222
+ model_file = "Model_File"
@@ -0,0 +1,288 @@
1
+ """ERMrest data models for DerivaML.
2
+
3
+ This module provides Pydantic models that represent ERMrest catalog structures. These models are used
4
+ throughout DerivaML for defining and manipulating catalog elements like tables, columns, and keys.
5
+
6
+ Classes:
7
+ FileUploadState: Tracks the state of file uploads.
8
+ VocabularyTerm: Represents terms in controlled vocabularies.
9
+ ColumnDefinition: Defines columns in tables.
10
+ KeyDefinition: Defines primary and unique keys.
11
+ ForeignKeyDefinition: Defines foreign key relationships.
12
+ TableDefinition: Defines complete table structures.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import warnings
18
+ from typing import Any, Iterable
19
+
20
+ import deriva.core.ermrest_model as em
21
+ from deriva.core.ermrest_model import builtin_types
22
+ from pydantic import (
23
+ BaseModel,
24
+ Field,
25
+ computed_field,
26
+ field_validator,
27
+ model_serializer,
28
+ )
29
+
30
+ from .constants import RID
31
+ from .enums import BuiltinTypes, UploadState
32
+
33
+ # Pydantic warnings suppression
34
+ warnings.filterwarnings("ignore", message='Field name "schema"', category=Warning, module="pydantic")
35
+ warnings.filterwarnings(
36
+ "ignore",
37
+ message="fields may not start with an underscore",
38
+ category=Warning,
39
+ module="pydantic",
40
+ )
41
+
42
+
43
+ class FileUploadState(BaseModel):
44
+ """Tracks the state and result of a file upload operation.
45
+
46
+ Attributes:
47
+ state (UploadState): Current state of the upload (success, failed, etc.).
48
+ status (str): Detailed status message.
49
+ result (Any): Upload result data, if any.
50
+ rid (RID | None): Resource identifier of the uploaded file, if successful.
51
+ """
52
+ state: UploadState
53
+ status: str
54
+ result: Any
55
+
56
+ @computed_field
57
+ @property
58
+ def rid(self) -> RID | None:
59
+ return self.result and self.result["RID"]
60
+
61
+
62
+ class VocabularyTerm(BaseModel):
63
+ """Represents a term in a controlled vocabulary.
64
+
65
+ A vocabulary term is a standardized entry in a controlled vocabulary table. Each term has
66
+ a primary name, optional synonyms, and identifiers for cross-referencing.
67
+
68
+ Attributes:
69
+ name (str): Primary name of the term.
70
+ synonyms (list[str] | None): Alternative names for the term.
71
+ id (str): CURIE (Compact URI) identifier.
72
+ uri (str): Full URI for the term.
73
+ description (str): Explanation of the term's meaning.
74
+ rid (str): Resource identifier in the catalog.
75
+
76
+ Example:
77
+ >>> term = VocabularyTerm(
78
+ ... Name="epithelial",
79
+ ... Synonyms=["epithelium"],
80
+ ... ID="tissue:0001",
81
+ ... URI="http://example.org/tissue/0001",
82
+ ... Description="Epithelial tissue type",
83
+ ... RID="1-ABC1"
84
+ ... )
85
+ """
86
+ name: str = Field(alias="Name")
87
+ synonyms: list[str] | None = Field(alias="Synonyms")
88
+ id: str = Field(alias="ID")
89
+ uri: str = Field(alias="URI")
90
+ description: str = Field(alias="Description")
91
+ rid: str = Field(alias="RID")
92
+
93
+ class Config:
94
+ extra = "ignore"
95
+
96
+
97
+ class ColumnDefinition(BaseModel):
98
+ """Defines a column in an ERMrest table.
99
+
100
+ Provides a Pydantic model for defining columns with their types, constraints, and metadata.
101
+ Maps to deriva_py's Column.define functionality.
102
+
103
+ Attributes:
104
+ name (str): Name of the column.
105
+ type (BuiltinTypes): ERMrest data type for the column.
106
+ nullok (bool): Whether NULL values are allowed. Defaults to True.
107
+ default (Any): Default value for the column.
108
+ comment (str | None): Description of the column's purpose.
109
+ acls (dict): Access control lists.
110
+ acl_bindings (dict): Dynamic access control bindings.
111
+ annotations (dict): Additional metadata annotations.
112
+
113
+ Example:
114
+ >>> col = ColumnDefinition(
115
+ ... name="score",
116
+ ... type=BuiltinTypes.float4,
117
+ ... nullok=False,
118
+ ... comment="Confidence score between 0 and 1"
119
+ ... )
120
+ """
121
+ name: str
122
+ type: BuiltinTypes
123
+ nullok: bool = True
124
+ default: Any = None
125
+ comment: str | None = None
126
+ acls: dict = Field(default_factory=dict)
127
+ acl_bindings: dict = Field(default_factory=dict)
128
+ annotations: dict = Field(default_factory=dict)
129
+
130
+ @field_validator("type", mode="before")
131
+ @classmethod
132
+ def extract_type_name(cls, value: Any) -> Any:
133
+ if isinstance(value, dict):
134
+ return BuiltinTypes(value["typename"])
135
+ else:
136
+ return value
137
+
138
+ @model_serializer()
139
+ def serialize_column_definition(self):
140
+ return em.Column.define(
141
+ self.name,
142
+ builtin_types[self.type.value],
143
+ nullok=self.nullok,
144
+ default=self.default,
145
+ comment=self.comment,
146
+ acls=self.acls,
147
+ acl_bindings=self.acl_bindings,
148
+ annotations=self.annotations,
149
+ )
150
+
151
+
152
+ class KeyDefinition(BaseModel):
153
+ """Defines a key constraint in an ERMrest table.
154
+
155
+ Provides a Pydantic model for defining primary keys and unique constraints.
156
+ Maps to deriva_py's Key.define functionality.
157
+
158
+ Attributes:
159
+ colnames (Iterable[str]): Names of columns that form the key.
160
+ constraint_names (Iterable[str]): Names for the key constraints.
161
+ comment (str | None): Description of the key's purpose.
162
+ annotations (dict): Additional metadata annotations.
163
+
164
+ Example:
165
+ >>> key = KeyDefinition(
166
+ ... colnames=["id", "version"],
167
+ ... constraint_names=["unique_id_version"],
168
+ ... comment="Unique identifier with version"
169
+ ... )
170
+ """
171
+ colnames: Iterable[str]
172
+ constraint_names: Iterable[str]
173
+ comment: str | None = None
174
+ annotations: dict = Field(default_factory=dict)
175
+
176
+ @model_serializer()
177
+ def serialize_key_definition(self):
178
+ return em.Key.define(
179
+ colnames=self.colnames,
180
+ constraint_names=self.constraint_names,
181
+ comment=self.comment,
182
+ annotations=self.annotations,
183
+ )
184
+
185
+
186
+ class ForeignKeyDefinition(BaseModel):
187
+ """Defines a foreign key relationship between tables.
188
+
189
+ Provides a Pydantic model for defining foreign key constraints with referential actions
190
+ and metadata. Maps to deriva_py's ForeignKey.define functionality.
191
+
192
+ Attributes:
193
+ colnames (Iterable[str]): Names of columns in the referencing table.
194
+ pk_sname (str): Schema name of the referenced table.
195
+ pk_tname (str): Name of the referenced table.
196
+ pk_colnames (Iterable[str]): Names of columns in the referenced table.
197
+ constraint_names (Iterable[str]): Names for the foreign key constraints.
198
+ on_update (str): Action on update of referenced row. Defaults to "NO ACTION".
199
+ on_delete (str): Action on delete of referenced row. Defaults to "NO ACTION".
200
+ comment (str | None): Description of the relationship.
201
+ acls (dict): Access control lists.
202
+ acl_bindings (dict): Dynamic access control bindings.
203
+ annotations (dict): Additional metadata annotations.
204
+
205
+ Example:
206
+ >>> fk = ForeignKeyDefinition(
207
+ ... colnames=["dataset_id"],
208
+ ... pk_sname="core",
209
+ ... pk_tname="dataset",
210
+ ... pk_colnames=["id"],
211
+ ... on_delete="CASCADE"
212
+ ... )
213
+ """
214
+ colnames: Iterable[str]
215
+ pk_sname: str
216
+ pk_tname: str
217
+ pk_colnames: Iterable[str]
218
+ constraint_names: Iterable[str] = Field(default_factory=list)
219
+ on_update: str = "NO ACTION"
220
+ on_delete: str = "NO ACTION"
221
+ comment: str | None = None
222
+ acls: dict[str, Any] = Field(default_factory=dict)
223
+ acl_bindings: dict[str, Any] = Field(default_factory=dict)
224
+ annotations: dict[str, Any] = Field(default_factory=dict)
225
+
226
+ @model_serializer()
227
+ def serialize_fk_definition(self):
228
+ return em.ForeignKey.define(
229
+ fk_colnames=self.colnames,
230
+ pk_sname=self.pk_sname,
231
+ pk_tname=self.pk_tname,
232
+ pk_colnames=self.pk_colnames,
233
+ on_update=self.on_update,
234
+ on_delete=self.on_delete,
235
+ comment=self.comment,
236
+ acls=self.acls,
237
+ acl_bindings=self.acl_bindings,
238
+ annotations=self.annotations,
239
+ )
240
+
241
+
242
+ class TableDefinition(BaseModel):
243
+ """Defines a complete table structure in ERMrest.
244
+
245
+ Provides a Pydantic model for defining tables with their columns, keys, and relationships.
246
+ Maps to deriva_py's Table.define functionality.
247
+
248
+ Attributes:
249
+ name (str): Name of the table.
250
+ column_defs (Iterable[ColumnDefinition]): Column definitions.
251
+ key_defs (Iterable[KeyDefinition]): Key constraint definitions.
252
+ fkey_defs (Iterable[ForeignKeyDefinition]): Foreign key relationship definitions.
253
+ comment (str | None): Description of the table's purpose.
254
+ acls (dict): Access control lists.
255
+ acl_bindings (dict): Dynamic access control bindings.
256
+ annotations (dict): Additional metadata annotations.
257
+
258
+ Example:
259
+ >>> table = TableDefinition(
260
+ ... name="experiment",
261
+ ... column_defs=[
262
+ ... ColumnDefinition(name="id", type=BuiltinTypes.text),
263
+ ... ColumnDefinition(name="date", type=BuiltinTypes.date)
264
+ ... ],
265
+ ... comment="Experimental data records"
266
+ ... )
267
+ """
268
+ name: str
269
+ column_defs: Iterable[ColumnDefinition]
270
+ key_defs: Iterable[KeyDefinition] = Field(default_factory=list)
271
+ fkey_defs: Iterable[ForeignKeyDefinition] = Field(default_factory=list)
272
+ comment: str | None = None
273
+ acls: dict = Field(default_factory=dict)
274
+ acl_bindings: dict = Field(default_factory=dict)
275
+ annotations: dict = Field(default_factory=dict)
276
+
277
+ @model_serializer()
278
+ def serialize_table_definition(self):
279
+ return em.Table.define(
280
+ tname=self.name,
281
+ column_defs=[c.model_dump() for c in self.column_defs],
282
+ key_defs=[k.model_dump() for k in self.key_defs],
283
+ fkey_defs=[fk.model_dump() for fk in self.fkey_defs],
284
+ comment=self.comment,
285
+ acls=self.acls,
286
+ acl_bindings=self.acl_bindings,
287
+ annotations=self.annotations,
288
+ )
@@ -0,0 +1,28 @@
1
+ """
2
+ Custom exceptions used throughout the DerivaML package.
3
+ """
4
+
5
+
6
+ class DerivaMLException(Exception):
7
+ """Exception class specific to DerivaML module.
8
+
9
+ Args:
10
+ msg (str): Optional message for the exception.
11
+ """
12
+
13
+ def __init__(self, msg=""):
14
+ super().__init__(msg)
15
+ self._msg = msg
16
+
17
+
18
+ class DerivaMLInvalidTerm(DerivaMLException):
19
+ """Exception class for invalid terms in DerivaML controlled vocabulary."""
20
+ def __init__(self, vocabulary, term: str, msg: str = "Term doesn't exist"):
21
+ """Exception indicating undefined term type"""
22
+ super().__init__(f"Invalid term {term} in vocabulary {vocabulary}: {msg}.")
23
+
24
+ class DerivaMLTableTypeError(DerivaMLException):
25
+ """RID for table is not of correct type."""
26
+ def __init__(self, table_type, table: str):
27
+ """Exception indicating that a table is not of the expected type"""
28
+ super().__init__(f"Table {table} is not of type {table_type}.")