deriva-ml 1.17.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. deriva_ml/.DS_Store +0 -0
  2. deriva_ml/__init__.py +79 -0
  3. deriva_ml/bump_version.py +142 -0
  4. deriva_ml/core/__init__.py +39 -0
  5. deriva_ml/core/base.py +1527 -0
  6. deriva_ml/core/config.py +69 -0
  7. deriva_ml/core/constants.py +36 -0
  8. deriva_ml/core/definitions.py +74 -0
  9. deriva_ml/core/enums.py +222 -0
  10. deriva_ml/core/ermrest.py +288 -0
  11. deriva_ml/core/exceptions.py +28 -0
  12. deriva_ml/core/filespec.py +116 -0
  13. deriva_ml/dataset/__init__.py +12 -0
  14. deriva_ml/dataset/aux_classes.py +225 -0
  15. deriva_ml/dataset/dataset.py +1519 -0
  16. deriva_ml/dataset/dataset_bag.py +450 -0
  17. deriva_ml/dataset/history.py +109 -0
  18. deriva_ml/dataset/upload.py +439 -0
  19. deriva_ml/demo_catalog.py +495 -0
  20. deriva_ml/execution/__init__.py +26 -0
  21. deriva_ml/execution/environment.py +290 -0
  22. deriva_ml/execution/execution.py +1180 -0
  23. deriva_ml/execution/execution_configuration.py +147 -0
  24. deriva_ml/execution/workflow.py +413 -0
  25. deriva_ml/feature.py +228 -0
  26. deriva_ml/install_kernel.py +71 -0
  27. deriva_ml/model/__init__.py +0 -0
  28. deriva_ml/model/catalog.py +485 -0
  29. deriva_ml/model/database.py +719 -0
  30. deriva_ml/protocols/dataset.py +19 -0
  31. deriva_ml/run_notebook.py +228 -0
  32. deriva_ml/schema/__init__.py +3 -0
  33. deriva_ml/schema/annotations.py +473 -0
  34. deriva_ml/schema/check_schema.py +104 -0
  35. deriva_ml/schema/create_schema.py +393 -0
  36. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  37. deriva_ml/schema/policy.json +81 -0
  38. deriva_ml/schema/table_comments_utils.py +57 -0
  39. deriva_ml/test.py +94 -0
  40. deriva_ml-1.17.10.dist-info/METADATA +38 -0
  41. deriva_ml-1.17.10.dist-info/RECORD +45 -0
  42. deriva_ml-1.17.10.dist-info/WHEEL +5 -0
  43. deriva_ml-1.17.10.dist-info/entry_points.txt +9 -0
  44. deriva_ml-1.17.10.dist-info/licenses/LICENSE +201 -0
  45. deriva_ml-1.17.10.dist-info/top_level.txt +1 -0
@@ -0,0 +1,69 @@
1
+ import getpass
2
+ import logging
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ from hydra.conf import HydraConf, RunDir
7
+ from hydra.core.hydra_config import HydraConfig
8
+ from hydra_zen import store
9
+ from omegaconf import OmegaConf
10
+ from pydantic import BaseModel, model_validator
11
+
12
+ from deriva_ml.core.definitions import ML_SCHEMA
13
+
14
+
15
class DerivaMLConfig(BaseModel):
    """Pydantic settings model for a DerivaML connection.

    After validation the model normalizes ``working_dir`` through
    :meth:`compute_workdir` and, when a Hydra run is active, records Hydra's
    runtime output directory in ``hydra_runtime_output_dir``.
    """

    hostname: str
    catalog_id: str | int = 1
    domain_schema: str | None = None
    project_name: str | None = None
    cache_dir: str | Path | None = None
    # Rewritten by init_working_dir to an absolute ".../deriva-ml" path.
    working_dir: str | Path | None = None
    # Filled in by init_working_dir when Hydra has been initialized.
    hydra_runtime_output_dir: str | Path | None = None
    ml_schema: str = ML_SCHEMA
    logging_level: Any = logging.WARNING
    deriva_logging_level: Any = logging.WARNING
    credential: Any = None
    use_minid: bool = True
    check_auth: bool = True

    @model_validator(mode="after")
    def init_working_dir(self):
        """Set up the working directory for the model.

        If the user does not specify a directory, a default based on the
        user's home directory is used; otherwise the user name is appended to
        the supplied directory (see :meth:`compute_workdir`).

        This repeats what is done in ``DerivaML.__init__``, but we put it here
        so that the working directory is available to Hydra.

        Returns:
            Self: The instance with ``working_dir`` (and, inside a Hydra run,
            ``hydra_runtime_output_dir``) initialized.
        """
        self.working_dir = DerivaMLConfig.compute_workdir(self.working_dir)
        # HydraConfig.get() raises when no Hydra run is active; only query it
        # when Hydra has been set up so the model can also be built without Hydra.
        if HydraConfig.initialized():
            self.hydra_runtime_output_dir = Path(HydraConfig.get().runtime.output_dir)
        return self

    @staticmethod
    def compute_workdir(working_dir: str | Path | None) -> Path:
        """Return the absolute DerivaML working directory.

        Args:
            working_dir: Base directory chosen by the user, or a falsy value
                to fall back to the user's home directory.

        Returns:
            ``<working_dir>/<username>/deriva-ml`` when a directory is given,
            otherwise ``<home>/deriva-ml``, as an absolute path.
        """
        # When a working directory is provided, add the user name to it so
        # that multiple users do not overwrite each other's work.
        if working_dir:
            base_dir = Path(working_dir) / getpass.getuser()
        else:
            base_dir = Path.home()
        return (base_dir / "deriva-ml").absolute()
57
+
58
+
59
# Make compute_workdir callable from OmegaConf interpolations so the Hydra run
# directory below can be derived from the configured working directory.
OmegaConf.register_new_resolver(
    "compute_workdir",
    DerivaMLConfig.compute_workdir,
    replace=True,
)

# Hydra writes each run under <workdir>/hydra/<timestamp>, with its own
# configuration files in the "hydra-config" subdirectory.
_hydra_run_dir = RunDir("${compute_workdir:${deriva_ml.working_dir}}/hydra/${now:%Y-%m-%d_%H-%M-%S}")
_hydra_conf = HydraConf(run=_hydra_run_dir, output_subdir="hydra-config")
store(_hydra_conf, group="hydra", name="config")

store.add_to_hydra_store()
@@ -0,0 +1,36 @@
1
+ """
2
+ Constants used throughout the DerivaML package.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import NewType, TypeAlias
8
+
9
+ from pydantic import constr
10
+
11
# Name of the schema that holds the DerivaML tables.
ML_SCHEMA = "deriva-ml"

# Placeholder RID used for dry runs.
DRY_RUN_RID = "0000"

# Building blocks of the RID pattern: a RID, optionally followed by "@<snapshot>".
rid_part = r"(?P<rid>(?:[A-Z\d]{1,4}|[A-Z\d]{1,4}(?:-[A-Z\d]{4})+))"
snapshot_part = r"(?:@(?P<snapshot>(?:[A-Z\d]{1,4}|[A-Z\d]{1,4}(?:-[A-Z\d]{4})+)))?"
rid_regex = "^" + rid_part + snapshot_part + "$"

# RID types: a pattern-constrained string and a NewType wrapper around it.
BaseRIDString = constr(pattern=rid_regex)
RIDType: TypeAlias = constr(pattern=rid_regex)
RID = NewType("RID", BaseRIDString)

# System columns in Deriva, and the asset columns (which include the system ones).
DerivaSystemColumns = ["RID", "RCT", "RMT", "RCB", "RMB"]
DerivaAssetColumns = {
    "Filename",
    "URL",
    "Length",
    "MD5",
    "Description",
    *DerivaSystemColumns,
}
@@ -0,0 +1,74 @@
1
+ """
2
+ Shared definitions that are used in different DerivaML modules.
3
+ This module re-exports all symbols from the core submodules for backwards compatibility.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ # Re-export constants
9
+ from deriva_ml.core.constants import (
10
+ DRY_RUN_RID,
11
+ ML_SCHEMA,
12
+ RID,
13
+ DerivaAssetColumns,
14
+ DerivaSystemColumns,
15
+ rid_part,
16
+ rid_regex,
17
+ snapshot_part,
18
+ )
19
+
20
+ # Re-export enums
21
+ from deriva_ml.core.enums import (
22
+ BaseStrEnum,
23
+ BuiltinTypes,
24
+ ExecAssetType,
25
+ ExecMetadataType,
26
+ MLAsset,
27
+ MLTable,
28
+ MLVocab,
29
+ Status,
30
+ UploadState,
31
+ )
32
+
33
+ # Re-export models
34
+ from deriva_ml.core.ermrest import (
35
+ ColumnDefinition,
36
+ FileUploadState,
37
+ ForeignKeyDefinition,
38
+ KeyDefinition,
39
+ TableDefinition,
40
+ VocabularyTerm,
41
+ )
42
+
43
+ # Re-export file specification model
44
+ from deriva_ml.core.filespec import FileSpec
45
+
46
+ __all__ = [
47
+ # Constants
48
+ "ML_SCHEMA",
49
+ "DRY_RUN_RID",
50
+ "rid_part",
51
+ "snapshot_part",
52
+ "rid_regex",
53
+ "DerivaSystemColumns",
54
+ "DerivaAssetColumns",
55
+ "RID",
56
+ # Enums
57
+ "BaseStrEnum",
58
+ "UploadState",
59
+ "Status",
60
+ "BuiltinTypes",
61
+ "MLVocab",
62
+ "MLTable",
63
+ "MLAsset",
64
+ "ExecMetadataType",
65
+ "ExecAssetType",
66
+ # Models
67
+ "FileUploadState",
68
+ "FileSpec",
69
+ "VocabularyTerm",
70
+ "ColumnDefinition",
71
+ "KeyDefinition",
72
+ "ForeignKeyDefinition",
73
+ "TableDefinition",
74
+ ]
@@ -0,0 +1,222 @@
1
+ """Enumeration classes for DerivaML.
2
+
3
+ This module provides enumeration classes used throughout DerivaML for representing states, statuses,
4
+ types, and vocabularies. Each enum class represents a specific set of constants used in the system.
5
+
6
+ Classes:
7
+ BaseStrEnum: Base class for string-based enums.
8
+ UploadState: States for file upload operations.
9
+ Status: Execution status values.
10
+ BuiltinTypes: ERMrest built-in data types.
11
+ MLVocab: Controlled vocabulary types.
12
+ MLAsset: Asset type identifiers.
13
+ ExecMetadataType: Execution metadata type identifiers.
14
+ ExecAssetType: Execution asset type identifiers.
15
+ """
16
+
17
+ from enum import Enum
18
+
19
+ from deriva.core.ermrest_model import builtin_types
20
+
21
+
22
class BaseStrEnum(str, Enum):
    """Common parent for enumerations whose members are also strings.

    Mixing ``str`` into ``Enum`` gives members that compare and behave like
    plain strings while still being enumerated constants.

    Example:
        >>> class MyEnum(BaseStrEnum):
        ...     VALUE = "value"
        >>> isinstance(MyEnum.VALUE, str)
        True
        >>> isinstance(MyEnum.VALUE, Enum)
        True
    """
36
+
37
+
38
class UploadState(Enum):
    """Integer-valued states of a file upload operation.

    Attributes:
        success (int): The upload completed successfully.
        failed (int): The upload failed.
        pending (int): The upload is queued.
        running (int): The upload is in progress.
        paused (int): The upload is temporarily paused.
        aborted (int): The upload was aborted.
        cancelled (int): The upload was cancelled.
        timeout (int): The upload timed out.
    """

    # Terminal outcomes
    success = 0
    failed = 1

    # In-flight states
    pending = 2
    running = 3
    paused = 4

    # Interrupted states
    aborted = 5
    cancelled = 6
    timeout = 7
62
+
63
+
64
class Status(BaseStrEnum):
    """Lifecycle states of an execution.

    Attributes:
        initializing (str): Initial setup is in progress.
        created (str): The execution record has been created.
        pending (str): The execution is queued.
        running (str): The execution is in progress.
        aborted (str): The execution was manually stopped.
        completed (str): The execution finished successfully.
        failed (str): The execution encountered an error.
    """

    # Before the execution runs
    initializing = "Initializing"
    created = "Created"
    pending = "Pending"

    # While it runs
    running = "Running"

    # Final outcomes
    aborted = "Aborted"
    completed = "Completed"
    failed = "Failed"
86
+
87
+
88
class BuiltinTypes(Enum):
    """ERMrest built-in data types.

    Each member's value is the ``typename`` of the matching entry in
    ``deriva.core.ermrest_model.builtin_types``. The members are used to
    declare column types and to look the ERMrest type back up by name.

    Attributes:
        text (str): Text/string type.
        int2 (str): 16-bit integer.
        jsonb (str): Binary JSON.
        float8 (str): 64-bit float.
        timestamp (str): Timestamp without timezone.
        int8 (str): 64-bit integer.
        boolean (str): Boolean type.
        json (str): JSON type.
        float4 (str): 32-bit float.
        int4 (str): 32-bit integer.
        timestamptz (str): Timestamp with timezone.
        date (str): Date type.
        ermrest_rid (str): Resource identifier.
        ermrest_rcb (str): Record created by.
        ermrest_rmb (str): Record modified by.
        ermrest_rct (str): Record creation time.
        ermrest_rmt (str): Record modification time.
        markdown (str): Markdown text.
        longtext (str): Long text.
        ermrest_curie (str): Compact URI.
        ermrest_uri (str): URI type.
        color_rgb_hex (str): RGB color in hex.
        serial2 (str): 16-bit auto-incrementing.
        serial4 (str): 32-bit auto-incrementing.
        serial8 (str): 64-bit auto-incrementing.
    """

    text = builtin_types.text.typename
    int2 = builtin_types.int2.typename
    # Must point at the jsonb type: reusing json's typename here would make
    # ``json`` an alias of ``jsonb`` and map jsonb columns to plain json.
    jsonb = builtin_types.jsonb.typename
    float8 = builtin_types.float8.typename
    timestamp = builtin_types.timestamp.typename
    int8 = builtin_types.int8.typename
    boolean = builtin_types.boolean.typename
    json = builtin_types.json.typename
    float4 = builtin_types.float4.typename
    int4 = builtin_types.int4.typename
    timestamptz = builtin_types.timestamptz.typename
    date = builtin_types.date.typename
    ermrest_rid = builtin_types.ermrest_rid.typename
    ermrest_rcb = builtin_types.ermrest_rcb.typename
    ermrest_rmb = builtin_types.ermrest_rmb.typename
    ermrest_rct = builtin_types.ermrest_rct.typename
    ermrest_rmt = builtin_types.ermrest_rmt.typename
    markdown = builtin_types.markdown.typename
    longtext = builtin_types.longtext.typename
    ermrest_curie = builtin_types.ermrest_curie.typename
    ermrest_uri = builtin_types.ermrest_uri.typename
    color_rgb_hex = builtin_types.color_rgb_hex.typename
    serial2 = builtin_types.serial2.typename
    serial4 = builtin_types.serial4.typename
    serial8 = builtin_types.serial8.typename
147
+
148
+
149
class MLVocab(BaseStrEnum):
    """Names of the controlled vocabulary tables used by DerivaML.

    Attributes:
        dataset_type (str): Dataset classification vocabulary.
        workflow_type (str): Workflow classification vocabulary.
        asset_type (str): Asset classification vocabulary.
        asset_role (str): Asset role classification vocabulary.
        feature_name (str): Feature name vocabulary.
    """

    # Classification vocabularies
    dataset_type = "Dataset_Type"
    workflow_type = "Workflow_Type"
    asset_type = "Asset_Type"
    asset_role = "Asset_Role"

    # Naming vocabulary
    feature_name = "Feature_Name"
167
+
168
+
169
class MLAsset(BaseStrEnum):
    """Identifiers for the kinds of assets that can be tied to an execution.

    Attributes:
        execution_metadata (str): Metadata about an execution.
        execution_asset (str): Asset produced by an execution.
    """

    execution_metadata = "Execution_Metadata"
    execution_asset = "Execution_Asset"
181
+
182
+
183
class MLTable(BaseStrEnum):
    """String identifiers for DerivaML tables.

    NOTE(review): the members look like table names in the ML schema, judging
    by the class name and values; confirm against the schema definition.
    """

    # Core entities
    dataset = "Dataset"
    workflow = "Workflow"
    file = "File"
    asset = "Asset"
    execution = "Execution"

    # Versioning and execution-related tables
    dataset_version = "Dataset_Version"
    execution_metadata = "Execution_Metadata"
    execution_asset = "Execution_Asset"
192
+
193
+
194
class ExecMetadataType(BaseStrEnum):
    """Identifiers for the kinds of metadata attached to an execution.

    Attributes:
        execution_config (str): Execution configuration data.
        runtime_env (str): Runtime environment information.
    """

    execution_config = "Execution_Config"
    runtime_env = "Runtime_Env"
206
+
207
+
208
class ExecAssetType(BaseStrEnum):
    """Identifiers for the kinds of assets associated with an execution.

    Attributes:
        input_file (str): Input file used by the execution.
        output_file (str): Output file produced by the execution.
        notebook_output (str): Jupyter notebook output from the execution.
        model_file (str): Model file asset type.
    """

    # Plain input/output files
    input_file = "Input_File"
    output_file = "Output_File"

    # Specialised outputs
    notebook_output = "Notebook_Output"
    model_file = "Model_File"
@@ -0,0 +1,288 @@
1
+ """ERMrest data models for DerivaML.
2
+
3
+ This module provides Pydantic models that represent ERMrest catalog structures. These models are used
4
+ throughout DerivaML for defining and manipulating catalog elements like tables, columns, and keys.
5
+
6
+ Classes:
7
+ FileUploadState: Tracks the state of file uploads.
8
+ VocabularyTerm: Represents terms in controlled vocabularies.
9
+ ColumnDefinition: Defines columns in tables.
10
+ KeyDefinition: Defines primary and unique keys.
11
+ ForeignKeyDefinition: Defines foreign key relationships.
12
+ TableDefinition: Defines complete table structures.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import warnings
18
+ from typing import Any, Iterable
19
+
20
+ import deriva.core.ermrest_model as em
21
+ from deriva.core.ermrest_model import builtin_types
22
+ from pydantic import (
23
+ BaseModel,
24
+ Field,
25
+ computed_field,
26
+ field_validator,
27
+ model_serializer,
28
+ )
29
+
30
+ from .constants import RID
31
+ from .enums import BuiltinTypes, UploadState
32
+
33
# Silence the pydantic warnings that match these message patterns (one about a
# field named "schema", one about field names with a leading underscore).
for _pattern in (
    'Field name "schema"',
    "fields may not start with an underscore",
):
    warnings.filterwarnings("ignore", message=_pattern, category=Warning, module="pydantic")
41
+
42
+
43
class FileUploadState(BaseModel):
    """Tracks the state and result of a file upload operation.

    Attributes:
        state (UploadState): Current state of the upload (success, failed, etc.).
        status (str): Detailed status message.
        result (Any): Upload result data, if any.
        rid (RID | None): Value of ``result["RID"]``, or ``None`` when there is
            no result.
    """

    state: UploadState
    status: str
    result: Any

    @computed_field
    @property
    def rid(self) -> RID | None:
        """Return the RID stored in ``result``, or ``None`` if ``result`` is empty.

        Raises:
            KeyError: If ``result`` is non-empty but has no ``"RID"`` entry.
        """
        # An empty/absent result must yield None rather than the falsy result
        # object itself (e.g. ``{}``), which would violate the declared type.
        if not self.result:
            return None
        return self.result["RID"]
60
+
61
+
62
class VocabularyTerm(BaseModel):
    """Represents a term in a controlled vocabulary.

    A vocabulary term is a standardized entry in a controlled vocabulary table.
    Fields are populated from the capitalized aliases shown in the example;
    any additional keys in the input are ignored.

    Attributes:
        name (str): Primary name of the term.
        synonyms (list[str] | None): Alternative names for the term.
        id (str): CURIE (Compact URI) identifier.
        uri (str): Full URI for the term.
        description (str): Explanation of the term's meaning.
        rid (str): Resource identifier in the catalog.

    Example:
        >>> term = VocabularyTerm(
        ...     Name="epithelial",
        ...     Synonyms=["epithelium"],
        ...     ID="tissue:0001",
        ...     URI="http://example.org/tissue/0001",
        ...     Description="Epithelial tissue type",
        ...     RID="1-abc123"
        ... )
    """

    # Pydantic v2 configuration; replaces the deprecated nested ``Config`` class.
    model_config = {"extra": "ignore"}

    name: str = Field(alias="Name")
    synonyms: list[str] | None = Field(alias="Synonyms")
    id: str = Field(alias="ID")
    uri: str = Field(alias="URI")
    description: str = Field(alias="Description")
    rid: str = Field(alias="RID")
95
+
96
+
97
class ColumnDefinition(BaseModel):
    """Pydantic description of a single ERMrest table column.

    Serializing the model produces the structure returned by deriva-py's
    ``Column.define``.

    Attributes:
        name (str): Name of the column.
        type (BuiltinTypes): ERMrest data type for the column.
        nullok (bool): Whether NULL values are allowed. Defaults to True.
        default (Any): Default value for the column.
        comment (str | None): Description of the column's purpose.
        acls (dict): Access control lists.
        acl_bindings (dict): Dynamic access control bindings.
        annotations (dict): Additional metadata annotations.

    Example:
        >>> col = ColumnDefinition(
        ...     name="score",
        ...     type=BuiltinTypes.float4,
        ...     nullok=False,
        ...     comment="Confidence score between 0 and 1"
        ... )
    """

    name: str
    type: BuiltinTypes
    nullok: bool = True
    default: Any = None
    comment: str | None = None
    acls: dict = Field(default_factory=dict)
    acl_bindings: dict = Field(default_factory=dict)
    annotations: dict = Field(default_factory=dict)

    @field_validator("type", mode="before")
    @classmethod
    def extract_type_name(cls, value: Any) -> Any:
        """Accept a ``{"typename": ...}`` mapping in place of a ``BuiltinTypes`` member."""
        return BuiltinTypes(value["typename"]) if isinstance(value, dict) else value

    @model_serializer()
    def serialize_column_definition(self):
        """Serialize the column through ``em.Column.define``."""
        # Translate the enum member back to the deriva-py type object.
        ermrest_type = builtin_types[self.type.value]
        options = {
            "nullok": self.nullok,
            "default": self.default,
            "comment": self.comment,
            "acls": self.acls,
            "acl_bindings": self.acl_bindings,
            "annotations": self.annotations,
        }
        return em.Column.define(self.name, ermrest_type, **options)
150
+
151
+
152
class KeyDefinition(BaseModel):
    """Pydantic description of a key constraint on an ERMrest table.

    Serializing the model produces the structure returned by deriva-py's
    ``Key.define``.

    Attributes:
        colnames (Iterable[str]): Names of columns that form the key.
        constraint_names (Iterable[str]): Names for the key constraints.
        comment (str | None): Description of the key's purpose.
        annotations (dict): Additional metadata annotations.

    Example:
        >>> key = KeyDefinition(
        ...     colnames=["id", "version"],
        ...     constraint_names=["unique_id_version"],
        ...     comment="Unique identifier with version"
        ... )
    """

    # NOTE(review): pydantic v2 validates ``Iterable`` fields lazily; confirm
    # the stored values survive being iterated more than once.
    colnames: Iterable[str]
    constraint_names: Iterable[str]
    comment: str | None = None
    annotations: dict = Field(default_factory=dict)

    @model_serializer()
    def serialize_key_definition(self):
        """Serialize the key through ``em.Key.define``."""
        define_kwargs = {
            "colnames": self.colnames,
            "constraint_names": self.constraint_names,
            "comment": self.comment,
            "annotations": self.annotations,
        }
        return em.Key.define(**define_kwargs)
184
+
185
+
186
class ForeignKeyDefinition(BaseModel):
    """Defines a foreign key relationship between tables.

    Provides a Pydantic model for defining foreign key constraints with
    referential actions and metadata. Serializing the model produces the
    structure returned by deriva-py's ``ForeignKey.define``.

    Attributes:
        colnames (Iterable[str]): Names of columns in the referencing table.
        pk_sname (str): Schema name of the referenced table.
        pk_tname (str): Name of the referenced table.
        pk_colnames (Iterable[str]): Names of columns in the referenced table.
        constraint_names (Iterable[str]): Names for the foreign key constraints.
        on_update (str): Action on update of referenced row. Defaults to "NO ACTION".
        on_delete (str): Action on delete of referenced row. Defaults to "NO ACTION".
        comment (str | None): Description of the relationship.
        acls (dict): Access control lists.
        acl_bindings (dict): Dynamic access control bindings.
        annotations (dict): Additional metadata annotations.

    Example:
        >>> fk = ForeignKeyDefinition(
        ...     colnames=["dataset_id"],
        ...     pk_sname="core",
        ...     pk_tname="dataset",
        ...     pk_colnames=["id"],
        ...     on_delete="CASCADE"
        ... )
    """

    colnames: Iterable[str]
    pk_sname: str
    pk_tname: str
    pk_colnames: Iterable[str]
    constraint_names: Iterable[str] = Field(default_factory=list)
    on_update: str = "NO ACTION"
    on_delete: str = "NO ACTION"
    comment: str | None = None
    acls: dict[str, Any] = Field(default_factory=dict)
    acl_bindings: dict[str, Any] = Field(default_factory=dict)
    annotations: dict[str, Any] = Field(default_factory=dict)

    @model_serializer()
    def serialize_fk_definition(self):
        """Serialize the foreign key through ``em.ForeignKey.define``."""
        return em.ForeignKey.define(
            fk_colnames=self.colnames,
            pk_sname=self.pk_sname,
            pk_tname=self.pk_tname,
            pk_colnames=self.pk_colnames,
            on_update=self.on_update,
            on_delete=self.on_delete,
            # Forward the declared constraint names; they were previously
            # dropped, so user-supplied names never reached the definition.
            # Materialized as a list, which is what an empty default looks like.
            constraint_names=list(self.constraint_names),
            comment=self.comment,
            acls=self.acls,
            acl_bindings=self.acl_bindings,
            annotations=self.annotations,
        )
240
+
241
+
242
class TableDefinition(BaseModel):
    """Pydantic description of a complete ERMrest table.

    Bundles column, key and foreign key definitions; serializing the model
    produces the structure returned by deriva-py's ``Table.define``.

    Attributes:
        name (str): Name of the table.
        column_defs (Iterable[ColumnDefinition]): Column definitions.
        key_defs (Iterable[KeyDefinition]): Key constraint definitions.
        fkey_defs (Iterable[ForeignKeyDefinition]): Foreign key relationship definitions.
        comment (str | None): Description of the table's purpose.
        acls (dict): Access control lists.
        acl_bindings (dict): Dynamic access control bindings.
        annotations (dict): Additional metadata annotations.

    Example:
        >>> table = TableDefinition(
        ...     name="experiment",
        ...     column_defs=[
        ...         ColumnDefinition(name="id", type=BuiltinTypes.text),
        ...         ColumnDefinition(name="date", type=BuiltinTypes.date)
        ...     ],
        ...     comment="Experimental data records"
        ... )
    """

    # NOTE(review): pydantic v2 validates ``Iterable`` fields lazily; confirm
    # the stored values survive being iterated more than once.
    name: str
    column_defs: Iterable[ColumnDefinition]
    key_defs: Iterable[KeyDefinition] = Field(default_factory=list)
    fkey_defs: Iterable[ForeignKeyDefinition] = Field(default_factory=list)
    comment: str | None = None
    acls: dict = Field(default_factory=dict)
    acl_bindings: dict = Field(default_factory=dict)
    annotations: dict = Field(default_factory=dict)

    @model_serializer()
    def serialize_table_definition(self):
        """Serialize the table, dumping each nested definition first."""
        # Same evaluation order as the keyword arguments: columns, keys, fkeys.
        dumped_columns = [column.model_dump() for column in self.column_defs]
        dumped_keys = [key.model_dump() for key in self.key_defs]
        dumped_fkeys = [fkey.model_dump() for fkey in self.fkey_defs]
        return em.Table.define(
            tname=self.name,
            column_defs=dumped_columns,
            key_defs=dumped_keys,
            fkey_defs=dumped_fkeys,
            comment=self.comment,
            acls=self.acls,
            acl_bindings=self.acl_bindings,
            annotations=self.annotations,
        )