lsst-felis 26.2024.900-py3-none-any.whl → 29.2025.4500-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- felis/__init__.py +10 -24
- felis/cli.py +437 -341
- felis/config/tap_schema/columns.csv +33 -0
- felis/config/tap_schema/key_columns.csv +8 -0
- felis/config/tap_schema/keys.csv +8 -0
- felis/config/tap_schema/schemas.csv +2 -0
- felis/config/tap_schema/tables.csv +6 -0
- felis/config/tap_schema/tap_schema_std.yaml +273 -0
- felis/datamodel.py +1386 -193
- felis/db/dialects.py +116 -0
- felis/db/schema.py +62 -0
- felis/db/sqltypes.py +275 -48
- felis/db/utils.py +409 -0
- felis/db/variants.py +159 -0
- felis/diff.py +234 -0
- felis/metadata.py +385 -0
- felis/tap_schema.py +767 -0
- felis/tests/__init__.py +0 -0
- felis/tests/postgresql.py +134 -0
- felis/tests/run_cli.py +79 -0
- felis/types.py +57 -9
- lsst_felis-29.2025.4500.dist-info/METADATA +38 -0
- lsst_felis-29.2025.4500.dist-info/RECORD +31 -0
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/WHEEL +1 -1
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/COPYRIGHT +1 -1
- felis/check.py +0 -381
- felis/simple.py +0 -424
- felis/sql.py +0 -275
- felis/tap.py +0 -433
- felis/utils.py +0 -100
- felis/validation.py +0 -103
- felis/version.py +0 -2
- felis/visitor.py +0 -180
- lsst_felis-26.2024.900.dist-info/METADATA +0 -28
- lsst_felis-26.2024.900.dist-info/RECORD +0 -23
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/entry_points.txt +0 -0
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/LICENSE +0 -0
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/top_level.txt +0 -0
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/zip-safe +0 -0
felis/datamodel.py
CHANGED
@@ -1,3 +1,5 @@
+"""Define Pydantic data models for Felis."""
+
 # This file is part of felis.
 #
 # Developed for the LSST Data Management System.
@@ -21,24 +23,43 @@
 
 from __future__ import annotations
 
+import json
 import logging
-
-from
-from
+import sys
+from collections.abc import Sequence
+from enum import StrEnum, auto
+from typing import IO, Annotated, Any, Generic, Literal, TypeAlias, TypeVar
 
+import yaml
 from astropy import units as units  # type: ignore
 from astropy.io.votable import ucd  # type: ignore
-from
+from lsst.resources import ResourcePath, ResourcePathExpression
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    PrivateAttr,
+    ValidationError,
+    ValidationInfo,
+    field_serializer,
+    field_validator,
+    model_validator,
+)
+from pydantic_core import InitErrorDetails
+
+from .db.dialects import get_supported_dialects
+from .db.sqltypes import get_type_func
+from .db.utils import string_to_typeengine
+from .types import Boolean, Byte, Char, Double, FelisType, Float, Int, Long, Short, String, Text, Unicode
 
 logger = logging.getLogger(__name__)
-# logger.setLevel(logging.DEBUG)
 
 __all__ = (
     "BaseObject",
-    "Column",
     "CheckConstraint",
+    "Column",
     "Constraint",
-    "
+    "DataType",
    "ForeignKeyConstraint",
    "Index",
    "Schema",
@@ -50,9 +71,8 @@ __all__ = (
 CONFIG = ConfigDict(
     populate_by_name=True,  # Populate attributes by name.
     extra="forbid",  # Do not allow extra fields.
-    use_enum_values=True,  # Use enum values instead of names.
-    validate_assignment=True,  # Validate assignments after model is created.
     str_strip_whitespace=True,  # Strip whitespace from string fields.
+    use_enum_values=False,  # Do not use enum values during serialization.
 )
 """Pydantic model configuration as described in:
 https://docs.pydantic.dev/2.0/api/config/#pydantic.config.ConfigDict
@@ -62,132 +82,220 @@ DESCR_MIN_LENGTH = 3
 """Minimum length for a description field."""
 
 DescriptionStr: TypeAlias = Annotated[str, Field(min_length=DESCR_MIN_LENGTH)]
-"""
-characters long. Stripping of whitespace is done globally on all str fields."""
+"""Type for a description, which must be three or more characters long."""
 
 
 class BaseObject(BaseModel):
-    """Base
+    """Base model.
+
+    All classes representing objects in the Felis data model should inherit
+    from this class.
+    """
 
     model_config = CONFIG
     """Pydantic model configuration."""
 
     name: str
-    """
-
-    All Felis database objects must have a name.
-    """
+    """Name of the database object."""
 
     id: str = Field(alias="@id")
-    """
-
-    All Felis database objects must have a unique identifier.
-    """
+    """Unique identifier of the database object."""
 
     description: DescriptionStr | None = None
-    """
+    """Description of the database object."""
 
-
-
-    """
-
-    @model_validator(mode="before")
-    @classmethod
-    def check_description(cls, values: dict[str, Any]) -> dict[str, Any]:
-        """Check that the description is present if required."""
-        if Schema.is_description_required():
-            if "description" not in values or not values["description"]:
-                raise ValueError("Description is required and must be non-empty")
-            if len(values["description"].strip()) < DESCR_MIN_LENGTH:
-                raise ValueError(f"Description must be at least {DESCR_MIN_LENGTH} characters long")
-        return values
+    votable_utype: str | None = Field(None, alias="votable:utype")
+    """VOTable utype (usage-specific or unique type) of the object."""
 
+    @model_validator(mode="after")
+    def check_description(self, info: ValidationInfo) -> BaseObject:
+        """Check that the description is present if required.
+
+        Parameters
+        ----------
+        info
+            Validation context used to determine if the check is enabled.
+
+        Returns
+        -------
+        `BaseObject`
+            The object being validated.
+        """
+        context = info.context
+        if not context or not context.get("check_description", False):
+            return self
+        if self.description is None or self.description == "":
+            raise ValueError("Description is required and must be non-empty")
+        if len(self.description) < DESCR_MIN_LENGTH:
+            raise ValueError(f"Description must be at least {DESCR_MIN_LENGTH} characters long")
+        return self
 
-class DataType(Enum):
-    """`Enum` representing the data types supported by Felis."""
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+class DataType(StrEnum):
+    """``Enum`` representing the data types supported by Felis."""
+
+    boolean = auto()
+    byte = auto()
+    short = auto()
+    int = auto()
+    long = auto()
+    float = auto()
+    double = auto()
+    char = auto()
+    string = auto()
+    unicode = auto()
+    text = auto()
+    binary = auto()
+    timestamp = auto()
+
+
+def validate_ivoa_ucd(ivoa_ucd: str) -> str:
+    """Validate IVOA UCD values.
+
+    Parameters
+    ----------
+    ivoa_ucd
+        IVOA UCD value to check.
+
+    Returns
+    -------
+    `str`
+        The IVOA UCD value if it is valid.
+
+    Raises
+    ------
+    ValueError
+        If the IVOA UCD value is invalid.
+    """
+    if ivoa_ucd is not None:
+        try:
+            ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
+        except ValueError as e:
+            raise ValueError(f"Invalid IVOA UCD: {e}")
+    return ivoa_ucd
 
 
 class Column(BaseObject):
-    """
+    """Column model."""
 
     datatype: DataType
-    """
+    """Datatype of the column."""
 
-    length: int | None = None
-    """
+    length: int | None = Field(None, gt=0)
+    """Length of the column."""
+
+    precision: int | None = Field(None, ge=0)
+    """The numerical precision of the column.
+
+    For timestamps, this is the number of fractional digits retained in the
+    seconds field.
+    """
 
     nullable: bool = True
-    """Whether the column can be
+    """Whether the column can be ``NULL``."""
 
-    value:
-    """
+    value: str | int | float | bool | None = None
+    """Default value of the column."""
 
     autoincrement: bool | None = None
     """Whether the column is autoincremented."""
 
-    mysql_datatype: str | None = Field(None, alias="mysql:datatype")
-    """The MySQL datatype of the column."""
-
     ivoa_ucd: str | None = Field(None, alias="ivoa:ucd")
-    """
+    """IVOA UCD of the column."""
 
     fits_tunit: str | None = Field(None, alias="fits:tunit")
-    """
+    """FITS TUNIT of the column."""
 
     ivoa_unit: str | None = Field(None, alias="ivoa:unit")
-    """
+    """IVOA unit of the column."""
 
     tap_column_index: int | None = Field(None, alias="tap:column_index")
-    """
+    """TAP_SCHEMA column index of the column."""
 
     tap_principal: int | None = Field(0, alias="tap:principal", ge=0, le=1)
-    """Whether this is a TAP_SCHEMA principal column
-    """
+    """Whether this is a TAP_SCHEMA principal column."""
 
-    votable_arraysize: int |
-    """
+    votable_arraysize: int | str | None = Field(None, alias="votable:arraysize")
+    """VOTable arraysize of the column."""
 
     tap_std: int | None = Field(0, alias="tap:std", ge=0, le=1)
     """TAP_SCHEMA indication that this column is defined by an IVOA standard.
     """
 
-    votable_utype: str | None = Field(None, alias="votable:utype")
-    """The VOTable utype (usage-specific or unique type) of the column."""
-
     votable_xtype: str | None = Field(None, alias="votable:xtype")
-    """
+    """VOTable xtype (extended type) of the column."""
+
+    votable_datatype: str | None = Field(None, alias="votable:datatype")
+    """VOTable datatype of the column."""
+
+    mysql_datatype: str | None = Field(None, alias="mysql:datatype")
+    """MySQL datatype override on the column."""
+
+    postgresql_datatype: str | None = Field(None, alias="postgresql:datatype")
+    """PostgreSQL datatype override on the column."""
+
+    @model_validator(mode="after")
+    def check_value(self) -> Column:
+        """Check that the default value is valid.
+
+        Returns
+        -------
+        `Column`
+            The column being validated.
+        """
+        if (value := self.value) is not None:
+            if value is not None and self.autoincrement is True:
+                raise ValueError("Column cannot have both a default value and be autoincremented")
+            felis_type = FelisType.felis_type(self.datatype)
+            if felis_type.is_numeric:
+                if felis_type in (Byte, Short, Int, Long) and not isinstance(value, int):
+                    raise ValueError("Default value must be an int for integer type columns")
+                elif felis_type in (Float, Double) and not isinstance(value, float):
+                    raise ValueError("Default value must be a decimal number for float and double columns")
+            elif felis_type in (String, Char, Unicode, Text):
+                if not isinstance(value, str):
+                    raise ValueError("Default value must be a string for string columns")
+                if not len(value):
+                    raise ValueError("Default value must be a non-empty string for string columns")
+            elif felis_type is Boolean and not isinstance(value, bool):
+                raise ValueError("Default value must be a boolean for boolean columns")
+        return self
 
     @field_validator("ivoa_ucd")
     @classmethod
     def check_ivoa_ucd(cls, ivoa_ucd: str) -> str:
-        """Check that IVOA UCD values are valid.
-        if ivoa_ucd is not None:
-            try:
-                ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
-            except ValueError as e:
-                raise ValueError(f"Invalid IVOA UCD: {e}")
-        return ivoa_ucd
+        """Check that IVOA UCD values are valid.
 
-
-
-
-
-
-
+        Parameters
+        ----------
+        ivoa_ucd
+            IVOA UCD value to check.
+
+        Returns
+        -------
+        `str`
+            The IVOA UCD value if it is valid.
+        """
+        return validate_ivoa_ucd(ivoa_ucd)
+
+    @model_validator(mode="after")
+    def check_units(self) -> Column:
+        """Check that the ``fits:tunit`` or ``ivoa:unit`` field has valid
+        units according to astropy. Only one may be provided.
+
+        Returns
+        -------
+        `Column`
+            The column being validated.
+
+        Raises
+        ------
+        ValueError
+            Raised if both FITS and IVOA units are provided, or if the unit is
+            invalid.
+        """
+        fits_unit = self.fits_tunit
+        ivoa_unit = self.ivoa_unit
 
         if fits_unit and ivoa_unit:
             raise ValueError("Column cannot have both FITS and IVOA units")
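
Several of the new validators above only run when a flag is supplied through Pydantic's validation context (`check_description` here; `check_redundant_datatypes` and others appear later in the diff), replacing the old class-level `Schema.is_description_required()` switch. A minimal sketch of how a caller might enable the description check — the flag name and `model_validate(..., context=...)` call come from the diff and the Pydantic v2 API, while the YAML content is invented for illustration:

```python
import yaml

from felis.datamodel import Schema

# Hypothetical Felis schema document; any valid schema YAML would do.
SCHEMA_YAML = """
name: sdqa
"@id": "#sdqa"
description: Science Data Quality Assurance tables
tables: []
"""

# Passing context={"check_description": True} turns the optional
# description check on; without the flag the validator returns early.
schema = Schema.model_validate(
    yaml.safe_load(SCHEMA_YAML),
    context={"check_description": True},
)
```

Routing these checks through the validation context means the same models can be validated strictly or permissively per call, without mutating global state.
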
@@ -199,55 +307,420 @@ class Column(BaseObject):
         except ValueError as e:
             raise ValueError(f"Invalid unit: {e}")
 
+        return self
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_length(cls, values: dict[str, Any]) -> dict[str, Any]:
+        """Check that a valid length is provided for sized types.
+
+        Parameters
+        ----------
+        values
+            Values of the column.
+
+        Returns
+        -------
+        `dict` [ `str`, `Any` ]
+            The values of the column.
+
+        Raises
+        ------
+        ValueError
+            Raised if a length is not provided for a sized type.
+        """
+        datatype = values.get("datatype")
+        if datatype is None:
+            # Skip this validation if datatype is not provided
+            return values
+        length = values.get("length")
+        felis_type = FelisType.felis_type(datatype)
+        if felis_type.is_sized and length is None:
+            raise ValueError(
+                f"Length must be provided for type '{datatype}'"
+                + (f" in column '{values['@id']}'" if "@id" in values else "")
+            )
+        elif not felis_type.is_sized and length is not None:
+            logger.warning(
+                f"The datatype '{datatype}' does not support a specified length"
+                + (f" in column '{values['@id']}'" if "@id" in values else "")
+            )
+        return values
+
+    @model_validator(mode="after")
+    def check_redundant_datatypes(self, info: ValidationInfo) -> Column:
+        """Check for redundant datatypes on columns.
+
+        Parameters
+        ----------
+        info
+            Validation context used to determine if the check is enabled.
+
+        Returns
+        -------
+        `Column`
+            The column being validated.
+
+        Raises
+        ------
+        ValueError
+            Raised if a datatype override is redundant.
+        """
+        context = info.context
+        if not context or not context.get("check_redundant_datatypes", False):
+            return self
+        if all(
+            getattr(self, f"{dialect}:datatype", None) is not None
+            for dialect in get_supported_dialects().keys()
+        ):
+            return self
+
+        datatype = self.datatype
+        length: int | None = self.length or None
+
+        datatype_func = get_type_func(datatype)
+        felis_type = FelisType.felis_type(datatype)
+        if felis_type.is_sized:
+            datatype_obj = datatype_func(length)
+        else:
+            datatype_obj = datatype_func()
+
+        for dialect_name, dialect in get_supported_dialects().items():
+            db_annotation = f"{dialect_name}_datatype"
+            if datatype_string := self.model_dump().get(db_annotation):
+                db_datatype_obj = string_to_typeengine(datatype_string, dialect, length)
+                if datatype_obj.compile(dialect) == db_datatype_obj.compile(dialect):
+                    raise ValueError(
+                        "'{}: {}' is a redundant override of 'datatype: {}' in column '{}'{}".format(
+                            db_annotation,
+                            datatype_string,
+                            self.datatype,
+                            self.id,
+                            "" if length is None else f" with length {length}",
+                        )
+                    )
+                else:
+                    logger.debug(
+                        f"Type override of 'datatype: {self.datatype}' "
+                        f"with '{db_annotation}: {datatype_string}' in column '{self.id}' "
+                        f"compiled to '{datatype_obj.compile(dialect)}' and "
+                        f"'{db_datatype_obj.compile(dialect)}'"
+                    )
+        return self
+
+    @model_validator(mode="after")
+    def check_precision(self) -> Column:
+        """Check that precision is only valid for timestamp columns.
+
+        Returns
+        -------
+        `Column`
+            The column being validated.
+        """
+        if self.precision is not None and self.datatype != "timestamp":
+            raise ValueError("Precision is only valid for timestamp columns")
+        return self
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_votable_arraysize(cls, values: dict[str, Any], info: ValidationInfo) -> dict[str, Any]:
+        """Set the default value for the ``votable_arraysize`` field, which
+        corresponds to ``arraysize`` in the IVOA VOTable standard.
+
+        Parameters
+        ----------
+        values
+            Values of the column.
+        info
+            Validation context used to determine if the check is enabled.
+
+        Returns
+        -------
+        `dict` [ `str`, `Any` ]
+            The values of the column.
+
+        Notes
+        -----
+        Following the IVOA VOTable standard, an ``arraysize`` of 1 should not
+        be used.
+        """
+        if values.get("name", None) is None or values.get("datatype", None) is None:
+            # Skip bad column data that will not validate
+            return values
+        context = info.context if info.context else {}
+        arraysize = values.get("votable:arraysize", None)
+        if arraysize is None:
+            length = values.get("length", None)
+            datatype = values.get("datatype")
+            if length is not None and length > 1:
+                # Following the IVOA standard, arraysize of 1 is disallowed
+                if datatype == "char":
+                    arraysize = str(length)
+                elif datatype in ("string", "unicode", "binary"):
+                    if context.get("force_unbounded_arraysize", False):
+                        arraysize = "*"
+                        logger.debug(
+                            f"Forced VOTable's 'arraysize' to '*' on column '{values['name']}' with datatype "
+                            + f"'{values['datatype']}' and length '{length}'"
+                        )
+                    else:
+                        arraysize = f"{length}*"
+            elif datatype in ("timestamp", "text"):
+                arraysize = "*"
+            if arraysize is not None:
+                values["votable:arraysize"] = arraysize
+                logger.debug(
+                    f"Set default 'votable:arraysize' to '{arraysize}' on column '{values['name']}'"
+                    + f" with datatype '{values['datatype']}' and length '{values.get('length', None)}'"
+                )
+        else:
+            logger.debug(f"Using existing 'votable:arraysize' of '{arraysize}' on column '{values['name']}'")
+            if isinstance(values["votable:arraysize"], int):
+                logger.warning(
+                    f"Usage of an integer value for 'votable:arraysize' in column '{values['name']}' is "
+                    + "deprecated"
+                )
+                values["votable:arraysize"] = str(arraysize)
         return values
 
+    @field_serializer("datatype")
+    def serialize_datatype(self, value: DataType) -> str:
+        """Convert `DataType` to string when serializing to JSON/YAML.
+
+        Parameters
+        ----------
+        value
+            The `DataType` value to serialize.
+
+        Returns
+        -------
+        `str`
+            The serialized `DataType` value.
+        """
+        return str(value)
+
+    @field_validator("datatype", mode="before")
+    @classmethod
+    def deserialize_datatype(cls, value: str) -> DataType:
+        """Convert string back into `DataType` when loading from JSON/YAML.
+
+        Parameters
+        ----------
+        value
+            The string value to deserialize.
+
+        Returns
+        -------
+        `DataType`
+            The deserialized `DataType` value.
+        """
+        return DataType(value)
+
+    @model_validator(mode="after")
+    def check_votable_xtype(self) -> Column:
+        """Set the default value for the ``votable_xtype`` field, which
+        corresponds to an Extended Datatype or ``xtype`` in the IVOA VOTable
+        standard.
+
+        Returns
+        -------
+        `Column`
+            The column being validated.
+
+        Notes
+        -----
+        This is currently only set automatically for the Felis ``timestamp``
+        datatype.
+        """
+        if self.datatype == DataType.timestamp and self.votable_xtype is None:
+            self.votable_xtype = "timestamp"
+        return self
+
 
 class Constraint(BaseObject):
-    """
+    """Table constraint model."""
 
     deferrable: bool = False
-    """
+    """Whether this constraint will be declared as deferrable."""
 
-    initially:
-    """Value for ``INITIALLY`` clause
+    initially: Literal["IMMEDIATE", "DEFERRED"] | None = None
+    """Value for ``INITIALLY`` clause; only used if `deferrable` is
+    `True`."""
 
-
-
-
-
-
+    @model_validator(mode="after")
+    def check_deferrable(self) -> Constraint:
+        """Check that the ``INITIALLY`` clause is only used if `deferrable` is
+        `True`.
+
+        Returns
+        -------
+        `Constraint`
+            The constraint being validated.
+        """
+        if self.initially is not None and not self.deferrable:
+            raise ValueError("INITIALLY clause can only be used if deferrable is True")
+        return self
 
 
 class CheckConstraint(Constraint):
-    """
+    """Table check constraint model."""
+
+    type: Literal["Check"] = Field("Check", alias="@type")
+    """Type of the constraint."""
 
     expression: str
-    """
+    """Expression for the check constraint."""
+
+    @field_serializer("type")
+    def serialize_type(self, value: str) -> str:
+        """Ensure '@type' is included in serialized output.
+
+        Parameters
+        ----------
+        value
+            The value to serialize.
+
+        Returns
+        -------
+        `str`
+            The serialized value.
+        """
+        return value
 
 
 class UniqueConstraint(Constraint):
-    """
+    """Table unique constraint model."""
+
+    type: Literal["Unique"] = Field("Unique", alias="@type")
+    """Type of the constraint."""
 
     columns: list[str]
-    """
+    """Columns in the unique constraint."""
+
+    @field_serializer("type")
+    def serialize_type(self, value: str) -> str:
+        """Ensure '@type' is included in serialized output.
+
+        Parameters
+        ----------
+        value
+            The value to serialize.
+
+        Returns
+        -------
+        `str`
+            The serialized value.
+        """
+        return value
+
+
+class ForeignKeyConstraint(Constraint):
+    """Table foreign key constraint model.
+
+    This constraint is used to define a foreign key relationship between two
+    tables in the schema. There must be at least one column in the
+    `columns` list, and at least one column in the `referenced_columns` list
+    or a validation error will be raised.
+
+    Notes
+    -----
+    These relationships will be reflected in the TAP_SCHEMA ``keys`` and
+    ``key_columns`` data.
+    """
+
+    type: Literal["ForeignKey"] = Field("ForeignKey", alias="@type")
+    """Type of the constraint."""
+
+    columns: list[str] = Field(min_length=1)
+    """The columns comprising the foreign key."""
+
+    referenced_columns: list[str] = Field(alias="referencedColumns", min_length=1)
+    """The columns referenced by the foreign key."""
+
+    on_delete: Literal["CASCADE", "SET NULL", "SET DEFAULT", "RESTRICT", "NO ACTION"] | None = None
+    """Action to take when the referenced row is deleted."""
+
+    on_update: Literal["CASCADE", "SET NULL", "SET DEFAULT", "RESTRICT", "NO ACTION"] | None = None
+    """Action to take when the referenced row is updated."""
+
+    @field_serializer("type")
+    def serialize_type(self, value: str) -> str:
+        """Ensure '@type' is included in serialized output.
+
+        Parameters
+        ----------
+        value
+            The value to serialize.
+
+        Returns
+        -------
+        `str`
+            The serialized value.
+        """
+        return value
+
+    @model_validator(mode="after")
+    def check_column_lengths(self) -> ForeignKeyConstraint:
+        """Check that the `columns` and `referenced_columns` lists have the
+        same length.
+
+        Returns
+        -------
+        `ForeignKeyConstraint`
+            The foreign key constraint being validated.
+
+        Raises
+        ------
+        ValueError
+            Raised if the `columns` and `referenced_columns` lists do not have
+            the same length.
+        """
+        if len(self.columns) != len(self.referenced_columns):
+            raise ValueError(
+                "Columns and referencedColumns must have the same length for a ForeignKey constraint"
+            )
+        return self
+
+
+_ConstraintType = Annotated[
+    CheckConstraint | ForeignKeyConstraint | UniqueConstraint, Field(discriminator="type")
+]
+"""Type alias for a constraint type."""
 
 
 class Index(BaseObject):
-    """
+    """Table index model.
 
     An index can be defined on either columns or expressions, but not both.
     """
 
     columns: list[str] | None = None
-    """
+    """Columns in the index."""
 
     expressions: list[str] | None = None
-    """
+    """Expressions in the index."""
 
     @model_validator(mode="before")
     @classmethod
     def check_columns_or_expressions(cls, values: dict[str, Any]) -> dict[str, Any]:
-        """Check that columns or expressions are specified, but not both.
+        """Check that columns or expressions are specified, but not both.
+
+        Parameters
+        ----------
+        values
+            Values of the index.
+
+        Returns
+        -------
+        `dict` [ `str`, `Any` ]
+            The values of the index.
+
+        Raises
+        ------
+        ValueError
+            Raised if both columns and expressions are specified, or if neither
+            are specified.
+        """
         if "columns" in values and "expressions" in values:
             raise ValueError("Defining columns and expressions is not valid")
         elif "columns" not in values and "expressions" not in values:
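
Modeling `@type` as a `Literal` with `Field(discriminator="type")` (see `_ConstraintType` above) lets Pydantic dispatch each constraint mapping to the correct subclass, replacing the hand-rolled `@type` branching removed from the table model. A small sketch under that reading, with invented constraint and column IDs:

```python
from felis.datamodel import ForeignKeyConstraint

fk = ForeignKeyConstraint.model_validate(
    {
        "name": "fk_exposure_visit",
        "@id": "#fk_exposure_visit",
        "@type": "ForeignKey",
        "columns": ["#exposure.visit_id"],
        "referencedColumns": ["#visit.id"],
    }
)
assert fk.type == "ForeignKey"

# Mismatched lists now fail fast in check_column_lengths rather than
# surfacing later as a database error: columns=["#a.x", "#a.y"] with
# referencedColumns=["#b.x"] raises a ValueError.
```
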
@@ -255,80 +728,234 @@ class Index(BaseObject):
|
|
|
255
728
|
return values
|
|
256
729
|
|
|
257
730
|
|
|
258
|
-
|
|
259
|
-
|
|
731
|
+
ColumnRef: TypeAlias = str
|
|
732
|
+
"""Type alias for a column reference."""
|
|
260
733
|
|
|
261
|
-
These will be reflected in the TAP_SCHEMA keys and key_columns data.
|
|
262
|
-
"""
|
|
263
734
|
|
|
264
|
-
|
|
265
|
-
"""
|
|
735
|
+
class ColumnGroup(BaseObject):
|
|
736
|
+
"""Column group model."""
|
|
266
737
|
|
|
267
|
-
|
|
268
|
-
"""
|
|
738
|
+
columns: list[ColumnRef | Column] = Field(..., min_length=1)
|
|
739
|
+
"""Columns in the group."""
|
|
269
740
|
|
|
741
|
+
ivoa_ucd: str | None = Field(None, alias="ivoa:ucd")
|
|
742
|
+
"""IVOA UCD of the column."""
|
|
270
743
|
|
|
271
|
-
|
|
272
|
-
"""
|
|
744
|
+
table: Table | None = Field(None, exclude=True)
|
|
745
|
+
"""Reference to the parent table."""
|
|
273
746
|
|
|
274
|
-
|
|
275
|
-
|
|
747
|
+
@field_validator("ivoa_ucd")
|
|
748
|
+
@classmethod
|
|
749
|
+
def check_ivoa_ucd(cls, ivoa_ucd: str) -> str:
|
|
750
|
+
"""Check that IVOA UCD values are valid.
|
|
276
751
|
|
|
277
|
-
|
|
278
|
-
|
|
752
|
+
Parameters
|
|
753
|
+
----------
|
|
754
|
+
ivoa_ucd
|
|
755
|
+
IVOA UCD value to check.
|
|
279
756
|
|
|
280
|
-
|
|
281
|
-
|
|
757
|
+
Returns
|
|
758
|
+
-------
|
|
759
|
+
`str`
|
|
760
|
+
The IVOA UCD value if it is valid.
|
|
761
|
+
"""
|
|
762
|
+
return validate_ivoa_ucd(ivoa_ucd)
|
|
763
|
+
|
|
764
|
+
@model_validator(mode="after")
|
|
765
|
+
def check_unique_columns(self) -> ColumnGroup:
|
|
766
|
+
"""Check that the columns list contains unique items.
|
|
282
767
|
|
|
283
|
-
|
|
284
|
-
|
|
768
|
+
Returns
|
|
769
|
+
-------
|
|
770
|
+
`ColumnGroup`
|
|
771
|
+
The column group being validated.
|
|
772
|
+
"""
|
|
773
|
+
column_ids = [col if isinstance(col, str) else col.id for col in self.columns]
|
|
774
|
+
if len(column_ids) != len(set(column_ids)):
|
|
775
|
+
raise ValueError("Columns in the group must be unique")
|
|
776
|
+
return self
|
|
285
777
|
|
|
286
|
-
|
|
287
|
-
|
|
778
|
+
def _dereference_columns(self) -> None:
|
|
779
|
+
"""Dereference ColumnRef to Column objects."""
|
|
780
|
+
if self.table is None:
|
|
781
|
+
raise ValueError("ColumnGroup must have a reference to its parent table")
|
|
782
|
+
|
|
783
|
+
dereferenced_columns: list[ColumnRef | Column] = []
|
|
784
|
+
for col in self.columns:
|
|
785
|
+
if isinstance(col, str):
|
|
786
|
+
# Dereference ColumnRef to Column object
|
|
787
|
+
try:
|
|
788
|
+
col_obj = self.table._find_column_by_id(col)
|
|
789
|
+
except KeyError as e:
|
|
790
|
+
raise ValueError(f"Column '{col}' not found in table '{self.table.name}'") from e
|
|
791
|
+
dereferenced_columns.append(col_obj)
|
|
792
|
+
else:
|
|
793
|
+
dereferenced_columns.append(col)
|
|
794
|
+
|
|
795
|
+
self.columns = dereferenced_columns
|
|
796
|
+
|
|
797
|
+
@field_serializer("columns")
|
|
798
|
+
def serialize_columns(self, columns: list[ColumnRef | Column]) -> list[str]:
|
|
799
|
+
"""Serialize columns as their IDs.
|
|
800
|
+
|
|
801
|
+
Parameters
|
|
802
|
+
----------
|
|
803
|
+
columns
|
|
804
|
+
The columns to serialize.
|
|
805
|
+
|
|
806
|
+
Returns
|
|
807
|
+
-------
|
|
808
|
+
`list` [ `str` ]
|
|
809
|
+
The serialized column IDs.
|
|
810
|
+
"""
|
|
811
|
+
return [col if isinstance(col, str) else col.id for col in columns]
|
|
288
812
|
|
|
289
|
-
mysql_engine: str | None = Field(None, alias="mysql:engine")
|
|
290
|
-
"""The mysql engine to use for the table.
|
|
291
813
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
814
|
+
class Table(BaseObject):
|
|
815
|
+
"""Table model."""
|
|
816
|
+
|
|
817
|
+
primary_key: str | list[str] | None = Field(None, alias="primaryKey")
|
|
818
|
+
"""Primary key of the table."""
|
|
819
|
+
|
|
820
|
+
tap_table_index: int | None = Field(None, alias="tap:table_index")
|
|
821
|
+
"""IVOA TAP_SCHEMA table index of the table."""
|
|
822
|
+
|
|
823
|
+
mysql_engine: str | None = Field("MyISAM", alias="mysql:engine")
|
|
824
|
+
"""MySQL engine to use for the table."""
|
|
295
825
|
|
|
296
826
|
mysql_charset: str | None = Field(None, alias="mysql:charset")
|
|
297
|
-
"""
|
|
827
|
+
"""MySQL charset to use for the table."""
|
|
298
828
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
"""
|
|
829
|
+
columns: Sequence[Column]
|
|
830
|
+
"""Columns in the table."""
|
|
302
831
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
new_constraints.append(ForeignKeyConstraint(**item))
|
|
312
|
-
elif item["@type"] == "Unique":
|
|
313
|
-
new_constraints.append(UniqueConstraint(**item))
|
|
314
|
-
elif item["@type"] == "Check":
|
|
315
|
-
new_constraints.append(CheckConstraint(**item))
|
|
316
|
-
else:
|
|
317
|
-
raise ValueError(f"Unknown constraint type: {item['@type']}")
|
|
318
|
-
values["constraints"] = new_constraints
|
|
319
|
-
return values
|
|
832
|
+
column_groups: list[ColumnGroup] = Field(default_factory=list, alias="columnGroups")
|
|
833
|
+
"""Column groups in the table."""
|
|
834
|
+
|
|
835
|
+
constraints: list[_ConstraintType] = Field(default_factory=list)
|
|
836
|
+
"""Constraints on the table."""
|
|
837
|
+
|
|
838
|
+
indexes: list[Index] = Field(default_factory=list)
|
|
839
|
+
"""Indexes on the table."""
|
|
320
840
|
|
|
321
841
|
@field_validator("columns", mode="after")
|
|
322
842
|
@classmethod
|
|
323
843
|
def check_unique_column_names(cls, columns: list[Column]) -> list[Column]:
|
|
324
|
-
"""Check that column names are unique.
|
|
844
|
+
"""Check that column names are unique.
|
|
845
|
+
|
|
846
|
+
Parameters
|
|
847
|
+
----------
|
|
848
|
+
columns
|
|
849
|
+
The columns to check.
|
|
850
|
+
|
|
851
|
+
Returns
|
|
852
|
+
-------
|
|
853
|
+
`list` [ `Column` ]
|
|
854
|
+
The columns if they are unique.
|
|
855
|
+
|
|
856
|
+
Raises
|
|
857
|
+
------
|
|
858
|
+
ValueError
|
|
859
|
+
Raised if column names are not unique.
|
|
860
|
+
"""
|
|
325
861
|
if len(columns) != len(set(column.name for column in columns)):
|
|
326
862
|
raise ValueError("Column names must be unique")
|
|
327
863
|
return columns
|
|
328
864
|
|
|
865
|
+
@model_validator(mode="after")
|
|
866
|
+
def check_tap_table_index(self, info: ValidationInfo) -> Table:
|
|
867
|
+
"""Check that the table has a TAP table index.
|
|
868
|
+
|
|
869
|
+
Parameters
|
|
870
|
+
----------
|
|
871
|
+
info
|
|
872
|
+
Validation context used to determine if the check is enabled.
|
|
873
|
+
|
|
874
|
+
Returns
|
|
875
|
+
-------
|
|
876
|
+
`Table`
|
|
877
|
+
The table being validated.
|
|
878
|
+
|
|
879
|
+
Raises
|
|
880
|
+
------
|
|
881
|
+
ValueError
|
|
882
|
+
Raised If the table is missing a TAP table index.
|
|
883
|
+
"""
|
|
884
|
+
context = info.context
|
|
885
|
+
if not context or not context.get("check_tap_table_indexes", False):
|
|
886
|
+
return self
|
|
887
|
+
if self.tap_table_index is None:
|
|
888
|
+
raise ValueError("Table is missing a TAP table index")
|
|
889
|
+
return self
|
|
890
|
+
|
|
891
|
+
@model_validator(mode="after")
|
|
892
|
+
def check_tap_principal(self, info: ValidationInfo) -> Table:
|
|
893
|
+
"""Check that at least one column is flagged as 'principal' for TAP
|
|
894
|
+
purposes.
|
|
895
|
+
|
|
896
|
+
Parameters
|
|
897
|
+
----------
|
|
898
|
+
info
|
|
899
|
+
Validation context used to determine if the check is enabled.
|
|
900
|
+
|
|
901
|
+
Returns
|
|
902
|
+
-------
|
|
903
|
+
`Table`
|
|
904
|
+
The table being validated.
|
|
905
|
+
|
|
906
|
+
Raises
|
|
907
|
+
------
|
|
908
|
+
ValueError
|
|
909
|
+
Raised if the table is missing a column flagged as 'principal'.
|
|
910
|
+
"""
|
|
911
|
+
context = info.context
|
|
912
|
+
if not context or not context.get("check_tap_principal", False):
|
|
913
|
+
return self
|
|
914
|
+
for col in self.columns:
|
|
915
|
+
if col.tap_principal == 1:
|
|
916
|
+
return self
|
|
917
|
+
raise ValueError(f"Table '{self.name}' is missing at least one column designated as 'tap:principal'")
|
|
918
|
+
|
|
919
|
+
def _find_column_by_id(self, id: str) -> Column:
|
|
920
|
+
"""Find a column by ID.
|
|
921
|
+
|
|
922
|
+
Parameters
|
|
923
|
+
----------
|
|
924
|
+
id
|
|
925
|
+
The ID of the column to find.
|
|
926
|
+
|
|
927
|
+
Returns
|
|
928
|
+
-------
|
|
929
|
+
`Column`
|
|
930
|
+
The column with the given ID.
|
|
931
|
+
|
|
932
|
+
Raises
|
|
933
|
+
------
|
|
934
|
+
ValueError
|
|
935
|
+
Raised if the column is not found.
|
|
936
|
+
"""
|
|
937
|
+
for column in self.columns:
|
|
938
|
+
if column.id == id:
|
|
939
|
+
return column
|
|
940
|
+
raise KeyError(f"Column '{id}' not found in table '{self.name}'")
|
|
941
|
+
|
|
942
|
+
@model_validator(mode="after")
|
|
943
|
+
def dereference_column_groups(self: Table) -> Table:
|
|
944
|
+
"""Dereference columns in column groups.
|
|
945
|
+
|
|
946
|
+
Returns
|
|
947
|
+
-------
|
|
948
|
+
`Table`
|
|
949
|
+
The table with dereferenced column groups.
|
|
950
|
+
"""
|
|
951
|
+
for group in self.column_groups:
|
|
952
|
+
group.table = self
|
|
953
|
+
group._dereference_columns()
|
|
954
|
+
return self
|
|
955
|
+
|
|
329
956
|
|
|
330
957
|
class SchemaVersion(BaseModel):
|
|
331
|
-
"""
|
|
958
|
+
"""Schema version model."""
|
|
332
959
|
|
|
333
960
|
current: str
|
|
334
961
|
"""The current version of the schema."""
|
|
@@ -341,35 +968,49 @@ class SchemaVersion(BaseModel):
|
|
|
341
968
|
|
|
342
969
|
|
|
343
970
|
class SchemaIdVisitor:
|
|
344
|
-
"""
|
|
971
|
+
"""Visit a schema and build the map of IDs to objects.
|
|
345
972
|
|
|
973
|
+
Notes
|
|
974
|
+
-----
|
|
346
975
|
Duplicates are added to a set when they are encountered, which can be
|
|
347
|
-
accessed via the
|
|
976
|
+
accessed via the ``duplicates`` attribute. The presence of duplicates will
|
|
348
977
|
not throw an error. Only the first object with a given ID will be added to
|
|
349
|
-
the map, but this should not matter, since a ValidationError will be
|
|
350
|
-
by the
|
|
351
|
-
|
|
352
|
-
This class is intended for internal use only.
|
|
978
|
+
the map, but this should not matter, since a ``ValidationError`` will be
|
|
979
|
+
thrown by the ``model_validator`` method if any duplicates are found in the
|
|
980
|
+
schema.
|
|
353
981
|
"""
|
|
354
982
|
|
|
355
983
|
def __init__(self) -> None:
|
|
356
984
|
"""Create a new SchemaVisitor."""
|
|
357
|
-
self.schema:
|
|
985
|
+
self.schema: Schema | None = None
|
|
358
986
|
self.duplicates: set[str] = set()
|
|
359
987
|
|
|
360
988
|
def add(self, obj: BaseObject) -> None:
|
|
361
|
-
"""Add an object to the ID map.
|
|
989
|
+
"""Add an object to the ID map.
|
|
990
|
+
|
|
991
|
+
Parameters
|
|
992
|
+
----------
|
|
993
|
+
obj
|
|
994
|
+
The object to add to the ID map.
|
|
995
|
+
"""
|
|
362
996
|
if hasattr(obj, "id"):
|
|
363
997
|
obj_id = getattr(obj, "id")
|
|
364
998
|
if self.schema is not None:
|
|
365
|
-
if obj_id in self.schema.
|
|
999
|
+
if obj_id in self.schema._id_map:
|
|
366
1000
|
self.duplicates.add(obj_id)
|
|
367
1001
|
else:
|
|
368
|
-
self.schema.
|
|
1002
|
+
self.schema._id_map[obj_id] = obj
|
|
369
1003
|
|
|
370
|
-
def visit_schema(self, schema:
|
|
371
|
-
"""Visit the schema
|
|
1004
|
+
def visit_schema(self, schema: Schema) -> None:
|
|
1005
|
+
"""Visit the objects in a schema and build the ID map.
|
|
372
1006
|
|
|
1007
|
+
Parameters
|
|
1008
|
+
----------
|
|
1009
|
+
schema
|
|
1010
|
+
The schema object to visit.
|
|
1011
|
+
|
|
1012
|
+
Notes
|
|
1013
|
+
-----
|
|
373
1014
|
This will set an internal variable pointing to the schema object.
|
|
374
1015
|
"""
|
|
375
1016
|
self.schema = schema
|
|
@@ -379,7 +1020,13 @@ class SchemaIdVisitor:
|
|
|
379
1020
|
self.visit_table(table)
|
|
380
1021
|
|
|
381
1022
|
def visit_table(self, table: Table) -> None:
|
|
382
|
-
"""Visit a table object.
|
|
1023
|
+
"""Visit a table object.
|
|
1024
|
+
|
|
1025
|
+
Parameters
|
|
1026
|
+
----------
|
|
1027
|
+
table
|
|
1028
|
+
The table object to visit.
|
|
1029
|
+
"""
|
|
383
1030
|
self.add(table)
|
|
384
1031
|
for column in table.columns:
|
|
385
1032
|
self.visit_column(column)
|
|
@@ -387,25 +1034,84 @@ class SchemaIdVisitor:
|
|
|
387
1034
|
self.visit_constraint(constraint)
|
|
388
1035
|
|
|
389
1036
|
def visit_column(self, column: Column) -> None:
|
|
390
|
-
"""Visit a column object.
|
|
1037
|
+
"""Visit a column object.
|
|
1038
|
+
|
|
1039
|
+
Parameters
|
|
1040
|
+
----------
|
|
1041
|
+
column
|
|
1042
|
+
The column object to visit.
|
|
1043
|
+
"""
|
|
391
1044
|
self.add(column)
|
|
392
1045
|
|
|
393
1046
|
def visit_constraint(self, constraint: Constraint) -> None:
|
|
394
|
-
"""Visit a constraint object.
|
|
1047
|
+
"""Visit a constraint object.
|
|
1048
|
+
|
|
1049
|
+
Parameters
|
|
1050
|
+
----------
|
|
1051
|
+
constraint
|
|
1052
|
+
The constraint object to visit.
|
|
1053
|
+
"""
|
|
395
1054
|
self.add(constraint)
|
|
396
1055
|
|
|
397
1056
|
|
|
398
|
-
|
|
399
|
-
"""The database schema containing the tables."""
|
|
1057
|
+
T = TypeVar("T", bound=BaseObject)
|
|
400
1058
|
|
|
401
|
-
class ValidationConfig:
|
|
402
|
-
"""Validation configuration which is specific to Felis."""
|
|
403
1059
|
|
|
404
|
-
|
|
405
|
-
|
|
1060
|
+
def _strip_ids(data: Any) -> Any:
|
|
1061
|
+
"""Recursively strip '@id' fields from a dictionary or list.
|
|
1062
|
+
|
|
1063
|
+
Parameters
|
|
1064
|
+
----------
|
|
1065
|
+
data
|
|
1066
|
+
The data to strip IDs from, which can be a dictionary, list, or any
|
|
1067
|
+
other type. Other types will be returned unchanged.
|
|
1068
|
+
"""
|
|
1069
|
+
if isinstance(data, dict):
|
|
1070
|
+
data.pop("@id", None)
|
|
1071
|
+
for k, v in data.items():
|
|
1072
|
+
data[k] = _strip_ids(v)
|
|
1073
|
+
return data
|
|
1074
|
+
elif isinstance(data, list):
|
|
1075
|
+
return [_strip_ids(item) for item in data]
|
|
1076
|
+
else:
|
|
1077
|
+
return data
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
def _append_error(
|
|
1081
|
+
errors: list[InitErrorDetails],
|
|
1082
|
+
loc: tuple,
|
|
1083
|
+
input_value: Any,
|
|
1084
|
+
error_message: str,
|
|
1085
|
+
error_type: str = "value_error",
|
|
1086
|
+
) -> None:
|
|
1087
|
+
"""Append an error to the errors list.
|
|
1088
|
+
|
|
1089
|
+
Parameters
|
|
1090
|
+
----------
|
|
1091
|
+
errors : list[InitErrorDetails]
|
|
1092
|
+
The list of errors to append to.
|
|
1093
|
+
loc : tuple
|
|
1094
|
+
The location of the error in the schema.
|
|
1095
|
+
input_value : Any
|
|
1096
|
+
The input value that caused the error.
|
|
1097
|
+
error_message : str
|
|
1098
|
+
The error message to include in the context.
|
|
1099
|
+
"""
|
|
1100
|
+
errors.append(
|
|
1101
|
+
{
|
|
1102
|
+
"type": error_type,
|
|
1103
|
+
"loc": loc,
|
|
1104
|
+
"input": input_value,
|
|
1105
|
+
"ctx": {"error": error_message},
|
|
1106
|
+
}
|
|
1107
|
+
)
|
|
406
1108
|
|
|
407
|
-
|
|
408
|
-
|
|
1109
|
+
|
|
1110
|
+
class Schema(BaseObject, Generic[T]):
|
|
1111
|
+
"""Database schema model.
|
|
1112
|
+
|
|
1113
|
+
This represents a database schema, which contains one or more tables.
|
|
1114
|
+
"""
|
|
409
1115
|
|
|
410
1116
|
version: SchemaVersion | str | None = None
|
|
411
1117
|
"""The version of the schema."""
|
|
@@ -413,52 +1119,539 @@ class Schema(BaseObject):
|
|
|
413
1119
|
tables: Sequence[Table]
|
|
414
1120
|
"""The tables in the schema."""
|
|
415
1121
|
|
|
416
|
-
|
|
1122
|
+
_id_map: dict[str, Any] = PrivateAttr(default_factory=dict)
|
|
417
1123
|
"""Map of IDs to objects."""
|
|
418
1124
|
|
|
1125
|
+
@model_validator(mode="before")
|
|
1126
|
+
@classmethod
|
|
1127
|
+
def generate_ids(cls, values: dict[str, Any], info: ValidationInfo) -> dict[str, Any]:
|
|
1128
|
+
"""Generate IDs for objects that do not have them.
|
|
1129
|
+
|
|
1130
|
+
Parameters
|
|
1131
|
+
----------
|
|
1132
|
+
values
|
|
1133
|
+
The values of the schema.
|
|
1134
|
+
info
|
|
1135
|
+
Validation context used to determine if ID generation is enabled.
|
|
1136
|
+
|
|
1137
|
+
Returns
|
|
1138
|
+
-------
|
|
1139
|
+
`dict` [ `str`, `Any` ]
|
|
1140
|
+
The values of the schema with generated IDs.
|
|
1141
|
+
"""
|
|
1142
|
+
context = info.context
|
|
1143
|
+
if not context or not context.get("id_generation", False):
|
|
1144
|
+
logger.debug("Skipping ID generation")
|
|
1145
|
+
return values
|
|
1146
|
+
schema_name = values["name"]
|
|
1147
|
+
if "@id" not in values:
|
|
1148
|
+
values["@id"] = f"#{schema_name}"
|
|
1149
|
+
logger.debug(f"Generated ID '{values['@id']}' for schema '{schema_name}'")
|
|
1150
|
+
if "tables" in values:
|
|
1151
|
+
for table in values["tables"]:
|
|
1152
|
+
if "@id" not in table:
|
|
1153
|
+
table["@id"] = f"#{table['name']}"
|
|
1154
|
+
logger.debug(f"Generated ID '{table['@id']}' for table '{table['name']}'")
|
|
1155
|
+
if "columns" in table:
|
|
1156
|
+
for column in table["columns"]:
|
|
1157
|
+
if "@id" not in column:
|
|
1158
|
+
column["@id"] = f"#{table['name']}.{column['name']}"
|
|
1159
|
+
logger.debug(f"Generated ID '{column['@id']}' for column '{column['name']}'")
|
|
1160
|
+
if "columnGroups" in table:
|
|
1161
|
+
for column_group in table["columnGroups"]:
|
|
1162
|
+
if "@id" not in column_group:
|
|
1163
|
+
column_group["@id"] = f"#{table['name']}.{column_group['name']}"
|
|
1164
|
+
logger.debug(
|
|
1165
|
+
f"Generated ID '{column_group['@id']}' for column group "
|
|
1166
|
+
f"'{column_group['name']}'"
|
|
1167
|
+
)
|
|
1168
|
+
if "constraints" in table:
|
|
1169
|
+
for constraint in table["constraints"]:
|
|
1170
|
+
if "@id" not in constraint:
|
|
1171
|
+
constraint["@id"] = f"#{constraint['name']}"
|
|
1172
|
+
logger.debug(
|
|
1173
|
+
f"Generated ID '{constraint['@id']}' for constraint '{constraint['name']}'"
|
|
1174
|
+
)
|
|
1175
|
+
if "indexes" in table:
|
|
1176
|
+
for index in table["indexes"]:
|
|
1177
|
+
if "@id" not in index:
|
|
1178
|
+
index["@id"] = f"#{index['name']}"
|
|
1179
|
+
logger.debug(f"Generated ID '{index['@id']}' for index '{index['name']}'")
|
|
1180
|
+
return values
|
|
1181
|
+
|
|
419
1182
|
@field_validator("tables", mode="after")
|
|
420
1183
|
@classmethod
|
|
421
1184
|
def check_unique_table_names(cls, tables: list[Table]) -> list[Table]:
|
|
422
|
-
"""Check that table names are unique.
|
|
1185
|
+
"""Check that table names are unique.
|
|
1186
|
+
|
|
1187
|
+
Parameters
|
|
1188
|
+
----------
|
|
1189
|
+
tables
|
|
1190
|
+
The tables to check.
|
|
1191
|
+
|
|
1192
|
+
Returns
|
|
1193
|
+
-------
|
|
1194
|
+
`list` [ `Table` ]
|
|
1195
|
+
The tables if they are unique.
|
|
1196
|
+
|
|
1197
|
+
Raises
|
|
1198
|
+
------
|
|
1199
|
+
ValueError
|
|
1200
|
+
Raised if table names are not unique.
|
|
1201
|
+
"""
|
|
423
1202
|
if len(tables) != len(set(table.name for table in tables)):
|
|
424
1203
|
raise ValueError("Table names must be unique")
|
|
425
1204
|
return tables
|
|
426
1205
|
|
|
1206
|
+
@model_validator(mode="after")
|
|
1207
|
+
def check_tap_table_indexes(self, info: ValidationInfo) -> Schema:
|
|
1208
|
+
"""Check that the TAP table indexes are unique.
|
|
1209
|
+
|
|
1210
|
+
Parameters
|
|
1211
|
+
----------
|
|
1212
|
+
info
|
|
1213
|
+
The validation context used to determine if the check is enabled.
|
|
1214
|
+
|
|
1215
|
+
Returns
|
|
1216
|
+
-------
|
|
1217
|
+
`Schema`
|
|
1218
|
+
The schema being validated.
|
|
1219
|
+
"""
|
|
1220
|
+
context = info.context
|
|
1221
|
+
if not context or not context.get("check_tap_table_indexes", False):
|
|
1222
|
+
return self
|
|
1223
|
+
table_indicies = set()
|
|
1224
|
+
for table in self.tables:
|
|
1225
|
+
table_index = table.tap_table_index
|
|
1226
|
+
if table_index is not None:
|
|
1227
|
+
if table_index in table_indicies:
|
|
1228
|
+
raise ValueError(f"Duplicate 'tap:table_index' value {table_index} found in schema")
|
|
1229
|
+
table_indicies.add(table_index)
|
|
1230
|
+
return self
|
|
1231
|
+
|
|
1232
|
+
@model_validator(mode="after")
|
|
1233
|
+
def check_unique_constraint_names(self: Schema) -> Schema:
|
|
1234
|
+
"""Check for duplicate constraint names in the schema.
|
|
1235
|
+
|
|
1236
|
+
Returns
|
|
1237
|
+
-------
|
|
1238
|
+
`Schema`
|
|
1239
|
+
The schema being validated.
|
|
1240
|
+
|
|
1241
|
+
Raises
|
|
1242
|
+
------
|
|
1243
|
+
ValueError
|
|
1244
|
+
Raised if duplicate constraint names are found in the schema.
|
|
1245
|
+
"""
|
|
1246
|
+
constraint_names = set()
|
|
1247
|
+
duplicate_names = []
|
|
1248
|
+
|
|
1249
|
+
for table in self.tables:
|
|
1250
|
+
for constraint in table.constraints:
|
|
1251
|
+
constraint_name = constraint.name
|
|
1252
|
+
if constraint_name in constraint_names:
|
|
1253
|
+
duplicate_names.append(constraint_name)
|
|
1254
|
+
else:
|
|
1255
|
+
constraint_names.add(constraint_name)
|
|
1256
|
+
|
|
1257
|
+
if duplicate_names:
|
|
1258
|
+
raise ValueError(f"Duplicate constraint names found in schema: {duplicate_names}")
|
|
1259
|
+
|
|
1260
|
+
return self
|
|
1261
|
+
|
|
1262
|
+
@model_validator(mode="after")
|
|
1263
|
+
def check_unique_index_names(self: Schema) -> Schema:
|
|
1264
|
+
"""Check for duplicate index names in the schema.
|
|
1265
|
+
|
|
1266
|
+
Returns
|
|
1267
|
+
-------
|
|
1268
|
+
`Schema`
|
|
1269
|
+
The schema being validated.
|
|
1270
|
+
|
|
1271
|
+
Raises
|
|
1272
|
+
------
|
|
1273
|
+
ValueError
|
|
1274
|
+
Raised if duplicate index names are found in the schema.
|
|
1275
|
+
"""
|
|
1276
|
+
index_names = set()
|
|
1277
|
+
duplicate_names = []
|
|
1278
|
+
|
|
1279
|
+
for table in self.tables:
|
|
1280
|
+
for index in table.indexes:
|
|
1281
|
+
index_name = index.name
|
|
1282
|
+
if index_name in index_names:
|
|
1283
|
+
duplicate_names.append(index_name)
|
|
1284
|
+
else:
|
|
1285
|
+
index_names.add(index_name)
|
|
1286
|
+
|
|
1287
|
+
if duplicate_names:
|
|
1288
|
+
raise ValueError(f"Duplicate index names found in schema: {duplicate_names}")
|
|
1289
|
+
|
|
1290
|
+
return self
|
|
1291
|
+
|
|
427
1292
|
@model_validator(mode="after")
|
|
428
1293
|
def create_id_map(self: Schema) -> Schema:
|
|
429
|
-
"""Create a map of IDs to objects.
|
|
1294
|
+
"""Create a map of IDs to objects.
|
|
1295
|
+
|
|
1296
|
+
Returns
|
|
1297
|
+
-------
|
|
1298
|
+
`Schema`
|
|
1299
|
+
The schema with the ID map created.
|
|
1300
|
+
|
|
1301
|
+
Raises
|
|
1302
|
+
------
|
|
1303
|
+
ValueError
|
|
1304
|
+
Raised if duplicate identifiers are found in the schema.
|
|
1305
|
+
"""
|
|
1306
|
+
if self._id_map:
|
|
1307
|
+
logger.debug("Ignoring call to create_id_map() - ID map was already populated")
|
|
1308
|
+
return self
|
|
430
1309
|
visitor: SchemaIdVisitor = SchemaIdVisitor()
|
|
431
1310
|
visitor.visit_schema(self)
|
|
432
|
-
logger.debug(f"ID map contains {len(self.id_map.keys())} objects")
|
|
433
1311
|
if len(visitor.duplicates):
|
|
434
1312
|
raise ValueError(
|
|
435
1313
|
"Duplicate IDs found in schema:\n " + "\n ".join(visitor.duplicates) + "\n"
|
|
436
1314
|
)
|
|
1315
|
+
logger.debug("Created ID map with %d entries", len(self._id_map))
|
|
1316
|
+
return self
|
|
1317
|
+
|
|
1318
|
+
def _validate_column_id(
|
|
1319
|
+
self: Schema,
|
|
1320
|
+
column_id: str,
|
|
1321
|
+
loc: tuple,
|
|
1322
|
+
errors: list[InitErrorDetails],
|
|
1323
|
+
) -> None:
|
|
1324
|
+
"""Validate a column ID from a constraint and append errors if invalid.
|
|
1325
|
+
|
|
1326
|
+
Parameters
|
|
1327
|
+
----------
|
|
1328
|
+
schema : Schema
|
|
1329
|
+
The schema being validated.
|
|
1330
|
+
column_id : str
|
|
1331
|
+
The column ID to validate.
|
|
1332
|
+
loc : tuple
|
|
1333
|
+
The location of the error in the schema.
|
|
1334
|
+
errors : list[InitErrorDetails]
|
|
1335
|
+
The list of errors to append to.
|
|
1336
|
+
"""
|
|
1337
|
+
if column_id not in self:
|
|
1338
|
+
_append_error(
|
|
1339
|
+
errors,
|
|
1340
|
+
loc,
|
|
1341
|
+
column_id,
|
|
1342
|
+
f"Column ID '{column_id}' not found in schema",
|
|
1343
|
+
)
|
|
1344
|
+
elif not isinstance(self[column_id], Column):
|
|
1345
|
+
_append_error(
|
|
1346
|
+
errors,
|
|
1347
|
+
loc,
|
|
1348
|
+
column_id,
|
|
1349
|
+
f"ID '{column_id}' does not refer to a Column object",
|
|
1350
|
+
)
|
|
1351
|
+
|
|
1352
|
+
def _validate_foreign_key_column(
|
|
1353
|
+
self: Schema,
|
|
1354
|
+
column_id: str,
|
|
1355
|
+
table: Table,
|
|
1356
|
+
loc: tuple,
|
|
1357
|
+
errors: list[InitErrorDetails],
|
|
1358
|
+
) -> None:
|
|
1359
|
+
"""Validate a foreign key column ID from a constraint and append errors
|
|
1360
|
+
if invalid.
|
|
1361
|
+
|
|
1362
|
+
Parameters
|
|
1363
|
+
----------
|
|
1364
|
+
schema : Schema
|
|
1365
|
+
The schema being validated.
|
|
1366
|
+
column_id : str
|
|
1367
|
+
The foreign key column ID to validate.
|
|
1368
|
+
loc : tuple
|
|
1369
|
+
The location of the error in the schema.
|
|
1370
|
+
errors : list[InitErrorDetails]
|
|
1371
|
+
The list of errors to append to.
|
|
1372
|
+
"""
|
|
1373
|
+
try:
|
|
1374
|
+
table._find_column_by_id(column_id)
|
|
1375
|
+
except KeyError:
|
|
1376
|
+
_append_error(
|
|
1377
|
+
errors,
|
|
1378
|
+
loc,
|
|
1379
|
+
column_id,
|
|
1380
|
+
f"Column '{column_id}' not found in table '{table.name}'",
|
|
1381
|
+
)
|
|
1382
|
+
|
|
+    @model_validator(mode="after")
+    def check_constraints(self: Schema) -> Schema:
+        """Check constraint objects for validity. This needs to be deferred
+        until after the schema is fully loaded and the ID map is created.
+
+        Raises
+        ------
+        pydantic.ValidationError
+            Raised if any constraints are invalid.
+
+        Returns
+        -------
+        `Schema`
+            The schema being validated.
+        """
+        errors: list[InitErrorDetails] = []
+
+        for table_index, table in enumerate(self.tables):
+            for constraint_index, constraint in enumerate(table.constraints):
+                column_ids: list[str] = []
+                referenced_column_ids: list[str] = []
+
+                if isinstance(constraint, ForeignKeyConstraint):
+                    column_ids += constraint.columns
+                    referenced_column_ids += constraint.referenced_columns
+                elif isinstance(constraint, UniqueConstraint):
+                    column_ids += constraint.columns
+                # No extra checks are required on CheckConstraint objects.
+
+                # Validate the foreign key columns
+                for column_id in column_ids:
+                    self._validate_column_id(
+                        column_id,
+                        (
+                            "tables",
+                            table_index,
+                            "constraints",
+                            constraint_index,
+                            "columns",
+                            column_id,
+                        ),
+                        errors,
+                    )
+                    # Check that the foreign key column is within the source
+                    # table.
+                    self._validate_foreign_key_column(
+                        column_id,
+                        table,
+                        (
+                            "tables",
+                            table_index,
+                            "constraints",
+                            constraint_index,
+                            "columns",
+                            column_id,
+                        ),
+                        errors,
+                    )
+
+                # Validate the primary key (reference) columns
+                for referenced_column_id in referenced_column_ids:
+                    self._validate_column_id(
+                        referenced_column_id,
+                        (
+                            "tables",
+                            table_index,
+                            "constraints",
+                            constraint_index,
+                            "referenced_columns",
+                            referenced_column_id,
+                        ),
+                        errors,
+                    )
+
+        if errors:
+            raise ValidationError.from_exception_data("Schema validation failed", errors)
+
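Because errors are accumulated and raised once through `from_exception_data`, a caller sees every bad constraint in a single `ValidationError` rather than failing on the first one. A hedged sketch of inspecting those errors (`schema_dict` is a placeholder for YAML-loaded data):

```python
from pydantic import ValidationError

try:
    schema = Schema.model_validate(schema_dict)  # schema_dict: placeholder
except ValidationError as e:
    # Each entry carries the loc tuple built above, e.g.
    # ("tables", 0, "constraints", 0, "columns", "#t1.missing").
    for err in e.errors():
        print(err["loc"], "-", err["msg"])
```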
         return self

     def __getitem__(self, id: str) -> BaseObject:
-        """Get an object by its ID."""
+        """Get an object by its ID.
+
+        Parameters
+        ----------
+        id
+            The ID of the object to get.
+
+        Raises
+        ------
+        KeyError
+            Raised if the object with the given ID is not found in the schema.
+        """
         if id not in self:
             raise KeyError(f"Object with ID '{id}' not found in schema")
-        return self.id_map[id]
+        return self._id_map[id]

     def __contains__(self, id: str) -> bool:
-        """Check if an object with the given ID is in the schema."""
-        return id in self.id_map
+        """Check if an object with the given ID is in the schema.

-
-
-
-
-
+        Parameters
+        ----------
+        id
+            The ID of the object to check.
+        """
+        return id in self._id_map
+
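These dunder methods make the schema behave like a read-only mapping keyed by object ID. For example (the IDs here are hypothetical):

```python
# Mapping-style access by "@id" value.
if "#t1.a" in schema:
    obj = schema["#t1.a"]  # the BaseObject registered under that ID

print("#t1.missing" in schema)  # False; __getitem__ would raise KeyError
```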
+    def find_object_by_id(self, id: str, obj_type: type[T]) -> T:
+        """Find an object with the given type by its ID.
+
+        Parameters
+        ----------
+        id
+            The ID of the object to find.
+        obj_type
+            The type of the object to find.
+
+        Returns
+        -------
+        BaseObject
+            The object with the given ID and type.
+
+        Raises
+        ------
+        KeyError
+            If the object with the given ID is not found in the schema.
+        TypeError
+            If the object that is found does not have the right type.
+
+        Notes
+        -----
+        The actual return type is the user-specified argument ``T``, which is
+        expected to be a subclass of `BaseObject`.
+        """
+        obj = self[id]
+        if not isinstance(obj, obj_type):
+            raise TypeError(f"Object with ID '{id}' is not of type '{obj_type.__name__}'")
+        return obj
+
+    def get_table_by_column(self, column: Column) -> Table:
+        """Find the table that contains a column.
+
+        Parameters
+        ----------
+        column
+            The column to find.
+
+        Returns
+        -------
+        `Table`
+            The table that contains the column.
+
+        Raises
+        ------
+        ValueError
+            If the column is not found in any table.
+        """
+        for table in self.tables:
+            if column in table.columns:
+                return table
+        raise ValueError(f"Column '{column.name}' not found in any table")
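A short usage sketch combining the two lookups (the ID is hypothetical):

```python
from felis.datamodel import Column

# Typed lookup: raises TypeError if "#t1.a" maps to a non-Column object.
column = schema.find_object_by_id("#t1.a", Column)

# Reverse lookup from the column back to its containing table.
table = schema.get_table_by_column(column)
print(table.name)
```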
 
-
-
+    @classmethod
+    def from_uri(cls, resource_path: ResourcePathExpression, context: dict[str, Any] = {}) -> Schema:
+        """Load a `Schema` from a string representing a ``ResourcePath``.
+
+        Parameters
+        ----------
+        resource_path
+            The ``ResourcePath`` pointing to a YAML file.
+        context
+            Pydantic context to be used in validation.
+
+        Returns
+        -------
+        `Schema`
+            The schema loaded from the YAML file.
+
+        Raises
+        ------
+        yaml.YAMLError
+            Raised if there is an error loading the YAML data.
+        ValueError
+            Raised if there is an error reading the resource.
+        pydantic.ValidationError
+            Raised if the schema fails validation.
         """
-        logger.debug(f"
-
+        logger.debug(f"Loading schema from: '{resource_path}'")
+        try:
+            rp_stream = ResourcePath(resource_path).read()
+        except Exception as e:
+            raise ValueError(f"Error reading resource from '{resource_path}' : {e}") from e
+        yaml_data = yaml.safe_load(rp_stream)
+        return Schema.model_validate(yaml_data, context=context)
 
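Typical usage, assuming a local YAML file (the filename is a placeholder; any URI scheme supported by `lsst.resources.ResourcePath`, such as `s3://` or `https://`, should work the same way):

```python
from felis.datamodel import Schema

schema = Schema.from_uri("my_schema.yaml")  # placeholder path
print(schema.name)
```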
     @classmethod
-    def
-        """
-
+    def from_stream(cls, source: IO[str], context: dict[str, Any] = {}) -> Schema:
+        """Load a `Schema` from a file stream which should contain YAML data.
+
+        Parameters
+        ----------
+        source
+            The file stream to read from.
+        context
+            Pydantic context to be used in validation.
+
+        Returns
+        -------
+        `Schema`
+            The Felis schema loaded from the stream.
+
+        Raises
+        ------
+        yaml.YAMLError
+            Raised if there is an error loading the YAML file.
+        pydantic.ValidationError
+            Raised if the schema fails validation.
+        """
+        logger.debug("Loading schema from: '%s'", source)
+        yaml_data = yaml.safe_load(source)
+        return Schema.model_validate(yaml_data, context=context)
+
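The stream variant behaves the same but skips the `ResourcePath` read, e.g.:

```python
from felis.datamodel import Schema

with open("my_schema.yaml") as f:  # placeholder filename
    schema = Schema.from_stream(f)
```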
+    def _model_dump(self, strip_ids: bool = False) -> dict[str, Any]:
+        """Dump the schema as a dictionary with some default arguments
+        applied.
+
+        Parameters
+        ----------
+        strip_ids
+            Whether to strip the IDs from the dumped data. Defaults to `False`.
+
+        Returns
+        -------
+        `dict` [ `str`, `Any` ]
+            The dumped schema data as a dictionary.
+        """
+        data = self.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True)
+        if strip_ids:
+            data = _strip_ids(data)
+        return data
+
+    def dump_yaml(self, stream: IO[str] = sys.stdout, strip_ids: bool = False) -> None:
+        """Pretty print the schema as YAML.
+
+        Parameters
+        ----------
+        stream
+            The stream to write the YAML data to.
+        strip_ids
+            Whether to strip the IDs from the dumped data. Defaults to `False`.
+        """
+        data = self._model_dump(strip_ids=strip_ids)
+        yaml.safe_dump(
+            data,
+            stream,
+            default_flow_style=False,
+            sort_keys=False,
+        )
+
+    def dump_json(self, stream: IO[str] = sys.stdout, strip_ids: bool = False) -> None:
+        """Pretty print the schema as JSON.
+
+        Parameters
+        ----------
+        stream
+            The stream to write the JSON data to.
+        strip_ids
+            Whether to strip the IDs from the dumped data. Defaults to `False`.
+        """
+        data = self._model_dump(strip_ids=strip_ids)
+        json.dump(
+            data,
+            stream,
+            indent=4,
+            sort_keys=False,
+        )
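A round-trip sketch of the new dump methods, writing to an in-memory buffer (`strip_ids=True` routes through the module's `_strip_ids` helper, which presumably removes the ID bookkeeping fields from the output):

```python
import io
import sys

# Write YAML without ID fields to an in-memory buffer.
buf = io.StringIO()
schema.dump_yaml(buf, strip_ids=True)
print(buf.getvalue())

# JSON goes to stdout by default, indented 4 spaces.
schema.dump_json(sys.stdout)
```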