lsst-felis 26.2024.900-py3-none-any.whl → 29.2025.4500-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. felis/__init__.py +10 -24
  2. felis/cli.py +437 -341
  3. felis/config/tap_schema/columns.csv +33 -0
  4. felis/config/tap_schema/key_columns.csv +8 -0
  5. felis/config/tap_schema/keys.csv +8 -0
  6. felis/config/tap_schema/schemas.csv +2 -0
  7. felis/config/tap_schema/tables.csv +6 -0
  8. felis/config/tap_schema/tap_schema_std.yaml +273 -0
  9. felis/datamodel.py +1386 -193
  10. felis/db/dialects.py +116 -0
  11. felis/db/schema.py +62 -0
  12. felis/db/sqltypes.py +275 -48
  13. felis/db/utils.py +409 -0
  14. felis/db/variants.py +159 -0
  15. felis/diff.py +234 -0
  16. felis/metadata.py +385 -0
  17. felis/tap_schema.py +767 -0
  18. felis/tests/__init__.py +0 -0
  19. felis/tests/postgresql.py +134 -0
  20. felis/tests/run_cli.py +79 -0
  21. felis/types.py +57 -9
  22. lsst_felis-29.2025.4500.dist-info/METADATA +38 -0
  23. lsst_felis-29.2025.4500.dist-info/RECORD +31 -0
  24. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/WHEEL +1 -1
  25. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/COPYRIGHT +1 -1
  26. felis/check.py +0 -381
  27. felis/simple.py +0 -424
  28. felis/sql.py +0 -275
  29. felis/tap.py +0 -433
  30. felis/utils.py +0 -100
  31. felis/validation.py +0 -103
  32. felis/version.py +0 -2
  33. felis/visitor.py +0 -180
  34. lsst_felis-26.2024.900.dist-info/METADATA +0 -28
  35. lsst_felis-26.2024.900.dist-info/RECORD +0 -23
  36. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/entry_points.txt +0 -0
  37. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/LICENSE +0 -0
  38. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/top_level.txt +0 -0
  39. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/zip-safe +0 -0
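The headline change visible in the datamodel.py diff below is that schema-wide validation options moved from class-level flags (the removed Schema.require_description) into the Pydantic validation context, which the new validators read via ValidationInfo.context. A minimal sketch of driving the new entry point, assuming a local schema file (the file name is hypothetical; the context keys are the ones the validators in the diff actually read):

    import yaml
    from felis.datamodel import Schema

    # Load the YAML schema document (file name is hypothetical).
    with open("my_schema.yaml") as f:
        data = yaml.safe_load(f)

    # Context keys such as "check_description" and "id_generation" are read
    # by the model validators through ValidationInfo.context.
    schema = Schema.model_validate(
        data, context={"check_description": True, "id_generation": True}
    )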
felis/datamodel.py CHANGED
@@ -1,3 +1,5 @@
+ """Define Pydantic data models for Felis."""
+
  # This file is part of felis.
  #
  # Developed for the LSST Data Management System.
@@ -21,24 +23,43 @@
 
  from __future__ import annotations
 
+ import json
  import logging
- from collections.abc import Mapping, Sequence
- from enum import Enum
- from typing import Annotated, Any, Literal, TypeAlias
+ import sys
+ from collections.abc import Sequence
+ from enum import StrEnum, auto
+ from typing import IO, Annotated, Any, Generic, Literal, TypeAlias, TypeVar
 
+ import yaml
  from astropy import units as units  # type: ignore
  from astropy.io.votable import ucd  # type: ignore
- from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+ from lsst.resources import ResourcePath, ResourcePathExpression
+ from pydantic import (
+     BaseModel,
+     ConfigDict,
+     Field,
+     PrivateAttr,
+     ValidationError,
+     ValidationInfo,
+     field_serializer,
+     field_validator,
+     model_validator,
+ )
+ from pydantic_core import InitErrorDetails
+
+ from .db.dialects import get_supported_dialects
+ from .db.sqltypes import get_type_func
+ from .db.utils import string_to_typeengine
+ from .types import Boolean, Byte, Char, Double, FelisType, Float, Int, Long, Short, String, Text, Unicode
 
  logger = logging.getLogger(__name__)
- # logger.setLevel(logging.DEBUG)
 
  __all__ = (
      "BaseObject",
-     "Column",
      "CheckConstraint",
+     "Column",
      "Constraint",
-     "DescriptionStr",
+     "DataType",
      "ForeignKeyConstraint",
      "Index",
      "Schema",
@@ -50,9 +71,8 @@ __all__ = (
  CONFIG = ConfigDict(
      populate_by_name=True,  # Populate attributes by name.
      extra="forbid",  # Do not allow extra fields.
-     use_enum_values=True,  # Use enum values instead of names.
-     validate_assignment=True,  # Validate assignments after model is created.
      str_strip_whitespace=True,  # Strip whitespace from string fields.
+     use_enum_values=False,  # Do not use enum values during serialization.
  )
  """Pydantic model configuration as described in:
  https://docs.pydantic.dev/2.0/api/config/#pydantic.config.ConfigDict
@@ -62,132 +82,220 @@ DESCR_MIN_LENGTH = 3
  """Minimum length for a description field."""
 
  DescriptionStr: TypeAlias = Annotated[str, Field(min_length=DESCR_MIN_LENGTH)]
- """Define a type for a description string, which must be three or more
- characters long. Stripping of whitespace is done globally on all str fields."""
+ """Type for a description, which must be three or more characters long."""
 
 
  class BaseObject(BaseModel):
-     """Base class for all Felis objects."""
+     """Base model.
+
+     All classes representing objects in the Felis data model should inherit
+     from this class.
+     """
 
      model_config = CONFIG
      """Pydantic model configuration."""
 
      name: str
-     """The name of the database object.
-
-     All Felis database objects must have a name.
-     """
+     """Name of the database object."""
 
      id: str = Field(alias="@id")
-     """The unique identifier of the database object.
-
-     All Felis database objects must have a unique identifier.
-     """
+     """Unique identifier of the database object."""
 
      description: DescriptionStr | None = None
-     """A description of the database object.
+     """Description of the database object."""
 
-     By default, the description is optional but will be required if
-     `BaseObject.Config.require_description` is set to `True` by the user.
-     """
-
-     @model_validator(mode="before")
-     @classmethod
-     def check_description(cls, values: dict[str, Any]) -> dict[str, Any]:
-         """Check that the description is present if required."""
-         if Schema.is_description_required():
-             if "description" not in values or not values["description"]:
-                 raise ValueError("Description is required and must be non-empty")
-             if len(values["description"].strip()) < DESCR_MIN_LENGTH:
-                 raise ValueError(f"Description must be at least {DESCR_MIN_LENGTH} characters long")
-         return values
+     votable_utype: str | None = Field(None, alias="votable:utype")
+     """VOTable utype (usage-specific or unique type) of the object."""
 
+     @model_validator(mode="after")
+     def check_description(self, info: ValidationInfo) -> BaseObject:
+         """Check that the description is present if required.
+
+         Parameters
+         ----------
+         info
+             Validation context used to determine if the check is enabled.
+
+         Returns
+         -------
+         `BaseObject`
+             The object being validated.
+         """
+         context = info.context
+         if not context or not context.get("check_description", False):
+             return self
+         if self.description is None or self.description == "":
+             raise ValueError("Description is required and must be non-empty")
+         if len(self.description) < DESCR_MIN_LENGTH:
+             raise ValueError(f"Description must be at least {DESCR_MIN_LENGTH} characters long")
+         return self
 
- class DataType(Enum):
-     """`Enum` representing the data types supported by Felis."""
 
-     BOOLEAN = "boolean"
-     BYTE = "byte"
-     SHORT = "short"
-     INT = "int"
-     LONG = "long"
-     FLOAT = "float"
-     DOUBLE = "double"
-     CHAR = "char"
-     STRING = "string"
-     UNICODE = "unicode"
-     TEXT = "text"
-     BINARY = "binary"
-     TIMESTAMP = "timestamp"
+ class DataType(StrEnum):
+     """``Enum`` representing the data types supported by Felis."""
+
+     boolean = auto()
+     byte = auto()
+     short = auto()
+     int = auto()
+     long = auto()
+     float = auto()
+     double = auto()
+     char = auto()
+     string = auto()
+     unicode = auto()
+     text = auto()
+     binary = auto()
+     timestamp = auto()
+
+
+ def validate_ivoa_ucd(ivoa_ucd: str) -> str:
+     """Validate IVOA UCD values.
+
+     Parameters
+     ----------
+     ivoa_ucd
+         IVOA UCD value to check.
+
+     Returns
+     -------
+     `str`
+         The IVOA UCD value if it is valid.
+
+     Raises
+     ------
+     ValueError
+         If the IVOA UCD value is invalid.
+     """
+     if ivoa_ucd is not None:
+         try:
+             ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
+         except ValueError as e:
+             raise ValueError(f"Invalid IVOA UCD: {e}")
+     return ivoa_ucd
 
 
  class Column(BaseObject):
-     """A column in a table."""
+     """Column model."""
 
      datatype: DataType
-     """The datatype of the column."""
+     """Datatype of the column."""
 
-     length: int | None = None
-     """The length of the column."""
+     length: int | None = Field(None, gt=0)
+     """Length of the column."""
+
+     precision: int | None = Field(None, ge=0)
+     """The numerical precision of the column.
+
+     For timestamps, this is the number of fractional digits retained in the
+     seconds field.
+     """
 
      nullable: bool = True
-     """Whether the column can be `NULL`."""
+     """Whether the column can be ``NULL``."""
 
-     value: Any = None
-     """The default value of the column."""
+     value: str | int | float | bool | None = None
+     """Default value of the column."""
 
      autoincrement: bool | None = None
      """Whether the column is autoincremented."""
 
-     mysql_datatype: str | None = Field(None, alias="mysql:datatype")
-     """The MySQL datatype of the column."""
-
      ivoa_ucd: str | None = Field(None, alias="ivoa:ucd")
-     """The IVOA UCD of the column."""
+     """IVOA UCD of the column."""
 
      fits_tunit: str | None = Field(None, alias="fits:tunit")
-     """The FITS TUNIT of the column."""
+     """FITS TUNIT of the column."""
 
      ivoa_unit: str | None = Field(None, alias="ivoa:unit")
-     """The IVOA unit of the column."""
+     """IVOA unit of the column."""
 
      tap_column_index: int | None = Field(None, alias="tap:column_index")
-     """The TAP_SCHEMA column index of the column."""
+     """TAP_SCHEMA column index of the column."""
 
      tap_principal: int | None = Field(0, alias="tap:principal", ge=0, le=1)
-     """Whether this is a TAP_SCHEMA principal column; can be either 0 or 1.
-     """
+     """Whether this is a TAP_SCHEMA principal column."""
 
-     votable_arraysize: int | Literal["*"] | None = Field(None, alias="votable:arraysize")
-     """The VOTable arraysize of the column."""
+     votable_arraysize: int | str | None = Field(None, alias="votable:arraysize")
+     """VOTable arraysize of the column."""
 
      tap_std: int | None = Field(0, alias="tap:std", ge=0, le=1)
      """TAP_SCHEMA indication that this column is defined by an IVOA standard.
      """
 
-     votable_utype: str | None = Field(None, alias="votable:utype")
-     """The VOTable utype (usage-specific or unique type) of the column."""
-
      votable_xtype: str | None = Field(None, alias="votable:xtype")
-     """The VOTable xtype (extended type) of the column."""
+     """VOTable xtype (extended type) of the column."""
+
+     votable_datatype: str | None = Field(None, alias="votable:datatype")
+     """VOTable datatype of the column."""
+
+     mysql_datatype: str | None = Field(None, alias="mysql:datatype")
+     """MySQL datatype override on the column."""
+
+     postgresql_datatype: str | None = Field(None, alias="postgresql:datatype")
+     """PostgreSQL datatype override on the column."""
+
+     @model_validator(mode="after")
+     def check_value(self) -> Column:
+         """Check that the default value is valid.
+
+         Returns
+         -------
+         `Column`
+             The column being validated.
+         """
+         if (value := self.value) is not None:
+             if value is not None and self.autoincrement is True:
+                 raise ValueError("Column cannot have both a default value and be autoincremented")
+             felis_type = FelisType.felis_type(self.datatype)
+             if felis_type.is_numeric:
+                 if felis_type in (Byte, Short, Int, Long) and not isinstance(value, int):
+                     raise ValueError("Default value must be an int for integer type columns")
+                 elif felis_type in (Float, Double) and not isinstance(value, float):
+                     raise ValueError("Default value must be a decimal number for float and double columns")
+             elif felis_type in (String, Char, Unicode, Text):
+                 if not isinstance(value, str):
+                     raise ValueError("Default value must be a string for string columns")
+                 if not len(value):
+                     raise ValueError("Default value must be a non-empty string for string columns")
+             elif felis_type is Boolean and not isinstance(value, bool):
+                 raise ValueError("Default value must be a boolean for boolean columns")
+         return self
 
      @field_validator("ivoa_ucd")
      @classmethod
      def check_ivoa_ucd(cls, ivoa_ucd: str) -> str:
-         """Check that IVOA UCD values are valid."""
-         if ivoa_ucd is not None:
-             try:
-                 ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
-             except ValueError as e:
-                 raise ValueError(f"Invalid IVOA UCD: {e}")
-         return ivoa_ucd
+         """Check that IVOA UCD values are valid.
 
-     @model_validator(mode="before")
-     @classmethod
-     def check_units(cls, values: dict[str, Any]) -> dict[str, Any]:
-         """Check that units are valid."""
-         fits_unit = values.get("fits:tunit")
-         ivoa_unit = values.get("ivoa:unit")
+         Parameters
+         ----------
+         ivoa_ucd
+             IVOA UCD value to check.
+
+         Returns
+         -------
+         `str`
+             The IVOA UCD value if it is valid.
+         """
+         return validate_ivoa_ucd(ivoa_ucd)
+
+     @model_validator(mode="after")
+     def check_units(self) -> Column:
+         """Check that the ``fits:tunit`` or ``ivoa:unit`` field has valid
+         units according to astropy. Only one may be provided.
+
+         Returns
+         -------
+         `Column`
+             The column being validated.
+
+         Raises
+         ------
+         ValueError
+             Raised if both FITS and IVOA units are provided, or if the unit is
+             invalid.
+         """
+         fits_unit = self.fits_tunit
+         ivoa_unit = self.ivoa_unit
 
          if fits_unit and ivoa_unit:
              raise ValueError("Column cannot have both FITS and IVOA units")
@@ -199,55 +307,420 @@ class Column(BaseObject):
              except ValueError as e:
                  raise ValueError(f"Invalid unit: {e}")
 
+         return self
+
+     @model_validator(mode="before")
+     @classmethod
+     def check_length(cls, values: dict[str, Any]) -> dict[str, Any]:
+         """Check that a valid length is provided for sized types.
+
+         Parameters
+         ----------
+         values
+             Values of the column.
+
+         Returns
+         -------
+         `dict` [ `str`, `Any` ]
+             The values of the column.
+
+         Raises
+         ------
+         ValueError
+             Raised if a length is not provided for a sized type.
+         """
+         datatype = values.get("datatype")
+         if datatype is None:
+             # Skip this validation if datatype is not provided
+             return values
+         length = values.get("length")
+         felis_type = FelisType.felis_type(datatype)
+         if felis_type.is_sized and length is None:
+             raise ValueError(
+                 f"Length must be provided for type '{datatype}'"
+                 + (f" in column '{values['@id']}'" if "@id" in values else "")
+             )
+         elif not felis_type.is_sized and length is not None:
+             logger.warning(
+                 f"The datatype '{datatype}' does not support a specified length"
+                 + (f" in column '{values['@id']}'" if "@id" in values else "")
+             )
+         return values
+
+     @model_validator(mode="after")
+     def check_redundant_datatypes(self, info: ValidationInfo) -> Column:
+         """Check for redundant datatypes on columns.
+
+         Parameters
+         ----------
+         info
+             Validation context used to determine if the check is enabled.
+
+         Returns
+         -------
+         `Column`
+             The column being validated.
+
+         Raises
+         ------
+         ValueError
+             Raised if a datatype override is redundant.
+         """
+         context = info.context
+         if not context or not context.get("check_redundant_datatypes", False):
+             return self
+         if all(
+             getattr(self, f"{dialect}:datatype", None) is not None
+             for dialect in get_supported_dialects().keys()
+         ):
+             return self
+
+         datatype = self.datatype
+         length: int | None = self.length or None
+
+         datatype_func = get_type_func(datatype)
+         felis_type = FelisType.felis_type(datatype)
+         if felis_type.is_sized:
+             datatype_obj = datatype_func(length)
+         else:
+             datatype_obj = datatype_func()
+
+         for dialect_name, dialect in get_supported_dialects().items():
+             db_annotation = f"{dialect_name}_datatype"
+             if datatype_string := self.model_dump().get(db_annotation):
+                 db_datatype_obj = string_to_typeengine(datatype_string, dialect, length)
+                 if datatype_obj.compile(dialect) == db_datatype_obj.compile(dialect):
+                     raise ValueError(
+                         "'{}: {}' is a redundant override of 'datatype: {}' in column '{}'{}".format(
+                             db_annotation,
+                             datatype_string,
+                             self.datatype,
+                             self.id,
+                             "" if length is None else f" with length {length}",
+                         )
+                     )
+                 else:
+                     logger.debug(
+                         f"Type override of 'datatype: {self.datatype}' "
+                         f"with '{db_annotation}: {datatype_string}' in column '{self.id}' "
+                         f"compiled to '{datatype_obj.compile(dialect)}' and "
+                         f"'{db_datatype_obj.compile(dialect)}'"
+                     )
+         return self
+
+     @model_validator(mode="after")
+     def check_precision(self) -> Column:
+         """Check that precision is only valid for timestamp columns.
+
+         Returns
+         -------
+         `Column`
+             The column being validated.
+         """
+         if self.precision is not None and self.datatype != "timestamp":
+             raise ValueError("Precision is only valid for timestamp columns")
+         return self
+
+     @model_validator(mode="before")
+     @classmethod
+     def check_votable_arraysize(cls, values: dict[str, Any], info: ValidationInfo) -> dict[str, Any]:
+         """Set the default value for the ``votable_arraysize`` field, which
+         corresponds to ``arraysize`` in the IVOA VOTable standard.
+
+         Parameters
+         ----------
+         values
+             Values of the column.
+         info
+             Validation context used to determine if the check is enabled.
+
+         Returns
+         -------
+         `dict` [ `str`, `Any` ]
+             The values of the column.
+
+         Notes
+         -----
+         Following the IVOA VOTable standard, an ``arraysize`` of 1 should not
+         be used.
+         """
+         if values.get("name", None) is None or values.get("datatype", None) is None:
+             # Skip bad column data that will not validate
+             return values
+         context = info.context if info.context else {}
+         arraysize = values.get("votable:arraysize", None)
+         if arraysize is None:
+             length = values.get("length", None)
+             datatype = values.get("datatype")
+             if length is not None and length > 1:
+                 # Following the IVOA standard, arraysize of 1 is disallowed
+                 if datatype == "char":
+                     arraysize = str(length)
+                 elif datatype in ("string", "unicode", "binary"):
+                     if context.get("force_unbounded_arraysize", False):
+                         arraysize = "*"
+                         logger.debug(
+                             f"Forced VOTable's 'arraysize' to '*' on column '{values['name']}' with datatype "
+                             + f"'{values['datatype']}' and length '{length}'"
+                         )
+                     else:
+                         arraysize = f"{length}*"
+             elif datatype in ("timestamp", "text"):
+                 arraysize = "*"
+             if arraysize is not None:
+                 values["votable:arraysize"] = arraysize
+                 logger.debug(
+                     f"Set default 'votable:arraysize' to '{arraysize}' on column '{values['name']}'"
+                     + f" with datatype '{values['datatype']}' and length '{values.get('length', None)}'"
+                 )
+         else:
+             logger.debug(f"Using existing 'votable:arraysize' of '{arraysize}' on column '{values['name']}'")
+             if isinstance(values["votable:arraysize"], int):
+                 logger.warning(
+                     f"Usage of an integer value for 'votable:arraysize' in column '{values['name']}' is "
+                     + "deprecated"
+                 )
+                 values["votable:arraysize"] = str(arraysize)
          return values
 
+     @field_serializer("datatype")
+     def serialize_datatype(self, value: DataType) -> str:
+         """Convert `DataType` to string when serializing to JSON/YAML.
+
+         Parameters
+         ----------
+         value
+             The `DataType` value to serialize.
+
+         Returns
+         -------
+         `str`
+             The serialized `DataType` value.
+         """
+         return str(value)
+
+     @field_validator("datatype", mode="before")
+     @classmethod
+     def deserialize_datatype(cls, value: str) -> DataType:
+         """Convert string back into `DataType` when loading from JSON/YAML.
+
+         Parameters
+         ----------
+         value
+             The string value to deserialize.
+
+         Returns
+         -------
+         `DataType`
+             The deserialized `DataType` value.
+         """
+         return DataType(value)
+
+     @model_validator(mode="after")
+     def check_votable_xtype(self) -> Column:
+         """Set the default value for the ``votable_xtype`` field, which
+         corresponds to an Extended Datatype or ``xtype`` in the IVOA VOTable
+         standard.
+
+         Returns
+         -------
+         `Column`
+             The column being validated.
+
+         Notes
+         -----
+         This is currently only set automatically for the Felis ``timestamp``
+         datatype.
+         """
+         if self.datatype == DataType.timestamp and self.votable_xtype is None:
+             self.votable_xtype = "timestamp"
+         return self
+
 
  class Constraint(BaseObject):
-     """A database table constraint."""
+     """Table constraint model."""
 
      deferrable: bool = False
-     """If `True` then this constraint will be declared as deferrable."""
+     """Whether this constraint will be declared as deferrable."""
 
-     initially: str | None = None
-     """Value for ``INITIALLY`` clause, only used if ``deferrable`` is True."""
+     initially: Literal["IMMEDIATE", "DEFERRED"] | None = None
+     """Value for ``INITIALLY`` clause; only used if `deferrable` is
+     `True`."""
 
-     annotations: Mapping[str, Any] = Field(default_factory=dict)
-     """Additional annotations for this constraint."""
-
-     type: str | None = Field(None, alias="@type")
-     """The type of the constraint."""
+     @model_validator(mode="after")
+     def check_deferrable(self) -> Constraint:
+         """Check that the ``INITIALLY`` clause is only used if `deferrable` is
+         `True`.
+
+         Returns
+         -------
+         `Constraint`
+             The constraint being validated.
+         """
+         if self.initially is not None and not self.deferrable:
+             raise ValueError("INITIALLY clause can only be used if deferrable is True")
+         return self
 
 
  class CheckConstraint(Constraint):
-     """A check constraint on a table."""
+     """Table check constraint model."""
+
+     type: Literal["Check"] = Field("Check", alias="@type")
+     """Type of the constraint."""
 
      expression: str
-     """The expression for the check constraint."""
+     """Expression for the check constraint."""
+
+     @field_serializer("type")
+     def serialize_type(self, value: str) -> str:
+         """Ensure '@type' is included in serialized output.
+
+         Parameters
+         ----------
+         value
+             The value to serialize.
+
+         Returns
+         -------
+         `str`
+             The serialized value.
+         """
+         return value
 
 
  class UniqueConstraint(Constraint):
-     """A unique constraint on a table."""
+     """Table unique constraint model."""
+
+     type: Literal["Unique"] = Field("Unique", alias="@type")
+     """Type of the constraint."""
 
      columns: list[str]
-     """The columns in the unique constraint."""
+     """Columns in the unique constraint."""
+
+     @field_serializer("type")
+     def serialize_type(self, value: str) -> str:
+         """Ensure '@type' is included in serialized output.
+
+         Parameters
+         ----------
+         value
+             The value to serialize.
+
+         Returns
+         -------
+         `str`
+             The serialized value.
+         """
+         return value
+
+
+ class ForeignKeyConstraint(Constraint):
+     """Table foreign key constraint model.
+
+     This constraint is used to define a foreign key relationship between two
+     tables in the schema. There must be at least one column in the
+     `columns` list, and at least one column in the `referenced_columns` list
+     or a validation error will be raised.
+
+     Notes
+     -----
+     These relationships will be reflected in the TAP_SCHEMA ``keys`` and
+     ``key_columns`` data.
+     """
+
+     type: Literal["ForeignKey"] = Field("ForeignKey", alias="@type")
+     """Type of the constraint."""
+
+     columns: list[str] = Field(min_length=1)
+     """The columns comprising the foreign key."""
+
+     referenced_columns: list[str] = Field(alias="referencedColumns", min_length=1)
+     """The columns referenced by the foreign key."""
+
+     on_delete: Literal["CASCADE", "SET NULL", "SET DEFAULT", "RESTRICT", "NO ACTION"] | None = None
+     """Action to take when the referenced row is deleted."""
+
+     on_update: Literal["CASCADE", "SET NULL", "SET DEFAULT", "RESTRICT", "NO ACTION"] | None = None
+     """Action to take when the referenced row is updated."""
+
+     @field_serializer("type")
+     def serialize_type(self, value: str) -> str:
+         """Ensure '@type' is included in serialized output.
+
+         Parameters
+         ----------
+         value
+             The value to serialize.
+
+         Returns
+         -------
+         `str`
+             The serialized value.
+         """
+         return value
+
+     @model_validator(mode="after")
+     def check_column_lengths(self) -> ForeignKeyConstraint:
+         """Check that the `columns` and `referenced_columns` lists have the
+         same length.
+
+         Returns
+         -------
+         `ForeignKeyConstraint`
+             The foreign key constraint being validated.
+
+         Raises
+         ------
+         ValueError
+             Raised if the `columns` and `referenced_columns` lists do not have
+             the same length.
+         """
+         if len(self.columns) != len(self.referenced_columns):
+             raise ValueError(
+                 "Columns and referencedColumns must have the same length for a ForeignKey constraint"
+             )
+         return self
+
+
+ _ConstraintType = Annotated[
+     CheckConstraint | ForeignKeyConstraint | UniqueConstraint, Field(discriminator="type")
+ ]
+ """Type alias for a constraint type."""
 
 
  class Index(BaseObject):
-     """A database table index.
+     """Table index model.
 
      An index can be defined on either columns or expressions, but not both.
      """
 
      columns: list[str] | None = None
-     """The columns in the index."""
+     """Columns in the index."""
 
      expressions: list[str] | None = None
-     """The expressions in the index."""
+     """Expressions in the index."""
 
      @model_validator(mode="before")
      @classmethod
      def check_columns_or_expressions(cls, values: dict[str, Any]) -> dict[str, Any]:
-         """Check that columns or expressions are specified, but not both."""
+         """Check that columns or expressions are specified, but not both.
+
+         Parameters
+         ----------
+         values
+             Values of the index.
+
+         Returns
+         -------
+         `dict` [ `str`, `Any` ]
+             The values of the index.
+
+         Raises
+         ------
+         ValueError
+             Raised if both columns and expressions are specified, or if neither
+             are specified.
+         """
          if "columns" in values and "expressions" in values:
              raise ValueError("Defining columns and expressions is not valid")
          elif "columns" not in values and "expressions" not in values:
@@ -255,80 +728,234 @@ class Index(BaseObject):
          return values
 
 
- class ForeignKeyConstraint(Constraint):
-     """A foreign key constraint on a table.
+ ColumnRef: TypeAlias = str
+ """Type alias for a column reference."""
 
-     These will be reflected in the TAP_SCHEMA keys and key_columns data.
-     """
 
-     columns: list[str]
-     """The columns comprising the foreign key."""
+ class ColumnGroup(BaseObject):
+     """Column group model."""
 
-     referenced_columns: list[str] = Field(alias="referencedColumns")
-     """The columns referenced by the foreign key."""
+     columns: list[ColumnRef | Column] = Field(..., min_length=1)
+     """Columns in the group."""
 
+     ivoa_ucd: str | None = Field(None, alias="ivoa:ucd")
+     """IVOA UCD of the column."""
 
- class Table(BaseObject):
-     """A database table."""
+     table: Table | None = Field(None, exclude=True)
+     """Reference to the parent table."""
 
-     columns: Sequence[Column]
-     """The columns in the table."""
+     @field_validator("ivoa_ucd")
+     @classmethod
+     def check_ivoa_ucd(cls, ivoa_ucd: str) -> str:
+         """Check that IVOA UCD values are valid.
 
-     constraints: list[Constraint] = Field(default_factory=list)
-     """The constraints on the table."""
+         Parameters
+         ----------
+         ivoa_ucd
+             IVOA UCD value to check.
 
-     indexes: list[Index] = Field(default_factory=list)
-     """The indexes on the table."""
+         Returns
+         -------
+         `str`
+             The IVOA UCD value if it is valid.
+         """
+         return validate_ivoa_ucd(ivoa_ucd)
+
+     @model_validator(mode="after")
+     def check_unique_columns(self) -> ColumnGroup:
+         """Check that the columns list contains unique items.
 
-     primaryKey: str | list[str] | None = None
-     """The primary key of the table."""
+         Returns
+         -------
+         `ColumnGroup`
+             The column group being validated.
+         """
+         column_ids = [col if isinstance(col, str) else col.id for col in self.columns]
+         if len(column_ids) != len(set(column_ids)):
+             raise ValueError("Columns in the group must be unique")
+         return self
 
-     tap_table_index: int | None = Field(None, alias="tap:table_index")
-     """The IVOA TAP_SCHEMA table index of the table."""
+     def _dereference_columns(self) -> None:
+         """Dereference ColumnRef to Column objects."""
+         if self.table is None:
+             raise ValueError("ColumnGroup must have a reference to its parent table")
+
+         dereferenced_columns: list[ColumnRef | Column] = []
+         for col in self.columns:
+             if isinstance(col, str):
+                 # Dereference ColumnRef to Column object
+                 try:
+                     col_obj = self.table._find_column_by_id(col)
+                 except KeyError as e:
+                     raise ValueError(f"Column '{col}' not found in table '{self.table.name}'") from e
+                 dereferenced_columns.append(col_obj)
+             else:
+                 dereferenced_columns.append(col)
+
+         self.columns = dereferenced_columns
+
+     @field_serializer("columns")
+     def serialize_columns(self, columns: list[ColumnRef | Column]) -> list[str]:
+         """Serialize columns as their IDs.
+
+         Parameters
+         ----------
+         columns
+             The columns to serialize.
+
+         Returns
+         -------
+         `list` [ `str` ]
+             The serialized column IDs.
+         """
+         return [col if isinstance(col, str) else col.id for col in columns]
 
-     mysql_engine: str | None = Field(None, alias="mysql:engine")
-     """The mysql engine to use for the table.
 
-     For now this is a freeform string but it could be constrained to a list of
-     known engines in the future.
-     """
+ class Table(BaseObject):
+     """Table model."""
+
+     primary_key: str | list[str] | None = Field(None, alias="primaryKey")
+     """Primary key of the table."""
+
+     tap_table_index: int | None = Field(None, alias="tap:table_index")
+     """IVOA TAP_SCHEMA table index of the table."""
+
+     mysql_engine: str | None = Field("MyISAM", alias="mysql:engine")
+     """MySQL engine to use for the table."""
 
      mysql_charset: str | None = Field(None, alias="mysql:charset")
-     """The mysql charset to use for the table.
+     """MySQL charset to use for the table."""
 
-     For now this is a freeform string but it could be constrained to a list of
-     known charsets in the future.
-     """
+     columns: Sequence[Column]
+     """Columns in the table."""
 
-     @model_validator(mode="before")
-     @classmethod
-     def create_constraints(cls, values: dict[str, Any]) -> dict[str, Any]:
-         """Create constraints from the ``constraints`` field."""
-         if "constraints" in values:
-             new_constraints: list[Constraint] = []
-             for item in values["constraints"]:
-                 if item["@type"] == "ForeignKey":
-                     new_constraints.append(ForeignKeyConstraint(**item))
-                 elif item["@type"] == "Unique":
-                     new_constraints.append(UniqueConstraint(**item))
-                 elif item["@type"] == "Check":
-                     new_constraints.append(CheckConstraint(**item))
-                 else:
-                     raise ValueError(f"Unknown constraint type: {item['@type']}")
-             values["constraints"] = new_constraints
-         return values
+     column_groups: list[ColumnGroup] = Field(default_factory=list, alias="columnGroups")
+     """Column groups in the table."""
+
+     constraints: list[_ConstraintType] = Field(default_factory=list)
+     """Constraints on the table."""
+
+     indexes: list[Index] = Field(default_factory=list)
+     """Indexes on the table."""
 
      @field_validator("columns", mode="after")
      @classmethod
      def check_unique_column_names(cls, columns: list[Column]) -> list[Column]:
-         """Check that column names are unique."""
+         """Check that column names are unique.
+
+         Parameters
+         ----------
+         columns
+             The columns to check.
+
+         Returns
+         -------
+         `list` [ `Column` ]
+             The columns if they are unique.
+
+         Raises
+         ------
+         ValueError
+             Raised if column names are not unique.
+         """
          if len(columns) != len(set(column.name for column in columns)):
              raise ValueError("Column names must be unique")
          return columns
 
+     @model_validator(mode="after")
+     def check_tap_table_index(self, info: ValidationInfo) -> Table:
+         """Check that the table has a TAP table index.
+
+         Parameters
+         ----------
+         info
+             Validation context used to determine if the check is enabled.
+
+         Returns
+         -------
+         `Table`
+             The table being validated.
+
+         Raises
+         ------
+         ValueError
+             Raised If the table is missing a TAP table index.
+         """
+         context = info.context
+         if not context or not context.get("check_tap_table_indexes", False):
+             return self
+         if self.tap_table_index is None:
+             raise ValueError("Table is missing a TAP table index")
+         return self
+
+     @model_validator(mode="after")
+     def check_tap_principal(self, info: ValidationInfo) -> Table:
+         """Check that at least one column is flagged as 'principal' for TAP
+         purposes.
+
+         Parameters
+         ----------
+         info
+             Validation context used to determine if the check is enabled.
+
+         Returns
+         -------
+         `Table`
+             The table being validated.
+
+         Raises
+         ------
+         ValueError
+             Raised if the table is missing a column flagged as 'principal'.
+         """
+         context = info.context
+         if not context or not context.get("check_tap_principal", False):
+             return self
+         for col in self.columns:
+             if col.tap_principal == 1:
+                 return self
+         raise ValueError(f"Table '{self.name}' is missing at least one column designated as 'tap:principal'")
+
+     def _find_column_by_id(self, id: str) -> Column:
+         """Find a column by ID.
+
+         Parameters
+         ----------
+         id
+             The ID of the column to find.
+
+         Returns
+         -------
+         `Column`
+             The column with the given ID.
+
+         Raises
+         ------
+         ValueError
+             Raised if the column is not found.
+         """
+         for column in self.columns:
+             if column.id == id:
+                 return column
+         raise KeyError(f"Column '{id}' not found in table '{self.name}'")
+
+     @model_validator(mode="after")
+     def dereference_column_groups(self: Table) -> Table:
+         """Dereference columns in column groups.
+
+         Returns
+         -------
+         `Table`
+             The table with dereferenced column groups.
+         """
+         for group in self.column_groups:
+             group.table = self
+             group._dereference_columns()
+         return self
+
 
  class SchemaVersion(BaseModel):
-     """The version of the schema."""
+     """Schema version model."""
 
      current: str
      """The current version of the schema."""
@@ -341,35 +968,49 @@ class SchemaVersion(BaseModel):
 
 
  class SchemaIdVisitor:
-     """Visitor to build a Schema object's map of IDs to objects.
+     """Visit a schema and build the map of IDs to objects.
 
+     Notes
+     -----
      Duplicates are added to a set when they are encountered, which can be
-     accessed via the `duplicates` attribute. The presence of duplicates will
+     accessed via the ``duplicates`` attribute. The presence of duplicates will
      not throw an error. Only the first object with a given ID will be added to
-     the map, but this should not matter, since a ValidationError will be thrown
-     by the `model_validator` method if any duplicates are found in the schema.
-
-     This class is intended for internal use only.
+     the map, but this should not matter, since a ``ValidationError`` will be
+     thrown by the ``model_validator`` method if any duplicates are found in the
+     schema.
      """
 
      def __init__(self) -> None:
          """Create a new SchemaVisitor."""
-         self.schema: "Schema" | None = None
+         self.schema: Schema | None = None
          self.duplicates: set[str] = set()
 
      def add(self, obj: BaseObject) -> None:
-         """Add an object to the ID map."""
+         """Add an object to the ID map.
+
+         Parameters
+         ----------
+         obj
+             The object to add to the ID map.
+         """
          if hasattr(obj, "id"):
              obj_id = getattr(obj, "id")
              if self.schema is not None:
-                 if obj_id in self.schema.id_map:
+                 if obj_id in self.schema._id_map:
                      self.duplicates.add(obj_id)
                  else:
-                     self.schema.id_map[obj_id] = obj
+                     self.schema._id_map[obj_id] = obj
 
-     def visit_schema(self, schema: "Schema") -> None:
-         """Visit the schema object that was added during initialization.
+     def visit_schema(self, schema: Schema) -> None:
+         """Visit the objects in a schema and build the ID map.
 
+         Parameters
+         ----------
+         schema
+             The schema object to visit.
+
+         Notes
+         -----
          This will set an internal variable pointing to the schema object.
          """
          self.schema = schema
@@ -379,7 +1020,13 @@ class SchemaIdVisitor:
              self.visit_table(table)
 
      def visit_table(self, table: Table) -> None:
-         """Visit a table object."""
+         """Visit a table object.
+
+         Parameters
+         ----------
+         table
+             The table object to visit.
+         """
          self.add(table)
          for column in table.columns:
              self.visit_column(column)
@@ -387,25 +1034,84 @@ class SchemaIdVisitor:
              self.visit_constraint(constraint)
 
      def visit_column(self, column: Column) -> None:
-         """Visit a column object."""
+         """Visit a column object.
+
+         Parameters
+         ----------
+         column
+             The column object to visit.
+         """
          self.add(column)
 
      def visit_constraint(self, constraint: Constraint) -> None:
-         """Visit a constraint object."""
+         """Visit a constraint object.
+
+         Parameters
+         ----------
+         constraint
+             The constraint object to visit.
+         """
          self.add(constraint)
 
 
- class Schema(BaseObject):
-     """The database schema containing the tables."""
+ T = TypeVar("T", bound=BaseObject)
 
-     class ValidationConfig:
-         """Validation configuration which is specific to Felis."""
 
-         _require_description = False
-         """Flag to require a description for all objects.
+ def _strip_ids(data: Any) -> Any:
+     """Recursively strip '@id' fields from a dictionary or list.
+
+     Parameters
+     ----------
+     data
+         The data to strip IDs from, which can be a dictionary, list, or any
+         other type. Other types will be returned unchanged.
+     """
+     if isinstance(data, dict):
+         data.pop("@id", None)
+         for k, v in data.items():
+             data[k] = _strip_ids(v)
+         return data
+     elif isinstance(data, list):
+         return [_strip_ids(item) for item in data]
+     else:
+         return data
+
+
+ def _append_error(
+     errors: list[InitErrorDetails],
+     loc: tuple,
+     input_value: Any,
+     error_message: str,
+     error_type: str = "value_error",
+ ) -> None:
+     """Append an error to the errors list.
+
+     Parameters
+     ----------
+     errors : list[InitErrorDetails]
+         The list of errors to append to.
+     loc : tuple
+         The location of the error in the schema.
+     input_value : Any
+         The input value that caused the error.
+     error_message : str
+         The error message to include in the context.
+     """
+     errors.append(
+         {
+             "type": error_type,
+             "loc": loc,
+             "input": input_value,
+             "ctx": {"error": error_message},
+         }
+     )
 
-     This is set by the `require_description` class method.
-     """
+
+ class Schema(BaseObject, Generic[T]):
+     """Database schema model.
+
+     This represents a database schema, which contains one or more tables.
+     """
 
      version: SchemaVersion | str | None = None
      """The version of the schema."""
@@ -413,52 +1119,539 @@ class Schema(BaseObject):
      tables: Sequence[Table]
      """The tables in the schema."""
 
-     id_map: dict[str, Any] = Field(default_factory=dict, exclude=True)
+     _id_map: dict[str, Any] = PrivateAttr(default_factory=dict)
      """Map of IDs to objects."""
 
+     @model_validator(mode="before")
+     @classmethod
+     def generate_ids(cls, values: dict[str, Any], info: ValidationInfo) -> dict[str, Any]:
+         """Generate IDs for objects that do not have them.
+
+         Parameters
+         ----------
+         values
+             The values of the schema.
+         info
+             Validation context used to determine if ID generation is enabled.
+
+         Returns
+         -------
+         `dict` [ `str`, `Any` ]
+             The values of the schema with generated IDs.
+         """
+         context = info.context
+         if not context or not context.get("id_generation", False):
+             logger.debug("Skipping ID generation")
+             return values
+         schema_name = values["name"]
+         if "@id" not in values:
+             values["@id"] = f"#{schema_name}"
+             logger.debug(f"Generated ID '{values['@id']}' for schema '{schema_name}'")
+         if "tables" in values:
+             for table in values["tables"]:
+                 if "@id" not in table:
+                     table["@id"] = f"#{table['name']}"
+                     logger.debug(f"Generated ID '{table['@id']}' for table '{table['name']}'")
+                 if "columns" in table:
+                     for column in table["columns"]:
+                         if "@id" not in column:
+                             column["@id"] = f"#{table['name']}.{column['name']}"
+                             logger.debug(f"Generated ID '{column['@id']}' for column '{column['name']}'")
+                 if "columnGroups" in table:
+                     for column_group in table["columnGroups"]:
+                         if "@id" not in column_group:
+                             column_group["@id"] = f"#{table['name']}.{column_group['name']}"
+                             logger.debug(
+                                 f"Generated ID '{column_group['@id']}' for column group "
+                                 f"'{column_group['name']}'"
+                             )
+                 if "constraints" in table:
+                     for constraint in table["constraints"]:
+                         if "@id" not in constraint:
+                             constraint["@id"] = f"#{constraint['name']}"
+                             logger.debug(
+                                 f"Generated ID '{constraint['@id']}' for constraint '{constraint['name']}'"
+                             )
+                 if "indexes" in table:
+                     for index in table["indexes"]:
+                         if "@id" not in index:
+                             index["@id"] = f"#{index['name']}"
+                             logger.debug(f"Generated ID '{index['@id']}' for index '{index['name']}'")
+         return values
+
      @field_validator("tables", mode="after")
      @classmethod
      def check_unique_table_names(cls, tables: list[Table]) -> list[Table]:
-         """Check that table names are unique."""
+         """Check that table names are unique.
+
+         Parameters
+         ----------
+         tables
+             The tables to check.
+
+         Returns
+         -------
+         `list` [ `Table` ]
+             The tables if they are unique.
+
+         Raises
+         ------
+         ValueError
+             Raised if table names are not unique.
+         """
          if len(tables) != len(set(table.name for table in tables)):
              raise ValueError("Table names must be unique")
          return tables
 
+     @model_validator(mode="after")
+     def check_tap_table_indexes(self, info: ValidationInfo) -> Schema:
+         """Check that the TAP table indexes are unique.
+
+         Parameters
+         ----------
+         info
+             The validation context used to determine if the check is enabled.
+
+         Returns
+         -------
+         `Schema`
+             The schema being validated.
+         """
+         context = info.context
+         if not context or not context.get("check_tap_table_indexes", False):
+             return self
+         table_indicies = set()
+         for table in self.tables:
+             table_index = table.tap_table_index
+             if table_index is not None:
+                 if table_index in table_indicies:
+                     raise ValueError(f"Duplicate 'tap:table_index' value {table_index} found in schema")
+                 table_indicies.add(table_index)
+         return self
+
+     @model_validator(mode="after")
+     def check_unique_constraint_names(self: Schema) -> Schema:
+         """Check for duplicate constraint names in the schema.
+
+         Returns
+         -------
+         `Schema`
+             The schema being validated.
+
+         Raises
+         ------
+         ValueError
+             Raised if duplicate constraint names are found in the schema.
+         """
+         constraint_names = set()
+         duplicate_names = []
+
+         for table in self.tables:
+             for constraint in table.constraints:
+                 constraint_name = constraint.name
+                 if constraint_name in constraint_names:
+                     duplicate_names.append(constraint_name)
+                 else:
+                     constraint_names.add(constraint_name)
+
+         if duplicate_names:
+             raise ValueError(f"Duplicate constraint names found in schema: {duplicate_names}")
+
+         return self
+
+     @model_validator(mode="after")
+     def check_unique_index_names(self: Schema) -> Schema:
+         """Check for duplicate index names in the schema.
+
+         Returns
+         -------
+         `Schema`
+             The schema being validated.
+
+         Raises
+         ------
+         ValueError
+             Raised if duplicate index names are found in the schema.
+         """
+         index_names = set()
+         duplicate_names = []
+
+         for table in self.tables:
+             for index in table.indexes:
+                 index_name = index.name
+                 if index_name in index_names:
+                     duplicate_names.append(index_name)
+                 else:
+                     index_names.add(index_name)
+
+         if duplicate_names:
+             raise ValueError(f"Duplicate index names found in schema: {duplicate_names}")
+
+         return self
+
      @model_validator(mode="after")
      def create_id_map(self: Schema) -> Schema:
-         """Create a map of IDs to objects."""
+         """Create a map of IDs to objects.
+
+         Returns
+         -------
+         `Schema`
+             The schema with the ID map created.
+
+         Raises
+         ------
+         ValueError
+             Raised if duplicate identifiers are found in the schema.
+         """
+         if self._id_map:
+             logger.debug("Ignoring call to create_id_map() - ID map was already populated")
+             return self
          visitor: SchemaIdVisitor = SchemaIdVisitor()
          visitor.visit_schema(self)
-         logger.debug(f"ID map contains {len(self.id_map.keys())} objects")
          if len(visitor.duplicates):
              raise ValueError(
                  "Duplicate IDs found in schema:\n " + "\n ".join(visitor.duplicates) + "\n"
              )
+         logger.debug("Created ID map with %d entries", len(self._id_map))
+         return self
+
+     def _validate_column_id(
+         self: Schema,
+         column_id: str,
+         loc: tuple,
+         errors: list[InitErrorDetails],
+     ) -> None:
+         """Validate a column ID from a constraint and append errors if invalid.
+
+         Parameters
+         ----------
+         schema : Schema
+             The schema being validated.
+         column_id : str
+             The column ID to validate.
+         loc : tuple
+             The location of the error in the schema.
+         errors : list[InitErrorDetails]
+             The list of errors to append to.
+         """
+         if column_id not in self:
+             _append_error(
+                 errors,
+                 loc,
+                 column_id,
+                 f"Column ID '{column_id}' not found in schema",
+             )
+         elif not isinstance(self[column_id], Column):
+             _append_error(
+                 errors,
+                 loc,
+                 column_id,
+                 f"ID '{column_id}' does not refer to a Column object",
+             )
+
+     def _validate_foreign_key_column(
+         self: Schema,
+         column_id: str,
+         table: Table,
+         loc: tuple,
+         errors: list[InitErrorDetails],
+     ) -> None:
+         """Validate a foreign key column ID from a constraint and append errors
+         if invalid.
+
+         Parameters
+         ----------
+         schema : Schema
+             The schema being validated.
+         column_id : str
+             The foreign key column ID to validate.
+         loc : tuple
+             The location of the error in the schema.
+         errors : list[InitErrorDetails]
+             The list of errors to append to.
+         """
+         try:
+             table._find_column_by_id(column_id)
+         except KeyError:
+             _append_error(
+                 errors,
+                 loc,
+                 column_id,
+                 f"Column '{column_id}' not found in table '{table.name}'",
+             )
+
+     @model_validator(mode="after")
+     def check_constraints(self: Schema) -> Schema:
+         """Check constraint objects for validity. This needs to be deferred
+         until after the schema is fully loaded and the ID map is created.
+
+         Raises
+         ------
+         pydantic.ValidationError
+             Raised if any constraints are invalid.
+
+         Returns
+         -------
+         `Schema`
+             The schema being validated.
+         """
+         errors: list[InitErrorDetails] = []
+
+         for table_index, table in enumerate(self.tables):
+             for constraint_index, constraint in enumerate(table.constraints):
+                 column_ids: list[str] = []
+                 referenced_column_ids: list[str] = []
+
+                 if isinstance(constraint, ForeignKeyConstraint):
+                     column_ids += constraint.columns
+                     referenced_column_ids += constraint.referenced_columns
+                 elif isinstance(constraint, UniqueConstraint):
+                     column_ids += constraint.columns
+                 # No extra checks are required on CheckConstraint objects.
+
+                 # Validate the foreign key columns
+                 for column_id in column_ids:
+                     self._validate_column_id(
+                         column_id,
+                         (
+                             "tables",
+                             table_index,
+                             "constraints",
+                             constraint_index,
+                             "columns",
+                             column_id,
+                         ),
+                         errors,
+                     )
+                     # Check that the foreign key column is within the source
+                     # table.
+                     self._validate_foreign_key_column(
+                         column_id,
+                         table,
+                         (
+                             "tables",
+                             table_index,
+                             "constraints",
+                             constraint_index,
+                             "columns",
+                             column_id,
+                         ),
+                         errors,
+                     )
+
+                 # Validate the primary key (reference) columns
+                 for referenced_column_id in referenced_column_ids:
+                     self._validate_column_id(
+                         referenced_column_id,
+                         (
+                             "tables",
+                             table_index,
+                             "constraints",
+                             constraint_index,
+                             "referenced_columns",
+                             referenced_column_id,
+                         ),
+                         errors,
+                     )
+
+         if errors:
+             raise ValidationError.from_exception_data("Schema validation failed", errors)
+
          return self
 
      def __getitem__(self, id: str) -> BaseObject:
-         """Get an object by its ID."""
+         """Get an object by its ID.
+
+         Parameters
+         ----------
+         id
+             The ID of the object to get.
+
+         Raises
+         ------
+         KeyError
+             Raised if the object with the given ID is not found in the schema.
+         """
          if id not in self:
              raise KeyError(f"Object with ID '{id}' not found in schema")
-         return self.id_map[id]
+         return self._id_map[id]
 
      def __contains__(self, id: str) -> bool:
-         """Check if an object with the given ID is in the schema."""
-         return id in self.id_map
+         """Check if an object with the given ID is in the schema.
 
-     @classmethod
-     def require_description(cls, rd: bool = True) -> None:
-         """Set whether a description is required for all objects.
-
-         This includes the schema, tables, columns, and constraints.
+         Parameters
+         ----------
+         id
+             The ID of the object to check.
+         """
+         return id in self._id_map
+
+     def find_object_by_id(self, id: str, obj_type: type[T]) -> T:
+         """Find an object with the given type by its ID.
+
+         Parameters
+         ----------
+         id
+             The ID of the object to find.
+         obj_type
+             The type of the object to find.
+
+         Returns
+         -------
+         BaseObject
+             The object with the given ID and type.
+
+         Raises
+         ------
+         KeyError
+             If the object with the given ID is not found in the schema.
+         TypeError
+             If the object that is found does not have the right type.
+
+         Notes
+         -----
+         The actual return type is the user-specified argument ``T``, which is
+         expected to be a subclass of `BaseObject`.
+         """
+         obj = self[id]
+         if not isinstance(obj, obj_type):
+             raise TypeError(f"Object with ID '{id}' is not of type '{obj_type.__name__}'")
+         return obj
+
+     def get_table_by_column(self, column: Column) -> Table:
+         """Find the table that contains a column.
+
+         Parameters
+         ----------
+         column
+             The column to find.
+
+         Returns
+         -------
+         `Table`
+             The table that contains the column.
+
+         Raises
+         ------
+         ValueError
+             If the column is not found in any table.
+         """
+         for table in self.tables:
+             if column in table.columns:
+                 return table
+         raise ValueError(f"Column '{column.name}' not found in any table")
 
-         Users should call this method to set the requirement for a description
-         when validating schemas, rather than change the flag value directly.
+     @classmethod
+     def from_uri(cls, resource_path: ResourcePathExpression, context: dict[str, Any] = {}) -> Schema:
+         """Load a `Schema` from a string representing a ``ResourcePath``.
+
+         Parameters
+         ----------
+         resource_path
+             The ``ResourcePath`` pointing to a YAML file.
+         context
+             Pydantic context to be used in validation.
+
+         Returns
+         -------
+         `str`
+             The ID of the object.
+
+         Raises
+         ------
+         yaml.YAMLError
+             Raised if there is an error loading the YAML data.
+         ValueError
+             Raised if there is an error reading the resource.
+         pydantic.ValidationError
+             Raised if the schema fails validation.
          """
-         logger.debug(f"Setting description requirement to '{rd}'")
-         cls.ValidationConfig._require_description = rd
+         logger.debug(f"Loading schema from: '{resource_path}'")
+         try:
+             rp_stream = ResourcePath(resource_path).read()
+         except Exception as e:
+             raise ValueError(f"Error reading resource from '{resource_path}' : {e}") from e
+         yaml_data = yaml.safe_load(rp_stream)
+         return Schema.model_validate(yaml_data, context=context)
 
      @classmethod
-     def is_description_required(cls) -> bool:
-         """Return whether a description is required for all objects."""
-         return cls.ValidationConfig._require_description
+     def from_stream(cls, source: IO[str], context: dict[str, Any] = {}) -> Schema:
+         """Load a `Schema` from a file stream which should contain YAML data.
+
+         Parameters
+         ----------
+         source
+             The file stream to read from.
+         context
+             Pydantic context to be used in validation.
+
+         Returns
+         -------
+         `Schema`
+             The Felis schema loaded from the stream.
+
+         Raises
+         ------
+         yaml.YAMLError
+             Raised if there is an error loading the YAML file.
+         pydantic.ValidationError
+             Raised if the schema fails validation.
+         """
+         logger.debug("Loading schema from: '%s'", source)
+         yaml_data = yaml.safe_load(source)
+         return Schema.model_validate(yaml_data, context=context)
+
+     def _model_dump(self, strip_ids: bool = False) -> dict[str, Any]:
+         """Dump the schema as a dictionary with some default arguments
+         applied.
+
+         Parameters
+         ----------
+         strip_ids
+             Whether to strip the IDs from the dumped data. Defaults to `False`.
+
+         Returns
+         -------
+         `dict` [ `str`, `Any` ]
+             The dumped schema data as a dictionary.
+         """
+         data = self.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True)
+         if strip_ids:
+             data = _strip_ids(data)
+         return data
+
+     def dump_yaml(self, stream: IO[str] = sys.stdout, strip_ids: bool = False) -> None:
+         """Pretty print the schema as YAML.
+
+         Parameters
+         ----------
+         stream
+             The stream to write the YAML data to.
+         strip_ids
+             Whether to strip the IDs from the dumped data. Defaults to `False`.
+         """
+         data = self._model_dump(strip_ids=strip_ids)
+         yaml.safe_dump(
+             data,
+             stream,
+             default_flow_style=False,
+             sort_keys=False,
+         )
+
+     def dump_json(self, stream: IO[str] = sys.stdout, strip_ids: bool = False) -> None:
+         """Pretty print the schema as JSON.
+
+         Parameters
+         ----------
+         stream
+             The stream to write the JSON data to.
+         strip_ids
+             Whether to strip the IDs from the dumped data. Defaults to `False`.
+         """
+         data = self._model_dump(strip_ids=strip_ids)
+         json.dump(
+             data,
+             stream,
+             indent=4,
+             sort_keys=False,
+         )