lsst-felis 29.0.0rc1__tar.gz → 29.2025.1100__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (39)
  1. {lsst_felis-29.0.0rc1/python/lsst_felis.egg-info → lsst_felis-29.2025.1100}/PKG-INFO +1 -1
  2. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/pyproject.toml +43 -26
  3. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/__init__.py +8 -1
  4. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/cli.py +52 -6
  5. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/datamodel.py +182 -34
  6. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/db/dialects.py +1 -1
  7. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/db/utils.py +1 -1
  8. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/diff.py +1 -1
  9. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/tap_schema.py +66 -6
  10. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100/python/lsst_felis.egg-info}/PKG-INFO +1 -1
  11. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/lsst_felis.egg-info/SOURCES.txt +0 -1
  12. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/tests/test_cli.py +31 -0
  13. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/tests/test_datamodel.py +40 -0
  14. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/tests/test_tap_schema.py +32 -14
  15. lsst_felis-29.0.0rc1/python/felis/version.py +0 -2
  16. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/COPYRIGHT +0 -0
  17. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/LICENSE +0 -0
  18. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/README.rst +0 -0
  19. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/db/__init__.py +0 -0
  20. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/db/schema.py +0 -0
  21. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/db/sqltypes.py +7 -7
  22. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/db/variants.py +0 -0
  23. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/metadata.py +0 -0
  24. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/py.typed +0 -0
  25. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/schemas/tap_schema_std.yaml +0 -0
  26. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/tests/__init__.py +0 -0
  27. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/tests/postgresql.py +0 -0
  28. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/felis/types.py +7 -7
  29. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/lsst_felis.egg-info/dependency_links.txt +0 -0
  30. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/lsst_felis.egg-info/entry_points.txt +0 -0
  31. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/lsst_felis.egg-info/requires.txt +0 -0
  32. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/lsst_felis.egg-info/top_level.txt +0 -0
  33. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/python/lsst_felis.egg-info/zip-safe +0 -0
  34. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/setup.cfg +0 -0
  35. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/tests/test_db.py +0 -0
  36. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/tests/test_diff.py +0 -0
  37. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/tests/test_metadata.py +0 -0
  38. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/tests/test_postgres.py +0 -0
  39. {lsst_felis-29.0.0rc1 → lsst_felis-29.2025.1100}/tests/test_tap_schema_postgres.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: lsst-felis
- Version: 29.0.0rc1
+ Version: 29.2025.1100
  Summary: A vocabulary for describing catalogs and acting on those descriptions
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
  License: GNU General Public License v3 or later (GPLv3+)
@@ -120,31 +120,15 @@ target-version = ["py311"]
  profile = "black"
  line_length = 110

- [tool.lsst_versions]
- write_to = "python/felis/version.py"
-
  [tool.ruff]
  line-length = 110
  target-version = "py311"
  exclude = [
-     "__init__.py",
-     "lex.py",
-     "yacc.py",
+     "__init__.py"
  ]

  [tool.ruff.lint]
  ignore = [
-     "D100",
-     "D102",
-     "D104",
-     "D105",
-     "D107",
-     "D200",
-     "D203",
-     "D205",
-     "D213",
-     "D400",
-     "D413",
      "N802",
      "N803",
      "N806",
@@ -152,7 +136,14 @@ ignore = [
      "N815",
      "N816",
      "N999",
-     "UP007", # Allow UNION in type annotation
+     "D107",
+     "D105",
+     "D102",
+     "D104",
+     "D100",
+     "D200",
+     "D205",
+     "D400",
  ]
  select = [
      "E", # pycodestyle
@@ -161,8 +152,29 @@ select = [
      "W", # pycodestyle
      "D", # pydocstyle
      "UP", # pyupgrade
+     "I", # isort
+     "RUF022", # sort __all__
+ ]
+ # Commented out to suppress "unused noqa" in jenkins which has older ruff not
+ # generating E721.
+ extend-select = [
+     "RUF100", # Warn about unused noqa
  ]

+ [tool.ruff.lint.isort]
+ known-first-party = ["felis"]
+
+ [tool.ruff.lint.pycodestyle]
+ max-doc-length = 79
+
+ [tool.ruff.lint.pydocstyle]
+ convention = "numpy"
+
+ [tool.ruff.format]
+ docstring-code-format = true
+ # Formatter does not know about indenting.
+ docstring-code-line-length = 69
+
  [tool.pydocstyle]
  convention = "numpy"
  # Our coding style does not require docstrings for magic methods (D105)
@@ -171,13 +183,16 @@ convention = "numpy"
  # Docstring at the very first line is not required
  # D200, D205 and D400 all complain if the first sentence of the docstring does
  # not fit on one line. We do not require docstrings in __init__ files (D104).
- add-ignore = ["D107", "D105", "D102", "D100", "D200", "D205", "D400", "D104"]
-
- [tool.ruff.lint.pycodestyle]
- max-doc-length = 79
-
- [tool.ruff.lint.pydocstyle]
- convention = "numpy"
+ add-ignore = [
+     "D107",
+     "D105",
+     "D102",
+     "D100",
+     "D200",
+     "D205",
+     "D400",
+     "D104",
+ ]

  [tool.numpydoc_validation]
  checks = [
@@ -189,11 +204,13 @@ checks = [
      "GL01", # Summary text can start on same line as """
      "GL08", # Do not require docstring.
      "ES01", # No extended summary required.
+     "PR04", # Do not require types on parameters.
+     "RT02", # Does not want named return value. DM style says we do.
      "SS05", # pydocstyle is better at finding infinitive verb.
-     "PR04", # Sphinx does not require parameter type.
  ]
  exclude = [
      "^test_.*", # Do not test docstrings in test code.
+     '^cli', # This is the main click command
      '^__init__$',
      '\._[a-zA-Z_]+$', # Private methods.
  ]
@@ -23,4 +23,11 @@ from .datamodel import Schema
  from .db.schema import create_database
  from .diff import DatabaseDiff, FormattedSchemaDiff, SchemaDiff
  from .metadata import MetaDataBuilder
- from .version import *
+
+ from importlib.metadata import PackageNotFoundError, version
+
+ try:
+     __version__ = version("lsst-felis")
+ except PackageNotFoundError:
+     # Package not installed or scons not run.
+     __version__ = "0.0.0"
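With python/felis/version.py removed, the package version is now resolved from the installed distribution metadata at import time. A minimal sketch of what that means for callers (nothing here is new API; both lookups come straight from the hunk above and agree with the Version field in PKG-INFO):

    from importlib.metadata import version

    import felis

    # Both values are read from the installed distribution's metadata.
    print(felis.__version__)
    print(version("lsst-felis"))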
@@ -180,14 +180,26 @@ def create(

  @cli.command("load-tap-schema", help="Load metadata from a Felis file into a TAP_SCHEMA database")
  @click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL")
- @click.option("--tap-schema-name", help="Name of the TAP_SCHEMA schema in the database (default: TAP_SCHEMA)")
  @click.option(
-     "--tap-tables-postfix", help="Postfix which is applied to standard TAP_SCHEMA table names", default=""
+     "--tap-schema-name", "-n", help="Name of the TAP_SCHEMA schema in the database (default: TAP_SCHEMA)"
+ )
+ @click.option(
+     "--tap-tables-postfix",
+     "-p",
+     help="Postfix which is applied to standard TAP_SCHEMA table names",
+     default="",
+ )
+ @click.option("--tap-schema-index", "-i", type=int, help="TAP_SCHEMA index of the schema in this environment")
+ @click.option("--dry-run", "-D", is_flag=True, help="Execute dry run only. Does not insert any data.")
+ @click.option("--echo", "-e", is_flag=True, help="Print out the generated insert statements to stdout")
+ @click.option("--output-file", "-o", type=click.Path(), help="Write SQL commands to a file")
+ @click.option(
+     "--unique-keys",
+     "-u",
+     is_flag=True,
+     help="Generate unique key_id values for keys and key_columns tables by prepending the schema name",
+     default=False,
  )
- @click.option("--tap-schema-index", type=int, help="TAP_SCHEMA index of the schema in this environment")
- @click.option("--dry-run", is_flag=True, help="Execute dry run only. Does not insert any data.")
- @click.option("--echo", is_flag=True, help="Print out the generated insert statements to stdout")
- @click.option("--output-file", type=click.Path(), help="Write SQL commands to a file")
  @click.argument("file", type=click.File())
  @click.pass_context
  def load_tap_schema(
@@ -199,6 +211,7 @@ def load_tap_schema(
      dry_run: bool,
      echo: bool,
      output_file: str | None,
+     unique_keys: bool,
      file: IO[str],
  ) -> None:
      """Load TAP metadata from a Felis file.
@@ -248,6 +261,7 @@ def load_tap_schema(
          dry_run=dry_run,
          print_sql=echo,
          output_path=output_file,
+         unique_keys=unique_keys,
      ).load()


@@ -421,5 +435,37 @@ def diff(
          raise click.ClickException("Schema was changed")


+ @cli.command(
+     "dump",
+     help="""
+     Dump a schema file to YAML or JSON format
+
+     Example:
+
+         felis dump schema.yaml schema.json
+
+         felis dump schema.yaml schema_dump.yaml
+     """,
+ )
+ @click.argument("files", nargs=2, type=click.Path())
+ @click.pass_context
+ def dump(
+     ctx: click.Context,
+     files: list[str],
+ ) -> None:
+     if files[1].endswith(".json"):
+         format = "json"
+     elif files[1].endswith(".yaml"):
+         format = "yaml"
+     else:
+         raise click.ClickException("Output file must have a .json or .yaml extension")
+     schema = Schema.from_uri(files[0], context={"id_generation": ctx.obj["id_generation"]})
+     with open(files[1], "w") as f:
+         if format == "yaml":
+             schema.dump_yaml(f)
+         elif format == "json":
+             schema.dump_json(f)
+
+
  if __name__ == "__main__":
      cli()
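A minimal sketch of exercising the new dump command programmatically with Click's test runner, assuming cli is the Click group defined in felis.cli (the same pattern the new tests in tests/test_cli.py use); schema.yaml and schema.json are placeholder paths:

    from click.testing import CliRunner

    from felis.cli import cli

    runner = CliRunner()
    # The output format is chosen from the second argument's extension.
    result = runner.invoke(cli, ["dump", "schema.yaml", "schema.json"])
    assert result.exit_code == 0, result.output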
@@ -23,16 +23,27 @@

  from __future__ import annotations

+ import json
  import logging
+ import sys
  from collections.abc import Sequence
  from enum import StrEnum, auto
- from typing import IO, Annotated, Any, Generic, Literal, TypeAlias, TypeVar, Union
+ from typing import IO, Annotated, Any, Generic, Literal, TypeAlias, TypeVar

  import yaml
  from astropy import units as units # type: ignore
  from astropy.io.votable import ucd # type: ignore
  from lsst.resources import ResourcePath, ResourcePathExpression
- from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator, model_validator
+ from pydantic import (
+     BaseModel,
+     ConfigDict,
+     Field,
+     PrivateAttr,
+     ValidationInfo,
+     field_serializer,
+     field_validator,
+     model_validator,
+ )

  from .db.dialects import get_supported_dialects
  from .db.sqltypes import get_type_func
@@ -43,9 +54,10 @@ logger = logging.getLogger(__name__)

  __all__ = (
      "BaseObject",
-     "Column",
      "CheckConstraint",
+     "Column",
      "Constraint",
+     "DataType",
      "ForeignKeyConstraint",
      "Index",
      "Schema",
@@ -58,6 +70,7 @@ CONFIG = ConfigDict(
      populate_by_name=True, # Populate attributes by name.
      extra="forbid", # Do not allow extra fields.
      str_strip_whitespace=True, # Strip whitespace from string fields.
+     use_enum_values=False, # Do not use enum values during serialization.
  )
  """Pydantic model configuration as described in:
  https://docs.pydantic.dev/2.0/api/config/#pydantic.config.ConfigDict
@@ -117,7 +130,7 @@ class BaseObject(BaseModel):


  class DataType(StrEnum):
-     """`Enum` representing the data types supported by Felis."""
+     """``Enum`` representing the data types supported by Felis."""
      boolean = auto()
      byte = auto()

@@ -185,12 +198,6 @@ class Column(BaseObject):
      autoincrement: bool | None = None
      """Whether the column is autoincremented."""

-     mysql_datatype: str | None = Field(None, alias="mysql:datatype")
-     """MySQL datatype override on the column."""
-
-     postgresql_datatype: str | None = Field(None, alias="postgresql:datatype")
-     """PostgreSQL datatype override on the column."""
-
      ivoa_ucd: str | None = Field(None, alias="ivoa:ucd")
      """IVOA UCD of the column."""

@@ -219,6 +226,12 @@
      votable_datatype: str | None = Field(None, alias="votable:datatype")
      """VOTable datatype of the column."""

+     mysql_datatype: str | None = Field(None, alias="mysql:datatype")
+     """MySQL datatype override on the column."""
+
+     postgresql_datatype: str | None = Field(None, alias="postgresql:datatype")
+     """PostgreSQL datatype override on the column."""
+
      @model_validator(mode="after")
      def check_value(self) -> Column:
          """Check that the default value is valid.
@@ -458,6 +471,39 @@ class Column(BaseObject):
              values["votable:arraysize"] = str(arraysize)
          return values

+     @field_serializer("datatype")
+     def serialize_datatype(self, value: DataType) -> str:
+         """Convert `DataType` to string when serializing to JSON/YAML.
+
+         Parameters
+         ----------
+         value
+             The `DataType` value to serialize.
+
+         Returns
+         -------
+         `str`
+             The serialized `DataType` value.
+         """
+         return str(value)
+
+     @field_validator("datatype", mode="before")
+     @classmethod
+     def deserialize_datatype(cls, value: str) -> DataType:
+         """Convert string back into `DataType` when loading from JSON/YAML.
+
+         Parameters
+         ----------
+         value
+             The string value to deserialize.
+
+         Returns
+         -------
+         `DataType`
+             The deserialized `DataType` value.
+         """
+         return DataType(value)
+

  class Constraint(BaseObject):
      """Table constraint model."""
@@ -493,6 +539,22 @@ class CheckConstraint(Constraint):
      expression: str
      """Expression for the check constraint."""

+     @field_serializer("type")
+     def serialize_type(self, value: str) -> str:
+         """Ensure '@type' is included in serialized output.
+
+         Parameters
+         ----------
+         value
+             The value to serialize.
+
+         Returns
+         -------
+         `str`
+             The serialized value.
+         """
+         return value
+

  class UniqueConstraint(Constraint):
      """Table unique constraint model."""
@@ -503,6 +565,22 @@ class UniqueConstraint(Constraint):
      columns: list[str]
      """Columns in the unique constraint."""

+     @field_serializer("type")
+     def serialize_type(self, value: str) -> str:
+         """Ensure '@type' is included in serialized output.
+
+         Parameters
+         ----------
+         value
+             The value to serialize.
+
+         Returns
+         -------
+         `str`
+             The serialized value.
+         """
+         return value
+

  class ForeignKeyConstraint(Constraint):
      """Table foreign key constraint model.
@@ -525,6 +603,28 @@ class ForeignKeyConstraint(Constraint):
      referenced_columns: list[str] = Field(alias="referencedColumns")
      """The columns referenced by the foreign key."""

+     @field_serializer("type")
+     def serialize_type(self, value: str) -> str:
+         """Ensure '@type' is included in serialized output.
+
+         Parameters
+         ----------
+         value
+             The value to serialize.
+
+         Returns
+         -------
+         `str`
+             The serialized value.
+         """
+         return value
+
+
+ _ConstraintType = Annotated[
+     CheckConstraint | ForeignKeyConstraint | UniqueConstraint, Field(discriminator="type")
+ ]
+ """Type alias for a constraint type."""
+

  class Index(BaseObject):
      """Table index model.
@@ -566,12 +666,6 @@ class Index(BaseObject):
          return values


- _ConstraintType = Annotated[
-     Union[CheckConstraint, ForeignKeyConstraint, UniqueConstraint], Field(discriminator="type")
- ]
- """Type alias for a constraint type."""
-
-
  ColumnRef: TypeAlias = str
  """Type alias for a column reference."""

@@ -585,7 +679,7 @@ class ColumnGroup(BaseObject):
      ivoa_ucd: str | None = Field(None, alias="ivoa:ucd")
      """IVOA UCD of the column."""

-     table: Table | None = None
+     table: Table | None = Field(None, exclude=True)
      """Reference to the parent table."""

      @field_validator("ivoa_ucd")
@@ -635,21 +729,25 @@ class ColumnGroup(BaseObject):

          self.columns = dereferenced_columns

+     @field_serializer("columns")
+     def serialize_columns(self, columns: list[ColumnRef | Column]) -> list[str]:
+         """Serialize columns as their IDs.

- class Table(BaseObject):
-     """Table model."""
-
-     columns: Sequence[Column]
-     """Columns in the table."""
+         Parameters
+         ----------
+         columns
+             The columns to serialize.

-     constraints: list[_ConstraintType] = Field(default_factory=list)
-     """Constraints on the table."""
+         Returns
+         -------
+         `list` [ `str` ]
+             The serialized column IDs.
+         """
+         return [col if isinstance(col, str) else col.id for col in columns]

-     indexes: list[Index] = Field(default_factory=list)
-     """Indexes on the table."""

-     column_groups: list[ColumnGroup] = Field(default_factory=list, alias="columnGroups")
-     """Column groups in the table."""
+ class Table(BaseObject):
+     """Table model."""

      primary_key: str | list[str] | None = Field(None, alias="primaryKey")
      """Primary key of the table."""
@@ -663,6 +761,18 @@ class Table(BaseObject):
      mysql_charset: str | None = Field(None, alias="mysql:charset")
      """MySQL charset to use for the table."""

+     columns: Sequence[Column]
+     """Columns in the table."""
+
+     column_groups: list[ColumnGroup] = Field(default_factory=list, alias="columnGroups")
+     """Column groups in the table."""
+
+     constraints: list[_ConstraintType] = Field(default_factory=list)
+     """Constraints on the table."""
+
+     indexes: list[Index] = Field(default_factory=list)
+     """Indexes on the table."""
+
      @field_validator("columns", mode="after")
      @classmethod
      def check_unique_column_names(cls, columns: list[Column]) -> list[Column]:
@@ -821,10 +931,10 @@ class SchemaIdVisitor:
          if hasattr(obj, "id"):
              obj_id = getattr(obj, "id")
              if self.schema is not None:
-                 if obj_id in self.schema.id_map:
+                 if obj_id in self.schema._id_map:
                      self.duplicates.add(obj_id)
                  else:
-                     self.schema.id_map[obj_id] = obj
+                     self.schema._id_map[obj_id] = obj

      def visit_schema(self, schema: Schema) -> None:
          """Visit the objects in a schema and build the ID map.
@@ -894,7 +1004,7 @@ class Schema(BaseObject, Generic[T]):
      tables: Sequence[Table]
      """The tables in the schema."""

-     id_map: dict[str, Any] = Field(default_factory=dict, exclude=True)
+     _id_map: dict[str, Any] = PrivateAttr(default_factory=dict)
      """Map of IDs to objects."""

      @model_validator(mode="before")
@@ -932,6 +1042,14 @@ class Schema(BaseObject, Generic[T]):
                      if "@id" not in column:
                          column["@id"] = f"#{table['name']}.{column['name']}"
                          logger.debug(f"Generated ID '{column['@id']}' for column '{column['name']}'")
+             if "columnGroups" in table:
+                 for column_group in table["columnGroups"]:
+                     if "@id" not in column_group:
+                         column_group["@id"] = f"#{table['name']}.{column_group['name']}"
+                         logger.debug(
+                             f"Generated ID '{column_group['@id']}' for column group "
+                             f"'{column_group['name']}'"
+                         )
              if "constraints" in table:
                  for constraint in table["constraints"]:
                      if "@id" not in constraint:
@@ -1069,7 +1187,7 @@ class Schema(BaseObject, Generic[T]):
          This is called automatically by the `model_post_init` method. If the
          ID map is already populated, this method will return immediately.
          """
-         if len(self.id_map):
+         if self._id_map:
              logger.debug("Ignoring call to create_id_map() - ID map was already populated")
              return self
          visitor: SchemaIdVisitor = SchemaIdVisitor()
@@ -1113,7 +1231,7 @@ class Schema(BaseObject, Generic[T]):
          """
          if id not in self:
              raise KeyError(f"Object with ID '{id}' not found in schema")
-         return self.id_map[id]
+         return self._id_map[id]

      def __contains__(self, id: str) -> bool:
          """Check if an object with the given ID is in the schema.
@@ -1123,7 +1241,7 @@ class Schema(BaseObject, Generic[T]):
          id
              The ID of the object to check.
          """
-         return id in self.id_map
+         return id in self._id_map

      def find_object_by_id(self, id: str, obj_type: type[T]) -> T:
          """Find an object with the given type by its ID.
@@ -1239,3 +1357,33 @@ class Schema(BaseObject, Generic[T]):
          logger.debug("Loading schema from: '%s'", source)
          yaml_data = yaml.safe_load(source)
          return Schema.model_validate(yaml_data, context=context)
+
+     def dump_yaml(self, stream: IO[str] = sys.stdout) -> None:
+         """Pretty print the schema as YAML.
+
+         Parameters
+         ----------
+         stream
+             The stream to write the YAML data to.
+         """
+         yaml.safe_dump(
+             self.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True),
+             stream,
+             default_flow_style=False,
+             sort_keys=False,
+         )
+
+     def dump_json(self, stream: IO[str] = sys.stdout) -> None:
+         """Pretty print the schema as JSON.
+
+         Parameters
+         ----------
+         stream
+             The stream to write the JSON data to.
+         """
+         json.dump(
+             self.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True),
+             stream,
+             indent=4,
+             sort_keys=False,
+         )
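A minimal sketch of the new serialization helpers in isolation; my_schema.yaml and my_schema.json are placeholder paths, and dump_yaml/dump_json default to sys.stdout as shown in the hunk above:

    import sys

    from felis.datamodel import Schema

    schema = Schema.from_uri("my_schema.yaml")
    # Pretty-print the schema as YAML to stdout ...
    schema.dump_yaml(sys.stdout)
    # ... or write a JSON dump to a file.
    with open("my_schema.json", "w") as f:
        schema.dump_json(f)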
@@ -32,7 +32,7 @@ from sqlalchemy.engine.mock import create_mock_engine

  from .sqltypes import MYSQL, POSTGRES, SQLITE

- __all__ = ["get_supported_dialects", "get_dialect_module"]
+ __all__ = ["get_dialect_module", "get_supported_dialects"]

  _DIALECT_NAMES = (MYSQL, POSTGRES, SQLITE)
  """List of supported dialect names.
@@ -38,7 +38,7 @@ from sqlalchemy.types import TypeEngine

  from .dialects import get_dialect_module

- __all__ = ["string_to_typeengine", "SQLWriter", "ConnectionWrapper", "DatabaseContext"]
+ __all__ = ["ConnectionWrapper", "DatabaseContext", "SQLWriter", "string_to_typeengine"]

  logger = logging.getLogger("felis")

@@ -35,7 +35,7 @@ from sqlalchemy import Engine, MetaData
  from .datamodel import Schema
  from .metadata import MetaDataBuilder

- __all__ = ["SchemaDiff", "DatabaseDiff"]
+ __all__ = ["DatabaseDiff", "SchemaDiff"]

  logger = logging.getLogger(__name__)

@@ -27,7 +27,7 @@ import re
  from typing import Any

  from lsst.resources import ResourcePath
- from sqlalchemy import MetaData, Table, text
+ from sqlalchemy import MetaData, Table, select, text
  from sqlalchemy.engine import Connection, Engine
  from sqlalchemy.engine.mock import MockConnection
  from sqlalchemy.exc import SQLAlchemyError
@@ -35,13 +35,13 @@ from sqlalchemy.schema import CreateSchema
  from sqlalchemy.sql.dml import Insert

  from felis import datamodel
- from felis.datamodel import Schema
+ from felis.datamodel import Constraint, Schema
  from felis.db.utils import is_valid_engine
  from felis.metadata import MetaDataBuilder

  from .types import FelisType

- __all__ = ["TableManager", "DataLoader"]
+ __all__ = ["DataLoader", "TableManager"]

  logger = logging.getLogger(__name__)

@@ -163,7 +163,7 @@ class TableManager:
          tables to be accessed by their standard TAP_SCHEMA names.
          """
          if table_name not in self._table_map:
-             raise KeyError(f"Table '{table_name}' not found in table map")
+             raise KeyError(f"Table '{table_name}' not found in TAP_SCHEMA")
          return self.metadata.tables[self._table_map[table_name]]

      @property
@@ -365,6 +365,34 @@
          self._create_schema(engine)
          self.metadata.create_all(engine)

+     def select(self, engine: Engine, table_name: str, filter_condition: str = "") -> list[dict[str, Any]]:
+         """Select all rows from a TAP_SCHEMA table with an optional filter
+         condition.
+
+         Parameters
+         ----------
+         engine
+             The SQLAlchemy engine to use to connect to the database.
+         table_name
+             The name of the table to select from.
+         filter_condition
+             The filter condition as a string. If empty, no filter will be
+             applied.
+
+         Returns
+         -------
+         list
+             A list of dictionaries containing the rows from the table.
+         """
+         table = self[table_name]
+         query = select(table)
+         if filter_condition:
+             query = query.where(text(filter_condition))
+         with engine.connect() as connection:
+             result = connection.execute(query)
+             rows = [dict(row._mapping) for row in result]
+         return rows
+

  class DataLoader:
      """Load data into the TAP_SCHEMA tables.
@@ -386,6 +414,9 @@
          If True, print the SQL statements that will be executed.
      dry_run
          If True, the data will not be loaded into the database.
+     unique_keys
+         If True, prepend the schema name to the key name to make it unique
+         when loading data into the keys and key_columns tables.
      """

      def __init__(
@@ -397,6 +428,7 @@
          output_path: str | None = None,
          print_sql: bool = False,
          dry_run: bool = False,
+         unique_keys: bool = False,
      ):
          self.schema = schema
          self.mgr = mgr
@@ -406,6 +438,7 @@
          self.output_path = output_path
          self.print_sql = print_sql
          self.dry_run = dry_run
+         self.unique_keys = unique_keys

      def load(self) -> None:
          """Load the schema data into the TAP_SCHEMA tables.
@@ -501,6 +534,32 @@
          }
          self._insert("columns", column_record)

+     def _get_key(self, constraint: Constraint) -> str:
+         """Get the key name for a constraint.
+
+         Parameters
+         ----------
+         constraint
+             The constraint to get the key name for.
+
+         Returns
+         -------
+         str
+             The key name for the constraint.
+
+         Notes
+         -----
+         This will prepend the name of the schema to the key name if the
+         `unique_keys` attribute is set to True. Otherwise, it will just return
+         the name of the constraint.
+         """
+         if self.unique_keys:
+             key_id = f"{self.schema.name}_{constraint.name}"
+             logger.debug("Generated unique key_id: %s -> %s", constraint.name, key_id)
+         else:
+             key_id = constraint.name
+         return key_id
+
      def _insert_keys(self) -> None:
          """Insert the foreign keys into the keys and key_columns tables."""
          for table in self.schema.tables:
@@ -511,8 +570,9 @@
                      constraint.referenced_columns[0], datamodel.Column
                  )
                  referenced_table = self.schema.get_table_by_column(referenced_column)
+                 key_id = self._get_key(constraint)
                  key_record = {
-                     "key_id": constraint.name,
+                     "key_id": key_id,
                      "from_table": self._get_table_name(table),
                      "target_table": self._get_table_name(referenced_table),
                      "description": constraint.description,
@@ -526,7 +586,7 @@
                          constraint.referenced_columns[0], datamodel.Column
                      )
                      key_columns_record = {
-                         "key_id": constraint.name,
+                         "key_id": key_id,
                          "from_column": from_column.name,
                          "target_column": target_column.name,
                      }
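A minimal sketch that ties the unique_keys option and the new TableManager.select helper together, adapted from the tests later in this diff; the in-memory SQLite engine and schema file are placeholders:

    from sqlalchemy import create_engine

    from felis.datamodel import Schema
    from felis.tap_schema import DataLoader, TableManager

    engine = create_engine("sqlite:///:memory:")
    schema = Schema.from_uri("my_schema.yaml")

    mgr = TableManager(apply_schema_to_metadata=False)
    mgr.initialize_database(engine)

    # key_id values are prefixed with the schema name because unique_keys=True.
    DataLoader(schema, mgr, engine, unique_keys=True).load()

    # Read rows back through the new select helper, optionally filtered.
    keys = mgr.select(engine, "keys")
    print([row["key_id"] for row in keys])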
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: lsst-felis
- Version: 29.0.0rc1
+ Version: 29.2025.1100
  Summary: A vocabulary for describing catalogs and acting on those descriptions
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
  License: GNU General Public License v3 or later (GPLv3+)
@@ -11,7 +11,6 @@ python/felis/metadata.py
  python/felis/py.typed
  python/felis/tap_schema.py
  python/felis/types.py
- python/felis/version.py
  python/felis/db/__init__.py
  python/felis/db/dialects.py
  python/felis/db/schema.py
@@ -227,6 +227,37 @@ class CliTestCase(unittest.TestCase):
          print(result.output)
          self.assertNotEqual(result.exit_code, 0)

+     def test_dump_yaml(self) -> None:
+         """Test for ``dump`` command with YAML output."""
+         runner = CliRunner()
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".yaml") as temp_file:
+             temp_file_name = temp_file.name
+         try:
+             result = runner.invoke(cli, ["dump", TEST_YAML, temp_file_name], catch_exceptions=False)
+             print(result.output)
+             self.assertEqual(result.exit_code, 0)
+         finally:
+             os.remove(temp_file_name)
+
+     def test_dump_json(self) -> None:
+         """Test for ``dump`` command with JSON output."""
+         runner = CliRunner()
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
+             temp_file_name = temp_file.name
+         try:
+             result = runner.invoke(cli, ["dump", TEST_YAML, temp_file_name], catch_exceptions=False)
+             print(result.output)
+             self.assertEqual(result.exit_code, 0)
+         finally:
+             os.remove(temp_file_name)
+
+     def test_dump_invalid_file_extension(self) -> None:
+         """Test for ``dump`` command with JSON output."""
+         runner = CliRunner()
+         result = runner.invoke(cli, ["dump", TEST_YAML, "out.bad"], catch_exceptions=False)
+         print(result.output)
+         self.assertNotEqual(result.exit_code, 0)
+

  if __name__ == "__main__":
      unittest.main()
@@ -19,8 +19,10 @@
  # You should have received a copy of the GNU General Public License
  # along with this program. If not, see <https://www.gnu.org/licenses/>.

+ import difflib
  import os
  import pathlib
+ import tempfile
  import unittest
  from collections import defaultdict

@@ -45,6 +47,7 @@ from felis.datamodel import (
  TEST_DIR = os.path.abspath(os.path.dirname(__file__))
  TEST_YAML = os.path.join(TEST_DIR, "data", "test.yml")
  TEST_SALES = os.path.join(TEST_DIR, "data", "sales.yaml")
+ TEST_SERIALIZATION = os.path.join(TEST_DIR, "data", "test_serialization.yaml")
  TEST_ID_GENERATION = os.path.join(TEST_DIR, "data", "test_id_generation.yaml")


@@ -880,5 +883,42 @@ class RedundantDatatypesTest(unittest.TestCase):
          Column(**{"name": "testColumn", "@id": "#test_col_id", "datatype": "double", "precision": 6})


+ class SchemaSerializationTest(unittest.TestCase):
+     """Test serialization and deserialization of the schema data model."""
+
+     def test_serialization(self) -> None:
+         """Test serialization of the schema data model."""
+         # Read the original YAML content from the test_serialization.yaml file
+         with open(TEST_SERIALIZATION) as file:
+             original_yaml_content = file.read()
+
+         # Load the schema from the original YAML content
+         schema_out = Schema.from_uri(TEST_SERIALIZATION)
+         serialized_data = schema_out.model_dump(by_alias=True, exclude_none=True, exclude_defaults=True)
+
+         # Write the serialized data to a temporary YAML file
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".yaml", mode="w+") as temp_file:
+             yaml.dump(serialized_data, temp_file, default_flow_style=False, sort_keys=False)
+             temp_file.seek(0)
+             # Read the deserialized YAML content from the temporary file
+             deserialized_yaml_content = temp_file.read()
+
+         # Show the differences between the original and deserialized YAML
+         diff = difflib.unified_diff(
+             original_yaml_content.splitlines(keepends=True),
+             deserialized_yaml_content.splitlines(keepends=True),
+             fromfile="original.yaml",
+             tofile="deserialized.yaml",
+         )
+         print("Differences:\n", "".join(diff))
+
+         # Assert that the original and deserialized YAML are the same
+         self.assertEqual(
+             yaml.safe_load(original_yaml_content),
+             yaml.safe_load(deserialized_yaml_content),
+             "The original and deserialized YAML contents should be the same",
+         )
+
+
  if __name__ == "__main__":
      unittest.main()
@@ -25,7 +25,7 @@ import tempfile
  import unittest
  from typing import Any

- from sqlalchemy import Engine, MetaData, create_engine, select
+ from sqlalchemy import MetaData, create_engine, select

  from felis.datamodel import Schema
  from felis.tap_schema import DataLoader, TableManager
@@ -111,6 +111,37 @@ class DataLoaderTestCase(unittest.TestCase):
              f"Expected 22 'INSERT INTO' statements, found {insert_count}",
          )

+     def test_unique_keys(self) -> None:
+         """Test generation of unique foreign keys."""
+         engine = create_engine("sqlite:///:memory:")
+
+         mgr = TableManager(apply_schema_to_metadata=False)
+         mgr.initialize_database(engine)
+
+         loader = DataLoader(self.schema, mgr, engine, unique_keys=True)
+         loader.load()
+
+         keys_data = mgr.select(engine, "keys")
+         self.assertGreaterEqual(len(keys_data), 1)
+         for row in keys_data:
+             self.assertTrue(row["key_id"].startswith(f"{self.schema.name}_"))
+
+         key_columns_data = mgr.select(engine, "key_columns")
+         self.assertGreaterEqual(len(key_columns_data), 1)
+         for row in key_columns_data:
+             self.assertTrue(row["key_id"].startswith(f"{self.schema.name}_"))
+
+     def test_select_with_filter(self) -> None:
+         """Test selecting rows with a filter."""
+         engine = create_engine("sqlite:///:memory:")
+         mgr = TableManager(apply_schema_to_metadata=False)
+         mgr.initialize_database(engine)
+         loader = DataLoader(self.schema, mgr, engine, unique_keys=True)
+         loader.load()
+
+         rows = mgr.select(engine, "columns", "table_name = 'test_schema.table1'")
+         self.assertEqual(len(rows), 16)
+

  def _find_row(rows: list[dict[str, Any]], column_name: str, value: str) -> dict[str, Any]:
      next_row = next(
@@ -122,19 +153,6 @@ def _find_row(rows: list[dict[str, Any]], column_name: str, value: str) -> dict[
      )
      return next_row


- def _fetch_results(_engine: Engine, _metadata: MetaData) -> dict:
-     results: dict[str, Any] = {}
-     with _engine.connect() as connection:
-         for table_name in TableManager.get_table_names_std():
-             tap_table = _metadata.tables[table_name]
-             primary_key_columns = tap_table.primary_key.columns
-             stmt = select(tap_table).order_by(*primary_key_columns)
-             result = connection.execute(stmt)
-             column_data = [row._asdict() for row in result]
-             results[table_name] = column_data
-     return results
-
-
  class TapSchemaDataTest(unittest.TestCase):
      """Test the validity of generated TAP SCHEMA data."""
@@ -1,2 +0,0 @@
- __all__ = ["__version__"]
- __version__ = "29.0.0rc1"
@@ -32,20 +32,20 @@ from sqlalchemy.dialects import mysql, postgresql
  from sqlalchemy.ext.compiler import compiles

  __all__ = [
+     "binary",
      "boolean",
      "byte",
-     "short",
+     "char",
+     "double",
+     "float",
+     "get_type_func",
      "int",
      "long",
-     "float",
-     "double",
-     "char",
+     "short",
      "string",
-     "unicode",
      "text",
-     "binary",
      "timestamp",
-     "get_type_func",
+     "unicode",
  ]

  MYSQL = "mysql"
@@ -26,20 +26,20 @@ from __future__ import annotations
  from typing import Any

  __all__ = [
-     "FelisType",
+     "Binary",
      "Boolean",
      "Byte",
-     "Short",
+     "Char",
+     "Double",
+     "FelisType",
+     "Float",
      "Int",
      "Long",
-     "Float",
-     "Double",
-     "Char",
+     "Short",
      "String",
-     "Unicode",
      "Text",
-     "Binary",
      "Timestamp",
+     "Unicode",
  ]
