lsst-felis 28.2024.4500__py3-none-any.whl → 30.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. felis/__init__.py +9 -1
  2. felis/cli.py +308 -209
  3. felis/config/tap_schema/columns.csv +33 -0
  4. felis/config/tap_schema/key_columns.csv +8 -0
  5. felis/config/tap_schema/keys.csv +8 -0
  6. felis/config/tap_schema/schemas.csv +2 -0
  7. felis/config/tap_schema/tables.csv +6 -0
  8. felis/config/tap_schema/tap_schema_extensions.yaml +73 -0
  9. felis/datamodel.py +599 -59
  10. felis/db/{dialects.py → _dialects.py} +69 -4
  11. felis/db/{variants.py → _variants.py} +1 -1
  12. felis/db/database_context.py +917 -0
  13. felis/diff.py +234 -0
  14. felis/metadata.py +89 -19
  15. felis/tap_schema.py +271 -166
  16. felis/tests/postgresql.py +1 -1
  17. felis/tests/run_cli.py +79 -0
  18. felis/types.py +7 -7
  19. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/METADATA +20 -16
  20. lsst_felis-30.0.0rc3.dist-info/RECORD +31 -0
  21. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/WHEEL +1 -1
  22. felis/db/utils.py +0 -409
  23. felis/tap.py +0 -597
  24. felis/tests/utils.py +0 -122
  25. felis/version.py +0 -2
  26. lsst_felis-28.2024.4500.dist-info/RECORD +0 -26
  27. felis/{schemas → config/tap_schema}/tap_schema_std.yaml +0 -0
  28. felis/db/{sqltypes.py → _sqltypes.py} +7 -7
  29. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/entry_points.txt +0 -0
  30. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info/licenses}/COPYRIGHT +0 -0
  31. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info/licenses}/LICENSE +0 -0
  32. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/top_level.txt +0 -0
  33. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/zip-safe +0 -0
felis/diff.py ADDED
@@ -0,0 +1,234 @@
1
+ """Compare schemas and print the differences."""
2
+
3
+ # This file is part of felis.
4
+ #
5
+ # Developed for the LSST Data Management System.
6
+ # This product includes software developed by the LSST Project
7
+ # (https://www.lsst.org).
8
+ # See the COPYRIGHT file at the top-level directory of this distribution
9
+ # for details of code ownership.
10
+ #
11
+ # This program is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # This program is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
23
+
24
+ import logging
25
+ import pprint
26
+ import re
27
+ from collections.abc import Callable
28
+ from typing import Any
29
+
30
+ from alembic.autogenerate import compare_metadata
31
+ from alembic.migration import MigrationContext
32
+ from deepdiff.diff import DeepDiff
33
+ from sqlalchemy import Engine, MetaData
34
+
35
+ from .datamodel import Schema
36
+ from .metadata import MetaDataBuilder
37
+
38
+ __all__ = ["DatabaseDiff", "SchemaDiff"]
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ # Change alembic log level to avoid unnecessary output
43
+ logging.getLogger("alembic").setLevel(logging.WARNING)
44
+
45
+
46
class SchemaDiff:
    """
    Diff two schemas with DeepDiff and report the result.

    Both schemas are dumped to dictionaries with ``exclude_none=True`` and
    compared with ``ignore_order=True``. The dumped dictionaries are kept
    on the instance as ``dict1`` and ``dict2`` and the comparison result
    as ``diff``.

    Parameters
    ----------
    schema1
        The first schema to compare.
    schema2
        The second schema to compare.
    """

    def __init__(self, schema1: Schema, schema2: Schema):
        first = schema1.model_dump(exclude_none=True)
        second = schema2.model_dump(exclude_none=True)
        self.dict1, self.dict2 = first, second
        self.diff = DeepDiff(first, second, ignore_order=True)

    def print(self) -> None:
        """
        Pretty-print the raw difference object.
        """
        pprint.pprint(self.diff)

    @property
    def has_changes(self) -> bool:
        """
        Report whether the comparison found any differences.

        Returns
        -------
        bool
            True if there are differences, False otherwise.
        """
        return len(self.diff) != 0
80
+
81
+
82
class FormattedSchemaDiff(SchemaDiff):
    """
    Compare two schemas using DeepDiff and print the differences using a
    customized output format.

    Each reported change is printed as a header line holding the
    dot-separated path of the affected element, followed by a ``- `` line
    for a removed/old value and/or a ``+ `` line for an added/new value.
    Only the DeepDiff change types listed in `print` are reported; any
    other change type present in the diff is not printed.

    Parameters
    ----------
    schema1
        The first schema to compare.
    schema2
        The second schema to compare.
    """

    # One DeepDiff path element: ['key'], ["key"] or [index]. Compiled once
    # at class creation instead of on every parse call.
    # NOTE(review): the double-quoted form is included because DeepDiff is
    # understood to switch quoting when a key contains a single quote --
    # confirm against the DeepDiff path documentation.
    _PATH_ELEMENT = re.compile(r"""\['([^']+)'\]|\["([^"]+)"\]|\[(\d+)\]""")

    def print(self) -> None:
        """
        Print the differences between the two schemas using a custom format.
        """
        # Dispatch table: DeepDiff change type -> method that prints it.
        handlers: dict[str, Callable[[dict[str, Any]], None]] = {
            "values_changed": self._handle_values_changed,
            "iterable_item_added": self._handle_iterable_item_added,
            "iterable_item_removed": self._handle_iterable_item_removed,
            "dictionary_item_added": self._handle_dictionary_item_added,
            "dictionary_item_removed": self._handle_dictionary_item_removed,
        }

        for change_type, handler in handlers.items():
            if change_type in self.diff:
                handler(self.diff[change_type])

    def _print_header(self, id_dict: dict[str, Any], keys: list[int | str]) -> None:
        """Print the dotted path of a changed element.

        ``id_dict`` is accepted but not used: the object ID is not displayed
        for now because it is always just the schema ID.
        """
        print(self._get_key_display(keys))

    def _handle_values_changed(self, changes: dict[str, Any]) -> None:
        """Print each changed value as its path, old value and new value."""
        for path, change in changes.items():
            keys = self._parse_deepdiff_path(path)
            self._print_header(self.dict1, keys)
            print(f"- {change['old_value']}")
            print(f"+ {change['new_value']}")

    def _handle_iterable_item_added(self, changes: dict[str, Any]) -> None:
        """Print each item added to a list as its path and the new item."""
        for path, value in changes.items():
            keys = self._parse_deepdiff_path(path)
            self._print_header(self.dict2, keys)
            print(f"+ {value}")

    def _handle_iterable_item_removed(self, changes: dict[str, Any]) -> None:
        """Print each item removed from a list as its path and the item."""
        for path, value in changes.items():
            keys = self._parse_deepdiff_path(path)
            self._print_header(self.dict1, keys)
            print(f"- {value}")

    def _handle_dictionary_item_added(self, changes: dict[str, Any]) -> None:
        """Print each added dictionary key under the path of its parent."""
        # Only the paths are used here, so plain iteration is kept rather
        # than ``.items()``.
        for path in changes:
            keys = self._parse_deepdiff_path(path)
            # The last path element is the added key itself; the remainder
            # is the location of the dictionary that gained it.
            added_key = keys.pop()
            self._print_header(self.dict2, keys)
            print(f"+ {added_key}")

    def _handle_dictionary_item_removed(self, changes: dict[str, Any]) -> None:
        """Print each removed dictionary key under the path of its parent."""
        for path in changes:
            keys = self._parse_deepdiff_path(path)
            # The last path element is the removed key itself.
            removed_key = keys.pop()
            self._print_header(self.dict1, keys)
            print(f"- {removed_key}")

    @staticmethod
    def _get_id(values: dict, keys: list[str | int]) -> str:
        """Return the ``id`` of the innermost dictionary along a path.

        Parameters
        ----------
        values
            The nested structure to walk.
        keys
            Dictionary keys and list indexes to follow, outermost first.

        Returns
        -------
        str
            The ``id`` value of the deepest dictionary on the path
            (including the final element) that has one.

        Raises
        ------
        ValueError
            If a list index is out of range or no ``id`` is found.
        """
        # Unused for now, pending updates to diff tool in DM-49446.
        node: Any = values
        last_id = None

        for key in keys:
            logger.debug("Processing key <%s> with type %s", key, type(key))
            logger.debug("Type of value: %s", type(node))
            if isinstance(node, dict) and "id" in node:
                last_id = node["id"]
            elif isinstance(node, list) and isinstance(key, int) and not 0 <= key < len(node):
                raise ValueError(f"Index '{key}' is out of range for list of length {len(node)}")
            # Descend exactly one level per key. Indexing a list here and
            # again in a separate branch would apply the same key twice.
            node = node[key]

        # The element the path ends on may itself carry an ID.
        if isinstance(node, dict) and "id" in node:
            last_id = node["id"]

        if last_id is None:
            raise ValueError("No 'id' found in the specified path")
        return last_id

    @staticmethod
    def _get_key_display(keys: list[str | int]) -> str:
        """Join path elements into a dot-separated display string."""
        return ".".join(str(k) for k in keys)

    @staticmethod
    def _parse_deepdiff_path(path: str) -> list[str | int]:
        """Split a DeepDiff path such as ``root['tables'][0]['name']`` into
        its elements.

        Quoted elements are returned as strings and bare numeric elements
        as integers. Anything that does not match one of those forms is
        ignored.
        """
        keys: list[str | int] = []
        elements = FormattedSchemaDiff._PATH_ELEMENT.findall(path.removeprefix("root"))
        for single_quoted, double_quoted, index in elements:
            if single_quoted:
                keys.append(single_quoted)
            elif double_quoted:
                keys.append(double_quoted)
            elif index:
                keys.append(int(index))
        return keys
205
+
206
+
207
class DatabaseDiff(SchemaDiff):
    """
    Compare a schema with a database and print the differences.

    The database is reflected into SQLAlchemy metadata and compared, via
    alembic's ``compare_metadata``, with metadata built from the schema.
    The result is stored in ``diff``. The parent initializer is not
    called, so ``dict1`` and ``dict2`` are not set on instances.

    Parameters
    ----------
    schema
        The schema to compare.
    engine
        The database engine to compare with.
    """

    def __init__(self, schema: Schema, engine: Engine):
        reflected = MetaData()
        with engine.connect() as conn:
            reflected.reflect(bind=conn)
            options = {"compare_type": True, "target_metadata": reflected}
            migration_context = MigrationContext.configure(conn, opts=options)
            # The schema name is not applied to the metadata built here.
            builder = MetaDataBuilder(schema, apply_schema_to_metadata=False)
            self.diff = compare_metadata(migration_context, builder.build())

    def print(self) -> None:
        """
        Pretty-print the differences, or print nothing if there are none.
        """
        if not self.has_changes:
            return
        pprint.pprint(self.diff)
felis/metadata.py CHANGED
@@ -24,7 +24,7 @@
24
24
  from __future__ import annotations
25
25
 
26
26
  import logging
27
- from typing import Any, Literal
27
+ from typing import IO, Any, Literal
28
28
 
29
29
  from lsst.utils.iteration import ensure_iterable
30
30
  from sqlalchemy import (
@@ -43,11 +43,11 @@ from sqlalchemy import (
43
43
  from sqlalchemy.dialects import mysql, postgresql
44
44
  from sqlalchemy.types import TypeEngine
45
45
 
46
- from felis.datamodel import Schema
47
- from felis.db.variants import make_variant_dict
48
-
49
46
  from . import datamodel
50
- from .db import sqltypes
47
+ from .datamodel import Schema
48
+ from .db import _sqltypes as sqltypes
49
+ from .db._variants import make_variant_dict
50
+ from .db.database_context import is_sqlite_url
51
51
  from .types import FelisType
52
52
 
53
53
  __all__ = ("MetaDataBuilder", "get_datatype_with_variants")
@@ -125,29 +125,31 @@ class MetaDataBuilder:
125
125
  The schema object from which to build the SQLAlchemy metadata.
126
126
  apply_schema_to_metadata
127
127
  Whether to apply the schema name to the metadata object.
128
- apply_schema_to_tables
129
- Whether to apply the schema name to the tables.
130
128
  ignore_constraints
131
129
  Whether to ignore constraints when building the metadata.
130
+ table_name_postfix
131
+ A string to append to the table names when building the metadata.
132
+ skip_indexes
133
+ Skip indexes when building the metadata.
132
134
  """
133
135
 
134
136
  def __init__(
135
137
  self,
136
138
  schema: Schema,
137
139
  apply_schema_to_metadata: bool = True,
138
- apply_schema_to_tables: bool = True,
139
140
  ignore_constraints: bool = False,
141
+ table_name_postfix: str = "",
142
+ skip_indexes: bool = False,
140
143
  ) -> None:
141
144
  """Initialize the metadata builder."""
142
145
  self.schema = schema
143
146
  if not apply_schema_to_metadata:
144
147
  logger.debug("Schema name will not be applied to metadata")
145
- if not apply_schema_to_tables:
146
- logger.debug("Schema name will not be applied to tables")
147
148
  self.metadata = MetaData(schema=schema.name if apply_schema_to_metadata else None)
148
149
  self._objects: dict[str, Any] = {}
149
- self.apply_schema_to_tables = apply_schema_to_tables
150
150
  self.ignore_constraints = ignore_constraints
151
+ self.table_name_postfix = table_name_postfix
152
+ self.skip_indexes = skip_indexes
151
153
 
152
154
  def build(self) -> MetaData:
153
155
  """Build the SQLAlchemy tables and constraints from the schema.
@@ -164,6 +166,10 @@ class MetaDataBuilder:
164
166
  The SQLAlchemy metadata object.
165
167
  """
166
168
  self.build_tables()
169
+ if not self.skip_indexes:
170
+ self.build_indexes()
171
+ else:
172
+ logger.warning("Ignoring indexes")
167
173
  if not self.ignore_constraints:
168
174
  self.build_constraints()
169
175
  else:
@@ -231,20 +237,13 @@ class MetaDataBuilder:
231
237
  description = table_obj.description
232
238
  columns = [self.build_column(column) for column in table_obj.columns]
233
239
  table = Table(
234
- name,
240
+ name + self.table_name_postfix,
235
241
  self.metadata,
236
242
  *columns,
237
243
  comment=description,
238
- schema=self.schema.name if self.apply_schema_to_tables else None,
239
244
  **optargs, # type: ignore[arg-type]
240
245
  )
241
246
 
242
- # Create the indexes and add them to the table.
243
- indexes = [self.build_index(index) for index in table_obj.indexes]
244
- for index in indexes:
245
- index._set_parent(table)
246
- table.indexes.add(index)
247
-
248
247
  self._objects[id] = table
249
248
 
250
249
  def build_column(self, column_obj: datamodel.Column) -> Column:
@@ -341,12 +340,17 @@ class MetaDataBuilder:
341
340
  "deferrable": constraint_obj.deferrable or None,
342
341
  "initially": constraint_obj.initially or None,
343
342
  }
343
+
344
344
  constraint: Constraint
345
345
 
346
346
  if isinstance(constraint_obj, datamodel.ForeignKeyConstraint):
347
347
  fk_obj: datamodel.ForeignKeyConstraint = constraint_obj
348
348
  columns = [self._objects[column_id] for column_id in fk_obj.columns]
349
349
  refcolumns = [self._objects[column_id] for column_id in fk_obj.referenced_columns]
350
+ if constraint_obj.on_delete is not None:
351
+ args["ondelete"] = constraint_obj.on_delete
352
+ if constraint_obj.on_update is not None:
353
+ args["onupdate"] = constraint_obj.on_update
350
354
  constraint = ForeignKeyConstraint(columns, refcolumns, **args)
351
355
  elif isinstance(constraint_obj, datamodel.CheckConstraint):
352
356
  check_obj: datamodel.CheckConstraint = constraint_obj
@@ -381,3 +385,69 @@ class MetaDataBuilder:
381
385
  index = Index(index_obj.name, *columns, *expressions)
382
386
  self._objects[index_obj.id] = index
383
387
  return index
388
+
389
def build_indexes(self) -> None:
    """Create the SQLAlchemy indexes defined in the Felis schema and attach
    each one to its table in the metadata.

    Raises
    ------
    KeyError
        If a schema table has no entry in the objects map.
    TypeError
        If the entry found for a schema table is not a ``Table``.
    """
    for table in self.schema.tables:
        sa_table = self._objects.get(table.id, None)
        if sa_table is None:
            raise KeyError(f"Table with ID '{table.id}' not found in objects map")
        if not isinstance(sa_table, Table):
            raise TypeError(f"Expected Table object, got {type(sa_table)}")
        # Build every index of the table before attaching any of them.
        built = list(map(self.build_index, table.indexes))
        for sa_index in built:
            sa_index._set_parent(sa_table)
            sa_table.indexes.add(sa_index)
403
+
404
+
405
def create_metadata(
    felis_file: IO[str],
    schema_name: str | None = None,
    id_generation: bool = True,
    ignore_constraints: bool = False,
    skip_indexes: bool = False,
    engine_url: str | None = None,
) -> MetaData:
    """Load a Felis schema file and build SQLAlchemy metadata from it.

    Parameters
    ----------
    felis_file
        The Felis schema file to read.
    schema_name
        Optional schema name to override the one in the file.
    id_generation
        Whether to generate IDs for all objects in the schema that do not have
        them.
    ignore_constraints
        Whether to ignore constraints when building metadata.
    skip_indexes
        Whether to skip creating indexes when building metadata.
    engine_url
        Engine URL to determine if SQLite-specific handling is needed.

    Returns
    -------
    MetaData
        The SQLAlchemy metadata object. The schema name is applied to it
        unless ``engine_url`` is given and identifies a SQLite database.
    """
    schema = Schema.from_stream(felis_file, context={"id_generation": id_generation})
    if schema_name:
        logger.info(f"Overriding schema name with: {schema_name}")
        schema.name = schema_name

    # The schema name is left off the metadata only for a SQLite URL; with
    # no URL given it is applied.
    targets_sqlite = bool(engine_url and is_sqlite_url(engine_url))
    if targets_sqlite:
        logger.debug("SQLite detected: schema name will not be applied to metadata")

    builder = MetaDataBuilder(
        schema,
        ignore_constraints=ignore_constraints,
        skip_indexes=skip_indexes,
        apply_schema_to_metadata=not targets_sqlite,
    )
    return builder.build()