lsst-felis 26.2024.900__py3-none-any.whl → 29.2025.4500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. felis/__init__.py +10 -24
  2. felis/cli.py +437 -341
  3. felis/config/tap_schema/columns.csv +33 -0
  4. felis/config/tap_schema/key_columns.csv +8 -0
  5. felis/config/tap_schema/keys.csv +8 -0
  6. felis/config/tap_schema/schemas.csv +2 -0
  7. felis/config/tap_schema/tables.csv +6 -0
  8. felis/config/tap_schema/tap_schema_std.yaml +273 -0
  9. felis/datamodel.py +1386 -193
  10. felis/db/dialects.py +116 -0
  11. felis/db/schema.py +62 -0
  12. felis/db/sqltypes.py +275 -48
  13. felis/db/utils.py +409 -0
  14. felis/db/variants.py +159 -0
  15. felis/diff.py +234 -0
  16. felis/metadata.py +385 -0
  17. felis/tap_schema.py +767 -0
  18. felis/tests/__init__.py +0 -0
  19. felis/tests/postgresql.py +134 -0
  20. felis/tests/run_cli.py +79 -0
  21. felis/types.py +57 -9
  22. lsst_felis-29.2025.4500.dist-info/METADATA +38 -0
  23. lsst_felis-29.2025.4500.dist-info/RECORD +31 -0
  24. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/WHEEL +1 -1
  25. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/COPYRIGHT +1 -1
  26. felis/check.py +0 -381
  27. felis/simple.py +0 -424
  28. felis/sql.py +0 -275
  29. felis/tap.py +0 -433
  30. felis/utils.py +0 -100
  31. felis/validation.py +0 -103
  32. felis/version.py +0 -2
  33. felis/visitor.py +0 -180
  34. lsst_felis-26.2024.900.dist-info/METADATA +0 -28
  35. lsst_felis-26.2024.900.dist-info/RECORD +0 -23
  36. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/entry_points.txt +0 -0
  37. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/LICENSE +0 -0
  38. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/top_level.txt +0 -0
  39. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/zip-safe +0 -0
felis/cli.py CHANGED
@@ -1,3 +1,5 @@
1
+ """Click command line interface."""
2
+
1
3
  # This file is part of felis.
2
4
  #
3
5
  # Developed for the LSST Data Management System.
@@ -21,313 +23,384 @@
21
23
 
22
24
  from __future__ import annotations
23
25
 
24
- import io
25
- import json
26
26
  import logging
27
- import sys
28
- from collections.abc import Iterable, Mapping, MutableMapping
29
- from typing import Any
27
+ from collections.abc import Iterable
28
+ from typing import IO
30
29
 
31
30
  import click
32
- import yaml
33
31
  from pydantic import ValidationError
34
- from pyld import jsonld
35
- from sqlalchemy.engine import Engine, create_engine, create_mock_engine, make_url
36
- from sqlalchemy.engine.mock import MockConnection
32
+ from sqlalchemy.engine import Engine, create_engine, make_url
33
+ from sqlalchemy.engine.mock import MockConnection, create_mock_engine
37
34
 
38
- from . import DEFAULT_CONTEXT, DEFAULT_FRAME, __version__
39
- from .check import CheckingVisitor
35
+ from . import __version__
40
36
  from .datamodel import Schema
41
- from .sql import SQLVisitor
42
- from .tap import Tap11Base, TapLoadingVisitor, init_tables
43
- from .utils import ReorderingVisitor
44
- from .validation import get_schema
37
+ from .db.schema import create_database
38
+ from .db.utils import DatabaseContext, is_mock_url
39
+ from .diff import DatabaseDiff, FormattedSchemaDiff, SchemaDiff
40
+ from .metadata import MetaDataBuilder
41
+ from .tap_schema import DataLoader, MetadataInserter, TableManager
42
+
43
+ __all__ = ["cli"]
45
44
 
46
45
  logger = logging.getLogger("felis")
47
46
 
47
+ loglevel_choices = ["CRITICAL", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG"]
48
+
48
49
 
49
50
  @click.group()
50
51
  @click.version_option(__version__)
51
- def cli() -> None:
52
- """Felis Command Line Tools."""
53
- logging.basicConfig(level=logging.INFO)
52
+ @click.option(
53
+ "--log-level",
54
+ type=click.Choice(loglevel_choices),
55
+ envvar="FELIS_LOGLEVEL",
56
+ help="Felis log level",
57
+ default=logging.getLevelName(logging.INFO),
58
+ )
59
+ @click.option(
60
+ "--log-file",
61
+ type=click.Path(),
62
+ envvar="FELIS_LOGFILE",
63
+ help="Felis log file path",
64
+ )
65
+ @click.option(
66
+ "--id-generation/--no-id-generation",
67
+ is_flag=True,
68
+ help="Generate IDs for all objects that do not have them",
69
+ default=True,
70
+ )
71
+ @click.pass_context
72
+ def cli(ctx: click.Context, log_level: str, log_file: str | None, id_generation: bool) -> None:
73
+ """Felis command line tools"""
74
+ ctx.ensure_object(dict)
75
+ ctx.obj["id_generation"] = id_generation
76
+ if ctx.obj["id_generation"]:
77
+ logger.info("ID generation is enabled")
78
+ else:
79
+ logger.info("ID generation is disabled")
80
+ if log_file:
81
+ logging.basicConfig(filename=log_file, level=log_level)
82
+ else:
83
+ logging.basicConfig(level=log_level)
54
84
 
55
85
 
56
- @cli.command("create-all")
57
- @click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL")
58
- @click.option("--schema-name", help="Alternate Schema Name for Felis File")
59
- @click.option("--dry-run", is_flag=True, help="Dry Run Only. Prints out the DDL that would be executed")
86
+ @cli.command("create", help="Create database objects from the Felis file")
87
+ @click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL", default="sqlite://")
88
+ @click.option("--schema-name", help="Alternate schema name to override Felis file")
89
+ @click.option(
90
+ "--initialize",
91
+ is_flag=True,
92
+ help="Create the schema in the database if it does not exist (error if already exists)",
93
+ )
94
+ @click.option(
95
+ "--drop", is_flag=True, help="Drop schema if it already exists in the database (implies --initialize)"
96
+ )
97
+ @click.option("--echo", is_flag=True, help="Echo database commands as they are executed")
98
+ @click.option("--dry-run", is_flag=True, help="Dry run only to print out commands instead of executing")
99
+ @click.option(
100
+ "--output-file", "-o", type=click.File(mode="w"), help="Write SQL commands to a file instead of executing"
101
+ )
102
+ @click.option("--ignore-constraints", is_flag=True, help="Ignore constraints when creating tables")
60
103
  @click.argument("file", type=click.File())
61
- def create_all(engine_url: str, schema_name: str, dry_run: bool, file: io.TextIOBase) -> None:
62
- """Create schema objects from the Felis FILE."""
63
- schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
64
- visitor = SQLVisitor(schema_name=schema_name)
65
- schema = visitor.visit_schema(schema_obj)
66
-
67
- metadata = schema.metadata
68
-
69
- engine: Engine | MockConnection
70
- if not dry_run:
71
- engine = create_engine(engine_url)
72
- else:
73
- _insert_dump = InsertDump()
74
- engine = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump)
75
- _insert_dump.dialect = engine.dialect
76
- metadata.create_all(engine)
77
-
78
-
79
- @cli.command("init-tap")
80
- @click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
81
- @click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
82
- @click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
83
- @click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
84
- @click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
85
- @click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
86
- @click.argument("engine-url")
87
- def init_tap(
104
+ @click.pass_context
105
+ def create(
106
+ ctx: click.Context,
88
107
  engine_url: str,
89
- tap_schema_name: str,
90
- tap_schemas_table: str,
91
- tap_tables_table: str,
92
- tap_columns_table: str,
93
- tap_keys_table: str,
94
- tap_key_columns_table: str,
108
+ schema_name: str | None,
109
+ initialize: bool,
110
+ drop: bool,
111
+ echo: bool,
112
+ dry_run: bool,
113
+ output_file: IO[str] | None,
114
+ ignore_constraints: bool,
115
+ file: IO[str],
95
116
  ) -> None:
96
- """Initialize TAP 1.1 TAP_SCHEMA objects.
97
-
98
- Please verify the schema/catalog you are executing this in in your
99
- engine URL.
117
+ """Create database objects from the Felis file.
118
+
119
+ Parameters
120
+ ----------
121
+ engine_url
122
+ SQLAlchemy Engine URL.
123
+ schema_name
124
+ Alternate schema name to override Felis file.
125
+ initialize
126
+ Create the schema in the database if it does not exist.
127
+ drop
128
+ Drop schema if it already exists in the database.
129
+ echo
130
+ Echo database commands as they are executed.
131
+ dry_run
132
+ Dry run only to print out commands instead of executing.
133
+ output_file
134
+ Write SQL commands to a file instead of executing.
135
+ ignore_constraints
136
+ Ignore constraints when creating tables.
137
+ file
138
+ Felis file to read.
100
139
  """
101
- engine = create_engine(engine_url, echo=True)
102
- init_tables(
103
- tap_schema_name,
104
- tap_schemas_table,
105
- tap_tables_table,
106
- tap_columns_table,
107
- tap_keys_table,
108
- tap_key_columns_table,
109
- )
110
- Tap11Base.metadata.create_all(engine)
111
-
112
-
113
- @cli.command("load-tap")
114
- @click.option("--engine-url", envvar="ENGINE_URL", help="SQLAlchemy Engine URL to catalog")
115
- @click.option("--schema-name", help="Alternate Schema Name for Felis file")
116
- @click.option("--catalog-name", help="Catalog Name for Schema")
117
- @click.option("--dry-run", is_flag=True, help="Dry Run Only. Prints out the DDL that would be executed")
118
- @click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
119
- @click.option("--tap-tables-postfix", help="Postfix for TAP table names")
120
- @click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
121
- @click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
122
- @click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
123
- @click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
124
- @click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
140
+ try:
141
+ schema = Schema.from_stream(file, context={"id_generation": ctx.obj["id_generation"]})
142
+ url = make_url(engine_url)
143
+ if schema_name:
144
+ logger.info(f"Overriding schema name with: {schema_name}")
145
+ schema.name = schema_name
146
+ elif url.drivername == "sqlite":
147
+ logger.info("Overriding schema name for sqlite with: main")
148
+ schema.name = "main"
149
+ if not url.host and not url.drivername == "sqlite":
150
+ dry_run = True
151
+ logger.info("Forcing dry run for non-sqlite engine URL with no host")
152
+
153
+ metadata = MetaDataBuilder(schema, ignore_constraints=ignore_constraints).build()
154
+ logger.debug(f"Created metadata with schema name: {metadata.schema}")
155
+
156
+ engine: Engine | MockConnection
157
+ if not dry_run and not output_file:
158
+ engine = create_engine(url, echo=echo)
159
+ else:
160
+ if dry_run:
161
+ logger.info("Dry run will be executed")
162
+ engine = DatabaseContext.create_mock_engine(url, output_file)
163
+ if output_file:
164
+ logger.info("Writing SQL output to: " + output_file.name)
165
+
166
+ context = DatabaseContext(metadata, engine)
167
+
168
+ if drop and initialize:
169
+ raise ValueError("Cannot drop and initialize schema at the same time")
170
+
171
+ if drop:
172
+ logger.debug("Dropping schema if it exists")
173
+ context.drop()
174
+ initialize = True # If schema is dropped, it needs to be recreated.
175
+
176
+ if initialize:
177
+ logger.debug("Creating schema if not exists")
178
+ context.initialize()
179
+
180
+ context.create_all()
181
+ except Exception as e:
182
+ logger.exception(e)
183
+ raise click.ClickException(str(e))
184
+
185
+
186
+ @cli.command("load-tap-schema", help="Load metadata from a Felis file into a TAP_SCHEMA database")
187
+ @click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL")
188
+ @click.option(
189
+ "--tap-schema-name", "-n", help="Name of the TAP_SCHEMA schema in the database (default: TAP_SCHEMA)"
190
+ )
191
+ @click.option(
192
+ "--tap-tables-postfix",
193
+ "-p",
194
+ help="Postfix which is applied to standard TAP_SCHEMA table names",
195
+ default="",
196
+ )
197
+ @click.option("--tap-schema-index", "-i", type=int, help="TAP_SCHEMA index of the schema in this environment")
198
+ @click.option("--dry-run", "-D", is_flag=True, help="Execute dry run only. Does not insert any data.")
199
+ @click.option("--echo", "-e", is_flag=True, help="Print out the generated insert statements to stdout")
200
+ @click.option("--output-file", "-o", type=click.Path(), help="Write SQL commands to a file")
201
+ @click.option(
202
+ "--force-unbounded-arraysize",
203
+ is_flag=True,
204
+ help="Use unbounded arraysize by default for all variable length string columns"
205
+ ", e.g., ``votable:arraysize: *`` (workaround for astropy bug #18099)",
206
+ ) # DM-50899: Variable-length bounded strings are not handled correctly in astropy
207
+ @click.option(
208
+ "--unique-keys",
209
+ "-u",
210
+ is_flag=True,
211
+ help="Generate unique key_id values for keys and key_columns tables by prepending the schema name",
212
+ default=False,
213
+ )
125
214
  @click.argument("file", type=click.File())
126
- def load_tap(
215
+ @click.pass_context
216
+ def load_tap_schema(
217
+ ctx: click.Context,
127
218
  engine_url: str,
128
- schema_name: str,
129
- catalog_name: str,
130
- dry_run: bool,
131
219
  tap_schema_name: str,
132
220
  tap_tables_postfix: str,
133
- tap_schemas_table: str,
134
- tap_tables_table: str,
135
- tap_columns_table: str,
136
- tap_keys_table: str,
137
- tap_key_columns_table: str,
138
- file: io.TextIOBase,
221
+ tap_schema_index: int,
222
+ dry_run: bool,
223
+ echo: bool,
224
+ output_file: str | None,
225
+ force_unbounded_arraysize: bool,
226
+ unique_keys: bool,
227
+ file: IO[str],
139
228
  ) -> None:
140
- """Load TAP metadata from a Felis FILE.
141
-
142
- This command loads the associated TAP metadata from a Felis FILE
143
- to the TAP_SCHEMA tables.
229
+ """Load TAP metadata from a Felis file.
230
+
231
+ Parameters
232
+ ----------
233
+ engine_url
234
+ SQLAlchemy Engine URL.
235
+ tap_tables_postfix
236
+ Postfix which is applied to standard TAP_SCHEMA table names.
237
+ tap_schema_index
238
+ TAP_SCHEMA index of the schema in this environment.
239
+ dry_run
240
+ Execute dry run only. Does not insert any data.
241
+ echo
242
+ Print out the generated insert statements to stdout.
243
+ output_file
244
+ Output file for writing generated SQL.
245
+ file
246
+ Felis file to read.
247
+
248
+ Notes
249
+ -----
250
+ The TAP_SCHEMA database must already exist or the command will fail. This
251
+ command will not initialize the TAP_SCHEMA tables.
144
252
  """
145
- top_level_object = yaml.load(file, Loader=yaml.SafeLoader)
146
- schema_obj: dict
147
- if isinstance(top_level_object, dict):
148
- schema_obj = top_level_object
149
- if "@graph" not in schema_obj:
150
- schema_obj["@type"] = "felis:Schema"
151
- schema_obj["@context"] = DEFAULT_CONTEXT
152
- elif isinstance(top_level_object, list):
153
- schema_obj = {"@context": DEFAULT_CONTEXT, "@graph": top_level_object}
253
+ url = make_url(engine_url)
254
+ engine: Engine | MockConnection
255
+ if dry_run or is_mock_url(url):
256
+ engine = create_mock_engine(url, executor=None)
154
257
  else:
155
- logger.error("Schema object not of recognizable type")
156
- raise click.exceptions.Exit(1)
157
-
158
- normalized = _normalize(schema_obj, embed="@always")
159
- if len(normalized["@graph"]) > 1 and (schema_name or catalog_name):
160
- logger.error("--schema-name and --catalog-name incompatible with multiple schemas")
161
- raise click.exceptions.Exit(1)
162
-
163
- # Force normalized["@graph"] to a list, which is what happens when there's
164
- # multiple schemas
165
- if isinstance(normalized["@graph"], dict):
166
- normalized["@graph"] = [normalized["@graph"]]
167
-
168
- tap_tables = init_tables(
169
- tap_schema_name,
170
- tap_tables_postfix,
171
- tap_schemas_table,
172
- tap_tables_table,
173
- tap_columns_table,
174
- tap_keys_table,
175
- tap_key_columns_table,
176
- )
177
-
178
- if not dry_run:
179
258
  engine = create_engine(engine_url)
259
+ mgr = TableManager(
260
+ engine=engine,
261
+ apply_schema_to_metadata=False if engine.dialect.name == "sqlite" else True,
262
+ schema_name=tap_schema_name,
263
+ table_name_postfix=tap_tables_postfix,
264
+ )
180
265
 
181
- if engine_url == "sqlite://" and not dry_run:
182
- # In Memory SQLite - Mostly used to test
183
- Tap11Base.metadata.create_all(engine)
184
-
185
- for schema in normalized["@graph"]:
186
- tap_visitor = TapLoadingVisitor(
187
- engine,
188
- catalog_name=catalog_name,
189
- schema_name=schema_name,
190
- tap_tables=tap_tables,
191
- )
192
- tap_visitor.visit_schema(schema)
193
- else:
194
- _insert_dump = InsertDump()
195
- conn = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump, paramstyle="pyformat")
196
- # After the engine is created, update the executor with the dialect
197
- _insert_dump.dialect = conn.dialect
198
-
199
- for schema in normalized["@graph"]:
200
- tap_visitor = TapLoadingVisitor.from_mock_connection(
201
- conn,
202
- catalog_name=catalog_name,
203
- schema_name=schema_name,
204
- tap_tables=tap_tables,
205
- )
206
- tap_visitor.visit_schema(schema)
207
-
208
-
209
- @cli.command("modify-tap")
210
- @click.option("--start-schema-at", type=int, help="Rewrite index for tap:schema_index", default=0)
211
- @click.argument("files", nargs=-1, type=click.File())
212
- def modify_tap(start_schema_at: int, files: Iterable[io.TextIOBase]) -> None:
213
- """Modify TAP information in Felis schema FILES.
214
-
215
- This command has some utilities to aid in rewriting felis FILES
216
- in specific ways. It will write out a merged version of these files.
217
- """
218
- count = 0
219
- graph = []
220
- for file in files:
221
- schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
222
- if "@graph" not in schema_obj:
223
- schema_obj["@type"] = "felis:Schema"
224
- schema_obj["@context"] = DEFAULT_CONTEXT
225
- schema_index = schema_obj.get("tap:schema_index")
226
- if not schema_index or (schema_index and schema_index > start_schema_at):
227
- schema_index = start_schema_at + count
228
- count += 1
229
- schema_obj["tap:schema_index"] = schema_index
230
- graph.extend(jsonld.flatten(schema_obj))
231
- merged = {"@context": DEFAULT_CONTEXT, "@graph": graph}
232
- normalized = _normalize(merged, embed="@always")
233
- _dump(normalized)
234
-
235
-
236
- @cli.command("basic-check")
237
- @click.argument("file", type=click.File())
238
- def basic_check(file: io.TextIOBase) -> None:
239
- """Perform a basic check on a felis FILE.
240
-
241
- This performs a very check to ensure required fields are
242
- populated and basic semantics are okay. It does not ensure semantics
243
- are valid for other commands like create-all or load-tap.
244
- """
245
- schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
246
- schema_obj["@type"] = "felis:Schema"
247
- # Force Context and Schema Type
248
- schema_obj["@context"] = DEFAULT_CONTEXT
249
- check_visitor = CheckingVisitor()
250
- check_visitor.visit_schema(schema_obj)
251
-
252
-
253
- @cli.command("normalize")
254
- @click.argument("file", type=click.File())
255
- def normalize(file: io.TextIOBase) -> None:
256
- """Normalize a Felis FILE.
257
-
258
- Takes a felis schema FILE, expands it (resolving the full URLs),
259
- then compacts it, and finally produces output in the canonical
260
- format.
261
-
262
- (This is most useful in some debugging scenarios)
263
-
264
- See Also :
266
+ schema = Schema.from_stream(
267
+ file,
268
+ context={
269
+ "id_generation": ctx.obj["id_generation"],
270
+ "force_unbounded_arraysize": force_unbounded_arraysize,
271
+ },
272
+ )
265
273
 
266
- https://json-ld.org/spec/latest/json-ld/#expanded-document-form
267
- https://json-ld.org/spec/latest/json-ld/#compacted-document-form
274
+ DataLoader(
275
+ schema,
276
+ mgr,
277
+ engine,
278
+ tap_schema_index=tap_schema_index,
279
+ dry_run=dry_run,
280
+ print_sql=echo,
281
+ output_path=output_file,
282
+ unique_keys=unique_keys,
283
+ ).load()
284
+
285
+
286
+ @cli.command("init-tap-schema", help="Initialize a standard TAP_SCHEMA database")
287
+ @click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL", required=True)
288
+ @click.option("--tap-schema-name", help="Name of the TAP_SCHEMA schema in the database")
289
+ @click.option(
290
+ "--tap-tables-postfix", help="Postfix which is applied to standard TAP_SCHEMA table names", default=""
291
+ )
292
+ @click.option(
293
+ "--insert-metadata/--no-insert-metadata",
294
+ is_flag=True,
295
+ help="Insert metadata describing TAP_SCHEMA itself",
296
+ default=True,
297
+ )
298
+ @click.pass_context
299
+ def init_tap_schema(
300
+ ctx: click.Context, engine_url: str, tap_schema_name: str, tap_tables_postfix: str, insert_metadata: bool
301
+ ) -> None:
302
+ """Initialize a standard TAP_SCHEMA database.
303
+
304
+ Parameters
305
+ ----------
306
+ engine_url
307
+ SQLAlchemy Engine URL.
308
+ tap_schema_name
309
+ Name of the TAP_SCHEMA schema in the database.
310
+ tap_tables_postfix
311
+ Postfix which is applied to standard TAP_SCHEMA table names.
312
+ insert_metadata
313
+ Insert metadata describing TAP_SCHEMA itself.
314
+ If set to False, only the TAP_SCHEMA tables will be created, but no
315
+ metadata will be inserted.
268
316
  """
269
- schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
270
- schema_obj["@type"] = "felis:Schema"
271
- # Force Context and Schema Type
272
- schema_obj["@context"] = DEFAULT_CONTEXT
273
- expanded = jsonld.expand(schema_obj)
274
- normalized = _normalize(expanded, embed="@always")
275
- _dump(normalized)
276
-
317
+ url = make_url(engine_url)
318
+ engine: Engine | MockConnection
319
+ if is_mock_url(url):
320
+ raise click.ClickException("Mock engine URL is not supported for this command")
321
+ engine = create_engine(engine_url)
322
+ mgr = TableManager(
323
+ apply_schema_to_metadata=False if engine.dialect.name == "sqlite" else True,
324
+ schema_name=tap_schema_name,
325
+ table_name_postfix=tap_tables_postfix,
326
+ )
327
+ mgr.initialize_database(engine)
328
+ if insert_metadata:
329
+ inserter = MetadataInserter(mgr, engine)
330
+ inserter.insert_metadata()
277
331
 
278
- @cli.command("merge")
279
- @click.argument("files", nargs=-1, type=click.File())
280
- def merge(files: Iterable[io.TextIOBase]) -> None:
281
- """Merge a set of Felis FILES.
282
332
 
283
- This will expand out the felis FILES so that it is easy to
284
- override values (using @Id), then normalize to a single
285
- output.
286
- """
287
- graph = []
288
- for file in files:
289
- schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
290
- if "@graph" not in schema_obj:
291
- schema_obj["@type"] = "felis:Schema"
292
- schema_obj["@context"] = DEFAULT_CONTEXT
293
- graph.extend(jsonld.flatten(schema_obj))
294
- updated_map: MutableMapping[str, Any] = {}
295
- for item in graph:
296
- _id = item["@id"]
297
- item_to_update = updated_map.get(_id, item)
298
- if item_to_update and item_to_update != item:
299
- logger.debug(f"Overwriting {_id}")
300
- item_to_update.update(item)
301
- updated_map[_id] = item_to_update
302
- merged = {"@context": DEFAULT_CONTEXT, "@graph": list(updated_map.values())}
303
- normalized = _normalize(merged, embed="@always")
304
- _dump(normalized)
305
-
306
-
307
- @cli.command("validate")
333
+ @cli.command("validate", help="Validate one or more Felis YAML files")
334
+ @click.option(
335
+ "--check-description", is_flag=True, help="Check that all objects have a description", default=False
336
+ )
308
337
  @click.option(
309
- "-s",
310
- "--schema-name",
311
- help="Schema name for validation",
312
- type=click.Choice(["RSP", "default"]),
313
- default="default",
338
+ "--check-redundant-datatypes", is_flag=True, help="Check for redundant datatype overrides", default=False
339
+ )
340
+ @click.option(
341
+ "--check-tap-table-indexes",
342
+ is_flag=True,
343
+ help="Check that every table has a unique TAP table index",
344
+ default=False,
345
+ )
346
+ @click.option(
347
+ "--check-tap-principal",
348
+ is_flag=True,
349
+ help="Check that at least one column per table is flagged as TAP principal",
350
+ default=False,
314
351
  )
315
- @click.option("-d", "--require-description", is_flag=True, help="Require description for all objects")
316
352
  @click.argument("files", nargs=-1, type=click.File())
317
- def validate(schema_name: str, require_description: bool, files: Iterable[io.TextIOBase]) -> None:
318
- """Validate one or more felis YAML files."""
319
- schema_class = get_schema(schema_name)
320
- logger.info(f"Using schema '{schema_class.__name__}'")
321
-
322
- if require_description:
323
- Schema.require_description(True)
324
-
353
+ @click.pass_context
354
+ def validate(
355
+ ctx: click.Context,
356
+ check_description: bool,
357
+ check_redundant_datatypes: bool,
358
+ check_tap_table_indexes: bool,
359
+ check_tap_principal: bool,
360
+ files: Iterable[IO[str]],
361
+ ) -> None:
362
+ """Validate one or more felis YAML files.
363
+
364
+ Parameters
365
+ ----------
366
+ check_description
367
+ Check that all objects have a valid description.
368
+ check_redundant_datatypes
369
+ Check for redundant type overrides.
370
+ check_tap_table_indexes
371
+ Check that every table has a unique TAP table index.
372
+ check_tap_principal
373
+ Check that at least one column per table is flagged as TAP principal.
374
+ files
375
+ The Felis YAML files to validate.
376
+
377
+ Raises
378
+ ------
379
+ click.exceptions.Exit
380
+ Raised if any validation errors are found. The ``ValidationError``
381
+ which is thrown when a schema fails to validate will be logged as an
382
+ error message.
383
+
384
+ Notes
385
+ -----
386
+ All of the ``check`` flags are turned off by default and represent
387
+ optional validations controlled by the Pydantic context.
388
+ """
325
389
  rc = 0
326
390
  for file in files:
327
391
  file_name = getattr(file, "name", None)
328
392
  logger.info(f"Validating {file_name}")
329
393
  try:
330
- schema_class.model_validate(yaml.load(file, Loader=yaml.SafeLoader))
394
+ Schema.from_stream(
395
+ file,
396
+ context={
397
+ "check_description": check_description,
398
+ "check_redundant_datatypes": check_redundant_datatypes,
399
+ "check_tap_table_indexes": check_tap_table_indexes,
400
+ "check_tap_principal": check_tap_principal,
401
+ "id_generation": ctx.obj["id_generation"],
402
+ },
403
+ )
331
404
  except ValidationError as e:
332
405
  logger.error(e)
333
406
  rc = 1
@@ -335,80 +408,103 @@ def validate(schema_name: str, require_description: bool, files: Iterable[io.Tex
335
408
  raise click.exceptions.Exit(rc)
336
409
 
337
410
 
338
- @cli.command("dump-json")
339
- @click.option("-x", "--expanded", is_flag=True, help="Extended schema before dumping.")
340
- @click.option("-f", "--framed", is_flag=True, help="Frame schema before dumping.")
341
- @click.option("-c", "--compacted", is_flag=True, help="Compact schema before dumping.")
342
- @click.option("-g", "--graph", is_flag=True, help="Pass graph option to compact.")
343
- @click.argument("file", type=click.File())
344
- def dump_json(
345
- file: io.TextIOBase,
346
- expanded: bool = False,
347
- compacted: bool = False,
348
- framed: bool = False,
349
- graph: bool = False,
411
+ @cli.command(
412
+ "diff",
413
+ help="""
414
+ Compare two schemas or a schema and a database for changes
415
+
416
+ Examples:
417
+
418
+ felis diff schema1.yaml schema2.yaml
419
+
420
+ felis diff -c alembic schema1.yaml schema2.yaml
421
+
422
+ felis diff --engine-url sqlite:///test.db schema.yaml
423
+ """,
424
+ )
425
+ @click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL")
426
+ @click.option(
427
+ "-c",
428
+ "--comparator",
429
+ type=click.Choice(["alembic", "deepdiff"], case_sensitive=False),
430
+ help="Comparator to use for schema comparison",
431
+ default="deepdiff",
432
+ )
433
+ @click.option("-E", "--error-on-change", is_flag=True, help="Exit with error code if schemas are different")
434
+ @click.argument("files", nargs=-1, type=click.File())
435
+ @click.pass_context
436
+ def diff(
437
+ ctx: click.Context,
438
+ engine_url: str | None,
439
+ comparator: str,
440
+ error_on_change: bool,
441
+ files: Iterable[IO[str]],
442
+ ) -> None:
443
+ schemas = [
444
+ Schema.from_stream(file, context={"id_generation": ctx.obj["id_generation"]}) for file in files
445
+ ]
446
+
447
+ diff: SchemaDiff
448
+ if len(schemas) == 2 and engine_url is None:
449
+ if comparator == "alembic":
450
+ db_context = create_database(schemas[0])
451
+ assert isinstance(db_context.engine, Engine)
452
+ diff = DatabaseDiff(schemas[1], db_context.engine)
453
+ else:
454
+ diff = FormattedSchemaDiff(schemas[0], schemas[1])
455
+ elif len(schemas) == 1 and engine_url is not None:
456
+ engine = create_engine(engine_url)
457
+ diff = DatabaseDiff(schemas[0], engine)
458
+ else:
459
+ raise click.ClickException(
460
+ "Invalid arguments - provide two schemas or a schema and a database engine URL"
461
+ )
462
+
463
+ diff.print()
464
+
465
+ if diff.has_changes and error_on_change:
466
+ raise click.ClickException("Schema was changed")
467
+
468
+
469
+ @cli.command(
470
+ "dump",
471
+ help="""
472
+ Dump a schema file to YAML or JSON format
473
+
474
+ Example:
475
+
476
+ felis dump schema.yaml schema.json
477
+
478
+ felis dump schema.yaml schema_dump.yaml
479
+ """,
480
+ )
481
+ @click.option(
482
+ "--strip-ids/--no-strip-ids",
483
+ is_flag=True,
484
+ help="Strip IDs from the output schema",
485
+ default=False,
486
+ )
487
+ @click.argument("files", nargs=2, type=click.Path())
488
+ @click.pass_context
489
+ def dump(
490
+ ctx: click.Context,
491
+ strip_ids: bool,
492
+ files: list[str],
350
493
  ) -> None:
351
- """Dump JSON representation using various JSON-LD options."""
352
- schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
353
- schema_obj["@type"] = "felis:Schema"
354
- # Force Context and Schema Type
355
- schema_obj["@context"] = DEFAULT_CONTEXT
356
-
357
- if expanded:
358
- schema_obj = jsonld.expand(schema_obj)
359
- if framed:
360
- schema_obj = jsonld.frame(schema_obj, DEFAULT_FRAME)
361
- if compacted:
362
- options = {}
363
- if graph:
364
- options["graph"] = True
365
- schema_obj = jsonld.compact(schema_obj, DEFAULT_CONTEXT, options=options)
366
- json.dump(schema_obj, sys.stdout, indent=4)
367
-
368
-
369
- def _dump(obj: Mapping[str, Any]) -> None:
370
- class OrderedDumper(yaml.Dumper):
371
- pass
372
-
373
- def _dict_representer(dumper: yaml.Dumper, data: Any) -> Any:
374
- return dumper.represent_mapping(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items())
375
-
376
- OrderedDumper.add_representer(dict, _dict_representer)
377
- print(yaml.dump(obj, Dumper=OrderedDumper, default_flow_style=False))
378
-
379
-
380
- def _normalize(schema_obj: Mapping[str, Any], embed: str = "@last") -> MutableMapping[str, Any]:
381
- framed = jsonld.frame(schema_obj, DEFAULT_FRAME, options=dict(embed=embed))
382
- compacted = jsonld.compact(framed, DEFAULT_CONTEXT, options=dict(graph=True))
383
- graph = compacted["@graph"]
384
- graph = [ReorderingVisitor(add_type=True).visit_schema(schema_obj) for schema_obj in graph]
385
- compacted["@graph"] = graph if len(graph) > 1 else graph[0]
386
- return compacted
387
-
388
-
389
- class InsertDump:
390
- """An Insert Dumper for SQL statements."""
391
-
392
- dialect: Any = None
393
-
394
- def dump(self, sql: Any, *multiparams: Any, **params: Any) -> None:
395
- compiled = sql.compile(dialect=self.dialect)
396
- sql_str = str(compiled) + ";"
397
- params_list = [compiled.params]
398
- for params in params_list:
399
- if not params:
400
- print(sql_str)
401
- continue
402
- new_params = {}
403
- for key, value in params.items():
404
- if isinstance(value, str):
405
- new_params[key] = f"'{value}'"
406
- elif value is None:
407
- new_params[key] = "null"
408
- else:
409
- new_params[key] = value
410
-
411
- print(sql_str % new_params)
494
+ if strip_ids:
495
+ logger.info("Stripping IDs from the output schema")
496
+ if files[1].endswith(".json"):
497
+ format = "json"
498
+ elif files[1].endswith(".yaml"):
499
+ format = "yaml"
500
+ else:
501
+ raise click.ClickException("Output file must have a .json or .yaml extension")
502
+ schema = Schema.from_uri(files[0], context={"id_generation": ctx.obj["id_generation"]})
503
+ with open(files[1], "w") as f:
504
+ if format == "yaml":
505
+ schema.dump_yaml(f, strip_ids=strip_ids)
506
+ elif format == "json":
507
+ schema.dump_json(f, strip_ids=strip_ids)
412
508
 
413
509
 
414
510
  if __name__ == "__main__":