lsst-felis 26.2024.900__py3-none-any.whl → 29.2025.4500__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- felis/__init__.py +10 -24
- felis/cli.py +437 -341
- felis/config/tap_schema/columns.csv +33 -0
- felis/config/tap_schema/key_columns.csv +8 -0
- felis/config/tap_schema/keys.csv +8 -0
- felis/config/tap_schema/schemas.csv +2 -0
- felis/config/tap_schema/tables.csv +6 -0
- felis/config/tap_schema/tap_schema_std.yaml +273 -0
- felis/datamodel.py +1386 -193
- felis/db/dialects.py +116 -0
- felis/db/schema.py +62 -0
- felis/db/sqltypes.py +275 -48
- felis/db/utils.py +409 -0
- felis/db/variants.py +159 -0
- felis/diff.py +234 -0
- felis/metadata.py +385 -0
- felis/tap_schema.py +767 -0
- felis/tests/__init__.py +0 -0
- felis/tests/postgresql.py +134 -0
- felis/tests/run_cli.py +79 -0
- felis/types.py +57 -9
- lsst_felis-29.2025.4500.dist-info/METADATA +38 -0
- lsst_felis-29.2025.4500.dist-info/RECORD +31 -0
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/WHEEL +1 -1
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/COPYRIGHT +1 -1
- felis/check.py +0 -381
- felis/simple.py +0 -424
- felis/sql.py +0 -275
- felis/tap.py +0 -433
- felis/utils.py +0 -100
- felis/validation.py +0 -103
- felis/version.py +0 -2
- felis/visitor.py +0 -180
- lsst_felis-26.2024.900.dist-info/METADATA +0 -28
- lsst_felis-26.2024.900.dist-info/RECORD +0 -23
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/entry_points.txt +0 -0
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/LICENSE +0 -0
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/top_level.txt +0 -0
- {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/zip-safe +0 -0
felis/cli.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Click command line interface."""
|
|
2
|
+
|
|
1
3
|
# This file is part of felis.
|
|
2
4
|
#
|
|
3
5
|
# Developed for the LSST Data Management System.
|
|
@@ -21,313 +23,384 @@
|
|
|
21
23
|
|
|
22
24
|
from __future__ import annotations
|
|
23
25
|
|
|
24
|
-
import io
|
|
25
|
-
import json
|
|
26
26
|
import logging
|
|
27
|
-
import
|
|
28
|
-
from
|
|
29
|
-
from typing import Any
|
|
27
|
+
from collections.abc import Iterable
|
|
28
|
+
from typing import IO
|
|
30
29
|
|
|
31
30
|
import click
|
|
32
|
-
import yaml
|
|
33
31
|
from pydantic import ValidationError
|
|
34
|
-
from
|
|
35
|
-
from sqlalchemy.engine import
|
|
36
|
-
from sqlalchemy.engine.mock import MockConnection
|
|
32
|
+
from sqlalchemy.engine import Engine, create_engine, make_url
|
|
33
|
+
from sqlalchemy.engine.mock import MockConnection, create_mock_engine
|
|
37
34
|
|
|
38
|
-
from . import
|
|
39
|
-
from .check import CheckingVisitor
|
|
35
|
+
from . import __version__
|
|
40
36
|
from .datamodel import Schema
|
|
41
|
-
from .
|
|
42
|
-
from .
|
|
43
|
-
from .
|
|
44
|
-
from .
|
|
37
|
+
from .db.schema import create_database
|
|
38
|
+
from .db.utils import DatabaseContext, is_mock_url
|
|
39
|
+
from .diff import DatabaseDiff, FormattedSchemaDiff, SchemaDiff
|
|
40
|
+
from .metadata import MetaDataBuilder
|
|
41
|
+
from .tap_schema import DataLoader, MetadataInserter, TableManager
|
|
42
|
+
|
|
43
|
+
__all__ = ["cli"]
|
|
45
44
|
|
|
46
45
|
logger = logging.getLogger("felis")
|
|
47
46
|
|
|
47
|
+
loglevel_choices = ["CRITICAL", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG"]
|
|
48
|
+
|
|
48
49
|
|
|
49
50
|
@click.group()
|
|
50
51
|
@click.version_option(__version__)
|
|
51
|
-
|
|
52
|
-
""
|
|
53
|
-
|
|
52
|
+
@click.option(
|
|
53
|
+
"--log-level",
|
|
54
|
+
type=click.Choice(loglevel_choices),
|
|
55
|
+
envvar="FELIS_LOGLEVEL",
|
|
56
|
+
help="Felis log level",
|
|
57
|
+
default=logging.getLevelName(logging.INFO),
|
|
58
|
+
)
|
|
59
|
+
@click.option(
|
|
60
|
+
"--log-file",
|
|
61
|
+
type=click.Path(),
|
|
62
|
+
envvar="FELIS_LOGFILE",
|
|
63
|
+
help="Felis log file path",
|
|
64
|
+
)
|
|
65
|
+
@click.option(
|
|
66
|
+
"--id-generation/--no-id-generation",
|
|
67
|
+
is_flag=True,
|
|
68
|
+
help="Generate IDs for all objects that do not have them",
|
|
69
|
+
default=True,
|
|
70
|
+
)
|
|
71
|
+
@click.pass_context
|
|
72
|
+
def cli(ctx: click.Context, log_level: str, log_file: str | None, id_generation: bool) -> None:
|
|
73
|
+
"""Felis command line tools"""
|
|
74
|
+
ctx.ensure_object(dict)
|
|
75
|
+
ctx.obj["id_generation"] = id_generation
|
|
76
|
+
if ctx.obj["id_generation"]:
|
|
77
|
+
logger.info("ID generation is enabled")
|
|
78
|
+
else:
|
|
79
|
+
logger.info("ID generation is disabled")
|
|
80
|
+
if log_file:
|
|
81
|
+
logging.basicConfig(filename=log_file, level=log_level)
|
|
82
|
+
else:
|
|
83
|
+
logging.basicConfig(level=log_level)
|
|
54
84
|
|
|
55
85
|
|
|
56
|
-
@cli.command("create
|
|
57
|
-
@click.option("--engine-url", envvar="
|
|
58
|
-
@click.option("--schema-name", help="Alternate
|
|
59
|
-
@click.option(
|
|
86
|
+
@cli.command("create", help="Create database objects from the Felis file")
|
|
87
|
+
@click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL", default="sqlite://")
|
|
88
|
+
@click.option("--schema-name", help="Alternate schema name to override Felis file")
|
|
89
|
+
@click.option(
|
|
90
|
+
"--initialize",
|
|
91
|
+
is_flag=True,
|
|
92
|
+
help="Create the schema in the database if it does not exist (error if already exists)",
|
|
93
|
+
)
|
|
94
|
+
@click.option(
|
|
95
|
+
"--drop", is_flag=True, help="Drop schema if it already exists in the database (implies --initialize)"
|
|
96
|
+
)
|
|
97
|
+
@click.option("--echo", is_flag=True, help="Echo database commands as they are executed")
|
|
98
|
+
@click.option("--dry-run", is_flag=True, help="Dry run only to print out commands instead of executing")
|
|
99
|
+
@click.option(
|
|
100
|
+
"--output-file", "-o", type=click.File(mode="w"), help="Write SQL commands to a file instead of executing"
|
|
101
|
+
)
|
|
102
|
+
@click.option("--ignore-constraints", is_flag=True, help="Ignore constraints when creating tables")
|
|
60
103
|
@click.argument("file", type=click.File())
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
visitor = SQLVisitor(schema_name=schema_name)
|
|
65
|
-
schema = visitor.visit_schema(schema_obj)
|
|
66
|
-
|
|
67
|
-
metadata = schema.metadata
|
|
68
|
-
|
|
69
|
-
engine: Engine | MockConnection
|
|
70
|
-
if not dry_run:
|
|
71
|
-
engine = create_engine(engine_url)
|
|
72
|
-
else:
|
|
73
|
-
_insert_dump = InsertDump()
|
|
74
|
-
engine = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump)
|
|
75
|
-
_insert_dump.dialect = engine.dialect
|
|
76
|
-
metadata.create_all(engine)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
@cli.command("init-tap")
|
|
80
|
-
@click.option("--tap-schema-name", help="Alt Schema Name for TAP_SCHEMA")
|
|
81
|
-
@click.option("--tap-schemas-table", help="Alt Table Name for TAP_SCHEMA.schemas")
|
|
82
|
-
@click.option("--tap-tables-table", help="Alt Table Name for TAP_SCHEMA.tables")
|
|
83
|
-
@click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
|
|
84
|
-
@click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
|
|
85
|
-
@click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
|
|
86
|
-
@click.argument("engine-url")
|
|
87
|
-
def init_tap(
|
|
104
|
+
@click.pass_context
|
|
105
|
+
def create(
|
|
106
|
+
ctx: click.Context,
|
|
88
107
|
engine_url: str,
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
108
|
+
schema_name: str | None,
|
|
109
|
+
initialize: bool,
|
|
110
|
+
drop: bool,
|
|
111
|
+
echo: bool,
|
|
112
|
+
dry_run: bool,
|
|
113
|
+
output_file: IO[str] | None,
|
|
114
|
+
ignore_constraints: bool,
|
|
115
|
+
file: IO[str],
|
|
95
116
|
) -> None:
|
|
96
|
-
"""
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
117
|
+
"""Create database objects from the Felis file.
|
|
118
|
+
|
|
119
|
+
Parameters
|
|
120
|
+
----------
|
|
121
|
+
engine_url
|
|
122
|
+
SQLAlchemy Engine URL.
|
|
123
|
+
schema_name
|
|
124
|
+
Alternate schema name to override Felis file.
|
|
125
|
+
initialize
|
|
126
|
+
Create the schema in the database if it does not exist.
|
|
127
|
+
drop
|
|
128
|
+
Drop schema if it already exists in the database.
|
|
129
|
+
echo
|
|
130
|
+
Echo database commands as they are executed.
|
|
131
|
+
dry_run
|
|
132
|
+
Dry run only to print out commands instead of executing.
|
|
133
|
+
output_file
|
|
134
|
+
Write SQL commands to a file instead of executing.
|
|
135
|
+
ignore_constraints
|
|
136
|
+
Ignore constraints when creating tables.
|
|
137
|
+
file
|
|
138
|
+
Felis file to read.
|
|
100
139
|
"""
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
140
|
+
try:
|
|
141
|
+
schema = Schema.from_stream(file, context={"id_generation": ctx.obj["id_generation"]})
|
|
142
|
+
url = make_url(engine_url)
|
|
143
|
+
if schema_name:
|
|
144
|
+
logger.info(f"Overriding schema name with: {schema_name}")
|
|
145
|
+
schema.name = schema_name
|
|
146
|
+
elif url.drivername == "sqlite":
|
|
147
|
+
logger.info("Overriding schema name for sqlite with: main")
|
|
148
|
+
schema.name = "main"
|
|
149
|
+
if not url.host and not url.drivername == "sqlite":
|
|
150
|
+
dry_run = True
|
|
151
|
+
logger.info("Forcing dry run for non-sqlite engine URL with no host")
|
|
152
|
+
|
|
153
|
+
metadata = MetaDataBuilder(schema, ignore_constraints=ignore_constraints).build()
|
|
154
|
+
logger.debug(f"Created metadata with schema name: {metadata.schema}")
|
|
155
|
+
|
|
156
|
+
engine: Engine | MockConnection
|
|
157
|
+
if not dry_run and not output_file:
|
|
158
|
+
engine = create_engine(url, echo=echo)
|
|
159
|
+
else:
|
|
160
|
+
if dry_run:
|
|
161
|
+
logger.info("Dry run will be executed")
|
|
162
|
+
engine = DatabaseContext.create_mock_engine(url, output_file)
|
|
163
|
+
if output_file:
|
|
164
|
+
logger.info("Writing SQL output to: " + output_file.name)
|
|
165
|
+
|
|
166
|
+
context = DatabaseContext(metadata, engine)
|
|
167
|
+
|
|
168
|
+
if drop and initialize:
|
|
169
|
+
raise ValueError("Cannot drop and initialize schema at the same time")
|
|
170
|
+
|
|
171
|
+
if drop:
|
|
172
|
+
logger.debug("Dropping schema if it exists")
|
|
173
|
+
context.drop()
|
|
174
|
+
initialize = True # If schema is dropped, it needs to be recreated.
|
|
175
|
+
|
|
176
|
+
if initialize:
|
|
177
|
+
logger.debug("Creating schema if not exists")
|
|
178
|
+
context.initialize()
|
|
179
|
+
|
|
180
|
+
context.create_all()
|
|
181
|
+
except Exception as e:
|
|
182
|
+
logger.exception(e)
|
|
183
|
+
raise click.ClickException(str(e))
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@cli.command("load-tap-schema", help="Load metadata from a Felis file into a TAP_SCHEMA database")
|
|
187
|
+
@click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL")
|
|
188
|
+
@click.option(
|
|
189
|
+
"--tap-schema-name", "-n", help="Name of the TAP_SCHEMA schema in the database (default: TAP_SCHEMA)"
|
|
190
|
+
)
|
|
191
|
+
@click.option(
|
|
192
|
+
"--tap-tables-postfix",
|
|
193
|
+
"-p",
|
|
194
|
+
help="Postfix which is applied to standard TAP_SCHEMA table names",
|
|
195
|
+
default="",
|
|
196
|
+
)
|
|
197
|
+
@click.option("--tap-schema-index", "-i", type=int, help="TAP_SCHEMA index of the schema in this environment")
|
|
198
|
+
@click.option("--dry-run", "-D", is_flag=True, help="Execute dry run only. Does not insert any data.")
|
|
199
|
+
@click.option("--echo", "-e", is_flag=True, help="Print out the generated insert statements to stdout")
|
|
200
|
+
@click.option("--output-file", "-o", type=click.Path(), help="Write SQL commands to a file")
|
|
201
|
+
@click.option(
|
|
202
|
+
"--force-unbounded-arraysize",
|
|
203
|
+
is_flag=True,
|
|
204
|
+
help="Use unbounded arraysize by default for all variable length string columns"
|
|
205
|
+
", e.g., ``votable:arraysize: *`` (workaround for astropy bug #18099)",
|
|
206
|
+
) # DM-50899: Variable-length bounded strings are not handled correctly in astropy
|
|
207
|
+
@click.option(
|
|
208
|
+
"--unique-keys",
|
|
209
|
+
"-u",
|
|
210
|
+
is_flag=True,
|
|
211
|
+
help="Generate unique key_id values for keys and key_columns tables by prepending the schema name",
|
|
212
|
+
default=False,
|
|
213
|
+
)
|
|
125
214
|
@click.argument("file", type=click.File())
|
|
126
|
-
|
|
215
|
+
@click.pass_context
|
|
216
|
+
def load_tap_schema(
|
|
217
|
+
ctx: click.Context,
|
|
127
218
|
engine_url: str,
|
|
128
|
-
schema_name: str,
|
|
129
|
-
catalog_name: str,
|
|
130
|
-
dry_run: bool,
|
|
131
219
|
tap_schema_name: str,
|
|
132
220
|
tap_tables_postfix: str,
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
221
|
+
tap_schema_index: int,
|
|
222
|
+
dry_run: bool,
|
|
223
|
+
echo: bool,
|
|
224
|
+
output_file: str | None,
|
|
225
|
+
force_unbounded_arraysize: bool,
|
|
226
|
+
unique_keys: bool,
|
|
227
|
+
file: IO[str],
|
|
139
228
|
) -> None:
|
|
140
|
-
"""Load TAP metadata from a Felis
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
229
|
+
"""Load TAP metadata from a Felis file.
|
|
230
|
+
|
|
231
|
+
Parameters
|
|
232
|
+
----------
|
|
233
|
+
engine_url
|
|
234
|
+
SQLAlchemy Engine URL.
|
|
235
|
+
tap_tables_postfix
|
|
236
|
+
Postfix which is applied to standard TAP_SCHEMA table names.
|
|
237
|
+
tap_schema_index
|
|
238
|
+
TAP_SCHEMA index of the schema in this environment.
|
|
239
|
+
dry_run
|
|
240
|
+
Execute dry run only. Does not insert any data.
|
|
241
|
+
echo
|
|
242
|
+
Print out the generated insert statements to stdout.
|
|
243
|
+
output_file
|
|
244
|
+
Output file for writing generated SQL.
|
|
245
|
+
file
|
|
246
|
+
Felis file to read.
|
|
247
|
+
|
|
248
|
+
Notes
|
|
249
|
+
-----
|
|
250
|
+
The TAP_SCHEMA database must already exist or the command will fail. This
|
|
251
|
+
command will not initialize the TAP_SCHEMA tables.
|
|
144
252
|
"""
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
if
|
|
148
|
-
|
|
149
|
-
if "@graph" not in schema_obj:
|
|
150
|
-
schema_obj["@type"] = "felis:Schema"
|
|
151
|
-
schema_obj["@context"] = DEFAULT_CONTEXT
|
|
152
|
-
elif isinstance(top_level_object, list):
|
|
153
|
-
schema_obj = {"@context": DEFAULT_CONTEXT, "@graph": top_level_object}
|
|
253
|
+
url = make_url(engine_url)
|
|
254
|
+
engine: Engine | MockConnection
|
|
255
|
+
if dry_run or is_mock_url(url):
|
|
256
|
+
engine = create_mock_engine(url, executor=None)
|
|
154
257
|
else:
|
|
155
|
-
logger.error("Schema object not of recognizable type")
|
|
156
|
-
raise click.exceptions.Exit(1)
|
|
157
|
-
|
|
158
|
-
normalized = _normalize(schema_obj, embed="@always")
|
|
159
|
-
if len(normalized["@graph"]) > 1 and (schema_name or catalog_name):
|
|
160
|
-
logger.error("--schema-name and --catalog-name incompatible with multiple schemas")
|
|
161
|
-
raise click.exceptions.Exit(1)
|
|
162
|
-
|
|
163
|
-
# Force normalized["@graph"] to a list, which is what happens when there's
|
|
164
|
-
# multiple schemas
|
|
165
|
-
if isinstance(normalized["@graph"], dict):
|
|
166
|
-
normalized["@graph"] = [normalized["@graph"]]
|
|
167
|
-
|
|
168
|
-
tap_tables = init_tables(
|
|
169
|
-
tap_schema_name,
|
|
170
|
-
tap_tables_postfix,
|
|
171
|
-
tap_schemas_table,
|
|
172
|
-
tap_tables_table,
|
|
173
|
-
tap_columns_table,
|
|
174
|
-
tap_keys_table,
|
|
175
|
-
tap_key_columns_table,
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
if not dry_run:
|
|
179
258
|
engine = create_engine(engine_url)
|
|
259
|
+
mgr = TableManager(
|
|
260
|
+
engine=engine,
|
|
261
|
+
apply_schema_to_metadata=False if engine.dialect.name == "sqlite" else True,
|
|
262
|
+
schema_name=tap_schema_name,
|
|
263
|
+
table_name_postfix=tap_tables_postfix,
|
|
264
|
+
)
|
|
180
265
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
catalog_name=catalog_name,
|
|
189
|
-
schema_name=schema_name,
|
|
190
|
-
tap_tables=tap_tables,
|
|
191
|
-
)
|
|
192
|
-
tap_visitor.visit_schema(schema)
|
|
193
|
-
else:
|
|
194
|
-
_insert_dump = InsertDump()
|
|
195
|
-
conn = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump, paramstyle="pyformat")
|
|
196
|
-
# After the engine is created, update the executor with the dialect
|
|
197
|
-
_insert_dump.dialect = conn.dialect
|
|
198
|
-
|
|
199
|
-
for schema in normalized["@graph"]:
|
|
200
|
-
tap_visitor = TapLoadingVisitor.from_mock_connection(
|
|
201
|
-
conn,
|
|
202
|
-
catalog_name=catalog_name,
|
|
203
|
-
schema_name=schema_name,
|
|
204
|
-
tap_tables=tap_tables,
|
|
205
|
-
)
|
|
206
|
-
tap_visitor.visit_schema(schema)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
@cli.command("modify-tap")
|
|
210
|
-
@click.option("--start-schema-at", type=int, help="Rewrite index for tap:schema_index", default=0)
|
|
211
|
-
@click.argument("files", nargs=-1, type=click.File())
|
|
212
|
-
def modify_tap(start_schema_at: int, files: Iterable[io.TextIOBase]) -> None:
|
|
213
|
-
"""Modify TAP information in Felis schema FILES.
|
|
214
|
-
|
|
215
|
-
This command has some utilities to aid in rewriting felis FILES
|
|
216
|
-
in specific ways. It will write out a merged version of these files.
|
|
217
|
-
"""
|
|
218
|
-
count = 0
|
|
219
|
-
graph = []
|
|
220
|
-
for file in files:
|
|
221
|
-
schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
|
|
222
|
-
if "@graph" not in schema_obj:
|
|
223
|
-
schema_obj["@type"] = "felis:Schema"
|
|
224
|
-
schema_obj["@context"] = DEFAULT_CONTEXT
|
|
225
|
-
schema_index = schema_obj.get("tap:schema_index")
|
|
226
|
-
if not schema_index or (schema_index and schema_index > start_schema_at):
|
|
227
|
-
schema_index = start_schema_at + count
|
|
228
|
-
count += 1
|
|
229
|
-
schema_obj["tap:schema_index"] = schema_index
|
|
230
|
-
graph.extend(jsonld.flatten(schema_obj))
|
|
231
|
-
merged = {"@context": DEFAULT_CONTEXT, "@graph": graph}
|
|
232
|
-
normalized = _normalize(merged, embed="@always")
|
|
233
|
-
_dump(normalized)
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
@cli.command("basic-check")
|
|
237
|
-
@click.argument("file", type=click.File())
|
|
238
|
-
def basic_check(file: io.TextIOBase) -> None:
|
|
239
|
-
"""Perform a basic check on a felis FILE.
|
|
240
|
-
|
|
241
|
-
This performs a very check to ensure required fields are
|
|
242
|
-
populated and basic semantics are okay. It does not ensure semantics
|
|
243
|
-
are valid for other commands like create-all or load-tap.
|
|
244
|
-
"""
|
|
245
|
-
schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
|
|
246
|
-
schema_obj["@type"] = "felis:Schema"
|
|
247
|
-
# Force Context and Schema Type
|
|
248
|
-
schema_obj["@context"] = DEFAULT_CONTEXT
|
|
249
|
-
check_visitor = CheckingVisitor()
|
|
250
|
-
check_visitor.visit_schema(schema_obj)
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
@cli.command("normalize")
|
|
254
|
-
@click.argument("file", type=click.File())
|
|
255
|
-
def normalize(file: io.TextIOBase) -> None:
|
|
256
|
-
"""Normalize a Felis FILE.
|
|
257
|
-
|
|
258
|
-
Takes a felis schema FILE, expands it (resolving the full URLs),
|
|
259
|
-
then compacts it, and finally produces output in the canonical
|
|
260
|
-
format.
|
|
261
|
-
|
|
262
|
-
(This is most useful in some debugging scenarios)
|
|
263
|
-
|
|
264
|
-
See Also :
|
|
266
|
+
schema = Schema.from_stream(
|
|
267
|
+
file,
|
|
268
|
+
context={
|
|
269
|
+
"id_generation": ctx.obj["id_generation"],
|
|
270
|
+
"force_unbounded_arraysize": force_unbounded_arraysize,
|
|
271
|
+
},
|
|
272
|
+
)
|
|
265
273
|
|
|
266
|
-
|
|
267
|
-
|
|
274
|
+
DataLoader(
|
|
275
|
+
schema,
|
|
276
|
+
mgr,
|
|
277
|
+
engine,
|
|
278
|
+
tap_schema_index=tap_schema_index,
|
|
279
|
+
dry_run=dry_run,
|
|
280
|
+
print_sql=echo,
|
|
281
|
+
output_path=output_file,
|
|
282
|
+
unique_keys=unique_keys,
|
|
283
|
+
).load()
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
@cli.command("init-tap-schema", help="Initialize a standard TAP_SCHEMA database")
|
|
287
|
+
@click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL", required=True)
|
|
288
|
+
@click.option("--tap-schema-name", help="Name of the TAP_SCHEMA schema in the database")
|
|
289
|
+
@click.option(
|
|
290
|
+
"--tap-tables-postfix", help="Postfix which is applied to standard TAP_SCHEMA table names", default=""
|
|
291
|
+
)
|
|
292
|
+
@click.option(
|
|
293
|
+
"--insert-metadata/--no-insert-metadata",
|
|
294
|
+
is_flag=True,
|
|
295
|
+
help="Insert metadata describing TAP_SCHEMA itself",
|
|
296
|
+
default=True,
|
|
297
|
+
)
|
|
298
|
+
@click.pass_context
|
|
299
|
+
def init_tap_schema(
|
|
300
|
+
ctx: click.Context, engine_url: str, tap_schema_name: str, tap_tables_postfix: str, insert_metadata: bool
|
|
301
|
+
) -> None:
|
|
302
|
+
"""Initialize a standard TAP_SCHEMA database.
|
|
303
|
+
|
|
304
|
+
Parameters
|
|
305
|
+
----------
|
|
306
|
+
engine_url
|
|
307
|
+
SQLAlchemy Engine URL.
|
|
308
|
+
tap_schema_name
|
|
309
|
+
Name of the TAP_SCHEMA schema in the database.
|
|
310
|
+
tap_tables_postfix
|
|
311
|
+
Postfix which is applied to standard TAP_SCHEMA table names.
|
|
312
|
+
insert_metadata
|
|
313
|
+
Insert metadata describing TAP_SCHEMA itself.
|
|
314
|
+
If set to False, only the TAP_SCHEMA tables will be created, but no
|
|
315
|
+
metadata will be inserted.
|
|
268
316
|
"""
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
317
|
+
url = make_url(engine_url)
|
|
318
|
+
engine: Engine | MockConnection
|
|
319
|
+
if is_mock_url(url):
|
|
320
|
+
raise click.ClickException("Mock engine URL is not supported for this command")
|
|
321
|
+
engine = create_engine(engine_url)
|
|
322
|
+
mgr = TableManager(
|
|
323
|
+
apply_schema_to_metadata=False if engine.dialect.name == "sqlite" else True,
|
|
324
|
+
schema_name=tap_schema_name,
|
|
325
|
+
table_name_postfix=tap_tables_postfix,
|
|
326
|
+
)
|
|
327
|
+
mgr.initialize_database(engine)
|
|
328
|
+
if insert_metadata:
|
|
329
|
+
inserter = MetadataInserter(mgr, engine)
|
|
330
|
+
inserter.insert_metadata()
|
|
277
331
|
|
|
278
|
-
@cli.command("merge")
|
|
279
|
-
@click.argument("files", nargs=-1, type=click.File())
|
|
280
|
-
def merge(files: Iterable[io.TextIOBase]) -> None:
|
|
281
|
-
"""Merge a set of Felis FILES.
|
|
282
332
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
graph = []
|
|
288
|
-
for file in files:
|
|
289
|
-
schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
|
|
290
|
-
if "@graph" not in schema_obj:
|
|
291
|
-
schema_obj["@type"] = "felis:Schema"
|
|
292
|
-
schema_obj["@context"] = DEFAULT_CONTEXT
|
|
293
|
-
graph.extend(jsonld.flatten(schema_obj))
|
|
294
|
-
updated_map: MutableMapping[str, Any] = {}
|
|
295
|
-
for item in graph:
|
|
296
|
-
_id = item["@id"]
|
|
297
|
-
item_to_update = updated_map.get(_id, item)
|
|
298
|
-
if item_to_update and item_to_update != item:
|
|
299
|
-
logger.debug(f"Overwriting {_id}")
|
|
300
|
-
item_to_update.update(item)
|
|
301
|
-
updated_map[_id] = item_to_update
|
|
302
|
-
merged = {"@context": DEFAULT_CONTEXT, "@graph": list(updated_map.values())}
|
|
303
|
-
normalized = _normalize(merged, embed="@always")
|
|
304
|
-
_dump(normalized)
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
@cli.command("validate")
|
|
333
|
+
@cli.command("validate", help="Validate one or more Felis YAML files")
|
|
334
|
+
@click.option(
|
|
335
|
+
"--check-description", is_flag=True, help="Check that all objects have a description", default=False
|
|
336
|
+
)
|
|
308
337
|
@click.option(
|
|
309
|
-
"-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
338
|
+
"--check-redundant-datatypes", is_flag=True, help="Check for redundant datatype overrides", default=False
|
|
339
|
+
)
|
|
340
|
+
@click.option(
|
|
341
|
+
"--check-tap-table-indexes",
|
|
342
|
+
is_flag=True,
|
|
343
|
+
help="Check that every table has a unique TAP table index",
|
|
344
|
+
default=False,
|
|
345
|
+
)
|
|
346
|
+
@click.option(
|
|
347
|
+
"--check-tap-principal",
|
|
348
|
+
is_flag=True,
|
|
349
|
+
help="Check that at least one column per table is flagged as TAP principal",
|
|
350
|
+
default=False,
|
|
314
351
|
)
|
|
315
|
-
@click.option("-d", "--require-description", is_flag=True, help="Require description for all objects")
|
|
316
352
|
@click.argument("files", nargs=-1, type=click.File())
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
353
|
+
@click.pass_context
|
|
354
|
+
def validate(
|
|
355
|
+
ctx: click.Context,
|
|
356
|
+
check_description: bool,
|
|
357
|
+
check_redundant_datatypes: bool,
|
|
358
|
+
check_tap_table_indexes: bool,
|
|
359
|
+
check_tap_principal: bool,
|
|
360
|
+
files: Iterable[IO[str]],
|
|
361
|
+
) -> None:
|
|
362
|
+
"""Validate one or more felis YAML files.
|
|
363
|
+
|
|
364
|
+
Parameters
|
|
365
|
+
----------
|
|
366
|
+
check_description
|
|
367
|
+
Check that all objects have a valid description.
|
|
368
|
+
check_redundant_datatypes
|
|
369
|
+
Check for redundant type overrides.
|
|
370
|
+
check_tap_table_indexes
|
|
371
|
+
Check that every table has a unique TAP table index.
|
|
372
|
+
check_tap_principal
|
|
373
|
+
Check that at least one column per table is flagged as TAP principal.
|
|
374
|
+
files
|
|
375
|
+
The Felis YAML files to validate.
|
|
376
|
+
|
|
377
|
+
Raises
|
|
378
|
+
------
|
|
379
|
+
click.exceptions.Exit
|
|
380
|
+
Raised if any validation errors are found. The ``ValidationError``
|
|
381
|
+
which is thrown when a schema fails to validate will be logged as an
|
|
382
|
+
error message.
|
|
383
|
+
|
|
384
|
+
Notes
|
|
385
|
+
-----
|
|
386
|
+
All of the ``check`` flags are turned off by default and represent
|
|
387
|
+
optional validations controlled by the Pydantic context.
|
|
388
|
+
"""
|
|
325
389
|
rc = 0
|
|
326
390
|
for file in files:
|
|
327
391
|
file_name = getattr(file, "name", None)
|
|
328
392
|
logger.info(f"Validating {file_name}")
|
|
329
393
|
try:
|
|
330
|
-
|
|
394
|
+
Schema.from_stream(
|
|
395
|
+
file,
|
|
396
|
+
context={
|
|
397
|
+
"check_description": check_description,
|
|
398
|
+
"check_redundant_datatypes": check_redundant_datatypes,
|
|
399
|
+
"check_tap_table_indexes": check_tap_table_indexes,
|
|
400
|
+
"check_tap_principal": check_tap_principal,
|
|
401
|
+
"id_generation": ctx.obj["id_generation"],
|
|
402
|
+
},
|
|
403
|
+
)
|
|
331
404
|
except ValidationError as e:
|
|
332
405
|
logger.error(e)
|
|
333
406
|
rc = 1
|
|
@@ -335,80 +408,103 @@ def validate(schema_name: str, require_description: bool, files: Iterable[io.Tex
|
|
|
335
408
|
raise click.exceptions.Exit(rc)
|
|
336
409
|
|
|
337
410
|
|
|
338
|
-
@cli.command(
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
411
|
+
@cli.command(
|
|
412
|
+
"diff",
|
|
413
|
+
help="""
|
|
414
|
+
Compare two schemas or a schema and a database for changes
|
|
415
|
+
|
|
416
|
+
Examples:
|
|
417
|
+
|
|
418
|
+
felis diff schema1.yaml schema2.yaml
|
|
419
|
+
|
|
420
|
+
felis diff -c alembic schema1.yaml schema2.yaml
|
|
421
|
+
|
|
422
|
+
felis diff --engine-url sqlite:///test.db schema.yaml
|
|
423
|
+
""",
|
|
424
|
+
)
|
|
425
|
+
@click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL")
|
|
426
|
+
@click.option(
|
|
427
|
+
"-c",
|
|
428
|
+
"--comparator",
|
|
429
|
+
type=click.Choice(["alembic", "deepdiff"], case_sensitive=False),
|
|
430
|
+
help="Comparator to use for schema comparison",
|
|
431
|
+
default="deepdiff",
|
|
432
|
+
)
|
|
433
|
+
@click.option("-E", "--error-on-change", is_flag=True, help="Exit with error code if schemas are different")
|
|
434
|
+
@click.argument("files", nargs=-1, type=click.File())
|
|
435
|
+
@click.pass_context
|
|
436
|
+
def diff(
|
|
437
|
+
ctx: click.Context,
|
|
438
|
+
engine_url: str | None,
|
|
439
|
+
comparator: str,
|
|
440
|
+
error_on_change: bool,
|
|
441
|
+
files: Iterable[IO[str]],
|
|
442
|
+
) -> None:
|
|
443
|
+
schemas = [
|
|
444
|
+
Schema.from_stream(file, context={"id_generation": ctx.obj["id_generation"]}) for file in files
|
|
445
|
+
]
|
|
446
|
+
|
|
447
|
+
diff: SchemaDiff
|
|
448
|
+
if len(schemas) == 2 and engine_url is None:
|
|
449
|
+
if comparator == "alembic":
|
|
450
|
+
db_context = create_database(schemas[0])
|
|
451
|
+
assert isinstance(db_context.engine, Engine)
|
|
452
|
+
diff = DatabaseDiff(schemas[1], db_context.engine)
|
|
453
|
+
else:
|
|
454
|
+
diff = FormattedSchemaDiff(schemas[0], schemas[1])
|
|
455
|
+
elif len(schemas) == 1 and engine_url is not None:
|
|
456
|
+
engine = create_engine(engine_url)
|
|
457
|
+
diff = DatabaseDiff(schemas[0], engine)
|
|
458
|
+
else:
|
|
459
|
+
raise click.ClickException(
|
|
460
|
+
"Invalid arguments - provide two schemas or a schema and a database engine URL"
|
|
461
|
+
)
|
|
462
|
+
|
|
463
|
+
diff.print()
|
|
464
|
+
|
|
465
|
+
if diff.has_changes and error_on_change:
|
|
466
|
+
raise click.ClickException("Schema was changed")
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
@cli.command(
|
|
470
|
+
"dump",
|
|
471
|
+
help="""
|
|
472
|
+
Dump a schema file to YAML or JSON format
|
|
473
|
+
|
|
474
|
+
Example:
|
|
475
|
+
|
|
476
|
+
felis dump schema.yaml schema.json
|
|
477
|
+
|
|
478
|
+
felis dump schema.yaml schema_dump.yaml
|
|
479
|
+
""",
|
|
480
|
+
)
|
|
481
|
+
@click.option(
|
|
482
|
+
"--strip-ids/--no-strip-ids",
|
|
483
|
+
is_flag=True,
|
|
484
|
+
help="Strip IDs from the output schema",
|
|
485
|
+
default=False,
|
|
486
|
+
)
|
|
487
|
+
@click.argument("files", nargs=2, type=click.Path())
|
|
488
|
+
@click.pass_context
|
|
489
|
+
def dump(
|
|
490
|
+
ctx: click.Context,
|
|
491
|
+
strip_ids: bool,
|
|
492
|
+
files: list[str],
|
|
350
493
|
) -> None:
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
schema_obj = jsonld.compact(schema_obj, DEFAULT_CONTEXT, options=options)
|
|
366
|
-
json.dump(schema_obj, sys.stdout, indent=4)
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
def _dump(obj: Mapping[str, Any]) -> None:
|
|
370
|
-
class OrderedDumper(yaml.Dumper):
|
|
371
|
-
pass
|
|
372
|
-
|
|
373
|
-
def _dict_representer(dumper: yaml.Dumper, data: Any) -> Any:
|
|
374
|
-
return dumper.represent_mapping(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items())
|
|
375
|
-
|
|
376
|
-
OrderedDumper.add_representer(dict, _dict_representer)
|
|
377
|
-
print(yaml.dump(obj, Dumper=OrderedDumper, default_flow_style=False))
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
def _normalize(schema_obj: Mapping[str, Any], embed: str = "@last") -> MutableMapping[str, Any]:
|
|
381
|
-
framed = jsonld.frame(schema_obj, DEFAULT_FRAME, options=dict(embed=embed))
|
|
382
|
-
compacted = jsonld.compact(framed, DEFAULT_CONTEXT, options=dict(graph=True))
|
|
383
|
-
graph = compacted["@graph"]
|
|
384
|
-
graph = [ReorderingVisitor(add_type=True).visit_schema(schema_obj) for schema_obj in graph]
|
|
385
|
-
compacted["@graph"] = graph if len(graph) > 1 else graph[0]
|
|
386
|
-
return compacted
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
class InsertDump:
|
|
390
|
-
"""An Insert Dumper for SQL statements."""
|
|
391
|
-
|
|
392
|
-
dialect: Any = None
|
|
393
|
-
|
|
394
|
-
def dump(self, sql: Any, *multiparams: Any, **params: Any) -> None:
|
|
395
|
-
compiled = sql.compile(dialect=self.dialect)
|
|
396
|
-
sql_str = str(compiled) + ";"
|
|
397
|
-
params_list = [compiled.params]
|
|
398
|
-
for params in params_list:
|
|
399
|
-
if not params:
|
|
400
|
-
print(sql_str)
|
|
401
|
-
continue
|
|
402
|
-
new_params = {}
|
|
403
|
-
for key, value in params.items():
|
|
404
|
-
if isinstance(value, str):
|
|
405
|
-
new_params[key] = f"'{value}'"
|
|
406
|
-
elif value is None:
|
|
407
|
-
new_params[key] = "null"
|
|
408
|
-
else:
|
|
409
|
-
new_params[key] = value
|
|
410
|
-
|
|
411
|
-
print(sql_str % new_params)
|
|
494
|
+
if strip_ids:
|
|
495
|
+
logger.info("Stripping IDs from the output schema")
|
|
496
|
+
if files[1].endswith(".json"):
|
|
497
|
+
format = "json"
|
|
498
|
+
elif files[1].endswith(".yaml"):
|
|
499
|
+
format = "yaml"
|
|
500
|
+
else:
|
|
501
|
+
raise click.ClickException("Output file must have a .json or .yaml extension")
|
|
502
|
+
schema = Schema.from_uri(files[0], context={"id_generation": ctx.obj["id_generation"]})
|
|
503
|
+
with open(files[1], "w") as f:
|
|
504
|
+
if format == "yaml":
|
|
505
|
+
schema.dump_yaml(f, strip_ids=strip_ids)
|
|
506
|
+
elif format == "json":
|
|
507
|
+
schema.dump_json(f, strip_ids=strip_ids)
|
|
412
508
|
|
|
413
509
|
|
|
414
510
|
if __name__ == "__main__":
|