lsst-felis 28.2024.4500__py3-none-any.whl → 30.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- felis/__init__.py +9 -1
- felis/cli.py +308 -209
- felis/config/tap_schema/columns.csv +33 -0
- felis/config/tap_schema/key_columns.csv +8 -0
- felis/config/tap_schema/keys.csv +8 -0
- felis/config/tap_schema/schemas.csv +2 -0
- felis/config/tap_schema/tables.csv +6 -0
- felis/config/tap_schema/tap_schema_extensions.yaml +73 -0
- felis/datamodel.py +599 -59
- felis/db/{dialects.py → _dialects.py} +69 -4
- felis/db/{variants.py → _variants.py} +1 -1
- felis/db/database_context.py +917 -0
- felis/diff.py +234 -0
- felis/metadata.py +89 -19
- felis/tap_schema.py +271 -166
- felis/tests/postgresql.py +1 -1
- felis/tests/run_cli.py +79 -0
- felis/types.py +7 -7
- {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/METADATA +20 -16
- lsst_felis-30.0.0rc3.dist-info/RECORD +31 -0
- {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/WHEEL +1 -1
- felis/db/utils.py +0 -409
- felis/tap.py +0 -597
- felis/tests/utils.py +0 -122
- felis/version.py +0 -2
- lsst_felis-28.2024.4500.dist-info/RECORD +0 -26
- felis/{schemas → config/tap_schema}/tap_schema_std.yaml +0 -0
- felis/db/{sqltypes.py → _sqltypes.py} +7 -7
- {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/entry_points.txt +0 -0
- {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info/licenses}/COPYRIGHT +0 -0
- {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info/licenses}/LICENSE +0 -0
- {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/top_level.txt +0 -0
- {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/zip-safe +0 -0
felis/tap_schema.py
CHANGED
|
@@ -21,57 +21,56 @@
|
|
|
21
21
|
# You should have received a copy of the GNU General Public License
|
|
22
22
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
23
23
|
|
|
24
|
+
import csv
|
|
25
|
+
import io
|
|
24
26
|
import logging
|
|
25
27
|
import os
|
|
26
28
|
import re
|
|
27
|
-
from typing import Any
|
|
29
|
+
from typing import IO, Any
|
|
28
30
|
|
|
29
31
|
from lsst.resources import ResourcePath
|
|
30
|
-
from sqlalchemy import MetaData, Table, text
|
|
31
|
-
from sqlalchemy.engine import Connection, Engine
|
|
32
|
-
from sqlalchemy.engine.mock import MockConnection
|
|
32
|
+
from sqlalchemy import MetaData, Table, select, text
|
|
33
33
|
from sqlalchemy.exc import SQLAlchemyError
|
|
34
|
-
from sqlalchemy.schema import CreateSchema
|
|
35
34
|
from sqlalchemy.sql.dml import Insert
|
|
36
35
|
|
|
37
|
-
from
|
|
38
|
-
from
|
|
39
|
-
from
|
|
40
|
-
from
|
|
41
|
-
|
|
36
|
+
from . import datamodel
|
|
37
|
+
from .datamodel import Constraint, Schema
|
|
38
|
+
from .db.database_context import DatabaseContext, is_sqlite_url
|
|
39
|
+
from .metadata import MetaDataBuilder
|
|
42
40
|
from .types import FelisType
|
|
43
41
|
|
|
44
|
-
__all__ = ["
|
|
42
|
+
__all__ = ["DataLoader", "MetadataInserter", "TableManager"]
|
|
45
43
|
|
|
46
44
|
logger = logging.getLogger(__name__)
|
|
47
45
|
|
|
48
46
|
|
|
49
47
|
class TableManager:
|
|
50
|
-
"""Manage
|
|
48
|
+
"""Manage TAP_SCHEMA table definitions and access.
|
|
49
|
+
|
|
50
|
+
This class provides a streamlined interface for managing TAP_SCHEMA tables,
|
|
51
|
+
automatically handling dialect-specific requirements and providing
|
|
52
|
+
consistent access to TAP_SCHEMA tables through a dictionary-like interface.
|
|
51
53
|
|
|
52
54
|
Parameters
|
|
53
55
|
----------
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
56
|
+
engine_url
|
|
57
|
+
Database engine URL for automatic dialect detection and schema
|
|
58
|
+
handling.
|
|
59
|
+
db_context
|
|
60
|
+
Optional database context for reflecting existing TAP_SCHEMA tables.
|
|
61
|
+
If None, loads from internal YAML schema.
|
|
59
62
|
schema_name
|
|
60
|
-
The name of the schema to use for
|
|
61
|
-
|
|
62
|
-
apply_schema_to_metadata
|
|
63
|
-
If True, apply the schema to the metadata as well as the tables.
|
|
64
|
-
If False, these will be set to None, e.g., for sqlite.
|
|
63
|
+
The name of the schema to use for TAP_SCHEMA tables.
|
|
64
|
+
Defaults to "TAP_SCHEMA".
|
|
65
65
|
table_name_postfix
|
|
66
|
-
A string to append to
|
|
67
|
-
|
|
68
|
-
|
|
66
|
+
A string to append to standard table names for customization.
|
|
67
|
+
extensions_path
|
|
68
|
+
Path to additional TAP_SCHEMA table definitions.
|
|
69
69
|
|
|
70
70
|
Notes
|
|
71
71
|
-----
|
|
72
|
-
The
|
|
73
|
-
|
|
74
|
-
be used if ``engine`` is None or a ``MockConnection``.
|
|
72
|
+
The TableManager automatically detects SQLite vs. schema-supporting
|
|
73
|
+
databases and handles schema application appropriately.
|
|
75
74
|
"""
|
|
76
75
|
|
|
77
76
|
_TABLE_NAMES_STD = ["schemas", "tables", "columns", "keys", "key_columns"]
|
|
@@ -82,61 +81,107 @@ class TableManager:
|
|
|
82
81
|
|
|
83
82
|
def __init__(
|
|
84
83
|
self,
|
|
85
|
-
|
|
84
|
+
engine_url: str | None = None,
|
|
85
|
+
db_context: DatabaseContext | None = None,
|
|
86
86
|
schema_name: str | None = None,
|
|
87
|
-
apply_schema_to_metadata: bool = True,
|
|
88
87
|
table_name_postfix: str = "",
|
|
88
|
+
extensions_path: str | None = None,
|
|
89
89
|
):
|
|
90
90
|
"""Initialize the table manager."""
|
|
91
91
|
self.table_name_postfix = table_name_postfix
|
|
92
|
-
self.
|
|
93
|
-
self.
|
|
92
|
+
self.schema_name = schema_name or self._SCHEMA_NAME_STD
|
|
93
|
+
self.extensions_path = extensions_path
|
|
94
94
|
|
|
95
|
-
|
|
96
|
-
|
|
95
|
+
# Automatic dialect detection from engine URL
|
|
96
|
+
if engine_url is not None:
|
|
97
|
+
self.apply_schema_to_metadata = not is_sqlite_url(engine_url)
|
|
98
|
+
else:
|
|
99
|
+
# Default case: assume SQLite
|
|
100
|
+
engine_url = "sqlite:///:memory:"
|
|
101
|
+
self.apply_schema_to_metadata = False
|
|
102
|
+
|
|
103
|
+
if db_context is not None:
|
|
104
|
+
if table_name_postfix != "":
|
|
105
|
+
logger.warning(
|
|
106
|
+
"Table name postfix '%s' will be ignored when reflecting TAP_SCHEMA database",
|
|
107
|
+
table_name_postfix,
|
|
108
|
+
)
|
|
97
109
|
logger.debug(
|
|
98
110
|
"Reflecting TAP_SCHEMA database from existing database at %s",
|
|
99
|
-
engine.url._replace(password="***"),
|
|
111
|
+
db_context.engine.url._replace(password="***"),
|
|
100
112
|
)
|
|
101
|
-
self.
|
|
113
|
+
self._reflect_from_database(db_context)
|
|
102
114
|
else:
|
|
103
|
-
self.
|
|
115
|
+
self._load_from_yaml()
|
|
104
116
|
|
|
105
117
|
self._create_table_map()
|
|
106
118
|
self._check_tables()
|
|
107
119
|
|
|
108
|
-
def
|
|
109
|
-
"""
|
|
120
|
+
def _load_from_yaml(self) -> None:
|
|
121
|
+
"""Load TAP_SCHEMA from YAML resources and build metadata."""
|
|
122
|
+
# Load the base schema
|
|
123
|
+
self._schema = self.load_schema_resource()
|
|
124
|
+
|
|
125
|
+
# Override schema name if specified
|
|
126
|
+
if self.schema_name != self._SCHEMA_NAME_STD:
|
|
127
|
+
self._schema.name = self.schema_name
|
|
128
|
+
else:
|
|
129
|
+
self.schema_name = self._schema.name
|
|
130
|
+
|
|
131
|
+
# Apply any extensions
|
|
132
|
+
self._apply_extensions()
|
|
133
|
+
|
|
134
|
+
# Build metadata using streamlined approach
|
|
135
|
+
self._metadata = MetaDataBuilder(
|
|
136
|
+
self._schema,
|
|
137
|
+
apply_schema_to_metadata=self.apply_schema_to_metadata,
|
|
138
|
+
table_name_postfix=self.table_name_postfix,
|
|
139
|
+
).build()
|
|
140
|
+
|
|
141
|
+
logger.debug("Loaded TAP_SCHEMA '%s' from YAML resource", self.schema_name)
|
|
142
|
+
|
|
143
|
+
def _reflect_from_database(self, db_context: DatabaseContext) -> None:
|
|
144
|
+
"""Reflect TAP_SCHEMA tables from an existing database.
|
|
110
145
|
|
|
111
146
|
Parameters
|
|
112
147
|
----------
|
|
113
|
-
|
|
114
|
-
The
|
|
148
|
+
db_context
|
|
149
|
+
The database context to use for reflection.
|
|
115
150
|
"""
|
|
116
151
|
self._metadata = MetaData(schema=self.schema_name if self.apply_schema_to_metadata else None)
|
|
117
152
|
try:
|
|
118
|
-
self.
|
|
153
|
+
self._metadata.reflect(bind=db_context.engine)
|
|
119
154
|
except SQLAlchemyError as e:
|
|
120
155
|
logger.error("Error reflecting TAP_SCHEMA database: %s", e)
|
|
121
156
|
raise
|
|
122
157
|
|
|
123
|
-
def
|
|
124
|
-
"""
|
|
125
|
-
|
|
158
|
+
def _apply_extensions(self) -> None:
|
|
159
|
+
"""Apply extensions from a YAML file to the TAP_SCHEMA schema.
|
|
160
|
+
|
|
161
|
+
This method loads extension column definitions from a YAML file and
|
|
162
|
+
adds them to the appropriate TAP_SCHEMA tables.
|
|
126
163
|
"""
|
|
127
|
-
self.
|
|
128
|
-
|
|
129
|
-
self.schema.name = self.schema_name
|
|
130
|
-
else:
|
|
131
|
-
self.schema_name = self.schema.name
|
|
164
|
+
if not self.extensions_path:
|
|
165
|
+
return
|
|
132
166
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
apply_schema_to_metadata=self.apply_schema_to_metadata,
|
|
136
|
-
apply_schema_to_tables=self.apply_schema_to_metadata,
|
|
137
|
-
).build()
|
|
167
|
+
logger.info("Loading TAP_SCHEMA extensions from: %s", self.extensions_path)
|
|
168
|
+
extensions_schema = Schema.from_uri(self.extensions_path, context={"id_generation": True})
|
|
138
169
|
|
|
139
|
-
|
|
170
|
+
if not extensions_schema.tables:
|
|
171
|
+
logger.warning("Extensions schema does not contain any tables, no extensions applied")
|
|
172
|
+
return
|
|
173
|
+
|
|
174
|
+
extension_count = 0
|
|
175
|
+
extension_tables = {table.name: table.columns for table in extensions_schema.tables}
|
|
176
|
+
|
|
177
|
+
for table in self.schema.tables:
|
|
178
|
+
extension_columns = extension_tables.get(table.name)
|
|
179
|
+
if extension_columns:
|
|
180
|
+
table.columns = list(table.columns) + list(extension_columns)
|
|
181
|
+
extension_count += len(extension_columns)
|
|
182
|
+
logger.debug("Added %d extension columns to table '%s'", len(extension_columns), table.name)
|
|
183
|
+
|
|
184
|
+
logger.info("Applied %d extension columns to TAP_SCHEMA", extension_count)
|
|
140
185
|
|
|
141
186
|
def __getitem__(self, table_name: str) -> Table:
|
|
142
187
|
"""Get one of the TAP_SCHEMA tables by its standard TAP_SCHEMA name.
|
|
@@ -157,7 +202,7 @@ class TableManager:
|
|
|
157
202
|
tables to be accessed by their standard TAP_SCHEMA names.
|
|
158
203
|
"""
|
|
159
204
|
if table_name not in self._table_map:
|
|
160
|
-
raise KeyError(f"Table '{table_name}' not found in
|
|
205
|
+
raise KeyError(f"Table '{table_name}' not found in TAP_SCHEMA")
|
|
161
206
|
return self.metadata.tables[self._table_map[table_name]]
|
|
162
207
|
|
|
163
208
|
@property
|
|
@@ -202,7 +247,7 @@ class TableManager:
|
|
|
202
247
|
str
|
|
203
248
|
The path to the standard TAP_SCHEMA schema resource.
|
|
204
249
|
"""
|
|
205
|
-
return os.path.join(os.path.dirname(__file__), "
|
|
250
|
+
return os.path.join(os.path.dirname(__file__), "config", "tap_schema", "tap_schema_std.yaml")
|
|
206
251
|
|
|
207
252
|
@classmethod
|
|
208
253
|
def get_tap_schema_std_resource(cls) -> ResourcePath:
|
|
@@ -213,7 +258,7 @@ class TableManager:
|
|
|
213
258
|
`~lsst.resources.ResourcePath`
|
|
214
259
|
The standard TAP_SCHEMA schema resource.
|
|
215
260
|
"""
|
|
216
|
-
return ResourcePath("resource://felis/
|
|
261
|
+
return ResourcePath("resource://felis/config/tap_schema/tap_schema_std.yaml")
|
|
217
262
|
|
|
218
263
|
@classmethod
|
|
219
264
|
def get_table_names_std(cls) -> list[str]:
|
|
@@ -258,19 +303,13 @@ class TableManager:
|
|
|
258
303
|
"""Create a mapping of standard table names to the table names modified
|
|
259
304
|
with a postfix, as well as the prepended schema name if it is set.
|
|
260
305
|
|
|
261
|
-
Returns
|
|
262
|
-
-------
|
|
263
|
-
dict
|
|
264
|
-
A dictionary mapping the standard table names to the modified
|
|
265
|
-
table names.
|
|
266
|
-
|
|
267
306
|
Notes
|
|
268
307
|
-----
|
|
269
308
|
This is a private method that is called during initialization, allowing
|
|
270
309
|
us to use table names like ``schemas11`` such as those used by the CADC
|
|
271
310
|
TAP library instead of the standard table names. It also maps between
|
|
272
311
|
the standard table names and those with the schema name prepended like
|
|
273
|
-
SQLAlchemy uses.
|
|
312
|
+
SQLAlchemy uses. The mapping is stored in ``self._table_map``.
|
|
274
313
|
"""
|
|
275
314
|
self._table_map = {
|
|
276
315
|
table_name: (
|
|
@@ -292,72 +331,50 @@ class TableManager:
|
|
|
292
331
|
for table_name in TableManager.get_table_names_std():
|
|
293
332
|
self[table_name]
|
|
294
333
|
|
|
295
|
-
def
|
|
296
|
-
"""
|
|
297
|
-
exist.
|
|
298
|
-
|
|
299
|
-
Parameters
|
|
300
|
-
----------
|
|
301
|
-
engine
|
|
302
|
-
The SQLAlchemy engine to use to create the schema.
|
|
303
|
-
|
|
304
|
-
Notes
|
|
305
|
-
-----
|
|
306
|
-
This method only creates the schema in the database. It does not create
|
|
307
|
-
the tables.
|
|
308
|
-
"""
|
|
309
|
-
create_schema_functions = {
|
|
310
|
-
"postgresql": self._create_schema_postgresql,
|
|
311
|
-
"mysql": self._create_schema_mysql,
|
|
312
|
-
}
|
|
313
|
-
|
|
314
|
-
dialect_name = engine.dialect.name
|
|
315
|
-
if dialect_name == "sqlite":
|
|
316
|
-
# SQLite doesn't have schemas.
|
|
317
|
-
return
|
|
318
|
-
|
|
319
|
-
create_function = create_schema_functions.get(dialect_name)
|
|
320
|
-
|
|
321
|
-
if create_function:
|
|
322
|
-
with engine.begin() as connection:
|
|
323
|
-
create_function(connection)
|
|
324
|
-
else:
|
|
325
|
-
# Some other database engine we don't currently know how to handle.
|
|
326
|
-
raise NotImplementedError(
|
|
327
|
-
f"Database engine '{engine.dialect.name}' is not supported for schema creation"
|
|
328
|
-
)
|
|
329
|
-
|
|
330
|
-
def _create_schema_postgresql(self, connection: Connection) -> None:
|
|
331
|
-
"""Create the schema in a PostgreSQL database.
|
|
334
|
+
def initialize_database(self, db_context: DatabaseContext) -> None:
|
|
335
|
+
"""Initialize a database with the TAP_SCHEMA tables.
|
|
332
336
|
|
|
333
337
|
Parameters
|
|
334
338
|
----------
|
|
335
|
-
|
|
336
|
-
The
|
|
339
|
+
db_context
|
|
340
|
+
The database context to use to create the tables.
|
|
337
341
|
"""
|
|
338
|
-
|
|
342
|
+
logger.info("Creating TAP_SCHEMA database '%s'", self.schema_name)
|
|
343
|
+
db_context.initialize()
|
|
344
|
+
db_context.create_all()
|
|
339
345
|
|
|
340
|
-
def
|
|
341
|
-
|
|
346
|
+
def select(
|
|
347
|
+
self,
|
|
348
|
+
db_context: DatabaseContext,
|
|
349
|
+
table_name: str,
|
|
350
|
+
filter_condition: str = "",
|
|
351
|
+
) -> list[dict[str, Any]]:
|
|
352
|
+
"""Select all rows from a TAP_SCHEMA table with an optional filter
|
|
353
|
+
condition.
|
|
342
354
|
|
|
343
355
|
Parameters
|
|
344
356
|
----------
|
|
345
|
-
|
|
346
|
-
The
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
357
|
+
db_context
|
|
358
|
+
The database context to use to connect to the database.
|
|
359
|
+
table_name
|
|
360
|
+
The name of the table to select from.
|
|
361
|
+
filter_condition
|
|
362
|
+
The filter condition as a string. If empty, no filter will be
|
|
363
|
+
applied.
|
|
352
364
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
365
|
+
Returns
|
|
366
|
+
-------
|
|
367
|
+
list
|
|
368
|
+
A list of dictionaries containing the rows from the table.
|
|
357
369
|
"""
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
370
|
+
table = self[table_name]
|
|
371
|
+
query = select(table)
|
|
372
|
+
if filter_condition:
|
|
373
|
+
query = query.where(text(filter_condition))
|
|
374
|
+
with db_context.engine.connect() as connection:
|
|
375
|
+
result = connection.execute(query)
|
|
376
|
+
rows = [dict(row._mapping) for row in result]
|
|
377
|
+
return rows
|
|
361
378
|
|
|
362
379
|
|
|
363
380
|
class DataLoader:
|
|
@@ -369,37 +386,42 @@ class DataLoader:
|
|
|
369
386
|
The Felis ``Schema`` to load into the TAP_SCHEMA tables.
|
|
370
387
|
mgr
|
|
371
388
|
The table manager that contains the TAP_SCHEMA tables.
|
|
372
|
-
|
|
373
|
-
The
|
|
389
|
+
db_context
|
|
390
|
+
The database context to use to connect to the database.
|
|
374
391
|
tap_schema_index
|
|
375
392
|
The index of the schema in the TAP_SCHEMA database.
|
|
376
|
-
|
|
377
|
-
The file to write the SQL statements to. If None,
|
|
378
|
-
suppressed.
|
|
393
|
+
output_file
|
|
394
|
+
The file object to write the SQL statements to. If None, file output
|
|
395
|
+
will be suppressed.
|
|
379
396
|
print_sql
|
|
380
397
|
If True, print the SQL statements that will be executed.
|
|
381
398
|
dry_run
|
|
382
399
|
If True, the data will not be loaded into the database.
|
|
400
|
+
unique_keys
|
|
401
|
+
If True, prepend the schema name to the key name to make it unique
|
|
402
|
+
when loading data into the keys and key_columns tables.
|
|
383
403
|
"""
|
|
384
404
|
|
|
385
405
|
def __init__(
|
|
386
406
|
self,
|
|
387
407
|
schema: Schema,
|
|
388
408
|
mgr: TableManager,
|
|
389
|
-
|
|
409
|
+
db_context: DatabaseContext,
|
|
390
410
|
tap_schema_index: int = 0,
|
|
391
|
-
|
|
411
|
+
output_file: IO[str] | None = None,
|
|
392
412
|
print_sql: bool = False,
|
|
393
413
|
dry_run: bool = False,
|
|
414
|
+
unique_keys: bool = False,
|
|
394
415
|
):
|
|
395
416
|
self.schema = schema
|
|
396
417
|
self.mgr = mgr
|
|
397
|
-
self.
|
|
418
|
+
self._db_context = db_context
|
|
398
419
|
self.tap_schema_index = tap_schema_index
|
|
399
420
|
self.inserts: list[Insert] = []
|
|
400
|
-
self.
|
|
421
|
+
self.output_file = output_file
|
|
401
422
|
self.print_sql = print_sql
|
|
402
423
|
self.dry_run = dry_run
|
|
424
|
+
self.unique_keys = unique_keys
|
|
403
425
|
|
|
404
426
|
def load(self) -> None:
|
|
405
427
|
"""Load the schema data into the TAP_SCHEMA tables.
|
|
@@ -419,17 +441,17 @@ class DataLoader:
|
|
|
419
441
|
if self.print_sql:
|
|
420
442
|
# Print to stdout.
|
|
421
443
|
self._print_sql()
|
|
422
|
-
if self.
|
|
444
|
+
if self.output_file:
|
|
423
445
|
# Print to an output file.
|
|
424
446
|
self._write_sql_to_file()
|
|
425
447
|
if not self.dry_run:
|
|
426
448
|
# Execute the inserts if not in dry run mode.
|
|
427
449
|
self._execute_inserts()
|
|
428
450
|
else:
|
|
429
|
-
logger.info("Dry run
|
|
451
|
+
logger.info("Dry run - skipped loading into database")
|
|
430
452
|
|
|
431
453
|
def _insert_schemas(self) -> None:
|
|
432
|
-
"""Insert the schema data into the schemas table."""
|
|
454
|
+
"""Insert the schema data into the ``schemas`` table."""
|
|
433
455
|
schema_record = {
|
|
434
456
|
"schema_name": self.schema.name,
|
|
435
457
|
"utype": self.schema.votable_utype,
|
|
@@ -454,7 +476,7 @@ class DataLoader:
|
|
|
454
476
|
return f"{self.schema.name}.{table.name}"
|
|
455
477
|
|
|
456
478
|
def _insert_tables(self) -> None:
|
|
457
|
-
"""Insert the table data into the tables table."""
|
|
479
|
+
"""Insert the table data into the ``tables`` table."""
|
|
458
480
|
for table in self.schema.tables:
|
|
459
481
|
table_record = {
|
|
460
482
|
"schema_name": self.schema.name,
|
|
@@ -467,7 +489,7 @@ class DataLoader:
|
|
|
467
489
|
self._insert("tables", table_record)
|
|
468
490
|
|
|
469
491
|
def _insert_columns(self) -> None:
|
|
470
|
-
"""Insert the column data into the columns table."""
|
|
492
|
+
"""Insert the column data into the ``columns`` table."""
|
|
471
493
|
for table in self.schema.tables:
|
|
472
494
|
for column in table.columns:
|
|
473
495
|
felis_type = FelisType.felis_type(column.datatype.value)
|
|
@@ -495,18 +517,49 @@ class DataLoader:
|
|
|
495
517
|
}
|
|
496
518
|
self._insert("columns", column_record)
|
|
497
519
|
|
|
520
|
+
def _get_key(self, constraint: Constraint) -> str:
|
|
521
|
+
"""Get the key name for a constraint.
|
|
522
|
+
|
|
523
|
+
Parameters
|
|
524
|
+
----------
|
|
525
|
+
constraint
|
|
526
|
+
The constraint to get the key name for.
|
|
527
|
+
|
|
528
|
+
Returns
|
|
529
|
+
-------
|
|
530
|
+
str
|
|
531
|
+
The key name for the constraint.
|
|
532
|
+
|
|
533
|
+
Notes
|
|
534
|
+
-----
|
|
535
|
+
This will prepend the name of the schema to the key name if the
|
|
536
|
+
`unique_keys` attribute is set to True. Otherwise, it will just return
|
|
537
|
+
the name of the constraint.
|
|
538
|
+
"""
|
|
539
|
+
if self.unique_keys:
|
|
540
|
+
key_id = f"{self.schema.name}_{constraint.name}"
|
|
541
|
+
logger.debug("Generated unique key_id: %s -> %s", constraint.name, key_id)
|
|
542
|
+
else:
|
|
543
|
+
key_id = constraint.name
|
|
544
|
+
return key_id
|
|
545
|
+
|
|
498
546
|
def _insert_keys(self) -> None:
|
|
499
|
-
"""Insert the foreign keys into the keys and key_columns
|
|
547
|
+
"""Insert the foreign keys into the ``keys`` and ``key_columns``
|
|
548
|
+
tables.
|
|
549
|
+
"""
|
|
500
550
|
for table in self.schema.tables:
|
|
501
551
|
for constraint in table.constraints:
|
|
502
552
|
if isinstance(constraint, datamodel.ForeignKeyConstraint):
|
|
553
|
+
###########################################################
|
|
503
554
|
# Handle keys table
|
|
555
|
+
###########################################################
|
|
504
556
|
referenced_column = self.schema.find_object_by_id(
|
|
505
557
|
constraint.referenced_columns[0], datamodel.Column
|
|
506
558
|
)
|
|
507
559
|
referenced_table = self.schema.get_table_by_column(referenced_column)
|
|
560
|
+
key_id = self._get_key(constraint)
|
|
508
561
|
key_record = {
|
|
509
|
-
"key_id":
|
|
562
|
+
"key_id": key_id,
|
|
510
563
|
"from_table": self._get_table_name(table),
|
|
511
564
|
"target_table": self._get_table_name(referenced_table),
|
|
512
565
|
"description": constraint.description,
|
|
@@ -514,17 +567,23 @@ class DataLoader:
|
|
|
514
567
|
}
|
|
515
568
|
self._insert("keys", key_record)
|
|
516
569
|
|
|
570
|
+
###########################################################
|
|
517
571
|
# Handle key_columns table
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
572
|
+
###########################################################
|
|
573
|
+
# Loop over the corresponding columns and referenced
|
|
574
|
+
# columns and insert a record for each pair. This is
|
|
575
|
+
# necessary for proper handling of composite keys.
|
|
576
|
+
for from_column_id, target_column_id in zip(
|
|
577
|
+
constraint.columns, constraint.referenced_columns
|
|
578
|
+
):
|
|
579
|
+
from_column = self.schema.find_object_by_id(from_column_id, datamodel.Column)
|
|
580
|
+
target_column = self.schema.find_object_by_id(target_column_id, datamodel.Column)
|
|
581
|
+
key_columns_record = {
|
|
582
|
+
"key_id": key_id,
|
|
583
|
+
"from_column": from_column.name,
|
|
584
|
+
"target_column": target_column.name,
|
|
585
|
+
}
|
|
586
|
+
self._insert("key_columns", key_columns_record)
|
|
528
587
|
|
|
529
588
|
def _generate_all_inserts(self) -> None:
|
|
530
589
|
"""Generate the inserts for all the data."""
|
|
@@ -539,17 +598,13 @@ class DataLoader:
|
|
|
539
598
|
"""Load the `~felis.datamodel.Schema` data into the TAP_SCHEMA
|
|
540
599
|
tables.
|
|
541
600
|
"""
|
|
542
|
-
|
|
543
|
-
with self.engine.
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
except Exception as e:
|
|
550
|
-
logger.error("Error loading data into database: %s", e)
|
|
551
|
-
transaction.rollback()
|
|
552
|
-
raise
|
|
601
|
+
try:
|
|
602
|
+
with self._db_context.engine.begin() as connection:
|
|
603
|
+
for insert in self.inserts:
|
|
604
|
+
connection.execute(insert)
|
|
605
|
+
except Exception as e:
|
|
606
|
+
logger.error("Error loading data into database: %s", e)
|
|
607
|
+
raise
|
|
553
608
|
|
|
554
609
|
def _compiled_inserts(self) -> list[str]:
|
|
555
610
|
"""Compile the inserts to SQL.
|
|
@@ -560,22 +615,26 @@ class DataLoader:
|
|
|
560
615
|
A list of the compiled insert statements.
|
|
561
616
|
"""
|
|
562
617
|
return [
|
|
563
|
-
str(
|
|
618
|
+
str(
|
|
619
|
+
insert.compile(
|
|
620
|
+
dialect=self._db_context.dialect,
|
|
621
|
+
compile_kwargs={"literal_binds": True},
|
|
622
|
+
),
|
|
623
|
+
)
|
|
564
624
|
for insert in self.inserts
|
|
565
625
|
]
|
|
566
626
|
|
|
567
627
|
def _print_sql(self) -> None:
|
|
568
628
|
"""Print the generated inserts to stdout."""
|
|
569
629
|
for insert_str in self._compiled_inserts():
|
|
570
|
-
print(insert_str)
|
|
630
|
+
print(insert_str + ";")
|
|
571
631
|
|
|
572
632
|
def _write_sql_to_file(self) -> None:
|
|
573
633
|
"""Write the generated insert statements to a file."""
|
|
574
|
-
if not self.
|
|
575
|
-
raise ValueError("No output
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
outfile.write(insert_str + "\n")
|
|
634
|
+
if not self.output_file:
|
|
635
|
+
raise ValueError("No output file specified")
|
|
636
|
+
for insert_str in self._compiled_inserts():
|
|
637
|
+
self.output_file.write(insert_str + ";" + "\n")
|
|
579
638
|
|
|
580
639
|
def _insert(self, table_name: str, record: list[Any] | dict[str, Any]) -> None:
|
|
581
640
|
"""Generate an insert statement for a record.
|
|
@@ -642,3 +701,49 @@ class DataLoader:
|
|
|
642
701
|
if index.columns and len(index.columns) == 1 and index.columns[0] == column.id:
|
|
643
702
|
return 1
|
|
644
703
|
return 0
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
class MetadataInserter:
|
|
707
|
+
"""Insert TAP_SCHEMA self-description rows into the database.
|
|
708
|
+
|
|
709
|
+
Parameters
|
|
710
|
+
----------
|
|
711
|
+
mgr
|
|
712
|
+
The table manager that contains the TAP_SCHEMA tables.
|
|
713
|
+
db_context
|
|
714
|
+
The database context for connecting to the TAP_SCHEMA database.
|
|
715
|
+
"""
|
|
716
|
+
|
|
717
|
+
def __init__(self, mgr: TableManager, db_context: DatabaseContext):
|
|
718
|
+
"""Initialize the metadata inserter.
|
|
719
|
+
|
|
720
|
+
Parameters
|
|
721
|
+
----------
|
|
722
|
+
mgr
|
|
723
|
+
The table manager representing the TAP_SCHEMA tables.
|
|
724
|
+
db_context
|
|
725
|
+
The database context for connecting to the database.
|
|
726
|
+
"""
|
|
727
|
+
self._mgr = mgr
|
|
728
|
+
self._db_context = db_context
|
|
729
|
+
|
|
730
|
+
def insert_metadata(self) -> None:
|
|
731
|
+
"""Insert the TAP_SCHEMA metadata into the database."""
|
|
732
|
+
with self._db_context.engine.begin() as conn:
|
|
733
|
+
for table_name in self._mgr.get_table_names_std():
|
|
734
|
+
table = self._mgr[table_name]
|
|
735
|
+
csv_bytes = ResourcePath(f"resource://felis/config/tap_schema/{table_name}.csv").read()
|
|
736
|
+
text_stream = io.TextIOWrapper(io.BytesIO(csv_bytes), encoding="utf-8")
|
|
737
|
+
reader = csv.reader(text_stream)
|
|
738
|
+
headers = next(reader)
|
|
739
|
+
rows = [
|
|
740
|
+
{key: None if value == "\\N" else value for key, value in zip(headers, row)}
|
|
741
|
+
for row in reader
|
|
742
|
+
]
|
|
743
|
+
logger.debug(
|
|
744
|
+
"Inserting %d rows into table '%s' with headers: %s",
|
|
745
|
+
len(rows),
|
|
746
|
+
table_name,
|
|
747
|
+
headers,
|
|
748
|
+
)
|
|
749
|
+
conn.execute(table.insert(), rows)
|
felis/tests/postgresql.py
CHANGED