lsst-felis 26.2024.900__py3-none-any.whl → 29.2025.4500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. felis/__init__.py +10 -24
  2. felis/cli.py +437 -341
  3. felis/config/tap_schema/columns.csv +33 -0
  4. felis/config/tap_schema/key_columns.csv +8 -0
  5. felis/config/tap_schema/keys.csv +8 -0
  6. felis/config/tap_schema/schemas.csv +2 -0
  7. felis/config/tap_schema/tables.csv +6 -0
  8. felis/config/tap_schema/tap_schema_std.yaml +273 -0
  9. felis/datamodel.py +1386 -193
  10. felis/db/dialects.py +116 -0
  11. felis/db/schema.py +62 -0
  12. felis/db/sqltypes.py +275 -48
  13. felis/db/utils.py +409 -0
  14. felis/db/variants.py +159 -0
  15. felis/diff.py +234 -0
  16. felis/metadata.py +385 -0
  17. felis/tap_schema.py +767 -0
  18. felis/tests/__init__.py +0 -0
  19. felis/tests/postgresql.py +134 -0
  20. felis/tests/run_cli.py +79 -0
  21. felis/types.py +57 -9
  22. lsst_felis-29.2025.4500.dist-info/METADATA +38 -0
  23. lsst_felis-29.2025.4500.dist-info/RECORD +31 -0
  24. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/WHEEL +1 -1
  25. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/COPYRIGHT +1 -1
  26. felis/check.py +0 -381
  27. felis/simple.py +0 -424
  28. felis/sql.py +0 -275
  29. felis/tap.py +0 -433
  30. felis/utils.py +0 -100
  31. felis/validation.py +0 -103
  32. felis/version.py +0 -2
  33. felis/visitor.py +0 -180
  34. lsst_felis-26.2024.900.dist-info/METADATA +0 -28
  35. lsst_felis-26.2024.900.dist-info/RECORD +0 -23
  36. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/entry_points.txt +0 -0
  37. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info/licenses}/LICENSE +0 -0
  38. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/top_level.txt +0 -0
  39. {lsst_felis-26.2024.900.dist-info → lsst_felis-29.2025.4500.dist-info}/zip-safe +0 -0
felis/tap_schema.py ADDED
@@ -0,0 +1,767 @@
1
+ """Provides utilities for creating and populating the TAP_SCHEMA database."""
2
+
3
+ # This file is part of felis.
4
+ #
5
+ # Developed for the LSST Data Management System.
6
+ # This product includes software developed by the LSST Project
7
+ # (https://www.lsst.org).
8
+ # See the COPYRIGHT file at the top-level directory of this distribution
9
+ # for details of code ownership.
10
+ #
11
+ # This program is free software: you can redistribute it and/or modify
12
+ # it under the terms of the GNU General Public License as published by
13
+ # the Free Software Foundation, either version 3 of the License, or
14
+ # (at your option) any later version.
15
+ #
16
+ # This program is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
23
+
24
+ import csv
25
+ import io
26
+ import logging
27
+ import os
28
+ import re
29
+ from typing import Any
30
+
31
+ from lsst.resources import ResourcePath
32
+ from sqlalchemy import MetaData, Table, select, text
33
+ from sqlalchemy.engine import Connection, Engine
34
+ from sqlalchemy.engine.mock import MockConnection
35
+ from sqlalchemy.exc import SQLAlchemyError
36
+ from sqlalchemy.schema import CreateSchema
37
+ from sqlalchemy.sql.dml import Insert
38
+
39
+ from felis import datamodel
40
+ from felis.datamodel import Constraint, Schema
41
+ from felis.db.utils import is_valid_engine
42
+ from felis.metadata import MetaDataBuilder
43
+
44
+ from .types import FelisType
45
+
46
# Names exported by ``from felis.tap_schema import *``.
# NOTE(review): ``MetadataInserter`` is defined in this module but is not
# listed here - confirm whether that omission is intentional.
__all__ = ["DataLoader", "TableManager"]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
49
+
50
+
51
class TableManager:
    """Manage creation of TAP_SCHEMA tables.

    Parameters
    ----------
    engine
        The SQLAlchemy engine for reflecting the TAP_SCHEMA tables from an
        existing database.
        This can be a mock connection or None, in which case the internal
        TAP_SCHEMA schema will be used by loading an internal YAML file.
    schema_name
        The name of the schema to use for the TAP_SCHEMA tables.
        Leave as None to use the standard name of "TAP_SCHEMA".
    apply_schema_to_metadata
        If True, apply the schema to the metadata as well as the tables.
        If False, these will be set to None, e.g., for sqlite.
    table_name_postfix
        A string to append to all the standard table names.
        This needs to be used in a way such that the resultant table names
        map to tables within the TAP_SCHEMA database.

    Raises
    ------
    KeyError
        If one of the standard TAP_SCHEMA tables cannot be found in the
        reflected or loaded metadata.

    Notes
    -----
    The TAP_SCHEMA schema must either have been created already, in which case
    the ``engine`` should be provided. Or the internal TAP_SCHEMA schema will
    be used if ``engine`` is None or a ``MockConnection``.
    """

    _TABLE_NAMES_STD = ["schemas", "tables", "columns", "keys", "key_columns"]
    """The standard table names for the TAP_SCHEMA tables."""

    _SCHEMA_NAME_STD = "TAP_SCHEMA"
    """The standard schema name for the TAP_SCHEMA tables."""

    def __init__(
        self,
        engine: Engine | MockConnection | None = None,
        schema_name: str | None = None,
        apply_schema_to_metadata: bool = True,
        table_name_postfix: str = "",
    ):
        """Initialize the table manager."""
        self.table_name_postfix = table_name_postfix
        self.apply_schema_to_metadata = apply_schema_to_metadata
        self.schema_name = schema_name or TableManager._SCHEMA_NAME_STD
        # Only populated when the schema is loaded from the packaged YAML
        # resource; it stays None when the tables are reflected from a
        # database, which is what the ``schema`` property documents.
        self._schema: Schema | None = None

        # The isinstance check narrows the type for the reflection branch
        # without relying on an ``assert`` that is stripped under ``-O``.
        if isinstance(engine, Engine) and is_valid_engine(engine):
            if table_name_postfix != "":
                # NOTE(review): the postfix is still applied by
                # ``_create_table_map``, so the reflected tables must carry it
                # for ``_check_tables`` to pass - confirm that this warning
                # matches the intended behavior.
                logger.warning(
                    "Table name postfix '%s' will be ignored when reflecting TAP_SCHEMA database",
                    table_name_postfix,
                )
            logger.debug(
                "Reflecting TAP_SCHEMA database from existing database at %s",
                # Documented way to render a URL with the password masked.
                engine.url.render_as_string(hide_password=True),
            )
            self._reflect(engine)
        else:
            self._load_yaml()

        self._create_table_map()
        self._check_tables()

    def _reflect(self, engine: Engine) -> None:
        """Reflect the TAP_SCHEMA database tables into the metadata.

        Parameters
        ----------
        engine
            The SQLAlchemy engine to use to reflect the tables.

        Raises
        ------
        sqlalchemy.exc.SQLAlchemyError
            If reflection fails; the error is logged and re-raised.
        """
        self._metadata = MetaData(schema=self.schema_name if self.apply_schema_to_metadata else None)
        try:
            self._metadata.reflect(bind=engine)
        except SQLAlchemyError as e:
            logger.error("Error reflecting TAP_SCHEMA database: %s", e)
            raise

    def _load_yaml(self) -> None:
        """Load the standard TAP_SCHEMA schema from a Felis package
        resource and build the SQLAlchemy metadata from it.
        """
        self._load_schema()
        schema = self._schema
        if schema is None:
            raise RuntimeError("TAP_SCHEMA schema resource was not loaded")

        if self.schema_name != TableManager._SCHEMA_NAME_STD:
            # A custom schema name overrides the name from the resource.
            schema.name = self.schema_name
        else:
            # Otherwise adopt whatever name the resource declares.
            self.schema_name = schema.name

        self._metadata = MetaDataBuilder(
            schema,
            apply_schema_to_metadata=self.apply_schema_to_metadata,
            table_name_postfix=self.table_name_postfix,
        ).build()

        logger.debug("Loaded TAP_SCHEMA '%s' from YAML resource", self.schema_name)

    def __getitem__(self, table_name: str) -> Table:
        """Get one of the TAP_SCHEMA tables by its standard TAP_SCHEMA name.

        Parameters
        ----------
        table_name
            The name of the table to get.

        Returns
        -------
        Table
            The table with the given name.

        Raises
        ------
        KeyError
            If the name is not a standard TAP_SCHEMA table name, or if the
            mapped table is missing from the metadata.

        Notes
        -----
        This implements array semantics for the table manager, allowing
        tables to be accessed by their standard TAP_SCHEMA names.
        """
        if table_name not in self._table_map:
            raise KeyError(f"Table '{table_name}' not found in TAP_SCHEMA")
        return self.metadata.tables[self._table_map[table_name]]

    @property
    def schema(self) -> Schema | None:
        """Get the TAP_SCHEMA schema.

        Returns
        -------
        Schema or None
            The TAP_SCHEMA schema, or None if the tables were reflected from
            an existing database.

        Notes
        -----
        This will only be set if the TAP_SCHEMA schema was loaded from a
        Felis package resource. In the case where the TAP_SCHEMA schema was
        reflected from an existing database, this will be None.
        """
        return self._schema

    @property
    def metadata(self) -> MetaData:
        """Get the metadata for the TAP_SCHEMA tables.

        Returns
        -------
        `~sqlalchemy.sql.schema.MetaData`
            The metadata for the TAP_SCHEMA tables.

        Notes
        -----
        This will either be the metadata that was reflected from an existing
        database or the metadata that was loaded from a Felis package resource.
        """
        return self._metadata

    @classmethod
    def get_tap_schema_std_path(cls) -> str:
        """Get the path to the standard TAP_SCHEMA schema resource.

        Returns
        -------
        str
            The path to the standard TAP_SCHEMA schema resource.
        """
        return os.path.join(os.path.dirname(__file__), "config", "tap_schema", "tap_schema_std.yaml")

    @classmethod
    def get_tap_schema_std_resource(cls) -> ResourcePath:
        """Get the standard TAP_SCHEMA schema resource.

        Returns
        -------
        `~lsst.resources.ResourcePath`
            The standard TAP_SCHEMA schema resource.
        """
        return ResourcePath("resource://felis/config/tap_schema/tap_schema_std.yaml")

    @classmethod
    def get_table_names_std(cls) -> list[str]:
        """Get the standard table names for the TAP_SCHEMA tables.

        Returns
        -------
        list
            A new list holding the standard table names for the TAP_SCHEMA
            tables, so callers cannot modify the class-level constant.
        """
        return list(cls._TABLE_NAMES_STD)

    @classmethod
    def get_schema_name_std(cls) -> str:
        """Get the standard schema name for the TAP_SCHEMA tables.

        Returns
        -------
        str
            The standard schema name for the TAP_SCHEMA tables.
        """
        return cls._SCHEMA_NAME_STD

    @classmethod
    def load_schema_resource(cls) -> Schema:
        """Load the standard TAP_SCHEMA schema from a Felis package
        resource into a Felis `~felis.datamodel.Schema`.

        Returns
        -------
        Schema
            The TAP_SCHEMA schema.
        """
        rp = cls.get_tap_schema_std_resource()
        return Schema.from_uri(rp, context={"id_generation": True})

    def _load_schema(self) -> None:
        """Load the TAP_SCHEMA schema from a Felis package resource."""
        self._schema = self.load_schema_resource()

    def _create_table_map(self) -> None:
        """Create a mapping of standard table names to the table names modified
        with a postfix, as well as the prepended schema name if it is set.

        Notes
        -----
        The mapping is stored on the instance as ``_table_map``; nothing is
        returned. This is a private method that is called during
        initialization, allowing us to use table names like ``schemas11`` such
        as those used by the CADC TAP library instead of the standard table
        names. It also maps between the standard table names and those with
        the schema name prepended like SQLAlchemy uses.
        """
        prefix = f"{self.schema_name}." if self.apply_schema_to_metadata else ""
        self._table_map = {
            table_name: f"{prefix}{table_name}{self.table_name_postfix}"
            for table_name in TableManager.get_table_names_std()
        }
        logger.debug("Created TAP_SCHEMA table map: %s", self._table_map)

    def _check_tables(self) -> None:
        """Check that there is a valid mapping to each standard table.

        Raises
        ------
        KeyError
            If a table is missing from the table map.
        """
        for table_name in TableManager.get_table_names_std():
            # The lookup itself is the check; the result is not needed.
            self[table_name]

    def _create_schema(self, engine: Engine) -> None:
        """Create the database schema for TAP_SCHEMA if it does not already
        exist.

        Parameters
        ----------
        engine
            The SQLAlchemy engine to use to create the schema.

        Raises
        ------
        NotImplementedError
            If the database dialect is not sqlite, postgresql or mysql.

        Notes
        -----
        This method only creates the schema in the database. It does not create
        the tables.
        """
        dialect_name = engine.dialect.name
        if dialect_name == "sqlite":
            # SQLite doesn't have schemas.
            return

        create_schema_functions = {
            "postgresql": self._create_schema_postgresql,
            "mysql": self._create_schema_mysql,
        }
        create_function = create_schema_functions.get(dialect_name)
        if create_function is None:
            # Some other database engine we don't currently know how to handle.
            raise NotImplementedError(
                f"Database engine '{dialect_name}' is not supported for schema creation"
            )

        with engine.begin() as connection:
            create_function(connection)

    def _create_schema_postgresql(self, connection: Connection) -> None:
        """Create the schema in a PostgreSQL database.

        Parameters
        ----------
        connection
            The SQLAlchemy connection to use to create the schema.
        """
        connection.execute(CreateSchema(self.schema_name, if_not_exists=True))

    def _create_schema_mysql(self, connection: Connection) -> None:
        """Create the schema in a MySQL database.

        Parameters
        ----------
        connection
            The SQLAlchemy connection to use to create the schema.
        """
        # Quote the identifier with the dialect's own rules so that names
        # needing quotes (or containing quote characters) cannot break or
        # alter the statement.
        quoted_name = connection.dialect.identifier_preparer.quote(self.schema_name)
        connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {quoted_name}"))

    def initialize_database(self, engine: Engine) -> None:
        """Initialize a database with the TAP_SCHEMA tables.

        Parameters
        ----------
        engine
            The SQLAlchemy engine to use to create the tables.
        """
        logger.info("Creating TAP_SCHEMA database '%s'", self.schema_name)
        self._create_schema(engine)
        self.metadata.create_all(engine)

    def select(self, engine: Engine, table_name: str, filter_condition: str = "") -> list[dict[str, Any]]:
        """Select all rows from a TAP_SCHEMA table with an optional filter
        condition.

        Parameters
        ----------
        engine
            The SQLAlchemy engine to use to connect to the database.
        table_name
            The name of the table to select from.
        filter_condition
            The filter condition as a string. If empty, no filter will be
            applied. The string is passed to the database as raw SQL, so it
            must not be built from untrusted input.

        Returns
        -------
        list
            A list of dictionaries containing the rows from the table.

        Raises
        ------
        KeyError
            If ``table_name`` is not one of the standard TAP_SCHEMA tables.
        """
        table = self[table_name]
        query = select(table)
        if filter_condition:
            query = query.where(text(filter_condition))
        with engine.connect() as connection:
            result = connection.execute(query)
            rows = [dict(row._mapping) for row in result]
        return rows
397
+
398
+
399
class DataLoader:
    """Load data into the TAP_SCHEMA tables.

    Parameters
    ----------
    schema
        The Felis ``Schema`` to load into the TAP_SCHEMA tables.
    mgr
        The table manager that contains the TAP_SCHEMA tables.
    engine
        The SQLAlchemy engine to use to connect to the database.
    tap_schema_index
        The index of the schema in the TAP_SCHEMA database.
    output_path
        The file to write the SQL statements to. If None, no file is
        written.
    print_sql
        If True, print the SQL statements that will be executed.
    dry_run
        If True, the data will not be loaded into the database.
    unique_keys
        If True, prepend the schema name to the key name to make it unique
        when loading data into the keys and key_columns tables.
    """

    # Accepts a plain count ("64") or a count with a variable-length marker
    # ("64*"). Restricting to ASCII digits keeps ``int()`` from being handed
    # characters that ``str.isdigit`` accepts but ``int`` rejects.
    _ARRAYSIZE_PATTERN = re.compile(r"([0-9]+)\*?")

    def __init__(
        self,
        schema: Schema,
        mgr: TableManager,
        engine: Engine | MockConnection,
        tap_schema_index: int = 0,
        output_path: str | None = None,
        print_sql: bool = False,
        dry_run: bool = False,
        unique_keys: bool = False,
    ):
        self.schema = schema
        self.mgr = mgr
        self.engine = engine
        self.tap_schema_index = tap_schema_index
        # Insert statements accumulated by the ``_insert_*`` methods.
        self.inserts: list[Insert] = []
        self.output_path = output_path
        self.print_sql = print_sql
        self.dry_run = dry_run
        self.unique_keys = unique_keys

    def load(self) -> None:
        """Load the schema data into the TAP_SCHEMA tables.

        Notes
        -----
        This will generate inserts for the data, print the SQL statements if
        requested, save the SQL statements to a file if requested, and load the
        data into the database if not in dry run mode. These are done as
        sequential operations rather than for each insert. The logic is that
        the user may still want the complete SQL output to be printed or saved
        to a file even if loading into the database causes errors. If there are
        errors when inserting into the database, the SQLAlchemy error message
        should indicate which SQL statement caused the error.
        """
        self._generate_all_inserts()
        if self.print_sql:
            # Print to stdout.
            self._print_sql()
        if self.output_path:
            # Print to an output file.
            self._write_sql_to_file()
        if not self.dry_run:
            # Execute the inserts if not in dry run mode.
            self._execute_inserts()
        else:
            logger.info("Dry run - not loading data into database")

    def _insert_schemas(self) -> None:
        """Insert the schema data into the ``schemas`` table."""
        schema_record = {
            "schema_name": self.schema.name,
            "utype": self.schema.votable_utype,
            "description": self.schema.description,
            "schema_index": self.tap_schema_index,
        }
        self._insert("schemas", schema_record)

    def _get_table_name(self, table: datamodel.Table) -> str:
        """Get the name of the table with the schema name prepended.

        Parameters
        ----------
        table
            The table to get the name for.

        Returns
        -------
        str
            The name of the table with the schema name prepended.
        """
        return f"{self.schema.name}.{table.name}"

    def _insert_tables(self) -> None:
        """Insert the table data into the ``tables`` table."""
        for table in self.schema.tables:
            table_record = {
                "schema_name": self.schema.name,
                "table_name": self._get_table_name(table),
                "table_type": "table",
                "utype": table.votable_utype,
                "description": table.description,
                # A missing table index is stored as 0.
                "table_index": 0 if table.tap_table_index is None else table.tap_table_index,
            }
            self._insert("tables", table_record)

    def _insert_columns(self) -> None:
        """Insert the column data into the ``columns`` table."""
        for table in self.schema.tables:
            for column in table.columns:
                felis_type = FelisType.felis_type(column.datatype.value)
                arraysize = str(column.votable_arraysize) if column.votable_arraysize else None
                size = DataLoader._get_size(column)
                indexed = DataLoader._is_indexed(column, table)
                tap_column_index = column.tap_column_index
                # Prefer the IVOA unit and fall back to the FITS unit.
                unit = column.ivoa_unit or column.fits_tunit

                column_record = {
                    "table_name": self._get_table_name(table),
                    "column_name": column.name,
                    "datatype": felis_type.votable_name,
                    "arraysize": arraysize,
                    "size": size,
                    "xtype": column.votable_xtype,
                    "description": column.description,
                    "utype": column.votable_utype,
                    "unit": unit,
                    "ucd": column.ivoa_ucd,
                    "indexed": indexed,
                    "principal": column.tap_principal,
                    "std": column.tap_std,
                    "column_index": tap_column_index,
                }
                self._insert("columns", column_record)

    def _get_key(self, constraint: Constraint) -> str:
        """Get the key name for a constraint.

        Parameters
        ----------
        constraint
            The constraint to get the key name for.

        Returns
        -------
        str
            The key name for the constraint.

        Notes
        -----
        This will prepend the name of the schema to the key name if the
        `unique_keys` attribute is set to True. Otherwise, it will just return
        the name of the constraint.
        """
        if self.unique_keys:
            key_id = f"{self.schema.name}_{constraint.name}"
            logger.debug("Generated unique key_id: %s -> %s", constraint.name, key_id)
        else:
            key_id = constraint.name
        return key_id

    def _insert_keys(self) -> None:
        """Insert the foreign keys into the ``keys`` and ``key_columns``
        tables.
        """
        for table in self.schema.tables:
            for constraint in table.constraints:
                if not isinstance(constraint, datamodel.ForeignKeyConstraint):
                    # Only foreign keys are recorded in TAP_SCHEMA.
                    continue

                ###########################################################
                # Handle keys table
                ###########################################################
                # The target table is found through the first referenced
                # column.
                referenced_column = self.schema.find_object_by_id(
                    constraint.referenced_columns[0], datamodel.Column
                )
                referenced_table = self.schema.get_table_by_column(referenced_column)
                key_id = self._get_key(constraint)
                key_record = {
                    "key_id": key_id,
                    "from_table": self._get_table_name(table),
                    "target_table": self._get_table_name(referenced_table),
                    "description": constraint.description,
                    "utype": constraint.votable_utype,
                }
                self._insert("keys", key_record)

                ###########################################################
                # Handle key_columns table
                ###########################################################
                # Loop over the corresponding columns and referenced
                # columns and insert a record for each pair. This is
                # necessary for proper handling of composite keys.
                for from_column_id, target_column_id in zip(
                    constraint.columns, constraint.referenced_columns
                ):
                    from_column = self.schema.find_object_by_id(from_column_id, datamodel.Column)
                    target_column = self.schema.find_object_by_id(target_column_id, datamodel.Column)
                    key_columns_record = {
                        "key_id": key_id,
                        "from_column": from_column.name,
                        "target_column": target_column.name,
                    }
                    self._insert("key_columns", key_columns_record)

    def _generate_all_inserts(self) -> None:
        """Generate the inserts for all the data."""
        # Start from scratch so repeated calls do not duplicate statements.
        self.inserts.clear()
        self._insert_schemas()
        self._insert_tables()
        self._insert_columns()
        self._insert_keys()
        logger.debug("Generated %d insert statements", len(self.inserts))

    def _execute_inserts(self) -> None:
        """Load the `~felis.datamodel.Schema` data into the TAP_SCHEMA
        tables.

        Notes
        -----
        All inserts run in a single transaction which is rolled back if any
        of them fails. Nothing is executed when the engine is not a
        `~sqlalchemy.engine.Engine` (e.g., a mock connection).
        """
        if not isinstance(self.engine, Engine):
            return

        with self.engine.connect() as connection:
            transaction = connection.begin()
            try:
                for insert in self.inserts:
                    connection.execute(insert)
                transaction.commit()
            except Exception as e:
                logger.error("Error loading data into database: %s", e)
                transaction.rollback()
                raise

    def _compiled_inserts(self) -> list[str]:
        """Compile the inserts to SQL.

        Returns
        -------
        list
            A list of the compiled insert statements.
        """
        return [
            # Render bound values inline so the statements are standalone.
            str(insert.compile(self.engine, compile_kwargs={"literal_binds": True}))
            for insert in self.inserts
        ]

    def _print_sql(self) -> None:
        """Print the generated inserts to stdout."""
        for insert_str in self._compiled_inserts():
            print(insert_str + ";")

    def _write_sql_to_file(self) -> None:
        """Write the generated insert statements to a file.

        Raises
        ------
        ValueError
            If no output path was specified.
        """
        if not self.output_path:
            raise ValueError("No output path specified")
        # An explicit encoding keeps the output independent of the platform
        # default, which may not be able to represent non-ASCII descriptions.
        with open(self.output_path, "w", encoding="utf-8") as outfile:
            outfile.writelines(f"{insert_str};\n" for insert_str in self._compiled_inserts())

    def _insert(self, table_name: str, record: list[Any] | dict[str, Any]) -> None:
        """Generate an insert statement for a record.

        Parameters
        ----------
        table_name
            The name of the table to insert the record into.
        record
            The record to insert into the table.
        """
        table = self.mgr[table_name]
        insert_statement = table.insert().values(record)
        self.inserts.append(insert_statement)

    @staticmethod
    def _get_size(column: datamodel.Column) -> int | None:
        """Get the size of the column.

        Parameters
        ----------
        column
            The column to get the size for.

        Returns
        -------
        int or None
            The size of the column or None if not applicable. A size is only
            reported when the arraysize is a plain count such as ``64`` or a
            count followed by ``*`` such as ``64*``.
        """
        arraysize = column.votable_arraysize
        if not arraysize:
            return None

        match = DataLoader._ARRAYSIZE_PATTERN.fullmatch(str(arraysize))
        if match is None:
            # Anything else, e.g. "*" or a multi-dimensional size, has no
            # single size value.
            return None
        return int(match.group(1))

    @staticmethod
    def _is_indexed(column: datamodel.Column, table: datamodel.Table) -> int:
        """Check if the column is indexed in the table.

        Parameters
        ----------
        column
            The column to check.
        table
            The table to check.

        Returns
        -------
        int
            1 if the column is indexed, 0 otherwise. A column counts as
            indexed when it is the table's single-column (string) primary key
            or the only column of one of the table's indexes.
        """
        if isinstance(table.primary_key, str) and table.primary_key == column.id:
            return 1
        for index in table.indexes:
            if index.columns and len(index.columns) == 1 and index.columns[0] == column.id:
                return 1
        return 0
723
+
724
+
725
class MetadataInserter:
    """Insert TAP_SCHEMA self-description rows into the database.

    Parameters
    ----------
    mgr
        The table manager that contains the TAP_SCHEMA tables.
    engine
        The engine for connecting to the TAP_SCHEMA database.
    """

    # Marker used in the packaged CSV files for a NULL value.
    _NULL_MARKER = "\\N"

    def __init__(self, mgr: TableManager, engine: Engine):
        """Initialize the metadata inserter.

        Parameters
        ----------
        mgr
            The table manager representing the TAP_SCHEMA tables.
        engine
            The SQLAlchemy engine for connecting to the database.
        """
        self._mgr = mgr
        self._engine = engine

    @staticmethod
    def _parse_csv(csv_bytes: bytes) -> tuple[list[str], list[dict[str, str | None]]]:
        """Parse UTF-8 encoded CSV data into a header list and row records.

        Parameters
        ----------
        csv_bytes
            The raw CSV content, with the column names on the first line.

        Returns
        -------
        tuple
            The list of column names and a list of dictionaries, one per
            non-blank data line, mapping column names to values. Values equal
            to the NULL marker are converted to None. Both lists are empty if
            the content has no header line.
        """
        # ``newline=""`` lets the csv module do its own newline handling.
        text_stream = io.TextIOWrapper(io.BytesIO(csv_bytes), encoding="utf-8", newline="")
        reader = csv.reader(text_stream)
        headers = next(reader, None)
        if headers is None:
            return [], []
        null_marker = MetadataInserter._NULL_MARKER
        rows = [
            {key: None if value == null_marker else value for key, value in zip(headers, row)}
            for row in reader
            if row  # Blank lines are parsed as empty lists; skip them.
        ]
        return headers, rows

    def insert_metadata(self) -> None:
        """Insert the TAP_SCHEMA metadata into the database.

        Notes
        -----
        For each standard TAP_SCHEMA table, the rows are read from the
        packaged CSV resource of the same name and inserted in their own
        transaction. Tables whose CSV resource has no data rows are skipped.
        """
        for table_name in self._mgr.get_table_names_std():
            table = self._mgr[table_name]
            csv_bytes = ResourcePath(f"resource://felis/config/tap_schema/{table_name}.csv").read()
            headers, rows = self._parse_csv(csv_bytes)
            logger.debug(
                "Inserting %d rows into table '%s' with headers: %s",
                len(rows),
                table_name,
                headers,
            )
            if not rows:
                # Passing an empty parameter list is not an executemany of
                # zero rows, so there is nothing safe to execute here.
                continue
            with self._engine.begin() as conn:
                conn.execute(table.insert(), rows)