lsst-felis 28.2024.4500__py3-none-any.whl → 30.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. felis/__init__.py +9 -1
  2. felis/cli.py +308 -209
  3. felis/config/tap_schema/columns.csv +33 -0
  4. felis/config/tap_schema/key_columns.csv +8 -0
  5. felis/config/tap_schema/keys.csv +8 -0
  6. felis/config/tap_schema/schemas.csv +2 -0
  7. felis/config/tap_schema/tables.csv +6 -0
  8. felis/config/tap_schema/tap_schema_extensions.yaml +73 -0
  9. felis/datamodel.py +599 -59
  10. felis/db/{dialects.py → _dialects.py} +69 -4
  11. felis/db/{variants.py → _variants.py} +1 -1
  12. felis/db/database_context.py +917 -0
  13. felis/diff.py +234 -0
  14. felis/metadata.py +89 -19
  15. felis/tap_schema.py +271 -166
  16. felis/tests/postgresql.py +1 -1
  17. felis/tests/run_cli.py +79 -0
  18. felis/types.py +7 -7
  19. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/METADATA +20 -16
  20. lsst_felis-30.0.0rc3.dist-info/RECORD +31 -0
  21. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/WHEEL +1 -1
  22. felis/db/utils.py +0 -409
  23. felis/tap.py +0 -597
  24. felis/tests/utils.py +0 -122
  25. felis/version.py +0 -2
  26. lsst_felis-28.2024.4500.dist-info/RECORD +0 -26
  27. felis/{schemas → config/tap_schema}/tap_schema_std.yaml +0 -0
  28. felis/db/{sqltypes.py → _sqltypes.py} +7 -7
  29. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/entry_points.txt +0 -0
  30. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info/licenses}/COPYRIGHT +0 -0
  31. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info/licenses}/LICENSE +0 -0
  32. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/top_level.txt +0 -0
  33. {lsst_felis-28.2024.4500.dist-info → lsst_felis-30.0.0rc3.dist-info}/zip-safe +0 -0
felis/tap_schema.py CHANGED
@@ -21,57 +21,56 @@
21
21
  # You should have received a copy of the GNU General Public License
22
22
  # along with this program. If not, see <https://www.gnu.org/licenses/>.
23
23
 
24
+ import csv
25
+ import io
24
26
  import logging
25
27
  import os
26
28
  import re
27
- from typing import Any
29
+ from typing import IO, Any
28
30
 
29
31
  from lsst.resources import ResourcePath
30
- from sqlalchemy import MetaData, Table, text
31
- from sqlalchemy.engine import Connection, Engine
32
- from sqlalchemy.engine.mock import MockConnection
32
+ from sqlalchemy import MetaData, Table, select, text
33
33
  from sqlalchemy.exc import SQLAlchemyError
34
- from sqlalchemy.schema import CreateSchema
35
34
  from sqlalchemy.sql.dml import Insert
36
35
 
37
- from felis import datamodel
38
- from felis.datamodel import Schema
39
- from felis.db.utils import is_valid_engine
40
- from felis.metadata import MetaDataBuilder
41
-
36
+ from . import datamodel
37
+ from .datamodel import Constraint, Schema
38
+ from .db.database_context import DatabaseContext, is_sqlite_url
39
+ from .metadata import MetaDataBuilder
42
40
  from .types import FelisType
43
41
 
44
- __all__ = ["TableManager", "DataLoader"]
42
+ __all__ = ["DataLoader", "MetadataInserter", "TableManager"]
45
43
 
46
44
  logger = logging.getLogger(__name__)
47
45
 
48
46
 
49
47
  class TableManager:
50
- """Manage creation of TAP_SCHEMA tables.
48
+ """Manage TAP_SCHEMA table definitions and access.
49
+
50
+ This class provides a streamlined interface for managing TAP_SCHEMA tables,
51
+ automatically handling dialect-specific requirements and providing
52
+ consistent access to TAP_SCHEMA tables through a dictionary-like interface.
51
53
 
52
54
  Parameters
53
55
  ----------
54
- engine
55
- The SQLAlchemy engine for reflecting the TAP_SCHEMA tables from an
56
- existing database.
57
- This can be a mock connection or None, in which case the internal
58
- TAP_SCHEMA schema will be used by loading an internal YAML file.
56
+ engine_url
57
+ Database engine URL for automatic dialect detection and schema
58
+ handling.
59
+ db_context
60
+ Optional database context for reflecting existing TAP_SCHEMA tables.
61
+ If None, loads from internal YAML schema.
59
62
  schema_name
60
- The name of the schema to use for the TAP_SCHEMA tables.
61
- Leave as None to use the standard name of "TAP_SCHEMA".
62
- apply_schema_to_metadata
63
- If True, apply the schema to the metadata as well as the tables.
64
- If False, these will be set to None, e.g., for sqlite.
63
+ The name of the schema to use for TAP_SCHEMA tables.
64
+ Defaults to "TAP_SCHEMA".
65
65
  table_name_postfix
66
- A string to append to all the standard table names.
67
- This needs to be used in a way such that the resultant table names
68
- map to tables within the TAP_SCHEMA database.
66
+ A string to append to standard table names for customization.
67
+ extensions_path
68
+ Path to additional TAP_SCHEMA table definitions.
69
69
 
70
70
  Notes
71
71
  -----
72
- The TAP_SCHEMA schema must either have been created already, in which case
73
- the ``engine`` should be provided. Or the internal TAP_SCHEMA schema will
74
- be used if ``engine`` is None or a ``MockConnection``.
72
+ The TableManager automatically detects SQLite vs. schema-supporting
73
+ databases and handles schema application appropriately.
75
74
  """
76
75
 
77
76
  _TABLE_NAMES_STD = ["schemas", "tables", "columns", "keys", "key_columns"]
@@ -82,61 +81,107 @@ class TableManager:
82
81
 
83
82
  def __init__(
84
83
  self,
85
- engine: Engine | MockConnection | None = None,
84
+ engine_url: str | None = None,
85
+ db_context: DatabaseContext | None = None,
86
86
  schema_name: str | None = None,
87
- apply_schema_to_metadata: bool = True,
88
87
  table_name_postfix: str = "",
88
+ extensions_path: str | None = None,
89
89
  ):
90
90
  """Initialize the table manager."""
91
91
  self.table_name_postfix = table_name_postfix
92
- self.apply_schema_to_metadata = apply_schema_to_metadata
93
- self.schema_name = schema_name or TableManager._SCHEMA_NAME_STD
92
+ self.schema_name = schema_name or self._SCHEMA_NAME_STD
93
+ self.extensions_path = extensions_path
94
94
 
95
- if is_valid_engine(engine):
96
- assert isinstance(engine, Engine)
95
+ # Automatic dialect detection from engine URL
96
+ if engine_url is not None:
97
+ self.apply_schema_to_metadata = not is_sqlite_url(engine_url)
98
+ else:
99
+ # Default case: assume SQLite
100
+ engine_url = "sqlite:///:memory:"
101
+ self.apply_schema_to_metadata = False
102
+
103
+ if db_context is not None:
104
+ if table_name_postfix != "":
105
+ logger.warning(
106
+ "Table name postfix '%s' will be ignored when reflecting TAP_SCHEMA database",
107
+ table_name_postfix,
108
+ )
97
109
  logger.debug(
98
110
  "Reflecting TAP_SCHEMA database from existing database at %s",
99
- engine.url._replace(password="***"),
111
+ db_context.engine.url._replace(password="***"),
100
112
  )
101
- self._reflect(engine)
113
+ self._reflect_from_database(db_context)
102
114
  else:
103
- self._load_yaml()
115
+ self._load_from_yaml()
104
116
 
105
117
  self._create_table_map()
106
118
  self._check_tables()
107
119
 
108
- def _reflect(self, engine: Engine) -> None:
109
- """Reflect the TAP_SCHEMA database tables into the metadata.
120
+ def _load_from_yaml(self) -> None:
121
+ """Load TAP_SCHEMA from YAML resources and build metadata."""
122
+ # Load the base schema
123
+ self._schema = self.load_schema_resource()
124
+
125
+ # Override schema name if specified
126
+ if self.schema_name != self._SCHEMA_NAME_STD:
127
+ self._schema.name = self.schema_name
128
+ else:
129
+ self.schema_name = self._schema.name
130
+
131
+ # Apply any extensions
132
+ self._apply_extensions()
133
+
134
+ # Build metadata using streamlined approach
135
+ self._metadata = MetaDataBuilder(
136
+ self._schema,
137
+ apply_schema_to_metadata=self.apply_schema_to_metadata,
138
+ table_name_postfix=self.table_name_postfix,
139
+ ).build()
140
+
141
+ logger.debug("Loaded TAP_SCHEMA '%s' from YAML resource", self.schema_name)
142
+
143
+ def _reflect_from_database(self, db_context: DatabaseContext) -> None:
144
+ """Reflect TAP_SCHEMA tables from an existing database.
110
145
 
111
146
  Parameters
112
147
  ----------
113
- engine
114
- The SQLAlchemy engine to use to reflect the tables.
148
+ db_context
149
+ The database context to use for reflection.
115
150
  """
116
151
  self._metadata = MetaData(schema=self.schema_name if self.apply_schema_to_metadata else None)
117
152
  try:
118
- self.metadata.reflect(bind=engine)
153
+ self._metadata.reflect(bind=db_context.engine)
119
154
  except SQLAlchemyError as e:
120
155
  logger.error("Error reflecting TAP_SCHEMA database: %s", e)
121
156
  raise
122
157
 
123
- def _load_yaml(self) -> None:
124
- """Load the standard TAP_SCHEMA schema from a Felis package
125
- resource.
158
+ def _apply_extensions(self) -> None:
159
+ """Apply extensions from a YAML file to the TAP_SCHEMA schema.
160
+
161
+ This method loads extension column definitions from a YAML file and
162
+ adds them to the appropriate TAP_SCHEMA tables.
126
163
  """
127
- self._load_schema()
128
- if self.schema_name != TableManager._SCHEMA_NAME_STD:
129
- self.schema.name = self.schema_name
130
- else:
131
- self.schema_name = self.schema.name
164
+ if not self.extensions_path:
165
+ return
132
166
 
133
- self._metadata = MetaDataBuilder(
134
- self.schema,
135
- apply_schema_to_metadata=self.apply_schema_to_metadata,
136
- apply_schema_to_tables=self.apply_schema_to_metadata,
137
- ).build()
167
+ logger.info("Loading TAP_SCHEMA extensions from: %s", self.extensions_path)
168
+ extensions_schema = Schema.from_uri(self.extensions_path, context={"id_generation": True})
138
169
 
139
- logger.debug("Loaded TAP_SCHEMA '%s' from YAML resource", self.schema_name)
170
+ if not extensions_schema.tables:
171
+ logger.warning("Extensions schema does not contain any tables, no extensions applied")
172
+ return
173
+
174
+ extension_count = 0
175
+ extension_tables = {table.name: table.columns for table in extensions_schema.tables}
176
+
177
+ for table in self.schema.tables:
178
+ extension_columns = extension_tables.get(table.name)
179
+ if extension_columns:
180
+ table.columns = list(table.columns) + list(extension_columns)
181
+ extension_count += len(extension_columns)
182
+ logger.debug("Added %d extension columns to table '%s'", len(extension_columns), table.name)
183
+
184
+ logger.info("Applied %d extension columns to TAP_SCHEMA", extension_count)
140
185
 
141
186
  def __getitem__(self, table_name: str) -> Table:
142
187
  """Get one of the TAP_SCHEMA tables by its standard TAP_SCHEMA name.
@@ -157,7 +202,7 @@ class TableManager:
157
202
  tables to be accessed by their standard TAP_SCHEMA names.
158
203
  """
159
204
  if table_name not in self._table_map:
160
- raise KeyError(f"Table '{table_name}' not found in table map")
205
+ raise KeyError(f"Table '{table_name}' not found in TAP_SCHEMA")
161
206
  return self.metadata.tables[self._table_map[table_name]]
162
207
 
163
208
  @property
@@ -202,7 +247,7 @@ class TableManager:
202
247
  str
203
248
  The path to the standard TAP_SCHEMA schema resource.
204
249
  """
205
- return os.path.join(os.path.dirname(__file__), "schemas", "tap_schema_std.yaml")
250
+ return os.path.join(os.path.dirname(__file__), "config", "tap_schema", "tap_schema_std.yaml")
206
251
 
207
252
  @classmethod
208
253
  def get_tap_schema_std_resource(cls) -> ResourcePath:
@@ -213,7 +258,7 @@ class TableManager:
213
258
  `~lsst.resources.ResourcePath`
214
259
  The standard TAP_SCHEMA schema resource.
215
260
  """
216
- return ResourcePath("resource://felis/schemas/tap_schema_std.yaml")
261
+ return ResourcePath("resource://felis/config/tap_schema/tap_schema_std.yaml")
217
262
 
218
263
  @classmethod
219
264
  def get_table_names_std(cls) -> list[str]:
@@ -258,19 +303,13 @@ class TableManager:
258
303
  """Create a mapping of standard table names to the table names modified
259
304
  with a postfix, as well as the prepended schema name if it is set.
260
305
 
261
- Returns
262
- -------
263
- dict
264
- A dictionary mapping the standard table names to the modified
265
- table names.
266
-
267
306
  Notes
268
307
  -----
269
308
  This is a private method that is called during initialization, allowing
270
309
  us to use table names like ``schemas11`` such as those used by the CADC
271
310
  TAP library instead of the standard table names. It also maps between
272
311
  the standard table names and those with the schema name prepended like
273
- SQLAlchemy uses.
312
+ SQLAlchemy uses. The mapping is stored in ``self._table_map``.
274
313
  """
275
314
  self._table_map = {
276
315
  table_name: (
@@ -292,72 +331,50 @@ class TableManager:
292
331
  for table_name in TableManager.get_table_names_std():
293
332
  self[table_name]
294
333
 
295
- def _create_schema(self, engine: Engine) -> None:
296
- """Create the database schema for TAP_SCHEMA if it does not already
297
- exist.
298
-
299
- Parameters
300
- ----------
301
- engine
302
- The SQLAlchemy engine to use to create the schema.
303
-
304
- Notes
305
- -----
306
- This method only creates the schema in the database. It does not create
307
- the tables.
308
- """
309
- create_schema_functions = {
310
- "postgresql": self._create_schema_postgresql,
311
- "mysql": self._create_schema_mysql,
312
- }
313
-
314
- dialect_name = engine.dialect.name
315
- if dialect_name == "sqlite":
316
- # SQLite doesn't have schemas.
317
- return
318
-
319
- create_function = create_schema_functions.get(dialect_name)
320
-
321
- if create_function:
322
- with engine.begin() as connection:
323
- create_function(connection)
324
- else:
325
- # Some other database engine we don't currently know how to handle.
326
- raise NotImplementedError(
327
- f"Database engine '{engine.dialect.name}' is not supported for schema creation"
328
- )
329
-
330
- def _create_schema_postgresql(self, connection: Connection) -> None:
331
- """Create the schema in a PostgreSQL database.
334
+ def initialize_database(self, db_context: DatabaseContext) -> None:
335
+ """Initialize a database with the TAP_SCHEMA tables.
332
336
 
333
337
  Parameters
334
338
  ----------
335
- connection
336
- The SQLAlchemy connection to use to create the schema.
339
+ db_context
340
+ The database context to use to create the tables.
337
341
  """
338
- connection.execute(CreateSchema(self.schema_name, if_not_exists=True))
342
+ logger.info("Creating TAP_SCHEMA database '%s'", self.schema_name)
343
+ db_context.initialize()
344
+ db_context.create_all()
339
345
 
340
- def _create_schema_mysql(self, connection: Connection) -> None:
341
- """Create the schema in a MySQL database.
346
+ def select(
347
+ self,
348
+ db_context: DatabaseContext,
349
+ table_name: str,
350
+ filter_condition: str = "",
351
+ ) -> list[dict[str, Any]]:
352
+ """Select all rows from a TAP_SCHEMA table with an optional filter
353
+ condition.
342
354
 
343
355
  Parameters
344
356
  ----------
345
- connection
346
- The SQLAlchemy connection to use to create the schema.
347
- """
348
- connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {self.schema_name}"))
349
-
350
- def initialize_database(self, engine: Engine) -> None:
351
- """Initialize a database with the TAP_SCHEMA tables.
357
+ db_context
358
+ The database context to use to connect to the database.
359
+ table_name
360
+ The name of the table to select from.
361
+ filter_condition
362
+ The filter condition as a string. If empty, no filter will be
363
+ applied.
352
364
 
353
- Parameters
354
- ----------
355
- engine
356
- The SQLAlchemy engine to use to create the tables.
365
+ Returns
366
+ -------
367
+ list
368
+ A list of dictionaries containing the rows from the table.
357
369
  """
358
- logger.info("Creating TAP_SCHEMA database '%s'", self.metadata.schema)
359
- self._create_schema(engine)
360
- self.metadata.create_all(engine)
370
+ table = self[table_name]
371
+ query = select(table)
372
+ if filter_condition:
373
+ query = query.where(text(filter_condition))
374
+ with db_context.engine.connect() as connection:
375
+ result = connection.execute(query)
376
+ rows = [dict(row._mapping) for row in result]
377
+ return rows
361
378
 
362
379
 
363
380
  class DataLoader:
@@ -369,37 +386,42 @@ class DataLoader:
369
386
  The Felis ``Schema`` to load into the TAP_SCHEMA tables.
370
387
  mgr
371
388
  The table manager that contains the TAP_SCHEMA tables.
372
- engine
373
- The SQLAlchemy engine to use to connect to the database.
389
+ db_context
390
+ The database context to use to connect to the database.
374
391
  tap_schema_index
375
392
  The index of the schema in the TAP_SCHEMA database.
376
- output_path
377
- The file to write the SQL statements to. If None, printing will be
378
- suppressed.
393
+ output_file
394
+ The file object to write the SQL statements to. If None, file output
395
+ will be suppressed.
379
396
  print_sql
380
397
  If True, print the SQL statements that will be executed.
381
398
  dry_run
382
399
  If True, the data will not be loaded into the database.
400
+ unique_keys
401
+ If True, prepend the schema name to the key name to make it unique
402
+ when loading data into the keys and key_columns tables.
383
403
  """
384
404
 
385
405
  def __init__(
386
406
  self,
387
407
  schema: Schema,
388
408
  mgr: TableManager,
389
- engine: Engine | MockConnection,
409
+ db_context: DatabaseContext,
390
410
  tap_schema_index: int = 0,
391
- output_path: str | None = None,
411
+ output_file: IO[str] | None = None,
392
412
  print_sql: bool = False,
393
413
  dry_run: bool = False,
414
+ unique_keys: bool = False,
394
415
  ):
395
416
  self.schema = schema
396
417
  self.mgr = mgr
397
- self.engine = engine
418
+ self._db_context = db_context
398
419
  self.tap_schema_index = tap_schema_index
399
420
  self.inserts: list[Insert] = []
400
- self.output_path = output_path
421
+ self.output_file = output_file
401
422
  self.print_sql = print_sql
402
423
  self.dry_run = dry_run
424
+ self.unique_keys = unique_keys
403
425
 
404
426
  def load(self) -> None:
405
427
  """Load the schema data into the TAP_SCHEMA tables.
@@ -419,17 +441,17 @@ class DataLoader:
419
441
  if self.print_sql:
420
442
  # Print to stdout.
421
443
  self._print_sql()
422
- if self.output_path:
444
+ if self.output_file:
423
445
  # Print to an output file.
424
446
  self._write_sql_to_file()
425
447
  if not self.dry_run:
426
448
  # Execute the inserts if not in dry run mode.
427
449
  self._execute_inserts()
428
450
  else:
429
- logger.info("Dry run: not loading data into database")
451
+ logger.info("Dry run - skipped loading into database")
430
452
 
431
453
  def _insert_schemas(self) -> None:
432
- """Insert the schema data into the schemas table."""
454
+ """Insert the schema data into the ``schemas`` table."""
433
455
  schema_record = {
434
456
  "schema_name": self.schema.name,
435
457
  "utype": self.schema.votable_utype,
@@ -454,7 +476,7 @@ class DataLoader:
454
476
  return f"{self.schema.name}.{table.name}"
455
477
 
456
478
  def _insert_tables(self) -> None:
457
- """Insert the table data into the tables table."""
479
+ """Insert the table data into the ``tables`` table."""
458
480
  for table in self.schema.tables:
459
481
  table_record = {
460
482
  "schema_name": self.schema.name,
@@ -467,7 +489,7 @@ class DataLoader:
467
489
  self._insert("tables", table_record)
468
490
 
469
491
  def _insert_columns(self) -> None:
470
- """Insert the column data into the columns table."""
492
+ """Insert the column data into the ``columns`` table."""
471
493
  for table in self.schema.tables:
472
494
  for column in table.columns:
473
495
  felis_type = FelisType.felis_type(column.datatype.value)
@@ -495,18 +517,49 @@ class DataLoader:
495
517
  }
496
518
  self._insert("columns", column_record)
497
519
 
520
+ def _get_key(self, constraint: Constraint) -> str:
521
+ """Get the key name for a constraint.
522
+
523
+ Parameters
524
+ ----------
525
+ constraint
526
+ The constraint to get the key name for.
527
+
528
+ Returns
529
+ -------
530
+ str
531
+ The key name for the constraint.
532
+
533
+ Notes
534
+ -----
535
+ This will prepend the name of the schema to the key name if the
536
+ `unique_keys` attribute is set to True. Otherwise, it will just return
537
+ the name of the constraint.
538
+ """
539
+ if self.unique_keys:
540
+ key_id = f"{self.schema.name}_{constraint.name}"
541
+ logger.debug("Generated unique key_id: %s -> %s", constraint.name, key_id)
542
+ else:
543
+ key_id = constraint.name
544
+ return key_id
545
+
498
546
  def _insert_keys(self) -> None:
499
- """Insert the foreign keys into the keys and key_columns tables."""
547
+ """Insert the foreign keys into the ``keys`` and ``key_columns``
548
+ tables.
549
+ """
500
550
  for table in self.schema.tables:
501
551
  for constraint in table.constraints:
502
552
  if isinstance(constraint, datamodel.ForeignKeyConstraint):
553
+ ###########################################################
503
554
  # Handle keys table
555
+ ###########################################################
504
556
  referenced_column = self.schema.find_object_by_id(
505
557
  constraint.referenced_columns[0], datamodel.Column
506
558
  )
507
559
  referenced_table = self.schema.get_table_by_column(referenced_column)
560
+ key_id = self._get_key(constraint)
508
561
  key_record = {
509
- "key_id": constraint.name,
562
+ "key_id": key_id,
510
563
  "from_table": self._get_table_name(table),
511
564
  "target_table": self._get_table_name(referenced_table),
512
565
  "description": constraint.description,
@@ -514,17 +567,23 @@ class DataLoader:
514
567
  }
515
568
  self._insert("keys", key_record)
516
569
 
570
+ ###########################################################
517
571
  # Handle key_columns table
518
- from_column = self.schema.find_object_by_id(constraint.columns[0], datamodel.Column)
519
- target_column = self.schema.find_object_by_id(
520
- constraint.referenced_columns[0], datamodel.Column
521
- )
522
- key_columns_record = {
523
- "key_id": constraint.name,
524
- "from_column": from_column.name,
525
- "target_column": target_column.name,
526
- }
527
- self._insert("key_columns", key_columns_record)
572
+ ###########################################################
573
+ # Loop over the corresponding columns and referenced
574
+ # columns and insert a record for each pair. This is
575
+ # necessary for proper handling of composite keys.
576
+ for from_column_id, target_column_id in zip(
577
+ constraint.columns, constraint.referenced_columns
578
+ ):
579
+ from_column = self.schema.find_object_by_id(from_column_id, datamodel.Column)
580
+ target_column = self.schema.find_object_by_id(target_column_id, datamodel.Column)
581
+ key_columns_record = {
582
+ "key_id": key_id,
583
+ "from_column": from_column.name,
584
+ "target_column": target_column.name,
585
+ }
586
+ self._insert("key_columns", key_columns_record)
528
587
 
529
588
  def _generate_all_inserts(self) -> None:
530
589
  """Generate the inserts for all the data."""
@@ -539,17 +598,13 @@ class DataLoader:
539
598
  """Load the `~felis.datamodel.Schema` data into the TAP_SCHEMA
540
599
  tables.
541
600
  """
542
- if isinstance(self.engine, Engine):
543
- with self.engine.connect() as connection:
544
- transaction = connection.begin()
545
- try:
546
- for insert in self.inserts:
547
- connection.execute(insert)
548
- transaction.commit()
549
- except Exception as e:
550
- logger.error("Error loading data into database: %s", e)
551
- transaction.rollback()
552
- raise
601
+ try:
602
+ with self._db_context.engine.begin() as connection:
603
+ for insert in self.inserts:
604
+ connection.execute(insert)
605
+ except Exception as e:
606
+ logger.error("Error loading data into database: %s", e)
607
+ raise
553
608
 
554
609
  def _compiled_inserts(self) -> list[str]:
555
610
  """Compile the inserts to SQL.
@@ -560,22 +615,26 @@ class DataLoader:
560
615
  A list of the compiled insert statements.
561
616
  """
562
617
  return [
563
- str(insert.compile(self.engine, compile_kwargs={"literal_binds": True}))
618
+ str(
619
+ insert.compile(
620
+ dialect=self._db_context.dialect,
621
+ compile_kwargs={"literal_binds": True},
622
+ ),
623
+ )
564
624
  for insert in self.inserts
565
625
  ]
566
626
 
567
627
  def _print_sql(self) -> None:
568
628
  """Print the generated inserts to stdout."""
569
629
  for insert_str in self._compiled_inserts():
570
- print(insert_str)
630
+ print(insert_str + ";")
571
631
 
572
632
  def _write_sql_to_file(self) -> None:
573
633
  """Write the generated insert statements to a file."""
574
- if not self.output_path:
575
- raise ValueError("No output path specified")
576
- with open(self.output_path, "w") as outfile:
577
- for insert_str in self._compiled_inserts():
578
- outfile.write(insert_str + "\n")
634
+ if not self.output_file:
635
+ raise ValueError("No output file specified")
636
+ for insert_str in self._compiled_inserts():
637
+ self.output_file.write(insert_str + ";" + "\n")
579
638
 
580
639
  def _insert(self, table_name: str, record: list[Any] | dict[str, Any]) -> None:
581
640
  """Generate an insert statement for a record.
@@ -642,3 +701,49 @@ class DataLoader:
642
701
  if index.columns and len(index.columns) == 1 and index.columns[0] == column.id:
643
702
  return 1
644
703
  return 0
704
+
705
+
706
+ class MetadataInserter:
707
+ """Insert TAP_SCHEMA self-description rows into the database.
708
+
709
+ Parameters
710
+ ----------
711
+ mgr
712
+ The table manager that contains the TAP_SCHEMA tables.
713
+ db_context
714
+ The database context for connecting to the TAP_SCHEMA database.
715
+ """
716
+
717
+ def __init__(self, mgr: TableManager, db_context: DatabaseContext):
718
+ """Initialize the metadata inserter.
719
+
720
+ Parameters
721
+ ----------
722
+ mgr
723
+ The table manager representing the TAP_SCHEMA tables.
724
+ db_context
725
+ The database context for connecting to the database.
726
+ """
727
+ self._mgr = mgr
728
+ self._db_context = db_context
729
+
730
+ def insert_metadata(self) -> None:
731
+ """Insert the TAP_SCHEMA metadata into the database."""
732
+ with self._db_context.engine.begin() as conn:
733
+ for table_name in self._mgr.get_table_names_std():
734
+ table = self._mgr[table_name]
735
+ csv_bytes = ResourcePath(f"resource://felis/config/tap_schema/{table_name}.csv").read()
736
+ text_stream = io.TextIOWrapper(io.BytesIO(csv_bytes), encoding="utf-8")
737
+ reader = csv.reader(text_stream)
738
+ headers = next(reader)
739
+ rows = [
740
+ {key: None if value == "\\N" else value for key, value in zip(headers, row)}
741
+ for row in reader
742
+ ]
743
+ logger.debug(
744
+ "Inserting %d rows into table '%s' with headers: %s",
745
+ len(rows),
746
+ table_name,
747
+ headers,
748
+ )
749
+ conn.execute(table.insert(), rows)
felis/tests/postgresql.py CHANGED
@@ -130,5 +130,5 @@ def setup_postgres_test_db() -> Iterator[TemporaryPostgresInstance]:
130
130
 
131
131
  # Clean up any lingering SQLAlchemy engines/connections
132
132
  # so they're closed before we shut down the server.
133
- gc.collect()
134
133
  engine.dispose()
134
+ gc.collect()