lsst-felis 27.2024.4100__py3-none-any.whl → 27.2024.4300__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lsst-felis might be problematic. Click here for more details.

felis/cli.py CHANGED
@@ -23,22 +23,21 @@
23
23
 
24
24
  from __future__ import annotations
25
25
 
26
- import io
27
26
  import logging
28
27
  from collections.abc import Iterable
29
28
  from typing import IO
30
29
 
31
30
  import click
32
- import yaml
33
31
  from pydantic import ValidationError
34
32
  from sqlalchemy.engine import Engine, create_engine, make_url
35
- from sqlalchemy.engine.mock import MockConnection
33
+ from sqlalchemy.engine.mock import MockConnection, create_mock_engine
36
34
 
37
35
  from . import __version__
38
36
  from .datamodel import Schema
39
- from .db.utils import DatabaseContext
37
+ from .db.utils import DatabaseContext, is_mock_url
40
38
  from .metadata import MetaDataBuilder
41
39
  from .tap import Tap11Base, TapLoadingVisitor, init_tables
40
+ from .tap_schema import DataLoader, TableManager
42
41
 
43
42
  __all__ = ["cli"]
44
43
 
@@ -107,7 +106,7 @@ def create(
107
106
  dry_run: bool,
108
107
  output_file: IO[str] | None,
109
108
  ignore_constraints: bool,
110
- file: IO,
109
+ file: IO[str],
111
110
  ) -> None:
112
111
  """Create database objects from the Felis file.
113
112
 
@@ -133,8 +132,7 @@ def create(
133
132
  Felis file to read.
134
133
  """
135
134
  try:
136
- yaml_data = yaml.safe_load(file)
137
- schema = Schema.model_validate(yaml_data, context={"id_generation": ctx.obj["id_generation"]})
135
+ schema = Schema.from_stream(file, context={"id_generation": ctx.obj["id_generation"]})
138
136
  url = make_url(engine_url)
139
137
  if schema_name:
140
138
  logger.info(f"Overriding schema name with: {schema_name}")
@@ -261,7 +259,7 @@ def load_tap(
261
259
  tap_keys_table: str,
262
260
  tap_key_columns_table: str,
263
261
  tap_schema_index: int,
264
- file: io.TextIOBase,
262
+ file: IO[str],
265
263
  ) -> None:
266
264
  """Load TAP metadata from a Felis file.
267
265
 
@@ -304,8 +302,7 @@ def load_tap(
304
302
  The data will be loaded into the TAP_SCHEMA from the engine URL. The
305
303
  tables must have already been initialized or an error will occur.
306
304
  """
307
- yaml_data = yaml.load(file, Loader=yaml.SafeLoader)
308
- schema = Schema.model_validate(yaml_data)
305
+ schema = Schema.from_stream(file)
309
306
 
310
307
  tap_tables = init_tables(
311
308
  tap_schema_name,
@@ -345,6 +342,79 @@ def load_tap(
345
342
  tap_visitor.visit_schema(schema)
346
343
 
347
344
 
345
+ @cli.command("load-tap-schema", help="Load metadata from a Felis file into a TAP_SCHEMA database")
346
+ @click.option("--engine-url", envvar="FELIS_ENGINE_URL", help="SQLAlchemy Engine URL")
347
+ @click.option("--tap-schema-name", help="Name of the TAP_SCHEMA schema in the database")
348
+ @click.option(
349
+ "--tap-tables-postfix", help="Postfix which is applied to standard TAP_SCHEMA table names", default=""
350
+ )
351
+ @click.option("--tap-schema-index", type=int, help="TAP_SCHEMA index of the schema in this environment")
352
+ @click.option("--dry-run", is_flag=True, help="Execute dry run only. Does not insert any data.")
353
+ @click.option("--echo", is_flag=True, help="Print out the generated insert statements to stdout")
354
+ @click.option("--output-file", type=click.Path(), help="Write SQL commands to a file")
355
+ @click.argument("file", type=click.File())
356
+ @click.pass_context
357
+ def load_tap_schema(
358
+ ctx: click.Context,
359
+ engine_url: str,
360
+ tap_schema_name: str,
361
+ tap_tables_postfix: str,
362
+ tap_schema_index: int,
363
+ dry_run: bool,
364
+ echo: bool,
365
+ output_file: str | None,
366
+ file: IO[str],
367
+ ) -> None:
368
+ """Load TAP metadata from a Felis file.
369
+
370
+ Parameters
371
+ ----------
372
+ engine_url
373
+ SQLAlchemy Engine URL.
374
+ tap_tables_postfix
375
+ Postfix which is applied to standard TAP_SCHEMA table names.
376
+ tap_schema_index
377
+ TAP_SCHEMA index of the schema in this environment.
378
+ dry_run
379
+ Execute dry run only. Does not insert any data.
380
+ echo
381
+ Print out the generated insert statements to stdout.
382
+ output_file
383
+ Output file for writing generated SQL.
384
+ file
385
+ Felis file to read.
386
+
387
+ Notes
388
+ -----
389
+ The TAP_SCHEMA database must already exist or the command will fail. This
390
+ command will not initialize the TAP_SCHEMA tables.
391
+ """
392
+ url = make_url(engine_url)
393
+ engine: Engine | MockConnection
394
+ if dry_run or is_mock_url(url):
395
+ engine = create_mock_engine(url, executor=None)
396
+ else:
397
+ engine = create_engine(engine_url)
398
+ mgr = TableManager(
399
+ engine=engine,
400
+ apply_schema_to_metadata=False if engine.dialect.name == "sqlite" else True,
401
+ schema_name=tap_schema_name,
402
+ table_name_postfix=tap_tables_postfix,
403
+ )
404
+
405
+ schema = Schema.from_stream(file, context={"id_generation": ctx.obj["id_generation"]})
406
+
407
+ DataLoader(
408
+ schema,
409
+ mgr,
410
+ engine,
411
+ tap_schema_index=tap_schema_index,
412
+ dry_run=dry_run,
413
+ print_sql=echo,
414
+ output_path=output_file,
415
+ ).load()
416
+
417
+
348
418
  @cli.command("validate", help="Validate one or more Felis YAML files")
349
419
  @click.option(
350
420
  "--check-description", is_flag=True, help="Check that all objects have a description", default=False
@@ -372,7 +442,7 @@ def validate(
372
442
  check_redundant_datatypes: bool,
373
443
  check_tap_table_indexes: bool,
374
444
  check_tap_principal: bool,
375
- files: Iterable[io.TextIOBase],
445
+ files: Iterable[IO[str]],
376
446
  ) -> None:
377
447
  """Validate one or more felis YAML files.
378
448
 
@@ -406,9 +476,8 @@ def validate(
406
476
  file_name = getattr(file, "name", None)
407
477
  logger.info(f"Validating {file_name}")
408
478
  try:
409
- data = yaml.load(file, Loader=yaml.SafeLoader)
410
- Schema.model_validate(
411
- data,
479
+ Schema.from_stream(
480
+ file,
412
481
  context={
413
482
  "check_description": check_description,
414
483
  "check_redundant_datatypes": check_redundant_datatypes,
felis/datamodel.py CHANGED
@@ -26,10 +26,12 @@ from __future__ import annotations
26
26
  import logging
27
27
  from collections.abc import Sequence
28
28
  from enum import StrEnum, auto
29
- from typing import Annotated, Any, Literal, TypeAlias, Union
29
+ from typing import IO, Annotated, Any, Generic, Literal, TypeAlias, TypeVar, Union
30
30
 
31
+ import yaml
31
32
  from astropy import units as units # type: ignore
32
33
  from astropy.io.votable import ucd # type: ignore
34
+ from lsst.resources import ResourcePath, ResourcePathExpression
33
35
  from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator, model_validator
34
36
 
35
37
  from .db.dialects import get_supported_dialects
@@ -253,7 +255,7 @@ class Column(BaseObject):
253
255
  Raises
254
256
  ------
255
257
  ValueError
256
- Raised If both FITS and IVOA units are provided, or if the unit is
258
+ Raised if both FITS and IVOA units are provided, or if the unit is
257
259
  invalid.
258
260
  """
259
261
  fits_unit = self.fits_tunit
@@ -383,6 +385,58 @@ class Column(BaseObject):
383
385
  raise ValueError("Precision is only valid for timestamp columns")
384
386
  return self
385
387
 
388
+ @model_validator(mode="before")
389
+ @classmethod
390
+ def check_votable_arraysize(cls, values: dict[str, Any]) -> dict[str, Any]:
391
+ """Set the default value for the ``votable_arraysize`` field, which
392
+ corresponds to ``arraysize`` in the IVOA VOTable standard.
393
+
394
+ Parameters
395
+ ----------
396
+ values
397
+ Values of the column.
398
+
399
+ Returns
400
+ -------
401
+ `dict` [ `str`, `Any` ]
402
+ The values of the column.
403
+
404
+ Notes
405
+ -----
406
+ Following the IVOA VOTable standard, an ``arraysize`` of 1 should not
407
+ be used.
408
+ """
409
+ if values.get("name", None) is None or values.get("datatype", None) is None:
410
+ # Skip bad column data that will not validate
411
+ return values
412
+ arraysize = values.get("votable:arraysize", None)
413
+ if arraysize is None:
414
+ length = values.get("length", None)
415
+ datatype = values.get("datatype")
416
+ if length is not None and length > 1:
417
+ # Following the IVOA standard, arraysize of 1 is disallowed
418
+ if datatype == "char":
419
+ arraysize = str(length)
420
+ elif datatype in ("string", "unicode", "binary"):
421
+ arraysize = f"{length}*"
422
+ elif datatype in ("timestamp", "text"):
423
+ arraysize = "*"
424
+ if arraysize is not None:
425
+ values["votable:arraysize"] = arraysize
426
+ logger.debug(
427
+ f"Set default 'votable:arraysize' to '{arraysize}' on column '{values['name']}'"
428
+ + f" with datatype '{values['datatype']}' and length '{values.get('length', None)}'"
429
+ )
430
+ else:
431
+ logger.debug(f"Using existing 'votable:arraysize' of '{arraysize}' on column '{values['name']}'")
432
+ if isinstance(values["votable:arraysize"], int):
433
+ logger.warning(
434
+ f"Usage of an integer value for 'votable:arraysize' in column '{values['name']}' is "
435
+ + "deprecated"
436
+ )
437
+ values["votable:arraysize"] = str(arraysize)
438
+ return values
439
+
386
440
 
387
441
  class Constraint(BaseObject):
388
442
  """Table constraint model."""
@@ -700,7 +754,10 @@ class SchemaIdVisitor:
700
754
  self.add(constraint)
701
755
 
702
756
 
703
- class Schema(BaseObject):
757
+ T = TypeVar("T", bound=BaseObject)
758
+
759
+
760
+ class Schema(BaseObject, Generic[T]):
704
761
  """Database schema model.
705
762
 
706
763
  This represents a database schema, which contains one or more tables.
@@ -942,3 +999,118 @@ class Schema(BaseObject):
942
999
  The ID of the object to check.
943
1000
  """
944
1001
  return id in self.id_map
1002
+
1003
+ def find_object_by_id(self, id: str, obj_type: type[T]) -> T:
1004
+ """Find an object with the given type by its ID.
1005
+
1006
+ Parameters
1007
+ ----------
1008
+ id
1009
+ The ID of the object to find.
1010
+ obj_type
1011
+ The type of the object to find.
1012
+
1013
+ Returns
1014
+ -------
1015
+ BaseObject
1016
+ The object with the given ID and type.
1017
+
1018
+ Raises
1019
+ ------
1020
+ KeyError
1021
+ If the object with the given ID is not found in the schema.
1022
+ TypeError
1023
+ If the object that is found does not have the right type.
1024
+
1025
+ Notes
1026
+ -----
1027
+ The actual return type is the user-specified argument ``T``, which is
1028
+ expected to be a subclass of `BaseObject`.
1029
+ """
1030
+ obj = self[id]
1031
+ if not isinstance(obj, obj_type):
1032
+ raise TypeError(f"Object with ID '{id}' is not of type '{obj_type.__name__}'")
1033
+ return obj
1034
+
1035
+ def get_table_by_column(self, column: Column) -> Table:
1036
+ """Find the table that contains a column.
1037
+
1038
+ Parameters
1039
+ ----------
1040
+ column
1041
+ The column to find.
1042
+
1043
+ Returns
1044
+ -------
1045
+ `Table`
1046
+ The table that contains the column.
1047
+
1048
+ Raises
1049
+ ------
1050
+ ValueError
1051
+ If the column is not found in any table.
1052
+ """
1053
+ for table in self.tables:
1054
+ if column in table.columns:
1055
+ return table
1056
+ raise ValueError(f"Column '{column.name}' not found in any table")
1057
+
1058
+ @classmethod
1059
+ def from_uri(cls, resource_path: ResourcePathExpression, context: dict[str, Any] = {}) -> Schema:
1060
+ """Load a `Schema` from a string representing a ``ResourcePath``.
1061
+
1062
+ Parameters
1063
+ ----------
1064
+ resource_path
1065
+ The ``ResourcePath`` pointing to a YAML file.
1066
+ context
1067
+ Pydantic context to be used in validation.
1068
+
1069
+ Returns
1070
+ -------
1071
+ `str`
1072
+ The ID of the object.
1073
+
1074
+ Raises
1075
+ ------
1076
+ yaml.YAMLError
1077
+ Raised if there is an error loading the YAML data.
1078
+ ValueError
1079
+ Raised if there is an error reading the resource.
1080
+ pydantic.ValidationError
1081
+ Raised if the schema fails validation.
1082
+ """
1083
+ logger.debug(f"Loading schema from: '{resource_path}'")
1084
+ try:
1085
+ rp_stream = ResourcePath(resource_path).read()
1086
+ except Exception as e:
1087
+ raise ValueError(f"Error reading resource from '{resource_path}' : {e}") from e
1088
+ yaml_data = yaml.safe_load(rp_stream)
1089
+ return Schema.model_validate(yaml_data, context=context)
1090
+
1091
+ @classmethod
1092
+ def from_stream(cls, source: IO[str], context: dict[str, Any] = {}) -> Schema:
1093
+ """Load a `Schema` from a file stream which should contain YAML data.
1094
+
1095
+ Parameters
1096
+ ----------
1097
+ source
1098
+ The file stream to read from.
1099
+ context
1100
+ Pydantic context to be used in validation.
1101
+
1102
+ Returns
1103
+ -------
1104
+ `Schema`
1105
+ The Felis schema loaded from the stream.
1106
+
1107
+ Raises
1108
+ ------
1109
+ yaml.YAMLError
1110
+ Raised if there is an error loading the YAML file.
1111
+ pydantic.ValidationError
1112
+ Raised if the schema fails validation.
1113
+ """
1114
+ logger.debug("Loading schema from: '%s'", source)
1115
+ yaml_data = yaml.safe_load(source)
1116
+ return Schema.model_validate(yaml_data, context=context)
felis/db/utils.py CHANGED
@@ -106,6 +106,43 @@ def string_to_typeengine(
106
106
  return type_obj
107
107
 
108
108
 
109
+ def is_mock_url(url: URL) -> bool:
110
+ """Check if the engine URL is a mock URL.
111
+
112
+ Parameters
113
+ ----------
114
+ url
115
+ The SQLAlchemy engine URL.
116
+
117
+ Returns
118
+ -------
119
+ bool
120
+ True if the URL is a mock URL, False otherwise.
121
+ """
122
+ return (url.drivername == "sqlite" and url.database is None) or (
123
+ url.drivername != "sqlite" and url.host is None
124
+ )
125
+
126
+
127
+ def is_valid_engine(engine: Engine | MockConnection | None) -> bool:
128
+ """Check if the engine is valid.
129
+
130
+ The engine cannot be ``None``; it must not be a mock connection; and it must
131
+ not be a mock URL which is missing a host or, for sqlite, a database name.
132
+
133
+ Parameters
134
+ ----------
135
+ engine
136
+ The SQLAlchemy engine or mock connection.
137
+
138
+ Returns
139
+ -------
140
+ bool
141
+ True if the engine is valid, False otherwise.
142
+ """
143
+ return engine is not None and not isinstance(engine, MockConnection) and not is_mock_url(engine.url)
144
+
145
+
109
146
  class SQLWriter:
110
147
  """Write SQL statements to stdout or a file.
111
148
 
@@ -193,12 +230,19 @@ class ConnectionWrapper:
193
230
  """
194
231
  if isinstance(statement, str):
195
232
  statement = text(statement)
196
- if isinstance(self.engine, MockConnection):
233
+ if isinstance(self.engine, Engine):
234
+ try:
235
+ with self.engine.begin() as connection:
236
+ result = connection.execute(statement)
237
+ return result
238
+ except SQLAlchemyError as e:
239
+ connection.rollback()
240
+ logger.error(f"Error executing statement: {e}")
241
+ raise
242
+ elif isinstance(self.engine, MockConnection):
197
243
  return self.engine.connect().execute(statement)
198
244
  else:
199
- with self.engine.begin() as connection:
200
- result = connection.execute(statement)
201
- return result
245
+ raise ValueError("Unsupported engine type:" + str(type(self.engine)))
202
246
 
203
247
 
204
248
  class DatabaseContext:
@@ -218,7 +262,7 @@ class DatabaseContext:
218
262
  self.engine = engine
219
263
  self.dialect_name = engine.dialect.name
220
264
  self.metadata = metadata
221
- self.conn = ConnectionWrapper(engine)
265
+ self.connection = ConnectionWrapper(engine)
222
266
 
223
267
  def initialize(self) -> None:
224
268
  """Create the schema in the database if it does not exist.
@@ -240,14 +284,14 @@ class DatabaseContext:
240
284
  try:
241
285
  if self.dialect_name == "mysql":
242
286
  logger.debug(f"Checking if MySQL database exists: {schema_name}")
243
- result = self.conn.execute(text(f"SHOW DATABASES LIKE '{schema_name}'"))
287
+ result = self.execute(text(f"SHOW DATABASES LIKE '{schema_name}'"))
244
288
  if result.fetchone():
245
289
  raise ValueError(f"MySQL database '{schema_name}' already exists.")
246
290
  logger.debug(f"Creating MySQL database: {schema_name}")
247
- self.conn.execute(text(f"CREATE DATABASE {schema_name}"))
291
+ self.execute(text(f"CREATE DATABASE {schema_name}"))
248
292
  elif self.dialect_name == "postgresql":
249
293
  logger.debug(f"Checking if PG schema exists: {schema_name}")
250
- result = self.conn.execute(
294
+ result = self.execute(
251
295
  text(
252
296
  f"""
253
297
  SELECT schema_name
@@ -259,7 +303,7 @@ class DatabaseContext:
259
303
  if result.fetchone():
260
304
  raise ValueError(f"PostgreSQL schema '{schema_name}' already exists.")
261
305
  logger.debug(f"Creating PG schema: {schema_name}")
262
- self.conn.execute(CreateSchema(schema_name))
306
+ self.execute(CreateSchema(schema_name))
263
307
  elif self.dialect_name == "sqlite":
264
308
  # Just silently ignore this operation for SQLite. The database
265
309
  # will still be created if it does not exist and the engine
@@ -285,13 +329,15 @@ class DatabaseContext:
285
329
  schema. For other variants, this is an unsupported operation.
286
330
  """
287
331
  schema_name = self.metadata.schema
332
+ if not self.engine.dialect.name == "sqlite" and self.metadata.schema is None:
333
+ raise ValueError("Schema name is required to drop the schema.")
288
334
  try:
289
335
  if self.dialect_name == "mysql":
290
336
  logger.debug(f"Dropping MySQL database if exists: {schema_name}")
291
- self.conn.execute(text(f"DROP DATABASE IF EXISTS {schema_name}"))
337
+ self.execute(text(f"DROP DATABASE IF EXISTS {schema_name}"))
292
338
  elif self.dialect_name == "postgresql":
293
339
  logger.debug(f"Dropping PostgreSQL schema if exists: {schema_name}")
294
- self.conn.execute(DropSchema(schema_name, if_exists=True, cascade=True))
340
+ self.execute(DropSchema(schema_name, if_exists=True, cascade=True))
295
341
  elif self.dialect_name == "sqlite":
296
342
  if isinstance(self.engine, Engine):
297
343
  logger.debug("Dropping tables in SQLite schema")
@@ -304,7 +350,21 @@ class DatabaseContext:
304
350
 
305
351
  def create_all(self) -> None:
306
352
  """Create all tables in the schema using the metadata object."""
307
- self.metadata.create_all(self.engine)
353
+ if isinstance(self.engine, Engine):
354
+ # Use a transaction for a real connection.
355
+ with self.engine.begin() as conn:
356
+ try:
357
+ self.metadata.create_all(bind=conn)
358
+ conn.commit()
359
+ except SQLAlchemyError as e:
360
+ conn.rollback()
361
+ logger.error(f"Error creating tables: {e}")
362
+ raise
363
+ elif isinstance(self.engine, MockConnection):
364
+ # Mock connection so no need for a transaction.
365
+ self.metadata.create_all(self.engine)
366
+ else:
367
+ raise ValueError("Unsupported engine type: " + str(type(self.engine)))
308
368
 
309
369
  @staticmethod
310
370
  def create_mock_engine(engine_url: str | URL, output_file: IO[str] | None = None) -> MockConnection:
@@ -327,3 +387,23 @@ class DatabaseContext:
327
387
  engine = create_mock_engine(engine_url, executor=writer.write, paramstyle="pyformat")
328
388
  writer.dialect = engine.dialect
329
389
  return engine
390
+
391
+ def execute(self, statement: Any) -> ResultProxy:
392
+ """Execute a SQL statement on the engine and return the result.
393
+
394
+ Parameters
395
+ ----------
396
+ statement
397
+ The SQL statement to execute.
398
+
399
+ Returns
400
+ -------
401
+ ``sqlalchemy.engine.ResultProxy``
402
+ The result of the statement execution.
403
+
404
+ Notes
405
+ -----
406
+ This is just a wrapper around the execution method of the connection
407
+ object, which may execute on a real or mock connection.
408
+ """
409
+ return self.connection.execute(statement)