lsst-felis 27.2024.2300__tar.gz → 27.2024.2500__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lsst-felis might be problematic. Click here for more details.

Files changed (34)
  1. {lsst_felis-27.2024.2300/python/lsst_felis.egg-info → lsst_felis-27.2024.2500}/PKG-INFO +1 -1
  2. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/felis/cli.py +27 -30
  3. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/felis/datamodel.py +52 -62
  4. lsst_felis-27.2024.2500/python/felis/db/dialects.py +63 -0
  5. lsst_felis-27.2024.2500/python/felis/db/utils.py +248 -0
  6. lsst_felis-27.2024.2300/python/felis/db/_variants.py → lsst_felis-27.2024.2500/python/felis/db/variants.py +29 -22
  7. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/felis/metadata.py +2 -185
  8. lsst_felis-27.2024.2500/python/felis/version.py +2 -0
  9. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500/python/lsst_felis.egg-info}/PKG-INFO +1 -1
  10. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/lsst_felis.egg-info/SOURCES.txt +4 -4
  11. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/tests/test_cli.py +12 -20
  12. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/tests/test_datamodel.py +115 -54
  13. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/tests/test_metadata.py +2 -1
  14. lsst_felis-27.2024.2300/python/felis/validation.py +0 -103
  15. lsst_felis-27.2024.2300/python/felis/version.py +0 -2
  16. lsst_felis-27.2024.2300/tests/test_validation.py +0 -233
  17. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/COPYRIGHT +0 -0
  18. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/LICENSE +0 -0
  19. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/README.rst +0 -0
  20. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/pyproject.toml +0 -0
  21. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/felis/__init__.py +0 -0
  22. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/felis/db/__init__.py +0 -0
  23. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/felis/db/sqltypes.py +0 -0
  24. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/felis/py.typed +0 -0
  25. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/felis/tap.py +0 -0
  26. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/felis/types.py +0 -0
  27. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/lsst_felis.egg-info/dependency_links.txt +0 -0
  28. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/lsst_felis.egg-info/entry_points.txt +0 -0
  29. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/lsst_felis.egg-info/requires.txt +0 -0
  30. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/lsst_felis.egg-info/top_level.txt +0 -0
  31. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/python/lsst_felis.egg-info/zip-safe +0 -0
  32. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/setup.cfg +0 -0
  33. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/tests/test_datatypes.py +0 -0
  34. {lsst_felis-27.2024.2300 → lsst_felis-27.2024.2500}/tests/test_tap.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lsst-felis
3
- Version: 27.2024.2300
3
+ Version: 27.2024.2500
4
4
  Summary: A vocabulary for describing catalogs and acting on those descriptions
5
5
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
6
6
  License: GNU General Public License v3 or later (GPLv3+)
@@ -29,14 +29,14 @@ from typing import IO
29
29
  import click
30
30
  import yaml
31
31
  from pydantic import ValidationError
32
- from sqlalchemy.engine import Engine, create_engine, create_mock_engine, make_url
32
+ from sqlalchemy.engine import Engine, create_engine, make_url
33
33
  from sqlalchemy.engine.mock import MockConnection
34
34
 
35
35
  from . import __version__
36
36
  from .datamodel import Schema
37
- from .metadata import DatabaseContext, InsertDump, MetaDataBuilder
37
+ from .db.utils import DatabaseContext
38
+ from .metadata import MetaDataBuilder
38
39
  from .tap import Tap11Base, TapLoadingVisitor, init_tables
39
- from .validation import get_schema
40
40
 
41
41
  logger = logging.getLogger("felis")
42
42
 
@@ -92,29 +92,27 @@ def create(
92
92
  """Create database objects from the Felis file."""
93
93
  yaml_data = yaml.safe_load(file)
94
94
  schema = Schema.model_validate(yaml_data)
95
- url_obj = make_url(engine_url)
95
+ url = make_url(engine_url)
96
96
  if schema_name:
97
97
  logger.info(f"Overriding schema name with: {schema_name}")
98
98
  schema.name = schema_name
99
- elif url_obj.drivername == "sqlite":
99
+ elif url.drivername == "sqlite":
100
100
  logger.info("Overriding schema name for sqlite with: main")
101
101
  schema.name = "main"
102
- if not url_obj.host and not url_obj.drivername == "sqlite":
102
+ if not url.host and not url.drivername == "sqlite":
103
103
  dry_run = True
104
104
  logger.info("Forcing dry run for non-sqlite engine URL with no host")
105
105
 
106
- builder = MetaDataBuilder(schema)
107
- builder.build()
108
- metadata = builder.metadata
106
+ metadata = MetaDataBuilder(schema).build()
109
107
  logger.debug(f"Created metadata with schema name: {metadata.schema}")
110
108
 
111
109
  engine: Engine | MockConnection
112
110
  if not dry_run and not output_file:
113
- engine = create_engine(engine_url, echo=echo)
111
+ engine = create_engine(url, echo=echo)
114
112
  else:
115
113
  if dry_run:
116
114
  logger.info("Dry run will be executed")
117
- engine = DatabaseContext.create_mock_engine(url_obj, output_file)
115
+ engine = DatabaseContext.create_mock_engine(url, output_file)
118
116
  if output_file:
119
117
  logger.info("Writing SQL output to: " + output_file.name)
120
118
 
@@ -229,10 +227,7 @@ def load_tap(
229
227
  )
230
228
  tap_visitor.visit_schema(schema)
231
229
  else:
232
- _insert_dump = InsertDump()
233
- conn = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump, paramstyle="pyformat")
234
- # After the engine is created, update the executor with the dialect
235
- _insert_dump.dialect = conn.dialect
230
+ conn = DatabaseContext.create_mock_engine(engine_url)
236
231
 
237
232
  tap_visitor = TapLoadingVisitor.from_mock_connection(
238
233
  conn,
@@ -245,42 +240,44 @@ def load_tap(
245
240
 
246
241
 
247
242
  @cli.command("validate")
243
+ @click.option("--check-description", is_flag=True, help="Require description for all objects", default=False)
248
244
  @click.option(
249
- "-s",
250
- "--schema-name",
251
- help="Schema name for validation",
252
- type=click.Choice(["RSP", "default"]),
253
- default="default",
245
+ "--check-redundant-datatypes", is_flag=True, help="Check for redundant datatypes", default=False
254
246
  )
255
247
  @click.option(
256
- "-d", "--require-description", is_flag=True, help="Require description for all objects", default=False
248
+ "--check-tap-table-indexes",
249
+ is_flag=True,
250
+ help="Check that every table has a unique TAP table index",
251
+ default=False,
257
252
  )
258
253
  @click.option(
259
- "-t", "--check-redundant-datatypes", is_flag=True, help="Check for redundant datatypes", default=False
254
+ "--check-tap-principal",
255
+ is_flag=True,
256
+ help="Check that at least one column per table is flagged as TAP principal",
257
+ default=False,
260
258
  )
261
259
  @click.argument("files", nargs=-1, type=click.File())
262
260
  def validate(
263
- schema_name: str,
264
- require_description: bool,
261
+ check_description: bool,
265
262
  check_redundant_datatypes: bool,
263
+ check_tap_table_indexes: bool,
264
+ check_tap_principal: bool,
266
265
  files: Iterable[io.TextIOBase],
267
266
  ) -> None:
268
267
  """Validate one or more felis YAML files."""
269
- schema_class = get_schema(schema_name)
270
- if schema_name != "default":
271
- logger.info(f"Using schema '{schema_class.__name__}'")
272
-
273
268
  rc = 0
274
269
  for file in files:
275
270
  file_name = getattr(file, "name", None)
276
271
  logger.info(f"Validating {file_name}")
277
272
  try:
278
273
  data = yaml.load(file, Loader=yaml.SafeLoader)
279
- schema_class.model_validate(
274
+ Schema.model_validate(
280
275
  data,
281
276
  context={
277
+ "check_description": check_description,
282
278
  "check_redundant_datatypes": check_redundant_datatypes,
283
- "require_description": require_description,
279
+ "check_tap_table_indexes": check_tap_table_indexes,
280
+ "check_tap_principal": check_tap_principal,
284
281
  },
285
282
  )
286
283
  except ValidationError as e:
@@ -22,7 +22,6 @@
22
22
  from __future__ import annotations
23
23
 
24
24
  import logging
25
- import re
26
25
  from collections.abc import Mapping, Sequence
27
26
  from enum import StrEnum, auto
28
27
  from typing import Annotated, Any, Literal, TypeAlias
@@ -30,13 +29,10 @@ from typing import Annotated, Any, Literal, TypeAlias
30
29
  from astropy import units as units # type: ignore
31
30
  from astropy.io.votable import ucd # type: ignore
32
31
  from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator, model_validator
33
- from sqlalchemy import dialects
34
- from sqlalchemy import types as sqa_types
35
- from sqlalchemy.engine import create_mock_engine
36
- from sqlalchemy.engine.interfaces import Dialect
37
- from sqlalchemy.types import TypeEngine
38
32
 
33
+ from .db.dialects import get_supported_dialects
39
34
  from .db.sqltypes import get_type_func
35
+ from .db.utils import string_to_typeengine
40
36
  from .types import Boolean, Byte, Char, Double, FelisType, Float, Int, Long, Short, String, Text, Unicode
41
37
 
42
38
  logger = logging.getLogger(__name__)
@@ -100,7 +96,7 @@ class BaseObject(BaseModel):
100
96
  def check_description(self, info: ValidationInfo) -> BaseObject:
101
97
  """Check that the description is present if required."""
102
98
  context = info.context
103
- if not context or not context.get("require_description", False):
99
+ if not context or not context.get("check_description", False):
104
100
  return self
105
101
  if self.description is None or self.description == "":
106
102
  raise ValueError("Description is required and must be non-empty")
@@ -127,51 +123,6 @@ class DataType(StrEnum):
127
123
  timestamp = auto()
128
124
 
129
125
 
130
- _DIALECTS = {
131
- "mysql": create_mock_engine("mysql://", executor=None).dialect,
132
- "postgresql": create_mock_engine("postgresql://", executor=None).dialect,
133
- }
134
- """Dictionary of dialect names to SQLAlchemy dialects."""
135
-
136
- _DIALECT_MODULES = {"mysql": getattr(dialects, "mysql"), "postgresql": getattr(dialects, "postgresql")}
137
- """Dictionary of dialect names to SQLAlchemy dialect modules."""
138
-
139
- _DATATYPE_REGEXP = re.compile(r"(\w+)(\((.*)\))?")
140
- """Regular expression to match data types in the form "type(length)"""
141
-
142
-
143
- def string_to_typeengine(
144
- type_string: str, dialect: Dialect | None = None, length: int | None = None
145
- ) -> TypeEngine:
146
- match = _DATATYPE_REGEXP.search(type_string)
147
- if not match:
148
- raise ValueError(f"Invalid type string: {type_string}")
149
-
150
- type_name, _, params = match.groups()
151
- if dialect is None:
152
- type_class = getattr(sqa_types, type_name.upper(), None)
153
- else:
154
- try:
155
- dialect_module = _DIALECT_MODULES[dialect.name]
156
- except KeyError:
157
- raise ValueError(f"Unsupported dialect: {dialect}")
158
- type_class = getattr(dialect_module, type_name.upper(), None)
159
-
160
- if not type_class:
161
- raise ValueError(f"Unsupported type: {type_class}")
162
-
163
- if params:
164
- params = [int(param) if param.isdigit() else param for param in params.split(",")]
165
- type_obj = type_class(*params)
166
- else:
167
- type_obj = type_class()
168
-
169
- if hasattr(type_obj, "length") and getattr(type_obj, "length") is None and length is not None:
170
- type_obj.length = length
171
-
172
- return type_obj
173
-
174
-
175
126
  class Column(BaseObject):
176
127
  """A column in a table."""
177
128
 
@@ -257,12 +208,11 @@ class Column(BaseObject):
257
208
  raise ValueError(f"Invalid IVOA UCD: {e}")
258
209
  return ivoa_ucd
259
210
 
260
- @model_validator(mode="before")
261
- @classmethod
262
- def check_units(cls, values: dict[str, Any]) -> dict[str, Any]:
211
+ @model_validator(mode="after")
212
+ def check_units(self) -> Column:
263
213
  """Check that units are valid."""
264
- fits_unit = values.get("fits:tunit")
265
- ivoa_unit = values.get("ivoa:unit")
214
+ fits_unit = self.fits_tunit
215
+ ivoa_unit = self.ivoa_unit
266
216
 
267
217
  if fits_unit and ivoa_unit:
268
218
  raise ValueError("Column cannot have both FITS and IVOA units")
@@ -274,7 +224,7 @@ class Column(BaseObject):
274
224
  except ValueError as e:
275
225
  raise ValueError(f"Invalid unit: {e}")
276
226
 
277
- return values
227
+ return self
278
228
 
279
229
  @model_validator(mode="before")
280
230
  @classmethod
@@ -299,12 +249,15 @@ class Column(BaseObject):
299
249
  return values
300
250
 
301
251
  @model_validator(mode="after")
302
- def check_datatypes(self, info: ValidationInfo) -> Column:
252
+ def check_redundant_datatypes(self, info: ValidationInfo) -> Column:
303
253
  """Check for redundant datatypes on columns."""
304
254
  context = info.context
305
255
  if not context or not context.get("check_redundant_datatypes", False):
306
256
  return self
307
- if all(getattr(self, f"{dialect}:datatype", None) is not None for dialect in _DIALECTS.keys()):
257
+ if all(
258
+ getattr(self, f"{dialect}:datatype", None) is not None
259
+ for dialect in get_supported_dialects().keys()
260
+ ):
308
261
  return self
309
262
 
310
263
  datatype = self.datatype
@@ -317,7 +270,7 @@ class Column(BaseObject):
317
270
  else:
318
271
  datatype_obj = datatype_func()
319
272
 
320
- for dialect_name, dialect in _DIALECTS.items():
273
+ for dialect_name, dialect in get_supported_dialects().items():
321
274
  db_annotation = f"{dialect_name}_datatype"
322
275
  if datatype_string := self.model_dump().get(db_annotation):
323
276
  db_datatype_obj = string_to_typeengine(datatype_string, dialect, length)
@@ -465,6 +418,29 @@ class Table(BaseObject):
465
418
  raise ValueError("Column names must be unique")
466
419
  return columns
467
420
 
421
+ @model_validator(mode="after")
422
+ def check_tap_table_index(self, info: ValidationInfo) -> Table:
423
+ """Check that the table has a TAP table index."""
424
+ context = info.context
425
+ if not context or not context.get("check_tap_table_indexes", False):
426
+ return self
427
+ if self.tap_table_index is None:
428
+ raise ValueError("Table is missing a TAP table index")
429
+ return self
430
+
431
+ @model_validator(mode="after")
432
+ def check_tap_principal(self, info: ValidationInfo) -> Table:
433
+ """Check that at least one column is flagged as 'principal' for TAP
434
+ purposes.
435
+ """
436
+ context = info.context
437
+ if not context or not context.get("check_tap_principal", False):
438
+ return self
439
+ for col in self.columns:
440
+ if col.tap_principal == 1:
441
+ return self
442
+ raise ValueError(f"Table '{self.name}' is missing at least one column designated as 'tap:principal'")
443
+
468
444
 
469
445
  class SchemaVersion(BaseModel):
470
446
  """The version of the schema."""
@@ -554,6 +530,21 @@ class Schema(BaseObject):
554
530
  raise ValueError("Table names must be unique")
555
531
  return tables
556
532
 
533
+ @model_validator(mode="after")
534
+ def check_tap_table_indexes(self, info: ValidationInfo) -> Schema:
535
+ """Check that the TAP table indexes are unique."""
536
+ context = info.context
537
+ if not context or not context.get("check_tap_table_indexes", False):
538
+ return self
539
+ table_indicies = set()
540
+ for table in self.tables:
541
+ table_index = table.tap_table_index
542
+ if table_index is not None:
543
+ if table_index in table_indicies:
544
+ raise ValueError(f"Duplicate 'tap:table_index' value {table_index} found in schema")
545
+ table_indicies.add(table_index)
546
+ return self
547
+
557
548
  def _create_id_map(self: Schema) -> Schema:
558
549
  """Create a map of IDs to objects.
559
550
 
@@ -566,7 +557,6 @@ class Schema(BaseObject):
566
557
  return self
567
558
  visitor: SchemaIdVisitor = SchemaIdVisitor()
568
559
  visitor.visit_schema(self)
569
- logger.debug(f"Created schema ID map with {len(self.id_map.keys())} objects")
570
560
  if len(visitor.duplicates):
571
561
  raise ValueError(
572
562
  "Duplicate IDs found in schema:\n " + "\n ".join(visitor.duplicates) + "\n"
@@ -0,0 +1,63 @@
1
+ # This file is part of felis.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (https://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # This program is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # This program is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
21
+
22
+ import logging
23
+ from types import ModuleType
24
+
25
+ from sqlalchemy import dialects
26
+ from sqlalchemy.engine import Dialect
27
+ from sqlalchemy.engine.mock import create_mock_engine
28
+
29
+ from .sqltypes import MYSQL, ORACLE, POSTGRES, SQLITE
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+ _DIALECT_NAMES = [MYSQL, POSTGRES, SQLITE, ORACLE]
34
+
35
+
36
+ def _dialect(dialect_name: str) -> Dialect:
37
+ """Create the SQLAlchemy dialect for the given name."""
38
+ return create_mock_engine(f"{dialect_name}://", executor=None).dialect
39
+
40
+
41
+ _DIALECTS = {name: _dialect(name) for name in _DIALECT_NAMES}
42
+ """Dictionary of dialect names to SQLAlchemy dialects."""
43
+
44
+
45
+ def get_supported_dialects() -> dict[str, Dialect]:
46
+ """Get a dictionary of the supported SQLAlchemy dialects."""
47
+ return _DIALECTS
48
+
49
+
50
+ def _dialect_module(dialect_name: str) -> ModuleType:
51
+ """Get the SQLAlchemy dialect module for the given name."""
52
+ return getattr(dialects, dialect_name)
53
+
54
+
55
+ _DIALECT_MODULES = {name: _dialect_module(name) for name in _DIALECT_NAMES}
56
+ """Dictionary of dialect names to SQLAlchemy modules for type instantiation."""
57
+
58
+
59
+ def get_dialect_module(dialect_name: str) -> ModuleType:
60
+ """Get the SQLAlchemy dialect module for the given name."""
61
+ if dialect_name not in _DIALECT_MODULES:
62
+ raise ValueError(f"Unsupported dialect: {dialect_name}")
63
+ return _DIALECT_MODULES[dialect_name]
@@ -0,0 +1,248 @@
1
+ # This file is part of felis.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (https://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # This program is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # This program is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
21
+
22
+ from __future__ import annotations
23
+
24
+ import logging
25
+ import re
26
+ from typing import IO, Any
27
+
28
+ from sqlalchemy import MetaData, types
29
+ from sqlalchemy.engine import Dialect, Engine, ResultProxy
30
+ from sqlalchemy.engine.mock import MockConnection, create_mock_engine
31
+ from sqlalchemy.engine.url import URL
32
+ from sqlalchemy.exc import SQLAlchemyError
33
+ from sqlalchemy.schema import CreateSchema, DropSchema
34
+ from sqlalchemy.sql import text
35
+ from sqlalchemy.types import TypeEngine
36
+
37
+ from .dialects import get_dialect_module
38
+
39
+ logger = logging.getLogger("felis")
40
+
41
+ _DATATYPE_REGEXP = re.compile(r"(\w+)(\((.*)\))?")
42
+ """Regular expression to match data types in the form "type(length)"""
43
+
44
+
45
+ def string_to_typeengine(
46
+ type_string: str, dialect: Dialect | None = None, length: int | None = None
47
+ ) -> TypeEngine:
48
+ """Convert a string representation of a data type to a SQLAlchemy
49
+ TypeEngine.
50
+ """
51
+ match = _DATATYPE_REGEXP.search(type_string)
52
+ if not match:
53
+ raise ValueError(f"Invalid type string: {type_string}")
54
+
55
+ type_name, _, params = match.groups()
56
+ if dialect is None:
57
+ type_class = getattr(types, type_name.upper(), None)
58
+ else:
59
+ try:
60
+ dialect_module = get_dialect_module(dialect.name)
61
+ except KeyError:
62
+ raise ValueError(f"Unsupported dialect: {dialect}")
63
+ type_class = getattr(dialect_module, type_name.upper(), None)
64
+
65
+ if not type_class:
66
+ raise ValueError(f"Unsupported type: {type_class}")
67
+
68
+ if params:
69
+ params = [int(param) if param.isdigit() else param for param in params.split(",")]
70
+ type_obj = type_class(*params)
71
+ else:
72
+ type_obj = type_class()
73
+
74
+ if hasattr(type_obj, "length") and getattr(type_obj, "length") is None and length is not None:
75
+ type_obj.length = length
76
+
77
+ return type_obj
78
+
79
+
80
+ class SQLWriter:
81
+ """Writes SQL statements to stdout or a file."""
82
+
83
+ def __init__(self, file: IO[str] | None = None) -> None:
84
+ """Initialize the SQL writer.
85
+
86
+ Parameters
87
+ ----------
88
+ file : `io.TextIOBase` or `None`, optional
89
+ The file to write the SQL statements to. If None, the statements
90
+ will be written to stdout.
91
+ """
92
+ self.file = file
93
+ self.dialect: Dialect | None = None
94
+
95
+ def write(self, sql: Any, *multiparams: Any, **params: Any) -> None:
96
+ """Write the SQL statement to a file or stdout.
97
+
98
+ Statements with parameters will be formatted with the values
99
+ inserted into the resultant SQL output.
100
+
101
+ Parameters
102
+ ----------
103
+ sql : `typing.Any`
104
+ The SQL statement to write.
105
+ multiparams : `typing.Any`
106
+ The multiparams to use for the SQL statement.
107
+ params : `typing.Any`
108
+ The params to use for the SQL statement.
109
+ """
110
+ compiled = sql.compile(dialect=self.dialect)
111
+ sql_str = str(compiled) + ";"
112
+ params_list = [compiled.params]
113
+ for params in params_list:
114
+ if not params:
115
+ print(sql_str, file=self.file)
116
+ continue
117
+ new_params = {}
118
+ for key, value in params.items():
119
+ if isinstance(value, str):
120
+ new_params[key] = f"'{value}'"
121
+ elif value is None:
122
+ new_params[key] = "null"
123
+ else:
124
+ new_params[key] = value
125
+ print(sql_str % new_params, file=self.file)
126
+
127
+
128
+ class ConnectionWrapper:
129
+ """A wrapper for a SQLAlchemy engine or mock connection which provides a
130
+ consistent interface for executing SQL statements.
131
+ """
132
+
133
+ def __init__(self, engine: Engine | MockConnection):
134
+ """Initialize the connection wrapper.
135
+
136
+ Parameters
137
+ ----------
138
+ engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
139
+ The SQLAlchemy engine or mock connection to wrap.
140
+ """
141
+ self.engine = engine
142
+
143
+ def execute(self, statement: Any) -> ResultProxy:
144
+ """Execute a SQL statement on the engine and return the result."""
145
+ if isinstance(statement, str):
146
+ statement = text(statement)
147
+ if isinstance(self.engine, MockConnection):
148
+ return self.engine.connect().execute(statement)
149
+ else:
150
+ with self.engine.begin() as connection:
151
+ result = connection.execute(statement)
152
+ return result
153
+
154
+
155
+ class DatabaseContext:
156
+ """A class for managing the schema and its database connection."""
157
+
158
+ def __init__(self, metadata: MetaData, engine: Engine | MockConnection):
159
+ """Initialize the database context.
160
+
161
+ Parameters
162
+ ----------
163
+ metadata : `sqlalchemy.MetaData`
164
+ The SQLAlchemy metadata object.
165
+
166
+ engine : `sqlalchemy.Engine` or `sqlalchemy.MockConnection`
167
+ The SQLAlchemy engine or mock connection object.
168
+ """
169
+ self.engine = engine
170
+ self.dialect_name = engine.dialect.name
171
+ self.metadata = metadata
172
+ self.conn = ConnectionWrapper(engine)
173
+
174
+ def create_if_not_exists(self) -> None:
175
+ """Create the schema in the database if it does not exist.
176
+
177
+ In MySQL, this will create a new database. In PostgreSQL, it will
178
+ create a new schema. For other variants, this is an unsupported
179
+ operation.
180
+
181
+ Parameters
182
+ ----------
183
+ engine: `sqlalchemy.Engine`
184
+ The SQLAlchemy engine object.
185
+ schema_name: `str`
186
+ The name of the schema (or database) to create.
187
+ """
188
+ schema_name = self.metadata.schema
189
+ try:
190
+ if self.dialect_name == "mysql":
191
+ logger.debug(f"Creating MySQL database: {schema_name}")
192
+ self.conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {schema_name}"))
193
+ elif self.dialect_name == "postgresql":
194
+ logger.debug(f"Creating PG schema: {schema_name}")
195
+ self.conn.execute(CreateSchema(schema_name, if_not_exists=True))
196
+ else:
197
+ raise ValueError("Unsupported database type:" + self.dialect_name)
198
+ except SQLAlchemyError as e:
199
+ logger.error(f"Error creating schema: {e}")
200
+ raise
201
+
202
+ def drop_if_exists(self) -> None:
203
+ """Drop the schema in the database if it exists.
204
+
205
+ In MySQL, this will drop a database. In PostgreSQL, it will drop a
206
+ schema. For other variants, this is unsupported for now.
207
+
208
+ Parameters
209
+ ----------
210
+ engine: `sqlalchemy.Engine`
211
+ The SQLAlchemy engine object.
212
+ schema_name: `str`
213
+ The name of the schema (or database) to drop.
214
+ """
215
+ schema_name = self.metadata.schema
216
+ try:
217
+ if self.dialect_name == "mysql":
218
+ logger.debug(f"Dropping MySQL database if exists: {schema_name}")
219
+ self.conn.execute(text(f"DROP DATABASE IF EXISTS {schema_name}"))
220
+ elif self.dialect_name == "postgresql":
221
+ logger.debug(f"Dropping PostgreSQL schema if exists: {schema_name}")
222
+ self.conn.execute(DropSchema(schema_name, if_exists=True, cascade=True))
223
+ else:
224
+ raise ValueError(f"Unsupported database type: {self.dialect_name}")
225
+ except SQLAlchemyError as e:
226
+ logger.error(f"Error dropping schema: {e}")
227
+ raise
228
+
229
+ def create_all(self) -> None:
230
+ """Create all tables in the schema using the metadata object."""
231
+ self.metadata.create_all(self.engine)
232
+
233
+ @staticmethod
234
+ def create_mock_engine(engine_url: str | URL, output_file: IO[str] | None = None) -> MockConnection:
235
+ """Create a mock engine for testing or dumping DDL statements.
236
+
237
+ Parameters
238
+ ----------
239
+ engine_url : `sqlalchemy.engine.url.URL`
240
+ The SQLAlchemy engine URL.
241
+ output_file : `typing.IO` [ `str` ] or `None`, optional
242
+ The file to write the SQL statements to. If None, the statements
243
+ will be written to stdout.
244
+ """
245
+ writer = SQLWriter(output_file)
246
+ engine = create_mock_engine(engine_url, executor=writer.write)
247
+ writer.dialect = engine.dialect
248
+ return engine