lsst-felis 26.2024.1500__tar.gz → 26.2024.1700__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lsst-felis might be problematic; see the release details below for more information.

Files changed (39)
  1. {lsst-felis-26.2024.1500/python/lsst_felis.egg-info → lsst_felis-26.2024.1700}/PKG-INFO +5 -2
  2. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/pyproject.toml +5 -6
  3. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/cli.py +42 -45
  4. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/datamodel.py +152 -71
  5. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/db/_variants.py +5 -5
  6. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/db/sqltypes.py +14 -19
  7. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/metadata.py +3 -9
  8. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/tap.py +67 -80
  9. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/types.py +1 -1
  10. lsst_felis-26.2024.1700/python/felis/version.py +2 -0
  11. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700/python/lsst_felis.egg-info}/PKG-INFO +5 -2
  12. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/lsst_felis.egg-info/SOURCES.txt +1 -0
  13. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/lsst_felis.egg-info/requires.txt +3 -0
  14. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/tests/test_cli.py +0 -4
  15. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/tests/test_datamodel.py +61 -31
  16. lsst_felis-26.2024.1700/tests/test_datatypes.py +116 -0
  17. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/tests/test_tap.py +5 -10
  18. lsst-felis-26.2024.1500/python/felis/version.py +0 -2
  19. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/COPYRIGHT +0 -0
  20. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/LICENSE +0 -0
  21. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/README.rst +0 -0
  22. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/__init__.py +0 -0
  23. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/check.py +0 -0
  24. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/db/__init__.py +0 -0
  25. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/py.typed +0 -0
  26. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/simple.py +0 -0
  27. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/utils.py +0 -0
  28. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/validation.py +0 -0
  29. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/felis/visitor.py +0 -0
  30. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/lsst_felis.egg-info/dependency_links.txt +0 -0
  31. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/lsst_felis.egg-info/entry_points.txt +0 -0
  32. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/lsst_felis.egg-info/top_level.txt +0 -0
  33. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/python/lsst_felis.egg-info/zip-safe +0 -0
  34. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/setup.cfg +0 -0
  35. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/tests/test_check.py +0 -0
  36. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/tests/test_metadata.py +0 -0
  37. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/tests/test_simple.py +0 -0
  38. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/tests/test_utils.py +0 -0
  39. {lsst-felis-26.2024.1500 → lsst_felis-26.2024.1700}/tests/test_validation.py +0 -0
@@ -1,10 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lsst-felis
3
- Version: 26.2024.1500
3
+ Version: 26.2024.1700
4
4
  Summary: A vocabulary for describing catalogs and acting on those descriptions
5
5
  Author-email: Rubin Observatory Data Management <dm-admin@lists.lsst.org>
6
6
  License: GNU General Public License v3 or later (GPLv3+)
7
- Project-URL: Homepage, https://github.com/lsst/felis
7
+ Project-URL: Homepage, https://felis.lsst.io
8
+ Project-URL: Source, https://github.com/lsst/felis
8
9
  Keywords: lsst
9
10
  Classifier: Intended Audience :: Science/Research
10
11
  Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
@@ -26,3 +27,5 @@ Requires-Dist: pydantic<3,>=2
26
27
  Requires-Dist: lsst-utils
27
28
  Provides-Extra: test
28
29
  Requires-Dist: pytest>=3.2; extra == "test"
30
+ Provides-Extra: dev
31
+ Requires-Dist: documenteer[guide]; extra == "dev"
@@ -33,12 +33,16 @@ requires-python = ">=3.11.0"
33
33
  dynamic = ["version"]
34
34
 
35
35
  [project.urls]
36
- "Homepage" = "https://github.com/lsst/felis"
36
+ Homepage = "https://felis.lsst.io"
37
+ Source = "https://github.com/lsst/felis"
37
38
 
38
39
  [project.optional-dependencies]
39
40
  test = [
40
41
  "pytest >= 3.2"
41
42
  ]
43
+ dev = [
44
+ "documenteer[guide]"
45
+ ]
42
46
 
43
47
  [tool.pytest.ini_options]
44
48
 
@@ -143,11 +147,6 @@ select = [
143
147
  "D", # pydocstyle
144
148
  ]
145
149
  target-version = "py311"
146
- # Commented out to suppress "unused noqa" in jenkins which has older ruff not
147
- # generating E721.
148
- extend-select = [
149
- "RUF100", # Warn about unused noqa
150
- ]
151
150
 
152
151
  [tool.pydeps]
153
152
  max_bacon = 2
@@ -183,6 +183,7 @@ def init_tap(
183
183
  @click.option("--tap-columns-table", help="Alt Table Name for TAP_SCHEMA.columns")
184
184
  @click.option("--tap-keys-table", help="Alt Table Name for TAP_SCHEMA.keys")
185
185
  @click.option("--tap-key-columns-table", help="Alt Table Name for TAP_SCHEMA.key_columns")
186
+ @click.option("--tap-schema-index", type=int, help="TAP_SCHEMA index of the schema")
186
187
  @click.argument("file", type=click.File())
187
188
  def load_tap(
188
189
  engine_url: str,
@@ -196,6 +197,7 @@ def load_tap(
196
197
  tap_columns_table: str,
197
198
  tap_keys_table: str,
198
199
  tap_key_columns_table: str,
200
+ tap_schema_index: int,
199
201
  file: io.TextIOBase,
200
202
  ) -> None:
201
203
  """Load TAP metadata from a Felis FILE.
@@ -203,28 +205,8 @@ def load_tap(
203
205
  This command loads the associated TAP metadata from a Felis FILE
204
206
  to the TAP_SCHEMA tables.
205
207
  """
206
- top_level_object = yaml.load(file, Loader=yaml.SafeLoader)
207
- schema_obj: dict
208
- if isinstance(top_level_object, dict):
209
- schema_obj = top_level_object
210
- if "@graph" not in schema_obj:
211
- schema_obj["@type"] = "felis:Schema"
212
- schema_obj["@context"] = DEFAULT_CONTEXT
213
- elif isinstance(top_level_object, list):
214
- schema_obj = {"@context": DEFAULT_CONTEXT, "@graph": top_level_object}
215
- else:
216
- logger.error("Schema object not of recognizable type")
217
- raise click.exceptions.Exit(1)
218
-
219
- normalized = _normalize(schema_obj, embed="@always")
220
- if len(normalized["@graph"]) > 1 and (schema_name or catalog_name):
221
- logger.error("--schema-name and --catalog-name incompatible with multiple schemas")
222
- raise click.exceptions.Exit(1)
223
-
224
- # Force normalized["@graph"] to a list, which is what happens when there's
225
- # multiple schemas
226
- if isinstance(normalized["@graph"], dict):
227
- normalized["@graph"] = [normalized["@graph"]]
208
+ yaml_data = yaml.load(file, Loader=yaml.SafeLoader)
209
+ schema = Schema.model_validate(yaml_data)
228
210
 
229
211
  tap_tables = init_tables(
230
212
  tap_schema_name,
@@ -243,28 +225,28 @@ def load_tap(
243
225
  # In Memory SQLite - Mostly used to test
244
226
  Tap11Base.metadata.create_all(engine)
245
227
 
246
- for schema in normalized["@graph"]:
247
- tap_visitor = TapLoadingVisitor(
248
- engine,
249
- catalog_name=catalog_name,
250
- schema_name=schema_name,
251
- tap_tables=tap_tables,
252
- )
253
- tap_visitor.visit_schema(schema)
228
+ tap_visitor = TapLoadingVisitor(
229
+ engine,
230
+ catalog_name=catalog_name,
231
+ schema_name=schema_name,
232
+ tap_tables=tap_tables,
233
+ tap_schema_index=tap_schema_index,
234
+ )
235
+ tap_visitor.visit_schema(schema)
254
236
  else:
255
237
  _insert_dump = InsertDump()
256
238
  conn = create_mock_engine(make_url(engine_url), executor=_insert_dump.dump, paramstyle="pyformat")
257
239
  # After the engine is created, update the executor with the dialect
258
240
  _insert_dump.dialect = conn.dialect
259
241
 
260
- for schema in normalized["@graph"]:
261
- tap_visitor = TapLoadingVisitor.from_mock_connection(
262
- conn,
263
- catalog_name=catalog_name,
264
- schema_name=schema_name,
265
- tap_tables=tap_tables,
266
- )
267
- tap_visitor.visit_schema(schema)
242
+ tap_visitor = TapLoadingVisitor.from_mock_connection(
243
+ conn,
244
+ catalog_name=catalog_name,
245
+ schema_name=schema_name,
246
+ tap_tables=tap_tables,
247
+ tap_schema_index=tap_schema_index,
248
+ )
249
+ tap_visitor.visit_schema(schema)
268
250
 
269
251
 
270
252
  @cli.command("modify-tap")
@@ -373,22 +355,37 @@ def merge(files: Iterable[io.TextIOBase]) -> None:
373
355
  type=click.Choice(["RSP", "default"]),
374
356
  default="default",
375
357
  )
376
- @click.option("-d", "--require-description", is_flag=True, help="Require description for all objects")
358
+ @click.option(
359
+ "-d", "--require-description", is_flag=True, help="Require description for all objects", default=False
360
+ )
361
+ @click.option(
362
+ "-t", "--check-redundant-datatypes", is_flag=True, help="Check for redundant datatypes", default=False
363
+ )
377
364
  @click.argument("files", nargs=-1, type=click.File())
378
- def validate(schema_name: str, require_description: bool, files: Iterable[io.TextIOBase]) -> None:
365
+ def validate(
366
+ schema_name: str,
367
+ require_description: bool,
368
+ check_redundant_datatypes: bool,
369
+ files: Iterable[io.TextIOBase],
370
+ ) -> None:
379
371
  """Validate one or more felis YAML files."""
380
372
  schema_class = get_schema(schema_name)
381
- logger.info(f"Using schema '{schema_class.__name__}'")
382
-
383
- if require_description:
384
- Schema.require_description(True)
373
+ if schema_name != "default":
374
+ logger.info(f"Using schema '{schema_class.__name__}'")
385
375
 
386
376
  rc = 0
387
377
  for file in files:
388
378
  file_name = getattr(file, "name", None)
389
379
  logger.info(f"Validating {file_name}")
390
380
  try:
391
- schema_class.model_validate(yaml.load(file, Loader=yaml.SafeLoader))
381
+ data = yaml.load(file, Loader=yaml.SafeLoader)
382
+ schema_class.model_validate(
383
+ data,
384
+ context={
385
+ "check_redundant_datatypes": check_redundant_datatypes,
386
+ "require_description": require_description,
387
+ },
388
+ )
392
389
  except ValidationError as e:
393
390
  logger.error(e)
394
391
  rc = 1
@@ -22,13 +22,22 @@
22
22
  from __future__ import annotations
23
23
 
24
24
  import logging
25
+ import re
25
26
  from collections.abc import Mapping, Sequence
26
- from enum import Enum
27
+ from enum import StrEnum, auto
27
28
  from typing import Annotated, Any, Literal, TypeAlias
28
29
 
29
30
  from astropy import units as units # type: ignore
30
31
  from astropy.io.votable import ucd # type: ignore
31
- from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
32
+ from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator, model_validator
33
+ from sqlalchemy import dialects
34
+ from sqlalchemy import types as sqa_types
35
+ from sqlalchemy.engine import create_mock_engine
36
+ from sqlalchemy.engine.interfaces import Dialect
37
+ from sqlalchemy.types import TypeEngine
38
+
39
+ from .db.sqltypes import get_type_func
40
+ from .types import FelisType
32
41
 
33
42
  logger = logging.getLogger(__name__)
34
43
 
@@ -49,7 +58,6 @@ __all__ = (
49
58
  CONFIG = ConfigDict(
50
59
  populate_by_name=True, # Populate attributes by name.
51
60
  extra="forbid", # Do not allow extra fields.
52
- validate_assignment=True, # Validate assignments after model is created.
53
61
  str_strip_whitespace=True, # Strip whitespace from string fields.
54
62
  )
55
63
  """Pydantic model configuration as described in:
@@ -83,40 +91,85 @@ class BaseObject(BaseModel):
83
91
  """
84
92
 
85
93
  description: DescriptionStr | None = None
86
- """A description of the database object.
94
+ """A description of the database object."""
87
95
 
88
- By default, the description is optional but will be required if
89
- `BaseObject.Config.require_description` is set to `True` by the user.
90
- """
96
+ votable_utype: str | None = Field(None, alias="votable:utype")
97
+ """The VOTable utype (usage-specific or unique type) of the object."""
91
98
 
92
- @model_validator(mode="before")
93
- @classmethod
94
- def check_description(cls, values: dict[str, Any]) -> dict[str, Any]:
99
+ @model_validator(mode="after")
100
+ def check_description(self, info: ValidationInfo) -> BaseObject:
95
101
  """Check that the description is present if required."""
96
- if Schema.is_description_required():
97
- if "description" not in values or not values["description"]:
98
- raise ValueError("Description is required and must be non-empty")
99
- if len(values["description"].strip()) < DESCR_MIN_LENGTH:
100
- raise ValueError(f"Description must be at least {DESCR_MIN_LENGTH} characters long")
101
- return values
102
+ context = info.context
103
+ if not context or not context.get("require_description", False):
104
+ return self
105
+ if self.description is None or self.description == "":
106
+ raise ValueError("Description is required and must be non-empty")
107
+ if len(self.description) < DESCR_MIN_LENGTH:
108
+ raise ValueError(f"Description must be at least {DESCR_MIN_LENGTH} characters long")
109
+ return self
102
110
 
103
111
 
104
- class DataType(Enum):
112
+ class DataType(StrEnum):
105
113
  """`Enum` representing the data types supported by Felis."""
106
114
 
107
- BOOLEAN = "boolean"
108
- BYTE = "byte"
109
- SHORT = "short"
110
- INT = "int"
111
- LONG = "long"
112
- FLOAT = "float"
113
- DOUBLE = "double"
114
- CHAR = "char"
115
- STRING = "string"
116
- UNICODE = "unicode"
117
- TEXT = "text"
118
- BINARY = "binary"
119
- TIMESTAMP = "timestamp"
115
+ boolean = auto()
116
+ byte = auto()
117
+ short = auto()
118
+ int = auto()
119
+ long = auto()
120
+ float = auto()
121
+ double = auto()
122
+ char = auto()
123
+ string = auto()
124
+ unicode = auto()
125
+ text = auto()
126
+ binary = auto()
127
+ timestamp = auto()
128
+
129
+
130
+ _DIALECTS = {
131
+ "mysql": create_mock_engine("mysql://", executor=None).dialect,
132
+ "postgresql": create_mock_engine("postgresql://", executor=None).dialect,
133
+ }
134
+ """Dictionary of dialect names to SQLAlchemy dialects."""
135
+
136
+ _DIALECT_MODULES = {"mysql": getattr(dialects, "mysql"), "postgresql": getattr(dialects, "postgresql")}
137
+ """Dictionary of dialect names to SQLAlchemy dialect modules."""
138
+
139
+ _DATATYPE_REGEXP = re.compile(r"(\w+)(\((.*)\))?")
140
+ """Regular expression to match data types in the form "type(length)"""
141
+
142
+
143
+ def string_to_typeengine(
144
+ type_string: str, dialect: Dialect | None = None, length: int | None = None
145
+ ) -> TypeEngine:
146
+ match = _DATATYPE_REGEXP.search(type_string)
147
+ if not match:
148
+ raise ValueError(f"Invalid type string: {type_string}")
149
+
150
+ type_name, _, params = match.groups()
151
+ if dialect is None:
152
+ type_class = getattr(sqa_types, type_name.upper(), None)
153
+ else:
154
+ try:
155
+ dialect_module = _DIALECT_MODULES[dialect.name]
156
+ except KeyError:
157
+ raise ValueError(f"Unsupported dialect: {dialect}")
158
+ type_class = getattr(dialect_module, type_name.upper(), None)
159
+
160
+ if not type_class:
161
+ raise ValueError(f"Unsupported type: {type_class}")
162
+
163
+ if params:
164
+ params = [int(param) if param.isdigit() else param for param in params.split(",")]
165
+ type_obj = type_class(*params)
166
+ else:
167
+ type_obj = type_class()
168
+
169
+ if hasattr(type_obj, "length") and getattr(type_obj, "length") is None and length is not None:
170
+ type_obj.length = length
171
+
172
+ return type_obj
120
173
 
121
174
 
122
175
  class Column(BaseObject):
@@ -128,13 +181,8 @@ class Column(BaseObject):
128
181
  length: int | None = None
129
182
  """The length of the column."""
130
183
 
131
- nullable: bool | None = None
132
- """Whether the column can be ``NULL``.
133
-
134
- If `None`, this value was not set explicitly in the YAML data. In this
135
- case, it will be set to `False` for columns with numeric types and `True`
136
- otherwise.
137
- """
184
+ nullable: bool = True
185
+ """Whether the column can be ``NULL``."""
138
186
 
139
187
  value: Any = None
140
188
  """The default value of the column."""
@@ -171,12 +219,12 @@ class Column(BaseObject):
171
219
  """TAP_SCHEMA indication that this column is defined by an IVOA standard.
172
220
  """
173
221
 
174
- votable_utype: str | None = Field(None, alias="votable:utype")
175
- """The VOTable utype (usage-specific or unique type) of the column."""
176
-
177
222
  votable_xtype: str | None = Field(None, alias="votable:xtype")
178
223
  """The VOTable xtype (extended type) of the column."""
179
224
 
225
+ votable_datatype: str | None = Field(None, alias="votable:datatype")
226
+ """The VOTable datatype of the column."""
227
+
180
228
  @field_validator("ivoa_ucd")
181
229
  @classmethod
182
230
  def check_ivoa_ucd(cls, ivoa_ucd: str) -> str:
@@ -207,6 +255,57 @@ class Column(BaseObject):
207
255
 
208
256
  return values
209
257
 
258
+ @model_validator(mode="after") # type: ignore[arg-type]
259
+ @classmethod
260
+ def validate_datatypes(cls, col: Column, info: ValidationInfo) -> Column:
261
+ """Check for redundant datatypes on columns."""
262
+ context = info.context
263
+ if not context or not context.get("check_redundant_datatypes", False):
264
+ return col
265
+ if all(getattr(col, f"{dialect}:datatype", None) is not None for dialect in _DIALECTS.keys()):
266
+ return col
267
+
268
+ datatype = col.datatype
269
+ length: int | None = col.length or None
270
+
271
+ datatype_func = get_type_func(datatype)
272
+ felis_type = FelisType.felis_type(datatype)
273
+ if felis_type.is_sized:
274
+ if length is not None:
275
+ datatype_obj = datatype_func(length)
276
+ else:
277
+ raise ValueError(f"Length must be provided for sized type '{datatype}' in column '{col.id}'")
278
+ else:
279
+ datatype_obj = datatype_func()
280
+
281
+ for dialect_name, dialect in _DIALECTS.items():
282
+ db_annotation = f"{dialect_name}_datatype"
283
+ if datatype_string := col.model_dump().get(db_annotation):
284
+ db_datatype_obj = string_to_typeengine(datatype_string, dialect, length)
285
+ if datatype_obj.compile(dialect) == db_datatype_obj.compile(dialect):
286
+ raise ValueError(
287
+ "'{}: {}' is a redundant override of 'datatype: {}' in column '{}'{}".format(
288
+ db_annotation,
289
+ datatype_string,
290
+ col.datatype,
291
+ col.id,
292
+ "" if length is None else f" with length {length}",
293
+ )
294
+ )
295
+ else:
296
+ logger.debug(
297
+ "Type override of 'datatype: {}' with '{}: {}' in column '{}' "
298
+ "compiled to '{}' and '{}'".format(
299
+ col.datatype,
300
+ db_annotation,
301
+ datatype_string,
302
+ col.id,
303
+ datatype_obj.compile(dialect),
304
+ db_datatype_obj.compile(dialect),
305
+ )
306
+ )
307
+ return col
308
+
210
309
 
211
310
  class Constraint(BaseObject):
212
311
  """A database table constraint."""
@@ -404,15 +503,6 @@ class SchemaIdVisitor:
404
503
  class Schema(BaseObject):
405
504
  """The database schema containing the tables."""
406
505
 
407
- class ValidationConfig:
408
- """Validation configuration which is specific to Felis."""
409
-
410
- _require_description = False
411
- """Flag to require a description for all objects.
412
-
413
- This is set by the `require_description` class method.
414
- """
415
-
416
506
  version: SchemaVersion | str | None = None
417
507
  """The version of the schema."""
418
508
 
@@ -430,21 +520,29 @@ class Schema(BaseObject):
430
520
  raise ValueError("Table names must be unique")
431
521
  return tables
432
522
 
433
- @model_validator(mode="after")
434
- def create_id_map(self: Schema) -> Schema:
435
- """Create a map of IDs to objects."""
523
+ def _create_id_map(self: Schema) -> Schema:
524
+ """Create a map of IDs to objects.
525
+
526
+ This method should not be called by users. It is called automatically
527
+ by the ``model_post_init()`` method. If the ID map is already
528
+ populated, this method will return immediately.
529
+ """
436
530
  if len(self.id_map):
437
- logger.debug("ID map was already populated")
531
+ logger.debug("Ignoring call to create_id_map() - ID map was already populated")
438
532
  return self
439
533
  visitor: SchemaIdVisitor = SchemaIdVisitor()
440
534
  visitor.visit_schema(self)
441
- logger.debug(f"ID map contains {len(self.id_map.keys())} objects")
535
+ logger.debug(f"Created schema ID map with {len(self.id_map.keys())} objects")
442
536
  if len(visitor.duplicates):
443
537
  raise ValueError(
444
538
  "Duplicate IDs found in schema:\n " + "\n ".join(visitor.duplicates) + "\n"
445
539
  )
446
540
  return self
447
541
 
542
+ def model_post_init(self, ctx: Any) -> None:
543
+ """Post-initialization hook for the model."""
544
+ self._create_id_map()
545
+
448
546
  def __getitem__(self, id: str) -> BaseObject:
449
547
  """Get an object by its ID."""
450
548
  if id not in self:
@@ -454,20 +552,3 @@ class Schema(BaseObject):
454
552
  def __contains__(self, id: str) -> bool:
455
553
  """Check if an object with the given ID is in the schema."""
456
554
  return id in self.id_map
457
-
458
- @classmethod
459
- def require_description(cls, rd: bool = True) -> None:
460
- """Set whether a description is required for all objects.
461
-
462
- This includes the schema, tables, columns, and constraints.
463
-
464
- Users should call this method to set the requirement for a description
465
- when validating schemas, rather than change the flag value directly.
466
- """
467
- logger.debug(f"Setting description requirement to '{rd}'")
468
- cls.ValidationConfig._require_description = rd
469
-
470
- @classmethod
471
- def is_description_required(cls) -> bool:
472
- """Return whether a description is required for all objects."""
473
- return cls.ValidationConfig._require_description
@@ -40,10 +40,10 @@ TABLE_OPTS = {
40
40
  }
41
41
 
42
42
  COLUMN_VARIANT_OVERRIDE = {
43
- "mysql:datatype": "mysql",
44
- "oracle:datatype": "oracle",
45
- "postgresql:datatype": "postgresql",
46
- "sqlite:datatype": "sqlite",
43
+ "mysql_datatype": "mysql",
44
+ "oracle_datatype": "oracle",
45
+ "postgresql_datatype": "postgresql",
46
+ "sqlite_datatype": "sqlite",
47
47
  }
48
48
 
49
49
  DIALECT_MODULES = {MYSQL: mysql, ORACLE: oracle, SQLITE: sqlite, POSTGRES: postgresql}
@@ -87,7 +87,7 @@ def make_variant_dict(column_obj: Column) -> dict[str, TypeEngine[Any]]:
87
87
  """
88
88
  variant_dict = {}
89
89
  for field_name, value in iter(column_obj):
90
- if field_name in COLUMN_VARIANT_OVERRIDE:
90
+ if field_name in COLUMN_VARIANT_OVERRIDE and value is not None:
91
91
  dialect = COLUMN_VARIANT_OVERRIDE[field_name]
92
92
  variant: TypeEngine = process_variant_override(dialect, value)
93
93
  variant_dict[dialect] = variant
@@ -21,9 +21,9 @@
21
21
 
22
22
  import builtins
23
23
  from collections.abc import Mapping
24
- from typing import Any
24
+ from typing import Any, Callable
25
25
 
26
- from sqlalchemy import Float, SmallInteger, types
26
+ from sqlalchemy import SmallInteger, types
27
27
  from sqlalchemy.dialects import mysql, oracle, postgresql
28
28
  from sqlalchemy.ext.compiler import compiles
29
29
 
@@ -39,27 +39,15 @@ class TINYINT(SmallInteger):
39
39
  __visit_name__ = "TINYINT"
40
40
 
41
41
 
42
- class DOUBLE(Float):
43
- """The non-standard DOUBLE type."""
44
-
45
- __visit_name__ = "DOUBLE"
46
-
47
-
48
42
  @compiles(TINYINT)
49
43
  def compile_tinyint(type_: Any, compiler: Any, **kw: Any) -> str:
50
44
  """Return type name for TINYINT."""
51
45
  return "TINYINT"
52
46
 
53
47
 
54
- @compiles(DOUBLE)
55
- def compile_double(type_: Any, compiler: Any, **kw: Any) -> str:
56
- """Return type name for double precision type."""
57
- return "DOUBLE"
58
-
59
-
60
48
  _TypeMap = Mapping[str, types.TypeEngine | type[types.TypeEngine]]
61
49
 
62
- boolean_map: _TypeMap = {MYSQL: mysql.BIT(1), ORACLE: oracle.NUMBER(1), POSTGRES: postgresql.BOOLEAN()}
50
+ boolean_map: _TypeMap = {MYSQL: mysql.BOOLEAN, ORACLE: oracle.NUMBER(1), POSTGRES: postgresql.BOOLEAN()}
63
51
 
64
52
  byte_map: _TypeMap = {
65
53
  MYSQL: mysql.TINYINT(),
@@ -160,7 +148,7 @@ def float(**kwargs: Any) -> types.TypeEngine:
160
148
 
161
149
  def double(**kwargs: Any) -> types.TypeEngine:
162
150
  """Return SQLAlchemy type for double precision float."""
163
- return _vary(DOUBLE(), double_map, kwargs)
151
+ return _vary(types.DOUBLE(), double_map, kwargs)
164
152
 
165
153
 
166
154
  def char(length: builtins.int, **kwargs: Any) -> types.TypeEngine:
@@ -178,9 +166,9 @@ def unicode(length: builtins.int, **kwargs: Any) -> types.TypeEngine:
178
166
  return _vary(types.NVARCHAR(length), unicode_map, kwargs, length)
179
167
 
180
168
 
181
- def text(length: builtins.int, **kwargs: Any) -> types.TypeEngine:
169
+ def text(**kwargs: Any) -> types.TypeEngine:
182
170
  """Return SQLAlchemy type for text."""
183
- return _vary(types.CLOB(length), text_map, kwargs, length)
171
+ return _vary(types.TEXT(), text_map, kwargs)
184
172
 
185
173
 
186
174
  def binary(length: builtins.int, **kwargs: Any) -> types.TypeEngine:
@@ -193,6 +181,13 @@ def timestamp(**kwargs: Any) -> types.TypeEngine:
193
181
  return types.TIMESTAMP()
194
182
 
195
183
 
184
+ def get_type_func(type_name: str) -> Callable:
185
+ """Return the function for the type with the given name."""
186
+ if type_name not in globals():
187
+ raise ValueError(f"Unknown type: {type_name}")
188
+ return globals()[type_name]
189
+
190
+
196
191
  def _vary(
197
192
  type_: types.TypeEngine,
198
193
  variant_map: _TypeMap,
@@ -203,7 +198,7 @@ def _vary(
203
198
  variants.update(overrides)
204
199
  for dialect, variant in variants.items():
205
200
  # If this is a class and not an instance, instantiate
206
- if isinstance(variant, type):
201
+ if callable(variant):
207
202
  variant = variant(*args)
208
203
  type_ = type_.with_variant(variant, dialect)
209
204
  return type_
@@ -34,7 +34,6 @@ from sqlalchemy import (
34
34
  ForeignKeyConstraint,
35
35
  Index,
36
36
  MetaData,
37
- Numeric,
38
37
  PrimaryKeyConstraint,
39
38
  ResultProxy,
40
39
  Table,
@@ -265,17 +264,12 @@ class MetaDataBuilder:
265
264
  id = column_obj.id
266
265
  description = column_obj.description
267
266
  default = column_obj.value
267
+ nullable = column_obj.nullable
268
268
 
269
- # Handle variant overrides for the column (e.g., "mysql:datatype").
269
+ # Get datatype, handling variant overrides such as "mysql:datatype".
270
270
  datatype = get_datatype_with_variants(column_obj)
271
271
 
272
- # Set default value of nullable based on column type and then whether
273
- # it was explicitly provided in the schema data.
274
- nullable = column_obj.nullable
275
- if nullable is None:
276
- nullable = False if isinstance(datatype, Numeric) else True
277
-
278
- # Set autoincrement depending on if it was provided explicitly.
272
+ # Set autoincrement, depending on if it was provided explicitly.
279
273
  autoincrement: Literal["auto"] | bool = (
280
274
  column_obj.autoincrement if column_obj.autoincrement is not None else "auto"
281
275
  )