ctao_calibpipe-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (93)
  1. calibpipe/__init__.py +5 -0
  2. calibpipe/_dev_version/__init__.py +9 -0
  3. calibpipe/_version.py +21 -0
  4. calibpipe/atmosphere/__init__.py +1 -0
  5. calibpipe/atmosphere/atmosphere_containers.py +109 -0
  6. calibpipe/atmosphere/meteo_data_handlers.py +485 -0
  7. calibpipe/atmosphere/models/README.md +14 -0
  8. calibpipe/atmosphere/models/__init__.py +1 -0
  9. calibpipe/atmosphere/models/macobac.ecsv +23 -0
  10. calibpipe/atmosphere/models/reference_MDPs/__init__.py +1 -0
  11. calibpipe/atmosphere/models/reference_MDPs/ref_density_at_15km_ctao-north_intermediate.ecsv +8 -0
  12. calibpipe/atmosphere/models/reference_MDPs/ref_density_at_15km_ctao-north_summer.ecsv +8 -0
  13. calibpipe/atmosphere/models/reference_MDPs/ref_density_at_15km_ctao-north_winter.ecsv +8 -0
  14. calibpipe/atmosphere/models/reference_MDPs/ref_density_at_15km_ctao-south_summer.ecsv +8 -0
  15. calibpipe/atmosphere/models/reference_MDPs/ref_density_at_15km_ctao-south_winter.ecsv +8 -0
  16. calibpipe/atmosphere/models/reference_atmospheres/__init__.py +1 -0
  17. calibpipe/atmosphere/models/reference_atmospheres/reference_atmo_model_v0_ctao-north_intermediate.ecsv +73 -0
  18. calibpipe/atmosphere/models/reference_atmospheres/reference_atmo_model_v0_ctao-north_summer.ecsv +73 -0
  19. calibpipe/atmosphere/models/reference_atmospheres/reference_atmo_model_v0_ctao-north_winter.ecsv +73 -0
  20. calibpipe/atmosphere/models/reference_atmospheres/reference_atmo_model_v0_ctao-south_summer.ecsv +73 -0
  21. calibpipe/atmosphere/models/reference_atmospheres/reference_atmo_model_v0_ctao-south_winter.ecsv +73 -0
  22. calibpipe/atmosphere/models/reference_rayleigh_scattering_profiles/__init__.py +1 -0
  23. calibpipe/atmosphere/models/reference_rayleigh_scattering_profiles/reference_rayleigh_extinction_profile_v0_ctao-north_intermediate.ecsv +857 -0
  24. calibpipe/atmosphere/models/reference_rayleigh_scattering_profiles/reference_rayleigh_extinction_profile_v0_ctao-north_summer.ecsv +857 -0
  25. calibpipe/atmosphere/models/reference_rayleigh_scattering_profiles/reference_rayleigh_extinction_profile_v0_ctao-north_winter.ecsv +857 -0
  26. calibpipe/atmosphere/models/reference_rayleigh_scattering_profiles/reference_rayleigh_extinction_profile_v0_ctao-south_summer.ecsv +857 -0
  27. calibpipe/atmosphere/models/reference_rayleigh_scattering_profiles/reference_rayleigh_extinction_profile_v0_ctao-south_winter.ecsv +857 -0
  28. calibpipe/atmosphere/templates/request_templates/__init__.py +1 -0
  29. calibpipe/atmosphere/templates/request_templates/copernicus.json +11 -0
  30. calibpipe/atmosphere/templates/request_templates/gdas.json +12 -0
  31. calibpipe/core/__init__.py +39 -0
  32. calibpipe/core/common_metadata_containers.py +195 -0
  33. calibpipe/core/exceptions.py +87 -0
  34. calibpipe/database/__init__.py +24 -0
  35. calibpipe/database/adapter/__init__.py +23 -0
  36. calibpipe/database/adapter/adapter.py +80 -0
  37. calibpipe/database/adapter/database_containers/__init__.py +61 -0
  38. calibpipe/database/adapter/database_containers/atmosphere.py +199 -0
  39. calibpipe/database/adapter/database_containers/common_metadata.py +148 -0
  40. calibpipe/database/adapter/database_containers/container_map.py +59 -0
  41. calibpipe/database/adapter/database_containers/observatory.py +61 -0
  42. calibpipe/database/adapter/database_containers/table_version_manager.py +39 -0
  43. calibpipe/database/adapter/database_containers/version_control.py +17 -0
  44. calibpipe/database/connections/__init__.py +28 -0
  45. calibpipe/database/connections/calibpipe_database.py +60 -0
  46. calibpipe/database/connections/postgres_utils.py +97 -0
  47. calibpipe/database/connections/sql_connection.py +103 -0
  48. calibpipe/database/connections/user_confirmation.py +19 -0
  49. calibpipe/database/interfaces/__init__.py +71 -0
  50. calibpipe/database/interfaces/hashable_row_data.py +54 -0
  51. calibpipe/database/interfaces/queries.py +180 -0
  52. calibpipe/database/interfaces/sql_column_info.py +67 -0
  53. calibpipe/database/interfaces/sql_metadata.py +6 -0
  54. calibpipe/database/interfaces/sql_table_info.py +131 -0
  55. calibpipe/database/interfaces/table_handler.py +351 -0
  56. calibpipe/database/interfaces/types.py +96 -0
  57. calibpipe/tests/data/atmosphere/molecular_atmosphere/__init__.py +0 -0
  58. calibpipe/tests/data/atmosphere/molecular_atmosphere/contemporary_MDP.ecsv +34 -0
  59. calibpipe/tests/data/atmosphere/molecular_atmosphere/macobac.csv +852 -0
  60. calibpipe/tests/data/atmosphere/molecular_atmosphere/macobac.ecsv +23 -0
  61. calibpipe/tests/data/atmosphere/molecular_atmosphere/merged_file.ecsv +1082 -0
  62. calibpipe/tests/data/atmosphere/molecular_atmosphere/meteo_data_copernicus.ecsv +1082 -0
  63. calibpipe/tests/data/atmosphere/molecular_atmosphere/meteo_data_gdas.ecsv +66 -0
  64. calibpipe/tests/data/atmosphere/molecular_atmosphere/observatory_configurations.json +71 -0
  65. calibpipe/tests/data/utils/__init__.py +0 -0
  66. calibpipe/tests/data/utils/meteo_data_winter_and_summer.ecsv +12992 -0
  67. calibpipe/tests/unittests/atmosphere/astral_testing.py +107 -0
  68. calibpipe/tests/unittests/atmosphere/test_meteo_data_handler.py +775 -0
  69. calibpipe/tests/unittests/atmosphere/test_molecular_atmosphere.py +327 -0
  70. calibpipe/tests/unittests/database/test_table_handler.py +66 -0
  71. calibpipe/tests/unittests/database/test_types.py +38 -0
  72. calibpipe/tests/unittests/test_bootstrap_db.py +79 -0
  73. calibpipe/tests/unittests/utils/test_observatory.py +309 -0
  74. calibpipe/tools/atmospheric_base_tool.py +78 -0
  75. calibpipe/tools/atmospheric_model_db_loader.py +181 -0
  76. calibpipe/tools/basic_tool_with_db.py +38 -0
  77. calibpipe/tools/contemporary_mdp_producer.py +87 -0
  78. calibpipe/tools/init_db.py +37 -0
  79. calibpipe/tools/macobac_calculator.py +82 -0
  80. calibpipe/tools/molecular_atmospheric_model_producer.py +197 -0
  81. calibpipe/tools/observatory_data_db_loader.py +71 -0
  82. calibpipe/tools/reference_atmospheric_model_selector.py +201 -0
  83. calibpipe/utils/__init__.py +10 -0
  84. calibpipe/utils/observatory.py +486 -0
  85. calibpipe/utils/observatory_containers.py +26 -0
  86. calibpipe/version.py +24 -0
  87. ctao_calibpipe-0.1.0.dist-info/METADATA +86 -0
  88. ctao_calibpipe-0.1.0.dist-info/RECORD +93 -0
  89. ctao_calibpipe-0.1.0.dist-info/WHEEL +5 -0
  90. ctao_calibpipe-0.1.0.dist-info/entry_points.txt +8 -0
  91. ctao_calibpipe-0.1.0.dist-info/licenses/AUTHORS.md +13 -0
  92. ctao_calibpipe-0.1.0.dist-info/licenses/LICENSE +21 -0
  93. ctao_calibpipe-0.1.0.dist-info/top_level.txt +1 -0
calibpipe/database/interfaces/sql_table_info.py
@@ -0,0 +1,131 @@
+"""SQLTableInfo class."""
+
+from __future__ import annotations
+
+import sqlalchemy as sa
+from sqlalchemy.orm import declarative_base
+from sqlalchemy.schema import (
+    CheckConstraint,
+    ForeignKeyConstraint,
+    PrimaryKeyConstraint,
+    UniqueConstraint,
+)
+
+from ..interfaces import sql_metadata
+from .sql_column_info import SQLColumnInfo
+
+
+class InvalidTableError(Exception):
+    """Raised when a table is invalid, e.g. has no primary key."""
+
+
+class SQLTableInfo:
+    """
+    Collection of attributes defining a Table's columns.
+
+    The class contains the column information (`SQLColumnInfo`)
+    and additional arguments required to build the sqlalchemy
+    table when the `get_table()` method is called.
+
+    This class can provide useful information on the corresponding
+    table, for example the primary key, or the lists of undeferred
+    and deferred columns, i.e. the columns that must be loaded
+    directly and those that may be looked up in a cache system
+    (if one is implemented), respectively. Note that no cache
+    implementation lies here, only the information that some
+    columns should be deferred if possible.
+
+    The `SQLTableInfo` can also manage several tables of the same type
+    (e.g. for versioning, table_A_v1 and table_A_v2). When calling
+    the `get_table()` method, a custom table name can be given. The
+    object will ensure that only one table is created for a given
+    name (otherwise `sqlalchemy` cannot work properly).
+    """
+
+    table_base_class = declarative_base()
+
+    def __init__(
+        self,
+        table_name: str,
+        metadata: sql_metadata,
+        columns: list[SQLColumnInfo],
+        constraints: list[
+            ForeignKeyConstraint
+            | UniqueConstraint
+            | CheckConstraint
+            | PrimaryKeyConstraint
+        ]
+        | None = None,
+    ) -> None:
+        """Initialize the table data and sqlalchemy metadata."""
+        self.table_name = table_name
+        self.metadata = metadata
+        self.columns = columns
+        self.constraints = constraints if constraints else []
+        self._table_instances: dict[str, sa.Table] = {}
+
+    def get_primary_keys(self) -> list[SQLColumnInfo]:
+        """Get the list of primary keys for the table.
+
+        Returns
+        -------
+        list
+            list with the SQLColumnInfo objects that are the primary keys
+
+        Raises
+        ------
+        InvalidTableError
+            If there is no primary key in the table
+        """
+        pk_columns = []
+        for column in self.columns:
+            if column.is_primary_key():
+                pk_columns.append(column)
+        if pk_columns:
+            return pk_columns
+        raise InvalidTableError(f"Table {self.table_name!r} has no primary key.")
+
+    def get_deferred_columns(self) -> list[SQLColumnInfo]:
+        """
+        Return the columns that must be deferred.
+
+        Deferred columns won't be loaded directly when queried.
+        """
+        return [column for column in self.columns if column.is_deferred]
+
+    def get_undeferred_columns(self) -> list[SQLColumnInfo]:
+        """Return the columns that must not be deferred.
+
+        These columns are loaded directly when queried.
+        """
+        return [column for column in self.columns if not column.is_deferred]
+
+    def get_table(self, table_name: str | None = None) -> sa.Table:
+        """
+        Return a table with a given name, creating it if necessary.
+
+        Parameters
+        ----------
+        table_name: str (optional, default=None)
+            Name of the table to create. If not given, the `table_name`
+            attribute is used. If a table with the given name has
+            already been created, it is returned and no new table
+            is generated.
+        """
+        table_name = table_name or self.table_name
+        if table_name not in self._table_instances:
+            if table_name in self.metadata.tables:
+                self._table_instances[table_name] = sa.Table(table_name, self.metadata)
+            else:
+                self._table_instances[table_name] = self._generate_table(
+                    table_name=table_name
+                )
+        return self._table_instances[table_name]
+
+    def _generate_table(self, table_name: str) -> sa.Table:
+        """Generate a table corresponding to the info with a specific name."""
+        return sa.Table(
+            table_name,
+            self.metadata,
+            *[col.generate_column() for col in self.columns],
+            *self.constraints,
+        )
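
For context, a minimal sketch of the versioned-table behaviour that the `SQLTableInfo` docstring describes. The `SQLColumnInfo` constructor is not part of this diff, so the keyword arguments passed to it below are hypothetical, for illustration only:

# Hypothetical sketch: SQLColumnInfo's real constructor is not shown in
# this diff; name/type_/primary_key are assumed keyword arguments.
from calibpipe.database.interfaces import sql_metadata
from calibpipe.database.interfaces.sql_column_info import SQLColumnInfo
from calibpipe.database.interfaces.sql_table_info import SQLTableInfo
from calibpipe.database.interfaces.types import Integer

info = SQLTableInfo(
    table_name="table_A_v1",
    metadata=sql_metadata,
    columns=[SQLColumnInfo(name="ID", type_=Integer, primary_key=True)],
)

table_v1 = info.get_table()              # generates "table_A_v1"
table_v2 = info.get_table("table_A_v2")  # second versioned table, same columns
assert info.get_table() is table_v1      # cached: one sa.Table instance per name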
calibpipe/database/interfaces/table_handler.py
@@ -0,0 +1,351 @@
+"""Utilities for CalibPipe data."""
+
+from datetime import datetime, timezone
+from typing import Any
+
+import astropy.units as u
+import numpy as np  # noqa: F401
+import sqlalchemy as sa
+from astropy.table import QTable
+from ctapipe.core import Container
+
+import calibpipe.core.common_metadata_containers as common_metadata_module
+
+from ...core.exceptions import DBStorageError
+from ..adapter.adapter import Adapter
+from ..adapter.database_containers.container_map import ContainerMap
+from ..adapter.database_containers.table_version_manager import TableVersionManager
+from ..connections import CalibPipeDatabase
+from ..interfaces import sql_metadata
+
+
+class TableHandler:
+    """
+    Handles tables in the CalibPipe database.
+
+    The first method returns a valid insertion for a DB, made of the table
+    instance and the values to be inserted. The second method simply inserts
+    values into a DB, given the DB connection, the table and the values.
+
+    """
+
+    @staticmethod
+    def get_database_table_insertion(
+        container: Container,
+        version: str | None = None,
+    ) -> tuple[sa.Table, dict[str, Any]]:
+        """Return a valid insertion for a DB made of the table instance and the values to insert."""
+        table, kwargs = Adapter.to_postgres(container, version=version)
+        if table is None:
+            raise TypeError(f"Table cannot be created for {type(container)}.")
+        return table, kwargs
+
+    @staticmethod
+    def insert_row_in_database(
+        table: sa.Table,
+        kwargs: dict[str, Any],
+        connection: CalibPipeDatabase,
+    ) -> None:
+        """Insert values in a DB table as a row."""
+        connection.execute(sa.insert(table).values(**kwargs))
+
+    @staticmethod
+    def read_table_from_database(
+        container: Container,
+        connection: CalibPipeDatabase,
+        condition: str | None = None,
+    ) -> QTable:
+        """
+        Read a table from the DB and return it as a QTable object.
+
+        The optional argument `condition` shall have the following form:
+        `c.<column_name> <operator> <value>`,
+        or a combination thereof using the `&` and `|` operators.
+        In a compound condition, every single comparison must be enclosed in parentheses.
+        """
+        table = ContainerMap.map_to_db_container(container).get_table()
+        if condition:
+            query = table.select().where(
+                eval(condition.replace("c.", "table.c."))  # pylint: disable=eval-used
+            )
+        else:
+            query = table.select()
+        rows = connection.execute(query).fetchall()
+        if not rows:
+            return QTable(
+                names=table.columns.keys(),
+                units=[
+                    1 * u.Unit(c.comment) if c.comment else None for c in table.columns
+                ],
+            )
+        return QTable(
+            rows=rows,
+            names=table.columns.keys(),
+            units=[1 * u.Unit(c.comment) if c.comment else None for c in table.columns],
+        )
+
+    @staticmethod
+    def get_compatible_version(
+        version_table: sa.Table,
+        table_name: str,
+        version: str,
+        connection: CalibPipeDatabase,
+    ) -> str:
+        """
+        Get a compatible version for a certain table from the version table.
+
+        If no compatible version of the table is available, the new version
+        of the table will be added to the version table.
+        """
+        version_major = version.split(".")[0]
+        query = sa.select(version_table.c.version).where(
+            version_table.c.version.like(f"{version_major}%"),
+            version_table.c.name == table_name,
+        )
+        query_results = connection.execute(query).first()
+        if query_results is None:
+            vals = {
+                "name": table_name,
+                "version": version,
+                "validity_start": datetime(2023, 1, 1, 0, 0, 1, tzinfo=timezone.utc),
+                "validity_end": datetime(2023, 1, 1, 0, 0, 2, tzinfo=timezone.utc),
+            }
+            TableHandler.insert_row_in_database(
+                version_table,
+                vals,
+                connection=connection,
+            )
+            return version
+        comp_version = query_results[0]
+        return comp_version
+
+    @staticmethod
+    def update_tables_info(
+        table: sa.Table,
+        version_table: sa.Table,
+        table_name: str,
+        comp_version: str,
+        table_version: str,
+        connection: CalibPipeDatabase,
+    ) -> str:
+        """
+        Update the tables' info.
+
+        The updated min and max timestamps are taken from the data table,
+        and a version check is performed to update the version table.
+        The name of the table is also updated accordingly if the version has changed.
+        """
+        msg = "DB tables have been updated successfully."
+        query = sa.select(
+            sa.func.min(table.c.validity_start).label("min_time"),
+            sa.func.max(table.c.validity_end).label("max_time"),
+        )
+        results = connection.execute(query).first()
+
+        if float(table_version.split(".")[1]) > float(comp_version.split(".")[1]):
+            TableHandler.update_version_table(
+                version_table,
+                table_name,
+                comp_version,
+                table_version,
+                results.min_time,
+                results.max_time,
+                connection,
+            )
+            TableHandler.update_table_name(table, table_version, connection)
+            return (
+                msg
+                + f" Version has been updated from v{comp_version} to v{table_version}."
+            )
+        TableHandler.update_version_table(
+            version_table,
+            table_name,
+            comp_version,
+            comp_version,
+            results.min_time,
+            results.max_time,
+            connection,
+        )
+        return msg
+
+    @staticmethod
+    def update_version_table(
+        version_table: sa.Table,
+        table_name: str,
+        old_version: str,
+        new_version: str,
+        min_time: datetime,
+        max_time: datetime,
+        connection: CalibPipeDatabase,
+    ) -> None:
+        """Update the version of a table with the new version in the version table of the DB."""
+        stmt = (
+            sa.update(version_table)
+            .where(
+                version_table.c.name == table_name,
+                version_table.c.version == old_version,
+            )
+            .values(version=new_version, validity_start=min_time, validity_end=max_time)
+        )
+        connection.execute(stmt)
+
+    @staticmethod
+    def update_table_name(
+        table: sa.Table,
+        version: str,
+        connection: CalibPipeDatabase,
+    ) -> None:
+        """Update the name of a table with the new version."""
+        new_table_name = TableVersionManager.update_version(table.name, version)
+        stmt = sa.text(f"ALTER TABLE {table} RENAME TO {new_table_name};")
+        connection.execute(stmt)
+
+    @staticmethod
+    def prepare_db_tables(containers, db_config):
+        """
+        Create empty tables in the CalibPipe DB for the selected calibration containers.
+
+        Parameters
+        ----------
+        containers : list[Container]
+            list of calibpipe containers or ContainerMeta instances
+            that will be created as empty tables in the DB
+
+        db_config : dict
+            CalibPipe configuration with the database connection settings
+        """
+        try:
+            with CalibPipeDatabase(**db_config) as connection:
+                sql_metadata.reflect(bind=connection.engine, extend_existing=True)
+
+                # Create empty main data tables
+                for cp_container in containers:
+                    if isinstance(cp_container, Container):
+                        db_container = ContainerMap.map_to_db_container(
+                            type(cp_container)
+                        )
+                    else:
+                        db_container = ContainerMap.map_to_db_container(cp_container)
+                    if not sa.inspect(connection.engine).has_table(
+                        db_container.table_name
+                    ):
+                        db_container.get_table()
+                sql_metadata.create_all(bind=connection.engine)
+        except sa.exc.DatabaseError:
+            raise DBStorageError("Issues with connection to the CalibPipe DB")
+
+    @staticmethod
+    def upload_data(calibpipe_data_container, config_data):
+        """
+        Universal function to upload data and metadata to the DB.
+
+        Metadata is uploaded based on the values in the dictionary config_data.
+        It is possible to update fields in the dictionary while performing
+        calibration, and to pass the final metadata collection to this function.
+
+        Parameters
+        ----------
+        calibpipe_data_container : ctapipe.core.Container
+            calibpipe container with the data that will be uploaded to the main table of the DB
+
+        config_data : dict
+            dictionary with configurable values;
+            should contain at least the DB configuration
+            and the metadata information for each metadata table.
+
+        Returns
+        -------
+        insertion_list : list
+            list of metadata dictionaries that were uploaded to the DB
+        """
+        insertion_list = []
+        metadata_dict = {
+            container: values
+            for container, values in config_data.items()
+            if "Reference" in container
+        }
+
+        data_db_container = ContainerMap.map_to_db_container(
+            type(calibpipe_data_container)
+        )
+        has_autoincrement_pk = any(
+            col.autoincrement for col in data_db_container.get_table().c
+        )
+        is_single_pk = len(data_db_container.get_primary_keys()) == 1
+        # Check that there is exactly one autoincremented PK in the table
+        if has_autoincrement_pk and is_single_pk:
+            pk_name = data_db_container.get_primary_keys()[0].name
+            try:
+                with CalibPipeDatabase(
+                    **config_data["database_configuration"]
+                ) as connection:
+                    TableHandler.insert_row_in_database(
+                        data_db_container.get_table(),
+                        calibpipe_data_container,
+                        connection,
+                    )
+                    # Get the last uploaded DB record,
+                    # to which all metadata will be attached
+                    stmt = (
+                        sa.select(data_db_container.get_table())
+                        .order_by(sa.desc(data_db_container.get_table().c[pk_name]))
+                        .limit(1)
+                    )
+                    last_db_record = connection.execute(stmt).fetchone()
+                    data_pk_value = last_db_record._asdict()[pk_name]
+
+                    # The Reference metadata must be processed separately,
+                    # because it contains the autoincremented PK
+                    # to which all other metadata are connected
+                    cp_container = getattr(
+                        common_metadata_module, "ReferenceMetadataContainer"
+                    )
+                    db_container = ContainerMap.map_to_db_container(cp_container)
+                    reference_meta_insertion = cp_container(
+                        ID_optical_throughput=data_pk_value,
+                        **config_data["ReferenceMetadataContainer"],
+                    )
+                    TableHandler.insert_row_in_database(
+                        db_container.get_table(), reference_meta_insertion, connection
+                    )
+
+                    # Extract the value of the Reference metadata PK
+                    # and connect all other metadata tables to it
+                    stmt = (
+                        sa.select(db_container.get_table())
+                        .order_by(sa.desc(db_container.get_table().c.ID))
+                        .limit(1)
+                    )
+                    metadata_id = connection.execute(stmt).fetchone()
+
+                    # Remove the Reference metadata from the dict
+                    # so it is not processed a second time
+                    metadata_dict.pop("ReferenceMetadataContainer", None)
+
+                    # Create the list of values that should be inserted
+                    # into the metadata tables in the DB
+                    for container in metadata_dict.keys():
+                        cp_container = getattr(common_metadata_module, container)
+                        insertion_list.append(
+                            cp_container(ID=metadata_id.ID, **config_data[container])
+                        )
+
+                    # Upload the metadata values to the DB
+                    for insertion, container in zip(
+                        insertion_list, metadata_dict.keys()
+                    ):
+                        cp_container = getattr(common_metadata_module, container)
+                        db_container = ContainerMap.map_to_db_container(cp_container)
+                        TableHandler.insert_row_in_database(
+                            db_container.get_table(), insertion, connection
+                        )
+
+                    insertion_list = [reference_meta_insertion] + insertion_list
+            except sa.exc.DatabaseError:
+                raise DBStorageError("Issues with connection to the CalibPipe DB")
+        else:
+            raise ValueError(
+                f"Table '{data_db_container.table_name}' "
+                "doesn't contain a single autoincremented primary key."
+            )
+
+        return insertion_list
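
As an aside, the `condition` mini-DSL accepted by `read_table_from_database` can be hard to picture from the docstring alone. A standalone sketch of the rewrite-and-eval mechanism, using an illustrative stand-in table rather than an actual CalibPipe one:

# Standalone illustration of the condition DSL: "c.<column>" is rewritten
# to "table.c.<column>" and the string is evaluated into a SQLAlchemy
# boolean clause. The table here is a stand-in, not a CalibPipe table.
import sqlalchemy as sa

metadata = sa.MetaData()
table = sa.Table(
    "demo",
    metadata,
    sa.Column("version", sa.String),
    sa.Column("validity_start", sa.DateTime),
)

condition = "(c.version == '1.0.0') & (c.validity_start >= '2023-01-01')"
clause = eval(condition.replace("c.", "table.c."))  # as done by TableHandler
print(table.select().where(clause))  # renders the SELECT ... WHERE SQL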
calibpipe/database/interfaces/types.py
@@ -0,0 +1,96 @@
+"""
+Type definitions for SQLAlchemy.
+
+These type definitions allow us to define database fields and
+containers while staying almost completely decoupled from SQLAlchemy
+(without direct coupling).
+
+In particular, SQLColumnInfo and SQLTableInfo use these generic
+types and not the sqlalchemy types directly.
+
+The NDArray type is defined explicitly to implement the
+serialization/deserialization np.ndarray <-> bytes and the
+(optional) zlib compression/decompression of the byte data.
+
+"""
+
+import pickle
+import zlib
+
+import numpy as np
+import sqlalchemy as sa
+import sqlalchemy.sql.sqltypes
+from sqlalchemy.dialects.postgresql import ARRAY, DOUBLE_PRECISION
+
+ColumnType = sqlalchemy.sql.sqltypes.TypeEngine
+
+Boolean: ColumnType = sa.Boolean
+
+SmallInteger: ColumnType = sa.SmallInteger
+Integer: ColumnType = sa.Integer
+BigInteger: ColumnType = sa.BigInteger
+Float: ColumnType = sa.Float
+Double: ColumnType = DOUBLE_PRECISION
+Numeric: ColumnType = sa.Numeric
+Binary: ColumnType = sa.types.LargeBinary
+String: ColumnType = sa.String
+
+ArrayF1D: ColumnType = ARRAY(Float, dimensions=1)
+ArrayF2D: ColumnType = ARRAY(Float, dimensions=2)
+ArrayF3D: ColumnType = ARRAY(Float, dimensions=3)
+
+Date: ColumnType = sa.Date
+Time: ColumnType = sa.Time
+DateTime: ColumnType = sa.DateTime
+
+
+class NDArray(sa.types.TypeDecorator):  # pylint: disable=too-many-ancestors
+    """
+    Type for numpy.ndarray binding, including data compression.
+
+    The array is stored as a compressed byte string in the database.
+    The class implements the binding between the `np.ndarray` in the
+    program memory and the byte string stored in the DB.
+
+    Compression can be removed or modified, but the two process methods
+    must be the inverse of each other for the binding to work.
+    Ignoring the dialect parameter, which is not used anyway, this means
+    that the following assertion should always pass::
+
+        db_arr: NDArray
+        arr: np.ndarray
+        arr_bytes: bytes = db_arr.process_bind_param(arr)
+        recov_arr: np.ndarray = db_arr.process_result_value(arr_bytes)
+        assert (arr == recov_arr).all()
+
+    """
+
+    impl = sa.types.LargeBinary  # Byte storage in the DB
+    cache_ok: bool = True  # Results of the process methods can be cached
+
+    def process_bind_param(self, value: np.ndarray, dialect) -> bytes:
+        """
+        Serialize a np.ndarray into a byte object to store in the DB.
+
+        The array is first serialized into bytes and compressed using
+        the default zlib compression algorithm.
+        """
+        return zlib.compress(pickle.dumps(value))
+
+    def process_result_value(self, value: bytes, dialect) -> np.ndarray:
+        """
+        Deserialize a np.ndarray from bytes read from the DB.

+        The bytes are first decompressed, then the array is loaded from
+        the decompressed byte string.
+        """
+        return pickle.loads(zlib.decompress(value))
+
+    def process_literal_param(self, value: np.ndarray, dialect) -> str:
+        """Representation of the NDArray object."""
+        return f"NDArray(shape={value.shape}, dtype={value.dtype})"
+
+    @property
+    def python_type(self) -> type:
+        """Return the python type of the underlying object represented by the byte string."""
+        return np.ndarray
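
A round-trip sketch of the NDArray binding shown above; since the implementation ignores the `dialect` argument, `None` is passed here:

# Round trip through NDArray's bind/result processing, as described in
# the class docstring. dialect is unused by the implementation.
import numpy as np

from calibpipe.database.interfaces.types import NDArray

arr = np.arange(6, dtype=float).reshape(2, 3)
nd = NDArray()
blob = nd.process_bind_param(arr, None)          # pickle + zlib -> bytes
recovered = nd.process_result_value(blob, None)  # bytes -> np.ndarray
assert (arr == recovered).all()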
calibpipe/tests/data/atmosphere/molecular_atmosphere/contemporary_MDP.ecsv
@@ -0,0 +1,34 @@
+# %ECSV 1.0
+# ---
+# datatype:
+# - {name: height, unit: m, datatype: float64}
+# - {name: number density, unit: 1 / cm3, datatype: float64}
+# schema: astropy-2.0
+height "number density"
+0.0 2.5226221253975482e+19
+1000.0 2.2612248701943214e+19
+2000.0 2.0188159791161926e+19
+3000.0 1.821777495861036e+19
+4000.0 1.6455659215053523e+19
+5000.0 1.4943555611850443e+19
+6000.0 1.3555512867555471e+19
+7000.0 1.2239497020511496e+19
+8000.0 1.1015931343575489e+19
+9000.0 9.89813592093043e+18
+10000.0 8.860096921932773e+18
+11000.0 7.904336449894107e+18
+12000.0 6.990997567438649e+18
+13000.0 6.139328580924725e+18
+14000.0 5.360396027080355e+18
+15000.0 4.644145761612367e+18
+16000.0 3.989701117077186e+18
+17000.0 3.4018252254753705e+18
+18000.0 2.88252588630463e+18
+19000.0 2.428328176278362e+18
+20000.0 2.035126555710463e+18
+21000.0 1.6988154849148237e+18
+22000.0 1.4152894242053386e+18
+23000.0 1.1804428338959007e+18
+24000.0 9.901701743004028e+17
+25000.0 8.403659057327388e+17
+26000.0 7.269244885068019e+17
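
The fixture above is a standard astropy ECSV file; once the package is installed, it can be read back with its units (m and 1 / cm3) attached, e.g.:

# Reading the contemporary_MDP.ecsv test fixture with astropy; the units
# declared in the ECSV header are attached to the columns.
from astropy.table import QTable

mdp = QTable.read(
    "calibpipe/tests/data/atmosphere/molecular_atmosphere/contemporary_MDP.ecsv",
    format="ascii.ecsv",
)
print(mdp["height"].unit, mdp["number density"].unit)  # m, 1 / cm3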