PyPI - macrostrat.database - Versions diffs - 3.2.0__tar.gz → 3.3.0__tar.gz - Mend

macrostrat.database 3.2.0 (tar.gz) → 3.3.0 (tar.gz)

This diff shows the differences between the contents of two publicly available package versions, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.

Files changed (15)

{macrostrat_database-3.2.0 → macrostrat_database-3.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: macrostrat.database
-Version: 3.2.0
+Version: 3.3.0
 Summary: A SQLAlchemy-based database toolkit.
 Author: Daven Quinn
 Author-email: dev@davenquinn.com
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: GeoAlchemy2 (>=0.14.0,<0.15.0)
 Requires-Dist: SQLAlchemy (>=2.0.18,<3.0.0)
 Requires-Dist: SQLAlchemy-Utils (>=0.41.1,<0.42.0)

macrostrat_database-3.3.0/macrostrat/database/__init__.py ADDED Viewed

@@ -0,0 +1,338 @@
+import warnings
+from contextlib import contextmanager
+from enum import Enum
+from pathlib import Path
+from typing import Optional, Union
+from psycopg2.errors import InvalidSavepointSpecification
+from psycopg2.sql import Identifier
+from sqlalchemy import URL, MetaData, create_engine, inspect, text
+from sqlalchemy.exc import IntegrityError, InternalError
+from sqlalchemy.ext.compiler import compiles
+from sqlalchemy.orm import Session, scoped_session, sessionmaker
+from sqlalchemy.sql.expression import Insert
+from macrostrat.utils import get_logger
+from .mapper import DatabaseMapper
+from .postgresql import on_conflict, prefix_inserts  # noqa
+from .utils import (  # noqa
+    create_database,
+    database_exists,
+    drop_database,
+    get_dataframe,
+    get_or_create,
+    reflect_table,
+    run_fixtures,
+    run_query,
+    run_sql,
+)
+metadata = MetaData()
+log = get_logger(__name__)
+class Database(object):
+    mapper: Optional[DatabaseMapper] = None
+    metadata: MetaData
+    session: Session
+    instance_params: dict
+    __inspector__ = None
+    def __init__(self, db_conn: Union[str, URL], *, echo_sql=False, **kwargs):
+        """
+        Wrapper for interacting with a database using SQLAlchemy.
+        Optimized for use with PostgreSQL, but usable with SQLite
+        as well.
+        Args:
+            db_conn (str): Connection string for the database.
+        Keyword Args:
+            echo_sql (bool): If True, will echo SQL commands to the
+                console. Default is False.
+            instance_params (dict): Parameters to
+                pass to queries and other database operations.
+        """
+        compiles(Insert, "postgresql")(prefix_inserts)
+        self.instance_params = kwargs.pop("instance_params", {})
+        log.info(f"Setting up database connection '{db_conn}'")
+        self.engine = create_engine(db_conn, echo=echo_sql, **kwargs)
+        self.metadata = kwargs.get("metadata", metadata)
+        # Scoped session for database
+        # https://docs.sqlalchemy.org/en/13/orm/contextual.html#unitofwork-contextual
+        # https://docs.sqlalchemy.org/en/13/orm/session_basics.html#session-faq-whentocreate
+        self._session_factory = sessionmaker(bind=self.engine)
+        self.session = scoped_session(self._session_factory)
+        # Use the self.session_scope function to more explicitly manage sessions.
+    def create_tables(self):
+        """
+        Create all tables described by the database's metadata instance.
+        """
+        metadata.create_all(bind=self.engine)
+    def automap(self, **kwargs):
+        log.info("Automapping the database")
+        self.mapper = DatabaseMapper(self)
+        self.mapper.reflect_database(**kwargs)
+    @contextmanager
+    def session_scope(self, commit=True):
+        """Provide a transactional scope around a series of operations."""
+        # self.__old_session = self.session
+        # session = self._session_factory()
+        session = self.session
+        try:
+            yield session
+            if commit:
+                session.commit()
+        except Exception as err:
+            session.rollback()
+            raise err
+        finally:
+            session.close()
+    def _flush_nested_objects(self, session):
+        """
+        Flush objects remaining in a session (generally these are objects loaded
+        during schema-based importing).
+        """
+        for object in session:
+            try:
+                session.flush(objects=[object])
+                log.debug(f"Successfully flushed instance {object}")
+            except IntegrityError as err:
+                session.rollback()
+                log.debug(err)
+    def run_sql(self, fn, params=None, **kwargs):
+        """Executes SQL files or query strings using the run_sql function.
+        Args:
+            fn (str|Path): SQL file or query string to execute.
+            params (dict): Parameters to pass to the query.
+        Keyword Args:
+            use_instance_params (bool): If True, will use the instance_params set on
+                the Database object. Default is True.
+        Returns: Iterator of results from the query.
+        """
+        params = self._setup_params(params, kwargs)
+        return iter(run_sql(self.session, fn, params, **kwargs))
+    def run_query(self, sql, params=None, **kwargs):
+        """Run a single query on the database object, returning the result.
+        Args:
+            sql (str): SQL file or query to execute.
+            params (dict): Parameters to pass to the query.
+        Keyword Args:
+            use_instance_params (bool): If True, will use the instance_params set on
+                the Database object. Default is True.
+        """
+        params = self._setup_params(params, kwargs)
+        return run_query(self.session, sql, params, **kwargs)
+    def run_fixtures(self, fixtures: Union[Path, list[Path]], params=None, **kwargs):
+        """Run a set of fixtures on the database object.
+        Args:
+            fixtures (Path|list[Path]): Path to a directory of fixtures or a list of paths to fixture files.
+            params (dict): Parameters to pass to the query.
+        Keyword Args:
+            use_instance_params (bool): If True, will use the instance_params set on
+                the Database object. Default is True.
+        """
+        params = self._setup_params(params, kwargs)
+        return run_fixtures(self.session, fixtures, params, **kwargs)
+    def _setup_params(self, params, kwargs):
+        use_instance_params = kwargs.pop("use_instance_params", True)
+        if params is None:
+            params = {}
+        if use_instance_params:
+            params.update(self.instance_params)
+        return params
+    def exec_sql(self, sql, params=None, **kwargs):
+        """Executes SQL files passed"""
+        warnings.warn(
+            "exec_sql is deprecated and will be removed in version 4.0. Use run_sql instead",
+            DeprecationWarning,
+        )
+        return self.run_sql(sql, params, **kwargs)
+    def get_dataframe(self, *args):
+        """Returns a Pandas DataFrame from a SQL query"""
+        return get_dataframe(self.engine, *args)
+    @property
+    def inspector(self):
+        if self.__inspector__ is None:
+            self.__inspector__ = inspect(self.engine)
+        return self.__inspector__
+    def entity_names(self, **kwargs):
+        """
+        Returns an iterator of names of *schema objects*
+        (both tables and views) from a the database.
+        """
+        yield from self.inspector.get_table_names(**kwargs)
+        yield from self.inspector.get_view_names(**kwargs)
+    def get(self, model, *args, **kwargs):
+        if isinstance(model, str):
+            model = getattr(self.model, model)
+        return self.session.query(model).get(*args, **kwargs)
+    def get_or_create(self, model, **kwargs):
+        """
+        Get an instance of a model, or create it if it doesn't
+        exist.
+        """
+        if isinstance(model, str):
+            model = getattr(self.model, model)
+        return get_or_create(self.session, model, **kwargs)
+    def reflect_table(self, *args, **kwargs):
+        """
+        One-off reflection of a database table or view. Note: for most purposes,
+        it will be better to use the database tables automapped at runtime using
+        `self.automap()`. Then, tables can be accessed using the
+        `self.table` object. However, this function can be useful for views (which
+        are not reflected automatically), or to customize type definitions for mapped
+        tables.
+        A set of `column_args` can be used to pass columns to override with the mapper, for
+        instance to set up foreign and primary key constraints.
+        https://docs.sqlalchemy.org/en/13/core/reflection.html#reflecting-views
+        """
+        warnings.warn(
+            "reflect_table is deprecated and will be removed in version 4.0. Shift away from table refection, or use reflect_table from the macrostrat.database.utils module.",
+            DeprecationWarning,
+        )
+        return reflect_table(self.engine, *args, **kwargs)
+    @property
+    def table(self):
+        """
+        Map of all tables in the database as SQLAlchemy table objects
+        """
+        if self.mapper is None or self.mapper._tables is None:
+            self.automap()
+        return self.mapper._tables
+    @property
+    def model(self):
+        """
+        Map of all tables in the database as SQLAlchemy models
+        https://docs.sqlalchemy.org/en/latest/orm/extensions/automap.html
+        """
+        if self.mapper is None or self.mapper._models is None:
+            self.automap()
+        return self.mapper._models
+    @property
+    def mapped_classes(self):
+        return self.model
+    @contextmanager
+    def transaction(self, *, rollback="on-error", connection=None, raise_errors=True):
+        """Create a database session that can be rolled back after use.
+        This is similar to the `session_scope` method but includes
+        more fine-grained control over transactions. The two methods may be integrated
+        in the future.
+        This is based on the Sparrow's implementation:
+        https://github.com/EarthCubeGeochron/Sparrow/blob/main/backend/conftest.py
+        It can be effectively used in a Pytest fixture like so:
+        ```
+        @fixture(scope="class")
+        def db(base_db):
+            with base_db.transaction(rollback=True):
+                yield base_db
+        """
+        if connection is None:
+            connection = self.engine.connect()
+        transaction = connection.begin()
+        session = Session(bind=connection)
+        prev_session = self.session
+        self.session = session
+        should_rollback = rollback == "always"
+        try:
+            yield self
+        except Exception as e:
+            should_rollback = rollback != "never"
+            if raise_errors:
+                raise e
+        finally:
+            if should_rollback:
+                transaction.rollback()
+            else:
+                transaction.commit()
+            session.close()
+            self.session = prev_session
+    savepoint_counter = 0
+    @contextmanager
+    def savepoint(self, name=None, rollback="on-error", connection=None):
+        """A PostgreSQL-specific savepoint context manager. This is similar to the
+        `transaction` context manager but uses savepoints directly for simpler operation.
+        Notably, it supports nested savepoints, a feature that is difficult in SQLAlchemy's `transaction`
+        model.
+        This function is not yet drop-in compatible with the `transaction` context manager, but that
+        is a future goal.
+        """
+        if name is None:
+            name = f"sp_{self.savepoint_counter}"
+            self.savepoint_counter += 1
+        _prev_session = self.session
+        if connection is None:
+            connection = self.session.connection()
+        params = {"name": Identifier(name)}
+        run_query(connection, "SAVEPOINT {name}", params)
+        should_rollback = rollback == "always"
+        self.session = Session(bind=connection)
+        try:
+            yield name
+        except Exception as e:
+            should_rollback = rollback != "never"
+            raise e
+        finally:
+            _clear_savepoint(connection, name, rollback=should_rollback)
+            self.session.close()
+            self.session = _prev_session
+def _clear_savepoint(connection, name, rollback=True):
+    params = {"name": Identifier(name)}
+    try:
+        if rollback:
+            run_query(connection, "ROLLBACK TO SAVEPOINT {name}", params)
+        else:
+            run_query(connection, "RELEASE SAVEPOINT {name}", params)
+    except InternalError as err:
+        if isinstance(err.orig, InvalidSavepointSpecification):
+            log.warning(
+                f"Savepoint {name} does not exist; we may have already rolled back."
+            )
+            run_query(connection, "ROLLBACK")

{macrostrat_database-3.2.0 → macrostrat_database-3.3.0}/macrostrat/database/transfer/move_tables.py RENAMED Viewed

@@ -4,8 +4,8 @@ from sqlalchemy.engine import Engine
 from macrostrat.utils import get_logger
-from .dump_database import _pg_dump
-from .restore_database import _pg_restore
+from .dump_database import pg_dump
+from .restore_database import pg_restore
 from .utils import print_stdout, print_stream_progress
 log = get_logger(__name__)
@@ -45,8 +45,8 @@ async def move_tables(
     log.debug(f"Dump args: {dump_args}")
     log.debug(f"Restore args: {restore_args}")
-    source = await _pg_dump(from_database, **kwargs, args=dump_args)
-    dest = await _pg_restore(to_database, **kwargs, args=restore_args)
+    source = await pg_dump(from_database, **kwargs, args=dump_args)
+    dest = await pg_restore(to_database, **kwargs, args=restore_args)
     await asyncio.gather(
         asyncio.create_task(print_stream_progress(source.stdout, dest.stdin)),

{macrostrat_database-3.2.0 → macrostrat_database-3.3.0}/macrostrat/database/utils.py RENAMED Viewed

@@ -9,6 +9,7 @@ from click import echo, secho
 from psycopg2.extensions import set_wait_callback
 from psycopg2.extras import wait_select
 from psycopg2.sql import SQL, Composable, Composed
+from rich.console import Console
 from sqlalchemy import MetaData, create_engine, text
 from sqlalchemy.engine import Connection, Engine
 from sqlalchemy.exc import (
@@ -346,6 +347,45 @@ def run_query(connectable, query, params=None, **kwargs):
     )
+def get_sql_files(
+    fixtures: Union[Path, list[Path]], recursive=False, order_by_name=True
+):
+    files = []
+    if isinstance(fixtures, Path):
+        fixtures = [fixtures]
+    for fixture in fixtures:
+        files.extend(_get_sql_files(fixture, recursive))
+    if order_by_name:
+        files = sorted(files)
+    return files
+def _get_sql_files(fixture: Path, recursive=False):
+    if not fixture.exists():
+        raise FileNotFoundError(f"Fixture {fixture} does not exist.")
+    if fixture.is_file() and fixture.suffix == ".sql":
+        return [fixture]
+    _fn = "rglob" if recursive else "glob"
+    files = getattr(fixture, _fn)("*.sql")
+    return [r for r in files if r.is_file()]
+def run_fixtures(connectable, fixtures: Union[Path, list[Path]], params=None, **kwargs):
+    """
+    Run a set of SQL fixture files on a database. Fixtures can be passed as a list of file paths or a directory.
+    Fixtures are ordered by name by default, but this can be disabled.
+    """
+    recursive = kwargs.pop("recursive", False)
+    order_by_name = kwargs.pop("order_by_name", True)
+    console = kwargs.pop("console", Console(stderr=True))
+    files = get_sql_files(fixtures, recursive=recursive, order_by_name=order_by_name)
+    for fixture in files:
+        console.print(f"[cyan bold]{fixture}[/]")
+        run_sql_file(connectable, fixture, params, **kwargs)
+        console.print()
 def run_sql(*args, **kwargs):
     """
     Run a query on a SQLAlchemy connectable.

{macrostrat_database-3.2.0 → macrostrat_database-3.3.0}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ authors = ["Daven Quinn <dev@davenquinn.com>"]
 description = "A SQLAlchemy-based database toolkit."
 name = "macrostrat.database"
 packages = [{ include = "macrostrat" }]
-version = "3.2.0"
+version = "3.3.0"
 [tool.poetry.dependencies]
 GeoAlchemy2 = "^0.14.0"

macrostrat_database-3.2.0/macrostrat/database/__init__.py DELETED Viewed

@@ -1,179 +0,0 @@
-import warnings
-from contextlib import contextmanager
-from typing import Optional
-from sqlalchemy import MetaData, create_engine, inspect, text
-from sqlalchemy.exc import IntegrityError
-from sqlalchemy.ext.compiler import compiles
-from sqlalchemy.orm import Session, scoped_session, sessionmaker
-from sqlalchemy.sql.expression import Insert
-from macrostrat.utils import get_logger
-from .mapper import DatabaseMapper
-from .postgresql import on_conflict, prefix_inserts  # noqa
-from .utils import (  # noqa
-    create_database,
-    database_exists,
-    drop_database,
-    get_dataframe,
-    get_or_create,
-    reflect_table,
-    run_query,
-    run_sql,
-)
-metadata = MetaData()
-log = get_logger(__name__)
-class Database(object):
-    mapper: Optional[DatabaseMapper] = None
-    metadata: MetaData
-    session: Session
-    __inspector__ = None
-    def __init__(self, db_conn, echo_sql=False, **kwargs):
-        """
-        We can pass a connection string, a **Flask** application object
-        with the appropriate configuration, or nothing, in which
-        case we will try to infer the correct database from
-        the SPARROW_BACKEND_CONFIG file, if available.
-        """
-        compiles(Insert, "postgresql")(prefix_inserts)
-        log.info(f"Setting up database connection '{db_conn}'")
-        self.engine = create_engine(db_conn, echo=echo_sql, **kwargs)
-        self.metadata = kwargs.get("metadata", metadata)
-        # Scoped session for database
-        # https://docs.sqlalchemy.org/en/13/orm/contextual.html#unitofwork-contextual
-        # https://docs.sqlalchemy.org/en/13/orm/session_basics.html#session-faq-whentocreate
-        self._session_factory = sessionmaker(bind=self.engine)
-        self.session = scoped_session(self._session_factory)
-        # Use the self.session_scope function to more explicitly manage sessions.
-    def create_tables(self):
-        """
-        Create all tables described by the database's metadata instance.
-        """
-        metadata.create_all(bind=self.engine)
-    def automap(self, **kwargs):
-        log.info("Automapping the database")
-        self.mapper = DatabaseMapper(self)
-        self.mapper.reflect_database(**kwargs)
-    @contextmanager
-    def session_scope(self, commit=True):
-        """Provide a transactional scope around a series of operations."""
-        # self.__old_session = self.session
-        # session = self._session_factory()
-        session = self.session
-        try:
-            yield session
-            if commit:
-                session.commit()
-        except Exception as err:
-            session.rollback()
-            raise err
-        finally:
-            session.close()
-    def _flush_nested_objects(self, session):
-        """
-        Flush objects remaining in a session (generally these are objects loaded
-        during schema-based importing).
-        """
-        for object in session:
-            try:
-                session.flush(objects=[object])
-                log.debug(f"Successfully flushed instance {object}")
-            except IntegrityError as err:
-                session.rollback()
-                log.debug(err)
-    def run_sql(self, fn, params=None, **kwargs):
-        """Executes SQL files passed"""
-        return iter(run_sql(self.session, fn, params, **kwargs))
-    def run_query(self, sql, params=None, **kwargs):
-        return run_query(self.session, sql, params, **kwargs)
-    def exec_sql(self, sql, params=None, **kwargs):
-        """Executes SQL files passed"""
-        warnings.warn("exec_sql is deprecated. Use run_sql instead", DeprecationWarning)
-        return self.run_sql(sql, params, **kwargs)
-    def get_dataframe(self, *args):
-        """Returns a Pandas DataFrame from a SQL query"""
-        return get_dataframe(self.engine, *args)
-    @property
-    def inspector(self):
-        if self.__inspector__ is None:
-            self.__inspector__ = inspect(self.engine)
-        return self.__inspector__
-    def entity_names(self, **kwargs):
-        """
-        Returns an iterator of names of *schema objects*
-        (both tables and views) from a the database.
-        """
-        yield from self.inspector.get_table_names(**kwargs)
-        yield from self.inspector.get_view_names(**kwargs)
-    def get(self, model, *args, **kwargs):
-        if isinstance(model, str):
-            model = getattr(self.model, model)
-        return self.session.query(model).get(*args, **kwargs)
-    def get_or_create(self, model, **kwargs):
-        """
-        Get an instance of a model, or create it if it doesn't
-        exist.
-        """
-        if isinstance(model, str):
-            model = getattr(self.model, model)
-        return get_or_create(self.session, model, **kwargs)
-    def reflect_table(self, *args, **kwargs):
-        """
-        One-off reflection of a database table or view. Note: for most purposes,
-        it will be better to use the database tables automapped at runtime using
-        `self.automap()`. Then, tables can be accessed using the
-        `self.table` object. However, this function can be useful for views (which
-        are not reflected automatically), or to customize type definitions for mapped
-        tables.
-        A set of `column_args` can be used to pass columns to override with the mapper, for
-        instance to set up foreign and primary key constraints.
-        https://docs.sqlalchemy.org/en/13/core/reflection.html#reflecting-views
-        """
-        return reflect_table(self.engine, *args, **kwargs)
-    @property
-    def table(self):
-        """
-        Map of all tables in the database as SQLAlchemy table objects
-        """
-        if self.mapper is None or self.mapper._tables is None:
-            self.automap()
-        return self.mapper._tables
-    @property
-    def model(self):
-        """
-        Map of all tables in the database as SQLAlchemy models
-        https://docs.sqlalchemy.org/en/latest/orm/extensions/automap.html
-        """
-        if self.mapper is None or self.mapper._models is None:
-            self.automap()
-        return self.mapper._models
-    @property
-    def mapped_classes(self):
-        return self.model