PostBOUND 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postbound/__init__.py +211 -0
- postbound/_base.py +6 -0
- postbound/_bench.py +1012 -0
- postbound/_core.py +1153 -0
- postbound/_hints.py +1373 -0
- postbound/_jointree.py +1079 -0
- postbound/_pipelines.py +1121 -0
- postbound/_qep.py +1986 -0
- postbound/_stages.py +876 -0
- postbound/_validation.py +734 -0
- postbound/db/__init__.py +72 -0
- postbound/db/_db.py +2348 -0
- postbound/db/_duckdb.py +785 -0
- postbound/db/mysql.py +1195 -0
- postbound/db/postgres.py +4216 -0
- postbound/experiments/__init__.py +12 -0
- postbound/experiments/analysis.py +674 -0
- postbound/experiments/benchmarking.py +54 -0
- postbound/experiments/ceb.py +877 -0
- postbound/experiments/interactive.py +105 -0
- postbound/experiments/querygen.py +334 -0
- postbound/experiments/workloads.py +980 -0
- postbound/optimizer/__init__.py +92 -0
- postbound/optimizer/__init__.pyi +73 -0
- postbound/optimizer/_cardinalities.py +369 -0
- postbound/optimizer/_joingraph.py +1150 -0
- postbound/optimizer/dynprog.py +1825 -0
- postbound/optimizer/enumeration.py +432 -0
- postbound/optimizer/native.py +539 -0
- postbound/optimizer/noopt.py +54 -0
- postbound/optimizer/presets.py +147 -0
- postbound/optimizer/randomized.py +650 -0
- postbound/optimizer/tonic.py +1479 -0
- postbound/optimizer/ues.py +1607 -0
- postbound/qal/__init__.py +343 -0
- postbound/qal/_qal.py +9678 -0
- postbound/qal/formatter.py +1089 -0
- postbound/qal/parser.py +2344 -0
- postbound/qal/relalg.py +4257 -0
- postbound/qal/transform.py +2184 -0
- postbound/shortcuts.py +70 -0
- postbound/util/__init__.py +46 -0
- postbound/util/_errors.py +33 -0
- postbound/util/collections.py +490 -0
- postbound/util/dataframe.py +71 -0
- postbound/util/dicts.py +330 -0
- postbound/util/jsonize.py +68 -0
- postbound/util/logging.py +106 -0
- postbound/util/misc.py +168 -0
- postbound/util/networkx.py +401 -0
- postbound/util/numbers.py +438 -0
- postbound/util/proc.py +107 -0
- postbound/util/stats.py +37 -0
- postbound/util/system.py +48 -0
- postbound/util/typing.py +35 -0
- postbound/vis/__init__.py +5 -0
- postbound/vis/fdl.py +69 -0
- postbound/vis/graphs.py +48 -0
- postbound/vis/optimizer.py +538 -0
- postbound/vis/plots.py +84 -0
- postbound/vis/tonic.py +70 -0
- postbound/vis/trees.py +105 -0
- postbound-0.19.0.dist-info/METADATA +355 -0
- postbound-0.19.0.dist-info/RECORD +67 -0
- postbound-0.19.0.dist-info/WHEEL +5 -0
- postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
- postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/shortcuts.py
ADDED
@@ -0,0 +1,70 @@
"""Shortcuts provide simple methods to generate instances of different PostBOUND objects, mostly for REPL contexts."""

from __future__ import annotations

from . import qal
from ._core import ColumnReference, TableReference


def tab(table: str) -> TableReference:
    """Creates a table instance.

    Parameters
    ----------
    table : str
        The name and/or alias of the table. Supported formats include ``"table_name"`` and ``"table_name alias"``

    Returns
    -------
    TableReference
        The resulting table. This will never be a virtual table.
    """
    if " " in table:
        full_name, alias = table.split(" ")
        return TableReference(full_name, alias)
    else:
        return TableReference(table)


def col(column: str) -> ColumnReference:
    """Creates a column instance.

    Parameters
    ----------
    column : str
        The name and/or table of the column. Supported formats include ``"column_name"`` and ``"table_name.column_name"``

    Returns
    -------
    ColumnReference
        The resulting column. If a table name is included before the ``.``, it will be parsed according to the rules of
        `tab()`.
    """
    if "." in column:
        table_name, column_name = column.split(".")
        return ColumnReference(column_name, tab(table_name))
    else:
        return ColumnReference(column)


def q(query: str) -> qal.SqlQuery:
    """Parses the given SQL query.

    This is really just a shortcut to importing and calling the parser module.

    Parameters
    ----------
    query : str
        The SQL query to parse

    Returns
    -------
    qal.SqlQuery
        A QAL query object corresponding to the given input query. Errors can be produced according to the documentation of
        `qal.parse_query`.

    See Also
    --------
    qal.parse_query
    """
    return qal.parse_query(query)
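
A quick REPL-style sketch of how these shortcuts compose. The table, column and query used here are invented for illustration; only `tab`, `col` and `q` come from the module:

from postbound import shortcuts as sc

movies = sc.tab("movies m")      # TableReference named "movies" with alias "m"
title = sc.col("m.title")        # ColumnReference whose table is built via tab("m")
query = sc.q("SELECT m.title FROM movies m WHERE m.year > 2000")  # parsed into a qal.SqlQuery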

postbound/util/__init__.py
ADDED
@@ -0,0 +1,46 @@
"""Contains utilities that are not specific to PostBOUND's domain of databases and query optimization."""

from . import collections, dicts, proc, stats, system, typing
from . import networkx as nx
from ._errors import InvariantViolationError, LogicError, StateError
from .collections import enlist, flatten, powerset, set_union, simplify
from .dicts import argmin, frozendict, hash_dict
from .jsonize import jsondict, to_json, to_json_dump
from .logging import Logger, make_logger, timestamp
from .misc import DependencyGraph, Version, camel_case2snake_case
from .proc import run_cmd
from .stats import jaccard
from .system import open_files

__all__ = [
    "flatten",
    "enlist",
    "simplify",
    "set_union",
    "powerset",
    "collections",
    "hash_dict",
    "argmin",
    "frozendict",
    "dicts",
    "StateError",
    "LogicError",
    "InvariantViolationError",
    "jsondict",
    "to_json",
    "to_json_dump",
    "timestamp",
    "make_logger",
    "Logger",
    "camel_case2snake_case",
    "Version",
    "DependencyGraph",
    "nx",
    "run_cmd",
    "proc",
    "jaccard",
    "stats",
    "open_files",
    "system",
    "typing",
]

postbound/util/_errors.py
ADDED
@@ -0,0 +1,33 @@
"""Contains various general errors that extend Python's base errors."""

from __future__ import annotations


class LogicError(RuntimeError):
    """Generic error to indicate that any kind of algorithmic problem occurred.

    This error is generally used when some assumption within PostBOUND is violated, but it's (probably) not the user's fault.
    As a rule of thumb, if the user supplies faulty input, a `ValueError` should be raised instead.
    Therefore, encountering a `LogicError` indicates a bug in PostBOUND itself.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(
            "Internal PostBOUND error found. Please file a bug report on Github (https://github.com/rbergm/PostBOUND): ",
            *args,
            **kwargs,
        )


class StateError(RuntimeError):
    """Indicates that an object is not in the right state to perform an operation."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)


class InvariantViolationError(LogicError):
    """Indicates that some contract of a method was violated. The arguments should provide further details."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
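
A minimal sketch (not part of the package) of how the docstrings above distinguish the error types: `ValueError` for faulty user input, `StateError` for operations on an object in the wrong state, and `LogicError` (or its subclass `InvariantViolationError`) for internal bugs. The `Cursor` class is invented purely for illustration:

from postbound.util import StateError


class Cursor:
    """Hypothetical class, only used to illustrate when each error type applies."""

    def __init__(self) -> None:
        self._open = False

    def open(self) -> None:
        self._open = True

    def run(self, sql: str) -> None:
        if not sql:
            raise ValueError("No query supplied")  # faulty user input
        if not self._open:
            raise StateError("Cursor must be opened before running queries")  # wrong object state
        # If an internal assumption broke here, an InvariantViolationError would be appropriate;
        # it already prepends the bug-report hint inherited from LogicError.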

postbound/util/collections.py
ADDED
@@ -0,0 +1,490 @@
"""Provides utilities to work with arbitrary collections like lists, sets and tuples."""

from __future__ import annotations

import itertools
import typing
from collections.abc import (
    Callable,
    Collection,
    Container,
    Generator,
    Iterable,
    Iterator,
    Sequence,
    Sized,
)
from typing import Any, Optional, overload

from .._base import T
from .dicts import HashableDict

ContainerType = typing.TypeVar("ContainerType", list, tuple, set, frozenset)
"""Specifies which types are considered containers.

For some methods this is necessary to determine whether any work still has to be done.
"""


def flatten(deep_list: Iterable[Iterable[T] | T]) -> list[T]:
    """Transforms a nested list into a flat list: ``[[1, 2], [3]]`` is turned into ``[1, 2, 3]``

    Parameters
    ----------
    deep_list : Iterable[Iterable[T] | T]
        The list to flatten

    Returns
    -------
    list[T]
        The flattened list: all elements of iterables from the `deep_list` are now contained directly in the resulting list.
    """
    flattened: list[T] = []
    for nested in deep_list:
        if isinstance(nested, Iterable) and not isinstance(nested, str):
            flattened.extend(nested)
        else:
            flattened.append(nested)
    return flattened


@overload
def enlist(obj: list[T]) -> list[T]: ...


@overload
def enlist(
    obj: tuple[T, ...], *, enlist_tuples: bool = False
) -> list[tuple[T, ...]]: ...


@overload
def enlist(obj: tuple[T, ...]) -> tuple[T, ...]: ...


@overload
def enlist(obj: set[T]) -> set[T]: ...


@overload
def enlist(obj: frozenset[T]) -> frozenset[T]: ...


@overload
def enlist(obj: T) -> list[T]: ...


def enlist(obj: T | Iterable[T], *, enlist_tuples: bool = False) -> Iterable[T]:
    """Transforms any object into a singular list of that object, if it is not a container already.

    Specifically, the following types are treated as container-like and will not be transformed: lists, tuples, sets
    and frozensets. The treatment of tuples can be configured via parameters. All other arguments will be wrapped in a list.

    For example, ``"abc"`` is turned into ``["abc"]``, whereas ``["abc"]`` is returned unmodified.

    Parameters
    ----------
    obj : T | Iterable[T]
        The object or list to wrap
    enlist_tuples : bool, optional
        Whether a tuple `obj` should be enlisted. This is ``False`` by default

    Returns
    -------
    Iterable[T]
        The object, wrapped into a list if necessary
    """
    if isinstance(obj, str):
        return [obj]
    if isinstance(obj, tuple) and enlist_tuples:
        return [obj]
    list_types = [tuple, list, set, frozenset]
    if any(isinstance(obj, target_type) for target_type in list_types):
        return obj
    return [obj]
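
A short usage sketch (values invented) of how `flatten` and `enlist` behave according to the docstrings above:

from postbound.util import enlist, flatten

flatten([[1, 2], [3], 4])            # -> [1, 2, 3, 4]; non-iterable items are kept as-is
flatten(["ab", ["cd"]])              # -> ["ab", "cd"]; strings are not exploded into characters

enlist("abc")                        # -> ["abc"]; scalar values are wrapped in a list
enlist(["abc"])                      # -> ["abc"]; existing containers pass through unchanged
enlist((1, 2), enlist_tuples=True)   # -> [(1, 2)]; tuples can optionally be wrapped as well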

def get_any(elems: Iterable[T]) -> T:
    """Provides any element from an iterable. There is no guarantee which one will be returned.

    This method can potentially iterate over the entire iterable. The behaviour for empty iterables is undefined.

    Parameters
    ----------
    elems : Iterable[T]
        The items from which to choose.

    Returns
    -------
    T
        Any of the elements from the iterable. If the iterable is empty, the behaviour is undefined.
    """
    return next(iter(elems))


def simplify(obj: Iterable[T]) -> T:
    """Unwraps containers containing just a single element.

    This can be thought of as the inverse operation to `enlist`. If the object contains multiple elements, nothing happens.

    Parameters
    ----------
    obj : Iterable[T]
        The object to simplify

    Returns
    -------
    T
        For a singular list, the object that was contained in that list. Otherwise `obj` is returned unmodified. Since this
        method is mainly intended for lists which are known to contain exactly one element, we use *T* as a return type to
        assist the type checker.

    Examples
    --------
    The singular list ``[1]`` is simplified to ``1``. On the other hand, ``[1, 2]`` is returned unmodified.
    """
    if "__len__" not in dir(obj) or "__iter__" not in dir(obj):
        return obj

    if len(obj) == 1:
        return list(obj)[0]
    return obj
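
The inverse direction, again with invented values (`get_any` is not re-exported from `postbound.util`, so it is imported from the submodule):

from postbound.util import simplify
from postbound.util.collections import get_any

simplify([42])            # -> 42; a single-element container is unwrapped
simplify([1, 2])          # -> [1, 2]; everything else is passed through untouched
get_any({"a", "b", "c"})  # -> one arbitrary element of the set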

def foreach(lst: Iterable[T], action: Callable[[T], None]) -> None:
    """Shortcut to apply a specific action to each element in an iterable.

    Parameters
    ----------
    lst : Iterable[T]
        The elements.
    action : Callable[[T], None]
        The side-effect that should be applied to all elements.
    """
    for elem in lst:
        action(elem)


def powerset(lst: Collection[T]) -> Iterable[tuple[T, ...]]:
    """Calculates the powerset of the provided iterable.

    The powerset of a set *S* is defined as the set that contains all subsets of *S*. This includes the empty set, as well
    as the entire set *S*.

    Parameters
    ----------
    lst : Collection[T]
        The "set" *S*

    Returns
    -------
    Iterable[tuple[T, ...]]
        The powerset of *S*. Each tuple corresponds to a specific subset. The order of the elements within the tuple is not
        significant.
    """
    return itertools.chain.from_iterable(
        itertools.combinations(lst, size) for size in range(len(lst) + 1)
    )


def sliding_window(
    lst: Sequence[T], size: int, step: int = 1
) -> Generator[tuple[Sequence[T], Sequence[T], Sequence[T]], None, None]:
    """Iterates over the given sequence using a sliding window.

    The window will contain exactly `size` many entries, starting at the beginning of the sequence. After yielding a
    window, the next window will be shifted `step` many elements.

    Parameters
    ----------
    lst : Sequence[T]
        The sequence to iterate over
    size : int
        The number of elements in the sliding window
    step : int, optional
        The number of elements to shift after each window, defaults to 1.

    Yields
    ------
    Generator[tuple[Sequence[T], Sequence[T], Sequence[T]]]
        The sliding window subsets. The tuples are structured as follows: *(prefix, window, suffix)* where *prefix* are all
        elements of the sequence before the current window, *window* contains exactly those elements that are part of the
        current window and *suffix* contains all elements after the current window.
    """
    for i in range(0, len(lst) - size + 1, step):
        prefix = lst[:i]
        window = lst[i : i + size]
        suffix = lst[i + size :]
        yield prefix, window, suffix


def pairs(lst: Iterable[T]) -> Generator[tuple[T, T], None, None]:
    """Provides all pairs of elements of the given iterable, disregarding order and identical pairs.

    This means that the resulting iterable will not contain entries *(a, a)* unless *a* itself is present multiple
    times in the input. Likewise, tuples *(a, b)* and *(b, a)* are treated as equal and only one of them will be
    returned (Again, unless *a* or *b* are present multiple times in the input. In that case, their order is
    unspecified.)

    Parameters
    ----------
    lst : Iterable[T]
        The iterable that contains the pairs. It must be possible to iterate over it multiple times (twice, to be exact).

    Yields
    ------
    Generator[tuple[T, T], None, None]
        The element pairs.
    """
    for a_idx, a in enumerate(lst):
        for b_idx, b in enumerate(lst):
            if b_idx <= a_idx:
                continue
            yield a, b
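
A quick sketch (inputs invented) of what the three combinatorial helpers above produce:

from postbound.util import powerset
from postbound.util.collections import pairs, sliding_window

list(powerset([1, 2]))
# -> [(), (1,), (2,), (1, 2)]

list(sliding_window("abcde", 3))
# -> [("", "abc", "de"), ("a", "bcd", "e"), ("ab", "cde", "")]

list(pairs([1, 2, 3]))
# -> [(1, 2), (1, 3), (2, 3)]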

def set_union(sets: Iterable[set[T] | frozenset[T]]) -> set[T]:
    """Computes the union of many sets.

    Parameters
    ----------
    sets : Iterable[set[T] | frozenset[T]]
        The sets to combine. Frozensets are "expanded" to regular sets.

    Returns
    -------
    set[T]
        The union of all provided sets.
    """
    union_set: set[T] = set()
    for s in sets:
        union_set |= s
    return union_set


def make_hashable(obj: Any) -> Any:
    """Attempts to generate an equivalent, hashable representation for a container.

    This function operates on the standard container types list, tuple, set, dictionary and frozenset and performs the
    following conversion:

    - list becomes tuple, all elements of the list are recursively made hashable
    - tuples are left as-is, but all elements of the tuple are recursively made hashable
    - sets become frozensets. The elements are left as they are, because they must already be hashable
    - dictionaries become instances of `dicts.HashableDict`. The values are recursively made hashable, keys are left the
      way they are because they must already be hashable
    - frozensets are left as-is

    All other types, including user-defined types are returned as-is.

    Parameters
    ----------
    obj : Any
        The object to hash

    Returns
    -------
    Any
        The hashable counterpart of the object
    """
    if isinstance(obj, set):
        return frozenset(obj)
    elif isinstance(obj, list) or isinstance(obj, tuple):
        return tuple(make_hashable(elem) for elem in obj)
    elif isinstance(obj, dict):
        return HashableDict({k: make_hashable(v) for k, v in obj.items()})
    else:
        return obj
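
For illustration (inputs invented), the conversion rules above applied to a small nested structure:

from postbound.util import set_union
from postbound.util.collections import make_hashable

set_union([{1, 2}, frozenset({2, 3})])
# -> {1, 2, 3}

make_hashable({"tables": ["a", "b"], "joined": {1, 2}})
# -> a HashableDict mapping "tables" to ("a", "b") and "joined" to frozenset({1, 2}),
#    so the whole structure can now serve as a dictionary key or set element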

class Queue(Iterable[T], Sized, Container[T]):
    """A queue is a wrapper around an underlying list of elements which provides FIFO semantics for access.

    Parameters
    ----------
    data : Iterable[T] | None, optional
        Initial contents of the queue. By default the queue is empty at the beginning.

    """

    def __init__(self, data: Iterable[T] | None = None) -> None:
        self.data = list(data) if data else []

    def enqueue(self, value: T) -> None:
        """Adds a new item to the end of the queue.

        Parameters
        ----------
        value : T
            The item to add
        """
        self.data.append(value)

    def push(self, value: T) -> None:
        """Adds a new item to the end of the queue.

        This is an alias for `enqueue`.

        Parameters
        ----------
        value : T
            The item to add
        """
        self.enqueue(value)

    def append(self, value: T) -> None:
        """Adds a new item to the end of the queue.

        This method is an alias for `enqueue` to enable easier interchangeability with normal lists.

        Parameters
        ----------
        value : T
            The item to add
        """
        self.enqueue(value)

    def extend(self, values: Iterable[T]) -> None:
        """Adds a number of values to the end of the queue.

        Parameters
        ----------
        values : Iterable[T]
            The elements to add. The order in the queue matches the order in the iterable.
        """
        self.data.extend(values)

    def head(self) -> Optional[T]:
        """Provides the current first element of the queue without removing it.

        Returns
        -------
        Optional[T]
            The first element if it exists, or ``None`` if the queue is empty.
        """
        return self.data[0] if self.data else None

    def peak(self) -> Optional[T]:
        """Provides the current first element of the queue without removing it.

        This is an alias for `head`.

        Returns
        -------
        Optional[T]
            The first element if it exists, or ``None`` if the queue is empty.
        """
        return self.head()

    def pop(self) -> Optional[T]:
        """Provides the current first element of the queue and removes it.

        Returns
        -------
        Optional[T]
            The first element if it exists, or ``None`` if the queue is empty.
        """
        item = self.head()
        if self.data:
            self.data.pop(0)
        return item

    def __len__(self) -> int:
        return len(self.data)

    def __contains__(self, __x: object) -> bool:
        return __x in self.data

    def __iter__(self) -> Iterator[T]:
        return self.data.__iter__()

    def __repr__(self) -> str:
        return f"Queue({self.data})"

    def __str__(self) -> str:
        return str(self.data)
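
A tiny FIFO walkthrough with invented values:

from postbound.util.collections import Queue

todo = Queue(["parse"])
todo.enqueue("optimize")
todo.extend(["execute", "report"])

todo.head()        # -> "parse"; peeking leaves the queue untouched
todo.pop()         # -> "parse"; popping removes the head
len(todo)          # -> 3
"report" in todo   # -> True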

class SizedQueue(Collection[T]):
    """A sized queue extends the behaviour of a normal queue by restricting the number of items in the queue.

    A sized queue has weak FIFO semantics: items can only be appended at the end, but the contents of the entire queue
    can be accessed at any time.

    If upon enqueuing a new item the queue is already at maximum capacity, the current head of the queue will be
    dropped.

    Parameters
    ----------
    capacity : int
        The maximum number of items the queue can contain at the same time.
    data : Optional[Iterable[T]], optional
        Initial contents of the queue. By default the queue is empty at the beginning.

    Notes
    -----
    Although `Queue` and `SizedQueue` provide similar FIFO semantics, there is no subclass relationship between the two. This
    is by design, since the contract of a queue is very different from the contract of a sized queue.

    """

    def __init__(self, capacity: int, data: Optional[Iterable[T]] = None) -> None:
        self.data = list(data) if data else []
        self.capacity = capacity

    def append(self, value: T) -> None:
        """Adds a new item to the end of the queue, popping any excess items.

        Parameters
        ----------
        value : T
            The value to add
        """
        if len(self.data) >= self.capacity:
            self.data.pop(0)
        self.data.append(value)

    def extend(self, values: typing.Iterable[T]) -> None:
        """Adds all the items to the end of the queue, popping any excess items.

        Parameters
        ----------
        values : typing.Iterable[T]
            The values to add
        """
        # keep the newest items and drop from the head, matching the eviction behaviour of `append`
        self.data = (self.data + list(values))[-self.capacity :]

    def head(self) -> Optional[T]:
        """Provides the current first item of the queue without removing it.

        Returns
        -------
        Optional[T]
            The first item in the queue, or ``None`` if the queue is empty
        """
        return self.data[0] if self.data else None

    def pop(self) -> Optional[T]:
        """Provides the current first item of the queue and removes it.

        Returns
        -------
        Optional[T]
            The first item in the queue, or ``None`` if the queue is empty
        """
        return self.data.pop(0) if self.data else None

    def __contains__(self, other: T) -> bool:
        return other in self.data

    def __iter__(self) -> typing.Iterator[T]:
        return self.data.__iter__()

    def __len__(self) -> int:
        return len(self.data)

    def __repr__(self) -> str:
        return f"SizedQueue(capacity={self.capacity}, data={self.data})"

    def __str__(self) -> str:
        return str(self.data)
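
For example, a SizedQueue with capacity 3 used as a bounded history of the most recent entries (values invented):

from postbound.util.collections import SizedQueue

recent = SizedQueue(3, ["q1", "q2", "q3"])
recent.append("q4")   # already at capacity, so the head "q1" is evicted
list(recent)          # -> ["q2", "q3", "q4"]
recent.head()         # -> "q2"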

postbound/util/dataframe.py
ADDED
@@ -0,0 +1,71 @@
"""Utilities to work with Pandas data frames"""

from __future__ import annotations

from collections.abc import Collection, Iterable
from typing import Any, Optional

import pandas as pd


def _df_from_dict(
    data: dict[Any, Collection[Any]],
    key_name: Optional[str] = None,
    column_names: Optional[Iterable[str]] = None,
) -> pd.DataFrame:
    data_template = next(iter(data.values()))
    if column_names is None:
        column_name_map = {i: str(i) for i in range(len(data_template))}
    else:
        column_name_map = {idx: col for idx, col in enumerate(column_names)}

    df_container: dict[str, list[Any]] = {col: [] for col in column_name_map.values()}
    for row in data.values():
        for col_idx, col in enumerate(row):
            col_name = column_name_map[col_idx]
            df_container[col_name].append(col)

    key_name = "key" if key_name is None else key_name
    df_container[key_name] = list(data.keys())

    return pd.DataFrame(df_container)


def _df_from_list(data: Collection[dict[Any, Any]]) -> pd.DataFrame:
    data_template = next(iter(data))
    df_container: dict[str, list[Any]] = {col: [] for col in data_template.keys()}
    for row in data:
        for key in df_container.keys():
            df_container[key].append(row[key])
    return pd.DataFrame(df_container)


def as_df(
    data: dict[Any, Collection[Any]] | Collection[dict[Any, Any]],
    *,
    key_name: Optional[str] = None,
    column_names: Optional[Iterable[str]] = None,
) -> pd.DataFrame:
    """Generates a new Pandas `DataFrame`.

    The contents of the dataframe can be supplied in one of two forms: a collection of dictionaries will be transformed
    into a dataframe such that each dictionary corresponds to one row of the dataframe. All dictionaries have to
    consist of exactly the same key-value pairs. Each key becomes a column in the dataframe. The precise columns are
    inferred from the first dictionary in the collection. In this case, column names are derived directly from the
    keys.

    The other form consists of one large dictionary of keys mapping to several columns. The resulting dataframe will
    have one column that corresponds to the key values and additional columns that correspond to the entries in the
    collection which was mapped-to by the key. All collections have to consist of exactly the same number of elements.
    The precise number is inferred based on the first key-value pair. To name the different columns of the dataframe,
    the `key_name` and `column_names` parameters can be used. If no key name is given, it defaults to `key`. If no
    column names are given, they default to numerical indices that correspond to the position in the mapped collection.
    """
    if not data:
        return pd.DataFrame()
    if isinstance(data, dict):
        return _df_from_dict(data, key_name, column_names)
    elif isinstance(data, Collection):
        return _df_from_list(data)
    else:
        raise TypeError("Unexpected data type: " + str(data))
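
Both accepted input shapes side by side, with invented measurement data:

from postbound.util.dataframe import as_df

# Row-wise: one dictionary per row, the keys become the column names
as_df([
    {"query": "q1", "runtime": 1.2},
    {"query": "q2", "runtime": 0.7},
])

# Column-wise: the keys become one column, the mapped collections fill the remaining columns
as_df(
    {"q1": (1.2, 10), "q2": (0.7, 4)},
    key_name="query",
    column_names=["runtime", "num_joins"],
)
# -> columns "runtime", "num_joins" and "query"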