PyPI - PostBOUND - Versions diffs - 0.19.0__py3-none-any.whl - Mend

PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)

postbound/__init__.py +211 -0
postbound/_base.py +6 -0
postbound/_bench.py +1012 -0
postbound/_core.py +1153 -0
postbound/_hints.py +1373 -0
postbound/_jointree.py +1079 -0
postbound/_pipelines.py +1121 -0
postbound/_qep.py +1986 -0
postbound/_stages.py +876 -0
postbound/_validation.py +734 -0
postbound/db/__init__.py +72 -0
postbound/db/_db.py +2348 -0
postbound/db/_duckdb.py +785 -0
postbound/db/mysql.py +1195 -0
postbound/db/postgres.py +4216 -0
postbound/experiments/__init__.py +12 -0
postbound/experiments/analysis.py +674 -0
postbound/experiments/benchmarking.py +54 -0
postbound/experiments/ceb.py +877 -0
postbound/experiments/interactive.py +105 -0
postbound/experiments/querygen.py +334 -0
postbound/experiments/workloads.py +980 -0
postbound/optimizer/__init__.py +92 -0
postbound/optimizer/__init__.pyi +73 -0
postbound/optimizer/_cardinalities.py +369 -0
postbound/optimizer/_joingraph.py +1150 -0
postbound/optimizer/dynprog.py +1825 -0
postbound/optimizer/enumeration.py +432 -0
postbound/optimizer/native.py +539 -0
postbound/optimizer/noopt.py +54 -0
postbound/optimizer/presets.py +147 -0
postbound/optimizer/randomized.py +650 -0
postbound/optimizer/tonic.py +1479 -0
postbound/optimizer/ues.py +1607 -0
postbound/qal/__init__.py +343 -0
postbound/qal/_qal.py +9678 -0
postbound/qal/formatter.py +1089 -0
postbound/qal/parser.py +2344 -0
postbound/qal/relalg.py +4257 -0
postbound/qal/transform.py +2184 -0
postbound/shortcuts.py +70 -0
postbound/util/__init__.py +46 -0
postbound/util/_errors.py +33 -0
postbound/util/collections.py +490 -0
postbound/util/dataframe.py +71 -0
postbound/util/dicts.py +330 -0
postbound/util/jsonize.py +68 -0
postbound/util/logging.py +106 -0
postbound/util/misc.py +168 -0
postbound/util/networkx.py +401 -0
postbound/util/numbers.py +438 -0
postbound/util/proc.py +107 -0
postbound/util/stats.py +37 -0
postbound/util/system.py +48 -0
postbound/util/typing.py +35 -0
postbound/vis/__init__.py +5 -0
postbound/vis/fdl.py +69 -0
postbound/vis/graphs.py +48 -0
postbound/vis/optimizer.py +538 -0
postbound/vis/plots.py +84 -0
postbound/vis/tonic.py +70 -0
postbound/vis/trees.py +105 -0
postbound-0.19.0.dist-info/METADATA +355 -0
postbound-0.19.0.dist-info/RECORD +67 -0
postbound-0.19.0.dist-info/WHEEL +5 -0
postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
postbound-0.19.0.dist-info/top_level.txt +1 -0

postbound/util/numbers.py ADDED Viewed

@@ -0,0 +1,438 @@
+"""Utilities centered around numbers."""
+from __future__ import annotations
+import math
+import numbers
+import threading
+from typing import Any, Union
+def represents_number(val: str) -> bool:
+    """Tells whether `val` is accepted by the `float` constructor, i.e. can be cast into an integer/float value."""
+    castable = True
+    try:
+        float(val)
+    except (TypeError, ValueError):
+        # TypeError: not a string/number at all (e.g. None), ValueError: a string that is not a number
+        castable = False
+    return castable
+class AtomicInt(numbers.Integral):
+    """An atomic int allows for multi-threaded access to the integer value.
+    All reads and writes of the wrapped value are guarded by a non-reentrant `threading.Lock`. Code that already holds
+    the lock must therefore access `_value` directly and must never go through the (locking) `value` property.
+    Operators with an `AtomicInt` as left operand return a new `AtomicInt` for ``+``, ``*``, ``//``, ``**`` and unary
+    ``-``. All other operators (including the reflected ones) return the plain result of the underlying operation.
+    Parameters
+    ----------
+    value : int, optional
+        The initial value, by default 0.
+    """
+    def __init__(self, value: int = 0):
+        self._value = value
+        self._lock = threading.Lock()
+    def increment(self, by: int = 1) -> None:
+        """Atomically adds `by` to the current value."""
+        with self._lock:
+            self._value += by
+    def reset(self) -> None:
+        """Atomically sets the value back to 0."""
+        with self._lock:
+            self._value = 0
+    def _get_value(self) -> int:
+        with self._lock:
+            return self._value
+    def _set_value(self, value: int) -> None:
+        with self._lock:
+            self._value = value
+    def _assert_integral(self, other: Any):
+        """Raises a `TypeError` if `other` is not an integral number."""
+        if not isinstance(other, numbers.Integral):
+            raise TypeError(
+                f"Cannot add argument of type {type(other)} to object of type AtomicInt"
+            )
+    def _unwrap_atomic(self, other: Any):
+        """Provides the raw value of `other` if it is an `AtomicInt`, otherwise `other` itself.
+        This has to happen *before* our own lock is acquired: handing an `AtomicInt` operand to the underlying integer
+        operation would dispatch to the reflected method of that operand, which takes its lock again. For
+        ``other is self`` this would block forever.
+        """
+        return other._value if isinstance(other, AtomicInt) else other
+    # locking accessor for the wrapped value, intended for use from outside the class
+    value = property(_get_value, _set_value)
+    def __abs__(self) -> int:
+        with self._lock:
+            return abs(self._value)
+    def __add__(self, other: Any) -> AtomicInt:
+        self._assert_integral(other)
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return AtomicInt(self._value + other)
+    def __and__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value & other
+    def __ceil__(self) -> int:
+        with self._lock:
+            return math.ceil(self._value)
+    def __eq__(self, other: object) -> bool:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value == other
+    def __floor__(self) -> int:
+        with self._lock:
+            return math.floor(self._value)
+    def __floordiv__(self, other: Any) -> AtomicInt:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return AtomicInt(self._value // other)
+    def __int__(self) -> int:
+        with self._lock:
+            return int(self._value)
+    def __invert__(self) -> Any:
+        with self._lock:
+            return ~self._value
+    def __le__(self, other: Any) -> bool:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value <= other
+    def __lshift__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value << other
+    def __lt__(self, other: Any) -> bool:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value < other
+    def __mod__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value % other
+    def __mul__(self, other: Any) -> AtomicInt:
+        self._assert_integral(other)
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return AtomicInt(self._value * other)
+    def __neg__(self) -> AtomicInt:
+        with self._lock:
+            return AtomicInt(-self._value)
+    def __or__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value | other
+    def __pos__(self) -> Any:
+        with self._lock:
+            # must not use the `value` property here: it acquires the (non-reentrant) lock a second time
+            return +self._value
+    def __pow__(self, exponent: Any, modulus: Any | None = None) -> AtomicInt:
+        """Computes ``self ** exponent``, or ``pow(self, exponent, modulus)`` if a modulus is given.
+        Raises
+        ------
+        ValueError
+            If the result is not a whole number, e.g. for negative exponents.
+        """
+        exponent = self._unwrap_atomic(exponent)
+        modulus = self._unwrap_atomic(modulus)
+        with self._lock:
+            if modulus is None:
+                res = self._value**exponent
+            else:
+                res = pow(self._value, exponent, modulus)
+            if res != int(res):
+                raise ValueError(
+                    f"Power not supported for type AtomicInt with argument {exponent}"
+                )
+            # whole-number floats (e.g. 4 ** 0.5) are stored as proper integers
+            return AtomicInt(int(res))
+    def __radd__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other + self._value
+    def __rand__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other & self._value
+    def __rfloordiv__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other // self._value
+    def __rlshift__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other << self._value
+    def __rmod__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other % self._value
+    def __rmul__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other * self._value
+    def __ror__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other | self._value
+    def __round__(self, ndigits: Union[int, None] = None) -> int:
+        with self._lock:
+            # negative ndigits round to tens, hundreds, ... and therefore do change integers
+            return self._value if ndigits is None else round(self._value, ndigits)
+    def __rpow__(self, base: Any) -> Any:
+        base = self._unwrap_atomic(base)
+        with self._lock:
+            return base**self._value
+    def __rrshift__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other >> self._value
+    def __rshift__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value >> other
+    def __rtruediv__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other / self._value
+    def __rxor__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return other ^ self._value
+    def __truediv__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value / other
+    def __trunc__(self) -> int:
+        with self._lock:
+            return math.trunc(self._value)
+    def __xor__(self, other: Any) -> Any:
+        other = self._unwrap_atomic(other)
+        with self._lock:
+            return self._value ^ other
+    def __hash__(self) -> int:
+        with self._lock:
+            return hash(self._value)
+    def __repr__(self) -> str:
+        with self._lock:
+            return f"AtomicInt({self._value})"
+    def __str__(self) -> str:
+        with self._lock:
+            return str(self._value)
+class BoundedInt(numbers.Integral):
+    """A bounded int cannot become larger and/or smaller than a specified interval.
+    If the bounded integer does leave the allowed interval, it will be snapped back to the minimum/maximum allowed
+    number, respectively.
+    ``+``, ``*``, ``**`` and unary ``-`` with a `BoundedInt` as left operand produce a new `BoundedInt` with the same
+    bounds. All other operators (including the reflected ones) return the plain, unbounded result.
+    Parameters
+    ----------
+    value : int
+        The initial value. It is snapped into the allowed interval immediately.
+    allowed_min : Union[int, None], optional
+        The smallest allowed value. `None` (the default) means that there is no lower bound.
+    allowed_max : Union[int, None], optional
+        The largest allowed value. `None` (the default) means that there is no upper bound.
+    Raises
+    ------
+    TypeError
+        If `value` is not an `int`.
+    ValueError
+        If both bounds are given and `allowed_min` is larger than `allowed_max`.
+    """
+    @staticmethod
+    def non_neg(value: int, *, allowed_max: Union[int, None] = None) -> BoundedInt:
+        """Creates a bounded integer that cannot become negative, i.e. with a lower bound of 0."""
+        return BoundedInt(value, allowed_min=0, allowed_max=allowed_max)
+    def __init__(
+        self,
+        value: int,
+        *,
+        allowed_min: Union[int, None] = None,
+        allowed_max: Union[int, None] = None,
+    ):
+        if not isinstance(value, int):
+            raise TypeError(f"Only integer values allowed, but {type(value)} given!")
+        if (
+            allowed_min is not None
+            and allowed_max is not None
+            and allowed_min > allowed_max
+        ):
+            raise ValueError("Allowed minimum may not be larger than allowed maximum!")
+        self._value = value
+        self._allowed_min = allowed_min
+        self._allowed_max = allowed_max
+        # don't forget the first update!
+        self._snap_to_min_max()
+    def _snap_to_min_max(self) -> None:
+        """Moves the current value back into the allowed interval if it has left it."""
+        if self._allowed_min is not None and self._value < self._allowed_min:
+            self._value = self._allowed_min
+        if self._allowed_max is not None and self._value > self._allowed_max:
+            self._value = self._allowed_max
+    def _unwrap_atomic(self, value: Any) -> int:
+        """Provides the raw value of `value` if it is a `BoundedInt`, otherwise `value` itself."""
+        return value._value if isinstance(value, BoundedInt) else value
+    def _get_value(self) -> int:
+        return self._value
+    def _set_value(self, value: int) -> None:
+        if not isinstance(value, int):
+            raise TypeError(f"Only integer values allowed, but {type(value)} given!")
+        self._value = value
+        self._snap_to_min_max()
+    # validating accessor: assignments are type-checked and snapped into the allowed interval
+    value = property(_get_value, _set_value)
+    def __abs__(self) -> int:
+        return abs(self._value)
+    def __add__(self, other: int | BoundedInt) -> BoundedInt:
+        other_value = self._unwrap_atomic(other)
+        return BoundedInt(
+            self._value + other_value,
+            allowed_min=self._allowed_min,
+            allowed_max=self._allowed_max,
+        )
+    def __and__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return self._value & other_value
+    def __ceil__(self) -> int:
+        return self._value
+    def __eq__(self, other: object) -> bool:
+        other_value = self._unwrap_atomic(other)
+        return self._value == other_value
+    def __floor__(self) -> int:
+        return self._value
+    def __floordiv__(self, other: Any) -> int:
+        other_value = self._unwrap_atomic(other)
+        return self._value // other_value
+    def __int__(self) -> int:
+        return self._value
+    def __invert__(self) -> Any:
+        return ~self._value
+    def __le__(self, other: Any) -> bool:
+        other_value = self._unwrap_atomic(other)
+        return self._value <= other_value
+    def __lshift__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return self._value << other_value
+    def __lt__(self, other: Any) -> bool:
+        other_value = self._unwrap_atomic(other)
+        return self._value < other_value
+    def __mod__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return self._value % other_value
+    def __mul__(self, other: Any) -> BoundedInt:
+        other_value = self._unwrap_atomic(other)
+        return BoundedInt(
+            self._value * other_value,
+            allowed_min=self._allowed_min,
+            allowed_max=self._allowed_max,
+        )
+    def __neg__(self) -> BoundedInt:
+        return BoundedInt(
+            -self._value, allowed_min=self._allowed_min, allowed_max=self._allowed_max
+        )
+    def __or__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return self._value | other_value
+    def __pos__(self) -> Any:
+        return +self._value
+    def __pow__(self, exponent: Any, modulus: Union[Any, None] = None) -> BoundedInt:
+        """Computes ``self ** exponent``, or ``pow(self, exponent, modulus)`` if a modulus is given.
+        The result is snapped into the bounds of this integer.
+        Raises
+        ------
+        ValueError
+            If the result is not a whole number, e.g. for negative exponents.
+        """
+        exponent = self._unwrap_atomic(exponent)
+        modulus = self._unwrap_atomic(modulus)
+        if modulus is None:
+            res = self._value**exponent
+        else:
+            res = pow(self._value, exponent, modulus)
+        if res != int(res):
+            raise ValueError(
+                f"Power not support for type BoundedInt with argument {exponent}"
+            )
+        # whole-number floats (e.g. 4 ** 0.5) must be converted, the constructor only accepts real ints
+        return BoundedInt(
+            int(res), allowed_min=self._allowed_min, allowed_max=self._allowed_max
+        )
+    def __radd__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value + self._value
+    def __rand__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value & self._value
+    def __rfloordiv__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value // self._value
+    def __rlshift__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value << self._value
+    def __rmod__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value % self._value
+    def __rmul__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value * self._value
+    def __ror__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value | self._value
+    def __round__(self, ndigits: Union[int, None] = None) -> int:
+        # negative ndigits round to tens, hundreds, ... and therefore do change integers
+        return self._value if ndigits is None else round(self._value, ndigits)
+    def __rpow__(self, base: Any) -> Any:
+        other_value = self._unwrap_atomic(base)
+        return other_value**self._value
+    def __rrshift__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value >> self._value
+    def __rshift__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return self._value >> other_value
+    def __rtruediv__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value / self._value
+    def __rxor__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return other_value ^ self._value
+    def __truediv__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return self._value / other_value
+    def __trunc__(self) -> int:
+        return math.trunc(self._value)
+    def __xor__(self, other: Any) -> Any:
+        other_value = self._unwrap_atomic(other)
+        return self._value ^ other_value
+    def __hash__(self) -> int:
+        return hash(self._value)
+    def __repr__(self) -> str:
+        return f"BoundedInt({self._value}; min={self._allowed_min}, max={self._allowed_max})"
+    def __str__(self) -> str:
+        return str(self._value)

postbound/util/proc.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""Provides utilities to interact with outside processes."""
+from __future__ import annotations
+import os
+import pathlib
+import subprocess
+from collections.abc import Iterable
+from typing import Any, Optional
+class ProcResult(str):
+    """String-like outcome of running an external process.
+    Unlike `CompletedProcess` from the `subprocess` module, an instance can be used wherever the *stdout* of the process
+    is expected because it *is* a `str` with exactly that content. In addition, the truth value of the result tells
+    whether the process finished with exit code zero. Stdout, stderr and the exit code are also available as plain
+    attributes.
+    Parameters
+    ----------
+    out_data : str
+        The stdout of the process.
+    err_data : str
+        The stderr of the process.
+    exit_code : int
+        The exit code of the process.
+    """
+    def __new__(cls, out_data: str, err_data: str, exit_code: int):
+        # strings are immutable, hence the text content (stdout) has to be supplied when the object is allocated
+        return super().__new__(cls, out_data)
+    def __init__(self, out_data: str, err_data: str, exit_code: int) -> None:
+        self.out_data, self.err_data, self.exit_code = out_data, err_data, exit_code
+    def echo(self) -> None:
+        """Prints stdout and stderr, each below its own heading, for debugging by humans."""
+        print("stdout:", self.out_data, "stderr:", self.err_data, sep="\n")
+    def raise_if_error(self) -> None:
+        """Raises a `RuntimeError` if the process exited with a non-zero exit code."""
+        if self.exit_code == 0:
+            return
+        raise RuntimeError(
+            f"Process exited with code {self.exit_code}: '{self.err_data}'"
+        )
+    def __bool__(self) -> bool:
+        # truthiness reflects success of the process, not whether stdout is empty
+        return self.exit_code == 0
+    def __repr__(self) -> str:
+        return f"ProcResult(exit_code={self.exit_code}, stdout={self.out_data!r}, stderr={self.err_data!r})"
+    def __str__(self) -> str:
+        return self.out_data
+def run_cmd(
+    cmd: str | Iterable[Any],
+    *args,
+    work_dir: Optional[str | pathlib.Path] = None,
+    **kwargs,
+) -> ProcResult:
+    """Executes an arbitrary external command.
+    The command can be executed in a different working directory. After execution the working directory is restored,
+    even if the command could not be started at all.
+    This function delegates to `subprocess.run`. Therefore, most arguments accepted by this function follow the same rules
+    as the `run` function. Stdout and stderr are always captured as text.
+    Parameters
+    ----------
+    cmd : str | Iterable[Any]
+        The program to execute. Can be either a single invocation, or an iterable (list, tuple, generator, ...) of the
+        program name and its arguments.
+    work_dir : Optional[str  |  pathlib.Path], optional
+        The working directory where the process should be executed. If `None`, the current working directory is used.
+        Otherwise, the current working directory is changed to the desired directory for the duration of the process execution
+        and restored afterwards. Notice that this affects the entire Python process, including other threads.
+    *args
+        Additional arguments to be passed to the command.
+    **kwargs
+        Additional arguments to customize the subprocess invocation.
+    Returns
+    -------
+    ProcResult
+        The result of the process execution. If the command can be executed but fails, the `exit_code` will be non-zero. On the
+        other hand, if the command cannot be executed at all (e.g. because it is not found or the user does not have the
+        required permissions), an error is raised.
+    Raises
+    ------
+    ValueError
+        If `cmd` is an empty iterable.
+    """
+    if isinstance(cmd, Iterable) and not isinstance(cmd, str):
+        # materialize first: tuples cannot be concatenated with a list and generators cannot be indexed
+        cmd_parts = list(cmd)
+        if not cmd_parts:
+            raise ValueError("Cannot run an empty command")
+        cmd, args = str(cmd_parts[0]), cmd_parts[1:] + list(args)
+    invocation = [cmd] + [str(arg) for arg in args]
+    current_dir = os.getcwd()
+    work_dir = current_dir if work_dir is None else str(work_dir)
+    os.chdir(work_dir)
+    try:
+        res = subprocess.run(invocation, capture_output=True, text=True, **kwargs)
+    finally:
+        # subprocess.run raises if the program cannot be started, we must not stay in work_dir in that case
+        os.chdir(current_dir)
+    return ProcResult(res.stdout, res.stderr, res.returncode)

postbound/util/stats.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""Different mathematical and statistical formulas and utilities."""
+from __future__ import annotations
+import math
+import numbers
+import typing
+from collections.abc import Callable, Iterable
+import numpy as np
+def catalan_number(n: int) -> int:
+    """Computes the n-th catalan number. See https://en.wikipedia.org/wiki/Catalan_number.
+    The result is ``comb(2n, n) / (n + 1)``. The binomial coefficient is always divisible by ``n + 1``, so the
+    computation uses integer division. This keeps the result exact for large `n`, where a float division would lose
+    precision.
+    Raises
+    ------
+    ValueError
+        If `n` is negative (raised by `math.comb`).
+    """
+    return math.comb(2 * n, n) // (n + 1)
+def jaccard(a: set | frozenset, b: set | frozenset) -> float:
+    """Jaccard coefficient between a and b. Defined as |a ∩ b| / |a ∪ b|
+    If both sets are empty the coefficient is mathematically undefined (0 / 0). In this case 1.0 is returned, because
+    two empty sets are identical.
+    """
+    union = a | b
+    if not union:
+        return 1.0
+    return len(a & b) / len(union)
+T = typing.TypeVar("T")
+def score_matrix(
+    elems: Iterable[T], scoring: Callable[[T, T], numbers.Number]
+) -> np.ndarray:
+    """Computes the pairwise scores between all elements.
+    Parameters
+    ----------
+    elems : Iterable[T]
+        The elements to compare. The iterable is consumed exactly once.
+    scoring : Callable[[T, T], numbers.Number]
+        The scoring function. It is called for each ordered pair of elements, including each element with itself.
+    Returns
+    -------
+    np.ndarray
+        A square matrix where entry ``[i, j]`` is the score of the i-th and the j-th element.
+    """
+    items = list(elems)
+    size = len(items)
+    scores = np.ones((size, size))
+    for row in range(size):
+        for col in range(size):
+            scores[row, col] = scoring(items[row], items[col])
+    return scores

postbound/util/system.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""Provides utilities to access (operating system) related information."""
+from __future__ import annotations
+import os
+import sys
+import warnings
+from typing import Optional
+from . import proc
+def open_files(pid: Optional[int] = None) -> list[str]:
+    """Provides the shared objects (``.so`` files, or ``.dylib`` files on macOS) opened by the given process/PID.
+    The files are determined via the ``lsof`` command line tool. On non-POSIX systems a warning is issued and no files
+    are reported.
+    Parameters
+    ----------
+    pid : Optional[int], optional
+        The PID of the process to query. Defaults to the current process.
+    Returns
+    -------
+    list[str]
+        The opened files whose path contains the shared object extension of the platform.
+    """
+    if os.name != "posix":
+        warnings.warn("Can only check for open files on POSIX systems.")
+        return []
+    if pid is None:
+        pid = os.getpid()
+    ext = ".so" if sys.platform != "darwin" else ".dylib"
+    # The output of plain lsof -p is impractical to parse and differs between lsof versions. Therefore, the pipeline
+    # works as follows:
+    # 1. lsof -Fn -p gives the names of all opened files for a specific PID, each prefixed with "n" to distinguish them
+    #    from other entries (e.g. sockets)
+    # 2. grep '^n' keeps only these name entries
+    # 3. the first cut strips the "n" prefix
+    # 4. the second cut drops suffixes like (path dev=...) that lsof appends to some files. It is currently unknown how
+    #    this suffix should be interpreted or how lsof can be prevented from emitting it.
+    # 5. the final grep filters for shared objects. It intentionally does not anchor the extension at the end of the
+    #    line ('.so$') in order to keep versioned files like libc.so.6
+    lsof_pipeline = f"lsof -Fn -p {pid} | grep '^n' | cut -c2- | cut -d' ' -f1 | grep '{ext}'"
+    listing = proc.run_cmd(lsof_pipeline, shell=True)
+    return listing.splitlines()

postbound/util/typing.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""Provides additional type hints, type decorators, ..."""
+from __future__ import annotations
+import functools
+import warnings
+from typing import Callable
+from .._base import T
+def deprecated(func: Callable) -> Callable:
+    """Indicates that the given function or class should no longer be used.
+    Each call of the decorated object emits a warning before delegating to the original object. The warning is
+    attributed to the calling code, so its location shows where the deprecated object is still being used.
+    """
+    @functools.wraps(func)
+    def deprecation_wrapper(*args, **kwargs):
+        # stacklevel=2: report the caller of the deprecated object instead of this wrapper
+        warnings.warn(f"Usage of {func.__name__} is deprecated", stacklevel=2)
+        return func(*args, **kwargs)
+    return deprecation_wrapper
+def module_local(func: Callable) -> Callable:
+    """Marks a seemingly private method as being intended for use by other objects from the same module.
+    This decorator is purely informational: the decorated object is handed back unmodified.
+    """
+    return func
+# `Lazy` is bound to `None`, so a lazy value that has not been computed yet is indistinguishable from `None`.
+Lazy = None
+"""A placeholder to indicate that a value is not yet computed, but will be computed lazily."""
+# Since `Lazy` is `None`, this evaluates to the union `T | None`. `T` is imported from `.._base` and the union is
+# built when the module is imported (it is a plain assignment, not an annotation).
+LazyVal = T | Lazy
+"""Type hint for a value that is computed lazily."""

postbound/vis/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Contains utilities to visualize different PostBOUND objects."""
+# NOTE: the `optimizer` submodule is imported under the shorter alias `opt`, which is also the name listed in `__all__`.
+from . import fdl, graphs, optimizer as opt, plots, tonic, trees
+__all__ = ["fdl", "graphs", "opt", "plots", "tonic", "trees"]