pymc-extras 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pymc_extras/__init__.py CHANGED
@@ -16,7 +16,11 @@ import logging
  from pymc_extras import gp, statespace, utils
  from pymc_extras.distributions import *
  from pymc_extras.inference.fit import fit
- from pymc_extras.model.marginal.marginal_model import MarginalModel, marginalize
+ from pymc_extras.model.marginal.marginal_model import (
+     MarginalModel,
+     marginalize,
+     recover_marginals,
+ )
  from pymc_extras.model.model_api import as_model
  from pymc_extras.version import __version__
 
@@ -214,8 +214,8 @@ class DiscreteMarkovChain(Distribution):
  discrete_mc_op = DiscreteMarkovChainRV(
      inputs=[P_, steps_, init_dist_, state_rng],
      outputs=[state_next_rng, discrete_mc_],
-     ndim_supp=1,
      n_lags=n_lags,
+     extended_signature="(p,p),(),(p),[rng]->[rng],(t)",
  )
 
  discrete_mc = discrete_mc_op(P, steps, init_dist, state_rng)
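Note: the hunk above replaces the ndim_supp=1 argument with a gufunc-style extended_signature string, "(p,p),(),(p),[rng]->[rng],(t)", spelling out the core dimensions of each input (P, steps, init_dist, rng) and output (rng, the length-t chain). User-facing usage of the distribution is unchanged. A minimal sketch, assuming the documented P/init_dist/steps keywords and the top-level re-export (these names are not shown in this diff):

    import numpy as np
    import pymc as pm
    from pymc_extras import DiscreteMarkovChain  # top-level re-export assumed

    with pm.Model():
        # 3-state chain: each row of P is a distribution over the next state
        P = pm.Dirichlet("P", a=np.ones((3, 3)))
        init_dist = pm.Categorical.dist(p=np.full(3, 1 / 3))
        chain = DiscreteMarkovChain("chain", P=P, init_dist=init_dist, steps=20)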
@@ -11,7 +11,6 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- from importlib.util import find_spec
 
 
  def fit(method, **kwargs):
@@ -31,9 +30,6 @@ def fit(method, **kwargs):
  arviz.InferenceData
  """
  if method == "pathfinder":
-     if find_spec("blackjax") is None:
-         raise RuntimeError("Need BlackJAX to use `pathfinder`")
-
      from pymc_extras.inference.pathfinder import fit_pathfinder
 
      return fit_pathfinder(**kwargs)
@@ -0,0 +1,3 @@
+ from pymc_extras.inference.pathfinder.pathfinder import fit_pathfinder
+
+ __all__ = ["fit_pathfinder"]
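With the BlackJAX check removed from fit() above, method="pathfinder" now resolves through this new subpackage (presumably its __init__.py) to fit_pathfinder. A minimal sketch of the dispatch, assuming fit_pathfinder picks up the model from the active PyMC model context and accepts a num_draws keyword (neither is shown in this diff):

    import numpy as np
    import pymc as pm
    from pymc_extras.inference.fit import fit

    with pm.Model():
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("obs", mu, 1.0, observed=np.random.normal(size=100))
        # kwargs are forwarded untouched: fit(method="pathfinder", **kwargs) -> fit_pathfinder(**kwargs)
        idata = fit(method="pathfinder", num_draws=500)  # num_draws and model-from-context are assumptions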
@@ -0,0 +1,139 @@
+ import logging
+ import warnings as _warnings
+
+ from dataclasses import dataclass, field
+ from typing import Literal
+
+ import arviz as az
+ import numpy as np
+
+ from numpy.typing import NDArray
+ from scipy.special import logsumexp
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass(frozen=True)
+ class ImportanceSamplingResult:
+     """container for importance sampling results"""
+
+     samples: NDArray
+     pareto_k: float | None = None
+     warnings: list[str] = field(default_factory=list)
+     method: str = "none"
+
+
+ def importance_sampling(
+     samples: NDArray,
+     logP: NDArray,
+     logQ: NDArray,
+     num_draws: int,
+     method: Literal["psis", "psir", "identity", "none"] | None,
+     random_seed: int | None = None,
+ ) -> ImportanceSamplingResult:
+     """Pareto Smoothed Importance Resampling (PSIR)
+     This implements the Pareto Smoothed Importance Resampling (PSIR) method, as described in Algorithm 5 of Zhang et al. (2022). PSIR follows a similar approach to the Algorithm 1 PSIS diagnostic from Yao et al. (2018). However, before computing the importance ratio r_s, logP and logQ are adjusted to account for the number of estimators (or paths). The process involves resampling from the original sample with replacement, with probabilities proportional to the computed importance weights from PSIS.
+
+     Parameters
+     ----------
+     samples : NDArray
+         samples from proposal distribution, shape (L, M, N)
+     logP : NDArray
+         log probability values of target distribution, shape (L, M)
+     logQ : NDArray
+         log probability values of proposal distribution, shape (L, M)
+     num_draws : int
+         number of draws to return where num_draws <= samples.shape[0]
+     method : str, optional
+         importance sampling method to use. Options are "psis" (default), "psir", "identity", "none". Pareto Smoothed Importance Sampling (psis) is recommended in many cases for more stable results than Pareto Smoothed Importance Resampling (psir). identity applies the log importance weights directly without resampling. none applies no importance sampling weights and returns the samples as is, of size num_draws_per_path * num_paths.
+     random_seed : int | None
+
+     Returns
+     -------
+     ImportanceSamplingResult
+         importance sampled draws and other info based on the specified method
+
+     Future work!
+     ----------
+     - Implement the 3 sampling approaches and 5 weighting functions from Elvira et al. (2019)
+     - Implement Algorithm 2 VSBC marginal diagnostics from Yao et al. (2018)
+     - Incorporate these various diagnostics, sampling approaches and weighting functions into VI algorithms.
+
+     References
+     ----------
+     Elvira, V., Martino, L., Luengo, D., & Bugallo, M. F. (2019). Generalized Multiple Importance Sampling. Statistical Science, 34(1), 129-155. https://doi.org/10.1214/18-STS668
+
+     Yao, Y., Vehtari, A., Simpson, D., & Gelman, A. (2018). Yes, but Did It Work?: Evaluating Variational Inference. arXiv:1802.02538 [Stat]. http://arxiv.org/abs/1802.02538
+
+     Zhang, L., Carpenter, B., Gelman, A., & Vehtari, A. (2022). Pathfinder: Parallel quasi-Newton variational inference. Journal of Machine Learning Research, 23(306), 1-49.
+     """
+
+     warnings = []
+     num_paths, _, N = samples.shape
+
+     if method == "none":
+         warnings.append(
+             "Importance sampling is disabled. The samples are returned as is which may include samples from failed paths with non-finite logP or logQ values. It is recommended to use importance_sampling='psis' for better stability."
+         )
+         return ImportanceSamplingResult(samples=samples, warnings=warnings)
+     else:
+         samples = samples.reshape(-1, N)
+         logP = logP.ravel()
+         logQ = logQ.ravel()
+
+         # adjust log densities
+         log_I = np.log(num_paths)
+         logP -= log_I
+         logQ -= log_I
+         logiw = logP - logQ
+
+         with _warnings.catch_warnings():
+             _warnings.filterwarnings(
+                 "ignore", category=RuntimeWarning, message="overflow encountered in exp"
+             )
+             if method == "psis":
+                 replace = False
+                 logiw, pareto_k = az.psislw(logiw)
+             elif method == "psir":
+                 replace = True
+                 logiw, pareto_k = az.psislw(logiw)
+             elif method == "identity":
+                 replace = False
+                 pareto_k = None
+             else:
+                 raise ValueError(f"Invalid importance sampling method: {method}")
+
+     # NOTE: Pareto k is normally bad for Pathfinder even when the posterior is close to the NUTS posterior or closer to NUTS than ADVI.
+     # Pareto k may not be a good diagnostic for Pathfinder.
+     # TODO: Find replacement diagnostics for Pathfinder.
+
+     p = np.exp(logiw - logsumexp(logiw))
+     rng = np.random.default_rng(random_seed)
+
+     try:
+         resampled = rng.choice(samples, size=num_draws, replace=replace, p=p, shuffle=False, axis=0)
+         return ImportanceSamplingResult(
+             samples=resampled, pareto_k=pareto_k, warnings=warnings, method=method
+         )
+     except ValueError as e1:
+         if "Fewer non-zero entries in p than size" in str(e1):
+             num_nonzero = np.where(np.nonzero(p)[0], 1, 0).sum()
+             warnings.append(
+                 f"Not enough valid samples: {num_nonzero} available out of {num_draws} requested. Switching to psir importance sampling."
+             )
+             try:
+                 resampled = rng.choice(
+                     samples, size=num_draws, replace=True, p=p, shuffle=False, axis=0
+                 )
+                 return ImportanceSamplingResult(
+                     samples=resampled, pareto_k=pareto_k, warnings=warnings, method=method
+                 )
+             except ValueError as e2:
+                 logger.error(
+                     "Importance sampling failed even with psir importance sampling. "
+                     "This might indicate invalid probability weights or insufficient valid samples."
+                 )
+                 raise ValueError(
+                     "Importance sampling failed for both with and without replacement"
+                 ) from e2
+         raise
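The file added in the hunk above exposes importance_sampling with the signature shown (samples of shape (L, M, N), logP/logQ of shape (L, M)). A minimal sketch on synthetic arrays; the module path is an assumption, since the diff does not show the new file's location:

    import numpy as np
    from pymc_extras.inference.pathfinder.importance_sampling import importance_sampling  # path assumed

    L, M, N = 4, 250, 3  # paths, draws per path, parameters
    rng = np.random.default_rng(0)
    samples = rng.normal(size=(L, M, N))
    logP = -0.5 * (samples**2).sum(-1)                # stand-in target log densities, shape (L, M)
    logQ = logP + rng.normal(scale=0.1, size=(L, M))  # stand-in proposal log densities, shape (L, M)

    result = importance_sampling(samples, logP, logQ, num_draws=500, method="psis", random_seed=0)
    print(result.samples.shape, result.method, result.pareto_k)

With method="psis" the L * M pooled draws are reweighted via az.psislw and subsampled without replacement (replace=False), so num_draws should not exceed the number of pooled draws.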
@@ -0,0 +1,190 @@
+ import logging
+
+ from collections.abc import Callable
+ from dataclasses import dataclass, field
+ from enum import Enum, auto
+
+ import numpy as np
+
+ from numpy.typing import NDArray
+ from scipy.optimize import minimize
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass(slots=True)
+ class LBFGSHistory:
+     """History of LBFGS iterations."""
+
+     x: NDArray[np.float64]
+     g: NDArray[np.float64]
+     count: int
+
+     def __post_init__(self):
+         self.x = np.ascontiguousarray(self.x, dtype=np.float64)
+         self.g = np.ascontiguousarray(self.g, dtype=np.float64)
+
+
+ @dataclass(slots=True)
+ class LBFGSHistoryManager:
+     """manages and stores the history of lbfgs optimisation iterations.
+
+     Parameters
+     ----------
+     value_grad_fn : Callable
+         function that returns tuple of (value, gradient) given input x
+     x0 : NDArray
+         initial position
+     maxiter : int
+         maximum number of iterations to store
+     """
+
+     value_grad_fn: Callable[[NDArray[np.float64]], tuple[np.float64, NDArray[np.float64]]]
+     x0: NDArray[np.float64]
+     maxiter: int
+     x_history: NDArray[np.float64] = field(init=False)
+     g_history: NDArray[np.float64] = field(init=False)
+     count: int = field(init=False)
+
+     def __post_init__(self) -> None:
+         self.x_history = np.empty((self.maxiter + 1, self.x0.shape[0]), dtype=np.float64)
+         self.g_history = np.empty((self.maxiter + 1, self.x0.shape[0]), dtype=np.float64)
+         self.count = 0
+
+         value, grad = self.value_grad_fn(self.x0)
+         if np.all(np.isfinite(grad)) and np.isfinite(value):
+             self.add_entry(self.x0, grad)
+
+     def add_entry(self, x: NDArray[np.float64], g: NDArray[np.float64]) -> None:
+         """adds new position and gradient to history.
+
+         Parameters
+         ----------
+         x : NDArray
+             position vector
+         g : NDArray
+             gradient vector
+         """
+         self.x_history[self.count] = x
+         self.g_history[self.count] = g
+         self.count += 1
+
+     def get_history(self) -> LBFGSHistory:
+         """returns history of optimisation iterations."""
+         return LBFGSHistory(
+             x=self.x_history[: self.count], g=self.g_history[: self.count], count=self.count
+         )
+
+     def __call__(self, x: NDArray[np.float64]) -> None:
+         value, grad = self.value_grad_fn(x)
+         if np.all(np.isfinite(grad)) and np.isfinite(value) and self.count < self.maxiter + 1:
+             self.add_entry(x, grad)
+
+
+ class LBFGSStatus(Enum):
+     CONVERGED = auto()
+     MAX_ITER_REACHED = auto()
+     DIVERGED = auto()
+     # Statuses that lead to Exceptions:
+     INIT_FAILED = auto()
+     LBFGS_FAILED = auto()
+
+
+ class LBFGSException(Exception):
+     DEFAULT_MESSAGE = "LBFGS failed."
+
+     def __init__(self, message=None, status: LBFGSStatus = LBFGSStatus.LBFGS_FAILED):
+         super().__init__(message or self.DEFAULT_MESSAGE)
+         self.status = status
+
+
+ class LBFGSInitFailed(LBFGSException):
+     DEFAULT_MESSAGE = "LBFGS failed to initialise."
+
+     def __init__(self, message=None):
+         super().__init__(message or self.DEFAULT_MESSAGE, LBFGSStatus.INIT_FAILED)
+
+
+ class LBFGS:
+     """L-BFGS optimizer wrapper around scipy's implementation.
+
+     Parameters
+     ----------
+     value_grad_fn : Callable
+         function that returns tuple of (value, gradient) given input x
+     maxcor : int
+         maximum number of variable metric corrections
+     maxiter : int, optional
+         maximum number of iterations, defaults to 1000
+     ftol : float, optional
+         function tolerance for convergence, defaults to 1e-5
+     gtol : float, optional
+         gradient tolerance for convergence, defaults to 1e-8
+     maxls : int, optional
+         maximum number of line search steps, defaults to 1000
+     """
+
+     def __init__(
+         self, value_grad_fn, maxcor, maxiter=1000, ftol=1e-5, gtol=1e-8, maxls=1000
+     ) -> None:
+         self.value_grad_fn = value_grad_fn
+         self.maxcor = maxcor
+         self.maxiter = maxiter
+         self.ftol = ftol
+         self.gtol = gtol
+         self.maxls = maxls
+
+     def minimize(self, x0) -> tuple[NDArray, NDArray, int, LBFGSStatus]:
+         """minimizes objective function starting from initial position.
+
+         Parameters
+         ----------
+         x0 : array_like
+             initial position
+
+         Returns
+         -------
+         x : NDArray
+             history of positions
+         g : NDArray
+             history of gradients
+         count : int
+             number of iterations
+         status : LBFGSStatus
+             final status of optimisation
+         """
+
+         x0 = np.array(x0, dtype=np.float64)
+
+         history_manager = LBFGSHistoryManager(
+             value_grad_fn=self.value_grad_fn, x0=x0, maxiter=self.maxiter
+         )
+
+         result = minimize(
+             self.value_grad_fn,
+             x0,
+             method="L-BFGS-B",
+             jac=True,
+             callback=history_manager,
+             options={
+                 "maxcor": self.maxcor,
+                 "maxiter": self.maxiter,
+                 "ftol": self.ftol,
+                 "gtol": self.gtol,
+                 "maxls": self.maxls,
+             },
+         )
+         history = history_manager.get_history()
+
+         # warnings and suggestions for LBFGSStatus are displayed at the end
+         if result.status == 1:
+             lbfgs_status = LBFGSStatus.MAX_ITER_REACHED
+         elif (result.status == 2) or (history.count <= 1):
+             if result.nit <= 1:
+                 lbfgs_status = LBFGSStatus.INIT_FAILED
+             elif result.fun == np.inf:
+                 lbfgs_status = LBFGSStatus.DIVERGED
+         else:
+             lbfgs_status = LBFGSStatus.CONVERGED
+
+         return history.x, history.g, history.count, lbfgs_status
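The LBFGS wrapper added above records every accepted position/gradient pair via the history-manager callback and maps scipy's result.status onto LBFGSStatus. A minimal sketch on a convex quadratic, where value_grad_fn returns a (value, gradient) tuple as the wrapper expects; the module path is an assumption, since the diff does not show the new file's location:

    import numpy as np
    from pymc_extras.inference.pathfinder.lbfgs import LBFGS, LBFGSStatus  # path assumed

    def value_grad_fn(x):
        # f(x) = 0.5 * ||x||^2, gradient = x
        return 0.5 * np.sum(x**2), x

    lbfgs = LBFGS(value_grad_fn, maxcor=5, maxiter=100)
    x_history, g_history, count, status = lbfgs.minimize(np.array([3.0, -2.0, 1.0]))

    print(status is LBFGSStatus.CONVERGED)          # True for this well-behaved problem
    print(x_history.shape, g_history.shape, count)  # one row per stored iterate, including x0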