pymc-extras 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff shows the content of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
pymc_extras/__init__.py CHANGED
@@ -15,9 +15,7 @@ import logging
 
 from pymc_extras import gp, statespace, utils
 from pymc_extras.distributions import *
-from pymc_extras.inference.find_map import find_MAP
-from pymc_extras.inference.fit import fit
-from pymc_extras.inference.laplace import fit_laplace
+from pymc_extras.inference import find_MAP, fit, fit_laplace, fit_pathfinder
 from pymc_extras.model.marginal.marginal_model import (
     MarginalModel,
     marginalize,
@@ -26,6 +26,7 @@ from pymc_extras.distributions.discrete import (
 from pymc_extras.distributions.histogram_utils import histogram_approximation
 from pymc_extras.distributions.multivariate import R2D2M2CP
 from pymc_extras.distributions.timeseries import DiscreteMarkovChain
+from pymc_extras.distributions.transforms import PartialOrder
 
 __all__ = [
     "Chi",
@@ -37,4 +38,5 @@ __all__ = [
     "R2D2M2CP",
     "Skellam",
     "histogram_approximation",
+    "PartialOrder",
 ]
pymc_extras/distributions/transforms/__init__.py ADDED
@@ -0,0 +1,3 @@
+from pymc_extras.distributions.transforms.partial_order import PartialOrder
+
+__all__ = ["PartialOrder"]
pymc_extras/distributions/transforms/partial_order.py ADDED
@@ -0,0 +1,227 @@
+# Copyright 2025 The PyMC Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import pytensor.tensor as pt
+
+from pymc.logprob.transforms import Transform
+
+__all__ = ["PartialOrder"]
+
+
+def dtype_minval(dtype):
+    """Find the minimum value for a given dtype"""
+    return np.iinfo(dtype).min if np.issubdtype(dtype, np.integer) else np.finfo(dtype).min
+
+
+def padded_where(x, to_len, padval=-1):
+    """A padded version of np.where"""
+    w = np.where(x)
+    return np.concatenate([w[0], np.full(to_len - len(w[0]), padval)])
+
+
+class PartialOrder(Transform):
+    """Create a PartialOrder transform
+
+    A more flexible version of the PyMC ordered transform that
+    allows specifying a (strict) partial order on the elements.
+
+    Examples
+    --------
+    .. code:: python
+
+        import numpy as np
+        import pymc as pm
+        import pymc_extras as pmx
+
+        # Define two partial orders on 4 elements
+        # am[i, j] = 1 means i < j
+        adj_mats = np.array([
+            # 0 < {1, 2} < 3
+            [[0, 1, 1, 0],
+             [0, 0, 0, 1],
+             [0, 0, 0, 1],
+             [0, 0, 0, 0]],
+
+            # 1 < 0 < 3 < 2
+            [[0, 0, 0, 1],
+             [1, 0, 0, 0],
+             [0, 0, 0, 0],
+             [0, 0, 1, 0]],
+        ])
+
+        # Create the partial order from the adjacency matrices
+        po = pmx.PartialOrder(adj_mats)
+
+        with pm.Model() as model:
+            # Generate 3 samples from both partial orders
+            pm.Normal("po_vals", shape=(3, 2, 4), transform=po,
+                      initval=po.initvals((3, 2, 4)))
+
+            idata = pm.sample()
+
+        # Verify that for the first po, the zeroth element is always the smallest
+        assert (idata.posterior['po_vals'][:, :, :, 0, 0] <
+                idata.posterior['po_vals'][:, :, :, 0, 1:]).all()
+
+        # Verify that for the second po, the second element is always the largest
+        assert (idata.posterior['po_vals'][:, :, :, 1, 2] >=
+                idata.posterior['po_vals'][:, :, :, 1, :]).all()
+
+    Technical notes
+    ---------------
+    The partial order must be strict, i.e. without equalities.
+    A DAG defining the partial order is sufficient, as the transitive closure is computed automatically.
+    The transform runs in O(N*D) time, but initialization takes O(N^3),
+    where N is the number of nodes in the DAG and D is the maximum
+    in-degree of a node in the transitive reduction.
+    """
+
+    name = "partial_order"
+
+    def __init__(self, adj_mat):
+        """
+        Initialize the PartialOrder transform
+
+        Parameters
+        ----------
+        adj_mat: ndarray
+            adjacency matrix for the DAG that generates the partial order,
+            where ``adj_mat[i][j] = 1`` denotes ``i < j``.
+            Note that this also accepts multiple DAGs if the RV is multidimensional.
+        """
+
+        # Basic input checks
+        if adj_mat.ndim < 2:
+            raise ValueError("Adjacency matrix must have at least 2 dimensions")
+        if adj_mat.shape[-2] != adj_mat.shape[-1]:
+            raise ValueError("Adjacency matrix is not square")
+        if adj_mat.min() != 0 or adj_mat.max() != 1:
+            raise ValueError("Adjacency matrix must contain only 0s and 1s")
+
+        # Create index over the first ellipsis dimensions
+        idx = np.ix_(*[np.arange(s) for s in adj_mat.shape[:-2]])
+
+        # Transitive closure using Floyd-Warshall
+        tc = adj_mat.astype(bool)
+        for k in range(tc.shape[-1]):
+            tc |= np.logical_and(tc[..., :, k, None], tc[..., None, k, :])
+
+        # Check that the DAG is acyclic (a cycle would imply an equality)
+        if np.any(tc.diagonal(axis1=-2, axis2=-1)):
+            raise ValueError("Partial order contains equalities")
+
+        # Transitive reduction using the closure
+        # This gives the minimum description of the partial order
+        # and keeps the maximum in-degree small
+        adj_mat = tc * (1 - np.matmul(tc, tc))
+
+        # Find the maximum in-degree of the reduced DAG
+        dag_idim = adj_mat.sum(axis=-2).max()
+
+        # Topological sort
+        ts_inds = np.zeros(adj_mat.shape[:-1], dtype=int)
+        dm = adj_mat.copy()
+        for i in range(adj_mat.shape[1]):
+            assert dm.sum(axis=-2).min() == 0  # DAG is acyclic
+            nind = np.argmin(dm.sum(axis=-2), axis=-1)
+            dm[(*idx, slice(None), nind)] = 1  # Make nind not show up again
+            dm[(*idx, nind, slice(None))] = 0  # Allow its children to show
+            ts_inds[(*idx, i)] = nind
+        self.ts_inds = ts_inds
+
+        # Change the DAG to adjacency lists (with -1 for NA)
+        dag_T = np.apply_along_axis(padded_where, axis=-2, arr=adj_mat, padval=-1, to_len=dag_idim)
+        self.dag = np.swapaxes(dag_T, -2, -1)
+        self.is_start = np.all(self.dag[..., :, :] == -1, axis=-1)
+
+    def initvals(self, shape=None, lower=-1, upper=1):
+        """
+        Create a set of appropriate initial values for the variable.
+        NB! It is important that proper initial values are used,
+        as only properly ordered values are in the range of the transform.
+
+        Parameters
+        ----------
+        shape: tuple, default None
+            shape of the initial values. If None, ``adj_mat.shape[:-1]`` is used
+        lower: float, default -1
+            lower bound for the initial values
+        upper: float, default 1
+            upper bound for the initial values
+
+        Returns
+        -------
+        vals: ndarray
+            initial values for the transformed variable
+        """
+
+        if shape is None:
+            shape = self.dag.shape[:-1]
+
+        if shape[-len(self.dag.shape[:-1]) :] != self.dag.shape[:-1]:
+            raise ValueError("Shape must match the shape of the adjacency matrix")
+
+        # Create the initial values
+        vals = np.linspace(lower, upper, self.dag.shape[-2])
+        inds = np.argsort(self.ts_inds, axis=-1)
+        ivals = vals[inds]
+
+        # Expand the initial values to the extra dimensions
+        extra_dims = shape[: -len(self.dag.shape[:-1])]
+        ivals = np.tile(ivals, extra_dims + tuple([1] * len(self.dag.shape[:-1])))
+
+        return ivals
+
+    def backward(self, value, *inputs):
+        minv = dtype_minval(value.dtype)
+        x = pt.concatenate(
+            [pt.zeros_like(value), pt.full(value.shape[:-1], minv)[..., None]], axis=-1
+        )
+
+        # Indices to allow broadcasting the max over the last dimension
+        idx = np.ix_(*[np.arange(s) for s in self.dag.shape[:-2]])
+        idx2 = tuple(np.tile(i[:, None], self.dag.shape[-1]) for i in idx)
+
+        # Has to be done stepwise, as later steps depend on previous values
+        # Also has to be done in topological order, hence the ts_inds
+        for i in range(self.dag.shape[-2]):
+            tsi = self.ts_inds[..., i]
+            if len(tsi.shape) == 0:
+                tsi = int(tsi)  # if 0-d, convert to a scalar index
+            ni = (*idx, tsi)  # i-th node in topological order
+            eni = (Ellipsis, *ni)
+            ist = self.is_start[ni]
+
+            mval = pt.max(x[(Ellipsis, *idx2, self.dag[ni])], axis=-1)
+            x = pt.set_subtensor(x[eni], ist * value[eni] + (1 - ist) * (mval + pt.exp(value[eni])))
+        return x[..., :-1]
+
+    def forward(self, value, *inputs):
+        y = pt.zeros_like(value)
+
+        minv = dtype_minval(value.dtype)
+        vx = pt.concatenate([value, pt.full(value.shape[:-1], minv)[..., None]], axis=-1)
+
+        # Indices to allow broadcasting the max over the last dimension
+        idx = np.ix_(*[np.arange(s) for s in self.dag.shape[:-2]])
+        idx = tuple(np.tile(i[:, None, None], self.dag.shape[-2:]) for i in idx)
+
+        y = self.is_start * value + (1 - self.is_start) * (
+            pt.log(value - pt.max(vx[(Ellipsis, *idx, self.dag[..., :])], axis=-1))
+        )
+
+        return y
+
+    def log_jac_det(self, value, *inputs):
+        return pt.sum(value * (1 - self.is_start), axis=-1)
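
The transitive closure / reduction pass above is the O(N^3) part of initialization. As a standalone illustration (a sketch of mine, not package code), the same computation on the first DAG from the docstring:

    # Replay PartialOrder.__init__'s closure/reduction steps on "0 < {1, 2} < 3".
    import numpy as np

    adj = np.array([
        [0, 1, 1, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 0],
    ])

    # Floyd-Warshall transitive closure: adds the implied edge 0 < 3
    tc = adj.astype(bool)
    for k in range(tc.shape[-1]):
        tc |= np.logical_and(tc[:, k, None], tc[None, k, :])
    assert tc[0, 3] and not tc.diagonal().any()  # acyclic, hence a strict order

    # Transitive reduction: drop edge i -> j when a longer path i -> ... -> j exists
    red = tc.astype(int) * (1 - (tc.astype(int) @ tc.astype(int) > 0))
    assert (red == adj).all()  # this input was already reduced

    # D = maximum in-degree of the reduced DAG; bounds the max() in backward()
    assert red.sum(axis=0).max() == 2  # node 3 has two parents, 1 and 2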
pymc_extras/inference/__init__.py CHANGED
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+from pymc_extras.inference.find_map import find_MAP
 from pymc_extras.inference.fit import fit
+from pymc_extras.inference.laplace import fit_laplace
+from pymc_extras.inference.pathfinder.pathfinder import fit_pathfinder
 
-__all__ = ["fit"]
+__all__ = ["fit", "fit_pathfinder", "fit_laplace", "find_MAP"]
pymc_extras/inference/fit.py CHANGED
@@ -11,11 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import arviz as az
 
 
-def fit(method, **kwargs):
+def fit(method: str, **kwargs) -> az.InferenceData:
     """
-    Fit a model with an inference algorithm
+    Fit a model with an inference algorithm.
+    See :func:`fit_pathfinder` and :func:`fit_laplace` for more details.
 
     Parameters
     ----------
@@ -23,11 +25,11 @@ def fit(method, **kwargs):
         Which inference method to run.
         Supported: pathfinder or laplace
 
-    kwargs are passed on.
+    kwargs: keyword arguments are passed on to the inference method.
 
     Returns
     -------
-    arviz.InferenceData
+    :class:`~arviz.InferenceData`
     """
     if method == "pathfinder":
         from pymc_extras.inference.pathfinder import fit_pathfinder
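
A minimal usage sketch for the annotated fit() (the toy model is mine; extra keyword arguments are forwarded to the chosen method, as the docstring describes):

    import pymc as pm
    import pymc_extras as pmx

    with pm.Model():
        mu = pm.Normal("mu", 0, 1)
        pm.Normal("y", mu=mu, sigma=1, observed=[0.1, -0.3, 0.2])
        idata = pmx.fit(method="pathfinder")  # dispatches to fit_pathfinder
        # idata = pmx.fit(method="laplace")   # dispatches to fit_laplace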
pymc_extras/inference/laplace.py CHANGED
@@ -377,7 +377,10 @@ def sample_laplace_posterior(
     posterior_dist = stats.multivariate_normal(
         mean=mu.data, cov=H_inv, allow_singular=True, seed=rng
     )
+
     posterior_draws = posterior_dist.rvs(size=(chains, draws))
+    if mu.data.shape == (1,):
+        posterior_draws = np.expand_dims(posterior_draws, -1)
 
     if transform_samples:
         constrained_rvs, unconstrained_vector = _unconstrained_vector_to_constrained_rvs(model)
@@ -506,7 +509,7 @@ fit_laplace(
 
     Returns
     -------
-    idata: az.InferenceData
+    :class:`~arviz.InferenceData`
         An InferenceData object containing the approximated posterior samples.
 
     Examples
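
The expand_dims guard above works around a scipy behavior: for a one-dimensional multivariate normal, rvs() squeezes out the event dimension, so single-parameter models would lose the trailing axis. A standalone sketch with toy shapes (not package code):

    import numpy as np
    from scipy import stats

    chains, draws = 4, 100
    dist = stats.multivariate_normal(mean=np.zeros(1), cov=np.eye(1))
    samples = dist.rvs(size=(chains, draws))  # shape (4, 100): event dim dropped
    samples = np.expand_dims(samples, -1)     # restored to (chains, draws, 1)
    assert samples.shape == (chains, draws, 1)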
pymc_extras/inference/pathfinder/importance_sampling.py CHANGED
@@ -20,7 +20,7 @@ class ImportanceSamplingResult:
     samples: NDArray
     pareto_k: float | None = None
     warnings: list[str] = field(default_factory=list)
-    method: str = "none"
+    method: str = "psis"
 
 
 def importance_sampling(
@@ -28,7 +28,7 @@ def importance_sampling(
     logP: NDArray,
     logQ: NDArray,
     num_draws: int,
-    method: Literal["psis", "psir", "identity", "none"] | None,
+    method: Literal["psis", "psir", "identity"] | None,
     random_seed: int | None = None,
 ) -> ImportanceSamplingResult:
     """Pareto Smoothed Importance Resampling (PSIR)
@@ -44,8 +44,15 @@
         log probability values of proposal distribution, shape (L, M)
     num_draws : int
         number of draws to return where num_draws <= samples.shape[0]
-    method : str, optional
-        importance sampling method to use. Options are "psis" (default), "psir", "identity", "none". Pareto Smoothed Importance Sampling (psis) is recommended in many cases for more stable results than Pareto Smoothed Importance Resampling (psir). identity applies the log importance weights directly without resampling. none applies no importance sampling weights and returns the samples as is of size num_draws_per_path * num_paths.
+    method : str, None, optional
+        Method to apply sampling based on log importance weights (logP - logQ).
+        Options are:
+        "psis" : Pareto Smoothed Importance Sampling (default)
+            Recommended for more stable results.
+        "psir" : Pareto Smoothed Importance Resampling
+            Less stable than PSIS.
+        "identity" : Applies log importance weights directly without resampling.
+        None : No importance sampling weights. Returns raw samples of size (num_paths, num_draws_per_path, N), where N is the number of model parameters. Other methods return samples of size (num_draws, N).
     random_seed : int | None
 
     Returns
@@ -71,11 +78,11 @@
     warnings = []
     num_paths, _, N = samples.shape
 
-    if method == "none":
+    if method is None:
         warnings.append(
             "Importance sampling is disabled. The samples are returned as is which may include samples from failed paths with non-finite logP or logQ values. It is recommended to use importance_sampling='psis' for better stability."
         )
-        return ImportanceSamplingResult(samples=samples, warnings=warnings)
+        return ImportanceSamplingResult(samples=samples, warnings=warnings, method=method)
     else:
         samples = samples.reshape(-1, N)
         logP = logP.ravel()
@@ -91,17 +98,16 @@
         _warnings.filterwarnings(
             "ignore", category=RuntimeWarning, message="overflow encountered in exp"
         )
-        if method == "psis":
-            replace = False
-            logiw, pareto_k = az.psislw(logiw)
-        elif method == "psir":
-            replace = True
-            logiw, pareto_k = az.psislw(logiw)
-        elif method == "identity":
-            replace = False
-            pareto_k = None
-        else:
-            raise ValueError(f"Invalid importance sampling method: {method}")
+        match method:
+            case "psis":
+                replace = False
+                logiw, pareto_k = az.psislw(logiw)
+            case "psir":
+                replace = True
+                logiw, pareto_k = az.psislw(logiw)
+            case "identity":
+                replace = False
+                pareto_k = None
 
     # NOTE: Pareto k is normally bad for Pathfinder even when the posterior is close to the NUTS posterior or closer to NUTS than ADVI.
     # Pareto k may not be a good diagnostic for Pathfinder.
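
For orientation, a hedged sketch of what the retained modes do with the log importance weights; the logP/logQ arrays here are toy values, and az.psislw is the same ArviZ routine the module calls:

    import arviz as az
    import numpy as np

    rng = np.random.default_rng(0)
    logP = rng.normal(size=1000)  # toy target log-densities
    logQ = rng.normal(size=1000)  # toy proposal log-densities
    logiw = logP - logQ           # raw log importance weights

    # "psis"/"psir": smooth the weight tail; pareto_k diagnoses reliability
    logiw_smoothed, pareto_k = az.psislw(logiw)

    # resampling step: without replacement for "psis", with replacement for "psir"
    p = np.exp(logiw_smoothed - np.max(logiw_smoothed))
    draws = rng.choice(1000, size=500, replace=False, p=p / p.sum())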
pymc_extras/inference/pathfinder/lbfgs.py CHANGED
@@ -37,11 +37,14 @@ class LBFGSHistoryManager:
         initial position
     maxiter : int
         maximum number of iterations to store
+    epsilon : float
+        tolerance for the LBFGS update
     """
 
     value_grad_fn: Callable[[NDArray[np.float64]], tuple[np.float64, NDArray[np.float64]]]
     x0: NDArray[np.float64]
     maxiter: int
+    epsilon: float
     x_history: NDArray[np.float64] = field(init=False)
     g_history: NDArray[np.float64] = field(init=False)
     count: int = field(init=False)
@@ -52,7 +55,7 @@ class LBFGSHistoryManager:
         self.count = 0
 
         value, grad = self.value_grad_fn(self.x0)
-        if np.all(np.isfinite(grad)) and np.isfinite(value):
+        if self.entry_condition_met(self.x0, value, grad):
             self.add_entry(self.x0, grad)
 
     def add_entry(self, x: NDArray[np.float64], g: NDArray[np.float64]) -> None:
@@ -75,18 +78,39 @@ class LBFGSHistoryManager:
             x=self.x_history[: self.count], g=self.g_history[: self.count], count=self.count
         )
 
+    def entry_condition_met(self, x, value, grad) -> bool:
+        """Checks whether the LBFGS iterate should be added to the history."""
+
+        if np.all(np.isfinite(grad)) and np.isfinite(value) and (self.count < self.maxiter + 1):
+            if self.count == 0:
+                return True
+            else:
+                s = x - self.x_history[self.count - 1]
+                z = grad - self.g_history[self.count - 1]
+                sz = (s * z).sum(axis=-1)
+                update = sz > self.epsilon * np.sqrt(np.sum(z**2, axis=-1))
+
+                if update:
+                    return True
+                else:
+                    return False
+        else:
+            return False
+
     def __call__(self, x: NDArray[np.float64]) -> None:
         value, grad = self.value_grad_fn(x)
-        if np.all(np.isfinite(grad)) and np.isfinite(value) and self.count < self.maxiter + 1:
+        if self.entry_condition_met(x, value, grad):
            self.add_entry(x, grad)
 
 
 class LBFGSStatus(Enum):
     CONVERGED = auto()
     MAX_ITER_REACHED = auto()
-    DIVERGED = auto()
+    NON_FINITE = auto()
+    LOW_UPDATE_PCT = auto()
     # Statuses that lead to Exceptions:
     INIT_FAILED = auto()
+    INIT_FAILED_LOW_UPDATE_PCT = auto()
     LBFGS_FAILED = auto()
 
 
@@ -101,8 +125,8 @@ class LBFGSException(Exception):
 class LBFGSInitFailed(LBFGSException):
     DEFAULT_MESSAGE = "LBFGS failed to initialise."
 
-    def __init__(self, message=None):
-        super().__init__(message or self.DEFAULT_MESSAGE, LBFGSStatus.INIT_FAILED)
+    def __init__(self, status: LBFGSStatus, message=None):
+        super().__init__(message or self.DEFAULT_MESSAGE, status)
 
 
 class LBFGS:
@@ -122,10 +146,12 @@ class LBFGS:
         gradient tolerance for convergence, defaults to 1e-8
     maxls : int, optional
         maximum number of line search steps, defaults to 1000
+    epsilon : float, optional
+        tolerance for the LBFGS update, defaults to 1e-8
     """
 
     def __init__(
-        self, value_grad_fn, maxcor, maxiter=1000, ftol=1e-5, gtol=1e-8, maxls=1000
+        self, value_grad_fn, maxcor, maxiter=1000, ftol=1e-5, gtol=1e-8, maxls=1000, epsilon=1e-8
     ) -> None:
         self.value_grad_fn = value_grad_fn
         self.maxcor = maxcor
@@ -133,6 +159,7 @@ class LBFGS:
         self.ftol = ftol
         self.gtol = gtol
         self.maxls = maxls
+        self.epsilon = epsilon
 
     def minimize(self, x0) -> tuple[NDArray, NDArray, int, LBFGSStatus]:
         """minimizes objective function starting from initial position.
@@ -157,7 +184,7 @@ class LBFGS:
         x0 = np.array(x0, dtype=np.float64)
 
         history_manager = LBFGSHistoryManager(
-            value_grad_fn=self.value_grad_fn, x0=x0, maxiter=self.maxiter
+            value_grad_fn=self.value_grad_fn, x0=x0, maxiter=self.maxiter, epsilon=self.epsilon
         )
 
         result = minimize(
@@ -177,13 +204,22 @@
         history = history_manager.get_history()
 
         # warnings and suggestions for LBFGSStatus are displayed at the end
-        if result.status == 1:
-            lbfgs_status = LBFGSStatus.MAX_ITER_REACHED
-        elif (result.status == 2) or (history.count <= 1):
-            if result.nit <= 1:
+        # threshold determining whether the number of LBFGS updates is low compared to the number of iterations
+        low_update_threshold = 3
+
+        if history.count <= 1:  # triggers LBFGSInitFailed
+            if result.nit < low_update_threshold:
                 lbfgs_status = LBFGSStatus.INIT_FAILED
-            elif result.fun == np.inf:
-                lbfgs_status = LBFGSStatus.DIVERGED
+            else:
+                lbfgs_status = LBFGSStatus.INIT_FAILED_LOW_UPDATE_PCT
+        elif result.status == 1:
+            # (result.nit > maxiter) or (result.nit > maxls)
+            lbfgs_status = LBFGSStatus.MAX_ITER_REACHED
+        elif result.status == 2:
+            # precision loss resulting in inf or nan
+            lbfgs_status = LBFGSStatus.NON_FINITE
+        elif history.count * low_update_threshold < result.nit:
+            lbfgs_status = LBFGSStatus.LOW_UPDATE_PCT
         else:
            lbfgs_status = LBFGSStatus.CONVERGED
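
The new entry_condition_met gate is a curvature check: an iterate is stored only when s.z > epsilon * ||z||, where s is the position difference and z the gradient difference, so pairs carrying too little curvature information for the inverse-Hessian estimate are skipped (LOW_UPDATE_PCT then flags runs where most iterates were rejected). A toy numeric check with made-up values:

    import numpy as np

    epsilon = 1e-8
    x_prev, g_prev = np.array([0.0, 0.0]), np.array([-1.0, -1.0])
    x_new, g_new = np.array([0.5, 0.5]), np.array([-0.5, -0.5])

    s = x_new - x_prev          # position difference
    z = g_new - g_prev          # gradient difference
    sz = (s * z).sum(axis=-1)   # curvature proxy: s.z = 0.5
    accept = sz > epsilon * np.sqrt(np.sum(z**2, axis=-1))
    assert accept  # 0.5 > 1e-8 * 0.707..., so this entry would be stored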
225