pymc-extras 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymc_extras/__init__.py +6 -4
- pymc_extras/distributions/__init__.py +2 -0
- pymc_extras/distributions/continuous.py +3 -2
- pymc_extras/distributions/discrete.py +3 -1
- pymc_extras/distributions/transforms/__init__.py +3 -0
- pymc_extras/distributions/transforms/partial_order.py +227 -0
- pymc_extras/inference/__init__.py +4 -2
- pymc_extras/inference/find_map.py +62 -17
- pymc_extras/inference/fit.py +6 -4
- pymc_extras/inference/laplace.py +14 -8
- pymc_extras/inference/pathfinder/lbfgs.py +49 -13
- pymc_extras/inference/pathfinder/pathfinder.py +89 -103
- pymc_extras/statespace/core/statespace.py +191 -52
- pymc_extras/statespace/filters/distributions.py +15 -16
- pymc_extras/statespace/filters/kalman_filter.py +1 -18
- pymc_extras/statespace/filters/kalman_smoother.py +2 -6
- pymc_extras/statespace/models/ETS.py +10 -0
- pymc_extras/statespace/models/SARIMAX.py +26 -5
- pymc_extras/statespace/models/VARMAX.py +12 -2
- pymc_extras/statespace/models/structural.py +18 -5
- pymc_extras/statespace/utils/data_tools.py +24 -9
- pymc_extras-0.2.6.dist-info/METADATA +318 -0
- pymc_extras-0.2.6.dist-info/RECORD +65 -0
- {pymc_extras-0.2.4.dist-info → pymc_extras-0.2.6.dist-info}/WHEEL +1 -2
- pymc_extras/version.py +0 -11
- pymc_extras/version.txt +0 -1
- pymc_extras-0.2.4.dist-info/METADATA +0 -110
- pymc_extras-0.2.4.dist-info/RECORD +0 -105
- pymc_extras-0.2.4.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -13
- tests/distributions/__init__.py +0 -19
- tests/distributions/test_continuous.py +0 -185
- tests/distributions/test_discrete.py +0 -210
- tests/distributions/test_discrete_markov_chain.py +0 -258
- tests/distributions/test_multivariate.py +0 -304
- tests/model/__init__.py +0 -0
- tests/model/marginal/__init__.py +0 -0
- tests/model/marginal/test_distributions.py +0 -132
- tests/model/marginal/test_graph_analysis.py +0 -182
- tests/model/marginal/test_marginal_model.py +0 -967
- tests/model/test_model_api.py +0 -38
- tests/statespace/__init__.py +0 -0
- tests/statespace/test_ETS.py +0 -411
- tests/statespace/test_SARIMAX.py +0 -405
- tests/statespace/test_VARMAX.py +0 -184
- tests/statespace/test_coord_assignment.py +0 -116
- tests/statespace/test_distributions.py +0 -270
- tests/statespace/test_kalman_filter.py +0 -326
- tests/statespace/test_representation.py +0 -175
- tests/statespace/test_statespace.py +0 -872
- tests/statespace/test_statespace_JAX.py +0 -156
- tests/statespace/test_structural.py +0 -836
- tests/statespace/utilities/__init__.py +0 -0
- tests/statespace/utilities/shared_fixtures.py +0 -9
- tests/statespace/utilities/statsmodel_local_level.py +0 -42
- tests/statespace/utilities/test_helpers.py +0 -310
- tests/test_blackjax_smc.py +0 -222
- tests/test_find_map.py +0 -103
- tests/test_histogram_approximation.py +0 -109
- tests/test_laplace.py +0 -265
- tests/test_linearmodel.py +0 -208
- tests/test_model_builder.py +0 -306
- tests/test_pathfinder.py +0 -203
- tests/test_pivoted_cholesky.py +0 -24
- tests/test_printing.py +0 -98
- tests/test_prior_from_trace.py +0 -172
- tests/test_splines.py +0 -77
- tests/utils.py +0 -0
- {pymc_extras-0.2.4.dist-info → pymc_extras-0.2.6.dist-info/licenses}/LICENSE +0 -0
pymc_extras/inference/pathfinder/lbfgs.py

@@ -37,11 +37,14 @@ class LBFGSHistoryManager:
         initial position
     maxiter : int
         maximum number of iterations to store
+    epsilon : float
+        tolerance for lbfgs update
     """

     value_grad_fn: Callable[[NDArray[np.float64]], tuple[np.float64, NDArray[np.float64]]]
     x0: NDArray[np.float64]
     maxiter: int
+    epsilon: float
     x_history: NDArray[np.float64] = field(init=False)
     g_history: NDArray[np.float64] = field(init=False)
     count: int = field(init=False)
@@ -52,7 +55,7 @@ class LBFGSHistoryManager:
         self.count = 0

         value, grad = self.value_grad_fn(self.x0)
-        if np.all(np.isfinite(grad)) and np.isfinite(value):
+        if self.entry_condition_met(self.x0, value, grad):
             self.add_entry(self.x0, grad)

     def add_entry(self, x: NDArray[np.float64], g: NDArray[np.float64]) -> None:
@@ -75,18 +78,39 @@ class LBFGSHistoryManager:
             x=self.x_history[: self.count], g=self.g_history[: self.count], count=self.count
         )

+    def entry_condition_met(self, x, value, grad) -> bool:
+        """Checks if the LBFGS iteration should continue."""
+
+        if np.all(np.isfinite(grad)) and np.isfinite(value) and (self.count < self.maxiter + 1):
+            if self.count == 0:
+                return True
+            else:
+                s = x - self.x_history[self.count - 1]
+                z = grad - self.g_history[self.count - 1]
+                sz = (s * z).sum(axis=-1)
+                update = sz > self.epsilon * np.sqrt(np.sum(z**2, axis=-1))
+
+                if update:
+                    return True
+                else:
+                    return False
+        else:
+            return False
+
     def __call__(self, x: NDArray[np.float64]) -> None:
         value, grad = self.value_grad_fn(x)
-        if np.all(np.isfinite(grad)) and np.isfinite(value):
+        if self.entry_condition_met(x, value, grad):
             self.add_entry(x, grad)


 class LBFGSStatus(Enum):
     CONVERGED = auto()
     MAX_ITER_REACHED = auto()
-
+    NON_FINITE = auto()
+    LOW_UPDATE_PCT = auto()
     # Statuses that lead to Exceptions:
     INIT_FAILED = auto()
+    INIT_FAILED_LOW_UPDATE_PCT = auto()
     LBFGS_FAILED = auto()

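Note: the new `entry_condition_met` centralises the acceptance test that was previously inlined in `__post_init__` and `__call__`: an iterate is stored only when the value and gradient are finite, the history buffer has room, and the curvature proxy s·z clears the `epsilon`-scaled gradient-change norm. A minimal NumPy sketch of that curvature test (the inputs here are illustrative, not taken from the diff):

```python
import numpy as np

def accept_update(s: np.ndarray, z: np.ndarray, epsilon: float = 1e-8) -> bool:
    # Mirrors the test in entry_condition_met: accept an iterate only if
    # s.z > epsilon * ||z||_2, i.e. the step carries usable curvature info.
    sz = (s * z).sum(axis=-1)
    return bool(sz > epsilon * np.sqrt(np.sum(z**2, axis=-1)))

s = np.array([0.5, -0.2])      # position difference between consecutive iterates
z_ok = np.array([0.4, -0.1])   # gradient difference; s.z = 0.22 -> accepted
z_bad = np.array([-0.4, 0.1])  # s.z = -0.22 -> rejected (no positive curvature)

assert accept_update(s, z_ok) and not accept_update(s, z_bad)
```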
@@ -101,8 +125,8 @@ class LBFGSException(Exception):
 class LBFGSInitFailed(LBFGSException):
     DEFAULT_MESSAGE = "LBFGS failed to initialise."

-    def __init__(self, message=None):
-        super().__init__(message or self.DEFAULT_MESSAGE)
+    def __init__(self, status: LBFGSStatus, message=None):
+        super().__init__(message or self.DEFAULT_MESSAGE, status)


 class LBFGS:
@@ -122,10 +146,12 @@ class LBFGS:
         gradient tolerance for convergence, defaults to 1e-8
     maxls : int, optional
         maximum number of line search steps, defaults to 1000
+    epsilon : float, optional
+        tolerance for lbfgs update, defaults to 1e-8
     """

     def __init__(
-        self, value_grad_fn, maxcor, maxiter=1000, ftol=1e-5, gtol=1e-8, maxls=1000
+        self, value_grad_fn, maxcor, maxiter=1000, ftol=1e-5, gtol=1e-8, maxls=1000, epsilon=1e-8
     ) -> None:
         self.value_grad_fn = value_grad_fn
         self.maxcor = maxcor

@@ -133,6 +159,7 @@ class LBFGS:
         self.ftol = ftol
         self.gtol = gtol
         self.maxls = maxls
+        self.epsilon = epsilon

     def minimize(self, x0) -> tuple[NDArray, NDArray, int, LBFGSStatus]:
         """minimizes objective function starting from initial position.

@@ -157,7 +184,7 @@ class LBFGS:
         x0 = np.array(x0, dtype=np.float64)

         history_manager = LBFGSHistoryManager(
-            value_grad_fn=self.value_grad_fn, x0=x0, maxiter=self.maxiter
+            value_grad_fn=self.value_grad_fn, x0=x0, maxiter=self.maxiter, epsilon=self.epsilon
         )

         result = minimize(
@@ -177,13 +204,22 @@ class LBFGS:
         history = history_manager.get_history()

         # warnings and suggestions for LBFGSStatus are displayed at the end
-        if
-
-
-
+        # threshold determining if the number of lbfgs updates is low compared to iterations
+        low_update_threshold = 3
+
+        if history.count <= 1:  # triggers LBFGSInitFailed
+            if result.nit < low_update_threshold:
                 lbfgs_status = LBFGSStatus.INIT_FAILED
-
-        lbfgs_status = LBFGSStatus.
+            else:
+                lbfgs_status = LBFGSStatus.INIT_FAILED_LOW_UPDATE_PCT
+        elif result.status == 1:
+            # (result.nit > maxiter) or (result.nit > maxls)
+            lbfgs_status = LBFGSStatus.MAX_ITER_REACHED
+        elif result.status == 2:
+            # precision loss resulting to inf or nan
+            lbfgs_status = LBFGSStatus.NON_FINITE
+        elif history.count * low_update_threshold < result.nit:
+            lbfgs_status = LBFGSStatus.LOW_UPDATE_PCT
         else:
             lbfgs_status = LBFGSStatus.CONVERGED

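Note: the rewritten status mapping combines scipy's `result.status` (which the new comments gloss as 1 = iteration/line-search limit, 2 = precision loss producing inf or nan) with how many iterates survived the history filter. The decision table, restated as a standalone sketch with hypothetical inputs:

```python
LOW_UPDATE_THRESHOLD = 3  # same constant as low_update_threshold in the diff

def classify(history_count: int, scipy_status: int, nit: int) -> str:
    # history_count: iterates accepted by LBFGSHistoryManager
    # scipy_status/nit: status code and iteration count from scipy.optimize.minimize
    if history_count <= 1:  # nothing usable beyond the initial point
        return "INIT_FAILED" if nit < LOW_UPDATE_THRESHOLD else "INIT_FAILED_LOW_UPDATE_PCT"
    elif scipy_status == 1:
        return "MAX_ITER_REACHED"
    elif scipy_status == 2:
        return "NON_FINITE"
    elif history_count * LOW_UPDATE_THRESHOLD < nit:
        return "LOW_UPDATE_PCT"  # most iterations were filtered out of the history
    return "CONVERGED"

assert classify(history_count=1, scipy_status=0, nit=2) == "INIT_FAILED"
assert classify(history_count=2, scipy_status=0, nit=10) == "LOW_UPDATE_PCT"
assert classify(history_count=9, scipy_status=0, nit=12) == "CONVERGED"
```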
pymc_extras/inference/pathfinder/pathfinder.py

@@ -12,22 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+
 import collections
 import logging
 import time
-import warnings as _warnings

 from collections import Counter
 from collections.abc import Callable, Iterator
 from dataclasses import asdict, dataclass, field, replace
 from enum import Enum, auto
-from importlib.util import find_spec
 from typing import Literal, TypeAlias

 import arviz as az
-import blackjax
 import filelock
-import jax
 import numpy as np
 import pymc as pm
 import pytensor

@@ -42,11 +39,10 @@ from pymc.initial_point import make_initial_point_fn
 from pymc.model import modelcontext
 from pymc.model.core import Point
 from pymc.pytensorf import (
-    compile_pymc,
+    compile,
     find_rng_nodes,
     reseed_rngs,
 )
-from pymc.sampling.jax import get_jaxified_graph
 from pymc.util import (
     CustomProgress,
     RandomSeed,

@@ -67,6 +63,7 @@ from rich.text import Text
 # TODO: change to typing.Self after Python versions greater than 3.10
 from typing_extensions import Self

+from pymc_extras.inference.laplace import add_data_to_inferencedata
 from pymc_extras.inference.pathfinder.importance_sampling import (
     importance_sampling as _importance_sampling,
 )

@@ -78,9 +75,6 @@ from pymc_extras.inference.pathfinder.lbfgs import (
 )

 logger = logging.getLogger(__name__)
-_warnings.filterwarnings(
-    "ignore", category=FutureWarning, message="compile_pymc was renamed to compile"
-)

 REGULARISATION_TERM = 1e-8
 DEFAULT_LINKER = "cvm_nogc"
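Note: the import hunks above defer `blackjax`, `jax`, and `get_jaxified_graph` to the code paths that actually use them, so the module imports cleanly without the JAX stack installed; the module-level FutureWarning filter also becomes unnecessary once `compile` is imported under its new name. The pattern, sketched generically (the function name is illustrative, not from the diff):

```python
def run_blackjax_backend(*args, **kwargs):
    # Deferred imports: only users of the blackjax backend pay the import
    # cost, and a missing JAX install fails here rather than at module import.
    import blackjax
    import jax

    ...
```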
@@ -105,6 +99,8 @@ def get_jaxified_logp_of_ravel_inputs(model: Model, jacobian: bool = True) -> Ca
         A JAX function that computes the log-probability of a PyMC model with ravelled inputs.
     """

+    from pymc.sampling.jax import get_jaxified_graph
+
     # TODO: JAX: test if we should get jaxified graph of dlogp as well
     new_logprob, new_input = pm.pytensorf.join_nonshared_inputs(
         model.initial_point(), (model.logp(jacobian=jacobian),), model.value_vars, ()

@@ -144,7 +140,7 @@ def get_logp_dlogp_of_ravel_inputs(
         [model.logp(jacobian=jacobian), model.dlogp(jacobian=jacobian)],
         model.value_vars,
     )
-    logp_dlogp_fn = compile_pymc([inputs], (logP, dlogP), **compile_kwargs)
+    logp_dlogp_fn = compile([inputs], (logP, dlogP), **compile_kwargs)
     logp_dlogp_fn.trust_input = True

     return logp_dlogp_fn
@@ -224,6 +220,10 @@ def convert_flat_trace_to_idata(
         result = [res.reshape(num_paths, num_pdraws, *res.shape[2:]) for res in result]

     elif inference_backend == "blackjax":
+        import jax
+
+        from pymc.sampling.jax import get_jaxified_graph
+
         jax_fn = get_jaxified_graph(inputs=model.value_vars, outputs=vars_to_sample)
         result = jax.vmap(jax.vmap(jax_fn))(
             *jax.device_put(list(trace.values()), jax.devices(postprocessing_backend)[0])
@@ -237,8 +237,8 @@ def convert_flat_trace_to_idata(


 def alpha_recover(
-    x: TensorVariable, g: TensorVariable, epsilon: float
-) -> tuple[TensorVariable, TensorVariable, TensorVariable, TensorVariable]:
+    x: TensorVariable, g: TensorVariable
+) -> tuple[TensorVariable, TensorVariable, TensorVariable]:
     """compute the diagonal elements of the inverse Hessian at each iterations of L-BFGS and filter updates.

     Parameters

@@ -247,9 +247,6 @@ def alpha_recover(
         position array, shape (L+1, N)
     g : TensorVariable
         gradient array, shape (L+1, N)
-    epsilon : float
-        threshold for filtering updates based on inner product of position
-        and gradient differences

     Returns
     -------

@@ -259,15 +256,13 @@ def alpha_recover(
         position differences, shape (L, N)
     z : TensorVariable
         gradient differences, shape (L, N)
-    update_mask : TensorVariable
-        mask for filtering updates, shape (L,)

     Notes
     -----
     shapes: L=batch_size, N=num_params
     """

-    def compute_alpha_l(alpha_lm1, s_l, z_l) -> TensorVariable:
+    def compute_alpha_l(s_l, z_l, alpha_lm1) -> TensorVariable:
         # alpha_lm1: (N,)
         # s_l: (N,)
         # z_l: (N,)
@@ -281,43 +276,28 @@ def alpha_recover(
         )  # fmt:off
         return 1.0 / inv_alpha_l

-    def return_alpha_lm1(alpha_lm1, s_l, z_l) -> TensorVariable:
-        return alpha_lm1[-1]
-
-    def scan_body(update_mask_l, s_l, z_l, alpha_lm1) -> TensorVariable:
-        return pt.switch(
-            update_mask_l,
-            compute_alpha_l(alpha_lm1, s_l, z_l),
-            return_alpha_lm1(alpha_lm1, s_l, z_l),
-        )
-
     Lp1, N = x.shape
     s = pt.diff(x, axis=0)
     z = pt.diff(g, axis=0)
     alpha_l_init = pt.ones(N)
-    sz = (s * z).sum(axis=-1)
-    # update_mask = sz > epsilon * pt.linalg.norm(z, axis=-1)
-    # pt.linalg.norm does not work with JAX!!
-    update_mask = sz > epsilon * pt.sqrt(pt.sum(z**2, axis=-1))

     alpha, _ = pytensor.scan(
-        fn=scan_body,
+        fn=compute_alpha_l,
         outputs_info=alpha_l_init,
-        sequences=[update_mask, s, z],
+        sequences=[s, z],
         n_steps=Lp1 - 1,
         allow_gc=False,
     )

     # assert np.all(alpha.eval() > 0), "alpha cannot be negative"
-    # alpha: (L, N)
-    return alpha, s, z, update_mask
+    # alpha: (L, N)
+    return alpha, s, z


 def inverse_hessian_factors(
     alpha: TensorVariable,
     s: TensorVariable,
     z: TensorVariable,
-    update_mask: TensorVariable,
     J: TensorConstant,
 ) -> tuple[TensorVariable, TensorVariable]:
     """compute the inverse hessian factors for the BFGS approximation.
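Note: with `update_mask` gone, the `pt.switch`-based `scan_body` wrapper disappears and `pytensor.scan` steps `compute_alpha_l` directly over the difference sequences (sequence arguments come first, the carried state last). A toy scan with the same wiring (the recurrence itself is a stand-in, not the actual alpha update):

```python
import numpy as np
import pytensor
import pytensor.tensor as pt

s = pt.matrix("s")  # (L, N) position differences
z = pt.matrix("z")  # (L, N) gradient differences

def step(s_l, z_l, alpha_lm1):
    # same argument order as compute_alpha_l: sequences first, carried state last
    return alpha_lm1 + s_l * z_l  # placeholder recurrence

alpha, _ = pytensor.scan(fn=step, sequences=[s, z], outputs_info=pt.ones(s.shape[1]))
f = pytensor.function([s, z], alpha)

ones = np.ones((3, 2), dtype=pytensor.config.floatX)
print(f(ones, ones))  # [[2. 2.] [3. 3.] [4. 4.]] -- one carried state per row
```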
@@ -330,8 +310,6 @@ def inverse_hessian_factors(
         position differences, shape (L, N)
     z : TensorVariable
         gradient differences, shape (L, N)
-    update_mask : TensorVariable
-        mask for filtering updates, shape (L,)
     J : TensorConstant
         history size for L-BFGS

|
|
|
350
328
|
# NOTE: get_chi_matrix_1 is a modified version of get_chi_matrix_2 to closely follow Zhang et al., (2022)
|
|
351
329
|
# NOTE: get_chi_matrix_2 is from blackjax which MAYBE incorrectly implemented
|
|
352
330
|
|
|
353
|
-
def get_chi_matrix_1(
|
|
354
|
-
diff: TensorVariable, update_mask: TensorVariable, J: TensorConstant
|
|
355
|
-
) -> TensorVariable:
|
|
331
|
+
def get_chi_matrix_1(diff: TensorVariable, J: TensorConstant) -> TensorVariable:
|
|
356
332
|
L, N = diff.shape
|
|
357
333
|
j_last = pt.as_tensor(J - 1) # since indexing starts at 0
|
|
358
334
|
|
|
359
|
-
def chi_update(
|
|
335
|
+
def chi_update(diff_l, chi_lm1) -> TensorVariable:
|
|
360
336
|
chi_l = pt.roll(chi_lm1, -1, axis=0)
|
|
361
337
|
return pt.set_subtensor(chi_l[j_last], diff_l)
|
|
362
338
|
|
|
363
|
-
def no_op(chi_lm1, diff_l) -> TensorVariable:
|
|
364
|
-
return chi_lm1
|
|
365
|
-
|
|
366
|
-
def scan_body(update_mask_l, diff_l, chi_lm1) -> TensorVariable:
|
|
367
|
-
return pt.switch(update_mask_l, chi_update(chi_lm1, diff_l), no_op(chi_lm1, diff_l))
|
|
368
|
-
|
|
369
339
|
chi_init = pt.zeros((J, N))
|
|
370
340
|
chi_mat, _ = pytensor.scan(
|
|
371
|
-
fn=
|
|
341
|
+
fn=chi_update,
|
|
372
342
|
outputs_info=chi_init,
|
|
373
|
-
sequences=[
|
|
374
|
-
update_mask,
|
|
375
|
-
diff,
|
|
376
|
-
],
|
|
343
|
+
sequences=[diff],
|
|
377
344
|
allow_gc=False,
|
|
378
345
|
)
|
|
379
346
|
|
|
@@ -382,19 +349,15 @@ def inverse_hessian_factors(
|
|
|
382
349
|
# (L, N, J)
|
|
383
350
|
return chi_mat
|
|
384
351
|
|
|
385
|
-
def get_chi_matrix_2(
|
|
386
|
-
diff: TensorVariable, update_mask: TensorVariable, J: TensorConstant
|
|
387
|
-
) -> TensorVariable:
|
|
352
|
+
def get_chi_matrix_2(diff: TensorVariable, J: TensorConstant) -> TensorVariable:
|
|
388
353
|
L, N = diff.shape
|
|
389
354
|
|
|
390
|
-
diff_masked = update_mask[:, None] * diff
|
|
391
|
-
|
|
392
355
|
# diff_padded: (L+J, N)
|
|
393
356
|
pad_width = pt.zeros(shape=(2, 2), dtype="int32")
|
|
394
|
-
pad_width = pt.set_subtensor(pad_width[0, 0], J)
|
|
395
|
-
diff_padded = pt.pad(
|
|
357
|
+
pad_width = pt.set_subtensor(pad_width[0, 0], J - 1)
|
|
358
|
+
diff_padded = pt.pad(diff, pad_width, mode="constant")
|
|
396
359
|
|
|
397
|
-
index = pt.arange(L)[
|
|
360
|
+
index = pt.arange(L)[..., None] + pt.arange(J)[None, ...]
|
|
398
361
|
index = index.reshape((L, J))
|
|
399
362
|
|
|
400
363
|
chi_mat = pt.matrix_transpose(diff_padded[index])
|
|
@@ -403,8 +366,10 @@ def inverse_hessian_factors(
|
|
|
403
366
|
return chi_mat
|
|
404
367
|
|
|
405
368
|
L, N = alpha.shape
|
|
406
|
-
|
|
407
|
-
|
|
369
|
+
|
|
370
|
+
# changed to get_chi_matrix_2 after removing update_mask
|
|
371
|
+
S = get_chi_matrix_2(s, J)
|
|
372
|
+
Z = get_chi_matrix_2(z, J)
|
|
408
373
|
|
|
409
374
|
# E: (L, J, J)
|
|
410
375
|
Ij = pt.eye(J)[None, ...]
|
|
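Note: `get_chi_matrix_2` now pads the (L, N) difference array with J-1 leading zero rows (previously J, with a mask applied first) and gathers a length-J window per iteration via index arithmetic. The indexing trick in plain NumPy:

```python
import numpy as np

L, N, J = 4, 2, 3
diff = np.arange(1, L + 1, dtype=float)[:, None] * np.ones((L, N))  # rows 1..L

# J-1 zero rows in front so the earliest windows are zero-padded
diff_padded = np.pad(diff, ((J - 1, 0), (0, 0)), mode="constant")

# index[l, j] = l + j selects rows (l, ..., l+J-1) of the padded array,
# i.e. the last J differences ending at iteration l of the original array
index = np.arange(L)[:, None] + np.arange(J)[None, :]

chi = diff_padded[index]             # (L, J, N)
chi_mat = np.swapaxes(chi, -1, -2)   # (L, N, J), like pt.matrix_transpose

print(chi[0, :, 0])  # [0. 0. 1.] -- first window is mostly padding
print(chi[3, :, 0])  # [2. 3. 4.] -- last window holds the final J rows
```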
@@ -489,6 +454,7 @@ def bfgs_sample_dense(

     N = x.shape[-1]
     IdN = pt.eye(N)[None, ...]
+    IdN += IdN * REGULARISATION_TERM

     # inverse Hessian
     H_inv = (

@@ -504,7 +470,10 @@ def bfgs_sample_dense(

     logdet = 2.0 * pt.sum(pt.log(pt.abs(pt.diagonal(Lchol, axis1=-2, axis2=-1))), axis=-1)

-    mu = x - pt.einsum("ijk,ik->ij", H_inv, g)
+    # mu = x - pt.einsum("ijk,ik->ij", H_inv, g)  # causes error: Multiple destroyers of g
+
+    batched_dot = pt.vectorize(pt.dot, signature="(ijk),(ilk)->(ij)")
+    mu = x - batched_dot(H_inv, pt.matrix_transpose(g[..., None]))

     phi = pt.matrix_transpose(
         # (L, N, 1)
@@ -565,23 +534,28 @@ def bfgs_sample_sparse(
     # qr_input: (L, N, 2J)
     qr_input = inv_sqrt_alpha_diag @ beta
     (Q, R), _ = pytensor.scan(fn=pt.nlinalg.qr, sequences=[qr_input], allow_gc=False)
+
     IdN = pt.eye(R.shape[1])[None, ...]
+    IdN += IdN * REGULARISATION_TERM
+
     Lchol_input = IdN + R @ gamma @ pt.matrix_transpose(R)

+    # TODO: make robust Lchol calcs more robust, ie. try exceptions, increase REGULARISATION_TERM if non-finite exists
     Lchol = pt.linalg.cholesky(Lchol_input, lower=False, check_finite=False, on_error="nan")

     logdet = 2.0 * pt.sum(pt.log(pt.abs(pt.diagonal(Lchol, axis1=-2, axis2=-1))), axis=-1)
     logdet += pt.sum(pt.log(alpha), axis=-1)

+    # inverse Hessian
+    # (L, N, N) + (L, N, 2J), (L, 2J, 2J), (L, 2J, N) -> (L, N, N)
+    H_inv = alpha_diag + (beta @ gamma @ pt.matrix_transpose(beta))
+
     # NOTE: changed the sign from "x + " to "x -" of the expression to match Stan which differs from Zhang et al., (2022). same for dense version.
-
-
-
-
-
-        # (L, N, N), (L, N) -> (L, N)
-        + pt.batched_dot((beta @ gamma @ pt.matrix_transpose(beta)), g)
-    )
+
+    # mu = x - pt.einsum("ijk,ik->ij", H_inv, g)  # causes error: Multiple destroyers of g
+
+    batched_dot = pt.vectorize(pt.dot, signature="(ijk),(ilk)->(ij)")
+    mu = x - batched_dot(H_inv, pt.matrix_transpose(g[..., None]))

     phi = pt.matrix_transpose(
         # (L, N, 1)

@@ -589,8 +563,6 @@ def bfgs_sample_sparse(
         # (L, N, N), (L, N, M) -> (L, N, M)
         + sqrt_alpha_diag
         @ (
-            # (L, N, 2J), (L, 2J, M) -> (L, N, M)
-            # intermediate calcs below
             # (L, N, 2J), (L, 2J, 2J) -> (L, N, 2J)
             (Q @ (Lchol - IdN))
             # (L, 2J, N), (L, N, M) -> (L, 2J, M)
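Note: both samplers now build `H_inv` explicitly and compute `mu` through a `pt.vectorize`d dot because, per the retained comment, the `pt.einsum` form raises a "Multiple destroyers of g" error; the small `REGULARISATION_TERM` folded into the identity guards the Cholesky factorisation. Numerically the intended quantity is just a batched matrix-vector product, shown here in NumPy:

```python
import numpy as np

L, N = 2, 3
rng = np.random.default_rng(0)
H_inv = rng.normal(size=(L, N, N))  # per-iteration inverse-Hessian estimates
x = rng.normal(size=(L, N))         # positions
g = rng.normal(size=(L, N))         # gradients

# mu = x - H_inv @ g, batched over the leading L axis
mu_einsum = x - np.einsum("lij,lj->li", H_inv, g)

# equivalent explicit batched matrix-vector product, in the spirit of
# batched_dot(H_inv, matrix_transpose(g[..., None])) from the diff
mu_batched = x - np.squeeze(H_inv @ g[..., None], axis=-1)

assert np.allclose(mu_einsum, mu_batched)
```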
@@ -778,7 +750,6 @@ def make_pathfinder_body(
     num_draws: int,
     maxcor: int,
     num_elbo_draws: int,
-    epsilon: float,
     **compile_kwargs: dict,
 ) -> Function:
     """

@@ -794,8 +765,6 @@ def make_pathfinder_body(
         The maximum number of iterations for the L-BFGS algorithm.
     num_elbo_draws : int
         The number of draws for the Evidence Lower Bound (ELBO) estimation.
-    epsilon : float
-        The value used to filter out large changes in the direction of the update gradient at each iteration l in L. Iteration l is only accepted if delta_theta[l] * delta_grad[l] > epsilon * L2_norm(delta_grad[l]) for each l in L.
     compile_kwargs : dict
         Additional keyword arguments for the PyTensor compiler.

@@ -820,11 +789,10 @@ def make_pathfinder_body(

     num_draws = pt.constant(num_draws, "num_draws", dtype="int32")
     num_elbo_draws = pt.constant(num_elbo_draws, "num_elbo_draws", dtype="int32")
-    epsilon = pt.constant(epsilon, "epsilon", dtype="float64")
     maxcor = pt.constant(maxcor, "maxcor", dtype="int32")

-    alpha, s, z, update_mask = alpha_recover(x_full, g_full, epsilon=epsilon)
-    beta, gamma = inverse_hessian_factors(alpha, s, z, update_mask, J=maxcor)
+    alpha, s, z = alpha_recover(x_full, g_full)
+    beta, gamma = inverse_hessian_factors(alpha, s, z, J=maxcor)

     # ignore initial point - x, g: (L, N)
     x = x_full[1:]

@@ -855,7 +823,7 @@ def make_pathfinder_body(

     # return psi, logP_psi, logQ_psi, elbo_argmax

-    pathfinder_body_fn = compile_pymc(
+    pathfinder_body_fn = compile(
         [x_full, g_full],
         [psi, logP_psi, logQ_psi, elbo_argmax],
         **compile_kwargs,
@@ -934,11 +902,11 @@ def make_single_pathfinder_fn(
     x_base = DictToArrayBijection.map(ip).data

     # lbfgs
-    lbfgs = LBFGS(neg_logp_dlogp_func, maxcor, maxiter, ftol, gtol, maxls)
+    lbfgs = LBFGS(neg_logp_dlogp_func, maxcor, maxiter, ftol, gtol, maxls, epsilon)

     # pathfinder body
     pathfinder_body_fn = make_pathfinder_body(
-        logp_func, num_draws, maxcor, num_elbo_draws, epsilon, **compile_kwargs
+        logp_func, num_draws, maxcor, num_elbo_draws, **compile_kwargs
     )
     rngs = find_rng_nodes(pathfinder_body_fn.maker.fgraph.outputs)

@@ -950,8 +918,8 @@ def make_single_pathfinder_fn(
         x0 = x_base + jitter_value
         x, g, lbfgs_niter, lbfgs_status = lbfgs.minimize(x0)

-        if lbfgs_status == LBFGSStatus.INIT_FAILED:
-            raise LBFGSInitFailed()
+        if lbfgs_status in {LBFGSStatus.INIT_FAILED, LBFGSStatus.INIT_FAILED_LOW_UPDATE_PCT}:
+            raise LBFGSInitFailed(lbfgs_status)
         elif lbfgs_status == LBFGSStatus.LBFGS_FAILED:
             raise LBFGSException()

@@ -1389,15 +1357,16 @@ def _get_status_warning(mpr: MultiPathfinderResult) -> list[str]:
     warnings = []

     lbfgs_status_message = {
-        LBFGSStatus.MAX_ITER_REACHED: "LBFGS maximum number of iterations reached. Consider increasing maxiter if this occurence is high relative to the number of paths.",
-        LBFGSStatus.INIT_FAILED: "LBFGS failed to
-        LBFGSStatus.
+        LBFGSStatus.MAX_ITER_REACHED: "MAX_ITER_REACHED: LBFGS maximum number of iterations reached. Consider increasing maxiter if this occurence is high relative to the number of paths.",
+        LBFGSStatus.INIT_FAILED: "INIT_FAILED: LBFGS failed to initialize. Consider reparameterizing the model or reducing jitter if this occurence is high relative to the number of paths.",
+        LBFGSStatus.NON_FINITE: "NON_FINITE: LBFGS objective function produced inf or nan at the last iteration. Consider reparameterizing the model or adjusting the pathfinder arguments if this occurence is high relative to the number of paths.",
+        LBFGSStatus.LOW_UPDATE_PCT: "LOW_UPDATE_PCT: Majority of LBFGS iterations were not accepted due to the either: (1) LBFGS function or gradient values containing too many inf or nan values or (2) gradient changes being significantly large, set by epsilon. Consider reparameterizing the model, adjusting initvals or jitter or other pathfinder arguments if this occurence is high relative to the number of paths.",
+        LBFGSStatus.INIT_FAILED_LOW_UPDATE_PCT: "INIT_FAILED_LOW_UPDATE_PCT: LBFGS failed to initialize due to the either: (1) LBFGS function or gradient values containing too many inf or nan values or (2) gradient changes being significantly large, set by epsilon. Consider reparameterizing the model, adjusting initvals or jitter or other pathfinder arguments if this occurence is high relative to the number of paths.",
     }

     path_status_message = {
-        PathStatus.ELBO_ARGMAX_AT_ZERO: "ELBO argmax at zero refers to the first iteration during LBFGS. A high occurrence suggests the model's default initial point + jitter
-        PathStatus.
-        PathStatus.INVALID_LOGQ: "Invalid logQ values occur when a path's logQ values are not finite. The failed path is not included in samples when importance sampling is used. Consider reparameterizing the model or adjusting the pathfinder arguments if this occurence is high relative to the number of paths.",
+        PathStatus.ELBO_ARGMAX_AT_ZERO: "ELBO_ARGMAX_AT_ZERO: ELBO argmax at zero refers to the first iteration during LBFGS. A high occurrence suggests the model's default initial point + jitter values are concentrated in high-density regions in the target distribution and may result in poor exploration of the parameter space. Consider increasing jitter if this occurrence is high relative to the number of paths.",
+        PathStatus.INVALID_LOGQ: "INVALID_LOGQ: Invalid logQ values occur when a path's logQ values are not finite. The failed path is not included in samples when importance sampling is used. Consider reparameterizing the model or adjusting the pathfinder arguments if this occurence is high relative to the number of paths.",
     }

     for lbfgs_status in mpr.lbfgs_status:
@@ -1567,8 +1536,9 @@ def multipath_pathfinder(
                     task,
                     description=desc.format(path_idx=path_idx),
                     completed=path_idx,
-                    refresh=True,
                 )
+            # Ensure the progress bar visually reaches 100% and shows 'Completed'
+            progress.update(task, completed=num_paths, description="Completed")
         except (KeyboardInterrupt, StopIteration) as e:
             # if exception is raised here, MultiPathfinderResult will collect all the successful results and report the results. User is free to abort the process earlier and the results will still be collected and return az.InferenceData.
             if isinstance(e, StopIteration):

@@ -1618,7 +1588,7 @@ def fit_pathfinder(
     maxiter: int = 1000,  # L^max
     ftol: float = 1e-5,
     gtol: float = 1e-8,
-    maxls=1000,
+    maxls: int = 1000,
     num_elbo_draws: int = 10,  # K
     jitter: float = 2.0,
     epsilon: float = 1e-8,
@@ -1630,6 +1600,7 @@ def fit_pathfinder(
     inference_backend: Literal["pymc", "blackjax"] = "pymc",
     pathfinder_kwargs: dict = {},
     compile_kwargs: dict = {},
+    initvals: dict | None = None,
 ) -> az.InferenceData:
     """
     Fit the Pathfinder Variational Inference algorithm.

@@ -1665,12 +1636,12 @@ def fit_pathfinder(
     importance_sampling : str, None, optional
         Method to apply sampling based on log importance weights (logP - logQ).
         Options are:
-
-
-            "psir" : Pareto Smoothed Importance Resampling
-
-
-
+
+        - "psis" : Pareto Smoothed Importance Sampling (default). Usually most stable.
+        - "psir" : Pareto Smoothed Importance Resampling. Less stable than PSIS.
+        - "identity" : Applies log importance weights directly without resampling.
+        - None : No importance sampling weights. Returns raw samples of size (num_paths, num_draws_per_path, N) where N is number of model parameters. Other methods return samples of size (num_draws, N).
+
     progressbar : bool, optional
         Whether to display a progress bar (default is True). Setting this to False will likely reduce the computation time.
     random_seed : RandomSeed, optional
@@ -1685,10 +1656,13 @@ def fit_pathfinder(
         Additional keyword arguments for the Pathfinder algorithm.
     compile_kwargs
         Additional keyword arguments for the PyTensor compiler. If not provided, the default linker is "cvm_nogc".
+    initvals: dict | None = None
+        Initial values for the model parameters, as str:ndarray key-value pairs. Paritial initialization is permitted.
+        If None, the model's default initial values are used.

     Returns
     -------
-    arviz.InferenceData
+    :class:`~arviz.InferenceData`
         The inference data containing the results of the Pathfinder algorithm.

     References

@@ -1698,6 +1672,14 @@ def fit_pathfinder(

     model = modelcontext(model)

+    if initvals is not None:
+        model = pm.model.fgraph.clone_model(model)  # Create a clone of the model
+        for (
+            rv_name,
+            ivals,
+        ) in initvals.items():  # Set the initial values for the variables in the clone
+            model.set_initval(model.named_vars[rv_name], ivals)
+
     valid_importance_sampling = {"psis", "psir", "identity", None}

     if importance_sampling is not None:
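Note: the new `initvals` argument clones the model before overriding initial values, so the caller's model object is left untouched, and partial dictionaries are allowed. A usage sketch (the model and values here are illustrative):

```python
import numpy as np
import pymc as pm

from pymc_extras.inference.pathfinder.pathfinder import fit_pathfinder

observed = np.random.default_rng(1).normal(3.0, 1.0, size=100)

with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 10.0)
    sigma = pm.HalfNormal("sigma", 5.0)
    pm.Normal("y", mu, sigma, observed=observed)

# Partial initialization: only mu is pinned; sigma keeps its default initval.
idata = fit_pathfinder(model=model, initvals={"mu": np.array(3.0)}, random_seed=41)
```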
@@ -1736,8 +1718,9 @@ def fit_pathfinder(
         )
         pathfinder_samples = mp_result.samples
     elif inference_backend == "blackjax":
-
-
+        import blackjax
+        import jax
+
         if version.parse(blackjax.__version__).major < 1:
             raise ImportError("fit_pathfinder requires blackjax 1.0 or above")

@@ -1775,4 +1758,7 @@ def fit_pathfinder(
         model=model,
         importance_sampling=importance_sampling,
     )
+
+    idata = add_data_to_inferencedata(idata, progressbar, model, compile_kwargs)
+
     return idata
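Note: the final hunk routes the result through `add_data_to_inferencedata` (imported from the Laplace module), so the returned `InferenceData` should also carry the model's data groups alongside the posterior. Continuing the sketch above:

```python
idata = fit_pathfinder(model=model, random_seed=42)
print(idata.groups())  # expect "posterior" plus observed/constant data groups
```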