pymc-extras 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff reflects the changes between publicly released versions of the package, as they appear in the public registries to which they were published, and is provided for informational purposes only.
@@ -12,22 +12,19 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+
  import collections
  import logging
  import time
- import warnings as _warnings

  from collections import Counter
  from collections.abc import Callable, Iterator
  from dataclasses import asdict, dataclass, field, replace
  from enum import Enum, auto
- from importlib.util import find_spec
  from typing import Literal, TypeAlias

  import arviz as az
- import blackjax
  import filelock
- import jax
  import numpy as np
  import pymc as pm
  import pytensor
@@ -42,11 +39,10 @@ from pymc.initial_point import make_initial_point_fn
  from pymc.model import modelcontext
  from pymc.model.core import Point
  from pymc.pytensorf import (
- compile_pymc,
+ compile,
  find_rng_nodes,
  reseed_rngs,
  )
- from pymc.sampling.jax import get_jaxified_graph
  from pymc.util import (
  CustomProgress,
  RandomSeed,
@@ -60,12 +56,14 @@ from pytensor.graph import Apply, Op, vectorize_graph
  from pytensor.tensor import TensorConstant, TensorVariable
  from rich.console import Console, Group
  from rich.padding import Padding
+ from rich.progress import BarColumn, TextColumn, TimeElapsedColumn, TimeRemainingColumn
  from rich.table import Table
  from rich.text import Text

  # TODO: change to typing.Self after Python versions greater than 3.10
  from typing_extensions import Self

+ from pymc_extras.inference.laplace import add_data_to_inferencedata
  from pymc_extras.inference.pathfinder.importance_sampling import (
  importance_sampling as _importance_sampling,
  )
@@ -77,9 +75,6 @@ from pymc_extras.inference.pathfinder.lbfgs import (
  )

  logger = logging.getLogger(__name__)
- _warnings.filterwarnings(
- "ignore", category=FutureWarning, message="compile_pymc was renamed to compile"
- )

  REGULARISATION_TERM = 1e-8
  DEFAULT_LINKER = "cvm_nogc"
@@ -104,6 +99,8 @@ def get_jaxified_logp_of_ravel_inputs(model: Model, jacobian: bool = True) -> Ca
  A JAX function that computes the log-probability of a PyMC model with ravelled inputs.
  """

+ from pymc.sampling.jax import get_jaxified_graph
+
  # TODO: JAX: test if we should get jaxified graph of dlogp as well
  new_logprob, new_input = pm.pytensorf.join_nonshared_inputs(
  model.initial_point(), (model.logp(jacobian=jacobian),), model.value_vars, ()
@@ -143,7 +140,7 @@ def get_logp_dlogp_of_ravel_inputs(
  [model.logp(jacobian=jacobian), model.dlogp(jacobian=jacobian)],
  model.value_vars,
  )
- logp_dlogp_fn = compile_pymc([inputs], (logP, dlogP), **compile_kwargs)
+ logp_dlogp_fn = compile([inputs], (logP, dlogP), **compile_kwargs)
  logp_dlogp_fn.trust_input = True

  return logp_dlogp_fn
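The compile_pymc → compile change above tracks a rename in PyMC itself (note the dropped FutureWarning filter for "compile_pymc was renamed to compile"). For downstream code that has to run against both older and newer PyMC releases, a minimal compatibility sketch is shown below; the alias name compile_fn is illustrative only and not part of either library.

    # Hedged sketch: pick whichever compiler helper the installed PyMC exposes.
    try:
        from pymc.pytensorf import compile as compile_fn  # newer PyMC, as used by pymc-extras 0.2.5
    except ImportError:  # older PyMC releases only ship the previous name
        from pymc.pytensorf import compile_pymc as compile_fn

    # compile_fn(inputs, outputs, **compile_kwargs) is then called the same way in both cases.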
@@ -155,7 +152,7 @@ def convert_flat_trace_to_idata(
  postprocessing_backend: Literal["cpu", "gpu"] = "cpu",
  inference_backend: Literal["pymc", "blackjax"] = "pymc",
  model: Model | None = None,
- importance_sampling: Literal["psis", "psir", "identity", "none"] = "psis",
+ importance_sampling: Literal["psis", "psir", "identity"] | None = "psis",
  ) -> az.InferenceData:
  """convert flattened samples to arviz InferenceData format.

@@ -180,7 +177,7 @@
  arviz inference data object
  """

- if importance_sampling == "none":
+ if importance_sampling is None:
  # samples.ndim == 3 in this case, otherwise ndim == 2
  num_paths, num_pdraws, N = samples.shape
  samples = samples.reshape(-1, N)
@@ -219,10 +216,14 @@
  fn.trust_input = True
  result = fn(*list(trace.values()))

- if importance_sampling == "none":
+ if importance_sampling is None:
  result = [res.reshape(num_paths, num_pdraws, *res.shape[2:]) for res in result]

  elif inference_backend == "blackjax":
+ import jax
+
+ from pymc.sampling.jax import get_jaxified_graph
+
  jax_fn = get_jaxified_graph(inputs=model.value_vars, outputs=vars_to_sample)
  result = jax.vmap(jax.vmap(jax_fn))(
  *jax.device_put(list(trace.values()), jax.devices(postprocessing_backend)[0])
@@ -236,8 +237,8 @@


  def alpha_recover(
- x: TensorVariable, g: TensorVariable, epsilon: TensorVariable
- ) -> tuple[TensorVariable, TensorVariable, TensorVariable, TensorVariable]:
+ x: TensorVariable, g: TensorVariable
+ ) -> tuple[TensorVariable, TensorVariable, TensorVariable]:
  """compute the diagonal elements of the inverse Hessian at each iterations of L-BFGS and filter updates.

  Parameters
@@ -246,9 +247,6 @@
  position array, shape (L+1, N)
  g : TensorVariable
  gradient array, shape (L+1, N)
- epsilon : float
- threshold for filtering updates based on inner product of position
- and gradient differences

  Returns
  -------
@@ -258,15 +256,13 @@
  position differences, shape (L, N)
  z : TensorVariable
  gradient differences, shape (L, N)
- update_mask : TensorVariable
- mask for filtering updates, shape (L,)

  Notes
  -----
  shapes: L=batch_size, N=num_params
  """

- def compute_alpha_l(alpha_lm1, s_l, z_l) -> TensorVariable:
+ def compute_alpha_l(s_l, z_l, alpha_lm1) -> TensorVariable:
  # alpha_lm1: (N,)
  # s_l: (N,)
  # z_l: (N,)
@@ -280,43 +276,28 @@ def alpha_recover(
  ) # fmt:off
  return 1.0 / inv_alpha_l

- def return_alpha_lm1(alpha_lm1, s_l, z_l) -> TensorVariable:
- return alpha_lm1[-1]
-
- def scan_body(update_mask_l, s_l, z_l, alpha_lm1) -> TensorVariable:
- return pt.switch(
- update_mask_l,
- compute_alpha_l(alpha_lm1, s_l, z_l),
- return_alpha_lm1(alpha_lm1, s_l, z_l),
- )
-
  Lp1, N = x.shape
  s = pt.diff(x, axis=0)
  z = pt.diff(g, axis=0)
  alpha_l_init = pt.ones(N)
- sz = (s * z).sum(axis=-1)
- # update_mask = sz > epsilon * pt.linalg.norm(z, axis=-1)
- # pt.linalg.norm does not work with JAX!!
- update_mask = sz > epsilon * pt.sqrt(pt.sum(z**2, axis=-1))

  alpha, _ = pytensor.scan(
- fn=scan_body,
+ fn=compute_alpha_l,
  outputs_info=alpha_l_init,
- sequences=[update_mask, s, z],
+ sequences=[s, z],
  n_steps=Lp1 - 1,
  allow_gc=False,
  )

  # assert np.all(alpha.eval() > 0), "alpha cannot be negative"
- # alpha: (L, N), update_mask: (L, N)
- return alpha, s, z, update_mask
+ # alpha: (L, N)
+ return alpha, s, z


  def inverse_hessian_factors(
  alpha: TensorVariable,
  s: TensorVariable,
  z: TensorVariable,
- update_mask: TensorVariable,
  J: TensorConstant,
  ) -> tuple[TensorVariable, TensorVariable]:
  """compute the inverse hessian factors for the BFGS approximation.
@@ -329,8 +310,6 @@ def inverse_hessian_factors(
  position differences, shape (L, N)
  z : TensorVariable
  gradient differences, shape (L, N)
- update_mask : TensorVariable
- mask for filtering updates, shape (L,)
  J : TensorConstant
  history size for L-BFGS

@@ -349,30 +328,19 @@ def inverse_hessian_factors(
  # NOTE: get_chi_matrix_1 is a modified version of get_chi_matrix_2 to closely follow Zhang et al., (2022)
  # NOTE: get_chi_matrix_2 is from blackjax which MAYBE incorrectly implemented

- def get_chi_matrix_1(
- diff: TensorVariable, update_mask: TensorVariable, J: TensorConstant
- ) -> TensorVariable:
+ def get_chi_matrix_1(diff: TensorVariable, J: TensorConstant) -> TensorVariable:
  L, N = diff.shape
  j_last = pt.as_tensor(J - 1) # since indexing starts at 0

- def chi_update(chi_lm1, diff_l) -> TensorVariable:
+ def chi_update(diff_l, chi_lm1) -> TensorVariable:
  chi_l = pt.roll(chi_lm1, -1, axis=0)
  return pt.set_subtensor(chi_l[j_last], diff_l)

- def no_op(chi_lm1, diff_l) -> TensorVariable:
- return chi_lm1
-
- def scan_body(update_mask_l, diff_l, chi_lm1) -> TensorVariable:
- return pt.switch(update_mask_l, chi_update(chi_lm1, diff_l), no_op(chi_lm1, diff_l))
-
  chi_init = pt.zeros((J, N))
  chi_mat, _ = pytensor.scan(
- fn=scan_body,
+ fn=chi_update,
  outputs_info=chi_init,
- sequences=[
- update_mask,
- diff,
- ],
+ sequences=[diff],
  allow_gc=False,
  )

@@ -381,19 +349,15 @@ def inverse_hessian_factors(
  # (L, N, J)
  return chi_mat

- def get_chi_matrix_2(
- diff: TensorVariable, update_mask: TensorVariable, J: TensorConstant
- ) -> TensorVariable:
+ def get_chi_matrix_2(diff: TensorVariable, J: TensorConstant) -> TensorVariable:
  L, N = diff.shape

- diff_masked = update_mask[:, None] * diff
-
  # diff_padded: (L+J, N)
  pad_width = pt.zeros(shape=(2, 2), dtype="int32")
- pad_width = pt.set_subtensor(pad_width[0, 0], J)
- diff_padded = pt.pad(diff_masked, pad_width, mode="constant")
+ pad_width = pt.set_subtensor(pad_width[0, 0], J - 1)
+ diff_padded = pt.pad(diff, pad_width, mode="constant")

- index = pt.arange(L)[:, None] + pt.arange(J)[None, :]
+ index = pt.arange(L)[..., None] + pt.arange(J)[None, ...]
  index = index.reshape((L, J))

  chi_mat = pt.matrix_transpose(diff_padded[index])
@@ -402,8 +366,10 @@ def inverse_hessian_factors(
  return chi_mat

  L, N = alpha.shape
- S = get_chi_matrix_1(s, update_mask, J)
- Z = get_chi_matrix_1(z, update_mask, J)
+
+ # changed to get_chi_matrix_2 after removing update_mask
+ S = get_chi_matrix_2(s, J)
+ Z = get_chi_matrix_2(z, J)

  # E: (L, J, J)
  Ij = pt.eye(J)[None, ...]
@@ -488,6 +454,7 @@ def bfgs_sample_dense(

  N = x.shape[-1]
  IdN = pt.eye(N)[None, ...]
+ IdN += IdN * REGULARISATION_TERM

  # inverse Hessian
  H_inv = (
@@ -503,7 +470,10 @@

  logdet = 2.0 * pt.sum(pt.log(pt.abs(pt.diagonal(Lchol, axis1=-2, axis2=-1))), axis=-1)

- mu = x - pt.batched_dot(H_inv, g)
+ # mu = x - pt.einsum("ijk,ik->ij", H_inv, g) # causes error: Multiple destroyers of g
+
+ batched_dot = pt.vectorize(pt.dot, signature="(ijk),(ilk)->(ij)")
+ mu = x - batched_dot(H_inv, pt.matrix_transpose(g[..., None]))

  phi = pt.matrix_transpose(
  # (L, N, 1)
@@ -564,23 +534,28 @@ def bfgs_sample_sparse(
  # qr_input: (L, N, 2J)
  qr_input = inv_sqrt_alpha_diag @ beta
  (Q, R), _ = pytensor.scan(fn=pt.nlinalg.qr, sequences=[qr_input], allow_gc=False)
+
  IdN = pt.eye(R.shape[1])[None, ...]
+ IdN += IdN * REGULARISATION_TERM
+
  Lchol_input = IdN + R @ gamma @ pt.matrix_transpose(R)

+ # TODO: make robust Lchol calcs more robust, ie. try exceptions, increase REGULARISATION_TERM if non-finite exists
  Lchol = pt.linalg.cholesky(Lchol_input, lower=False, check_finite=False, on_error="nan")

  logdet = 2.0 * pt.sum(pt.log(pt.abs(pt.diagonal(Lchol, axis1=-2, axis2=-1))), axis=-1)
  logdet += pt.sum(pt.log(alpha), axis=-1)

+ # inverse Hessian
+ # (L, N, N) + (L, N, 2J), (L, 2J, 2J), (L, 2J, N) -> (L, N, N)
+ H_inv = alpha_diag + (beta @ gamma @ pt.matrix_transpose(beta))
+
  # NOTE: changed the sign from "x + " to "x -" of the expression to match Stan which differs from Zhang et al., (2022). same for dense version.
- mu = x - (
- # (L, N), (L, N) -> (L, N)
- pt.batched_dot(alpha_diag, g)
- # beta @ gamma @ beta.T
- # (L, N, 2J), (L, 2J, 2J), (L, 2J, N) -> (L, N, N)
- # (L, N, N), (L, N) -> (L, N)
- + pt.batched_dot((beta @ gamma @ pt.matrix_transpose(beta)), g)
- )
+
+ # mu = x - pt.einsum("ijk,ik->ij", H_inv, g) # causes error: Multiple destroyers of g
+
+ batched_dot = pt.vectorize(pt.dot, signature="(ijk),(ilk)->(ij)")
+ mu = x - batched_dot(H_inv, pt.matrix_transpose(g[..., None]))

  phi = pt.matrix_transpose(
  # (L, N, 1)
@@ -588,8 +563,6 @@
  # (L, N, N), (L, N, M) -> (L, N, M)
  + sqrt_alpha_diag
  @ (
- # (L, N, 2J), (L, 2J, M) -> (L, N, M)
- # intermediate calcs below
  # (L, N, 2J), (L, 2J, 2J) -> (L, N, 2J)
  (Q @ (Lchol - IdN))
  # (L, 2J, N), (L, N, M) -> (L, 2J, M)
@@ -777,7 +750,6 @@ def make_pathfinder_body(
  num_draws: int,
  maxcor: int,
  num_elbo_draws: int,
- epsilon: float,
  **compile_kwargs: dict,
  ) -> Function:
  """
@@ -793,8 +765,6 @@
  The maximum number of iterations for the L-BFGS algorithm.
  num_elbo_draws : int
  The number of draws for the Evidence Lower Bound (ELBO) estimation.
- epsilon : float
- The value used to filter out large changes in the direction of the update gradient at each iteration l in L. Iteration l is only accepted if delta_theta[l] * delta_grad[l] > epsilon * L2_norm(delta_grad[l]) for each l in L.
  compile_kwargs : dict
  Additional keyword arguments for the PyTensor compiler.

@@ -819,11 +789,10 @@

  num_draws = pt.constant(num_draws, "num_draws", dtype="int32")
  num_elbo_draws = pt.constant(num_elbo_draws, "num_elbo_draws", dtype="int32")
- epsilon = pt.constant(epsilon, "epsilon", dtype="float64")
  maxcor = pt.constant(maxcor, "maxcor", dtype="int32")

- alpha, s, z, update_mask = alpha_recover(x_full, g_full, epsilon=epsilon)
- beta, gamma = inverse_hessian_factors(alpha, s, z, update_mask, J=maxcor)
+ alpha, s, z = alpha_recover(x_full, g_full)
+ beta, gamma = inverse_hessian_factors(alpha, s, z, J=maxcor)

  # ignore initial point - x, g: (L, N)
  x = x_full[1:]
@@ -854,7 +823,7 @@

  # return psi, logP_psi, logQ_psi, elbo_argmax

- pathfinder_body_fn = compile_pymc(
+ pathfinder_body_fn = compile(
  [x_full, g_full],
  [psi, logP_psi, logQ_psi, elbo_argmax],
  **compile_kwargs,
@@ -933,11 +902,11 @@ def make_single_pathfinder_fn(
  x_base = DictToArrayBijection.map(ip).data

  # lbfgs
- lbfgs = LBFGS(neg_logp_dlogp_func, maxcor, maxiter, ftol, gtol, maxls)
+ lbfgs = LBFGS(neg_logp_dlogp_func, maxcor, maxiter, ftol, gtol, maxls, epsilon)

  # pathfinder body
  pathfinder_body_fn = make_pathfinder_body(
- logp_func, num_draws, maxcor, num_elbo_draws, epsilon, **compile_kwargs
+ logp_func, num_draws, maxcor, num_elbo_draws, **compile_kwargs
  )
  rngs = find_rng_nodes(pathfinder_body_fn.maker.fgraph.outputs)

@@ -949,8 +918,8 @@
  x0 = x_base + jitter_value
  x, g, lbfgs_niter, lbfgs_status = lbfgs.minimize(x0)

- if lbfgs_status == LBFGSStatus.INIT_FAILED:
- raise LBFGSInitFailed()
+ if lbfgs_status in {LBFGSStatus.INIT_FAILED, LBFGSStatus.INIT_FAILED_LOW_UPDATE_PCT}:
+ raise LBFGSInitFailed(lbfgs_status)
  elif lbfgs_status == LBFGSStatus.LBFGS_FAILED:
  raise LBFGSException()

@@ -1188,7 +1157,7 @@ class MultiPathfinderResult:
  elbo_argmax: NDArray | None = None
  lbfgs_status: Counter = field(default_factory=Counter)
  path_status: Counter = field(default_factory=Counter)
- importance_sampling: str = "none"
+ importance_sampling: str | None = "psis"
  warnings: list[str] = field(default_factory=list)
  pareto_k: float | None = None

@@ -1257,7 +1226,7 @@
  def with_importance_sampling(
  self,
  num_draws: int,
- method: Literal["psis", "psir", "identity", "none"] | None,
+ method: Literal["psis", "psir", "identity"] | None,
  random_seed: int | None = None,
  ) -> Self:
  """perform importance sampling"""
@@ -1388,15 +1357,16 @@ def _get_status_warning(mpr: MultiPathfinderResult) -> list[str]:
  warnings = []

  lbfgs_status_message = {
- LBFGSStatus.MAX_ITER_REACHED: "LBFGS maximum number of iterations reached. Consider increasing maxiter if this occurence is high relative to the number of paths.",
- LBFGSStatus.INIT_FAILED: "LBFGS failed to initialise. Consider reparameterizing the model or reducing jitter if this occurence is high relative to the number of paths.",
- LBFGSStatus.DIVERGED: "LBFGS diverged to infinity. Consider reparameterizing the model or adjusting the pathfinder arguments if this occurence is high relative to the number of paths.",
+ LBFGSStatus.MAX_ITER_REACHED: "MAX_ITER_REACHED: LBFGS maximum number of iterations reached. Consider increasing maxiter if this occurence is high relative to the number of paths.",
+ LBFGSStatus.INIT_FAILED: "INIT_FAILED: LBFGS failed to initialize. Consider reparameterizing the model or reducing jitter if this occurence is high relative to the number of paths.",
+ LBFGSStatus.NON_FINITE: "NON_FINITE: LBFGS objective function produced inf or nan at the last iteration. Consider reparameterizing the model or adjusting the pathfinder arguments if this occurence is high relative to the number of paths.",
+ LBFGSStatus.LOW_UPDATE_PCT: "LOW_UPDATE_PCT: Majority of LBFGS iterations were not accepted due to the either: (1) LBFGS function or gradient values containing too many inf or nan values or (2) gradient changes being significantly large, set by epsilon. Consider reparameterizing the model, adjusting initvals or jitter or other pathfinder arguments if this occurence is high relative to the number of paths.",
+ LBFGSStatus.INIT_FAILED_LOW_UPDATE_PCT: "INIT_FAILED_LOW_UPDATE_PCT: LBFGS failed to initialize due to the either: (1) LBFGS function or gradient values containing too many inf or nan values or (2) gradient changes being significantly large, set by epsilon. Consider reparameterizing the model, adjusting initvals or jitter or other pathfinder arguments if this occurence is high relative to the number of paths.",
  }

  path_status_message = {
- PathStatus.ELBO_ARGMAX_AT_ZERO: "ELBO argmax at zero refers to the first iteration during LBFGS. A high occurrence suggests the model's default initial point + jitter is may be too close to the mean posterior and a poor exploration of the parameter space. Consider increasing jitter if this occurence is high relative to the number of paths.",
- PathStatus.INVALID_LOGP: "Invalid logP values occur when a path's logP values are not finite. The failed path is not included in samples when importance sampling is used. Consider reparameterizing the model or adjusting the pathfinder arguments if this occurence is high relative to the number of paths.",
- PathStatus.INVALID_LOGQ: "Invalid logQ values occur when a path's logQ values are not finite. The failed path is not included in samples when importance sampling is used. Consider reparameterizing the model or adjusting the pathfinder arguments if this occurence is high relative to the number of paths.",
+ PathStatus.ELBO_ARGMAX_AT_ZERO: "ELBO_ARGMAX_AT_ZERO: ELBO argmax at zero refers to the first iteration during LBFGS. A high occurrence suggests the model's default initial point + jitter values are concentrated in high-density regions in the target distribution and may result in poor exploration of the parameter space. Consider increasing jitter if this occurrence is high relative to the number of paths.",
+ PathStatus.INVALID_LOGQ: "INVALID_LOGQ: Invalid logQ values occur when a path's logQ values are not finite. The failed path is not included in samples when importance sampling is used. Consider reparameterizing the model or adjusting the pathfinder arguments if this occurence is high relative to the number of paths.",
  }

  for lbfgs_status in mpr.lbfgs_status:
@@ -1423,7 +1393,7 @@ def multipath_pathfinder(
  num_elbo_draws: int,
  jitter: float,
  epsilon: float,
- importance_sampling: Literal["psis", "psir", "identity", "none"] | None,
+ importance_sampling: Literal["psis", "psir", "identity"] | None,
  progressbar: bool,
  concurrent: Literal["thread", "process"] | None,
  random_seed: RandomSeed,
@@ -1459,8 +1429,14 @@
  Amount of jitter to apply to initial points (default is 2.0). Note that Pathfinder may be highly sensitive to the jitter value. It is recommended to increase num_paths when increasing the jitter value.
  epsilon: float
  value used to filter out large changes in the direction of the update gradient at each iteration l in L. Iteration l is only accepted if delta_theta[l] * delta_grad[l] > epsilon * L2_norm(delta_grad[l]) for each l in L. (default is 1e-8).
- importance_sampling : str, optional
- importance sampling method to use which applies sampling based on the log importance weights equal to logP - logQ. Options are "psis" (default), "psir", "identity", "none". Pareto Smoothed Importance Sampling (psis) is recommended in many cases for more stable results than Pareto Smoothed Importance Resampling (psir). identity applies the log importance weights directly without resampling. none applies no importance sampling weights and returns the samples as is of size (num_paths, num_draws_per_path, N) where N is the number of model parameters, otherwise sample size is (num_draws, N).
+ importance_sampling : str, None, optional
+ Method to apply sampling based on log importance weights (logP - logQ).
+ "psis" : Pareto Smoothed Importance Sampling (default)
+ Recommended for more stable results.
+ "psir" : Pareto Smoothed Importance Resampling
+ Less stable than PSIS.
+ "identity" : Applies log importance weights directly without resampling.
+ None : No importance sampling weights. Returns raw samples of size (num_paths, num_draws_per_path, N) where N is number of model parameters. Other methods return samples of size (num_draws, N).
  progressbar : bool, optional
  Whether to display a progress bar (default is False). Setting this to True will likely increase the computation time.
  random_seed : RandomSeed, optional
@@ -1482,12 +1458,6 @@
  The result containing samples and other information from the Multi-Path Pathfinder algorithm.
  """

- valid_importance_sampling = ["psis", "psir", "identity", "none", None]
- if importance_sampling is None:
- importance_sampling = "none"
- if importance_sampling.lower() not in valid_importance_sampling:
- raise ValueError(f"Invalid importance sampling method: {importance_sampling}")
-
  *path_seeds, choice_seed = _get_seeds_per_chain(random_seed, num_paths + 1)

  pathfinder_config = PathfinderConfig(
@@ -1521,12 +1491,20 @@
  results = []
  compute_start = time.time()
  try:
- with CustomProgress(
+ desc = f"Paths Complete: {{path_idx}}/{num_paths}"
+ progress = CustomProgress(
+ "[progress.description]{task.description}",
+ BarColumn(),
+ "[progress.percentage]{task.percentage:>3.0f}%",
+ TimeRemainingColumn(),
+ TextColumn("/"),
+ TimeElapsedColumn(),
  console=Console(theme=default_progress_theme),
  disable=not progressbar,
- ) as progress:
- task = progress.add_task("Fitting", total=num_paths)
- for result in generator:
+ )
+ with progress:
+ task = progress.add_task(desc.format(path_idx=0), completed=0, total=num_paths)
+ for path_idx, result in enumerate(generator, start=1):
  try:
  if isinstance(result, Exception):
  raise result
@@ -1552,7 +1530,15 @@
  lbfgs_status=LBFGSStatus.LBFGS_FAILED,
  )
  )
- progress.update(task, advance=1)
+ finally:
+ # TODO: display LBFGS and Path Status in real time
+ progress.update(
+ task,
+ description=desc.format(path_idx=path_idx),
+ completed=path_idx,
+ )
+ # Ensure the progress bar visually reaches 100% and shows 'Completed'
+ progress.update(task, completed=num_paths, description="Completed")
  except (KeyboardInterrupt, StopIteration) as e:
  # if exception is raised here, MultiPathfinderResult will collect all the successful results and report the results. User is free to abort the process earlier and the results will still be collected and return az.InferenceData.
  if isinstance(e, StopIteration):
@@ -1602,11 +1588,11 @@ def fit_pathfinder(
  maxiter: int = 1000, # L^max
  ftol: float = 1e-5,
  gtol: float = 1e-8,
- maxls=1000,
+ maxls: int = 1000,
  num_elbo_draws: int = 10, # K
  jitter: float = 2.0,
  epsilon: float = 1e-8,
- importance_sampling: Literal["psis", "psir", "identity", "none"] = "psis",
+ importance_sampling: Literal["psis", "psir", "identity"] | None = "psis",
  progressbar: bool = True,
  concurrent: Literal["thread", "process"] | None = None,
  random_seed: RandomSeed | None = None,
@@ -1614,6 +1600,7 @@
  inference_backend: Literal["pymc", "blackjax"] = "pymc",
  pathfinder_kwargs: dict = {},
  compile_kwargs: dict = {},
+ initvals: dict | None = None,
  ) -> az.InferenceData:
  """
  Fit the Pathfinder Variational Inference algorithm.
@@ -1646,8 +1633,15 @@
  Amount of jitter to apply to initial points (default is 2.0). Note that Pathfinder may be highly sensitive to the jitter value. It is recommended to increase num_paths when increasing the jitter value.
  epsilon: float
  value used to filter out large changes in the direction of the update gradient at each iteration l in L. Iteration l is only accepted if delta_theta[l] * delta_grad[l] > epsilon * L2_norm(delta_grad[l]) for each l in L. (default is 1e-8).
- importance_sampling : str, optional
- importance sampling method to use which applies sampling based on the log importance weights equal to logP - logQ. Options are "psis" (default), "psir", "identity", "none". Pareto Smoothed Importance Sampling (psis) is recommended in many cases for more stable results than Pareto Smoothed Importance Resampling (psir). identity applies the log importance weights directly without resampling. none applies no importance sampling weights and returns the samples as is of size (num_paths, num_draws_per_path, N) where N is the number of model parameters, otherwise sample size is (num_draws, N).
+ importance_sampling : str, None, optional
+ Method to apply sampling based on log importance weights (logP - logQ).
+ Options are:
+
+ - "psis" : Pareto Smoothed Importance Sampling (default). Usually most stable.
+ - "psir" : Pareto Smoothed Importance Resampling. Less stable than PSIS.
+ - "identity" : Applies log importance weights directly without resampling.
+ - None : No importance sampling weights. Returns raw samples of size (num_paths, num_draws_per_path, N) where N is number of model parameters. Other methods return samples of size (num_draws, N).
+
  progressbar : bool, optional
  Whether to display a progress bar (default is True). Setting this to False will likely reduce the computation time.
  random_seed : RandomSeed, optional
@@ -1662,10 +1656,13 @@
  Additional keyword arguments for the Pathfinder algorithm.
  compile_kwargs
  Additional keyword arguments for the PyTensor compiler. If not provided, the default linker is "cvm_nogc".
+ initvals: dict | None = None
+ Initial values for the model parameters, as str:ndarray key-value pairs. Paritial initialization is permitted.
+ If None, the model's default initial values are used.

  Returns
  -------
- arviz.InferenceData
+ :class:`~arviz.InferenceData`
  The inference data containing the results of the Pathfinder algorithm.

  References
@@ -1674,6 +1671,23 @@
  """

  model = modelcontext(model)
+
+ if initvals is not None:
+ model = pm.model.fgraph.clone_model(model) # Create a clone of the model
+ for (
+ rv_name,
+ ivals,
+ ) in initvals.items(): # Set the initial values for the variables in the clone
+ model.set_initval(model.named_vars[rv_name], ivals)
+
+ valid_importance_sampling = {"psis", "psir", "identity", None}
+
+ if importance_sampling is not None:
+ importance_sampling = importance_sampling.lower()
+
+ if importance_sampling not in valid_importance_sampling:
+ raise ValueError(f"Invalid importance sampling method: {importance_sampling}")
+
  N = DictToArrayBijection.map(model.initial_point()).data.shape[0]

  if maxcor is None:
@@ -1704,8 +1718,9 @@
  )
  pathfinder_samples = mp_result.samples
  elif inference_backend == "blackjax":
- if find_spec("blackjax") is None:
- raise RuntimeError("Need BlackJAX to use `pathfinder`")
+ import blackjax
+ import jax
+
  if version.parse(blackjax.__version__).major < 1:
  raise ImportError("fit_pathfinder requires blackjax 1.0 or above")

@@ -1743,4 +1758,7 @@
  model=model,
  importance_sampling=importance_sampling,
  )
+
+ idata = add_data_to_inferencedata(idata, progressbar, model, compile_kwargs)
+
  return idata
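Taken together, the pathfinder changes above adjust the public fit_pathfinder API: importance_sampling now accepts None instead of the string "none", a new optional initvals argument allows partial initialization from name-to-array pairs, observed data is attached to the returned InferenceData via add_data_to_inferencedata, and blackjax/jax are only imported when the blackjax backend is requested. A hedged usage sketch against the new signature follows; the toy model, the top-level import path, and the num_paths/num_draws values are illustrative assumptions rather than text taken from this diff.

    import numpy as np
    import pymc as pm
    import pymc_extras as pmx  # assumes fit_pathfinder is re-exported at the package top level

    with pm.Model() as model:
        mu = pm.Normal("mu", 0.0, 1.0)
        sigma = pm.HalfNormal("sigma", 1.0)
        pm.Normal("y", mu, sigma, observed=np.random.default_rng(0).normal(1.0, 2.0, size=200))

        idata = pmx.fit_pathfinder(
            num_paths=8,
            num_draws=1000,
            jitter=4.0,
            initvals={"mu": np.array(0.5)},   # partial initialization, new in this release
            importance_sampling=None,         # raw draws shaped (num_paths, num_draws_per_path, N)
            random_seed=123,
        )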
@@ -28,7 +28,6 @@ from pymc_extras.statespace.filters import (
  )
  from pymc_extras.statespace.filters.distributions import (
  LinearGaussianStateSpace,
- MvNormalSVD,
  SequenceMvNormal,
  )
  from pymc_extras.statespace.filters.utilities import stabilize
@@ -707,7 +706,7 @@ class PyMCStateSpace:
  with pymc_model:
  for param_name in self.param_names:
  param = getattr(pymc_model, param_name, None)
- if param:
+ if param is not None:
  found_params.append(param.name)

  missing_params = list(set(self.param_names) - set(found_params))
@@ -746,7 +745,7 @@
  with pymc_model:
  for data_name in data_names:
  data = getattr(pymc_model, data_name, None)
- if data:
+ if data is not None:
  found_data.append(data.name)

  missing_data = list(set(data_names) - set(found_data))
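The two statespace hunks above replace truthiness checks with explicit is-not-None comparisons. The distinction matters because the objects returned by getattr(pymc_model, name, None) are model variables or data containers whose truth value is either ambiguous or misleading. A short illustration of the Python-level pitfall with plain NumPy arrays (an assumption used only to demonstrate the behavior, not pymc-extras code):

    import numpy as np

    present_but_falsy = np.array(0.0)   # a found object whose bool() happens to be False
    missing = None

    # Truthiness check: wrongly treats the zero-valued array as "not found".
    print(bool(present_but_falsy))       # False
    # Multi-element arrays are worse: bool() raises "truth value ... is ambiguous".

    # Explicit presence check, as in the diff above: only None counts as missing.
    print(present_but_falsy is not None) # True
    print(missing is not None)           # False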
@@ -2233,7 +2232,9 @@
  if shock_trajectory is None:
  shock_trajectory = pt.zeros((n_steps, self.k_posdef))
  if Q is not None:
- init_shock = MvNormalSVD("initial_shock", mu=0, cov=Q, dims=[SHOCK_DIM])
+ init_shock = pm.MvNormal(
+ "initial_shock", mu=0, cov=Q, dims=[SHOCK_DIM], method="svd"
+ )
  else:
  init_shock = pm.Deterministic(
  "initial_shock",