pymc-extras 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. pymc_extras/deserialize.py +10 -4
  2. pymc_extras/distributions/continuous.py +1 -1
  3. pymc_extras/distributions/histogram_utils.py +6 -4
  4. pymc_extras/distributions/multivariate/r2d2m2cp.py +4 -3
  5. pymc_extras/distributions/timeseries.py +14 -12
  6. pymc_extras/inference/dadvi/dadvi.py +149 -128
  7. pymc_extras/inference/laplace_approx/find_map.py +16 -39
  8. pymc_extras/inference/laplace_approx/idata.py +22 -4
  9. pymc_extras/inference/laplace_approx/laplace.py +196 -151
  10. pymc_extras/inference/laplace_approx/scipy_interface.py +47 -7
  11. pymc_extras/inference/pathfinder/idata.py +517 -0
  12. pymc_extras/inference/pathfinder/pathfinder.py +71 -12
  13. pymc_extras/inference/smc/sampling.py +2 -2
  14. pymc_extras/model/marginal/distributions.py +4 -2
  15. pymc_extras/model/marginal/graph_analysis.py +2 -2
  16. pymc_extras/model/marginal/marginal_model.py +12 -2
  17. pymc_extras/model_builder.py +9 -4
  18. pymc_extras/prior.py +203 -8
  19. pymc_extras/statespace/core/compile.py +1 -1
  20. pymc_extras/statespace/core/statespace.py +2 -1
  21. pymc_extras/statespace/filters/distributions.py +15 -13
  22. pymc_extras/statespace/filters/kalman_filter.py +24 -22
  23. pymc_extras/statespace/filters/kalman_smoother.py +3 -5
  24. pymc_extras/statespace/filters/utilities.py +2 -5
  25. pymc_extras/statespace/models/DFM.py +12 -27
  26. pymc_extras/statespace/models/ETS.py +190 -198
  27. pymc_extras/statespace/models/SARIMAX.py +5 -17
  28. pymc_extras/statespace/models/VARMAX.py +15 -67
  29. pymc_extras/statespace/models/structural/components/autoregressive.py +4 -4
  30. pymc_extras/statespace/models/structural/components/regression.py +4 -26
  31. pymc_extras/statespace/models/utilities.py +7 -0
  32. pymc_extras/utils/model_equivalence.py +2 -2
  33. pymc_extras/utils/prior.py +10 -14
  34. pymc_extras/utils/spline.py +4 -10
  35. {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/METADATA +4 -4
  36. {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/RECORD +38 -37
  37. {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/WHEEL +1 -1
  38. {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/licenses/LICENSE +0 -0
pymc_extras/deserialize.py
@@ -13,10 +13,7 @@ Make use of the already registered deserializers:
 
     from pymc_extras.deserialize import deserialize
 
-    prior_class_data = {
-        "dist": "Normal",
-        "kwargs": {"mu": 0, "sigma": 1}
-    }
+    prior_class_data = {"dist": "Normal", "kwargs": {"mu": 0, "sigma": 1}}
     prior = deserialize(prior_class_data)
     # Prior("Normal", mu=0, sigma=1)
 
@@ -26,6 +23,7 @@ Register custom class deserialization:
 
     from pymc_extras.deserialize import register_deserialization
 
+
     class MyClass:
         def __init__(self, value: int):
             self.value = value
@@ -34,6 +32,7 @@ Register custom class deserialization:
             # Example of what the to_dict method might look like.
             return {"value": self.value}
 
+
     register_deserialization(
         is_type=lambda data: data.keys() == {"value"} and isinstance(data["value"], int),
         deserialize=lambda data: MyClass(value=data["value"]),
@@ -80,18 +79,23 @@ class Deserializer:
 
        from typing import Any
 
+
        class MyClass:
            def __init__(self, value: int):
                self.value = value
 
+
        from pymc_extras.deserialize import Deserializer
 
+
        def is_type(data: Any) -> bool:
            return data.keys() == {"value"} and isinstance(data["value"], int)
 
+
        def deserialize(data: dict) -> MyClass:
            return MyClass(value=data["value"])
 
+
        deserialize_logic = Deserializer(is_type=is_type, deserialize=deserialize)
 
    """
@@ -196,6 +200,7 @@ def register_deserialization(is_type: IsType, deserialize: Deserialize) -> None:
 
        from pymc_extras.deserialize import register_deserialization
 
+
        class MyClass:
            def __init__(self, value: int):
                self.value = value
@@ -204,6 +209,7 @@ def register_deserialization(is_type: IsType, deserialize: Deserialize) -> None:
                # Example of what the to_dict method might look like.
                return {"value": self.value}
 
+
        register_deserialization(
            is_type=lambda data: data.keys() == {"value"} and isinstance(data["value"], int),
            deserialize=lambda data: MyClass(value=data["value"]),
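The reformatted docstring examples above compose into a full round trip. A short sketch of that flow (MyClass and its {"value": ...} schema are the illustrative names from the docstrings, not part of the library API):

    from pymc_extras.deserialize import deserialize, register_deserialization


    class MyClass:
        def __init__(self, value: int):
            self.value = value

        def to_dict(self) -> dict:
            # Example of what the to_dict method might look like.
            return {"value": self.value}


    register_deserialization(
        is_type=lambda data: data.keys() == {"value"} and isinstance(data["value"], int),
        deserialize=lambda data: MyClass(value=data["value"]),
    )

    # Built-in deserializers keep working alongside the custom one.
    prior = deserialize({"dist": "Normal", "kwargs": {"mu": 0, "sigma": 1}})  # Prior("Normal", mu=0, sigma=1)
    restored = deserialize(MyClass(value=1).to_dict())
    assert restored.value == 1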
pymc_extras/distributions/continuous.py
@@ -265,7 +265,7 @@ class Chi:
         from pymc_extras.distributions import Chi
 
         with pm.Model():
-            x = Chi('x', nu=1)
+            x = Chi("x", nu=1)
     """
 
     @staticmethod
pymc_extras/distributions/histogram_utils.py
@@ -130,8 +130,7 @@ def histogram_approximation(name, dist, *, observed, **h_kwargs):
     ...     m = pm.Normal("m", dims="tests")
     ...     s = pm.LogNormal("s", dims="tests")
     ...     pot = pmx.distributions.histogram_approximation(
-    ...         "pot", pm.Normal.dist(m, s),
-    ...         observed=measurements, n_quantiles=50
+    ...         "pot", pm.Normal.dist(m, s), observed=measurements, n_quantiles=50
     ...     )
 
     For special cases like Zero Inflation in Continuous variables there is a flag.
@@ -143,8 +142,11 @@ def histogram_approximation(name, dist, *, observed, **h_kwargs):
     ...     m = pm.Normal("m", dims="tests")
     ...     s = pm.LogNormal("s", dims="tests")
     ...     pot = pmx.distributions.histogram_approximation(
-    ...         "pot", pm.Normal.dist(m, s),
-    ...         observed=measurements, n_quantiles=50, zero_inflation=True
+    ...         "pot",
+    ...         pm.Normal.dist(m, s),
+    ...         observed=measurements,
+    ...         n_quantiles=50,
+    ...         zero_inflation=True,
     ...     )
     """
     try:
pymc_extras/distributions/multivariate/r2d2m2cp.py
@@ -305,6 +305,7 @@ def R2D2M2CP(
        import pymc_extras as pmx
        import pymc as pm
        import numpy as np
+
        X = np.random.randn(10, 3)
        b = np.random.randn(3)
        y = X @ b + np.random.randn(10) * 0.04 + 5
@@ -339,7 +340,7 @@ def R2D2M2CP(
                # "c" - a must have in the relation
                variables_importance=[10, 1, 34],
                # NOTE: try both
-               centered=True
+               centered=True,
            )
            # intercept prior centering should be around prior predictive mean
            intercept = y.mean()
@@ -365,7 +366,7 @@ def R2D2M2CP(
                r2_std=0.2,
                # NOTE: if you know where a variable should go
                # if you do not know, leave as 0.5
-               centered=False
+               centered=False,
            )
            # intercept prior centering should be around prior predictive mean
            intercept = y.mean()
@@ -394,7 +395,7 @@ def R2D2M2CP(
                # if you do not know, leave as 0.5
                positive_probs=[0.8, 0.5, 0.1],
                # NOTE: try both
-               centered=True
+               centered=True,
            )
            intercept = y.mean()
            obs = pm.Normal("obs", intercept + X @ beta, eps, observed=y)
pymc_extras/distributions/timeseries.py
@@ -113,8 +113,10 @@ class DiscreteMarkovChain(Distribution):
 
        with pm.Model() as markov_chain:
            P = pm.Dirichlet("P", a=[1, 1, 1], size=(3,))
-           init_dist = pm.Categorical.dist(p = np.full(3, 1 / 3))
-           markov_chain = pmx.DiscreteMarkovChain("markov_chain", P=P, init_dist=init_dist, shape=(100,))
+           init_dist = pm.Categorical.dist(p=np.full(3, 1 / 3))
+           markov_chain = pmx.DiscreteMarkovChain(
+               "markov_chain", P=P, init_dist=init_dist, shape=(100,)
+           )
 
    """
 
@@ -194,21 +196,20 @@ class DiscreteMarkovChain(Distribution):
        state_rng = pytensor.shared(np.random.default_rng())
 
        def transition(*args):
-           *states, transition_probs, old_rng = args
+           old_rng, *states, transition_probs = args
            p = transition_probs[tuple(states)]
            next_rng, next_state = pm.Categorical.dist(p=p, rng=old_rng).owner.outputs
-           return next_state, {old_rng: next_rng}
+           return next_rng, next_state
 
-       markov_chain, state_updates = pytensor.scan(
+       state_next_rng, markov_chain = pytensor.scan(
            transition,
-           non_sequences=[P_, state_rng],
-           outputs_info=_make_outputs_info(n_lags, init_dist_),
+           outputs_info=[state_rng, *_make_outputs_info(n_lags, init_dist_)],
+           non_sequences=[P_],
            n_steps=steps_,
            strict=True,
+           return_updates=False,
        )
 
-       (state_next_rng,) = tuple(state_updates.values())
-
        discrete_mc_ = pt.moveaxis(pt.concatenate([init_dist_, markov_chain], axis=0), 0, -1)
 
        discrete_mc_op = DiscreteMarkovChainRV(
@@ -237,16 +238,17 @@ def discrete_mc_moment(op, rv, P, steps, init_dist, state_rng):
    n_lags = op.n_lags
 
    def greedy_transition(*args):
-       *states, transition_probs, old_rng = args
+       *states, transition_probs = args
        p = transition_probs[tuple(states)]
        return pt.argmax(p)
 
-   chain_moment, moment_updates = pytensor.scan(
+   chain_moment = pytensor.scan(
        greedy_transition,
-       non_sequences=[P, state_rng],
+       non_sequences=[P],
        outputs_info=_make_outputs_info(n_lags, init_dist),
        n_steps=steps,
        strict=True,
+       return_updates=False,
    )
    chain_moment = pt.concatenate([init_dist_moment, chain_moment])
    return chain_moment
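The scan changes above replace the updates-dictionary pattern with an explicit RNG output: the shared generator goes into `outputs_info`, the step function returns the advanced RNG first, and `return_updates=False` suppresses the old updates return value. A standalone sketch of the same pattern, assuming a pytensor release recent enough for `scan` to accept RNGs in `outputs_info` and the `return_updates` flag:

    import numpy as np
    import pymc as pm
    import pytensor
    import pytensor.tensor as pt

    P = pt.as_tensor(np.full((3, 3), 1 / 3))  # toy 3-state transition matrix
    rng = pytensor.shared(np.random.default_rng(0))
    init_state = pt.zeros((), dtype="int64")


    def transition(old_rng, state, transition_probs):
        # Return the advanced RNG as the first output so scan threads it
        # as a recurrent state instead of collecting it from updates.
        p = transition_probs[state]
        next_rng, next_state = pm.Categorical.dist(p=p, rng=old_rng).owner.outputs
        return next_rng, next_state


    next_rngs, states = pytensor.scan(
        transition,
        outputs_info=[rng, init_state],
        non_sequences=[P],
        n_steps=10,
        strict=True,
        return_updates=False,
    )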
pymc_extras/inference/dadvi/dadvi.py
@@ -3,43 +3,45 @@ import numpy as np
 import pymc
 import pytensor
 import pytensor.tensor as pt
-import xarray
 
-from better_optimize import minimize
+from arviz import InferenceData
+from better_optimize import basinhopping, minimize
 from better_optimize.constants import minimize_method
 from pymc import DictToArrayBijection, Model, join_nonshared_inputs
-from pymc.backends.arviz import (
-    PointFunc,
-    apply_function_over_dataset,
-    coords_and_dims_for_inferencedata,
-)
-from pymc.util import RandomSeed, get_default_varnames
+from pymc.blocking import RaveledVars
+from pymc.util import RandomSeed
 from pytensor.tensor.variable import TensorVariable
 
-from pymc_extras.inference.laplace_approx.laplace import unstack_laplace_draws
+from pymc_extras.inference.laplace_approx.idata import (
+    add_data_to_inference_data,
+    add_optimizer_result_to_inference_data,
+)
+from pymc_extras.inference.laplace_approx.laplace import draws_from_laplace_approx
 from pymc_extras.inference.laplace_approx.scipy_interface import (
-    _compile_functions_for_scipy_optimize,
+    scipy_optimize_funcs_from_loss,
+    set_optimizer_function_defaults,
 )
 
 
 def fit_dadvi(
     model: Model | None = None,
     n_fixed_draws: int = 30,
-    random_seed: RandomSeed = None,
     n_draws: int = 1000,
-    keep_untransformed: bool = False,
+    include_transformed: bool = False,
     optimizer_method: minimize_method = "trust-ncg",
-    use_grad: bool = True,
-    use_hessp: bool = True,
-    use_hess: bool = False,
-    **minimize_kwargs,
+    use_grad: bool | None = None,
+    use_hessp: bool | None = None,
+    use_hess: bool | None = None,
+    gradient_backend: str = "pytensor",
+    compile_kwargs: dict | None = None,
+    random_seed: RandomSeed = None,
+    progressbar: bool = True,
+    **optimizer_kwargs,
 ) -> az.InferenceData:
     """
-    Does inference using deterministic ADVI (automatic differentiation
-    variational inference), DADVI for short.
+    Does inference using Deterministic ADVI (Automatic Differentiation Variational Inference), DADVI for short.
 
-    For full details see the paper cited in the references:
-    https://www.jmlr.org/papers/v25/23-1015.html
+    For full details see the paper cited in the references: https://www.jmlr.org/papers/v25/23-1015.html
 
     Parameters
     ----------
@@ -47,46 +49,48 @@ def fit_dadvi(
         The PyMC model to be fit. If None, the current model context is used.
 
     n_fixed_draws : int
-        The number of fixed draws to use for the optimisation. More
-        draws will result in more accurate estimates, but also
-        increase inference time. Usually, the default of 30 is a good
-        tradeoff.between speed and accuracy.
+        The number of fixed draws to use for the optimisation. More draws will result in more accurate estimates, but
+        also increase inference time. Usually, the default of 30 is a good tradeoff between speed and accuracy.
 
     random_seed: int
-        The random seed to use for the fixed draws. Running the optimisation
-        twice with the same seed should arrive at the same result.
+        The random seed to use for the fixed draws. Running the optimisation twice with the same seed should arrive at
+        the same result.
 
     n_draws: int
         The number of draws to return from the variational approximation.
 
-    keep_untransformed: bool
-        Whether or not to keep the unconstrained variables (such as
-        logs of positive-constrained parameters) in the output.
+    include_transformed: bool
+        Whether or not to keep the unconstrained variables (such as logs of positive-constrained parameters) in the
+        output.
 
     optimizer_method: str
-        Which optimization method to use. The function calls
-        ``scipy.optimize.minimize``, so any of the methods there can
-        be used. The default is trust-ncg, which uses second-order
-        information and is generally very reliable. Other methods such
-        as L-BFGS-B might be faster but potentially more brittle and
-        may not converge exactly to the optimum.
-
-    minimize_kwargs:
-        Additional keyword arguments to pass to the
-        ``scipy.optimize.minimize`` function. See the documentation of
-        that function for details.
+        Which optimization method to use. The function calls ``scipy.optimize.minimize``, so any of the methods there
+        can be used. The default is trust-ncg, which uses second-order information and is generally very reliable.
+        Other methods such as L-BFGS-B might be faster but potentially more brittle and may not converge exactly to
+        the optimum.
+
+    gradient_backend: str
+        Which backend to use to compute gradients. Must be one of "jax" or "pytensor". Default is "pytensor".
 
-    use_grad:
-        If True, pass the gradient function to
-        `scipy.optimize.minimize` (where it is referred to as `jac`).
+    compile_kwargs: dict, optional
+        Additional keyword arguments to pass to `pytensor.function`
 
-    use_hessp:
+    use_grad: bool, optional
+        If True, pass the gradient function to `scipy.optimize.minimize` (where it is referred to as `jac`).
+
+    use_hessp: bool, optional
         If True, pass the hessian vector product to `scipy.optimize.minimize`.
 
-    use_hess:
-        If True, pass the hessian to `scipy.optimize.minimize`. Note that
-        this is generally not recommended since its computation can be slow
-        and memory-intensive if there are many parameters.
+    use_hess: bool, optional
+        If True, pass the hessian to `scipy.optimize.minimize`. Note that this is generally not recommended since its
+        computation can be slow and memory-intensive if there are many parameters.
+
+    progressbar: bool
+        Whether or not to show a progress bar during optimization. Default is True.
+
+    optimizer_kwargs:
+        Additional keyword arguments to pass to the ``scipy.optimize.minimize`` function. See the documentation of
+        that function for details.
 
     Returns
     -------
@@ -95,16 +99,25 @@ def fit_dadvi(
 
     References
     ----------
-    Giordano, R., Ingram, M., & Broderick, T. (2024). Black Box
-    Variational Inference with a Deterministic Objective: Faster, More
-    Accurate, and Even More Black Box. Journal of Machine Learning
-    Research, 25(18), 1–39.
+    Giordano, R., Ingram, M., & Broderick, T. (2024). Black Box Variational Inference with a Deterministic Objective:
+    Faster, More Accurate, and Even More Black Box. Journal of Machine Learning Research, 25(18), 1–39.
     """
 
     model = pymc.modelcontext(model) if model is None else model
+    do_basinhopping = optimizer_method == "basinhopping"
+    minimizer_kwargs = optimizer_kwargs.pop("minimizer_kwargs", {})
+
+    if do_basinhopping:
+        # For a nice API, we let the user set method="basinhopping", but if we're doing basinhopping we still need
+        # another method for the inner optimizer. This will be set in the minimizer_kwargs, but also needs a default
+        # if one isn't provided.
+
+        optimizer_method = minimizer_kwargs.pop("method", "L-BFGS-B")
+        minimizer_kwargs["method"] = optimizer_method
 
     initial_point_dict = model.initial_point()
-    n_params = DictToArrayBijection.map(initial_point_dict).data.shape[0]
+    initial_point = DictToArrayBijection.map(initial_point_dict)
+    n_params = initial_point.data.shape[0]
 
     var_params, objective = create_dadvi_graph(
         model,
@@ -113,44 +126,100 @@ def fit_dadvi(
         n_params=n_params,
     )
 
-    f_fused, f_hessp = _compile_functions_for_scipy_optimize(
-        objective,
-        [var_params],
-        compute_grad=use_grad,
-        compute_hessp=use_hessp,
-        compute_hess=use_hess,
+    use_grad, use_hess, use_hessp = set_optimizer_function_defaults(
+        optimizer_method, use_grad, use_hess, use_hessp
     )
 
-    derivative_kwargs = {}
-
-    if use_grad:
-        derivative_kwargs["jac"] = True
-    if use_hessp:
-        derivative_kwargs["hessp"] = f_hessp
-    if use_hess:
-        derivative_kwargs["hess"] = True
+    f_fused, f_hessp = scipy_optimize_funcs_from_loss(
+        loss=objective,
+        inputs=[var_params],
+        initial_point_dict=None,
+        use_grad=use_grad,
+        use_hessp=use_hessp,
+        use_hess=use_hess,
+        gradient_backend=gradient_backend,
+        compile_kwargs=compile_kwargs,
+        inputs_are_flat=True,
+    )
 
-    result = minimize(
-        f_fused,
-        np.zeros(2 * n_params),
-        method=optimizer_method,
-        **derivative_kwargs,
-        **minimize_kwargs,
+    dadvi_initial_point = {
+        f"{var_name}_mu": np.zeros_like(value).ravel()
+        for var_name, value in initial_point_dict.items()
+    }
+    dadvi_initial_point.update(
+        {
+            f"{var_name}_sigma__log": np.zeros_like(value).ravel()
+            for var_name, value in initial_point_dict.items()
+        }
     )
 
+    dadvi_initial_point = DictToArrayBijection.map(dadvi_initial_point)
+    args = optimizer_kwargs.pop("args", ())
+
+    if do_basinhopping:
+        if "args" not in minimizer_kwargs:
+            minimizer_kwargs["args"] = args
+        if "hessp" not in minimizer_kwargs:
+            minimizer_kwargs["hessp"] = f_hessp
+        if "method" not in minimizer_kwargs:
+            minimizer_kwargs["method"] = optimizer_method
+
+        result = basinhopping(
+            func=f_fused,
+            x0=dadvi_initial_point.data,
+            progressbar=progressbar,
+            minimizer_kwargs=minimizer_kwargs,
+            **optimizer_kwargs,
+        )
+
+    else:
+        result = minimize(
+            f=f_fused,
+            x0=dadvi_initial_point.data,
+            args=args,
+            method=optimizer_method,
+            hessp=f_hessp,
+            progressbar=progressbar,
+            **optimizer_kwargs,
+        )
+
+    raveled_optimized = RaveledVars(result.x, dadvi_initial_point.point_map_info)
+
     opt_var_params = result.x
     opt_means, opt_log_sds = np.split(opt_var_params, 2)
 
-    # Make the draws:
-    generator = np.random.default_rng(seed=random_seed)
-    draws_raw = generator.standard_normal(size=(n_draws, n_params))
+    posterior, unconstrained_posterior = draws_from_laplace_approx(
+        mean=opt_means,
+        standard_deviation=np.exp(opt_log_sds),
+        draws=n_draws,
+        model=model,
+        vectorize_draws=False,
+        return_unconstrained=include_transformed,
+        random_seed=random_seed,
+    )
+    idata = InferenceData(posterior=posterior)
+    if include_transformed:
+        idata.add_groups(unconstrained_posterior=unconstrained_posterior)
+
+    var_name_to_model_var = {f"{var_name}_mu": var_name for var_name in initial_point_dict.keys()}
+    var_name_to_model_var.update(
+        {f"{var_name}_sigma__log": var_name for var_name in initial_point_dict.keys()}
+    )
 
-    draws = opt_means + draws_raw * np.exp(opt_log_sds)
-    draws_arviz = unstack_laplace_draws(draws, model, chains=1, draws=n_draws)
+    idata = add_optimizer_result_to_inference_data(
+        idata=idata,
+        result=result,
+        method=optimizer_method,
+        mu=raveled_optimized,
+        model=model,
+        var_name_to_model_var=var_name_to_model_var,
+    )
 
-    transformed_draws = transform_draws(draws_arviz, model, keep_untransformed=keep_untransformed)
+    idata = add_data_to_inference_data(
+        idata=idata, progressbar=False, model=model, compile_kwargs=compile_kwargs
+    )
 
-    return transformed_draws
+    return idata
 
 
 def create_dadvi_graph(
@@ -211,51 +280,3 @@ def create_dadvi_graph(
     objective = -mean_log_density - entropy
 
     return var_params, objective
-
-
-def transform_draws(
-    unstacked_draws: xarray.Dataset,
-    model: Model,
-    keep_untransformed: bool = False,
-):
-    """
-    Transforms the unconstrained draws back into the constrained space.
-
-    Parameters
-    ----------
-    unstacked_draws : xarray.Dataset
-        The draws to constrain back into the original space.
-
-    model : Model
-        The PyMC model the variables were derived from.
-
-    n_draws: int
-        The number of draws to return from the variational approximation.
-
-    keep_untransformed: bool
-        Whether or not to keep the unconstrained variables in the output.
-
-    Returns
-    -------
-    :class:`~arviz.InferenceData`
-        Draws from the original constrained parameters.
-    """
-
-    filtered_var_names = model.unobserved_value_vars
-    vars_to_sample = list(
-        get_default_varnames(filtered_var_names, include_transformed=keep_untransformed)
-    )
-    fn = pytensor.function(model.value_vars, vars_to_sample)
-    point_func = PointFunc(fn)
-
-    coords, dims = coords_and_dims_for_inferencedata(model)
-
-    transformed_result = apply_function_over_dataset(
-        point_func,
-        unstacked_draws,
-        output_var_names=[x.name for x in vars_to_sample],
-        coords=coords,
-        dims=dims,
-    )
-
-    return transformed_result
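Putting the new fit_dadvi signature together, a minimal call against the 0.7.0 API might look like the sketch below (the toy model is illustrative; fit_dadvi is imported from the module touched in this diff):

    import numpy as np
    import pymc as pm

    from pymc_extras.inference.dadvi.dadvi import fit_dadvi

    y = np.random.default_rng(0).normal(loc=1.0, scale=2.0, size=100)

    with pm.Model():
        mu = pm.Normal("mu", 0, 10)
        sigma = pm.HalfNormal("sigma", 5)
        pm.Normal("obs", mu=mu, sigma=sigma, observed=y)

        idata = fit_dadvi(
            n_fixed_draws=30,
            n_draws=1000,
            include_transformed=False,  # renamed from keep_untransformed
            optimizer_method="trust-ncg",
            gradient_backend="pytensor",
            random_seed=42,
            progressbar=True,
        )

    # For a global-then-local search, pass optimizer_method="basinhopping" and choose
    # the inner optimizer via minimizer_kwargs={"method": "L-BFGS-B"}.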
pymc_extras/inference/laplace_approx/find_map.py
@@ -7,7 +7,7 @@ import numpy as np
 import pymc as pm
 
 from better_optimize import basinhopping, minimize
-from better_optimize.constants import MINIMIZE_MODE_KWARGS, minimize_method
+from better_optimize.constants import minimize_method
 from pymc.blocking import DictToArrayBijection, RaveledVars
 from pymc.initial_point import make_initial_point_fn
 from pymc.model.transform.optimization import freeze_dims_and_data
@@ -24,40 +24,12 @@ from pymc_extras.inference.laplace_approx.idata import (
 from pymc_extras.inference.laplace_approx.scipy_interface import (
     GradientBackend,
     scipy_optimize_funcs_from_loss,
+    set_optimizer_function_defaults,
 )
 
 _log = logging.getLogger(__name__)
 
 
-def set_optimizer_function_defaults(method, use_grad, use_hess, use_hessp):
-    method_info = MINIMIZE_MODE_KWARGS[method].copy()
-
-    if use_hess and use_hessp:
-        _log.warning(
-            'Both "use_hess" and "use_hessp" are set to True, but scipy.optimize.minimize never uses both at the '
-            'same time. When possible "use_hessp" is preferred because its is computationally more efficient. '
-            'Setting "use_hess" to False.'
-        )
-        use_hess = False
-
-    use_grad = use_grad if use_grad is not None else method_info["uses_grad"]
-
-    if use_hessp is not None and use_hess is None:
-        use_hess = not use_hessp
-
-    elif use_hess is not None and use_hessp is None:
-        use_hessp = not use_hess
-
-    elif use_hessp is None and use_hess is None:
-        use_hessp = method_info["uses_hessp"]
-        use_hess = method_info["uses_hess"]
-        if use_hessp and use_hess:
-            # If a method could use either hess or hessp, we default to using hessp
-            use_hess = False
-
-    return use_grad, use_hess, use_hessp
-
-
 def get_nearest_psd(A: np.ndarray) -> np.ndarray:
     """
     Compute the nearest positive semi-definite matrix to a given matrix.
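The removed helper now lives in scipy_interface (note the new import above) and is shared with fit_dadvi. A small sketch of how its defaults resolve, following the logic just deleted here; the commented results are what that logic implies for trust-ncg (which supports grad, hess, and hessp), not verified output:

    from pymc_extras.inference.laplace_approx.scipy_interface import (
        set_optimizer_function_defaults,
    )

    # All None: the method's capabilities decide, and hessp is preferred over hess
    # when a method can use either.
    use_grad, use_hess, use_hessp = set_optimizer_function_defaults(
        "trust-ncg", None, None, None
    )
    print(use_grad, use_hess, use_hessp)  # expected: True False True

    # Explicitly requesting the full Hessian turns the hessp default off.
    use_grad, use_hess, use_hessp = set_optimizer_function_defaults(
        "trust-ncg", True, True, None
    )
    print(use_grad, use_hess, use_hessp)  # expected: True True False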
@@ -196,6 +168,7 @@ def find_MAP(
     jitter_rvs: list[TensorVariable] | None = None,
     progressbar: bool = True,
     include_transformed: bool = True,
+    freeze_model: bool = True,
     gradient_backend: GradientBackend = "pytensor",
     compile_kwargs: dict | None = None,
     compute_hessian: bool = False,
@@ -238,6 +211,10 @@ def find_MAP(
         Whether to display a progress bar during optimization. Defaults to True.
     include_transformed: bool, optional
         Whether to include transformed variable values in the returned dictionary. Defaults to True.
+    freeze_model: bool, optional
+        If True, freeze_dims_and_data will be called on the model before compiling the loss functions. This is
+        sometimes necessary for JAX, and can sometimes improve performance by allowing constant folding. Defaults to
+        True.
     gradient_backend: str, default "pytensor"
         Which backend to use to compute gradients. Must be one of "pytensor" or "jax".
     compute_hessian: bool
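A short usage sketch of the new flag (toy model for illustration; the other keywords follow the existing find_MAP signature):

    import numpy as np
    import pymc as pm

    from pymc_extras.inference.laplace_approx.find_map import find_MAP

    y = np.random.default_rng(0).normal(loc=0.5, scale=1.5, size=200)

    with pm.Model():
        mu = pm.Normal("mu", 0, 5)
        sigma = pm.HalfNormal("sigma", 2)
        pm.Normal("obs", mu=mu, sigma=sigma, observed=y)

        idata = find_MAP(
            method="L-BFGS-B",
            # freeze_model=True (the default) calls freeze_dims_and_data before
            # compiling; set it to False to keep mutable dims and data mutable.
            freeze_model=True,
            gradient_backend="pytensor",
            include_transformed=True,
            progressbar=False,
        )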
@@ -257,11 +234,13 @@ def find_MAP(
         Results of Maximum A Posteriori (MAP) estimation, including the optimized point, inverse Hessian, transformed
         latent variables, and optimizer results.
     """
-    model = pm.modelcontext(model) if model is None else model
-    frozen_model = freeze_dims_and_data(model)
     compile_kwargs = {} if compile_kwargs is None else compile_kwargs
+    model = pm.modelcontext(model) if model is None else model
+
+    if freeze_model:
+        model = freeze_dims_and_data(model)
 
-    initial_params = _make_initial_point(frozen_model, initvals, random_seed, jitter_rvs)
+    initial_params = _make_initial_point(model, initvals, random_seed, jitter_rvs)
 
     do_basinhopping = method == "basinhopping"
     minimizer_kwargs = optimizer_kwargs.pop("minimizer_kwargs", {})
@@ -279,8 +258,8 @@ def find_MAP(
     )
 
     f_fused, f_hessp = scipy_optimize_funcs_from_loss(
-        loss=-frozen_model.logp(),
-        inputs=frozen_model.continuous_value_vars + frozen_model.discrete_value_vars,
+        loss=-model.logp(),
+        inputs=model.continuous_value_vars + model.discrete_value_vars,
         initial_point_dict=DictToArrayBijection.rmap(initial_params),
         use_grad=use_grad,
         use_hess=use_hess,
@@ -344,12 +323,10 @@ def find_MAP(
     }
 
     idata = map_results_to_inference_data(
-        map_point=optimized_point, model=frozen_model, include_transformed=include_transformed
+        map_point=optimized_point, model=model, include_transformed=include_transformed
     )
 
-    idata = add_fit_to_inference_data(
-        idata=idata, mu=raveled_optimized, H_inv=H_inv, model=frozen_model
-    )
+    idata = add_fit_to_inference_data(idata=idata, mu=raveled_optimized, H_inv=H_inv, model=model)
 
     idata = add_optimizer_result_to_inference_data(
         idata=idata, result=optimizer_result, method=method, mu=raveled_optimized, model=model