pymc-extras 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymc_extras/deserialize.py +10 -4
- pymc_extras/distributions/continuous.py +1 -1
- pymc_extras/distributions/histogram_utils.py +6 -4
- pymc_extras/distributions/multivariate/r2d2m2cp.py +4 -3
- pymc_extras/distributions/timeseries.py +14 -12
- pymc_extras/inference/dadvi/dadvi.py +149 -128
- pymc_extras/inference/laplace_approx/find_map.py +16 -39
- pymc_extras/inference/laplace_approx/idata.py +22 -4
- pymc_extras/inference/laplace_approx/laplace.py +196 -151
- pymc_extras/inference/laplace_approx/scipy_interface.py +47 -7
- pymc_extras/inference/pathfinder/idata.py +517 -0
- pymc_extras/inference/pathfinder/pathfinder.py +71 -12
- pymc_extras/inference/smc/sampling.py +2 -2
- pymc_extras/model/marginal/distributions.py +4 -2
- pymc_extras/model/marginal/graph_analysis.py +2 -2
- pymc_extras/model/marginal/marginal_model.py +12 -2
- pymc_extras/model_builder.py +9 -4
- pymc_extras/prior.py +203 -8
- pymc_extras/statespace/core/compile.py +1 -1
- pymc_extras/statespace/core/statespace.py +2 -1
- pymc_extras/statespace/filters/distributions.py +15 -13
- pymc_extras/statespace/filters/kalman_filter.py +24 -22
- pymc_extras/statespace/filters/kalman_smoother.py +3 -5
- pymc_extras/statespace/filters/utilities.py +2 -5
- pymc_extras/statespace/models/DFM.py +12 -27
- pymc_extras/statespace/models/ETS.py +190 -198
- pymc_extras/statespace/models/SARIMAX.py +5 -17
- pymc_extras/statespace/models/VARMAX.py +15 -67
- pymc_extras/statespace/models/structural/components/autoregressive.py +4 -4
- pymc_extras/statespace/models/structural/components/regression.py +4 -26
- pymc_extras/statespace/models/utilities.py +7 -0
- pymc_extras/utils/model_equivalence.py +2 -2
- pymc_extras/utils/prior.py +10 -14
- pymc_extras/utils/spline.py +4 -10
- {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/METADATA +4 -4
- {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/RECORD +38 -37
- {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/WHEEL +1 -1
- {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/licenses/LICENSE +0 -0
pymc_extras/inference/laplace_approx/idata.py

@@ -22,10 +22,15 @@ def make_default_labels(name: str, shape: tuple[int, ...]) -> list:
     return [list(range(dim)) for dim in shape]
 
 
-def make_unpacked_variable_names(names: list[str], model: pm.Model) -> list[str]:
+def make_unpacked_variable_names(
+    names: list[str], model: pm.Model, var_name_to_model_var: dict[str, str] | None = None
+) -> list[str]:
     coords = model.coords
     initial_point = model.initial_point()
 
+    if var_name_to_model_var is None:
+        var_name_to_model_var = {}
+
     value_to_dim = {
         value.name: model.named_vars_to_dims.get(model.values_to_rvs[value].name, None)
         for value in model.value_vars
@@ -37,6 +42,7 @@ def make_unpacked_variable_names(names: list[str], model: pm.Model) -> list[str]
 
     unpacked_variable_names = []
     for name in names:
+        name = var_name_to_model_var.get(name, name)
         shape = initial_point[name].shape
         if shape:
             dims = dims_dict.get(name)
@@ -109,7 +115,7 @@ def map_results_to_inference_data(
         x.name for x in get_default_varnames(model.unobserved_value_vars, include_transformed=True)
     ]
 
-    unconstrained_names = set(all_varnames) - set(constrained_names)
+    unconstrained_names = sorted(set(all_varnames) - set(constrained_names))
 
     idata = az.from_dict(
         posterior={
@@ -258,6 +264,7 @@ def optimizer_result_to_dataset(
     method: minimize_method | Literal["basinhopping"],
     mu: RaveledVars | None = None,
     model: pm.Model | None = None,
+    var_name_to_model_var: dict[str, str] | None = None,
 ) -> xr.Dataset:
     """
     Convert an OptimizeResult object to an xarray Dataset object.
@@ -268,6 +275,9 @@ def optimizer_result_to_dataset(
         The result of the optimization process.
     method: minimize_method or "basinhopping"
         The optimization method used.
+    var_name_to_model_var: dict, optional
+        Mapping between variables in the optimization result and the model variable names. Used when auxiliary
+        variables were introduced, e.g. in DADVI.
 
     Returns
     -------
@@ -279,7 +289,9 @@ def optimizer_result_to_dataset(
 
     model = pm.modelcontext(model) if model is None else model
     variable_names, *_ = zip(*mu.point_map_info)
-    unpacked_variable_names = make_unpacked_variable_names(variable_names, model)
+    unpacked_variable_names = make_unpacked_variable_names(
+        variable_names, model, var_name_to_model_var
+    )
 
     data_vars = {}
 
@@ -368,6 +380,7 @@ def add_optimizer_result_to_inference_data(
     method: minimize_method | Literal["basinhopping"],
     mu: RaveledVars | None = None,
     model: pm.Model | None = None,
+    var_name_to_model_var: dict[str, str] | None = None,
 ) -> az.InferenceData:
     """
     Add the optimization result to an InferenceData object.
@@ -384,13 +397,18 @@ def add_optimizer_result_to_inference_data(
         The MAP estimate of the model parameters.
     model: Model, optional
         A PyMC model. If None, the model is taken from the current model context.
+    var_name_to_model_var: dict, optional
+        Mapping between variables in the optimization result and the model variable names. Used when auxiliary
+        variables were introduced, e.g. in DADVI.
 
     Returns
     -------
     idata: az.InferenceData
         The provided InferenceData, with the optimization results added to the "optimizer" group.
     """
-    dataset = optimizer_result_to_dataset(result, method=method, mu=mu, model=model)
+    dataset = optimizer_result_to_dataset(
+        result, method=method, mu=mu, model=model, var_name_to_model_var=var_name_to_model_var
+    )
     idata.add_groups({"optimizer_result": dataset})
 
     return idata
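The `var_name_to_model_var` hook added above is just a rename table applied before names are unpacked. A minimal sketch of what it does; the auxiliary names below are made up for illustration, not taken from the package:

```python
# Hypothetical illustration: an optimizer such as DADVI may report auxiliary
# names (e.g. "mu_mean"); the mapping routes them back to the model's variable
# names before the unpacked labels are built, exactly like the
# `name = var_name_to_model_var.get(name, name)` line in the hunk above.
var_name_to_model_var = {"mu_mean": "mu", "sigma_log___mean": "sigma_log__"}

optimizer_names = ["mu_mean", "sigma_log___mean"]
model_names = [var_name_to_model_var.get(name, name) for name in optimizer_names]
print(model_names)  # ['mu', 'sigma_log__']
```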
pymc_extras/inference/laplace_approx/laplace.py

@@ -16,9 +16,7 @@
 import logging
 
 from collections.abc import Callable
-from functools import partial
 from typing import Literal
-from typing import cast as type_cast
 
 import arviz as az
 import numpy as np
@@ -27,16 +25,18 @@ import pytensor
 import pytensor.tensor as pt
 import xarray as xr
 
+from arviz import dict_to_dataset
 from better_optimize.constants import minimize_method
 from numpy.typing import ArrayLike
+from pymc import Model
+from pymc.backends.arviz import coords_and_dims_for_inferencedata
 from pymc.blocking import DictToArrayBijection
 from pymc.model.transform.optimization import freeze_dims_and_data
-from pymc.
-from pymc.util import get_default_varnames
+from pymc.util import get_untransformed_name, is_transformed_name
 from pytensor.graph import vectorize_graph
 from pytensor.tensor import TensorVariable
 from pytensor.tensor.optimize import minimize
-from
+from xarray import Dataset
 
 from pymc_extras.inference.laplace_approx.find_map import (
     _compute_inverse_hessian,
@@ -147,130 +147,175 @@ def get_conditional_gaussian_approximation(
     return pytensor.function(args, [x0, conditional_gaussian_approx])
 
 
-def
-
-
-
-]
-    names = [x.name for x in outputs]
+def unpack_last_axis(packed_input, packed_shapes):
+    if len(packed_shapes) == 1:
+        # Single case currently fails in unpack
+        return [pt.split_dims(packed_input, packed_shapes[0], axis=-1)]
 
-
+    keep_axes = tuple(range(packed_input.ndim))[:-1]
+    return pt.unpack(packed_input, axes=keep_axes, packed_shapes=packed_shapes)
 
-    new_outputs, unconstrained_vector = join_nonshared_inputs(
-        model.initial_point(),
-        inputs=model.value_vars,
-        outputs=outputs,
-    )
-
-    constrained_rvs = [x for x, name in zip(new_outputs, names) if name in constrained_names]
-    value_rvs = [x for x in new_outputs if x not in constrained_rvs]
-
-    unconstrained_vector.name = "unconstrained_vector"
 
-
-
+def draws_from_laplace_approx(
+    *,
+    mean,
+    covariance=None,
+    standard_deviation=None,
+    draws: int,
+    model: Model,
+    vectorize_draws: bool = True,
+    return_unconstrained: bool = True,
+    random_seed=None,
+    compile_kwargs: dict | None = None,
+) -> tuple[Dataset, Dataset | None]:
+    """
+    Generate draws from the Laplace approximation of the posterior.
 
-
+    Parameters
+    ----------
+    mean : np.ndarray
+        The mean of the Laplace approximation (MAP estimate).
+    covariance : np.ndarray, optional
+        The covariance matrix of the Laplace approximation.
+        Mutually exclusive with `standard_deviation`.
+    standard_deviation : np.ndarray, optional
+        The standard deviation of the Laplace approximation (diagonal approximation).
+        Mutually exclusive with `covariance`.
+    draws : int
+        The number of draws.
+    model : pm.Model
+        The PyMC model.
+    vectorize_draws : bool, default True
+        Whether to vectorize the draws.
+    return_unconstrained : bool, default True
+        Whether to return the unconstrained draws in addition to the constrained ones.
+    random_seed : int, optional
+        Random seed for reproducibility.
+    compile_kwargs: dict, optional
+        Optional compile kwargs
 
+    Returns
+    -------
+    tuple[Dataset, Dataset | None]
+        A tuple containing the constrained draws (trace) and optionally the unconstrained draws.
+
+    Raises
+    ------
+    ValueError
+        If neither `covariance` nor `standard_deviation` is provided,
+        or if both are provided.
+    """
+    # This function assumes that mean/covariance/standard_deviation are aligned with model.initial_point()
+    if covariance is None and standard_deviation is None:
+        raise ValueError("Must specify either covariance or standard_deviation")
+    if covariance is not None and standard_deviation is not None:
+        raise ValueError("Cannot specify both covariance and standard_deviation")
+    if compile_kwargs is None:
+        compile_kwargs = {}
 
-def model_to_laplace_approx(
-    model: pm.Model, unpacked_variable_names: list[str], chains: int = 1, draws: int = 500
-):
     initial_point = model.initial_point()
-
-
-
-
-
-
-
+    n = int(np.sum([np.prod(v.shape) for v in initial_point.values()]))
+    assert mean.shape == (n,)
+    if covariance is not None:
+        assert covariance.shape == (n, n)
+    elif standard_deviation is not None:
+        assert standard_deviation.shape == (n,)
+
+    vars_to_sample = [v for v in model.free_RVs + model.deterministics]
+    var_names = [v.name for v in vars_to_sample]
+
+    orig_constrained_vars = model.value_vars
+    orig_outputs = model.replace_rvs_by_values(vars_to_sample)
+    if return_unconstrained:
+        orig_outputs.extend(model.value_vars)
+
+    mu_pt = pt.vector("mu", shape=(n,), dtype=mean.dtype)
+    size = (draws,) if vectorize_draws else ()
+    if covariance is not None:
+        sigma_pt = pt.matrix("cov", shape=(n, n), dtype=covariance.dtype)
+        laplace_approximation = pm.MvNormal.dist(mu=mu_pt, cov=sigma_pt, size=size, method="svd")
+    else:
+        sigma_pt = pt.vector("sigma", shape=(n,), dtype=standard_deviation.dtype)
+        laplace_approximation = pm.Normal.dist(mu=mu_pt, sigma=sigma_pt, size=(*size, n))
+
+    constrained_vars = unpack_last_axis(
+        laplace_approximation,
+        [initial_point[v.name].shape for v in orig_constrained_vars],
+    )
+    outputs = vectorize_graph(
+        orig_outputs, replace=dict(zip(orig_constrained_vars, constrained_vars))
     )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        elif name in model.named_vars_to_dims:
-            dims = (*batch_dims, *model.named_vars_to_dims[name])
-        else:
-            dims = (*batch_dims, *[f"{name}_dim_{i}" for i in range(batched_rv.ndim - 2)])
-        initval = initial_point.get(name, None)
-        dim_shapes = initval.shape if initval is not None else batched_rv.type.shape[2:]
-        laplace_model.add_coords(
-            {name: np.arange(shape) for name, shape in zip(dims[2:], dim_shapes)}
-        )
-
-        pm.Deterministic(name, batched_rv, dims=dims)
-
-    return laplace_model
-
-
-def unstack_laplace_draws(laplace_data, model, chains=2, draws=500):
-    """
-    The `model_to_laplace_approx` function returns a model with a single MvNormal distribution, draws from which are
-    in the unconstrained variable space. These might be interesting to the user, but since they come back stacked in a
-    single vector, it's not easy to work with.
-
-    This function unpacks each component of the vector into its own DataArray, with the appropriate dimensions and
-    coordinates, where possible.
-    """
-    initial_point = DictToArrayBijection.map(model.initial_point())
-
-    cursor = 0
-    unstacked_laplace_draws = {}
-    coords = model.coords | {"chain": range(chains), "draw": range(draws)}
-
-    # There are corner cases where the value_vars will not have the same dimensions as the random variable (e.g.
-    # simplex transform of a Dirichlet). In these cases, we don't try to guess what the labels should be, and just
-    # add an arviz-style default dim and label.
-    for rv, (name, shape, size, dtype) in zip(model.free_RVs, initial_point.point_map_info):
-        rv_dims = []
-        for i, dim in enumerate(
-            model.named_vars_to_dims.get(rv.name, [f"{name}_dim_{i}" for i in range(len(shape))])
-        ):
-            if coords.get(dim) and shape[i] == len(coords[dim]):
-                rv_dims.append(dim)
-            else:
-                rv_dims.append(f"{name}_dim_{i}")
-                coords[f"{name}_dim_{i}"] = np.arange(shape[i])
-
-        dims = ("chain", "draw", *rv_dims)
-
-        values = (
-            laplace_data[..., cursor : cursor + size].reshape((chains, draws, *shape)).astype(dtype)
+    fn = pm.pytensorf.compile(
+        [mu_pt, sigma_pt],
+        outputs,
+        random_seed=random_seed,
+        trust_input=True,
+        **compile_kwargs,
+    )
+    sigma = covariance if covariance is not None else standard_deviation
+    if vectorize_draws:
+        output_buffers = fn(mean, sigma)
+    else:
+        # Take one draw to find the shape of the outputs
+        output_buffers = []
+        for out_draw in fn(mean, sigma):
+            output_buffer = np.empty((draws, *out_draw.shape), dtype=out_draw.dtype)
+            output_buffer[0] = out_draw
+            output_buffers.append(output_buffer)
+        # Fill one draws at a time
+        for i in range(1, draws):
+            for out_buffer, out_draw in zip(output_buffers, fn(mean, sigma)):
+                out_buffer[i] = out_draw
+
+    model_coords, model_dims = coords_and_dims_for_inferencedata(model)
+    posterior = {
+        var_name: out_buffer[None]
+        for var_name, out_buffer in (
+            zip(var_names, output_buffers, strict=not return_unconstrained)
         )
-
-
+    }
+    posterior_dataset = dict_to_dataset(posterior, coords=model_coords, dims=model_dims, library=pm)
+    unconstrained_posterior_dataset = None
+
+    if return_unconstrained:
+        unconstrained_posterior = {
+            var.name: out_buffer[None]
+            for var, out_buffer in zip(
+                model.value_vars, output_buffers[len(posterior) :], strict=True
+            )
+        }
+        # Attempt to map constrained dims to unconstrained dims
+        for var_name, var_draws in unconstrained_posterior.items():
+            if not is_transformed_name(var_name):
+                # constrained == unconstrained, dims already shared
+                continue
+            constrained_dims = model_dims.get(get_untransformed_name(var_name))
+            if constrained_dims is None or (len(constrained_dims) != (var_draws.ndim - 2)):
+                continue
+            # Reuse dims from constrained variable if they match in length with unconstrained draws
+            inferred_dims = []
+            for i, (constrained_dim, unconstrained_dim_length) in enumerate(
+                zip(constrained_dims, var_draws.shape[2:], strict=True)
+            ):
+                if model_coords.get(constrained_dim) is not None and (
+                    len(model_coords[constrained_dim]) == unconstrained_dim_length
+                ):
+                    # Assume coordinates map. This could be fooled, by e.g., having a transform that reverses values
+                    inferred_dims.append(constrained_dim)
+                else:
+                    # Size mismatch (e.g., Simplex), make no assumption about mapping
+                    inferred_dims.append(f"{var_name}_dim_{i}")
+            model_dims[var_name] = inferred_dims
+
+        unconstrained_posterior_dataset = dict_to_dataset(
+            unconstrained_posterior,
+            coords=model_coords,
+            dims=model_dims,
+            library=pm,
         )
 
-
-
-    unstacked_laplace_draws = xr.Dataset(unstacked_laplace_draws)
-
-    return unstacked_laplace_draws
+    return posterior_dataset, unconstrained_posterior_dataset
 
 
 def fit_laplace(
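A usage sketch of the new `draws_from_laplace_approx` helper, based only on the signature and behavior shown in the hunk above. The import path, the toy model, and the mean/covariance values are illustrative assumptions, not part of the diff:

```python
import numpy as np
import pymc as pm

# Assumed import path for the helper defined in the hunk above.
from pymc_extras.inference.laplace_approx.laplace import draws_from_laplace_approx

with pm.Model() as model:
    mu = pm.Normal("mu", 0, 1)
    sigma = pm.HalfNormal("sigma", 1)

# mean/covariance must line up with model.initial_point(), here (mu, sigma_log__), so n = 2.
mean = np.array([0.1, -0.5])
cov = np.diag([0.05, 0.02])

posterior, unconstrained = draws_from_laplace_approx(
    mean=mean,
    covariance=cov,
    draws=1000,
    model=model,
    random_seed=1,
)
# Each returned dataset carries a singleton chain dimension, e.g. (1, 1000) for the scalar "mu".
print(posterior["mu"].shape)
```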
@@ -285,9 +330,11 @@ def fit_laplace(
     jitter_rvs: list[pt.TensorVariable] | None = None,
     progressbar: bool = True,
     include_transformed: bool = True,
+    freeze_model: bool = True,
     gradient_backend: GradientBackend = "pytensor",
-    chains: int = 2,
+    chains: None | int = None,
     draws: int = 500,
+    vectorize_draws: bool = True,
     optimizer_kwargs: dict | None = None,
     compile_kwargs: dict | None = None,
 ) -> az.InferenceData:
@@ -328,18 +375,20 @@ def fit_laplace(
     include_transformed: bool, default True
         Whether to include transformed variables in the output. If True, transformed variables will be included in the
         output InferenceData object. If False, only the original variables will be included.
+    freeze_model: bool, optional
+        If True, freeze_dims_and_data will be called on the model before compiling the loss functions. This is
+        sometimes necessary for JAX, and can sometimes improve performance by allowing constant folding. Defaults to
+        True.
     gradient_backend: str, default "pytensor"
         The backend to use for gradient computations. Must be one of "pytensor" or "jax".
-    chains: int, default: 2
-        The number of chain dimensions to sample. Note that this is *not* the number of chains to run in parallel,
-        because the Laplace approximation is not an MCMC method. This argument exists to ensure that outputs are
-        compatible with the ArviZ library.
     draws: int, default: 500
-        The number of samples to draw from the approximated posterior.
+        The number of samples to draw from the approximated posterior.
     optimizer_kwargs
         Additional keyword arguments to pass to the ``scipy.optimize`` function being used. Unless
         ``method = "basinhopping"``, ``scipy.optimize.minimize`` will be used. For ``basinhopping``,
         ``scipy.optimize.basinhopping`` will be used. See the documentation of these functions for details.
+    vectorize_draws: bool, default True
+        Whether to natively vectorize the random function or take one at a time in a python loop.
     compile_kwargs: dict, optional
         Additional keyword arguments to pass to pytensor.function.
 
@@ -354,7 +403,7 @@ def fit_laplace(
     >>> import numpy as np
     >>> import pymc as pm
    >>> import arviz as az
-    >>> y = np.array([2642, 3503, 4358]*10)
+    >>> y = np.array([2642, 3503, 4358] * 10)
     >>> with pm.Model() as m:
     >>>     logsigma = pm.Uniform("logsigma", 1, 100)
     >>>     mu = pm.Uniform("mu", -10000, 10000)
@@ -372,10 +421,19 @@ def fit_laplace(
         will forward the call to 'fit_laplace'.
 
     """
+    if chains is not None:
+        raise ValueError(
+            "chains argument has been deprecated. "
+            "The behavior can be recreated by unstacking draws into multiple chains after fitting"
+        )
+
     compile_kwargs = {} if compile_kwargs is None else compile_kwargs
     optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs
     model = pm.modelcontext(model) if model is None else model
 
+    if freeze_model:
+        model = freeze_dims_and_data(model)
+
     idata = find_MAP(
         method=optimize_method,
         model=model,
@@ -387,17 +445,17 @@ def fit_laplace(
         jitter_rvs=jitter_rvs,
         progressbar=progressbar,
         include_transformed=include_transformed,
+        freeze_model=False,
         gradient_backend=gradient_backend,
         compile_kwargs=compile_kwargs,
         compute_hessian=True,
         **optimizer_kwargs,
     )
 
-    unpacked_variable_names = idata.fit["mean_vector"].coords["rows"].values.tolist()
-
     if "covariance_matrix" not in idata.fit:
         # The user didn't use `use_hess` or `use_hessp` (or an optimization method that returns an inverse Hessian), so
         # we have to go back and compute the Hessian at the MAP point now.
+        unpacked_variable_names = idata.fit["mean_vector"].coords["rows"].values.tolist()
         frozen_model = freeze_dims_and_data(model)
         initial_params = _make_initial_point(frozen_model, initvals, random_seed, jitter_rvs)
 
@@ -426,29 +484,16 @@ def fit_laplace(
         coords={"rows": unpacked_variable_names, "columns": unpacked_variable_names},
     )
 
-
-
-
-
-
-
-
-
-
-
-
-
-        .drop_vars(["chain", "draw"])
-        .rename({"temp_chain": "chain", "temp_draw": "draw"})
-    )
-
-    if include_transformed:
-        idata.unconstrained_posterior = unstack_laplace_draws(
-            new_posterior.laplace_approximation.values, model, chains=chains, draws=draws
-        )
-
-    idata.posterior = new_posterior.drop_vars(
-        ["laplace_approximation", "unpacked_variable_names"]
-    )
-
+    # We override the posterior/unconstrained_posterior from find_MAP
+    idata.posterior, unconstrained_posterior = draws_from_laplace_approx(
+        mean=idata.fit["mean_vector"].values,
+        covariance=idata.fit["covariance_matrix"].values,
+        draws=draws,
+        return_unconstrained=include_transformed,
+        model=model,
+        vectorize_draws=vectorize_draws,
+        random_seed=random_seed,
+    )
+    if include_transformed:
+        idata.unconstrained_posterior = unconstrained_posterior
     return idata
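With the `chains` argument removed, a `fit_laplace` call after this change looks roughly like the docstring example above; as the new error message suggests, a multi-chain layout can be recreated by reshaping the draws afterwards. This is a hedged sketch (the top-level `fit_laplace` re-export and the reshape step are assumptions), not code from the package:

```python
import numpy as np
import pymc as pm
import xarray as xr

from pymc_extras import fit_laplace  # assumed top-level re-export

y = np.array([2642, 3503, 4358] * 10)
with pm.Model() as m:
    logsigma = pm.Uniform("logsigma", 1, 100)
    mu = pm.Uniform("mu", -10000, 10000)
    pm.Normal("y", mu=mu, sigma=pm.math.exp(logsigma), observed=y)
    idata = fit_laplace(draws=1000, vectorize_draws=True)

# Optional: recreate a 2-chain layout by reshaping the single chain of 1000 draws.
split = {
    name: (("chain", "draw", *da.dims[2:]), da.values.reshape(2, 500, *da.shape[2:]))
    for name, da in idata.posterior.items()
}
posterior_2chains = xr.Dataset(split)
```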
pymc_extras/inference/laplace_approx/scipy_interface.py

@@ -1,3 +1,5 @@
+import logging
+
 from collections.abc import Callable
 from importlib.util import find_spec
 from typing import Literal, get_args
@@ -6,6 +8,7 @@ import numpy as np
 import pymc as pm
 import pytensor
 
+from better_optimize.constants import MINIMIZE_MODE_KWARGS
 from pymc import join_nonshared_inputs
 from pytensor import tensor as pt
 from pytensor.compile import Function
@@ -14,6 +17,39 @@ from pytensor.tensor import TensorVariable
 GradientBackend = Literal["pytensor", "jax"]
 VALID_BACKENDS = get_args(GradientBackend)
 
+_log = logging.getLogger(__name__)
+
+
+def set_optimizer_function_defaults(
+    method: str, use_grad: bool | None, use_hess: bool | None, use_hessp: bool | None
+):
+    method_info = MINIMIZE_MODE_KWARGS[method].copy()
+
+    if use_hess and use_hessp:
+        _log.warning(
+            'Both "use_hess" and "use_hessp" are set to True, but scipy.optimize.minimize never uses both at the '
+            'same time. When possible "use_hessp" is preferred because its is computationally more efficient. '
+            'Setting "use_hess" to False.'
+        )
+        use_hess = False
+
+    use_grad = use_grad if use_grad is not None else method_info["uses_grad"]
+
+    if use_hessp is not None and use_hess is None:
+        use_hess = not use_hessp
+
+    elif use_hess is not None and use_hessp is None:
+        use_hessp = not use_hess
+
+    elif use_hessp is None and use_hess is None:
+        use_hessp = method_info["uses_hessp"]
+        use_hess = method_info["uses_hess"]
+        if use_hessp and use_hess:
+            # If a method could use either hess or hessp, we default to using hessp
+            use_hess = False
+
+    return use_grad, use_hess, use_hessp
+
 
 def _compile_grad_and_hess_to_jax(
     f_fused: Function, use_hess: bool, use_hessp: bool
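A small sketch of how the new `set_optimizer_function_defaults` helper resolves unspecified flags, following the logic in the hunk above. The import path and the choice of `"trust-ncg"` as an example method are assumptions:

```python
# Assumed import path for the helper added in the hunk above.
from pymc_extras.inference.laplace_approx.scipy_interface import (
    set_optimizer_function_defaults,
)

# Nothing specified: fall back to what the method supports (per better_optimize's
# MINIMIZE_MODE_KWARGS table), preferring hessp when both Hessian forms are usable.
print(set_optimizer_function_defaults("trust-ncg", None, None, None))

# Only use_hessp given: use_hess is filled in as its complement.
print(set_optimizer_function_defaults("trust-ncg", None, None, True))

# Both Hessian flags requested: a warning is emitted and use_hess is set back to False.
print(set_optimizer_function_defaults("trust-ncg", True, True, True))
```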
@@ -144,12 +180,13 @@ def _compile_functions_for_scipy_optimize(
 def scipy_optimize_funcs_from_loss(
     loss: TensorVariable,
     inputs: list[TensorVariable],
-    initial_point_dict: dict[str, np.ndarray | float | int],
-    use_grad: bool,
-    use_hess: bool,
-    use_hessp: bool,
+    initial_point_dict: dict[str, np.ndarray | float | int] | None = None,
+    use_grad: bool | None = None,
+    use_hess: bool | None = None,
+    use_hessp: bool | None = None,
     gradient_backend: GradientBackend = "pytensor",
     compile_kwargs: dict | None = None,
+    inputs_are_flat: bool = False,
 ) -> tuple[Callable, ...]:
     """
     Compile loss functions for use with scipy.optimize.minimize.
@@ -206,9 +243,12 @@
     if not isinstance(inputs, list):
         inputs = [inputs]
 
-
-
-
+    if inputs_are_flat:
+        [flat_input] = inputs
+    else:
+        [loss], flat_input = join_nonshared_inputs(
+            point=initial_point_dict, outputs=[loss], inputs=inputs
+        )
 
     # If we use pytensor gradients, we will use the pytensor function wrapper that handles shared variables. When
     # computing jax gradients, we discard the function wrapper, so we can't handle shared variables --> rewrite them