pymc-extras 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymc_extras/inference/__init__.py +8 -1
- pymc_extras/inference/dadvi/__init__.py +0 -0
- pymc_extras/inference/dadvi/dadvi.py +261 -0
- pymc_extras/inference/fit.py +5 -0
- pymc_extras/inference/laplace_approx/find_map.py +16 -8
- pymc_extras/inference/laplace_approx/idata.py +5 -2
- pymc_extras/inference/laplace_approx/laplace.py +1 -0
- pymc_extras/statespace/models/DFM.py +849 -0
- pymc_extras/statespace/models/SARIMAX.py +4 -4
- pymc_extras/statespace/models/VARMAX.py +7 -7
- pymc_extras/statespace/utils/constants.py +3 -1
- {pymc_extras-0.4.1.dist-info → pymc_extras-0.5.0.dist-info}/METADATA +1 -1
- {pymc_extras-0.4.1.dist-info → pymc_extras-0.5.0.dist-info}/RECORD +15 -12
- {pymc_extras-0.4.1.dist-info → pymc_extras-0.5.0.dist-info}/WHEEL +0 -0
- {pymc_extras-0.4.1.dist-info → pymc_extras-0.5.0.dist-info}/licenses/LICENSE +0 -0
pymc_extras/inference/__init__.py CHANGED

@@ -12,9 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from pymc_extras.inference.dadvi.dadvi import fit_dadvi
 from pymc_extras.inference.fit import fit
 from pymc_extras.inference.laplace_approx.find_map import find_MAP
 from pymc_extras.inference.laplace_approx.laplace import fit_laplace
 from pymc_extras.inference.pathfinder.pathfinder import fit_pathfinder
 
-__all__ = [
+__all__ = [
+    "find_MAP",
+    "fit",
+    "fit_laplace",
+    "fit_pathfinder",
+    "fit_dadvi",
+]

pymc_extras/inference/dadvi/__init__.py
File without changes
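The hunk above re-exports the new DADVI entry point from the package's public inference namespace. A minimal sketch of the resulting import surface, assuming pymc-extras 0.5.0 is installed (nothing beyond the `__all__` update is implied):

```python
# All five fitting entry points are now importable from one place (sketch, pymc-extras 0.5.0).
from pymc_extras.inference import find_MAP, fit, fit_dadvi, fit_laplace, fit_pathfinder
```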
pymc_extras/inference/dadvi/dadvi.py ADDED

@@ -0,0 +1,261 @@
+import arviz as az
+import numpy as np
+import pymc
+import pytensor
+import pytensor.tensor as pt
+import xarray
+
+from better_optimize import minimize
+from better_optimize.constants import minimize_method
+from pymc import DictToArrayBijection, Model, join_nonshared_inputs
+from pymc.backends.arviz import (
+    PointFunc,
+    apply_function_over_dataset,
+    coords_and_dims_for_inferencedata,
+)
+from pymc.util import RandomSeed, get_default_varnames
+from pytensor.tensor.variable import TensorVariable
+
+from pymc_extras.inference.laplace_approx.laplace import unstack_laplace_draws
+from pymc_extras.inference.laplace_approx.scipy_interface import (
+    _compile_functions_for_scipy_optimize,
+)
+
+
+def fit_dadvi(
+    model: Model | None = None,
+    n_fixed_draws: int = 30,
+    random_seed: RandomSeed = None,
+    n_draws: int = 1000,
+    keep_untransformed: bool = False,
+    optimizer_method: minimize_method = "trust-ncg",
+    use_grad: bool = True,
+    use_hessp: bool = True,
+    use_hess: bool = False,
+    **minimize_kwargs,
+) -> az.InferenceData:
+    """
+    Does inference using deterministic ADVI (automatic differentiation
+    variational inference), DADVI for short.
+
+    For full details see the paper cited in the references:
+    https://www.jmlr.org/papers/v25/23-1015.html
+
+    Parameters
+    ----------
+    model : pm.Model
+        The PyMC model to be fit. If None, the current model context is used.
+
+    n_fixed_draws : int
+        The number of fixed draws to use for the optimisation. More
+        draws will result in more accurate estimates, but also
+        increase inference time. Usually, the default of 30 is a good
+        tradeoff between speed and accuracy.
+
+    random_seed: int
+        The random seed to use for the fixed draws. Running the optimisation
+        twice with the same seed should arrive at the same result.
+
+    n_draws: int
+        The number of draws to return from the variational approximation.
+
+    keep_untransformed: bool
+        Whether or not to keep the unconstrained variables (such as
+        logs of positive-constrained parameters) in the output.
+
+    optimizer_method: str
+        Which optimization method to use. The function calls
+        ``scipy.optimize.minimize``, so any of the methods there can
+        be used. The default is trust-ncg, which uses second-order
+        information and is generally very reliable. Other methods such
+        as L-BFGS-B might be faster but potentially more brittle and
+        may not converge exactly to the optimum.
+
+    minimize_kwargs:
+        Additional keyword arguments to pass to the
+        ``scipy.optimize.minimize`` function. See the documentation of
+        that function for details.
+
+    use_grad:
+        If True, pass the gradient function to
+        `scipy.optimize.minimize` (where it is referred to as `jac`).
+
+    use_hessp:
+        If True, pass the hessian vector product to `scipy.optimize.minimize`.
+
+    use_hess:
+        If True, pass the hessian to `scipy.optimize.minimize`. Note that
+        this is generally not recommended since its computation can be slow
+        and memory-intensive if there are many parameters.
+
+    Returns
+    -------
+    :class:`~arviz.InferenceData`
+        The inference data containing the results of the DADVI algorithm.
+
+    References
+    ----------
+    Giordano, R., Ingram, M., & Broderick, T. (2024). Black Box
+    Variational Inference with a Deterministic Objective: Faster, More
+    Accurate, and Even More Black Box. Journal of Machine Learning
+    Research, 25(18), 1–39.
+    """
+
+    model = pymc.modelcontext(model) if model is None else model
+
+    initial_point_dict = model.initial_point()
+    n_params = DictToArrayBijection.map(initial_point_dict).data.shape[0]
+
+    var_params, objective = create_dadvi_graph(
+        model,
+        n_fixed_draws=n_fixed_draws,
+        random_seed=random_seed,
+        n_params=n_params,
+    )
+
+    f_fused, f_hessp = _compile_functions_for_scipy_optimize(
+        objective,
+        [var_params],
+        compute_grad=use_grad,
+        compute_hessp=use_hessp,
+        compute_hess=use_hess,
+    )
+
+    derivative_kwargs = {}
+
+    if use_grad:
+        derivative_kwargs["jac"] = True
+    if use_hessp:
+        derivative_kwargs["hessp"] = f_hessp
+    if use_hess:
+        derivative_kwargs["hess"] = True
+
+    result = minimize(
+        f_fused,
+        np.zeros(2 * n_params),
+        method=optimizer_method,
+        **derivative_kwargs,
+        **minimize_kwargs,
+    )
+
+    opt_var_params = result.x
+    opt_means, opt_log_sds = np.split(opt_var_params, 2)
+
+    # Make the draws:
+    generator = np.random.default_rng(seed=random_seed)
+    draws_raw = generator.standard_normal(size=(n_draws, n_params))
+
+    draws = opt_means + draws_raw * np.exp(opt_log_sds)
+    draws_arviz = unstack_laplace_draws(draws, model, chains=1, draws=n_draws)
+
+    transformed_draws = transform_draws(draws_arviz, model, keep_untransformed=keep_untransformed)
+
+    return transformed_draws
+
+
+def create_dadvi_graph(
+    model: Model,
+    n_params: int,
+    n_fixed_draws: int = 30,
+    random_seed: RandomSeed = None,
+) -> tuple[TensorVariable, TensorVariable]:
+    """
+    Sets up the DADVI graph in pytensor and returns it.
+
+    Parameters
+    ----------
+    model : pm.Model
+        The PyMC model to be fit.
+
+    n_params: int
+        The total number of parameters in the model.
+
+    n_fixed_draws : int
+        The number of fixed draws to use.
+
+    random_seed: int
+        The random seed to use for the fixed draws.
+
+    Returns
+    -------
+    Tuple[TensorVariable, TensorVariable]
+        A tuple whose first element contains the variational parameters,
+        and whose second contains the DADVI objective.
+    """
+
+    # Make the fixed draws
+    generator = np.random.default_rng(seed=random_seed)
+    draws = generator.standard_normal(size=(n_fixed_draws, n_params))
+
+    inputs = model.continuous_value_vars + model.discrete_value_vars
+    initial_point_dict = model.initial_point()
+    logp = model.logp()
+
+    # Graph in terms of a flat input
+    [logp], flat_input = join_nonshared_inputs(
+        point=initial_point_dict, outputs=[logp], inputs=inputs
+    )
+
+    var_params = pt.vector(name="eta", shape=(2 * n_params,))
+
+    means, log_sds = pt.split(var_params, axis=0, splits_size=[n_params, n_params], n_splits=2)
+
+    draw_matrix = pt.constant(draws)
+    samples = means + pt.exp(log_sds) * draw_matrix
+
+    logp_vectorized_draws = pytensor.graph.vectorize_graph(logp, replace={flat_input: samples})
+
+    mean_log_density = pt.mean(logp_vectorized_draws)
+    entropy = pt.sum(log_sds)
+
+    objective = -mean_log_density - entropy
+
+    return var_params, objective
+
+
+def transform_draws(
+    unstacked_draws: xarray.Dataset,
+    model: Model,
+    keep_untransformed: bool = False,
+):
+    """
+    Transforms the unconstrained draws back into the constrained space.
+
+    Parameters
+    ----------
+    unstacked_draws : xarray.Dataset
+        The draws to constrain back into the original space.
+
+    model : Model
+        The PyMC model the variables were derived from.
+
+    keep_untransformed: bool
+        Whether or not to keep the unconstrained variables in the output.
+
+    Returns
+    -------
+    :class:`~arviz.InferenceData`
+        Draws from the original constrained parameters.
+    """
+
+    filtered_var_names = model.unobserved_value_vars
+    vars_to_sample = list(
+        get_default_varnames(filtered_var_names, include_transformed=keep_untransformed)
+    )
+    fn = pytensor.function(model.value_vars, vars_to_sample)
+    point_func = PointFunc(fn)
+
+    coords, dims = coords_and_dims_for_inferencedata(model)
+
+    transformed_result = apply_function_over_dataset(
+        point_func,
+        unstacked_draws,
+        output_var_names=[x.name for x in vars_to_sample],
+        coords=coords,
+        dims=dims,
+    )
+
+    return transformed_result
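Based on the `fit_dadvi` signature and docstring added above, a usage sketch might look like the following. The toy data, model, and variable names are illustrative assumptions and do not appear in the diff:

```python
import numpy as np
import pymc as pm

from pymc_extras.inference import fit_dadvi

# Hypothetical toy data; any PyMC model with continuous free parameters would do.
rng = np.random.default_rng(0)
observed = rng.normal(loc=1.0, scale=2.0, size=200)

with pm.Model():
    mu = pm.Normal("mu", 0.0, 10.0)
    sigma = pm.HalfNormal("sigma", 5.0)
    pm.Normal("y", mu=mu, sigma=sigma, observed=observed)

    # Optimise the fixed-draw DADVI objective, then sample from the fitted
    # mean-field Gaussian approximation.
    idata = fit_dadvi(
        n_fixed_draws=30,        # default; more fixed draws -> more accurate but slower
        random_seed=1,
        n_draws=1000,
        optimizer_method="trust-ncg",
    )

# `idata` holds draws from the approximation for the model's free parameters.
```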
pymc_extras/inference/fit.py
CHANGED

pymc_extras/inference/laplace_approx/find_map.py CHANGED

@@ -198,6 +198,7 @@ def find_MAP(
     include_transformed: bool = True,
     gradient_backend: GradientBackend = "pytensor",
     compile_kwargs: dict | None = None,
+    compute_hessian: bool = False,
     **optimizer_kwargs,
 ) -> (
     dict[str, np.ndarray]

@@ -239,6 +240,10 @@ def find_MAP(
         Whether to include transformed variable values in the returned dictionary. Defaults to True.
     gradient_backend: str, default "pytensor"
         Which backend to use to compute gradients. Must be one of "pytensor" or "jax".
+    compute_hessian: bool
+        If True, the inverse Hessian matrix at the optimum will be computed and included in the returned
+        InferenceData object. This is needed for the Laplace approximation, but can be computationally expensive for
+        high-dimensional problems. Defaults to False.
     compile_kwargs: dict, optional
         Additional options to pass to the ``pytensor.function`` function when compiling loss functions.
     **optimizer_kwargs

@@ -316,14 +321,17 @@ def find_MAP(
         **optimizer_kwargs,
     )
 
-    H_inv = _compute_inverse_hessian(
-        optimizer_result=optimizer_result,
-        optimal_point=None,
-        f_fused=f_fused,
-        f_hessp=f_hessp,
-        use_hess=use_hess,
-        method=method,
-    )
+    if compute_hessian:
+        H_inv = _compute_inverse_hessian(
+            optimizer_result=optimizer_result,
+            optimal_point=None,
+            f_fused=f_fused,
+            f_hessp=f_hessp,
+            use_hess=use_hess,
+            method=method,
+        )
+    else:
+        H_inv = None
 
     raveled_optimized = RaveledVars(optimizer_result.x, initial_params.point_map_info)
     unobserved_vars = get_default_varnames(model.unobserved_value_vars, include_transformed=True)
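The change above makes the inverse-Hessian computation in `find_MAP` opt-in. A hedged sketch of the intended call pattern, assuming the remaining `find_MAP` arguments keep their defaults (the placeholder model is illustrative, not part of the diff):

```python
import pymc as pm

from pymc_extras.inference import find_MAP

# Illustrative placeholder model.
with pm.Model():
    pm.Normal("x", 0.0, 1.0)

    # New default in 0.5.0: the (potentially expensive) inverse Hessian is skipped.
    result = find_MAP()

    # Opt in when curvature at the optimum is needed, e.g. to feed a Laplace approximation.
    result_with_fit = find_MAP(compute_hessian=True)
```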
pymc_extras/inference/laplace_approx/idata.py CHANGED

@@ -136,7 +136,10 @@ def map_results_to_inference_data(
 
 
 def add_fit_to_inference_data(
-    idata: az.InferenceData, mu: RaveledVars, H_inv: np.ndarray, model: pm.Model | None = None
+    idata: az.InferenceData,
+    mu: RaveledVars,
+    H_inv: np.ndarray | None,
+    model: pm.Model | None = None,
 ) -> az.InferenceData:
     """
     Add the mean vector and covariance matrix of the Laplace approximation to an InferenceData object.

@@ -147,7 +150,7 @@ def add_fit_to_inference_data(
         An InferenceData object containing the approximated posterior samples.
     mu: RaveledVars
        The MAP estimate of the model parameters.
-    H_inv: np.ndarray
+    H_inv: np.ndarray, optional
        The inverse Hessian matrix of the log-posterior evaluated at the MAP estimate.
     model: Model, optional
        A PyMC model. If None, the model is taken from the current model context.