pymc-extras 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymc_extras/deserialize.py +10 -4
- pymc_extras/distributions/continuous.py +1 -1
- pymc_extras/distributions/histogram_utils.py +6 -4
- pymc_extras/distributions/multivariate/r2d2m2cp.py +4 -3
- pymc_extras/distributions/timeseries.py +14 -12
- pymc_extras/inference/dadvi/dadvi.py +149 -128
- pymc_extras/inference/laplace_approx/find_map.py +16 -39
- pymc_extras/inference/laplace_approx/idata.py +22 -4
- pymc_extras/inference/laplace_approx/laplace.py +196 -151
- pymc_extras/inference/laplace_approx/scipy_interface.py +47 -7
- pymc_extras/inference/pathfinder/idata.py +517 -0
- pymc_extras/inference/pathfinder/pathfinder.py +71 -12
- pymc_extras/inference/smc/sampling.py +2 -2
- pymc_extras/model/marginal/distributions.py +4 -2
- pymc_extras/model/marginal/graph_analysis.py +2 -2
- pymc_extras/model/marginal/marginal_model.py +12 -2
- pymc_extras/model_builder.py +9 -4
- pymc_extras/prior.py +203 -8
- pymc_extras/statespace/core/compile.py +1 -1
- pymc_extras/statespace/core/statespace.py +2 -1
- pymc_extras/statespace/filters/distributions.py +15 -13
- pymc_extras/statespace/filters/kalman_filter.py +24 -22
- pymc_extras/statespace/filters/kalman_smoother.py +3 -5
- pymc_extras/statespace/filters/utilities.py +2 -5
- pymc_extras/statespace/models/DFM.py +12 -27
- pymc_extras/statespace/models/ETS.py +190 -198
- pymc_extras/statespace/models/SARIMAX.py +5 -17
- pymc_extras/statespace/models/VARMAX.py +15 -67
- pymc_extras/statespace/models/structural/components/autoregressive.py +4 -4
- pymc_extras/statespace/models/structural/components/regression.py +4 -26
- pymc_extras/statespace/models/utilities.py +7 -0
- pymc_extras/utils/model_equivalence.py +2 -2
- pymc_extras/utils/prior.py +10 -14
- pymc_extras/utils/spline.py +4 -10
- {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/METADATA +4 -4
- {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/RECORD +38 -37
- {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/WHEEL +1 -1
- {pymc_extras-0.5.0.dist-info → pymc_extras-0.7.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,517 @@
|
|
|
1
|
+
# Copyright 2022 The PyMC Developers
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Utilities for converting Pathfinder results to xarray and adding them to InferenceData."""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import warnings
|
|
20
|
+
|
|
21
|
+
from dataclasses import asdict
|
|
22
|
+
|
|
23
|
+
import arviz as az
|
|
24
|
+
import numpy as np
|
|
25
|
+
import pymc as pm
|
|
26
|
+
import xarray as xr
|
|
27
|
+
|
|
28
|
+
from pymc.blocking import DictToArrayBijection
|
|
29
|
+
|
|
30
|
+
from pymc_extras.inference.pathfinder.lbfgs import LBFGSStatus
|
|
31
|
+
from pymc_extras.inference.pathfinder.pathfinder import (
|
|
32
|
+
MultiPathfinderResult,
|
|
33
|
+
PathfinderConfig,
|
|
34
|
+
PathfinderResult,
|
|
35
|
+
PathStatus,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_param_coords(model: pm.Model | None, n_params: int) -> list[str]:
    """
    Build parameter coordinate labels from a PyMC model.

    Parameters
    ----------
    model : pm.Model | None
        Model whose free variables provide the labels. When ``None``,
        plain numeric-index labels are produced instead.
    n_params : int
        Number of parameters; used only for the numeric fallback.

    Returns
    -------
    list[str]
        One label per flattened parameter, e.g. ``"x"`` for scalars and
        ``"x[0]", "x[1]", ...`` for vector-valued variables.
    """
    if model is None:
        return [str(idx) for idx in range(n_params)]

    initial_point = model.initial_point()
    raveled = DictToArrayBijection.map(initial_point)

    labels: list[str] = []
    for name, _shape, size, _dtype in raveled.point_map_info:
        if size == 1:
            # Scalar variable: the bare name is the label.
            labels.append(name)
        else:
            # Vector variable: one bracketed label per flattened element.
            labels.extend(f"{name}[{idx}]" for idx in range(size))
    return labels
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _status_counter_to_dataarray(counter, status_enum_cls) -> xr.DataArray:
    """Convert a Counter of status values to a dense xarray DataArray.

    Every member of ``status_enum_cls`` appears in the output, with a count
    of zero for statuses absent from ``counter``.
    """
    members = list(status_enum_cls)
    dense_counts = np.array([counter.get(member, 0) for member in members])
    return xr.DataArray(
        dense_counts,
        dims=["status"],
        coords={"status": [member.name for member in members]},
        name="status_counts",
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _extract_scalar(value):
|
|
84
|
+
"""Extract scalar from array-like or return as-is."""
|
|
85
|
+
if hasattr(value, "item"):
|
|
86
|
+
return value.item()
|
|
87
|
+
elif hasattr(value, "__len__") and len(value) == 1:
|
|
88
|
+
return value[0]
|
|
89
|
+
return value
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def pathfinder_result_to_xarray(
    result: PathfinderResult,
    model: pm.Model | None = None,
) -> xr.Dataset:
    """
    Convert a PathfinderResult to an xarray Dataset.

    Parameters
    ----------
    result : PathfinderResult
        Single pathfinder run result
    model : pm.Model | None
        PyMC model for parameter name extraction

    Returns
    -------
    xr.Dataset
        Dataset with scalar diagnostics (lbfgs_niter, elbo_argmax, status
        codes/names, logP/logQ summaries), an optional ``final_sample``
        indexed by ``param``, and status names in ``attrs``.

    Examples
    --------
    >>> import pymc as pm
    >>> with pm.Model() as model:
    ...     x = pm.Normal("x", 0, 1)
    ...     y = pm.Normal("y", x, 1, observed=2.0)
    >>> # Assuming we have a PathfinderResult from a pathfinder run
    >>> ds = pathfinder_result_to_xarray(result, model=model)
    >>> print(ds.data_vars)  # Shows lbfgs_niter, elbo_argmax, status info, etc.
    >>> print(ds.attrs)  # Shows metadata like lbfgs_status, path_status
    """
    data_vars = {}
    coords = {}
    attrs = {}

    # Infer the number of free parameters: prefer the trailing axis of the
    # samples array; otherwise fall back to flattening the model's initial
    # point (best-effort only).
    n_params = None
    if result.samples is not None:
        n_params = result.samples.shape[-1]
    elif hasattr(result, "lbfgs_niter") and result.lbfgs_niter is not None:
        if model is not None:
            try:
                ip = model.initial_point()
                n_params = len(DictToArrayBijection.map(ip).data)
            except Exception:
                # Parameter labels are simply omitted when the model cannot
                # produce an initial point.
                pass

    if n_params is not None:
        coords["param"] = get_param_coords(model, n_params)

    if result.lbfgs_niter is not None:
        data_vars["lbfgs_niter"] = xr.DataArray(_extract_scalar(result.lbfgs_niter))

    if result.elbo_argmax is not None:
        data_vars["elbo_argmax"] = xr.DataArray(_extract_scalar(result.elbo_argmax))

    data_vars["lbfgs_status_code"] = xr.DataArray(result.lbfgs_status.value)
    data_vars["lbfgs_status_name"] = xr.DataArray(result.lbfgs_status.name)
    data_vars["path_status_code"] = xr.DataArray(result.path_status.value)
    data_vars["path_status_name"] = xr.DataArray(result.path_status.name)

    if n_params is not None and result.samples is not None:
        # BUG FIX: the previous guard (``ndim >= 2``) admitted 2-D sample
        # arrays but then indexed with three axes, raising IndexError.
        # Handle both (path, draw, param) and (draw, param) layouts.
        if result.samples.ndim >= 3:
            representative_sample = result.samples[0, -1, :]
        elif result.samples.ndim == 2:
            representative_sample = result.samples[-1, :]
        else:
            representative_sample = None
        if representative_sample is not None:
            data_vars["final_sample"] = xr.DataArray(
                representative_sample, dims=["param"], coords={"param": coords["param"]}
            )

    if result.logP is not None:
        logP = result.logP.flatten() if hasattr(result.logP, "flatten") else result.logP
        if hasattr(logP, "__len__") and len(logP) > 0:
            data_vars["logP_mean"] = xr.DataArray(np.mean(logP))
            data_vars["logP_std"] = xr.DataArray(np.std(logP))
            data_vars["logP_max"] = xr.DataArray(np.max(logP))

    if result.logQ is not None:
        logQ = result.logQ.flatten() if hasattr(result.logQ, "flatten") else result.logQ
        if hasattr(logQ, "__len__") and len(logQ) > 0:
            data_vars["logQ_mean"] = xr.DataArray(np.mean(logQ))
            data_vars["logQ_std"] = xr.DataArray(np.std(logQ))
            data_vars["logQ_max"] = xr.DataArray(np.max(logQ))

    attrs["lbfgs_status"] = result.lbfgs_status.name
    attrs["path_status"] = result.path_status.name

    return xr.Dataset(data_vars, coords=coords, attrs=attrs)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def multipathfinder_result_to_xarray(
    result: MultiPathfinderResult,
    model: pm.Model | None = None,
    *,
    store_diagnostics: bool = False,
) -> xr.Dataset:
    """
    Convert a MultiPathfinderResult to a single consolidated xarray Dataset.

    The returned dataset nests its variables by prefix: summary statistics
    at the top level, per-path data under ``paths/``, the run configuration
    under ``config/``, and (optionally) raw diagnostic arrays under
    ``diagnostics/``.

    Parameters
    ----------
    result : MultiPathfinderResult
        Multi-path pathfinder result
    model : pm.Model | None
        PyMC model for parameter name extraction
    store_diagnostics : bool
        Whether to include potentially large diagnostic arrays

    Returns
    -------
    xr.Dataset
        Single consolidated dataset with all pathfinder results

    Examples
    --------
    >>> ds = multipathfinder_result_to_xarray(result, model=model)
    >>> per_path = [k for k in ds.data_vars if k.startswith("paths/")]
    >>> config = [k for k in ds.data_vars if k.startswith("config/")]
    """
    data_vars: dict = {}
    coords: dict = {}
    attrs: dict = {}

    if result.samples is None:
        n_params = None
        param_coords = None
    else:
        n_params = result.samples.shape[-1]
        param_coords = get_param_coords(model, n_params)
        coords["param"] = param_coords

    # Summary statistics live at the top level of the dataset.
    _add_summary_data(result, data_vars, coords, attrs)

    # Per-path data goes under the 'paths/' prefix.
    if result.samples is not None and not result.all_paths_failed:
        _add_paths_data(result, data_vars, coords, param_coords, n_params)

    # Configuration goes under the 'config/' prefix.
    if result.pathfinder_config is not None:
        _add_config_data(result.pathfinder_config, data_vars)

    # Large raw arrays are opt-in under the 'diagnostics/' prefix.
    if store_diagnostics:
        _add_diagnostics_data(result, data_vars, coords, param_coords)

    return xr.Dataset(data_vars, coords=coords, attrs=attrs)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _add_summary_data(
    result: MultiPathfinderResult, data_vars: dict, coords: dict, attrs: dict
) -> None:
    """Add summary-level statistics to the pathfinder dataset.

    Mutates ``data_vars`` and ``attrs`` in place; ``coords`` is accepted for
    signature symmetry with the other ``_add_*`` helpers.
    """
    if result.num_paths is not None:
        data_vars["num_paths"] = xr.DataArray(result.num_paths)
    if result.num_draws is not None:
        data_vars["num_draws"] = xr.DataArray(result.num_draws)

    if result.compile_time is not None:
        data_vars["compile_time"] = xr.DataArray(result.compile_time)
    if result.compute_time is not None:
        data_vars["compute_time"] = xr.DataArray(result.compute_time)
    # BUG FIX: previously only compile_time was checked before summing, so a
    # present compile_time with a missing compute_time raised TypeError.
    if result.compile_time is not None and result.compute_time is not None:
        data_vars["total_time"] = xr.DataArray(result.compile_time + result.compute_time)

    data_vars["importance_sampling_method"] = xr.DataArray(result.importance_sampling or "none")
    if result.pareto_k is not None:
        data_vars["pareto_k"] = xr.DataArray(result.pareto_k)

    # Status fields are Counters on multi-path results; densify them so
    # every enum member has an explicit (possibly zero) count.
    if result.lbfgs_status:
        data_vars["lbfgs_status_counts"] = _status_counter_to_dataarray(
            result.lbfgs_status, LBFGSStatus
        )
    if result.path_status:
        data_vars["path_status_counts"] = _status_counter_to_dataarray(
            result.path_status, PathStatus
        )

    data_vars["all_paths_failed"] = xr.DataArray(result.all_paths_failed)
    if not result.all_paths_failed and result.samples is not None:
        data_vars["num_successful_paths"] = xr.DataArray(result.samples.shape[0])

    if result.lbfgs_niter is not None:
        data_vars["lbfgs_niter_mean"] = xr.DataArray(np.mean(result.lbfgs_niter))
        data_vars["lbfgs_niter_std"] = xr.DataArray(np.std(result.lbfgs_niter))

    if result.elbo_argmax is not None:
        data_vars["elbo_argmax_mean"] = xr.DataArray(np.mean(result.elbo_argmax))
        data_vars["elbo_argmax_std"] = xr.DataArray(np.std(result.elbo_argmax))

    if result.logP is not None:
        data_vars["logP_mean"] = xr.DataArray(np.mean(result.logP))
        data_vars["logP_std"] = xr.DataArray(np.std(result.logP))
        data_vars["logP_max"] = xr.DataArray(np.max(result.logP))

    if result.logQ is not None:
        data_vars["logQ_mean"] = xr.DataArray(np.mean(result.logQ))
        data_vars["logQ_std"] = xr.DataArray(np.std(result.logQ))
        data_vars["logQ_max"] = xr.DataArray(np.max(result.logQ))

    # Surface any collected warnings as dataset metadata.
    if result.warnings:
        attrs["warnings"] = list(result.warnings)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _add_paths_data(
    result: MultiPathfinderResult,
    data_vars: dict,
    coords: dict,
    param_coords: list[str] | None,
    n_params: int | None,
) -> None:
    """Add per-path diagnostics to the pathfinder dataset with 'paths/' prefix.

    Mutates ``data_vars`` and ``coords`` in place.
    """
    n_paths = _determine_num_paths(result)
    path_ids = list(range(n_paths))
    coords["path"] = path_ids

    def _store(name: str, values) -> None:
        # Skip absent diagnostics; otherwise index the array by path.
        if values is None:
            return
        data_vars[f"paths/{name}"] = xr.DataArray(
            values, dims=["path"], coords={"path": path_ids}
        )

    _store("lbfgs_niter", result.lbfgs_niter)
    _store("elbo_argmax", result.elbo_argmax)

    if result.logP is not None:
        _store("logP_mean", np.mean(result.logP, axis=1))
        _store("logP_max", np.max(result.logP, axis=1))

    if result.logQ is not None:
        _store("logQ_mean", np.mean(result.logQ, axis=1))
        _store("logQ_max", np.max(result.logQ, axis=1))

    have_full_samples = result.samples is not None and result.samples.ndim >= 3
    if n_params is not None and have_full_samples:
        # Last draw from every path, shape (n_paths, n_params).
        data_vars["paths/final_sample"] = xr.DataArray(
            result.samples[:, -1, :],
            dims=["path", "param"],
            coords={"path": path_ids, "param": coords["param"]},
        )
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _add_config_data(config: PathfinderConfig, data_vars: dict) -> None:
    """Add configuration parameters to the pathfinder dataset with 'config/' prefix."""
    for field_name, field_value in asdict(config).items():
        data_vars[f"config/{field_name}"] = xr.DataArray(field_value)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def _add_diagnostics_data(
    result: MultiPathfinderResult, data_vars: dict, coords: dict, param_coords: list[str] | None
) -> None:
    """Add detailed diagnostics to the pathfinder dataset with 'diagnostics/' prefix.

    Stores the full (path, draw) logP/logQ matrices and, when available, the
    full (path, draw, param) samples array. Mutates ``data_vars`` and
    ``coords`` in place.
    """
    logP = result.logP
    logQ = result.logQ
    samples = result.samples

    if logP is not None:
        n_paths, n_draws_per_path = logP.shape
        coords.setdefault("path", list(range(n_paths)))
        # draw_per_path is assigned unconditionally here, mirroring the
        # logP-first precedence for coordinate sizing.
        coords["draw_per_path"] = list(range(n_draws_per_path))

        data_vars["diagnostics/logP_full"] = xr.DataArray(
            logP,
            dims=["path", "draw_per_path"],
            coords={"path": coords["path"], "draw_per_path": coords["draw_per_path"]},
        )

    if logQ is not None:
        if "draw_per_path" not in coords:
            n_paths, n_draws_per_path = logQ.shape
            coords.setdefault("path", list(range(n_paths)))
            coords["draw_per_path"] = list(range(n_draws_per_path))

        data_vars["diagnostics/logQ_full"] = xr.DataArray(
            logQ,
            dims=["path", "draw_per_path"],
            coords={"path": coords["path"], "draw_per_path": coords["draw_per_path"]},
        )

    if samples is not None and samples.ndim == 3 and param_coords is not None:
        n_paths, n_draws_per_path, _n_params = samples.shape
        coords.setdefault("path", list(range(n_paths)))
        coords.setdefault("draw_per_path", list(range(n_draws_per_path)))

        data_vars["diagnostics/samples_full"] = xr.DataArray(
            samples,
            dims=["path", "draw_per_path", "param"],
            coords={
                "path": coords["path"],
                "draw_per_path": coords["draw_per_path"],
                "param": coords["param"],
            },
        )
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _determine_num_paths(result: MultiPathfinderResult) -> int:
|
|
408
|
+
"""
|
|
409
|
+
Determine the number of paths from per-path arrays.
|
|
410
|
+
|
|
411
|
+
When importance sampling is applied, result.samples may be collapsed,
|
|
412
|
+
so we use per-path diagnostic arrays to determine the true path count.
|
|
413
|
+
"""
|
|
414
|
+
if result.lbfgs_niter is not None:
|
|
415
|
+
return len(result.lbfgs_niter)
|
|
416
|
+
elif result.elbo_argmax is not None:
|
|
417
|
+
return len(result.elbo_argmax)
|
|
418
|
+
elif result.logP is not None:
|
|
419
|
+
return result.logP.shape[0]
|
|
420
|
+
elif result.logQ is not None:
|
|
421
|
+
return result.logQ.shape[0]
|
|
422
|
+
|
|
423
|
+
if result.lbfgs_status:
|
|
424
|
+
return sum(result.lbfgs_status.values())
|
|
425
|
+
elif result.path_status:
|
|
426
|
+
return sum(result.path_status.values())
|
|
427
|
+
|
|
428
|
+
if result.samples is not None:
|
|
429
|
+
return result.samples.shape[0]
|
|
430
|
+
|
|
431
|
+
raise ValueError("Cannot determine number of paths from result")
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def add_pathfinder_to_inference_data(
    idata: az.InferenceData,
    result: PathfinderResult | MultiPathfinderResult,
    model: pm.Model | None = None,
    *,
    group: str = "pathfinder",
    paths_group: str = "pathfinder_paths",  # Deprecated, kept for API compatibility
    diagnostics_group: str = "pathfinder_diagnostics",  # Deprecated, kept for API compatibility
    config_group: str = "pathfinder_config",  # Deprecated, kept for API compatibility
    store_diagnostics: bool = False,
) -> az.InferenceData:
    """
    Add pathfinder results to an ArviZ InferenceData object as a single consolidated group.

    All pathfinder output lives in one group, with nested structure encoded
    in variable-name prefixes: summary statistics at the top level, per-path
    data under ``paths/``, configuration under ``config/``, and raw
    diagnostics under ``diagnostics/`` (only when ``store_diagnostics``).

    Parameters
    ----------
    idata : az.InferenceData
        InferenceData object to modify
    result : PathfinderResult | MultiPathfinderResult
        Pathfinder results to add
    model : pm.Model | None
        PyMC model for parameter name extraction
    group : str
        Name for the pathfinder group (default: "pathfinder")
    paths_group : str
        Deprecated: no longer used, kept for API compatibility
    diagnostics_group : str
        Deprecated: no longer used, kept for API compatibility
    config_group : str
        Deprecated: no longer used, kept for API compatibility
    store_diagnostics : bool
        Whether to include potentially large diagnostic arrays

    Returns
    -------
    az.InferenceData
        Modified InferenceData object with consolidated pathfinder group added

    Examples
    --------
    >>> import pymc as pm
    >>> import pymc_extras as pmx
    >>> with pm.Model() as model:
    ...     x = pm.Normal("x", 0, 1)
    ...     idata = pmx.fit(method="pathfinder", model=model, add_pathfinder_groups=False)
    >>> idata = add_pathfinder_to_inference_data(idata, results, model=model)
    >>> print(list(idata.groups()))  # ['posterior', 'pathfinder']
    """
    # Multi-path detection: isinstance() first, then duck typing so that
    # mocks and test doubles with Counter-typed status fields also qualify.
    status_field = getattr(result, "lbfgs_status", None)
    looks_like_counter = callable(getattr(status_field, "values", None))
    is_multipath = isinstance(result, MultiPathfinderResult) or looks_like_counter

    if is_multipath:
        pathfinder_ds = multipathfinder_result_to_xarray(
            result, model=model, store_diagnostics=store_diagnostics
        )
    else:
        pathfinder_ds = pathfinder_result_to_xarray(result, model=model)

    if group in idata.groups():
        warnings.warn(f"Group '{group}' already exists in InferenceData, it will be replaced.")

    idata.add_groups({group: pathfinder_ds})
    return idata
|