arviz 0.16.1__py3-none-any.whl → 0.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arviz/__init__.py +1 -1
- arviz/data/inference_data.py +34 -7
- arviz/data/io_beanmachine.py +6 -1
- arviz/data/io_cmdstanpy.py +439 -50
- arviz/data/io_pyjags.py +5 -2
- arviz/data/io_pystan.py +1 -2
- arviz/labels.py +2 -0
- arviz/plots/backends/bokeh/bpvplot.py +7 -2
- arviz/plots/backends/bokeh/compareplot.py +7 -4
- arviz/plots/backends/bokeh/densityplot.py +0 -1
- arviz/plots/backends/bokeh/distplot.py +0 -2
- arviz/plots/backends/bokeh/forestplot.py +3 -5
- arviz/plots/backends/bokeh/kdeplot.py +0 -2
- arviz/plots/backends/bokeh/pairplot.py +0 -4
- arviz/plots/backends/matplotlib/bfplot.py +0 -1
- arviz/plots/backends/matplotlib/bpvplot.py +3 -3
- arviz/plots/backends/matplotlib/compareplot.py +1 -1
- arviz/plots/backends/matplotlib/dotplot.py +1 -1
- arviz/plots/backends/matplotlib/forestplot.py +2 -4
- arviz/plots/backends/matplotlib/kdeplot.py +0 -1
- arviz/plots/backends/matplotlib/khatplot.py +0 -1
- arviz/plots/backends/matplotlib/lmplot.py +4 -5
- arviz/plots/backends/matplotlib/pairplot.py +0 -1
- arviz/plots/backends/matplotlib/ppcplot.py +8 -5
- arviz/plots/backends/matplotlib/traceplot.py +1 -2
- arviz/plots/bfplot.py +7 -6
- arviz/plots/bpvplot.py +7 -2
- arviz/plots/compareplot.py +2 -2
- arviz/plots/ecdfplot.py +37 -112
- arviz/plots/elpdplot.py +1 -1
- arviz/plots/essplot.py +2 -2
- arviz/plots/kdeplot.py +0 -1
- arviz/plots/pairplot.py +1 -1
- arviz/plots/plot_utils.py +0 -1
- arviz/plots/ppcplot.py +51 -45
- arviz/plots/separationplot.py +0 -1
- arviz/stats/__init__.py +2 -0
- arviz/stats/density_utils.py +2 -2
- arviz/stats/diagnostics.py +2 -3
- arviz/stats/ecdf_utils.py +165 -0
- arviz/stats/stats.py +241 -38
- arviz/stats/stats_utils.py +36 -7
- arviz/tests/base_tests/test_data.py +73 -5
- arviz/tests/base_tests/test_plots_bokeh.py +0 -1
- arviz/tests/base_tests/test_plots_matplotlib.py +24 -1
- arviz/tests/base_tests/test_stats.py +43 -1
- arviz/tests/base_tests/test_stats_ecdf_utils.py +153 -0
- arviz/tests/base_tests/test_stats_utils.py +3 -3
- arviz/tests/external_tests/test_data_beanmachine.py +2 -0
- arviz/tests/external_tests/test_data_numpyro.py +3 -3
- arviz/tests/external_tests/test_data_pyjags.py +3 -1
- arviz/tests/external_tests/test_data_pyro.py +3 -3
- arviz/tests/helpers.py +8 -8
- arviz/utils.py +15 -7
- arviz/wrappers/wrap_pymc.py +1 -1
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/METADATA +16 -15
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/RECORD +60 -58
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/WHEEL +1 -1
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/LICENSE +0 -0
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/top_level.txt +0 -0
arviz/stats/stats.py
CHANGED
@@ -30,6 +30,7 @@ from .density_utils import kde as _kde
 from .diagnostics import _mc_error, _multichain_statistics, ess
 from .stats_utils import ELPDData, _circular_standard_deviation, smooth_data
 from .stats_utils import get_log_likelihood as _get_log_likelihood
+from .stats_utils import get_log_prior as _get_log_prior
 from .stats_utils import logsumexp as _logsumexp
 from .stats_utils import make_ufunc as _make_ufunc
 from .stats_utils import stats_variance_2d as svar
@@ -51,6 +52,7 @@ __all__ = [
     "waic",
     "weight_predictions",
     "_calculate_ics",
+    "psens",
 ]


@@ -144,6 +146,7 @@ def compare(
     Compare the centered and non centered models of the eight school problem:

     .. ipython::
+        :okwarning:

         In [1]: import arviz as az
            ...: data1 = az.load_arviz_data("non_centered_eight")
@@ -155,6 +158,7 @@ def compare(
     weights using the stacking method.

     .. ipython::
+        :okwarning:

         In [1]: az.compare(compare_dict, ic="loo", method="stacking", scale="log")

@@ -178,37 +182,19 @@ def compare(
     except Exception as e:
         raise e.__class__("Encountered error in ELPD computation of compare.") from e
     names = list(ics_dict.keys())
-    if ic == "loo":
-        df_comp = pd.DataFrame(
-            index=names,
-            columns=[
-                "rank",
-                "elpd_loo",
-                "p_loo",
-                "elpd_diff",
-                "weight",
-                "se",
-                "dse",
-                "warning",
-                "scale",
-            ],
-            dtype=np.float_,
-        )
-    elif ic == "waic":
+    if ic in {"loo", "waic"}:
         df_comp = pd.DataFrame(
-            index=names,
-            columns=[
-                "rank",
-                "elpd_waic",
-                "p_waic",
-                "elpd_diff",
-                "weight",
-                "se",
-                "dse",
-                "warning",
-                "scale",
-            ],
-            dtype=np.float_,
+            {
+                "rank": pd.Series(index=names, dtype="int"),
+                f"elpd_{ic}": pd.Series(index=names, dtype="float"),
+                f"p_{ic}": pd.Series(index=names, dtype="float"),
+                "elpd_diff": pd.Series(index=names, dtype="float"),
+                "weight": pd.Series(index=names, dtype="float"),
+                "se": pd.Series(index=names, dtype="float"),
+                "dse": pd.Series(index=names, dtype="float"),
+                "warning": pd.Series(index=names, dtype="boolean"),
+                "scale": pd.Series(index=names, dtype="str"),
+            }
         )
     else:
         raise NotImplementedError(f"The information criterion {ic} is not supported.")
@@ -630,7 +616,7 @@ def _hdi(ary, hdi_prob, circular, skipna):
     ary = np.sort(ary)
     interval_idx_inc = int(np.floor(hdi_prob * n))
     n_intervals = n - interval_idx_inc
-    interval_width = np.subtract(ary[interval_idx_inc:], ary[:n_intervals], dtype=np.float_)
+    interval_width = np.subtract(ary[interval_idx_inc:], ary[:n_intervals], dtype=np.float64)

     if len(interval_width) == 0:
         raise ValueError("Too few elements for interval calculation. ")
@@ -878,17 +864,18 @@ def psislw(log_weights, reff=1.0):

     Parameters
     ----------
-    log_weights: array
+    log_weights : DataArray or (..., N) array-like
         Array of size (n_observations, n_samples)
-    reff: float
+    reff : float, default 1
         relative MCMC efficiency, ``ess / n``

     Returns
     -------
-    lw_out:
-        Smoothed log weights
-    kss:
-        Pareto tail indices
+    lw_out : DataArray or (..., N) ndarray
+        Smoothed, truncated and normalized log weights.
+    kss : DataArray or (...) ndarray
+        Estimates of the shape parameter *k* of the generalized Pareto
+        distribution.

     References
     ----------
@@ -2093,7 +2080,7 @@ def weight_predictions(idatas, weights=None):
     weights /= weights.sum()

     len_idatas = [
-        idata.posterior_predictive.dims["chain"] * idata.posterior_predictive.dims["draw"]
+        idata.posterior_predictive.sizes["chain"] * idata.posterior_predictive.sizes["draw"]
         for idata in idatas
     ]

@@ -2113,3 +2100,219 @@ def weight_predictions(idatas, weights=None):
     )

     return weighted_samples
+
+
+def psens(
+    data,
+    *,
+    component="prior",
+    component_var_names=None,
+    component_coords=None,
+    var_names=None,
+    coords=None,
+    filter_vars=None,
+    delta=0.01,
+    dask_kwargs=None,
+):
+    """Compute power-scaling sensitivity diagnostic.
+
+    Power-scales the prior or likelihood and calculates how much the posterior is affected.
+
+    Parameters
+    ----------
+    data : obj
+        Any object that can be converted to an :class:`arviz.InferenceData` object.
+        Refer to documentation of :func:`arviz.convert_to_dataset` for details.
+        For ndarray: shape = (chain, draw).
+        For n-dimensional ndarray transform first to dataset with ``az.convert_to_dataset``.
+    component : {"prior", "likelihood"}, default "prior"
+        When `component` is "likelihood", the log likelihood values are retrieved
+        from the ``log_likelihood`` group as pointwise log likelihood and added
+        together. With "prior", the log prior values are retrieved from the
+        ``log_prior`` group.
+    component_var_names : str, optional
+        Name of the prior or log likelihood variables to use
+    component_coords : dict, optional
+        Coordinates defining a subset over the component element for which to
+        compute the prior sensitivity diagnostic.
+    var_names : list of str, optional
+        Names of posterior variables to include in the power scaling sensitivity diagnostic
+    coords : dict, optional
+        Coordinates defining a subset over the posterior. Only these variables will
+        be used when computing the prior sensitivity.
+    filter_vars: {None, "like", "regex"}, default None
+        If ``None`` (default), interpret var_names as the real variables names.
+        If "like", interpret var_names as substrings of the real variables names.
+        If "regex", interpret var_names as regular expressions on the real variables names.
+    delta : float
+        Value for finite difference derivative calculation.
+    dask_kwargs : dict, optional
+        Dask related kwargs passed to :func:`~arviz.wrap_xarray_ufunc`.
+
+    Returns
+    -------
+    xarray.Dataset
+        Returns dataset of power-scaling sensitivity diagnostic values.
+        Higher sensitivity values indicate greater sensitivity.
+        Prior sensitivity above 0.05 indicates informative prior.
+        Likelihood sensitivity below 0.05 indicates weak or noninformative likelihood.
+
+    Examples
+    --------
+    Compute the likelihood sensitivity for the non centered eight model:
+
+    .. ipython::
+
+        In [1]: import arviz as az
+           ...: data = az.load_arviz_data("non_centered_eight")
+           ...: az.psens(data, component="likelihood")
+
+    To compute the prior sensitivity, we need to first compute the log prior
+    at each posterior sample. In our case, we know mu has a normal prior :math:`N(0, 5)`,
+    tau is a half cauchy prior with scale/beta parameter 5,
+    and theta has a standard normal as prior.
+    We add this information to the ``log_prior`` group before computing powerscaling
+    check with ``psens``
+
+    .. ipython::
+
+        In [1]: from xarray_einstats.stats import XrContinuousRV
+           ...: from scipy.stats import norm, halfcauchy
+           ...: post = data.posterior
+           ...: log_prior = {
+           ...:     "mu": XrContinuousRV(norm, 0, 5).logpdf(post["mu"]),
+           ...:     "tau": XrContinuousRV(halfcauchy, scale=5).logpdf(post["tau"]),
+           ...:     "theta_t": XrContinuousRV(norm, 0, 1).logpdf(post["theta_t"]),
+           ...: }
+           ...: data.add_groups({"log_prior": log_prior})
+           ...: az.psens(data, component="prior")
+
+    Notes
+    -----
+    The diagnostic is computed by power-scaling the specified component (prior or likelihood)
+    and determining the degree to which the posterior changes as described in [1]_.
+    It uses Pareto-smoothed importance sampling to avoid refitting the model.
+
+    References
+    ----------
+    .. [1] Kallioinen et al, *Detecting and diagnosing prior and likelihood sensitivity with
+       power-scaling*, 2022, https://arxiv.org/abs/2107.14054
+
+    """
+    dataset = extract(data, var_names=var_names, filter_vars=filter_vars, group="posterior")
+    if coords is None:
+        dataset = dataset.sel(coords)
+
+    if component == "likelihood":
+        component_draws = _get_log_likelihood(data, var_name=component_var_names, single_var=False)
+    elif component == "prior":
+        component_draws = _get_log_prior(data, var_names=component_var_names)
+    else:
+        raise ValueError("Value for `component` argument not recognized")
+
+    component_draws = component_draws.stack(__sample__=("chain", "draw"))
+    if component_coords is None:
+        component_draws = component_draws.sel(component_coords)
+    if isinstance(component_draws, xr.DataArray):
+        component_draws = component_draws.to_dataset()
+    if len(component_draws.dims):
+        component_draws = component_draws.to_stacked_array(
+            "latent-obs_var", sample_dims=("__sample__",)
+        ).sum("latent-obs_var")
+    # from here component_draws is a 1d object with dimensions (sample,)
+
+    # calculate lower and upper alpha values
+    lower_alpha = 1 / (1 + delta)
+    upper_alpha = 1 + delta
+
+    # calculate importance sampling weights for lower and upper alpha power-scaling
+    lower_w = np.exp(_powerscale_lw(component_draws=component_draws, alpha=lower_alpha))
+    lower_w = lower_w / np.sum(lower_w)
+
+    upper_w = np.exp(_powerscale_lw(component_draws=component_draws, alpha=upper_alpha))
+    upper_w = upper_w / np.sum(upper_w)
+
+    ufunc_kwargs = {"n_dims": 1, "ravel": False}
+    func_kwargs = {"lower_weights": lower_w.values, "upper_weights": upper_w.values, "delta": delta}
+
+    # calculate the sensitivity diagnostic based on the importance weights and draws
+    return _wrap_xarray_ufunc(
+        _powerscale_sens,
+        dataset,
+        ufunc_kwargs=ufunc_kwargs,
+        func_kwargs=func_kwargs,
+        dask_kwargs=dask_kwargs,
+        input_core_dims=[["sample"]],
+    )
+
+
+def _powerscale_sens(draws, *, lower_weights=None, upper_weights=None, delta=0.01):
+    """
+    Calculate power-scaling sensitivity by finite difference
+    second derivative of CJS
+    """
+    lower_cjs = max(
+        _cjs_dist(draws=draws, weights=lower_weights),
+        _cjs_dist(draws=-1 * draws, weights=lower_weights),
+    )
+    upper_cjs = max(
+        _cjs_dist(draws=draws, weights=upper_weights),
+        _cjs_dist(draws=-1 * draws, weights=upper_weights),
+    )
+    logdiffsquare = 2 * np.log2(1 + delta)
+    grad = (lower_cjs + upper_cjs) / logdiffsquare
+
+    return grad
+
+
+def _powerscale_lw(alpha, component_draws):
+    """
+    Calculate log weights for power-scaling component by alpha.
+    """
+    log_weights = (alpha - 1) * component_draws
+    log_weights = psislw(log_weights)[0]
+
+    return log_weights
+
+
+def _cjs_dist(draws, weights):
+    """
+    Calculate the cumulative Jensen-Shannon distance between original draws and weighted draws.
+    """
+
+    # sort draws and weights
+    order = np.argsort(draws)
+    draws = draws[order]
+    weights = weights[order]
+
+    binwidth = np.diff(draws)
+
+    # ecdfs
+    cdf_p = np.linspace(1 / len(draws), 1 - 1 / len(draws), len(draws) - 1)
+    cdf_q = np.cumsum(weights / np.sum(weights))[:-1]
+
+    # integrals of ecdfs
+    cdf_p_int = np.dot(cdf_p, binwidth)
+    cdf_q_int = np.dot(cdf_q, binwidth)
+
+    # cjs calculation
+    pq_numer = np.log2(cdf_p, out=np.zeros_like(cdf_p), where=cdf_p != 0)
+    qp_numer = np.log2(cdf_q, out=np.zeros_like(cdf_q), where=cdf_q != 0)
+
+    denom = 0.5 * (cdf_p + cdf_q)
+    denom = np.log2(denom, out=np.zeros_like(denom), where=denom != 0)
+
+    cjs_pq = np.sum(binwidth * (cdf_p * (pq_numer - denom))) + 0.5 / np.log(2) * (
+        cdf_q_int - cdf_p_int
+    )
+
+    cjs_qp = np.sum(binwidth * (cdf_q * (qp_numer - denom))) + 0.5 / np.log(2) * (
+        cdf_p_int - cdf_q_int
+    )
+
+    cjs_pq = max(0, cjs_pq)
+    cjs_qp = max(0, cjs_qp)
+
+    bound = cdf_p_int + cdf_q_int
+
+    return np.sqrt((cjs_pq + cjs_qp) / bound)
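
For reference (not part of the diff): the new psens entry point added above can be exercised roughly as in its docstring example. A minimal sketch, assuming the bundled "non_centered_eight" example dataset and the optional xarray_einstats dependency are available:

    import arviz as az
    from scipy.stats import halfcauchy, norm
    from xarray_einstats.stats import XrContinuousRV

    data = az.load_arviz_data("non_centered_eight")

    # likelihood sensitivity only needs the log_likelihood group already stored in the dataset
    likelihood_sens = az.psens(data, component="likelihood")

    # prior sensitivity needs pointwise log prior values, added as a "log_prior" group
    post = data.posterior
    log_prior = {
        "mu": XrContinuousRV(norm, 0, 5).logpdf(post["mu"]),
        "tau": XrContinuousRV(halfcauchy, scale=5).logpdf(post["tau"]),
        "theta_t": XrContinuousRV(norm, 0, 1).logpdf(post["theta_t"]),
    }
    data.add_groups({"log_prior": log_prior})
    prior_sens = az.psens(data, component="prior")

Both calls return an xarray.Dataset of per-variable sensitivity values; per the docstring, prior sensitivity above 0.05 suggests an informative prior.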
arviz/stats/stats_utils.py
CHANGED
@@ -16,7 +16,7 @@ from ..utils import conditional_jit, conditional_vect, conditional_dask
 from .density_utils import histogram as _histogram


-__all__ = ["autocorr", "autocov", "ELPDData", "make_ufunc", "wrap_xarray_ufunc"]
+__all__ = ["autocorr", "autocov", "ELPDData", "make_ufunc", "smooth_data", "wrap_xarray_ufunc"]


 def autocov(ary, axis=-1):
@@ -409,7 +409,7 @@ def not_valid(ary, check_nan=True, check_shape=True, nan_kwargs=None, shape_kwar
     return nan_error | chain_error | draw_error


-def get_log_likelihood(idata, var_name=None):
+def get_log_likelihood(idata, var_name=None, single_var=True):
     """Retrieve the log likelihood dataarray of a given variable."""
     if (
         not hasattr(idata, "log_likelihood")
@@ -426,9 +426,11 @@ def get_log_likelihood(idata, var_name=None):
     if var_name is None:
         var_names = list(idata.log_likelihood.data_vars)
         if len(var_names) > 1:
-            raise TypeError(
-                f"Found several log likelihood arrays {var_names}, var_name cannot be None"
-            )
+            if single_var:
+                raise TypeError(
+                    f"Found several log likelihood arrays {var_names}, var_name cannot be None"
+                )
+            return idata.log_likelihood[var_names]
         return idata.log_likelihood[var_names[0]]
     else:
         try:
@@ -482,7 +484,7 @@ class ELPDData(pd.Series): # pylint: disable=too-many-ancestors
         base += "\n\nThere has been a warning during the calculation. Please check the results."

         if kind == "loo" and "pareto_k" in self:
-            bins = np.asarray([-np.Inf, 0.5, 0.7, 1, np.Inf])
+            bins = np.asarray([-np.inf, 0.5, 0.7, 1, np.inf])
             counts, *_ = _histogram(self.pareto_k.values, bins)
             extended = POINTWISE_LOO_FMT.format(max(4, len(str(np.max(counts)))))
             extended = extended.format(
@@ -562,7 +564,25 @@ def _circular_standard_deviation(samples, high=2 * np.pi, low=0, skipna=False, a


 def smooth_data(obs_vals, pp_vals):
-    """Smooth data, helper function for discrete data in plot_pbv, loo_pit and plot_loo_pit."""
+    """Smooth data using a cubic spline.
+
+    Helper function for discrete data in plot_pbv, loo_pit and plot_loo_pit.
+
+    Parameters
+    ----------
+    obs_vals : (N) array-like
+        Observed data
+    pp_vals : (S, N) array-like
+        Posterior predictive samples. ``N`` is the number of observations,
+        and ``S`` is the number of samples (generally n_chains*n_draws).
+
+    Returns
+    -------
+    obs_vals : (N) ndarray
+        Smoothed observed data
+    pp_vals : (S, N) ndarray
+        Smoothed posterior predictive samples
+    """
     x = np.linspace(0, 1, len(obs_vals))
     csi = CubicSpline(x, obs_vals)
     obs_vals = csi(np.linspace(0.01, 0.99, len(obs_vals)))
@@ -572,3 +592,12 @@ def smooth_data(obs_vals, pp_vals):
     pp_vals = csi(np.linspace(0.01, 0.99, pp_vals.shape[1]))

     return obs_vals, pp_vals
+
+
+def get_log_prior(idata, var_names=None):
+    """Retrieve the log prior dataarray of a given variable."""
+    if not hasattr(idata, "log_prior"):
+        raise TypeError("log prior not found in inference data object")
+    if var_names is None:
+        var_names = list(idata.log_prior.data_vars)
+    return idata.log_prior[var_names]
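
For reference (not part of the diff): psens relies on the new single_var flag and the new get_log_prior helper above to pull pointwise log likelihood and log prior values out of an InferenceData object. A rough sketch of the behaviour on made-up toy data; these are private helpers in arviz.stats.stats_utils, so treat the calling convention as an internal detail:

    import numpy as np
    from arviz import from_dict
    from arviz.stats.stats_utils import get_log_likelihood, get_log_prior

    rng = np.random.default_rng(0)
    idata = from_dict(
        posterior={"mu": rng.normal(size=(4, 100))},
        log_likelihood={
            "y1": rng.normal(size=(4, 100, 8)),
            "y2": rng.normal(size=(4, 100, 5)),
        },
    )
    idata.add_groups({"log_prior": {"mu": rng.normal(size=(4, 100))}})

    # 0.16.1 raised TypeError here because several log likelihood arrays exist and
    # var_name is None; with single_var=False the whole Dataset is returned instead
    log_lik = get_log_likelihood(idata, var_name=None, single_var=False)

    # new helper mirroring get_log_likelihood for the log_prior group used by psens
    log_prior = get_log_prior(idata)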
arviz/tests/base_tests/test_data.py
CHANGED

@@ -496,7 +496,7 @@ class TestInferenceData: # pylint: disable=too-many-public-methods
         with pytest.raises(KeyError):
             idata.sel(inplace=False, chain_prior=True, chain=[0, 1, 3])

-    @pytest.mark.parametrize("use", ("del", "delattr"))
+    @pytest.mark.parametrize("use", ("del", "delattr", "delitem"))
     def test_del(self, use):
         # create inference data object
         data = np.random.normal(size=(4, 500, 8))
@@ -523,6 +523,8 @@ class TestInferenceData: # pylint: disable=too-many-public-methods
         # Use del method
         if use == "del":
             del idata.sample_stats
+        elif use == "delitem":
+            del idata["sample_stats"]
         else:
             delattr(idata, "sample_stats")

@@ -763,6 +765,69 @@ class TestInferenceData: # pylint: disable=too-many-public-methods
         )
         assert all(item in test_data.columns for item in ("chain", "draw"))

+    @pytest.mark.parametrize(
+        "kwargs",
+        (
+            {
+                "var_names": ["parameter_1", "parameter_2", "variable_1", "variable_2"],
+                "filter_vars": None,
+                "var_results": [
+                    ("posterior", "parameter_1"),
+                    ("posterior", "parameter_2"),
+                    ("prior", "parameter_1"),
+                    ("prior", "parameter_2"),
+                    ("posterior", "variable_1"),
+                    ("posterior", "variable_2"),
+                ],
+            },
+            {
+                "var_names": "parameter",
+                "filter_vars": "like",
+                "groups": "posterior",
+                "var_results": ["parameter_1", "parameter_2"],
+            },
+            {
+                "var_names": "~parameter",
+                "filter_vars": "like",
+                "groups": "posterior",
+                "var_results": ["variable_1", "variable_2", "custom_name"],
+            },
+            {
+                "var_names": [".+_2$", "custom_name"],
+                "filter_vars": "regex",
+                "groups": "posterior",
+                "var_results": ["parameter_2", "variable_2", "custom_name"],
+            },
+            {
+                "var_names": ["lp"],
+                "filter_vars": "regex",
+                "groups": "sample_stats",
+                "var_results": ["lp"],
+            },
+        ),
+    )
+    def test_to_dataframe_selection(self, kwargs):
+        results = kwargs.pop("var_results")
+        idata = from_dict(
+            posterior={
+                "parameter_1": np.random.randn(4, 100),
+                "parameter_2": np.random.randn(4, 100),
+                "variable_1": np.random.randn(4, 100),
+                "variable_2": np.random.randn(4, 100),
+                "custom_name": np.random.randn(4, 100),
+            },
+            prior={
+                "parameter_1": np.random.randn(4, 100),
+                "parameter_2": np.random.randn(4, 100),
+            },
+            sample_stats={
+                "lp": np.random.randn(4, 100),
+            },
+        )
+        test_data = idata.to_dataframe(**kwargs)
+        assert not test_data.empty
+        assert set(test_data.columns).symmetric_difference(results) == set(["chain", "draw"])
+
     def test_to_dataframe_bad(self):
         idata = from_dict(
             posterior={"a": np.random.randn(4, 100, 3, 4, 5), "b": np.random.randn(4, 100)},
@@ -781,6 +846,9 @@ class TestInferenceData: # pylint: disable=too-many-public-methods
         with pytest.raises(KeyError):
             idata.to_dataframe(groups=["invalid_group"])

+        with pytest.raises(ValueError):
+            idata.to_dataframe(var_names=["c"])
+
     @pytest.mark.parametrize("use", (None, "args", "kwargs"))
     def test_map(self, use):
         idata = load_arviz_data("centered_eight")
@@ -1173,7 +1241,7 @@ class TestDataDict:
         self.check_var_names_coords_dims(inference_data.prior_predictive)
         self.check_var_names_coords_dims(inference_data.sample_stats_prior)

-        pred_dims = inference_data.predictions.dims["school_pred"]
+        pred_dims = inference_data.predictions.sizes["school_pred"]
         assert pred_dims == 8

     def test_inference_data_warmup(self, data, eight_schools_params):
@@ -1518,8 +1586,8 @@ class TestExtractDataset:
         idata = load_arviz_data("centered_eight")
         post = extract(idata, combined=False)
         assert "sample" not in post.dims
-        assert post.dims["chain"] == 4
-        assert post.dims["draw"] == 500
+        assert post.sizes["chain"] == 4
+        assert post.sizes["draw"] == 500

     def test_var_name_group(self):
         idata = load_arviz_data("centered_eight")
@@ -1539,5 +1607,5 @@ class TestExtractDataset:
     def test_subset_samples(self):
         idata = load_arviz_data("centered_eight")
         post = extract(idata, num_samples=10)
-        assert post.dims["sample"] == 10
+        assert post.sizes["sample"] == 10
         assert post.attrs == idata.posterior.attrs
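
For reference (not part of the diff): several of the test updates above, like the weight_predictions change in stats.py, replace mapping-style Dataset.dims[...] lookups with Dataset.sizes[...], following xarray's move away from treating .dims on a Dataset as a mapping. A small sketch of the new spelling, using the sizes asserted in the tests and assuming the bundled "centered_eight" example dataset:

    import arviz as az

    idata = az.load_arviz_data("centered_eight")

    post = az.extract(idata, combined=False)
    # .sizes is the mapping-style accessor; Dataset.dims is becoming set-like
    assert post.sizes["chain"] == 4
    assert post.sizes["draw"] == 500

    sub = az.extract(idata, num_samples=10)
    assert sub.sizes["sample"] == 10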
arviz/tests/base_tests/test_plots_bokeh.py
CHANGED

@@ -327,7 +327,6 @@ def test_plot_autocorr_var_names(models, var_names):
     "kwargs", [{"insample_dev": False}, {"plot_standard_error": False}, {"plot_ic_diff": False}]
 )
 def test_plot_compare(models, kwargs):
-
     model_compare = compare({"Model 1": models.model_1, "Model 2": models.model_2})

     axes = plot_compare(model_compare, backend="bokeh", show=False, **kwargs)
arviz/tests/base_tests/test_plots_matplotlib.py
CHANGED

@@ -9,6 +9,7 @@ import pytest
 from matplotlib import animation
 from pandas import DataFrame
 from scipy.stats import gaussian_kde, norm
+import xarray as xr

 from ...data import from_dict, load_arviz_data
 from ...plots import (
@@ -732,6 +733,28 @@ def test_plot_ppc(models, kind, alpha, animated, observed, observed_rug):
     assert axes


+def test_plot_ppc_transposed():
+    idata = load_arviz_data("rugby")
+    idata.map(
+        lambda ds: ds.assign(points=xr.concat((ds.home_points, ds.away_points), "field")),
+        groups="observed_vars",
+        inplace=True,
+    )
+    assert idata.posterior_predictive.points.dims == ("field", "chain", "draw", "match")
+    ax = plot_ppc(
+        idata,
+        kind="scatter",
+        var_names="points",
+        flatten=["field"],
+        coords={"match": ["Wales Italy"]},
+        random_seed=3,
+        num_pp_samples=8,
+    )
+    x, y = ax.get_lines()[2].get_data()
+    assert not np.isclose(y[0], 0)
+    assert np.all(np.array([40, 43, 10, 9]) == x)
+
+
 @pytest.mark.parametrize("kind", ["kde", "cumulative", "scatter"])
 @pytest.mark.parametrize("jitter", [None, 0, 0.1, 1, 3])
 @pytest.mark.parametrize("animated", [False, True])
@@ -1898,7 +1921,7 @@ def test_plot_ts(kwargs):
         dims={"y": ["obs_dim"], "z": ["pred_dim"]},
     )

-    ax = plot_ts(idata=idata, y="y",
+    ax = plot_ts(idata=idata, y="y", **kwargs)
     assert np.all(ax)

arviz/tests/base_tests/test_stats.py
CHANGED

@@ -10,8 +10,9 @@ from numpy.testing import (
     assert_array_equal,
 )
 from scipy.special import logsumexp
-from scipy.stats import linregress
+from scipy.stats import linregress, norm, halfcauchy
 from xarray import DataArray, Dataset
+from xarray_einstats.stats import XrContinuousRV

 from ...data import concat, convert_to_inference_data, from_dict, load_arviz_data
 from ...rcparams import rcParams
@@ -22,6 +23,7 @@ from ...stats import (
     hdi,
     loo,
     loo_pit,
+    psens,
     psislw,
     r2_score,
     summary,
@@ -829,3 +831,43 @@ def test_weight_predictions():
     assert_almost_equal(new.posterior_predictive["a"].mean(), 0, decimal=1)
     new = weight_predictions([idata0, idata1], weights=[0.9, 0.1])
     assert_almost_equal(new.posterior_predictive["a"].mean(), -0.8, decimal=1)
+
+
+@pytest.fixture(scope="module")
+def psens_data():
+    non_centered_eight = load_arviz_data("non_centered_eight")
+    post = non_centered_eight.posterior
+    log_prior = {
+        "mu": XrContinuousRV(norm, 0, 5).logpdf(post["mu"]),
+        "tau": XrContinuousRV(halfcauchy, scale=5).logpdf(post["tau"]),
+        "theta_t": XrContinuousRV(norm, 0, 1).logpdf(post["theta_t"]),
+    }
+    non_centered_eight.add_groups({"log_prior": log_prior})
+    return non_centered_eight
+
+
+@pytest.mark.parametrize("component", ("prior", "likelihood"))
+def test_priorsens_global(psens_data, component):
+    result = psens(psens_data, component=component)
+    assert "mu" in result
+    assert "theta" in result
+    assert "school" in result.theta_t.dims
+
+
+def test_priorsens_var_names(psens_data):
+    result1 = psens(
+        psens_data, component="prior", component_var_names=["mu", "tau"], var_names=["mu", "tau"]
+    )
+    result2 = psens(psens_data, component="prior", var_names=["mu", "tau"])
+    for result in (result1, result2):
+        assert "theta" not in result
+        assert "mu" in result
+        assert "tau" in result
+    assert not np.isclose(result1.mu, result2.mu)
+
+
+def test_priorsens_coords(psens_data):
+    result = psens(psens_data, component="likelihood", component_coords={"school": "Choate"})
+    assert "mu" in result
+    assert "theta" in result
+    assert "school" in result.theta_t.dims