arviz 0.21.0__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- arviz/__init__.py +8 -3
- arviz/data/inference_data.py +37 -19
- arviz/data/io_datatree.py +2 -2
- arviz/data/io_numpyro.py +112 -4
- arviz/plots/autocorrplot.py +12 -2
- arviz/plots/backends/bokeh/hdiplot.py +7 -6
- arviz/plots/backends/bokeh/lmplot.py +19 -3
- arviz/plots/backends/bokeh/pairplot.py +18 -48
- arviz/plots/backends/matplotlib/khatplot.py +8 -1
- arviz/plots/backends/matplotlib/lmplot.py +13 -7
- arviz/plots/backends/matplotlib/pairplot.py +14 -22
- arviz/plots/kdeplot.py +4 -4
- arviz/plots/lmplot.py +41 -14
- arviz/plots/pairplot.py +10 -3
- arviz/stats/density_utils.py +1 -1
- arviz/stats/stats.py +19 -7
- arviz/tests/base_tests/test_data.py +0 -4
- arviz/tests/base_tests/test_plots_bokeh.py +60 -2
- arviz/tests/base_tests/test_plots_matplotlib.py +77 -1
- arviz/tests/base_tests/test_stats.py +42 -1
- arviz/tests/external_tests/test_data_numpyro.py +130 -3
- arviz/wrappers/base.py +1 -1
- arviz/wrappers/wrap_stan.py +1 -1
- {arviz-0.21.0.dist-info → arviz-0.22.0.dist-info}/METADATA +7 -7
- {arviz-0.21.0.dist-info → arviz-0.22.0.dist-info}/RECORD +28 -28
- {arviz-0.21.0.dist-info → arviz-0.22.0.dist-info}/LICENSE +0 -0
- {arviz-0.21.0.dist-info → arviz-0.22.0.dist-info}/WHEEL +0 -0
- {arviz-0.21.0.dist-info → arviz-0.22.0.dist-info}/top_level.txt +0 -0
arviz/plots/backends/matplotlib/pairplot.py
CHANGED

@@ -30,6 +30,8 @@ def plot_pair(
     diverging_mask,
     divergences_kwargs,
     flat_var_names,
+    flat_ref_slices,
+    flat_var_labels,
     backend_kwargs,
     marginal_kwargs,
     show,
@@ -77,24 +79,12 @@ def plot_pair(
         kde_kwargs["contour_kwargs"].setdefault("colors", "k")
 
     if reference_values:
-        reference_values_copy = {}
-        label = []
-        for variable in list(reference_values.keys()):
-            if " " in variable:
-                variable_copy = variable.replace(" ", "\n", 1)
-            else:
-                variable_copy = variable
-
-            label.append(variable_copy)
-            reference_values_copy[variable_copy] = reference_values[variable]
-
-        difference = set(flat_var_names).difference(set(label))
+        difference = set(flat_var_names).difference(set(reference_values.keys()))
 
         if difference:
-            warn = [diff.replace("\n", " ", 1) for diff in difference]
             warnings.warn(
                 "Argument reference_values does not include reference value for: {}".format(
-                    ", ".join(warn)
+                    ", ".join(difference)
                 ),
                 UserWarning,
             )
@@ -211,12 +201,12 @@ def plot_pair(
 
         if reference_values:
             ax.plot(
-                reference_values_copy[flat_var_names[0]],
-                reference_values_copy[flat_var_names[1]],
+                np.array(reference_values[flat_var_names[0]])[flat_ref_slices[0]],
+                np.array(reference_values[flat_var_names[1]])[flat_ref_slices[1]],
                 **reference_values_kwargs,
             )
-        ax.set_xlabel(f"{flat_var_names[0]}", fontsize=ax_labelsize, wrap=True)
-        ax.set_ylabel(f"{flat_var_names[1]}", fontsize=ax_labelsize, wrap=True)
+        ax.set_xlabel(f"{flat_var_labels[0]}", fontsize=ax_labelsize, wrap=True)
+        ax.set_ylabel(f"{flat_var_labels[1]}", fontsize=ax_labelsize, wrap=True)
         ax.tick_params(labelsize=xt_labelsize)
 
     else:
@@ -336,20 +326,22 @@ def plot_pair(
                     y_name = flat_var_names[j + not_marginals]
                     if (x_name not in difference) and (y_name not in difference):
                         ax[j, i].plot(
-                            reference_values_copy[x_name],
-                            reference_values_copy[y_name],
+                            np.array(reference_values[x_name])[flat_ref_slices[i]],
+                            np.array(reference_values[y_name])[
+                                flat_ref_slices[j + not_marginals]
+                            ],
                             **reference_values_kwargs,
                         )
 
                 if j != vars_to_plot - 1:
                     plt.setp(ax[j, i].get_xticklabels(), visible=False)
                 else:
-                    ax[j, i].set_xlabel(f"{flat_var_names[i]}", fontsize=ax_labelsize, wrap=True)
+                    ax[j, i].set_xlabel(f"{flat_var_labels[i]}", fontsize=ax_labelsize, wrap=True)
                 if i != 0:
                     plt.setp(ax[j, i].get_yticklabels(), visible=False)
                 else:
                     ax[j, i].set_ylabel(
-                        f"{flat_var_names[j + not_marginals]}",
+                        f"{flat_var_labels[j + not_marginals]}",
                         fontsize=ax_labelsize,
                         wrap=True,
                     )
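
These two pairplot hunks are the backend half of a new feature: `reference_values` entries may now be scalars or arrays, indexed per panel through `flat_ref_slices`, while axis labels come from the labeller via `flat_var_labels`. A minimal sketch of the call this enables, mirroring the new test cases further down (data and kwargs are illustrative):

```python
import numpy as np
import arviz as az
from arviz.labels import MapLabeller

idata = az.load_arviz_data("centered_eight")

# theta has one entry per school, so an 8-element array of reference
# values can now be passed; each scatter panel selects its own entry
# through the per-variable slices built in plot_pair.
az.plot_pair(
    idata,
    var_names=["theta"],
    reference_values={"theta": np.zeros(8)},
    labeller=MapLabeller({"theta": r"$\theta$"}),
    show=False,
)
```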
arviz/plots/kdeplot.py
CHANGED

@@ -255,6 +255,10 @@ def plot_kde(
         "or plot_pair instead of plot_kde"
     )
 
+    if backend is None:
+        backend = rcParams["plot.backend"]
+    backend = backend.lower()
+
     if values2 is None:
         if bw == "default":
             bw = "taylor" if is_circular else "experimental"
@@ -346,10 +350,6 @@
         **kwargs,
     )
 
-    if backend is None:
-        backend = rcParams["plot.backend"]
-    backend = backend.lower()
-
     # TODO: Add backend kwargs
     plot = get_plotting_function("plot_kde", "kdeplot", backend)
     ax = plot(**kde_plot_args)
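
The relocated block only changes when the backend name is resolved: it now happens at the top of `plot_kde`, before any estimation or plotting branch runs, so every later code path sees a concrete, lowercased backend string. From the caller's side nothing changes; a minimal sanity check (values are illustrative):

```python
import numpy as np
import arviz as az

rng = np.random.default_rng(0)

# With backend=None, plot_kde falls back to rcParams["plot.backend"];
# after this change the fallback is applied before the density is computed.
ax = az.plot_kde(rng.normal(size=500), show=False)
```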
arviz/plots/lmplot.py
CHANGED

@@ -300,20 +300,47 @@ def plot_lm(
     # Filter out the required values to generate plotters
     if y_model is not None:
         if kind_model == "lines":
-            [old lines 303-316: previous implementation, 14 lines; content not preserved in this diff view]
+            var_name = y_model.name if y_model.name else "y_model"
+            data = y_model.values
+
+            total_samples = data.shape[0] * data.shape[1]
+            data = data.reshape(total_samples, *data.shape[2:])
+
+            if pp_sample_ix is not None:
+                data = data[pp_sample_ix]
+
+            if plot_dim is not None:
+                # For plot_dim case, transpose to get dimension first
+                data = data.transpose(1, 0, 2)[..., 0]
+
+            # Create plotter tuple(s)
+            if plot_dim is not None:
+                y_model = [(var_name, {}, {}, data) for _ in range(length_plotters)]
+            else:
+                y_model = [(var_name, {}, {}, data)]
+                y_model = _repeat_flatten_list(y_model, len_x)
+
+        elif kind_model == "hdi":
+            var_name = y_model.name if y_model.name else "y_model"
+            data = y_model.values
+
+            if plot_dim is not None:
+                # First transpose to get plot_dim first
+                data = data.transpose(2, 0, 1, 3)
+                # For plot_dim case, we just want HDI for first dimension
+                data = data[..., 0]
+
+                # Reshape to (samples, points)
+                data = data.transpose(1, 2, 0).reshape(-1, data.shape[0])
+                y_model = [(var_name, {}, {}, data) for _ in range(length_plotters)]
+
+            else:
+                data = data.reshape(-1, data.shape[-1])
+                y_model = [(var_name, {}, {}, data)]
+                y_model = _repeat_flatten_list(y_model, len_x)
+
+        if len(y_model) == 1:
+            y_model = _repeat_flatten_list(y_model, len_x)
 
     rows, cols = default_grid(length_plotters)
 
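
The rewritten block builds the `y_model` plotter tuples directly from the underlying array, handling both the `lines` and `hdi` model kinds as well as the `plot_dim` case for multidimensional data. A short sketch of the calls this supports, modeled on the new `test_plot_lm` cases added below (shapes and names are illustrative):

```python
import numpy as np
import arviz as az

rng = np.random.default_rng(0)
idata = az.from_dict(
    observed_data={"y": rng.normal(size=7)},
    posterior_predictive={"y": rng.normal(size=(4, 1000, 7)) / 2},
    posterior={"y_model": rng.normal(size=(4, 1000, 7))},
    dims={"y": ["dim1"]},
    coords={"dim1": range(7)},
)

# kind_model="lines" draws num_samples posterior draws of y_model;
# kind_model="hdi" summarizes the same draws as an HDI band instead.
az.plot_lm(idata=idata, y="y", y_model="y_model", kind_model="lines", num_samples=50, show=False)
az.plot_lm(idata=idata, y="y", y_model="y_model", kind_model="hdi", show=False)
```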
arviz/plots/pairplot.py
CHANGED

@@ -196,9 +196,14 @@ def plot_pair(
             get_coords(dataset, coords), var_names=var_names, skip_dims=combine_dims, combined=True
         )
     )
-    flat_var_names = [
-        labeller.make_label_vert(var_name, sel, isel) for var_name, sel, isel, _ in plotters
-    ]
+    flat_var_names = []
+    flat_ref_slices = []
+    flat_var_labels = []
+    for var_name, sel, isel, _ in plotters:
+        dims = [dim for dim in dataset[var_name].dims if dim not in ["chain", "draw"]]
+        flat_var_names.append(var_name)
+        flat_ref_slices.append(tuple(isel[dim] if dim in isel else slice(None) for dim in dims))
+        flat_var_labels.append(labeller.make_label_vert(var_name, sel, isel))
 
     divergent_data = None
     diverging_mask = None
@@ -253,6 +258,8 @@ def plot_pair(
         diverging_mask=diverging_mask,
        divergences_kwargs=divergences_kwargs,
        flat_var_names=flat_var_names,
+       flat_ref_slices=flat_ref_slices,
+       flat_var_labels=flat_var_labels,
        backend_kwargs=backend_kwargs,
        marginal_kwargs=marginal_kwargs,
        show=show,
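
On the frontend side, the single list comprehension is split into three parallel lists: bare variable names, per-variable indexers over the non-sample dimensions, and display labels. A toy illustration of how the reference slice works (names and values are illustrative):

```python
import numpy as np

# For a variable with dims ("chain", "draw", "school") and a plotter that
# selects school index 2, the non-sample dims are ["school"] and the
# indexer dict is {"school": 2}, so the reference slice is (2,).
dims = ["school"]
isel = {"school": 2}
flat_ref_slice = tuple(isel[dim] if dim in isel else slice(None) for dim in dims)
assert flat_ref_slice == (2,)

# Indexing an array-valued reference value with it picks out one entry:
assert np.array([0.1, 0.2, 0.3, 0.4])[flat_ref_slice] == 0.3
```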
arviz/stats/density_utils.py
CHANGED

@@ -635,7 +635,7 @@ def _kde_circular(
     cumulative: bool, optional
         Whether return the PDF or the cumulative PDF. Defaults to False.
     grid_len: int, optional
-        The number of intervals used to bin the data
+        The number of intervals used to bin the data point i.e. the length of the grid used in the
         estimation. Defaults to 512.
     """
     # All values between -pi and pi
arviz/stats/stats.py
CHANGED

@@ -869,7 +869,7 @@ def loo(data, pointwise=None, var_name=None, reff=None, scale=None):
     )
 
 
-def psislw(log_weights, reff=1.0):
+def psislw(log_weights, reff=1.0, normalize=True):
     """
     Pareto smoothed importance sampling (PSIS).
 
@@ -887,11 +887,13 @@ def psislw(log_weights, reff=1.0):
         Array of size (n_observations, n_samples)
     reff : float, default 1
         relative MCMC efficiency, ``ess / n``
+    normalize : bool, default True
+        return normalized log weights
 
     Returns
     -------
     lw_out : DataArray or (..., N) ndarray
-        Smoothed, truncated and normalized log weights.
+        Smoothed, truncated and possibly normalized log weights.
     kss : DataArray or (...) ndarray
         Estimates of the shape parameter *k* of the generalized Pareto
         distribution.
@@ -936,7 +938,12 @@ def psislw(log_weights, reff=1.0):
     out = np.empty_like(log_weights), np.empty(shape)
 
     # define kwargs
-    func_kwargs = {"cutoff_ind": cutoff_ind, "cutoffmin": cutoffmin, "out": out}
+    func_kwargs = {
+        "cutoff_ind": cutoff_ind,
+        "cutoffmin": cutoffmin,
+        "out": out,
+        "normalize": normalize,
+    }
     ufunc_kwargs = {"n_dims": 1, "n_output": 2, "ravel": False, "check_shape": False}
     kwargs = {"input_core_dims": [["__sample__"]], "output_core_dims": [["__sample__"], []]}
     log_weights, pareto_shape = _wrap_xarray_ufunc(
@@ -953,7 +960,7 @@ def psislw(log_weights, reff=1.0):
     return log_weights, pareto_shape
 
 
-def _psislw(log_weights, cutoff_ind, cutoffmin):
+def _psislw(log_weights, cutoff_ind, cutoffmin, normalize):
     """
     Pareto smoothed importance sampling (PSIS) for a 1D vector.
 
@@ -963,7 +970,7 @@ def _psislw(log_weights, cutoff_ind, cutoffmin):
         Array of length n_observations
     cutoff_ind: int
     cutoffmin: float
-
+    normalize: bool
 
     Returns
     -------
@@ -975,7 +982,8 @@ def _psislw(log_weights, cutoff_ind, cutoffmin):
     x = np.asarray(log_weights)
 
     # improve numerical accuracy
-    x -= np.max(x)
+    max_x = np.max(x)
+    x -= max_x
     # sort the array
     x_sort_ind = np.argsort(x)
     # divide log weights into body and right tail
@@ -1007,8 +1015,12 @@ def _psislw(log_weights, cutoff_ind, cutoffmin):
     x[tailinds[x_tail_si]] = smoothed_tail
     # truncate smoothed values to the largest raw weight 0
     x[x > 0] = 0
+
     # renormalize weights
-    x -= _logsumexp(x)
+    if normalize:
+        x -= _logsumexp(x)
+    else:
+        x += max_x
 
     return x, k
 
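
The new `normalize` flag (default `True`, preserving the old behaviour) lets `psislw` return the smoothed log weights on their original scale: instead of subtracting the log-normalizer, the maximum that was removed for numerical stability is added back. A quick sketch (random inputs, illustrative shapes):

```python
import numpy as np
from arviz import psislw

rng = np.random.default_rng(0)
log_weights = rng.normal(size=(8, 1000))  # (n_observations, n_samples)

lw_norm, k = psislw(log_weights)                  # normalized (old behaviour)
lw_raw, _ = psislw(log_weights, normalize=False)  # unnormalized, original scale

# Normalized rows sum to one on the exponential scale.
np.testing.assert_allclose(np.exp(lw_norm).sum(axis=-1), 1.0)
```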
arviz/tests/base_tests/test_data.py
CHANGED

@@ -1501,10 +1501,6 @@ class TestJSON:
         assert not os.path.exists(filepath)
 
 
-@pytest.mark.skipif(
-    not (importlib.util.find_spec("datatree") or "ARVIZ_REQUIRE_ALL_DEPS" in os.environ),
-    reason="test requires xarray-datatree library",
-)
 class TestDataTree:
     def test_datatree(self):
         idata = load_arviz_data("centered_eight")
arviz/tests/base_tests/test_plots_bokeh.py
CHANGED

@@ -8,6 +8,7 @@ from pandas import DataFrame  # pylint: disable=wrong-import-position
 from scipy.stats import norm  # pylint: disable=wrong-import-position
 
 from ...data import from_dict, load_arviz_data  # pylint: disable=wrong-import-position
+from ...labels import MapLabeller  # pylint: disable=wrong-import-position
 from ...plots import (  # pylint: disable=wrong-import-position
     plot_autocorr,
     plot_bpv,
@@ -773,7 +774,6 @@ def test_plot_mcse_no_divergences(models):
         {"divergences": True, "var_names": ["theta", "mu"]},
         {"kind": "kde", "var_names": ["theta"]},
         {"kind": "hexbin", "var_names": ["theta"]},
-        {"kind": "hexbin", "var_names": ["theta"]},
         {
             "kind": "hexbin",
             "var_names": ["theta"],
@@ -785,6 +785,21 @@ def test_plot_mcse_no_divergences(models):
             "reference_values": {"mu": 0, "tau": 0},
             "reference_values_kwargs": {"line_color": "blue"},
         },
+        {
+            "var_names": ["mu", "tau"],
+            "reference_values": {"mu": 0, "tau": 0},
+            "labeller": MapLabeller({"mu": r"$\mu$", "theta": r"$\theta"}),
+        },
+        {
+            "var_names": ["theta"],
+            "reference_values": {"theta": [0.0] * 8},
+            "labeller": MapLabeller({"theta": r"$\theta$"}),
+        },
+        {
+            "var_names": ["theta"],
+            "reference_values": {"theta": np.zeros(8)},
+            "labeller": MapLabeller({"theta": r"$\theta$"}),
+        },
     ],
 )
 def test_plot_pair(models, kwargs):
@@ -1201,7 +1216,7 @@ def test_plot_dot_rotated(continuous_model, kwargs):
         },
     ],
 )
-def test_plot_lm(models, kwargs):
+def test_plot_lm_1d(models, kwargs):
     """Test functionality for 1D data."""
     idata = models.model_1
     if "constant_data" not in idata.groups():
@@ -1228,3 +1243,46 @@ def test_plot_lm_list():
     """Test the plots when input data is list or ndarray."""
     y = [1, 2, 3, 4, 5]
     assert plot_lm(y=y, x=np.arange(len(y)), show=False, backend="bokeh")
+
+
+def generate_lm_1d_data():
+    rng = np.random.default_rng()
+    return from_dict(
+        observed_data={"y": rng.normal(size=7)},
+        posterior_predictive={"y": rng.normal(size=(4, 1000, 7)) / 2},
+        posterior={"y_model": rng.normal(size=(4, 1000, 7))},
+        dims={"y": ["dim1"]},
+        coords={"dim1": range(7)},
+    )
+
+
+def generate_lm_2d_data():
+    rng = np.random.default_rng()
+    return from_dict(
+        observed_data={"y": rng.normal(size=(5, 7))},
+        posterior_predictive={"y": rng.normal(size=(4, 1000, 5, 7)) / 2},
+        posterior={"y_model": rng.normal(size=(4, 1000, 5, 7))},
+        dims={"y": ["dim1", "dim2"]},
+        coords={"dim1": range(5), "dim2": range(7)},
+    )
+
+
+@pytest.mark.parametrize("data", ("1d", "2d"))
+@pytest.mark.parametrize("kind", ("lines", "hdi"))
+@pytest.mark.parametrize("use_y_model", (True, False))
+def test_plot_lm(data, kind, use_y_model):
+    if data == "1d":
+        idata = generate_lm_1d_data()
+    else:
+        idata = generate_lm_2d_data()
+
+    kwargs = {"idata": idata, "y": "y", "kind_model": kind, "backend": "bokeh", "show": False}
+    if data == "2d":
+        kwargs["plot_dim"] = "dim1"
+    if use_y_model:
+        kwargs["y_model"] = "y_model"
+    if kind == "lines":
+        kwargs["num_samples"] = 50
+
+    ax = plot_lm(**kwargs)
+    assert ax is not None
arviz/tests/base_tests/test_plots_matplotlib.py
CHANGED

@@ -14,6 +14,7 @@ from pandas import DataFrame
 from scipy.stats import gaussian_kde, norm
 
 from ...data import from_dict, load_arviz_data
+from ...labels import MapLabeller
 from ...plots import (
     plot_autocorr,
     plot_bf,
@@ -599,6 +600,21 @@ def test_plot_kde_inference_data(models):
             "reference_values": {"mu": 0, "tau": 0},
             "reference_values_kwargs": {"c": "C0", "marker": "*"},
         },
+        {
+            "var_names": ["mu", "tau"],
+            "reference_values": {"mu": 0, "tau": 0},
+            "labeller": MapLabeller({"mu": r"$\mu$", "theta": r"$\theta"}),
+        },
+        {
+            "var_names": ["theta"],
+            "reference_values": {"theta": [0.0] * 8},
+            "labeller": MapLabeller({"theta": r"$\theta$"}),
+        },
+        {
+            "var_names": ["theta"],
+            "reference_values": {"theta": np.zeros(8)},
+            "labeller": MapLabeller({"theta": r"$\theta$"}),
+        },
     ],
 )
 def test_plot_pair(models, kwargs):
@@ -1914,7 +1930,7 @@ def test_wilkinson_algorithm(continuous_model):
         },
     ],
 )
-def test_plot_lm(models, kwargs):
+def test_plot_lm_1d(models, kwargs):
     """Test functionality for 1D data."""
     idata = models.model_1
     if "constant_data" not in idata.groups():
@@ -2102,3 +2118,63 @@ def test_plot_bf():
     )
     _, bf_plot = plot_bf(idata, var_name="a", ref_val=0)
     assert bf_plot is not None
+
+
+def generate_lm_1d_data():
+    rng = np.random.default_rng()
+    return from_dict(
+        observed_data={"y": rng.normal(size=7)},
+        posterior_predictive={"y": rng.normal(size=(4, 1000, 7)) / 2},
+        posterior={"y_model": rng.normal(size=(4, 1000, 7))},
+        dims={"y": ["dim1"]},
+        coords={"dim1": range(7)},
+    )
+
+
+def generate_lm_2d_data():
+    rng = np.random.default_rng()
+    return from_dict(
+        observed_data={"y": rng.normal(size=(5, 7))},
+        posterior_predictive={"y": rng.normal(size=(4, 1000, 5, 7)) / 2},
+        posterior={"y_model": rng.normal(size=(4, 1000, 5, 7))},
+        dims={"y": ["dim1", "dim2"]},
+        coords={"dim1": range(5), "dim2": range(7)},
+    )
+
+
+@pytest.mark.parametrize("data", ("1d", "2d"))
+@pytest.mark.parametrize("kind", ("lines", "hdi"))
+@pytest.mark.parametrize("use_y_model", (True, False))
+def test_plot_lm(data, kind, use_y_model):
+    if data == "1d":
+        idata = generate_lm_1d_data()
+    else:
+        idata = generate_lm_2d_data()
+
+    kwargs = {"idata": idata, "y": "y", "kind_model": kind}
+    if data == "2d":
+        kwargs["plot_dim"] = "dim1"
+    if use_y_model:
+        kwargs["y_model"] = "y_model"
+    if kind == "lines":
+        kwargs["num_samples"] = 50
+
+    ax = plot_lm(**kwargs)
+    assert ax is not None
+
+
+@pytest.mark.parametrize(
+    "coords, expected_vars",
+    [
+        ({"school": ["Choate"]}, ["theta"]),
+        ({"school": ["Lawrenceville"]}, ["theta"]),
+        ({}, ["theta"]),
+    ],
+)
+def test_plot_autocorr_coords(coords, expected_vars):
+    """Test plot_autocorr with coords kwarg."""
+    idata = load_arviz_data("centered_eight")
+
+    axes = plot_autocorr(idata, var_names=expected_vars, coords=coords, show=False)
+
+    assert axes is not None
arviz/tests/base_tests/test_stats.py
CHANGED

@@ -14,7 +14,7 @@ from scipy.stats import linregress, norm, halfcauchy
 from xarray import DataArray, Dataset
 from xarray_einstats.stats import XrContinuousRV
 
-from ...data import concat, convert_to_inference_data, from_dict, load_arviz_data
+from ...data import concat, convert_to_inference_data, from_dict, load_arviz_data, InferenceData
 from ...rcparams import rcParams
 from ...stats import (
     apply_test_function,
@@ -882,3 +882,44 @@ def test_bayes_factor():
     bf_dict1 = bayes_factor(idata, prior=np.random.normal(0, 10, 5000), var_name="a", ref_val=0)
     assert bf_dict0["BF10"] > bf_dict0["BF01"]
     assert bf_dict1["BF10"] < bf_dict1["BF01"]
+
+
+def test_compare_sorting_consistency():
+    chains, draws = 4, 1000
+
+    # Model 1 - good fit
+    log_lik1 = np.random.normal(-2, 1, size=(chains, draws))
+    posterior1 = Dataset(
+        {"theta": (("chain", "draw"), np.random.normal(0, 1, size=(chains, draws)))},
+        coords={"chain": range(chains), "draw": range(draws)},
+    )
+    log_like1 = Dataset(
+        {"y": (("chain", "draw"), log_lik1)},
+        coords={"chain": range(chains), "draw": range(draws)},
+    )
+    data1 = InferenceData(posterior=posterior1, log_likelihood=log_like1)
+
+    # Model 2 - poor fit (higher variance)
+    log_lik2 = np.random.normal(-5, 2, size=(chains, draws))
+    posterior2 = Dataset(
+        {"theta": (("chain", "draw"), np.random.normal(0, 1, size=(chains, draws)))},
+        coords={"chain": range(chains), "draw": range(draws)},
+    )
+    log_like2 = Dataset(
+        {"y": (("chain", "draw"), log_lik2)},
+        coords={"chain": range(chains), "draw": range(draws)},
+    )
+    data2 = InferenceData(posterior=posterior2, log_likelihood=log_like2)
+
+    # Compare models in different orders
+    comp_dict1 = {"M1": data1, "M2": data2}
+    comp_dict2 = {"M2": data2, "M1": data1}
+
+    comparison1 = compare(comp_dict1, method="bb-pseudo-bma")
+    comparison2 = compare(comp_dict2, method="bb-pseudo-bma")
+
+    assert comparison1.index.tolist() == comparison2.index.tolist()
+
+    se1 = comparison1["se"].values
+    se2 = comparison2["se"].values
+    np.testing.assert_array_almost_equal(se1, se2)