PyPI - arviz - Versions diffs - 0.18.0__py3-none-any.whl → 0.20.0__py3-none-any.whl - Mend

arviz 0.18.0-py3-none-any.whl → 0.20.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

arviz/__init__.py +2 -1
arviz/data/base.py +18 -7
arviz/data/converters.py +7 -3
arviz/data/inference_data.py +8 -0
arviz/data/io_cmdstan.py +4 -0
arviz/data/io_numpyro.py +1 -1
arviz/plots/backends/bokeh/ecdfplot.py +1 -2
arviz/plots/backends/bokeh/khatplot.py +8 -3
arviz/plots/backends/bokeh/pairplot.py +2 -6
arviz/plots/backends/matplotlib/ecdfplot.py +1 -2
arviz/plots/backends/matplotlib/khatplot.py +7 -3
arviz/plots/backends/matplotlib/traceplot.py +1 -1
arviz/plots/bpvplot.py +2 -2
arviz/plots/compareplot.py +4 -4
arviz/plots/densityplot.py +1 -1
arviz/plots/dotplot.py +2 -2
arviz/plots/ecdfplot.py +213 -89
arviz/plots/essplot.py +2 -2
arviz/plots/forestplot.py +3 -3
arviz/plots/hdiplot.py +2 -2
arviz/plots/kdeplot.py +9 -2
arviz/plots/khatplot.py +23 -6
arviz/plots/loopitplot.py +2 -2
arviz/plots/mcseplot.py +3 -1
arviz/plots/plot_utils.py +2 -4
arviz/plots/posteriorplot.py +1 -1
arviz/plots/rankplot.py +2 -2
arviz/plots/violinplot.py +1 -1
arviz/preview.py +17 -0
arviz/rcparams.py +27 -2
arviz/stats/diagnostics.py +13 -9
arviz/stats/ecdf_utils.py +168 -10
arviz/stats/stats.py +41 -20
arviz/stats/stats_utils.py +8 -6
arviz/tests/base_tests/test_data.py +11 -2
arviz/tests/base_tests/test_data_zarr.py +0 -1
arviz/tests/base_tests/test_diagnostics_numba.py +2 -7
arviz/tests/base_tests/test_helpers.py +2 -2
arviz/tests/base_tests/test_plot_utils.py +5 -13
arviz/tests/base_tests/test_plots_matplotlib.py +95 -2
arviz/tests/base_tests/test_rcparams.py +12 -0
arviz/tests/base_tests/test_stats.py +1 -1
arviz/tests/base_tests/test_stats_ecdf_utils.py +15 -2
arviz/tests/base_tests/test_stats_numba.py +2 -7
arviz/tests/base_tests/test_utils_numba.py +2 -5
arviz/tests/external_tests/test_data_pystan.py +5 -5
arviz/tests/helpers.py +17 -9
arviz/utils.py +4 -0
{arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/METADATA +23 -19
{arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/RECORD +53 -52
{arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/WHEEL +1 -1
{arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/LICENSE +0 -0
{arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/top_level.txt +0 -0

arviz/plots/ecdfplot.py CHANGED Viewed

@@ -1,24 +1,32 @@
 """Plot ecdf or ecdf-difference plot with confidence bands."""
+import warnings
 import numpy as np
 from scipy.stats import uniform
+try:
+    from scipy.stats import ecdf as scipy_ecdf
+except ImportError:
+    scipy_ecdf = None
 from ..rcparams import rcParams
-from ..stats.ecdf_utils import compute_ecdf, ecdf_confidence_band, _get_ecdf_points
+from ..stats.ecdf_utils import ecdf_confidence_band, _get_ecdf_points
+from ..utils import BehaviourChangeWarning
 from .plot_utils import get_plotting_function
 def plot_ecdf(
     values,
     values2=None,
+    eval_points=None,
     cdf=None,
     difference=False,
-    pit=False,
-    confidence_bands=None,
-    pointwise=False,
-    npoints=100,
+    confidence_bands=False,
+    ci_prob=None,
     num_trials=500,
-    fpr=0.05,
+    rvs=None,
+    random_state=None,
     figsize=None,
     fill_band=True,
     plot_kwargs=None,
@@ -28,15 +36,19 @@ def plot_ecdf(
     show=None,
     backend=None,
     backend_kwargs=None,
+    npoints=100,
+    pointwise=False,
+    fpr=None,
+    pit=False,
     **kwargs,
 ):
     r"""Plot ECDF or ECDF-Difference Plot with Confidence bands.
-    Plots of the empirical CDF estimates of an array. When `values2` argument is provided,
-    the two empirical CDFs are overlaid with the distribution of `values` on top
-    (in a darker shade) and confidence bands in a more transparent shade. Optionally, the difference
-    between the two empirical CDFs can be computed, and the PIT for a single dataset or a comparison
-    between two samples.
+    Plots of the empirical cumulative distribution function (ECDF) of an array. Optionally, A `cdf`
+    argument representing a reference CDF may be provided for comparison using a difference ECDF
+    plot and/or confidence bands.
+    Alternatively, the PIT for a single dataset may be visualized.
     Notes
     -----
@@ -47,26 +59,40 @@ def plot_ecdf(
     values : array-like
         Values to plot from an unknown continuous or discrete distribution.
     values2 : array-like, optional
-        Values to compare to the original sample.
+        values to compare to the original sample.
+        .. deprecated:: 0.18.0
+           Instead use ``cdf=scipy.stats.ecdf(values2).cdf.evaluate``.
     cdf : callable, optional
         Cumulative distribution function of the distribution to compare the original sample.
         The function must take as input a numpy array of draws from the distribution.
     difference : bool, default False
         If True then plot ECDF-difference plot otherwise ECDF plot.
-    pit : bool, default False
-        If True plots the ECDF or ECDF-diff of PIT of sample.
-    confidence_bands : bool, default None
-        If True plots the simultaneous or pointwise confidence bands with `1 - fpr`
-        confidence level.
-    pointwise : bool, default False
-        If True plots pointwise confidence bands otherwise simultaneous bands.
-    npoints : int, default 100
-        This denotes the granularity size of our plot i.e the number of evaluation points
-        for the ecdf or ecdf-difference plots.
+    confidence_bands : str or bool
+        - False: No confidence bands are plotted (default).
+        - True: Plot bands computed with the default algorithm (subject to change)
+        - "pointwise": Compute the pointwise (i.e. marginal) confidence band.
+        - "optimized": Use optimization to estimate a simultaneous confidence band.
+        - "simulated": Use Monte Carlo simulation to estimate a simultaneous confidence
+          band.
+        For simultaneous confidence bands to be correctly calibrated, provide `eval_points` that
+        are not dependent on the `values`.
+    ci_prob : float, default 0.94
+        The probability that the true ECDF lies within the confidence band. If `confidence_bands`
+        is "pointwise", this is the marginal probability instead of the joint probability.
+    eval_points : array-like, optional
+        The points at which to evaluate the ECDF. If None, `npoints` uniformly spaced points
+        between the data bounds will be used.
+    rvs: callable, optional
+        A function that takes an integer `ndraws` and optionally the object passed to
+        `random_state` and returns an array of `ndraws` samples from the same distribution
+        as the original dataset. Required if `method` is "simulated" and variable is discrete.
+    random_state : int, numpy.random.Generator or numpy.random.RandomState, optional
     num_trials : int, default 500
-        The number of random ECDFs to generate for constructing simultaneous confidence bands.
-    fpr : float, default 0.05
-        The type I error rate s.t `1 - fpr` denotes the confidence level of bands.
+        The number of random ECDFs to generate for constructing simultaneous confidence bands
+        (if `confidence_bands` is "simulated").
     figsize : (float,float), optional
         Figure size. If `None` it will be defined automatically.
     fill_band : bool, default True
@@ -91,6 +117,26 @@ def plot_ecdf(
         These are kwargs specific to the backend being used, passed to
         :func:`matplotlib.pyplot.subplots` or :class:`bokeh.plotting.figure`.
         For additional documentation check the plotting method of the backend.
+    npoints : int, default 100
+        The number of evaluation points for the ecdf or ecdf-difference plots, if `eval_points` is
+        not provided or `pit` is `True`.
+        .. deprecated:: 0.18.0
+           Instead specify ``eval_points=np.linspace(np.min(values), np.max(values), npoints)``
+           unless `pit` is `True`.
+    pointwise : bool, default False
+        .. deprecated:: 0.18.0
+           Instead use `confidence_bands="pointwise"`.
+    fpr : float, optional
+        .. deprecated:: 0.18.0
+           Instead use `ci_prob=1-fpr`.
+    pit : bool, default False
+        If True plots the ECDF or ECDF-diff of PIT of sample.
+        .. deprecated:: 0.18.0
+           See below example instead.
     Returns
     -------
@@ -98,135 +144,213 @@ def plot_ecdf(
     References
     ----------
-    .. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A., 2021. Graphical Test for
+    .. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A. (2022). Graphical Test for
         Discrete Uniformity and its Applications in Goodness of Fit Evaluation and
-        Multiple Sample Comparison. arXiv preprint arXiv:2103.10522.
+        Multiple Sample Comparison. Statistics and Computing, 32(32).
     Examples
     --------
-    Plot ecdf plot for a given sample
+    In a future release, the default behaviour of ``plot_ecdf`` will change.
+    To maintain the original behaviour you should do:
     .. plot::
         :context: close-figs
         >>> import arviz as az
-        >>> from scipy.stats import uniform, binom, norm
+        >>> import numpy as np
+        >>> from scipy.stats import uniform, norm
+        >>>
         >>> sample = norm(0,1).rvs(1000)
-        >>> az.plot_ecdf(sample)
+        >>> npoints = 100
+        >>> az.plot_ecdf(sample, eval_points=np.linspace(sample.min(), sample.max(), npoints))
-    Plot ecdf plot with confidence bands for comparing a given sample w.r.t a given distribution
+    However, seeing this warning isn't an indicator of anything being wrong,
+    if you are happy to get different behaviour as ArviZ improves and adds
+    new algorithms you can ignore it like so:
     .. plot::
         :context: close-figs
-        >>> distribution = norm(0,1)
-        >>> az.plot_ecdf(sample, cdf = distribution.cdf, confidence_bands = True)
+        >>> import warnings
+        >>> warnings.filterwarnings("ignore", category=az.utils.BehaviourChangeWarning)
-    Plot ecdf-difference plot with confidence bands for comparing a given sample
-    w.r.t a given distribution
+    Plot an ECDF plot for a given sample evaluated at the sample points. This will become
+    the new behaviour when `eval_points` is not provided:
     .. plot::
         :context: close-figs
-        >>> az.plot_ecdf(sample, cdf = distribution.cdf,
-        >>>              confidence_bands = True, difference = True)
+        >>> az.plot_ecdf(sample, eval_points=np.unique(sample))
-    Plot ecdf plot with confidence bands for PIT of sample for comparing a given sample
-    w.r.t a given distribution
+    Plot an ECDF plot with confidence bands for comparing a given sample to a given distribution.
+    We manually specify evaluation points independent of the values so that the confidence bands
+    are correctly calibrated.
     .. plot::
         :context: close-figs
-        >>> az.plot_ecdf(sample, cdf = distribution.cdf,
-        >>>              confidence_bands = True, pit = True)
+        >>> distribution = norm(0,1)
+        >>> eval_points = np.linspace(*distribution.ppf([0.001, 0.999]), 100)
+        >>> az.plot_ecdf(
+        >>>     sample, eval_points=eval_points,
+        >>>     cdf=distribution.cdf, confidence_bands=True
+        >>> )
-    Plot ecdf-difference plot with confidence bands for PIT of sample for comparing a given
-    sample w.r.t a given distribution
+    Plot an ECDF-difference plot with confidence bands for comparing a given sample
+    to a given distribution.
     .. plot::
         :context: close-figs
-        >>> az.plot_ecdf(sample, cdf = distribution.cdf,
-        >>>              confidence_bands = True, difference = True, pit = True)
+        >>> az.plot_ecdf(
+        >>>     sample, cdf=distribution.cdf,
+        >>>     confidence_bands=True, difference=True
+        >>> )
-    You could also plot the above w.r.t another sample rather than a given distribution.
-    For eg: Plot ecdf-difference plot with confidence bands for PIT of sample for
-    comparing a given sample w.r.t a given sample
+    Plot an ECDF plot with confidence bands for the probability integral transform (PIT) of a
+    continuous sample. If drawn from the reference distribution, the PIT values should be uniformly
+    distributed.
     .. plot::
         :context: close-figs
-        >>> sample2 = norm(0,1).rvs(5000)
-        >>> az.plot_ecdf(sample, sample2, confidence_bands = True, difference = True, pit = True)
-    """
-    if confidence_bands is None:
-        confidence_bands = (values2 is not None) or (cdf is not None)
-    if values2 is None and cdf is None and confidence_bands is True:
-        raise ValueError("For confidence bands you need to specify values2 or the cdf")
+        >>> pit_vals = distribution.cdf(sample)
+        >>> uniform_dist = uniform(0, 1)
+        >>> az.plot_ecdf(
+        >>>     pit_vals, cdf=uniform_dist.cdf, confidence_bands=True,
+        >>> )
-    if cdf is not None and values2 is not None:
-        raise ValueError("To compare sample you need either cdf or values2 and not both")
+    Plot an ECDF-difference plot of PIT values.
-    if values2 is None and cdf is None and pit is True:
-        raise ValueError("For PIT specify either cdf or values2")
+    .. plot::
+        :context: close-figs
-    if values2 is None and cdf is None and difference is True:
-        raise ValueError("For ECDF difference plot need either cdf or values2")
+        >>> az.plot_ecdf(
+        >>>     pit_vals, cdf = uniform_dist.cdf, confidence_bands = True,
+        >>>     difference = True
+        >>> )
+    """
+    if confidence_bands is True:
+        if pointwise:
+            warnings.warn(
+                "`pointwise` has been deprecated. Use `confidence_bands='pointwise'` instead.",
+                FutureWarning,
+            )
+            confidence_bands = "pointwise"
+        else:
+            confidence_bands = "auto"
+        # if pointwise specified, confidence_bands must be a bool or 'pointwise'
+    elif confidence_bands not in [False, "pointwise"] and pointwise:
+        raise ValueError(
+            f"Cannot specify both `confidence_bands='{confidence_bands}'` and `pointwise=True`"
+        )
+    if fpr is not None:
+        warnings.warn(
+            "`fpr` has been deprecated. Use `ci_prob=1-fpr` or set `rcParam['stats.ci_prob']` to"
+            "`1-fpr`.",
+            FutureWarning,
+        )
+        if ci_prob is not None:
+            raise ValueError("Cannot specify both `fpr` and `ci_prob`")
+        ci_prob = 1 - fpr
+    if ci_prob is None:
+        ci_prob = rcParams["stats.ci_prob"]
     if values2 is not None:
-        values2 = np.ravel(values2)
-        values2.sort()
+        if cdf is not None:
+            raise ValueError("You cannot specify both `values2` and `cdf`")
+        if scipy_ecdf is None:
+            raise ValueError(
+                "The `values2` argument is deprecated and `scipy.stats.ecdf` is not available. "
+                "Please use `cdf` instead."
+            )
+        warnings.warn(
+            "`values2` has been deprecated. Use `cdf=scipy.stats.ecdf(values2).cdf.evaluate` "
+            "instead.",
+            FutureWarning,
+        )
+        cdf = scipy_ecdf(np.ravel(values2)).cdf.evaluate
+    if cdf is None:
+        if confidence_bands:
+            raise ValueError("For confidence bands you must specify cdf")
+        if difference is True:
+            raise ValueError("For ECDF difference plot you must specify cdf")
+        if pit:
+            raise ValueError("For PIT plot you must specify cdf")
     values = np.ravel(values)
     values.sort()
     if pit:
-        eval_points = np.linspace(1 / npoints, 1, npoints)
-        if cdf:
-            sample = cdf(values)
-        else:
-            sample = compute_ecdf(values2, values) / len(values2)
-        cdf_at_eval_points = eval_points
+        warnings.warn(
+            "`pit` has been deprecated. Specify `values=cdf(values)` instead.",
+            FutureWarning,
+        )
+        values = cdf(values)
+        cdf = uniform(0, 1).cdf
         rvs = uniform(0, 1).rvs
-    else:
+        eval_points = np.linspace(1 / npoints, 1, npoints)
+    if eval_points is None:
+        warnings.warn(
+            "In future versions, if `eval_points` is not provided, then the ECDF will be evaluated"
+            " at the unique values of the sample. To keep the current behavior, provide "
+            "`eval_points` explicitly.",
+            BehaviourChangeWarning,
+        )
+        if confidence_bands in ["optimized", "simulated"]:
+            warnings.warn(
+                "For simultaneous bands to be correctly calibrated, specify `eval_points` "
+                "independent of the `values`"
+            )
         eval_points = np.linspace(values[0], values[-1], npoints)
-        sample = values
-        if confidence_bands or difference:
-            if cdf:
-                cdf_at_eval_points = cdf(eval_points)
-            else:
-                cdf_at_eval_points = compute_ecdf(values2, eval_points)
-        else:
-            cdf_at_eval_points = np.zeros_like(eval_points)
-        rvs = None
+    else:
+        eval_points = np.asarray(eval_points)
-    x_coord, y_coord = _get_ecdf_points(sample, eval_points, difference)
+    if difference or confidence_bands:
+        cdf_at_eval_points = cdf(eval_points)
+    else:
+        cdf_at_eval_points = np.zeros_like(eval_points)
+    x_coord, y_coord = _get_ecdf_points(values, eval_points, difference)
     if difference:
         y_coord -= cdf_at_eval_points
     if confidence_bands:
         ndraws = len(values)
-        band_kwargs = {"prob": 1 - fpr, "num_trials": num_trials, "rvs": rvs, "random_state": None}
-        band_kwargs["method"] = "pointwise" if pointwise else "simulated"
-        lower, higher = ecdf_confidence_band(ndraws, eval_points, cdf_at_eval_points, **band_kwargs)
+        if confidence_bands == "auto":
+            if ndraws < 200 or num_trials >= 250 * np.sqrt(ndraws):
+                confidence_bands = "optimized"
+            else:
+                confidence_bands = "simulated"
+        x_bands = eval_points
+        lower, higher = ecdf_confidence_band(
+            ndraws,
+            eval_points,
+            cdf_at_eval_points,
+            method=confidence_bands,
+            prob=ci_prob,
+            num_trials=num_trials,
+            rvs=rvs,
+            random_state=random_state,
+        )
         if difference:
             lower -= cdf_at_eval_points
             higher -= cdf_at_eval_points
     else:
-        lower, higher = None, None
+        x_bands, lower, higher = None, None, None
     ecdf_plot_args = dict(
         x_coord=x_coord,
         y_coord=y_coord,
-        x_bands=eval_points,
+        x_bands=x_bands,
         lower=lower,
         higher=higher,
-        confidence_bands=confidence_bands,
         figsize=figsize,
         fill_band=fill_band,
         plot_kwargs=plot_kwargs,

arviz/plots/essplot.py CHANGED Viewed

@@ -138,9 +138,9 @@ def plot_ess(
     References
     ----------
-    .. [1] Vehtari et al. (2019). Rank-normalization, folding, and
+    .. [1] Vehtari et al. (2021). Rank-normalization, folding, and
         localization: An improved Rhat for assessing convergence of
-        MCMC https://arxiv.org/abs/1903.08008
+        MCMC. Bayesian analysis, 16(2):667-718.
     Examples
     --------

arviz/plots/forestplot.py CHANGED Viewed

@@ -55,8 +55,8 @@ def plot_forest(
         Specify the kind of plot:
         * The ``kind="forestplot"`` generates credible intervals, where the central points are the
-          estimated posterior means, the thick lines are the central quartiles, and the thin lines
-          represent the :math:`100\times`(`hdi_prob`)% highest density intervals.
+          estimated posterior median, the thick lines are the central quartiles, and the thin lines
+          represent the :math:`100\times(hdi\_prob)\%` highest density intervals.
         * The ``kind="ridgeplot"`` option generates density plots (kernel density estimate or
           histograms) in the same graph. Ridge plots can be configured to have different overlap,
           truncation bounds and quantile markers.
@@ -246,7 +246,7 @@ def plot_forest(
         width_ratios.append(1)
     if hdi_prob is None:
-        hdi_prob = rcParams["stats.hdi_prob"]
+        hdi_prob = rcParams["stats.ci_prob"]
     elif not 1 >= hdi_prob > 0:
         raise ValueError("The value of hdi_prob should be in the interval (0, 1]")

arviz/plots/hdiplot.py CHANGED Viewed

@@ -42,7 +42,7 @@ def plot_hdi(
     hdi_data : array_like, optional
         Precomputed HDI values to use. Assumed shape is ``(*x.shape, 2)``.
     hdi_prob : float, optional
-        Probability for the highest density interval. Defaults to ``stats.hdi_prob`` rcParam.
+        Probability for the highest density interval. Defaults to ``stats.ci_prob`` rcParam.
         See :ref:`this section <common_ hdi_prob>` for usage examples.
     color : str, default "C1"
         Color used for the limits of the HDI and fill. Should be a valid matplotlib color.
@@ -155,7 +155,7 @@ def plot_hdi(
     else:
         y = np.asarray(y)
         if hdi_prob is None:
-            hdi_prob = rcParams["stats.hdi_prob"]
+            hdi_prob = rcParams["stats.ci_prob"]
         elif not 1 >= hdi_prob > 0:
             raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
         hdi_data = hdi(y, hdi_prob=hdi_prob, circular=circular, multimodal=False, **hdi_kwargs)

arviz/plots/kdeplot.py CHANGED Viewed

@@ -72,7 +72,7 @@ def plot_kde(
         If True plot the 2D KDE using contours, otherwise plot a smooth 2D KDE.
     hdi_probs : list, optional
         Plots highest density credibility regions for the provided probabilities for a 2D KDE.
-        Defaults to matplotlib chosen levels with no fixed probability associated.
+        Defaults to [0.5, 0.8, 0.94].
     fill_last : bool, default False
         If True fill the last contour of the 2D KDE plot.
     figsize : (float, float), optional
@@ -270,6 +270,9 @@ def plot_kde(
         gridsize = (128, 128) if contour else (256, 256)
         density, xmin, xmax, ymin, ymax = _fast_kde_2d(values, values2, gridsize=gridsize)
+        if hdi_probs is None:
+            hdi_probs = [0.5, 0.8, 0.94]
         if hdi_probs is not None:
             # Check hdi probs are within bounds (0, 1)
             if min(hdi_probs) <= 0 or max(hdi_probs) >= 1:
@@ -289,7 +292,11 @@ def plot_kde(
                     "Using 'hdi_probs' in favor of 'levels'.",
                     UserWarning,
                 )
-            contour_kwargs["levels"] = contour_level_list
+            if backend == "bokeh":
+                contour_kwargs["levels"] = contour_level_list
+            elif backend == "matplotlib":
+                contour_kwargs["levels"] = contour_level_list[1:]
             contourf_kwargs = _init_kwargs_dict(contourf_kwargs)
             if "levels" in contourf_kwargs:

arviz/plots/khatplot.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Pareto tail indices plot."""
 import logging
+import warnings
 import numpy as np
 from xarray import DataArray
@@ -40,10 +41,8 @@ def plot_khat(
     Parameters
     ----------
-    khats : ELPDData or array-like
-        The input Pareto tail indices to be plotted. It can be an ``ELPDData`` object containing
-        Pareto shapes or an array. In this second case, all the values in the array are interpreted
-        as Pareto tail indices.
+    khats : ELPDData
+        The input Pareto tail indices to be plotted.
     color : str or array_like, default "C0"
         Colors of the scatter plot, if color is a str all dots will have the same color,
         if it is the size of the observations, each dot will have the specified color,
@@ -149,8 +148,9 @@ def plot_khat(
     References
     ----------
-    .. [1] Vehtari, A., Simpson, D., Gelman, A., Yao, Y., Gabry, J.,
-        2019. Pareto Smoothed Importance Sampling. arXiv:1507.02646 [stat].
+    .. [1] Vehtari, A., Simpson, D., Gelman, A., Yao, Y., Gabry, J. (2024).
+        Pareto Smoothed Importance Sampling. Journal of Machine Learning
+        Research, 25(72):1-58.
     """
     if annotate:
@@ -164,13 +164,29 @@ def plot_khat(
         color = "C0"
     if isinstance(khats, np.ndarray):
+        warnings.warn(
+            "support for arrays will be deprecated, please use ELPDData."
+            "The reason for this, is that we need to know the numbers of draws"
+            "sampled from the posterior",
+            FutureWarning,
+        )
         khats = khats.flatten()
         xlabels = False
         legend = False
         dims = []
+        good_k = None
     else:
         if isinstance(khats, ELPDData):
+            good_k = khats.good_k
             khats = khats.pareto_k
+        else:
+            good_k = None
+            warnings.warn(
+                "support for DataArrays will be deprecated, please use ELPDData."
+                "The reason for this, is that we need to know the numbers of draws"
+                "sampled from the posterior",
+                FutureWarning,
+            )
         if not isinstance(khats, DataArray):
             raise ValueError("Incorrect khat data input. Check the documentation")
@@ -191,6 +207,7 @@ def plot_khat(
         figsize=figsize,
         xdata=xdata,
         khats=khats,
+        good_k=good_k,
         kwargs=kwargs,
         threshold=threshold,
         coord_labels=coord_labels,

arviz/plots/loopitplot.py CHANGED Viewed

@@ -55,7 +55,7 @@ def plot_loo_pit(
         In this case, instead of overlaying uniform distributions, the beta ``hdi_prob``
         around the theoretical uniform CDF is shown. This approximation only holds
         for large S and ECDF values not very close to 0 nor 1. For more information, see
-        `Vehtari et al. (2019)`, `Appendix G <https://avehtari.github.io/rhat_ess/rhat_ess.html>`_.
+        `Vehtari et al. (2021)`, `Appendix G <https://avehtari.github.io/rhat_ess/rhat_ess.html>`_.
     ecdf_fill : bool, optional
         Use :meth:`matplotlib.axes.Axes.fill_between` to mark the area
         inside the credible interval. Otherwise, plot the
@@ -159,7 +159,7 @@ def plot_loo_pit(
     x_vals = None
     if hdi_prob is None:
-        hdi_prob = rcParams["stats.hdi_prob"]
+        hdi_prob = rcParams["stats.ci_prob"]
     elif not 1 >= hdi_prob > 0:
         raise ValueError("The value of hdi_prob should be in the interval (0, 1]")

arviz/plots/mcseplot.py CHANGED Viewed

@@ -109,7 +109,9 @@ def plot_mcse(
     References
     ----------
-    * Vehtari et al. (2019) see https://arxiv.org/abs/1903.08008
+    .. [1] Vehtari et al. (2021). Rank-normalization, folding, and
+        localization: An improved Rhat for assessing convergence of
+        MCMC. Bayesian analysis, 16(2):667-718.
     Examples
     --------

arviz/plots/plot_utils.py CHANGED Viewed

@@ -245,10 +245,8 @@ def format_coords_as_labels(dataarray, skip_dims=None):
     coord_labels = coord_labels.values
     if isinstance(coord_labels[0], tuple):
         fmt = ", ".join(["{}" for _ in coord_labels[0]])
-        coord_labels[:] = [fmt.format(*x) for x in coord_labels]
-    else:
-        coord_labels[:] = [f"{s}" for s in coord_labels]
-    return coord_labels
+        return np.array([fmt.format(*x) for x in coord_labels])
+    return np.array([f"{s}" for s in coord_labels])
 def set_xticklabels(ax, coord_labels):

arviz/plots/posteriorplot.py CHANGED Viewed

@@ -237,7 +237,7 @@ def plot_posterior(
         labeller = BaseLabeller()
     if hdi_prob is None:
-        hdi_prob = rcParams["stats.hdi_prob"]
+        hdi_prob = rcParams["stats.ci_prob"]
     elif hdi_prob not in (None, "hide"):
         if not 1 >= hdi_prob > 0:
             raise ValueError("The value of hdi_prob should be in the interval (0, 1]")

arviz/plots/rankplot.py CHANGED Viewed

@@ -46,8 +46,8 @@ def plot_rank(
     indicates good mixing of the chains.
     This plot was introduced by Aki Vehtari, Andrew Gelman, Daniel Simpson, Bob Carpenter,
-    Paul-Christian Burkner (2019): Rank-normalization, folding, and localization: An improved R-hat
-    for assessing convergence of MCMC. arXiv preprint https://arxiv.org/abs/1903.08008
+    Paul-Christian Burkner (2021): Rank-normalization, folding, and localization:
+    An improved R-hat for assessing convergence of MCMC. Bayesian analysis, 16(2):667-718.
     Parameters

arviz/plots/violinplot.py CHANGED Viewed

@@ -152,7 +152,7 @@ def plot_violin(
     rows, cols = default_grid(len(plotters), grid=grid)
     if hdi_prob is None:
-        hdi_prob = rcParams["stats.hdi_prob"]
+        hdi_prob = rcParams["stats.ci_prob"]
     elif not 1 >= hdi_prob > 0:
         raise ValueError("The value of hdi_prob should be in the interval (0, 1]")

arviz 0.18.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

arviz 0.18.0-py3-none-any.whl → 0.20.0-py3-none-any.whl