arviz 0.16.1__py3-none-any.whl → 0.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arviz/__init__.py +1 -1
- arviz/data/inference_data.py +34 -7
- arviz/data/io_beanmachine.py +6 -1
- arviz/data/io_cmdstanpy.py +439 -50
- arviz/data/io_pyjags.py +5 -2
- arviz/data/io_pystan.py +1 -2
- arviz/labels.py +2 -0
- arviz/plots/backends/bokeh/bpvplot.py +7 -2
- arviz/plots/backends/bokeh/compareplot.py +7 -4
- arviz/plots/backends/bokeh/densityplot.py +0 -1
- arviz/plots/backends/bokeh/distplot.py +0 -2
- arviz/plots/backends/bokeh/forestplot.py +3 -5
- arviz/plots/backends/bokeh/kdeplot.py +0 -2
- arviz/plots/backends/bokeh/pairplot.py +0 -4
- arviz/plots/backends/matplotlib/bfplot.py +0 -1
- arviz/plots/backends/matplotlib/bpvplot.py +3 -3
- arviz/plots/backends/matplotlib/compareplot.py +1 -1
- arviz/plots/backends/matplotlib/dotplot.py +1 -1
- arviz/plots/backends/matplotlib/forestplot.py +2 -4
- arviz/plots/backends/matplotlib/kdeplot.py +0 -1
- arviz/plots/backends/matplotlib/khatplot.py +0 -1
- arviz/plots/backends/matplotlib/lmplot.py +4 -5
- arviz/plots/backends/matplotlib/pairplot.py +0 -1
- arviz/plots/backends/matplotlib/ppcplot.py +8 -5
- arviz/plots/backends/matplotlib/traceplot.py +1 -2
- arviz/plots/bfplot.py +7 -6
- arviz/plots/bpvplot.py +7 -2
- arviz/plots/compareplot.py +2 -2
- arviz/plots/ecdfplot.py +37 -112
- arviz/plots/elpdplot.py +1 -1
- arviz/plots/essplot.py +2 -2
- arviz/plots/kdeplot.py +0 -1
- arviz/plots/pairplot.py +1 -1
- arviz/plots/plot_utils.py +0 -1
- arviz/plots/ppcplot.py +51 -45
- arviz/plots/separationplot.py +0 -1
- arviz/stats/__init__.py +2 -0
- arviz/stats/density_utils.py +2 -2
- arviz/stats/diagnostics.py +2 -3
- arviz/stats/ecdf_utils.py +165 -0
- arviz/stats/stats.py +241 -38
- arviz/stats/stats_utils.py +36 -7
- arviz/tests/base_tests/test_data.py +73 -5
- arviz/tests/base_tests/test_plots_bokeh.py +0 -1
- arviz/tests/base_tests/test_plots_matplotlib.py +24 -1
- arviz/tests/base_tests/test_stats.py +43 -1
- arviz/tests/base_tests/test_stats_ecdf_utils.py +153 -0
- arviz/tests/base_tests/test_stats_utils.py +3 -3
- arviz/tests/external_tests/test_data_beanmachine.py +2 -0
- arviz/tests/external_tests/test_data_numpyro.py +3 -3
- arviz/tests/external_tests/test_data_pyjags.py +3 -1
- arviz/tests/external_tests/test_data_pyro.py +3 -3
- arviz/tests/helpers.py +8 -8
- arviz/utils.py +15 -7
- arviz/wrappers/wrap_pymc.py +1 -1
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/METADATA +16 -15
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/RECORD +60 -58
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/WHEEL +1 -1
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/LICENSE +0 -0
- {arviz-0.16.1.dist-info → arviz-0.17.1.dist-info}/top_level.txt +0 -0
arviz/plots/kdeplot.py
CHANGED
arviz/plots/pairplot.py
CHANGED
arviz/plots/plot_utils.py
CHANGED
|
@@ -364,7 +364,6 @@ def calculate_point_estimate(point_estimate, values, bw="default", circular=Fals
|
|
|
364
364
|
else:
|
|
365
365
|
point_value = int(mode(values).mode)
|
|
366
366
|
elif point_estimate == "median":
|
|
367
|
-
|
|
368
367
|
point_value = np.nanmedian(values) if skipna else np.median(values)
|
|
369
368
|
return point_value
|
|
370
369
|
|
arviz/plots/ppcplot.py
CHANGED
|
@@ -19,7 +19,7 @@ def plot_ppc(
|
|
|
19
19
|
kind="kde",
|
|
20
20
|
alpha=None,
|
|
21
21
|
mean=True,
|
|
22
|
-
observed=
|
|
22
|
+
observed=None,
|
|
23
23
|
observed_rug=False,
|
|
24
24
|
color=None,
|
|
25
25
|
colors=None,
|
|
@@ -50,37 +50,35 @@ def plot_ppc(
|
|
|
50
50
|
|
|
51
51
|
Parameters
|
|
52
52
|
----------
|
|
53
|
-
data:
|
|
53
|
+
data : InferenceData
|
|
54
54
|
:class:`arviz.InferenceData` object containing the observed and posterior/prior
|
|
55
55
|
predictive data.
|
|
56
|
-
kind: str
|
|
57
|
-
Type of plot to display ("kde", "cumulative", or "scatter").
|
|
58
|
-
alpha: float
|
|
56
|
+
kind : str, default "kde"
|
|
57
|
+
Type of plot to display ("kde", "cumulative", or "scatter").
|
|
58
|
+
alpha : float, optional
|
|
59
59
|
Opacity of posterior/prior predictive density curves.
|
|
60
60
|
Defaults to 0.2 for ``kind = kde`` and cumulative, for scatter defaults to 0.7.
|
|
61
|
-
mean: bool
|
|
61
|
+
mean : bool, default True
|
|
62
62
|
Whether or not to plot the mean posterior/prior predictive distribution.
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
observed_rug: bool, default False
|
|
63
|
+
observed : bool, optional
|
|
64
|
+
Whether or not to plot the observed data. Defaults to True for ``group = posterior``
|
|
65
|
+
and False for ``group = prior``.
|
|
66
|
+
observed_rug : bool, default False
|
|
67
67
|
Whether or not to plot a rug plot for the observed data. Only valid if `observed` is
|
|
68
68
|
`True` and for kind `kde` or `cumulative`.
|
|
69
|
-
color:
|
|
70
|
-
Valid matplotlib ``color``. Defaults to ``C0``.
|
|
71
|
-
color: list
|
|
69
|
+
color : list, optional
|
|
72
70
|
List with valid matplotlib colors corresponding to the posterior/prior predictive
|
|
73
71
|
distribution, observed data and mean of the posterior/prior predictive distribution.
|
|
74
72
|
Defaults to ["C0", "k", "C1"].
|
|
75
|
-
grid : tuple
|
|
73
|
+
grid : tuple, optional
|
|
76
74
|
Number of rows and columns. Defaults to None, the rows and columns are
|
|
77
75
|
automatically inferred.
|
|
78
|
-
figsize: tuple
|
|
76
|
+
figsize : tuple, optional
|
|
79
77
|
Figure size. If None, it will be defined automatically.
|
|
80
|
-
textsize: float
|
|
78
|
+
textsize : float, optional
|
|
81
79
|
Text size scaling factor for labels, titles and lines. If None, it will be
|
|
82
80
|
autoscaled based on ``figsize``.
|
|
83
|
-
data_pairs: dict
|
|
81
|
+
data_pairs : dict, optional
|
|
84
82
|
Dictionary containing relations between observed data and posterior/prior predictive data.
|
|
85
83
|
Dictionary structure:
|
|
86
84
|
|
|
@@ -90,84 +88,86 @@ def plot_ppc(
|
|
|
90
88
|
For example, ``data_pairs = {'y' : 'y_hat'}``
|
|
91
89
|
If None, it will assume that the observed data and the posterior/prior
|
|
92
90
|
predictive data have the same variable name.
|
|
93
|
-
var_names: list of
|
|
91
|
+
var_names : list of str, optional
|
|
94
92
|
Variables to be plotted, if `None` all variable are plotted. Prefix the
|
|
95
93
|
variables by ``~`` when you want to exclude them from the plot.
|
|
96
|
-
filter_vars: {None, "like", "regex"},
|
|
94
|
+
filter_vars : {None, "like", "regex"}, default None
|
|
97
95
|
If `None` (default), interpret var_names as the real variables names. If "like",
|
|
98
96
|
interpret var_names as substrings of the real variables names. If "regex",
|
|
99
97
|
interpret var_names as regular expressions on the real variables names. A la
|
|
100
98
|
``pandas.filter``.
|
|
101
|
-
coords: dict
|
|
99
|
+
coords : dict, optional
|
|
102
100
|
Dictionary mapping dimensions to selected coordinates to be plotted.
|
|
103
101
|
Dimensions without a mapping specified will include all coordinates for
|
|
104
102
|
that dimension. Defaults to including all coordinates for all
|
|
105
103
|
dimensions if None.
|
|
106
|
-
flatten: list
|
|
104
|
+
flatten : list
|
|
107
105
|
List of dimensions to flatten in ``observed_data``. Only flattens across the coordinates
|
|
108
106
|
specified in the ``coords`` argument. Defaults to flattening all of the dimensions.
|
|
109
|
-
flatten_pp: list
|
|
107
|
+
flatten_pp : list
|
|
110
108
|
List of dimensions to flatten in posterior_predictive/prior_predictive. Only flattens
|
|
111
109
|
across the coordinates specified in the ``coords`` argument. Defaults to flattening all
|
|
112
110
|
of the dimensions. Dimensions should match flatten excluding dimensions for ``data_pairs``
|
|
113
111
|
parameters. If ``flatten`` is defined and ``flatten_pp`` is None, then
|
|
114
112
|
``flatten_pp = flatten``.
|
|
115
|
-
num_pp_samples: int
|
|
113
|
+
num_pp_samples : int
|
|
116
114
|
The number of posterior/prior predictive samples to plot. For ``kind`` = 'scatter' and
|
|
117
115
|
``animation = False`` if defaults to a maximum of 5 samples and will set jitter to 0.7.
|
|
118
116
|
unless defined. Otherwise it defaults to all provided samples.
|
|
119
|
-
random_seed: int
|
|
117
|
+
random_seed : int
|
|
120
118
|
Random number generator seed passed to ``numpy.random.seed`` to allow
|
|
121
119
|
reproducibility of the plot. By default, no seed will be provided
|
|
122
120
|
and the plot will change each call if a random sample is specified
|
|
123
121
|
by ``num_pp_samples``.
|
|
124
|
-
jitter: float
|
|
122
|
+
jitter : float, default 0
|
|
125
123
|
If ``kind`` is "scatter", jitter will add random uniform noise to the height
|
|
126
|
-
of the ppc samples and observed data.
|
|
127
|
-
animated: bool
|
|
124
|
+
of the ppc samples and observed data.
|
|
125
|
+
animated : bool, default False
|
|
128
126
|
Create an animation of one posterior/prior predictive sample per frame.
|
|
129
|
-
|
|
127
|
+
Only works with matploblib backend.
|
|
130
128
|
To run animations inside a notebook you have to use the `nbAgg` matplotlib's backend.
|
|
131
129
|
Try with `%matplotlib notebook` or `%matplotlib nbAgg`. You can switch back to the
|
|
132
130
|
default matplotlib's backend with `%matplotlib inline` or `%matplotlib auto`.
|
|
133
131
|
If switching back and forth between matplotlib's backend, you may need to run twice the cell
|
|
134
132
|
with the animation.
|
|
135
133
|
If you experience problems rendering the animation try setting
|
|
136
|
-
|
|
137
|
-
If you run the animation from a script write
|
|
134
|
+
``animation_kwargs({'blit':False})`` or changing the matplotlib's backend (e.g. to TkAgg)
|
|
135
|
+
If you run the animation from a script write ``ax, ani = az.plot_ppc(.)``
|
|
138
136
|
animation_kwargs : dict
|
|
139
137
|
Keywords passed to :class:`matplotlib.animation.FuncAnimation`. Ignored with
|
|
140
138
|
matplotlib backend.
|
|
141
|
-
legend : bool
|
|
142
|
-
Add legend to figure.
|
|
143
|
-
labeller : labeller
|
|
139
|
+
legend : bool, default True
|
|
140
|
+
Add legend to figure.
|
|
141
|
+
labeller : labeller, optional
|
|
144
142
|
Class providing the method ``make_pp_label`` to generate the labels in the plot titles.
|
|
145
143
|
Read the :ref:`label_guide` for more details and usage examples.
|
|
146
|
-
ax: numpy array-like of
|
|
144
|
+
ax : numpy array-like of matplotlib_axes or bokeh figures, optional
|
|
147
145
|
A 2D array of locations into which to plot the densities. If not supplied, Arviz will create
|
|
148
146
|
its own array of plot areas (and return it).
|
|
149
|
-
backend: str, optional
|
|
147
|
+
backend : str, optional
|
|
150
148
|
Select plotting backend {"matplotlib","bokeh"}. Default to "matplotlib".
|
|
151
|
-
backend_kwargs:
|
|
149
|
+
backend_kwargs : dict, optional
|
|
152
150
|
These are kwargs specific to the backend being used, passed to
|
|
153
151
|
:func:`matplotlib.pyplot.subplots` or :func:`bokeh.plotting.figure`.
|
|
154
152
|
For additional documentation check the plotting method of the backend.
|
|
155
|
-
group: {"prior", "posterior"}, optional
|
|
153
|
+
group : {"prior", "posterior"}, optional
|
|
156
154
|
Specifies which InferenceData group should be plotted. Defaults to 'posterior'.
|
|
157
155
|
Other value can be 'prior'.
|
|
158
|
-
show: bool, optional
|
|
156
|
+
show : bool, optional
|
|
159
157
|
Call backend show function.
|
|
160
158
|
|
|
161
159
|
Returns
|
|
162
160
|
-------
|
|
163
|
-
axes:
|
|
161
|
+
axes : matplotlib_axes or bokeh_figures
|
|
162
|
+
ani : matplotlib.animation.FuncAnimation, optional
|
|
163
|
+
Only provided if `animated` is ``True``.
|
|
164
164
|
|
|
165
165
|
See Also
|
|
166
166
|
--------
|
|
167
|
-
plot_bpv: Plot Bayesian p-value for observed data and Posterior/Prior predictive.
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
plot_ts: Plot timeseries data.
|
|
167
|
+
plot_bpv : Plot Bayesian p-value for observed data and Posterior/Prior predictive.
|
|
168
|
+
plot_loo_pit : Plot for posterior predictive checks using cross validation.
|
|
169
|
+
plot_lm : Posterior predictive and mean plots for regression-like data.
|
|
170
|
+
plot_ts : Plot timeseries data.
|
|
171
171
|
|
|
172
172
|
Examples
|
|
173
173
|
--------
|
|
@@ -254,8 +254,12 @@ def plot_ppc(
|
|
|
254
254
|
|
|
255
255
|
if group == "posterior":
|
|
256
256
|
predictive_dataset = data.posterior_predictive
|
|
257
|
+
if observed is None:
|
|
258
|
+
observed = True
|
|
257
259
|
elif group == "prior":
|
|
258
260
|
predictive_dataset = data.prior_predictive
|
|
261
|
+
if observed is None:
|
|
262
|
+
observed = False
|
|
259
263
|
|
|
260
264
|
if var_names is None:
|
|
261
265
|
var_names = list(observed_data.data_vars)
|
|
@@ -265,11 +269,11 @@ def plot_ppc(
|
|
|
265
269
|
|
|
266
270
|
if flatten_pp is None:
|
|
267
271
|
if flatten is None:
|
|
268
|
-
flatten_pp = list(predictive_dataset.dims.keys())
|
|
272
|
+
flatten_pp = list(predictive_dataset.dims)
|
|
269
273
|
else:
|
|
270
274
|
flatten_pp = flatten
|
|
271
275
|
if flatten is None:
|
|
272
|
-
flatten = list(observed_data.dims.keys())
|
|
276
|
+
flatten = list(observed_data.dims)
|
|
273
277
|
|
|
274
278
|
if coords is None:
|
|
275
279
|
coords = {}
|
|
@@ -308,6 +312,7 @@ def plot_ppc(
|
|
|
308
312
|
skip_dims=set(flatten),
|
|
309
313
|
var_names=var_names,
|
|
310
314
|
combined=True,
|
|
315
|
+
dim_order=["chain", "draw"],
|
|
311
316
|
)
|
|
312
317
|
),
|
|
313
318
|
"plot_ppc",
|
|
@@ -322,6 +327,7 @@ def plot_ppc(
|
|
|
322
327
|
var_names=pp_var_names,
|
|
323
328
|
skip_dims=set(flatten_pp),
|
|
324
329
|
combined=True,
|
|
330
|
+
dim_order=["chain", "draw"],
|
|
325
331
|
),
|
|
326
332
|
)
|
|
327
333
|
]
|
arviz/plots/separationplot.py
CHANGED
arviz/stats/__init__.py
CHANGED
arviz/stats/density_utils.py
CHANGED
|
@@ -231,8 +231,8 @@ def _fixed_point(t, N, k_sq, a_sq):
|
|
|
231
231
|
Z. I. Botev, J. F. Grotowski, and D. P. Kroese.
|
|
232
232
|
Ann. Statist. 38 (2010), no. 5, 2916--2957.
|
|
233
233
|
"""
|
|
234
|
-
k_sq = np.asfarray(k_sq, dtype=np.float64)
|
|
235
|
-
a_sq = np.asfarray(a_sq, dtype=np.float64)
|
|
234
|
+
k_sq = np.asarray(k_sq, dtype=np.float64)
|
|
235
|
+
a_sq = np.asarray(a_sq, dtype=np.float64)
|
|
236
236
|
|
|
237
237
|
l = 7
|
|
238
238
|
f = np.sum(np.power(k_sq, l) * a_sq * np.exp(-k_sq * np.pi**2 * t))
|
arviz/stats/diagnostics.py
CHANGED
|
@@ -457,10 +457,10 @@ def ks_summary(pareto_tail_indices):
|
|
|
457
457
|
"""
|
|
458
458
|
_numba_flag = Numba.numba_flag
|
|
459
459
|
if _numba_flag:
|
|
460
|
-
bins = np.asarray([-np.Inf, 0.5, 0.7, 1, np.Inf])
|
|
460
|
+
bins = np.asarray([-np.inf, 0.5, 0.7, 1, np.inf])
|
|
461
461
|
kcounts, *_ = _histogram(pareto_tail_indices, bins)
|
|
462
462
|
else:
|
|
463
|
-
kcounts, *_ = _histogram(pareto_tail_indices, bins=[-np.Inf, 0.5, 0.7, 1, np.Inf])
|
|
463
|
+
kcounts, *_ = _histogram(pareto_tail_indices, bins=[-np.inf, 0.5, 0.7, 1, np.inf])
|
|
464
464
|
kprop = kcounts / len(pareto_tail_indices) * 100
|
|
465
465
|
df_k = pd.DataFrame(
|
|
466
466
|
dict(_=["(good)", "(ok)", "(bad)", "(very bad)"], Count=kcounts, Pct=kprop)
|
|
@@ -889,7 +889,6 @@ def _mc_error(ary, batches=5, circular=False):
|
|
|
889
889
|
"""
|
|
890
890
|
_numba_flag = Numba.numba_flag
|
|
891
891
|
if ary.ndim > 1:
|
|
892
|
-
|
|
893
892
|
dims = np.shape(ary)
|
|
894
893
|
trace = np.transpose([t.ravel() for t in ary])
|
|
895
894
|
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Functions for evaluating ECDFs and their confidence bands."""
|
|
2
|
+
from typing import Any, Callable, Optional, Tuple
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from scipy.stats import uniform, binom
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def compute_ecdf(sample: np.ndarray, eval_points: np.ndarray) -> np.ndarray:
    """Evaluate the empirical CDF of a pre-sorted `sample` at `eval_points`.

    The value at each evaluation point is the fraction of draws in `sample`
    that are less than or equal to it. `sample` must already be sorted in
    ascending order, since ``np.searchsorted`` relies on that ordering.
    """
    rank_counts = np.searchsorted(sample, eval_points, side="right")
    return rank_counts / len(sample)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _get_ecdf_points(
    sample: np.ndarray, eval_points: np.ndarray, difference: bool
) -> Tuple[np.ndarray, np.ndarray]:
    """Return the (x, y) coordinates of the ECDF evaluated at `eval_points`.

    For a plain ECDF plot (``difference=False``) whose first y value is
    positive, a leading point with y == 0 at the first x is prepended so the
    drawn curve starts on the x-axis. Difference plots are returned as-is.
    """
    # ECDF of the (sorted) sample, inlined from compute_ecdf.
    heights = np.searchsorted(sample, eval_points, side="right") / len(sample)
    locations = eval_points

    if not difference and heights[0] > 0:
        locations = np.concatenate([locations[:1], locations])
        heights = np.concatenate([[0.0], heights])
    return locations, heights
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _simulate_ecdf(
    ndraws: int,
    eval_points: np.ndarray,
    rvs: Callable[[int, Optional[Any]], np.ndarray],
    random_state: Optional[Any] = None,
) -> np.ndarray:
    """Draw `ndraws` values with `rvs`, then evaluate their ECDF at `eval_points`."""
    draws = rvs(ndraws, random_state=random_state)
    draws.sort()  # in-place; searchsorted below requires ascending order
    # ECDF inlined from compute_ecdf.
    return np.searchsorted(draws, eval_points, side="right") / len(draws)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _fit_pointwise_band_probability(
    ndraws: int,
    ecdf_at_eval_points: np.ndarray,
    cdf_at_eval_points: np.ndarray,
) -> float:
    """Return the smallest marginal probability of a pointwise confidence band
    that still contains the given ECDF everywhere."""
    # Convert ECDF probabilities back to integer draw counts.
    counts = (ndraws * ecdf_at_eval_points).astype(int)
    # Tightest binomial tail probabilities reached by the ECDF on either side.
    tail_lower = np.amin(binom.cdf(counts, ndraws, cdf_at_eval_points))
    tail_upper = np.amin(binom.sf(counts - 1, ndraws, cdf_at_eval_points))
    return 1 - 2 * min(tail_lower, tail_upper)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _get_pointwise_confidence_band(
    prob: float, ndraws: int, cdf_at_eval_points: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """Return the lower/upper `prob`-level pointwise (marginal) confidence band.

    The band is expressed as ECDF probabilities: binomial count bounds at
    each evaluation point, scaled by the number of draws.
    """
    lower_counts, upper_counts = binom.interval(prob, ndraws, cdf_at_eval_points)
    return lower_counts / ndraws, upper_counts / ndraws
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def ecdf_confidence_band(
    ndraws: int,
    eval_points: np.ndarray,
    cdf_at_eval_points: np.ndarray,
    prob: float = 0.95,
    method="simulated",
    **kwargs,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute the `prob`-level confidence band for the ECDF.

    Arguments
    ---------
    ndraws : int
        Number of samples in the original dataset.
    eval_points : np.ndarray
        Points at which the ECDF is evaluated. If these are dependent on the
        sample values, simultaneous confidence bands may not be correctly
        calibrated.
    cdf_at_eval_points : np.ndarray
        CDF values at the evaluation points.
    prob : float, default 0.95
        The target probability that a true ECDF lies within the confidence band.
    method : string, default "simulated"
        The method used to compute the confidence band. Valid options are:

        - "pointwise": Compute the pointwise (i.e. marginal) confidence band.
        - "simulated": Use Monte Carlo simulation to estimate a simultaneous
          confidence band. `rvs` must be provided.
    rvs : callable, optional
        A function that takes an integer `ndraws` and optionally the object
        passed to `random_state` and returns an array of `ndraws` samples from
        the same distribution as the original dataset. Required if `method` is
        "simulated" and variable is discrete.
    num_trials : int, default 1000
        The number of random ECDFs to generate for constructing simultaneous
        confidence bands (if `method` is "simulated").
    random_state : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
        If `None`, the `numpy.random.RandomState` singleton is used. If an
        `int`, a new ``numpy.random.RandomState`` instance is used, seeded with
        seed. If a `RandomState` or `Generator` instance, the instance is used.

    Returns
    -------
    prob_lower : np.ndarray
        Lower confidence band for the ECDF at the evaluation points.
    prob_upper : np.ndarray
        Upper confidence band for the ECDF at the evaluation points.
    """
    if not 0 < prob < 1:
        raise ValueError(f"Invalid value for `prob`. Expected 0 < prob < 1, but got {prob}.")

    # Reduce both methods to a single per-point probability, then build the
    # band pointwise from it.
    if method == "pointwise":
        prob_at_each_point = prob
    elif method == "simulated":
        prob_at_each_point = _simulate_simultaneous_ecdf_band_probability(
            ndraws, eval_points, cdf_at_eval_points, prob=prob, **kwargs
        )
    else:
        raise ValueError(f"Unknown method {method}. Valid options are 'pointwise' or 'simulated'.")

    return _get_pointwise_confidence_band(prob_at_each_point, ndraws, cdf_at_eval_points)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _simulate_simultaneous_ecdf_band_probability(
    ndraws: int,
    eval_points: np.ndarray,
    cdf_at_eval_points: np.ndarray,
    prob: float = 0.95,
    rvs: Optional[Callable[[int, Optional[Any]], np.ndarray]] = None,
    num_trials: int = 1000,
    random_state: Optional[Any] = None,
) -> float:
    """Estimate probability for simultaneous confidence band using simulation.

    Repeatedly simulates ECDFs and, for each, fits the smallest pointwise band
    probability containing it; the `prob`-quantile of those probabilities gives
    the per-point level whose bands form a `prob`-level simultaneous envelope.
    """
    if rvs is None:
        warnings.warn(
            "Assuming variable is continuous for calibration of pointwise bands. "
            "If the variable is discrete, specify random variable sampler `rvs`.",
            UserWarning,
        )
        # Continuous case: by the probability integral transform we can
        # calibrate with a standard-uniform sampler evaluated at the CDF values.
        rvs = uniform(0, 1).rvs
        simulation_points = cdf_at_eval_points
    else:
        simulation_points = eval_points

    band_probs = np.array(
        [
            _fit_pointwise_band_probability(
                ndraws,
                _simulate_ecdf(ndraws, simulation_points, rvs, random_state=random_state),
                cdf_at_eval_points,
            )
            for _ in range(num_trials)
        ]
    )
    return np.quantile(band_probs, prob)
|