arviz 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arviz/__init__.py +2 -1
- arviz/data/io_cmdstan.py +4 -0
- arviz/data/io_numpyro.py +1 -1
- arviz/plots/backends/bokeh/ecdfplot.py +1 -2
- arviz/plots/backends/bokeh/khatplot.py +8 -3
- arviz/plots/backends/bokeh/pairplot.py +2 -6
- arviz/plots/backends/matplotlib/ecdfplot.py +1 -2
- arviz/plots/backends/matplotlib/khatplot.py +7 -3
- arviz/plots/backends/matplotlib/traceplot.py +1 -1
- arviz/plots/bpvplot.py +2 -2
- arviz/plots/densityplot.py +1 -1
- arviz/plots/dotplot.py +2 -2
- arviz/plots/ecdfplot.py +205 -89
- arviz/plots/essplot.py +2 -2
- arviz/plots/forestplot.py +1 -1
- arviz/plots/hdiplot.py +2 -2
- arviz/plots/khatplot.py +23 -6
- arviz/plots/loopitplot.py +2 -2
- arviz/plots/mcseplot.py +3 -1
- arviz/plots/plot_utils.py +2 -4
- arviz/plots/posteriorplot.py +1 -1
- arviz/plots/rankplot.py +2 -2
- arviz/plots/violinplot.py +1 -1
- arviz/preview.py +17 -0
- arviz/rcparams.py +27 -2
- arviz/stats/diagnostics.py +13 -9
- arviz/stats/ecdf_utils.py +11 -8
- arviz/stats/stats.py +31 -16
- arviz/stats/stats_utils.py +8 -6
- arviz/tests/base_tests/test_data.py +1 -2
- arviz/tests/base_tests/test_data_zarr.py +0 -1
- arviz/tests/base_tests/test_diagnostics_numba.py +2 -7
- arviz/tests/base_tests/test_helpers.py +2 -2
- arviz/tests/base_tests/test_plot_utils.py +5 -13
- arviz/tests/base_tests/test_plots_matplotlib.py +92 -2
- arviz/tests/base_tests/test_rcparams.py +12 -0
- arviz/tests/base_tests/test_stats.py +1 -1
- arviz/tests/base_tests/test_stats_numba.py +2 -7
- arviz/tests/base_tests/test_utils_numba.py +2 -5
- arviz/tests/external_tests/test_data_pystan.py +5 -5
- arviz/tests/helpers.py +17 -9
- arviz/utils.py +4 -0
- {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/METADATA +8 -4
- {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/RECORD +47 -46
- {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/LICENSE +0 -0
- {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/WHEEL +0 -0
- {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/top_level.txt +0 -0
arviz/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# pylint: disable=wildcard-import,invalid-name,wrong-import-position
|
|
2
2
|
"""ArviZ is a library for exploratory analysis of Bayesian models."""
|
|
3
|
-
__version__ = "0.
|
|
3
|
+
__version__ = "0.19.0"
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
@@ -37,6 +37,7 @@ from .stats import *
|
|
|
37
37
|
from .rcparams import rc_context, rcParams
|
|
38
38
|
from .utils import Numba, Dask, interactive_backend
|
|
39
39
|
from .wrappers import *
|
|
40
|
+
from . import preview
|
|
40
41
|
|
|
41
42
|
# add ArviZ's styles to matplotlib's styles
|
|
42
43
|
_arviz_style_path = os.path.join(os.path.dirname(__file__), "plots", "styles")
|
arviz/data/io_cmdstan.py
CHANGED
|
@@ -738,6 +738,7 @@ def _process_configuration(comments):
|
|
|
738
738
|
elif "=" in comment:
|
|
739
739
|
match_int = re.search(r"^(\S+)\s*=\s*([-+]?[0-9]+)$", comment)
|
|
740
740
|
match_float = re.search(r"^(\S+)\s*=\s*([-+]?[0-9]+\.[0-9]+)$", comment)
|
|
741
|
+
match_str_bool = re.search(r"^(\S+)\s*=\s*(true|false)$", comment)
|
|
741
742
|
match_str = re.search(r"^(\S+)\s*=\s*(\S+)$", comment)
|
|
742
743
|
match_empty = re.search(r"^(\S+)\s*=\s*$", comment)
|
|
743
744
|
if match_int:
|
|
@@ -746,6 +747,9 @@ def _process_configuration(comments):
|
|
|
746
747
|
elif match_float:
|
|
747
748
|
key, value = match_float.group(1), match_float.group(2)
|
|
748
749
|
results[key] = float(value)
|
|
750
|
+
elif match_str_bool:
|
|
751
|
+
key, value = match_str_bool.group(1), match_str_bool.group(2)
|
|
752
|
+
results[key] = int(value == "true")
|
|
749
753
|
elif match_str:
|
|
750
754
|
key, value = match_str.group(1), match_str.group(2)
|
|
751
755
|
results[key] = value
|
arviz/data/io_numpyro.py
CHANGED
|
@@ -194,7 +194,7 @@ class NumPyroConverter:
|
|
|
194
194
|
)
|
|
195
195
|
for obs_name, log_like in log_likelihood_dict.items():
|
|
196
196
|
shape = (self.nchains, self.ndraws) + log_like.shape[1:]
|
|
197
|
-
data[obs_name] = np.reshape(
|
|
197
|
+
data[obs_name] = np.reshape(np.asarray(log_like), shape)
|
|
198
198
|
return dict_to_dataset(
|
|
199
199
|
data,
|
|
200
200
|
library=self.numpyro,
|
|
@@ -13,7 +13,6 @@ def plot_ecdf(
|
|
|
13
13
|
x_bands,
|
|
14
14
|
lower,
|
|
15
15
|
higher,
|
|
16
|
-
confidence_bands,
|
|
17
16
|
plot_kwargs,
|
|
18
17
|
fill_kwargs,
|
|
19
18
|
plot_outline_kwargs,
|
|
@@ -58,7 +57,7 @@ def plot_ecdf(
|
|
|
58
57
|
plot_outline_kwargs.setdefault("color", to_hex("C0"))
|
|
59
58
|
plot_outline_kwargs.setdefault("alpha", 0.2)
|
|
60
59
|
|
|
61
|
-
if
|
|
60
|
+
if x_bands is not None:
|
|
62
61
|
ax.step(x_coord, y_coord, **plot_kwargs)
|
|
63
62
|
|
|
64
63
|
if fill_band:
|
|
@@ -21,6 +21,7 @@ def plot_khat(
|
|
|
21
21
|
figsize,
|
|
22
22
|
xdata,
|
|
23
23
|
khats,
|
|
24
|
+
good_k,
|
|
24
25
|
kwargs,
|
|
25
26
|
threshold,
|
|
26
27
|
coord_labels,
|
|
@@ -53,7 +54,11 @@ def plot_khat(
|
|
|
53
54
|
|
|
54
55
|
if hlines_kwargs is None:
|
|
55
56
|
hlines_kwargs = {}
|
|
56
|
-
|
|
57
|
+
|
|
58
|
+
if good_k is None:
|
|
59
|
+
good_k = 0.7
|
|
60
|
+
|
|
61
|
+
hlines_kwargs.setdefault("hlines", [0, good_k, 1])
|
|
57
62
|
|
|
58
63
|
cmap = None
|
|
59
64
|
if isinstance(color, str):
|
|
@@ -75,7 +80,7 @@ def plot_khat(
|
|
|
75
80
|
rgba_c = cmap(color)
|
|
76
81
|
|
|
77
82
|
khats = khats if isinstance(khats, np.ndarray) else khats.values.flatten()
|
|
78
|
-
alphas = 0.5 + 0.2 * (khats >
|
|
83
|
+
alphas = 0.5 + 0.2 * (khats > good_k) + 0.3 * (khats > 1)
|
|
79
84
|
|
|
80
85
|
rgba_c = vectorized_to_hex(rgba_c)
|
|
81
86
|
|
|
@@ -130,7 +135,7 @@ def plot_khat(
|
|
|
130
135
|
xmax = len(khats)
|
|
131
136
|
|
|
132
137
|
if show_bins:
|
|
133
|
-
bin_edges = np.array([ymin,
|
|
138
|
+
bin_edges = np.array([ymin, good_k, 1, ymax])
|
|
134
139
|
bin_edges = bin_edges[(bin_edges >= ymin) & (bin_edges <= ymax)]
|
|
135
140
|
hist, _, _ = histogram(khats, bin_edges)
|
|
136
141
|
for idx, count in enumerate(hist):
|
|
@@ -174,12 +174,8 @@ def plot_pair(
|
|
|
174
174
|
source = ColumnDataSource(data=source_dict)
|
|
175
175
|
|
|
176
176
|
if divergences:
|
|
177
|
-
source_nondiv = CDSView(
|
|
178
|
-
|
|
179
|
-
)
|
|
180
|
-
source_div = CDSView(
|
|
181
|
-
source=source, filters=[GroupFilter(column_name=divergenve_name, group="1")]
|
|
182
|
-
)
|
|
177
|
+
source_nondiv = CDSView(filter=GroupFilter(column_name=divergenve_name, group="0"))
|
|
178
|
+
source_div = CDSView(filter=GroupFilter(column_name=divergenve_name, group="1"))
|
|
183
179
|
|
|
184
180
|
def get_width_and_height(jointplot, rotate):
|
|
185
181
|
"""Compute subplots dimensions for two or more variables."""
|
|
@@ -13,7 +13,6 @@ def plot_ecdf(
|
|
|
13
13
|
x_bands,
|
|
14
14
|
lower,
|
|
15
15
|
higher,
|
|
16
|
-
confidence_bands,
|
|
17
16
|
plot_kwargs,
|
|
18
17
|
fill_kwargs,
|
|
19
18
|
plot_outline_kwargs,
|
|
@@ -59,7 +58,7 @@ def plot_ecdf(
|
|
|
59
58
|
|
|
60
59
|
ax.step(x_coord, y_coord, **plot_kwargs)
|
|
61
60
|
|
|
62
|
-
if
|
|
61
|
+
if x_bands is not None:
|
|
63
62
|
if fill_band:
|
|
64
63
|
ax.fill_between(x_bands, lower, higher, **fill_kwargs)
|
|
65
64
|
else:
|
|
@@ -20,6 +20,7 @@ def plot_khat(
|
|
|
20
20
|
figsize,
|
|
21
21
|
xdata,
|
|
22
22
|
khats,
|
|
23
|
+
good_k,
|
|
23
24
|
kwargs,
|
|
24
25
|
threshold,
|
|
25
26
|
coord_labels,
|
|
@@ -61,8 +62,11 @@ def plot_khat(
|
|
|
61
62
|
backend_kwargs.setdefault("figsize", figsize)
|
|
62
63
|
backend_kwargs["squeeze"] = True
|
|
63
64
|
|
|
65
|
+
if good_k is None:
|
|
66
|
+
good_k = 0.7
|
|
67
|
+
|
|
64
68
|
hlines_kwargs = matplotlib_kwarg_dealiaser(hlines_kwargs, "hlines")
|
|
65
|
-
hlines_kwargs.setdefault("hlines", [0,
|
|
69
|
+
hlines_kwargs.setdefault("hlines", [0, good_k, 1])
|
|
66
70
|
hlines_kwargs.setdefault("linestyle", [":", "-.", "--", "-"])
|
|
67
71
|
hlines_kwargs.setdefault("alpha", 0.7)
|
|
68
72
|
hlines_kwargs.setdefault("zorder", -1)
|
|
@@ -102,7 +106,7 @@ def plot_khat(
|
|
|
102
106
|
rgba_c = cmap(norm_fun(color))
|
|
103
107
|
|
|
104
108
|
khats = khats if isinstance(khats, np.ndarray) else khats.values.flatten()
|
|
105
|
-
alphas = 0.5 + 0.2 * (khats >
|
|
109
|
+
alphas = 0.5 + 0.2 * (khats > good_k) + 0.3 * (khats > 1)
|
|
106
110
|
rgba_c[:, 3] = alphas
|
|
107
111
|
rgba_c = vectorized_to_hex(rgba_c)
|
|
108
112
|
kwargs["c"] = rgba_c
|
|
@@ -151,7 +155,7 @@ def plot_khat(
|
|
|
151
155
|
)
|
|
152
156
|
|
|
153
157
|
if show_bins:
|
|
154
|
-
bin_edges = np.array([ymin,
|
|
158
|
+
bin_edges = np.array([ymin, good_k, 1, ymax])
|
|
155
159
|
bin_edges = bin_edges[(bin_edges >= ymin) & (bin_edges <= ymax)]
|
|
156
160
|
hist, _, _ = histogram(khats, bin_edges)
|
|
157
161
|
for idx, count in enumerate(hist):
|
|
@@ -440,7 +440,7 @@ def plot_trace(
|
|
|
440
440
|
[], [], label="combined", **dealiase_sel_kwargs(plot_kwargs, chain_prop, -1)
|
|
441
441
|
),
|
|
442
442
|
)
|
|
443
|
-
ax.figure.axes[
|
|
443
|
+
ax.figure.axes[1].legend(handles=handles, title="chain", loc="upper right")
|
|
444
444
|
|
|
445
445
|
if axes is None:
|
|
446
446
|
axes = np.array(ax.figure.axes).reshape(-1, 2)
|
arviz/plots/bpvplot.py
CHANGED
|
@@ -80,7 +80,7 @@ def plot_bpv(
|
|
|
80
80
|
hdi_prob : float, optional
|
|
81
81
|
Probability for the highest density interval for the analytical reference distribution when
|
|
82
82
|
``kind=u_values``. Should be in the interval (0, 1]. Defaults to the
|
|
83
|
-
rcParam ``stats.
|
|
83
|
+
rcParam ``stats.ci_prob``. See :ref:`this section <common_hdi_prob>` for usage examples.
|
|
84
84
|
color : str, optional
|
|
85
85
|
Matplotlib color
|
|
86
86
|
grid : tuple, optional
|
|
@@ -202,7 +202,7 @@ def plot_bpv(
|
|
|
202
202
|
raise TypeError("`reference` argument must be either `analytical`, `samples`, or `None`")
|
|
203
203
|
|
|
204
204
|
if hdi_prob is None:
|
|
205
|
-
hdi_prob = rcParams["stats.
|
|
205
|
+
hdi_prob = rcParams["stats.ci_prob"]
|
|
206
206
|
elif not 1 >= hdi_prob > 0:
|
|
207
207
|
raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
|
|
208
208
|
|
arviz/plots/densityplot.py
CHANGED
arviz/plots/dotplot.py
CHANGED
|
@@ -67,7 +67,7 @@ def plot_dot(
|
|
|
67
67
|
The shape of the marker. Valid for matplotlib backend.
|
|
68
68
|
hdi_prob : float, optional
|
|
69
69
|
Valid only when point_interval is True. Plots HDI for chosen percentage of density.
|
|
70
|
-
Defaults to ``stats.
|
|
70
|
+
Defaults to ``stats.ci_prob`` rcParam. See :ref:`this section <common_hdi_prob>`
|
|
71
71
|
for usage examples.
|
|
72
72
|
rotated : bool, default False
|
|
73
73
|
Whether to rotate the dot plot by 90 degrees.
|
|
@@ -151,7 +151,7 @@ def plot_dot(
|
|
|
151
151
|
values.sort()
|
|
152
152
|
|
|
153
153
|
if hdi_prob is None:
|
|
154
|
-
hdi_prob = rcParams["stats.
|
|
154
|
+
hdi_prob = rcParams["stats.ci_prob"]
|
|
155
155
|
elif not 1 >= hdi_prob > 0:
|
|
156
156
|
raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
|
|
157
157
|
|
arviz/plots/ecdfplot.py
CHANGED
|
@@ -1,24 +1,32 @@
|
|
|
1
1
|
"""Plot ecdf or ecdf-difference plot with confidence bands."""
|
|
2
2
|
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
3
5
|
import numpy as np
|
|
4
6
|
from scipy.stats import uniform
|
|
5
7
|
|
|
8
|
+
try:
|
|
9
|
+
from scipy.stats import ecdf as scipy_ecdf
|
|
10
|
+
except ImportError:
|
|
11
|
+
scipy_ecdf = None
|
|
12
|
+
|
|
6
13
|
from ..rcparams import rcParams
|
|
7
|
-
from ..stats.ecdf_utils import
|
|
14
|
+
from ..stats.ecdf_utils import ecdf_confidence_band, _get_ecdf_points
|
|
15
|
+
from ..utils import BehaviourChangeWarning
|
|
8
16
|
from .plot_utils import get_plotting_function
|
|
9
17
|
|
|
10
18
|
|
|
11
19
|
def plot_ecdf(
|
|
12
20
|
values,
|
|
13
21
|
values2=None,
|
|
22
|
+
eval_points=None,
|
|
14
23
|
cdf=None,
|
|
15
24
|
difference=False,
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
pointwise=False,
|
|
19
|
-
npoints=100,
|
|
25
|
+
confidence_bands=False,
|
|
26
|
+
ci_prob=None,
|
|
20
27
|
num_trials=500,
|
|
21
|
-
|
|
28
|
+
rvs=None,
|
|
29
|
+
random_state=None,
|
|
22
30
|
figsize=None,
|
|
23
31
|
fill_band=True,
|
|
24
32
|
plot_kwargs=None,
|
|
@@ -28,15 +36,19 @@ def plot_ecdf(
|
|
|
28
36
|
show=None,
|
|
29
37
|
backend=None,
|
|
30
38
|
backend_kwargs=None,
|
|
39
|
+
npoints=100,
|
|
40
|
+
pointwise=False,
|
|
41
|
+
fpr=None,
|
|
42
|
+
pit=False,
|
|
31
43
|
**kwargs,
|
|
32
44
|
):
|
|
33
45
|
r"""Plot ECDF or ECDF-Difference Plot with Confidence bands.
|
|
34
46
|
|
|
35
|
-
Plots of the empirical
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
47
|
+
Plots of the empirical cumulative distribution function (ECDF) of an array. Optionally, a `cdf`
|
|
48
|
+
argument representing a reference CDF may be provided for comparison using a difference ECDF
|
|
49
|
+
plot and/or confidence bands.
|
|
50
|
+
|
|
51
|
+
Alternatively, the PIT for a single dataset may be visualized.
|
|
40
52
|
|
|
41
53
|
Notes
|
|
42
54
|
-----
|
|
@@ -47,26 +59,39 @@ def plot_ecdf(
|
|
|
47
59
|
values : array-like
|
|
48
60
|
Values to plot from an unknown continuous or discrete distribution.
|
|
49
61
|
values2 : array-like, optional
|
|
50
|
-
|
|
62
|
+
Values to compare to the original sample.
|
|
63
|
+
|
|
64
|
+
.. deprecated:: 0.18.0
|
|
65
|
+
Instead use ``cdf=scipy.stats.ecdf(values2).cdf.evaluate``.
|
|
51
66
|
cdf : callable, optional
|
|
52
67
|
Cumulative distribution function of the distribution to compare the original sample.
|
|
53
68
|
The function must take as input a numpy array of draws from the distribution.
|
|
54
69
|
difference : bool, default False
|
|
55
70
|
If True then plot ECDF-difference plot otherwise ECDF plot.
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
confidence
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
71
|
+
confidence_bands : str or bool
|
|
72
|
+
|
|
73
|
+
- False: No confidence bands are plotted (default).
|
|
74
|
+
- True: Plot bands computed with the default algorithm (subject to change)
|
|
75
|
+
- "pointwise": Compute the pointwise (i.e. marginal) confidence band.
|
|
76
|
+
- "simulated": Use Monte Carlo simulation to estimate a simultaneous confidence
|
|
77
|
+
band.
|
|
78
|
+
|
|
79
|
+
For simultaneous confidence bands to be correctly calibrated, provide `eval_points` that
|
|
80
|
+
are not dependent on the `values`.
|
|
81
|
+
ci_prob : float, default 0.94
|
|
82
|
+
The probability that the true ECDF lies within the confidence band. If `confidence_bands`
|
|
83
|
+
is "pointwise", this is the marginal probability instead of the joint probability.
|
|
84
|
+
eval_points : array-like, optional
|
|
85
|
+
The points at which to evaluate the ECDF. If None, `npoints` uniformly spaced points
|
|
86
|
+
between the data bounds will be used.
|
|
87
|
+
rvs: callable, optional
|
|
88
|
+
A function that takes an integer `ndraws` and optionally the object passed to
|
|
89
|
+
`random_state` and returns an array of `ndraws` samples from the same distribution
|
|
90
|
+
as the original dataset. Required if `confidence_bands` is "simulated" and variable is discrete.
|
|
91
|
+
random_state : int, numpy.random.Generator or numpy.random.RandomState, optional
|
|
66
92
|
num_trials : int, default 500
|
|
67
|
-
The number of random ECDFs to generate for constructing simultaneous confidence bands
|
|
68
|
-
|
|
69
|
-
The type I error rate s.t `1 - fpr` denotes the confidence level of bands.
|
|
93
|
+
The number of random ECDFs to generate for constructing simultaneous confidence bands
|
|
94
|
+
(if `confidence_bands` is "simulated").
|
|
70
95
|
figsize : (float,float), optional
|
|
71
96
|
Figure size. If `None` it will be defined automatically.
|
|
72
97
|
fill_band : bool, default True
|
|
@@ -91,6 +116,26 @@ def plot_ecdf(
|
|
|
91
116
|
These are kwargs specific to the backend being used, passed to
|
|
92
117
|
:func:`matplotlib.pyplot.subplots` or :class:`bokeh.plotting.figure`.
|
|
93
118
|
For additional documentation check the plotting method of the backend.
|
|
119
|
+
npoints : int, default 100
|
|
120
|
+
The number of evaluation points for the ecdf or ecdf-difference plots, if `eval_points` is
|
|
121
|
+
not provided or `pit` is `True`.
|
|
122
|
+
|
|
123
|
+
.. deprecated:: 0.18.0
|
|
124
|
+
Instead specify ``eval_points=np.linspace(np.min(values), np.max(values), npoints)``
|
|
125
|
+
unless `pit` is `True`.
|
|
126
|
+
pointwise : bool, default False
|
|
127
|
+
|
|
128
|
+
.. deprecated:: 0.18.0
|
|
129
|
+
Instead use `confidence_bands="pointwise"`.
|
|
130
|
+
fpr : float, optional
|
|
131
|
+
|
|
132
|
+
.. deprecated:: 0.18.0
|
|
133
|
+
Instead use `ci_prob=1-fpr`.
|
|
134
|
+
pit : bool, default False
|
|
135
|
+
If True, plot the ECDF or ECDF-difference of the PIT of the sample.
|
|
136
|
+
|
|
137
|
+
.. deprecated:: 0.18.0
|
|
138
|
+
See below example instead.
|
|
94
139
|
|
|
95
140
|
Returns
|
|
96
141
|
-------
|
|
@@ -98,135 +143,206 @@ def plot_ecdf(
|
|
|
98
143
|
|
|
99
144
|
References
|
|
100
145
|
----------
|
|
101
|
-
.. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A
|
|
146
|
+
.. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A. (2022). Graphical Test for
|
|
102
147
|
Discrete Uniformity and its Applications in Goodness of Fit Evaluation and
|
|
103
|
-
Multiple Sample Comparison.
|
|
148
|
+
Multiple Sample Comparison. Statistics and Computing, 32(32).
|
|
104
149
|
|
|
105
150
|
Examples
|
|
106
151
|
--------
|
|
107
|
-
|
|
152
|
+
In a future release, the default behaviour of ``plot_ecdf`` will change.
|
|
153
|
+
To maintain the original behaviour you should do:
|
|
108
154
|
|
|
109
155
|
.. plot::
|
|
110
156
|
:context: close-figs
|
|
111
157
|
|
|
112
158
|
>>> import arviz as az
|
|
113
|
-
>>>
|
|
114
|
-
|
|
159
|
+
>>> import numpy as np
|
|
160
|
+
>>> from scipy.stats import uniform, norm
|
|
161
|
+
>>>
|
|
115
162
|
>>> sample = norm(0,1).rvs(1000)
|
|
116
|
-
>>>
|
|
163
|
+
>>> npoints = 100
|
|
164
|
+
>>> az.plot_ecdf(sample, eval_points=np.linspace(sample.min(), sample.max(), npoints))
|
|
117
165
|
|
|
118
|
-
|
|
166
|
+
However, seeing this warning isn't an indicator of anything being wrong;
|
|
167
|
+
if you are happy to get different behaviour as ArviZ improves and adds
|
|
168
|
+
new algorithms you can ignore it like so:
|
|
119
169
|
|
|
120
170
|
.. plot::
|
|
121
171
|
:context: close-figs
|
|
122
172
|
|
|
123
|
-
>>>
|
|
124
|
-
>>>
|
|
173
|
+
>>> import warnings
|
|
174
|
+
>>> warnings.filterwarnings("ignore", category=az.utils.BehaviourChangeWarning)
|
|
125
175
|
|
|
126
|
-
Plot
|
|
127
|
-
|
|
176
|
+
Plot an ECDF plot for a given sample evaluated at the sample points. This will become
|
|
177
|
+
the new behaviour when `eval_points` is not provided:
|
|
128
178
|
|
|
129
179
|
.. plot::
|
|
130
180
|
:context: close-figs
|
|
131
181
|
|
|
132
|
-
>>> az.plot_ecdf(sample,
|
|
133
|
-
>>> confidence_bands = True, difference = True)
|
|
182
|
+
>>> az.plot_ecdf(sample, eval_points=np.unique(sample))
|
|
134
183
|
|
|
135
|
-
Plot
|
|
136
|
-
|
|
184
|
+
Plot an ECDF plot with confidence bands for comparing a given sample to a given distribution.
|
|
185
|
+
We manually specify evaluation points independent of the values so that the confidence bands
|
|
186
|
+
are correctly calibrated.
|
|
137
187
|
|
|
138
188
|
.. plot::
|
|
139
189
|
:context: close-figs
|
|
140
190
|
|
|
141
|
-
>>>
|
|
142
|
-
>>>
|
|
191
|
+
>>> distribution = norm(0,1)
|
|
192
|
+
>>> eval_points = np.linspace(*distribution.ppf([0.001, 0.999]), 100)
|
|
193
|
+
>>> az.plot_ecdf(
|
|
194
|
+
>>> sample, eval_points=eval_points,
|
|
195
|
+
>>> cdf=distribution.cdf, confidence_bands=True
|
|
196
|
+
>>> )
|
|
143
197
|
|
|
144
|
-
Plot
|
|
145
|
-
|
|
198
|
+
Plot an ECDF-difference plot with confidence bands for comparing a given sample
|
|
199
|
+
to a given distribution.
|
|
146
200
|
|
|
147
201
|
.. plot::
|
|
148
202
|
:context: close-figs
|
|
149
203
|
|
|
150
|
-
>>> az.plot_ecdf(
|
|
151
|
-
>>>
|
|
204
|
+
>>> az.plot_ecdf(
|
|
205
|
+
>>> sample, cdf=distribution.cdf,
|
|
206
|
+
>>> confidence_bands=True, difference=True
|
|
207
|
+
>>> )
|
|
152
208
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
209
|
+
Plot an ECDF plot with confidence bands for the probability integral transform (PIT) of a
|
|
210
|
+
continuous sample. If drawn from the reference distribution, the PIT values should be uniformly
|
|
211
|
+
distributed.
|
|
156
212
|
|
|
157
213
|
.. plot::
|
|
158
214
|
:context: close-figs
|
|
159
215
|
|
|
160
|
-
>>>
|
|
161
|
-
>>>
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
if values2 is None and cdf is None and confidence_bands is True:
|
|
168
|
-
raise ValueError("For confidence bands you need to specify values2 or the cdf")
|
|
216
|
+
>>> pit_vals = distribution.cdf(sample)
|
|
217
|
+
>>> uniform_dist = uniform(0, 1)
|
|
218
|
+
>>> az.plot_ecdf(
|
|
219
|
+
>>> pit_vals, cdf=uniform_dist.cdf,
|
|
220
|
+
>>> rvs=uniform_dist.rvs, confidence_bands=True
|
|
221
|
+
>>> )
|
|
169
222
|
|
|
170
|
-
|
|
171
|
-
raise ValueError("To compare sample you need either cdf or values2 and not both")
|
|
223
|
+
Plot an ECDF-difference plot of PIT values.
|
|
172
224
|
|
|
173
|
-
|
|
174
|
-
|
|
225
|
+
.. plot::
|
|
226
|
+
:context: close-figs
|
|
175
227
|
|
|
176
|
-
|
|
177
|
-
|
|
228
|
+
>>> az.plot_ecdf(
|
|
229
|
+
>>> pit_vals, cdf = uniform_dist.cdf, rvs = uniform_dist.rvs,
|
|
230
|
+
>>> confidence_bands = True, difference = True
|
|
231
|
+
>>> )
|
|
232
|
+
"""
|
|
233
|
+
if confidence_bands is True:
|
|
234
|
+
if pointwise:
|
|
235
|
+
warnings.warn(
|
|
236
|
+
"`pointwise` has been deprecated. Use `confidence_bands='pointwise'` instead.",
|
|
237
|
+
FutureWarning,
|
|
238
|
+
)
|
|
239
|
+
confidence_bands = "pointwise"
|
|
240
|
+
else:
|
|
241
|
+
confidence_bands = "simulated"
|
|
242
|
+
elif confidence_bands == "simulated" and pointwise:
|
|
243
|
+
raise ValueError("Cannot specify both `confidence_bands='simulated'` and `pointwise=True`")
|
|
244
|
+
|
|
245
|
+
if fpr is not None:
|
|
246
|
+
warnings.warn(
|
|
247
|
+
"`fpr` has been deprecated. Use `ci_prob=1-fpr` or set `rcParam['stats.ci_prob']` to"
|
|
248
|
+
"`1-fpr`.",
|
|
249
|
+
FutureWarning,
|
|
250
|
+
)
|
|
251
|
+
if ci_prob is not None:
|
|
252
|
+
raise ValueError("Cannot specify both `fpr` and `ci_prob`")
|
|
253
|
+
ci_prob = 1 - fpr
|
|
254
|
+
|
|
255
|
+
if ci_prob is None:
|
|
256
|
+
ci_prob = rcParams["stats.ci_prob"]
|
|
178
257
|
|
|
179
258
|
if values2 is not None:
|
|
180
|
-
|
|
181
|
-
|
|
259
|
+
if cdf is not None:
|
|
260
|
+
raise ValueError("You cannot specify both `values2` and `cdf`")
|
|
261
|
+
if scipy_ecdf is None:
|
|
262
|
+
raise ValueError(
|
|
263
|
+
"The `values2` argument is deprecated and `scipy.stats.ecdf` is not available. "
|
|
264
|
+
"Please use `cdf` instead."
|
|
265
|
+
)
|
|
266
|
+
warnings.warn(
|
|
267
|
+
"`values2` has been deprecated. Use `cdf=scipy.stats.ecdf(values2).cdf.evaluate` "
|
|
268
|
+
"instead.",
|
|
269
|
+
FutureWarning,
|
|
270
|
+
)
|
|
271
|
+
cdf = scipy_ecdf(np.ravel(values2)).cdf.evaluate
|
|
272
|
+
|
|
273
|
+
if cdf is None:
|
|
274
|
+
if confidence_bands:
|
|
275
|
+
raise ValueError("For confidence bands you must specify cdf")
|
|
276
|
+
if difference is True:
|
|
277
|
+
raise ValueError("For ECDF difference plot you must specify cdf")
|
|
278
|
+
if pit:
|
|
279
|
+
raise ValueError("For PIT plot you must specify cdf")
|
|
182
280
|
|
|
183
281
|
values = np.ravel(values)
|
|
184
282
|
values.sort()
|
|
185
283
|
|
|
186
284
|
if pit:
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
285
|
+
warnings.warn(
|
|
286
|
+
"`pit` has been deprecated. Specify `values=cdf(values)` instead.",
|
|
287
|
+
FutureWarning,
|
|
288
|
+
)
|
|
289
|
+
values = cdf(values)
|
|
290
|
+
cdf = uniform(0, 1).cdf
|
|
193
291
|
rvs = uniform(0, 1).rvs
|
|
194
|
-
|
|
292
|
+
eval_points = np.linspace(1 / npoints, 1, npoints)
|
|
293
|
+
|
|
294
|
+
if eval_points is None:
|
|
295
|
+
warnings.warn(
|
|
296
|
+
"In future versions, if `eval_points` is not provided, then the ECDF will be evaluated"
|
|
297
|
+
" at the unique values of the sample. To keep the current behavior, provide "
|
|
298
|
+
"`eval_points` explicitly.",
|
|
299
|
+
BehaviourChangeWarning,
|
|
300
|
+
)
|
|
301
|
+
if confidence_bands == "simulated":
|
|
302
|
+
warnings.warn(
|
|
303
|
+
"For simultaneous bands to be correctly calibrated, specify `eval_points` "
|
|
304
|
+
"independent of the `values`"
|
|
305
|
+
)
|
|
195
306
|
eval_points = np.linspace(values[0], values[-1], npoints)
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
cdf_at_eval_points = np.zeros_like(eval_points)
|
|
204
|
-
rvs = None
|
|
307
|
+
else:
|
|
308
|
+
eval_points = np.asarray(eval_points)
|
|
309
|
+
|
|
310
|
+
if difference or confidence_bands:
|
|
311
|
+
cdf_at_eval_points = cdf(eval_points)
|
|
312
|
+
else:
|
|
313
|
+
cdf_at_eval_points = np.zeros_like(eval_points)
|
|
205
314
|
|
|
206
|
-
x_coord, y_coord = _get_ecdf_points(
|
|
315
|
+
x_coord, y_coord = _get_ecdf_points(values, eval_points, difference)
|
|
207
316
|
|
|
208
317
|
if difference:
|
|
209
318
|
y_coord -= cdf_at_eval_points
|
|
210
319
|
|
|
211
320
|
if confidence_bands:
|
|
212
321
|
ndraws = len(values)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
322
|
+
x_bands = eval_points
|
|
323
|
+
lower, higher = ecdf_confidence_band(
|
|
324
|
+
ndraws,
|
|
325
|
+
eval_points,
|
|
326
|
+
cdf_at_eval_points,
|
|
327
|
+
method=confidence_bands,
|
|
328
|
+
prob=ci_prob,
|
|
329
|
+
num_trials=num_trials,
|
|
330
|
+
rvs=rvs,
|
|
331
|
+
random_state=random_state,
|
|
332
|
+
)
|
|
216
333
|
|
|
217
334
|
if difference:
|
|
218
335
|
lower -= cdf_at_eval_points
|
|
219
336
|
higher -= cdf_at_eval_points
|
|
220
337
|
else:
|
|
221
|
-
lower, higher = None, None
|
|
338
|
+
x_bands, lower, higher = None, None, None
|
|
222
339
|
|
|
223
340
|
ecdf_plot_args = dict(
|
|
224
341
|
x_coord=x_coord,
|
|
225
342
|
y_coord=y_coord,
|
|
226
|
-
x_bands=
|
|
343
|
+
x_bands=x_bands,
|
|
227
344
|
lower=lower,
|
|
228
345
|
higher=higher,
|
|
229
|
-
confidence_bands=confidence_bands,
|
|
230
346
|
figsize=figsize,
|
|
231
347
|
fill_band=fill_band,
|
|
232
348
|
plot_kwargs=plot_kwargs,
|
arviz/plots/essplot.py
CHANGED
|
@@ -138,9 +138,9 @@ def plot_ess(
|
|
|
138
138
|
|
|
139
139
|
References
|
|
140
140
|
----------
|
|
141
|
-
.. [1] Vehtari et al. (
|
|
141
|
+
.. [1] Vehtari et al. (2021). Rank-normalization, folding, and
|
|
142
142
|
localization: An improved Rhat for assessing convergence of
|
|
143
|
-
MCMC
|
|
143
|
+
MCMC. Bayesian analysis, 16(2):667-718.
|
|
144
144
|
|
|
145
145
|
Examples
|
|
146
146
|
--------
|