arviz 0.17.1__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arviz/__init__.py +4 -2
- arviz/data/__init__.py +5 -2
- arviz/data/base.py +102 -11
- arviz/data/converters.py +5 -0
- arviz/data/datasets.py +1 -0
- arviz/data/example_data/data_remote.json +10 -3
- arviz/data/inference_data.py +20 -22
- arviz/data/io_cmdstan.py +5 -3
- arviz/data/io_datatree.py +1 -0
- arviz/data/io_dict.py +5 -3
- arviz/data/io_emcee.py +1 -0
- arviz/data/io_numpyro.py +2 -1
- arviz/data/io_pyjags.py +1 -0
- arviz/data/io_pyro.py +1 -0
- arviz/data/utils.py +1 -0
- arviz/plots/__init__.py +1 -0
- arviz/plots/autocorrplot.py +1 -0
- arviz/plots/backends/bokeh/autocorrplot.py +1 -0
- arviz/plots/backends/bokeh/bpvplot.py +1 -0
- arviz/plots/backends/bokeh/compareplot.py +1 -0
- arviz/plots/backends/bokeh/densityplot.py +1 -0
- arviz/plots/backends/bokeh/distplot.py +1 -0
- arviz/plots/backends/bokeh/dotplot.py +1 -0
- arviz/plots/backends/bokeh/ecdfplot.py +2 -2
- arviz/plots/backends/bokeh/elpdplot.py +1 -0
- arviz/plots/backends/bokeh/energyplot.py +1 -0
- arviz/plots/backends/bokeh/hdiplot.py +1 -0
- arviz/plots/backends/bokeh/kdeplot.py +3 -3
- arviz/plots/backends/bokeh/khatplot.py +9 -3
- arviz/plots/backends/bokeh/lmplot.py +1 -0
- arviz/plots/backends/bokeh/loopitplot.py +1 -0
- arviz/plots/backends/bokeh/mcseplot.py +1 -0
- arviz/plots/backends/bokeh/pairplot.py +3 -6
- arviz/plots/backends/bokeh/parallelplot.py +1 -0
- arviz/plots/backends/bokeh/posteriorplot.py +1 -0
- arviz/plots/backends/bokeh/ppcplot.py +1 -0
- arviz/plots/backends/bokeh/rankplot.py +1 -0
- arviz/plots/backends/bokeh/separationplot.py +1 -0
- arviz/plots/backends/bokeh/traceplot.py +1 -0
- arviz/plots/backends/bokeh/violinplot.py +1 -0
- arviz/plots/backends/matplotlib/autocorrplot.py +1 -0
- arviz/plots/backends/matplotlib/bpvplot.py +1 -0
- arviz/plots/backends/matplotlib/compareplot.py +1 -0
- arviz/plots/backends/matplotlib/densityplot.py +1 -0
- arviz/plots/backends/matplotlib/distcomparisonplot.py +2 -3
- arviz/plots/backends/matplotlib/distplot.py +1 -0
- arviz/plots/backends/matplotlib/dotplot.py +1 -0
- arviz/plots/backends/matplotlib/ecdfplot.py +2 -2
- arviz/plots/backends/matplotlib/elpdplot.py +1 -0
- arviz/plots/backends/matplotlib/energyplot.py +1 -0
- arviz/plots/backends/matplotlib/essplot.py +6 -5
- arviz/plots/backends/matplotlib/forestplot.py +1 -0
- arviz/plots/backends/matplotlib/hdiplot.py +1 -0
- arviz/plots/backends/matplotlib/kdeplot.py +5 -3
- arviz/plots/backends/matplotlib/khatplot.py +8 -3
- arviz/plots/backends/matplotlib/lmplot.py +1 -0
- arviz/plots/backends/matplotlib/loopitplot.py +1 -0
- arviz/plots/backends/matplotlib/mcseplot.py +11 -10
- arviz/plots/backends/matplotlib/pairplot.py +2 -1
- arviz/plots/backends/matplotlib/parallelplot.py +1 -0
- arviz/plots/backends/matplotlib/posteriorplot.py +1 -0
- arviz/plots/backends/matplotlib/ppcplot.py +1 -0
- arviz/plots/backends/matplotlib/rankplot.py +1 -0
- arviz/plots/backends/matplotlib/separationplot.py +1 -0
- arviz/plots/backends/matplotlib/traceplot.py +2 -1
- arviz/plots/backends/matplotlib/tsplot.py +1 -0
- arviz/plots/backends/matplotlib/violinplot.py +2 -1
- arviz/plots/bpvplot.py +3 -2
- arviz/plots/compareplot.py +1 -0
- arviz/plots/densityplot.py +2 -1
- arviz/plots/distcomparisonplot.py +1 -0
- arviz/plots/dotplot.py +3 -2
- arviz/plots/ecdfplot.py +206 -89
- arviz/plots/elpdplot.py +1 -0
- arviz/plots/energyplot.py +1 -0
- arviz/plots/essplot.py +3 -2
- arviz/plots/forestplot.py +2 -1
- arviz/plots/hdiplot.py +3 -2
- arviz/plots/khatplot.py +24 -6
- arviz/plots/lmplot.py +1 -0
- arviz/plots/loopitplot.py +3 -2
- arviz/plots/mcseplot.py +4 -1
- arviz/plots/pairplot.py +1 -0
- arviz/plots/parallelplot.py +1 -0
- arviz/plots/plot_utils.py +3 -4
- arviz/plots/posteriorplot.py +2 -1
- arviz/plots/ppcplot.py +1 -0
- arviz/plots/rankplot.py +3 -2
- arviz/plots/separationplot.py +1 -0
- arviz/plots/traceplot.py +1 -0
- arviz/plots/tsplot.py +1 -0
- arviz/plots/violinplot.py +2 -1
- arviz/preview.py +17 -0
- arviz/rcparams.py +28 -2
- arviz/sel_utils.py +1 -0
- arviz/static/css/style.css +2 -1
- arviz/stats/density_utils.py +2 -1
- arviz/stats/diagnostics.py +15 -11
- arviz/stats/ecdf_utils.py +12 -8
- arviz/stats/stats.py +31 -16
- arviz/stats/stats_refitting.py +1 -0
- arviz/stats/stats_utils.py +13 -7
- arviz/tests/base_tests/test_data.py +15 -2
- arviz/tests/base_tests/test_data_zarr.py +0 -1
- arviz/tests/base_tests/test_diagnostics.py +1 -0
- arviz/tests/base_tests/test_diagnostics_numba.py +2 -6
- arviz/tests/base_tests/test_helpers.py +2 -2
- arviz/tests/base_tests/test_labels.py +1 -0
- arviz/tests/base_tests/test_plot_utils.py +5 -13
- arviz/tests/base_tests/test_plots_matplotlib.py +98 -7
- arviz/tests/base_tests/test_rcparams.py +12 -0
- arviz/tests/base_tests/test_stats.py +5 -5
- arviz/tests/base_tests/test_stats_numba.py +2 -7
- arviz/tests/base_tests/test_stats_utils.py +1 -0
- arviz/tests/base_tests/test_utils.py +3 -2
- arviz/tests/base_tests/test_utils_numba.py +2 -5
- arviz/tests/external_tests/test_data_pystan.py +5 -5
- arviz/tests/helpers.py +18 -10
- arviz/utils.py +4 -0
- arviz/wrappers/__init__.py +1 -0
- {arviz-0.17.1.dist-info → arviz-0.19.0.dist-info}/METADATA +13 -9
- arviz-0.19.0.dist-info/RECORD +183 -0
- arviz-0.17.1.dist-info/RECORD +0 -182
- {arviz-0.17.1.dist-info → arviz-0.19.0.dist-info}/LICENSE +0 -0
- {arviz-0.17.1.dist-info → arviz-0.19.0.dist-info}/WHEEL +0 -0
- {arviz-0.17.1.dist-info → arviz-0.19.0.dist-info}/top_level.txt +0 -0
arviz/plots/ecdfplot.py
CHANGED
|
@@ -1,23 +1,32 @@
|
|
|
1
1
|
"""Plot ecdf or ecdf-difference plot with confidence bands."""
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
2
5
|
import numpy as np
|
|
3
6
|
from scipy.stats import uniform
|
|
4
7
|
|
|
8
|
+
try:
|
|
9
|
+
from scipy.stats import ecdf as scipy_ecdf
|
|
10
|
+
except ImportError:
|
|
11
|
+
scipy_ecdf = None
|
|
12
|
+
|
|
5
13
|
from ..rcparams import rcParams
|
|
6
|
-
from ..stats.ecdf_utils import
|
|
14
|
+
from ..stats.ecdf_utils import ecdf_confidence_band, _get_ecdf_points
|
|
15
|
+
from ..utils import BehaviourChangeWarning
|
|
7
16
|
from .plot_utils import get_plotting_function
|
|
8
17
|
|
|
9
18
|
|
|
10
19
|
def plot_ecdf(
|
|
11
20
|
values,
|
|
12
21
|
values2=None,
|
|
22
|
+
eval_points=None,
|
|
13
23
|
cdf=None,
|
|
14
24
|
difference=False,
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
pointwise=False,
|
|
18
|
-
npoints=100,
|
|
25
|
+
confidence_bands=False,
|
|
26
|
+
ci_prob=None,
|
|
19
27
|
num_trials=500,
|
|
20
|
-
|
|
28
|
+
rvs=None,
|
|
29
|
+
random_state=None,
|
|
21
30
|
figsize=None,
|
|
22
31
|
fill_band=True,
|
|
23
32
|
plot_kwargs=None,
|
|
@@ -27,15 +36,19 @@ def plot_ecdf(
|
|
|
27
36
|
show=None,
|
|
28
37
|
backend=None,
|
|
29
38
|
backend_kwargs=None,
|
|
39
|
+
npoints=100,
|
|
40
|
+
pointwise=False,
|
|
41
|
+
fpr=None,
|
|
42
|
+
pit=False,
|
|
30
43
|
**kwargs,
|
|
31
44
|
):
|
|
32
45
|
r"""Plot ECDF or ECDF-Difference Plot with Confidence bands.
|
|
33
46
|
|
|
34
|
-
Plots of the empirical
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
47
|
+
Plots of the empirical cumulative distribution function (ECDF) of an array. Optionally, a `cdf`
|
|
48
|
+
argument representing a reference CDF may be provided for comparison using a difference ECDF
|
|
49
|
+
plot and/or confidence bands.
|
|
50
|
+
|
|
51
|
+
Alternatively, the PIT for a single dataset may be visualized.
|
|
39
52
|
|
|
40
53
|
Notes
|
|
41
54
|
-----
|
|
@@ -46,26 +59,39 @@ def plot_ecdf(
|
|
|
46
59
|
values : array-like
|
|
47
60
|
Values to plot from an unknown continuous or discrete distribution.
|
|
48
61
|
values2 : array-like, optional
|
|
49
|
-
|
|
62
|
+
Values to compare to the original sample.
|
|
63
|
+
|
|
64
|
+
.. deprecated:: 0.18.0
|
|
65
|
+
Instead use ``cdf=scipy.stats.ecdf(values2).cdf.evaluate``.
|
|
50
66
|
cdf : callable, optional
|
|
51
67
|
Cumulative distribution function of the distribution to compare the original sample against.
|
|
52
68
|
The function must take as input a numpy array of draws from the distribution.
|
|
53
69
|
difference : bool, default False
|
|
54
70
|
If True then plot ECDF-difference plot otherwise ECDF plot.
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
confidence
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
71
|
+
confidence_bands : str or bool
|
|
72
|
+
|
|
73
|
+
- False: No confidence bands are plotted (default).
|
|
74
|
+
- True: Plot bands computed with the default algorithm (subject to change)
|
|
75
|
+
- "pointwise": Compute the pointwise (i.e. marginal) confidence band.
|
|
76
|
+
- "simulated": Use Monte Carlo simulation to estimate a simultaneous confidence
|
|
77
|
+
band.
|
|
78
|
+
|
|
79
|
+
For simultaneous confidence bands to be correctly calibrated, provide `eval_points` that
|
|
80
|
+
are not dependent on the `values`.
|
|
81
|
+
ci_prob : float, default 0.94
|
|
82
|
+
The probability that the true ECDF lies within the confidence band. If `confidence_bands`
|
|
83
|
+
is "pointwise", this is the marginal probability instead of the joint probability.
|
|
84
|
+
eval_points : array-like, optional
|
|
85
|
+
The points at which to evaluate the ECDF. If None, `npoints` uniformly spaced points
|
|
86
|
+
between the data bounds will be used.
|
|
87
|
+
rvs: callable, optional
|
|
88
|
+
A function that takes an integer `ndraws` and optionally the object passed to
|
|
89
|
+
`random_state` and returns an array of `ndraws` samples from the same distribution
|
|
90
|
+
as the original dataset. Required if `confidence_bands` is "simulated" and the variable is discrete.
|
|
91
|
+
random_state : int, numpy.random.Generator or numpy.random.RandomState, optional
|
|
65
92
|
num_trials : int, default 500
|
|
66
|
-
The number of random ECDFs to generate for constructing simultaneous confidence bands
|
|
67
|
-
|
|
68
|
-
The type I error rate s.t `1 - fpr` denotes the confidence level of bands.
|
|
93
|
+
The number of random ECDFs to generate for constructing simultaneous confidence bands
|
|
94
|
+
(if `confidence_bands` is "simulated").
|
|
69
95
|
figsize : (float,float), optional
|
|
70
96
|
Figure size. If `None` it will be defined automatically.
|
|
71
97
|
fill_band : bool, default True
|
|
@@ -90,6 +116,26 @@ def plot_ecdf(
|
|
|
90
116
|
These are kwargs specific to the backend being used, passed to
|
|
91
117
|
:func:`matplotlib.pyplot.subplots` or :class:`bokeh.plotting.figure`.
|
|
92
118
|
For additional documentation check the plotting method of the backend.
|
|
119
|
+
npoints : int, default 100
|
|
120
|
+
The number of evaluation points for the ecdf or ecdf-difference plots, if `eval_points` is
|
|
121
|
+
not provided or `pit` is `True`.
|
|
122
|
+
|
|
123
|
+
.. deprecated:: 0.18.0
|
|
124
|
+
Instead specify ``eval_points=np.linspace(np.min(values), np.max(values), npoints)``
|
|
125
|
+
unless `pit` is `True`.
|
|
126
|
+
pointwise : bool, default False
|
|
127
|
+
|
|
128
|
+
.. deprecated:: 0.18.0
|
|
129
|
+
Instead use `confidence_bands="pointwise"`.
|
|
130
|
+
fpr : float, optional
|
|
131
|
+
|
|
132
|
+
.. deprecated:: 0.18.0
|
|
133
|
+
Instead use `ci_prob=1-fpr`.
|
|
134
|
+
pit : bool, default False
|
|
135
|
+
If True, plot the ECDF or ECDF-difference of the PIT of the sample.
|
|
136
|
+
|
|
137
|
+
.. deprecated:: 0.18.0
|
|
138
|
+
See below example instead.
|
|
93
139
|
|
|
94
140
|
Returns
|
|
95
141
|
-------
|
|
@@ -97,135 +143,206 @@ def plot_ecdf(
|
|
|
97
143
|
|
|
98
144
|
References
|
|
99
145
|
----------
|
|
100
|
-
.. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A
|
|
146
|
+
.. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A. (2022). Graphical Test for
|
|
101
147
|
Discrete Uniformity and its Applications in Goodness of Fit Evaluation and
|
|
102
|
-
Multiple Sample Comparison.
|
|
148
|
+
Multiple Sample Comparison. Statistics and Computing, 32(32).
|
|
103
149
|
|
|
104
150
|
Examples
|
|
105
151
|
--------
|
|
106
|
-
|
|
152
|
+
In a future release, the default behaviour of ``plot_ecdf`` will change.
|
|
153
|
+
To maintain the original behaviour you should do:
|
|
107
154
|
|
|
108
155
|
.. plot::
|
|
109
156
|
:context: close-figs
|
|
110
157
|
|
|
111
158
|
>>> import arviz as az
|
|
112
|
-
>>>
|
|
113
|
-
|
|
159
|
+
>>> import numpy as np
|
|
160
|
+
>>> from scipy.stats import uniform, norm
|
|
161
|
+
>>>
|
|
114
162
|
>>> sample = norm(0,1).rvs(1000)
|
|
115
|
-
>>>
|
|
163
|
+
>>> npoints = 100
|
|
164
|
+
>>> az.plot_ecdf(sample, eval_points=np.linspace(sample.min(), sample.max(), npoints))
|
|
116
165
|
|
|
117
|
-
|
|
166
|
+
However, seeing this warning isn't an indicator of anything being wrong,
|
|
167
|
+
if you are happy to get different behaviour as ArviZ improves and adds
|
|
168
|
+
new algorithms, you can ignore it like so:
|
|
118
169
|
|
|
119
170
|
.. plot::
|
|
120
171
|
:context: close-figs
|
|
121
172
|
|
|
122
|
-
>>>
|
|
123
|
-
>>>
|
|
173
|
+
>>> import warnings
|
|
174
|
+
>>> warnings.filterwarnings("ignore", category=az.utils.BehaviourChangeWarning)
|
|
124
175
|
|
|
125
|
-
Plot
|
|
126
|
-
|
|
176
|
+
Plot an ECDF plot for a given sample evaluated at the sample points. This will become
|
|
177
|
+
the new behaviour when `eval_points` is not provided:
|
|
127
178
|
|
|
128
179
|
.. plot::
|
|
129
180
|
:context: close-figs
|
|
130
181
|
|
|
131
|
-
>>> az.plot_ecdf(sample,
|
|
132
|
-
>>> confidence_bands = True, difference = True)
|
|
182
|
+
>>> az.plot_ecdf(sample, eval_points=np.unique(sample))
|
|
133
183
|
|
|
134
|
-
Plot
|
|
135
|
-
|
|
184
|
+
Plot an ECDF plot with confidence bands for comparing a given sample to a given distribution.
|
|
185
|
+
We manually specify evaluation points independent of the values so that the confidence bands
|
|
186
|
+
are correctly calibrated.
|
|
136
187
|
|
|
137
188
|
.. plot::
|
|
138
189
|
:context: close-figs
|
|
139
190
|
|
|
140
|
-
>>>
|
|
141
|
-
>>>
|
|
191
|
+
>>> distribution = norm(0,1)
|
|
192
|
+
>>> eval_points = np.linspace(*distribution.ppf([0.001, 0.999]), 100)
|
|
193
|
+
>>> az.plot_ecdf(
|
|
194
|
+
>>> sample, eval_points=eval_points,
|
|
195
|
+
>>> cdf=distribution.cdf, confidence_bands=True
|
|
196
|
+
>>> )
|
|
142
197
|
|
|
143
|
-
Plot
|
|
144
|
-
|
|
198
|
+
Plot an ECDF-difference plot with confidence bands for comparing a given sample
|
|
199
|
+
to a given distribution.
|
|
145
200
|
|
|
146
201
|
.. plot::
|
|
147
202
|
:context: close-figs
|
|
148
203
|
|
|
149
|
-
>>> az.plot_ecdf(
|
|
150
|
-
>>>
|
|
204
|
+
>>> az.plot_ecdf(
|
|
205
|
+
>>> sample, cdf=distribution.cdf,
|
|
206
|
+
>>> confidence_bands=True, difference=True
|
|
207
|
+
>>> )
|
|
151
208
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
209
|
+
Plot an ECDF plot with confidence bands for the probability integral transform (PIT) of a
|
|
210
|
+
continuous sample. If drawn from the reference distribution, the PIT values should be uniformly
|
|
211
|
+
distributed.
|
|
155
212
|
|
|
156
213
|
.. plot::
|
|
157
214
|
:context: close-figs
|
|
158
215
|
|
|
159
|
-
>>>
|
|
160
|
-
>>>
|
|
216
|
+
>>> pit_vals = distribution.cdf(sample)
|
|
217
|
+
>>> uniform_dist = uniform(0, 1)
|
|
218
|
+
>>> az.plot_ecdf(
|
|
219
|
+
>>> pit_vals, cdf=uniform_dist.cdf,
|
|
220
|
+
>>> rvs=uniform_dist.rvs, confidence_bands=True
|
|
221
|
+
>>> )
|
|
161
222
|
|
|
162
|
-
|
|
163
|
-
if confidence_bands is None:
|
|
164
|
-
confidence_bands = (values2 is not None) or (cdf is not None)
|
|
223
|
+
Plot an ECDF-difference plot of PIT values.
|
|
165
224
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
if cdf is not None and values2 is not None:
|
|
170
|
-
raise ValueError("To compare sample you need either cdf or values2 and not both")
|
|
171
|
-
|
|
172
|
-
if values2 is None and cdf is None and pit is True:
|
|
173
|
-
raise ValueError("For PIT specify either cdf or values2")
|
|
225
|
+
.. plot::
|
|
226
|
+
:context: close-figs
|
|
174
227
|
|
|
175
|
-
|
|
176
|
-
|
|
228
|
+
>>> az.plot_ecdf(
|
|
229
|
+
>>> pit_vals, cdf = uniform_dist.cdf, rvs = uniform_dist.rvs,
|
|
230
|
+
>>> confidence_bands = True, difference = True
|
|
231
|
+
>>> )
|
|
232
|
+
"""
|
|
233
|
+
if confidence_bands is True:
|
|
234
|
+
if pointwise:
|
|
235
|
+
warnings.warn(
|
|
236
|
+
"`pointwise` has been deprecated. Use `confidence_bands='pointwise'` instead.",
|
|
237
|
+
FutureWarning,
|
|
238
|
+
)
|
|
239
|
+
confidence_bands = "pointwise"
|
|
240
|
+
else:
|
|
241
|
+
confidence_bands = "simulated"
|
|
242
|
+
elif confidence_bands == "simulated" and pointwise:
|
|
243
|
+
raise ValueError("Cannot specify both `confidence_bands='simulated'` and `pointwise=True`")
|
|
244
|
+
|
|
245
|
+
if fpr is not None:
|
|
246
|
+
warnings.warn(
|
|
247
|
+
"`fpr` has been deprecated. Use `ci_prob=1-fpr` or set `rcParam['stats.ci_prob']` to"
|
|
248
|
+
"`1-fpr`.",
|
|
249
|
+
FutureWarning,
|
|
250
|
+
)
|
|
251
|
+
if ci_prob is not None:
|
|
252
|
+
raise ValueError("Cannot specify both `fpr` and `ci_prob`")
|
|
253
|
+
ci_prob = 1 - fpr
|
|
254
|
+
|
|
255
|
+
if ci_prob is None:
|
|
256
|
+
ci_prob = rcParams["stats.ci_prob"]
|
|
177
257
|
|
|
178
258
|
if values2 is not None:
|
|
179
|
-
|
|
180
|
-
|
|
259
|
+
if cdf is not None:
|
|
260
|
+
raise ValueError("You cannot specify both `values2` and `cdf`")
|
|
261
|
+
if scipy_ecdf is None:
|
|
262
|
+
raise ValueError(
|
|
263
|
+
"The `values2` argument is deprecated and `scipy.stats.ecdf` is not available. "
|
|
264
|
+
"Please use `cdf` instead."
|
|
265
|
+
)
|
|
266
|
+
warnings.warn(
|
|
267
|
+
"`values2` has been deprecated. Use `cdf=scipy.stats.ecdf(values2).cdf.evaluate` "
|
|
268
|
+
"instead.",
|
|
269
|
+
FutureWarning,
|
|
270
|
+
)
|
|
271
|
+
cdf = scipy_ecdf(np.ravel(values2)).cdf.evaluate
|
|
272
|
+
|
|
273
|
+
if cdf is None:
|
|
274
|
+
if confidence_bands:
|
|
275
|
+
raise ValueError("For confidence bands you must specify cdf")
|
|
276
|
+
if difference is True:
|
|
277
|
+
raise ValueError("For ECDF difference plot you must specify cdf")
|
|
278
|
+
if pit:
|
|
279
|
+
raise ValueError("For PIT plot you must specify cdf")
|
|
181
280
|
|
|
182
281
|
values = np.ravel(values)
|
|
183
282
|
values.sort()
|
|
184
283
|
|
|
185
284
|
if pit:
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
285
|
+
warnings.warn(
|
|
286
|
+
"`pit` has been deprecated. Specify `values=cdf(values)` instead.",
|
|
287
|
+
FutureWarning,
|
|
288
|
+
)
|
|
289
|
+
values = cdf(values)
|
|
290
|
+
cdf = uniform(0, 1).cdf
|
|
192
291
|
rvs = uniform(0, 1).rvs
|
|
193
|
-
|
|
292
|
+
eval_points = np.linspace(1 / npoints, 1, npoints)
|
|
293
|
+
|
|
294
|
+
if eval_points is None:
|
|
295
|
+
warnings.warn(
|
|
296
|
+
"In future versions, if `eval_points` is not provided, then the ECDF will be evaluated"
|
|
297
|
+
" at the unique values of the sample. To keep the current behavior, provide "
|
|
298
|
+
"`eval_points` explicitly.",
|
|
299
|
+
BehaviourChangeWarning,
|
|
300
|
+
)
|
|
301
|
+
if confidence_bands == "simulated":
|
|
302
|
+
warnings.warn(
|
|
303
|
+
"For simultaneous bands to be correctly calibrated, specify `eval_points` "
|
|
304
|
+
"independent of the `values`"
|
|
305
|
+
)
|
|
194
306
|
eval_points = np.linspace(values[0], values[-1], npoints)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
cdf_at_eval_points = np.zeros_like(eval_points)
|
|
203
|
-
rvs = None
|
|
307
|
+
else:
|
|
308
|
+
eval_points = np.asarray(eval_points)
|
|
309
|
+
|
|
310
|
+
if difference or confidence_bands:
|
|
311
|
+
cdf_at_eval_points = cdf(eval_points)
|
|
312
|
+
else:
|
|
313
|
+
cdf_at_eval_points = np.zeros_like(eval_points)
|
|
204
314
|
|
|
205
|
-
x_coord, y_coord = _get_ecdf_points(
|
|
315
|
+
x_coord, y_coord = _get_ecdf_points(values, eval_points, difference)
|
|
206
316
|
|
|
207
317
|
if difference:
|
|
208
318
|
y_coord -= cdf_at_eval_points
|
|
209
319
|
|
|
210
320
|
if confidence_bands:
|
|
211
321
|
ndraws = len(values)
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
322
|
+
x_bands = eval_points
|
|
323
|
+
lower, higher = ecdf_confidence_band(
|
|
324
|
+
ndraws,
|
|
325
|
+
eval_points,
|
|
326
|
+
cdf_at_eval_points,
|
|
327
|
+
method=confidence_bands,
|
|
328
|
+
prob=ci_prob,
|
|
329
|
+
num_trials=num_trials,
|
|
330
|
+
rvs=rvs,
|
|
331
|
+
random_state=random_state,
|
|
332
|
+
)
|
|
215
333
|
|
|
216
334
|
if difference:
|
|
217
335
|
lower -= cdf_at_eval_points
|
|
218
336
|
higher -= cdf_at_eval_points
|
|
219
337
|
else:
|
|
220
|
-
lower, higher = None, None
|
|
338
|
+
x_bands, lower, higher = None, None, None
|
|
221
339
|
|
|
222
340
|
ecdf_plot_args = dict(
|
|
223
341
|
x_coord=x_coord,
|
|
224
342
|
y_coord=y_coord,
|
|
225
|
-
x_bands=
|
|
343
|
+
x_bands=x_bands,
|
|
226
344
|
lower=lower,
|
|
227
345
|
higher=higher,
|
|
228
|
-
confidence_bands=confidence_bands,
|
|
229
346
|
figsize=figsize,
|
|
230
347
|
fill_band=fill_band,
|
|
231
348
|
plot_kwargs=plot_kwargs,
|
arviz/plots/elpdplot.py
CHANGED
arviz/plots/energyplot.py
CHANGED
arviz/plots/essplot.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Plot quantile or local effective sample sizes."""
|
|
2
|
+
|
|
2
3
|
import numpy as np
|
|
3
4
|
import xarray as xr
|
|
4
5
|
|
|
@@ -137,9 +138,9 @@ def plot_ess(
|
|
|
137
138
|
|
|
138
139
|
References
|
|
139
140
|
----------
|
|
140
|
-
.. [1] Vehtari et al. (
|
|
141
|
+
.. [1] Vehtari et al. (2021). Rank-normalization, folding, and
|
|
141
142
|
localization: An improved Rhat for assessing convergence of
|
|
142
|
-
MCMC
|
|
143
|
+
MCMC. Bayesian analysis, 16(2):667-718.
|
|
143
144
|
|
|
144
145
|
Examples
|
|
145
146
|
--------
|
arviz/plots/forestplot.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Forest plot."""
|
|
2
|
+
|
|
2
3
|
from ..data import convert_to_dataset
|
|
3
4
|
from ..labels import BaseLabeller, NoModelLabeller
|
|
4
5
|
from ..rcparams import rcParams
|
|
@@ -245,7 +246,7 @@ def plot_forest(
|
|
|
245
246
|
width_ratios.append(1)
|
|
246
247
|
|
|
247
248
|
if hdi_prob is None:
|
|
248
|
-
hdi_prob = rcParams["stats.
|
|
249
|
+
hdi_prob = rcParams["stats.ci_prob"]
|
|
249
250
|
elif not 1 >= hdi_prob > 0:
|
|
250
251
|
raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
|
|
251
252
|
|
arviz/plots/hdiplot.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Plot highest density intervals for regression data."""
|
|
2
|
+
|
|
2
3
|
import warnings
|
|
3
4
|
|
|
4
5
|
import numpy as np
|
|
@@ -41,7 +42,7 @@ def plot_hdi(
|
|
|
41
42
|
hdi_data : array_like, optional
|
|
42
43
|
Precomputed HDI values to use. Assumed shape is ``(*x.shape, 2)``.
|
|
43
44
|
hdi_prob : float, optional
|
|
44
|
-
Probability for the highest density interval. Defaults to ``stats.
|
|
45
|
+
Probability for the highest density interval. Defaults to ``stats.ci_prob`` rcParam.
|
|
45
46
|
See :ref:`this section <common_hdi_prob>` for usage examples.
|
|
46
47
|
color : str, default "C1"
|
|
47
48
|
Color used for the limits of the HDI and fill. Should be a valid matplotlib color.
|
|
@@ -154,7 +155,7 @@ def plot_hdi(
|
|
|
154
155
|
else:
|
|
155
156
|
y = np.asarray(y)
|
|
156
157
|
if hdi_prob is None:
|
|
157
|
-
hdi_prob = rcParams["stats.
|
|
158
|
+
hdi_prob = rcParams["stats.ci_prob"]
|
|
158
159
|
elif not 1 >= hdi_prob > 0:
|
|
159
160
|
raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
|
|
160
161
|
hdi_data = hdi(y, hdi_prob=hdi_prob, circular=circular, multimodal=False, **hdi_kwargs)
|
arviz/plots/khatplot.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Pareto tail indices plot."""
|
|
2
|
+
|
|
2
3
|
import logging
|
|
4
|
+
import warnings
|
|
3
5
|
|
|
4
6
|
import numpy as np
|
|
5
7
|
from xarray import DataArray
|
|
@@ -39,10 +41,8 @@ def plot_khat(
|
|
|
39
41
|
|
|
40
42
|
Parameters
|
|
41
43
|
----------
|
|
42
|
-
khats : ELPDData
|
|
43
|
-
The input Pareto tail indices to be plotted.
|
|
44
|
-
Pareto shapes or an array. In this second case, all the values in the array are interpreted
|
|
45
|
-
as Pareto tail indices.
|
|
44
|
+
khats : ELPDData
|
|
45
|
+
The input Pareto tail indices to be plotted.
|
|
46
46
|
color : str or array_like, default "C0"
|
|
47
47
|
Colors of the scatter plot, if color is a str all dots will have the same color,
|
|
48
48
|
if it is the size of the observations, each dot will have the specified color,
|
|
@@ -148,8 +148,9 @@ def plot_khat(
|
|
|
148
148
|
|
|
149
149
|
References
|
|
150
150
|
----------
|
|
151
|
-
.. [1] Vehtari, A., Simpson, D., Gelman, A., Yao, Y., Gabry, J
|
|
152
|
-
|
|
151
|
+
.. [1] Vehtari, A., Simpson, D., Gelman, A., Yao, Y., Gabry, J. (2024).
|
|
152
|
+
Pareto Smoothed Importance Sampling. Journal of Machine Learning
|
|
153
|
+
Research, 25(72):1-58.
|
|
153
154
|
|
|
154
155
|
"""
|
|
155
156
|
if annotate:
|
|
@@ -163,13 +164,29 @@ def plot_khat(
|
|
|
163
164
|
color = "C0"
|
|
164
165
|
|
|
165
166
|
if isinstance(khats, np.ndarray):
|
|
167
|
+
warnings.warn(
|
|
168
|
+
"support for arrays will be deprecated, please use ELPDData."
|
|
169
|
+
"The reason for this, is that we need to know the numbers of draws"
|
|
170
|
+
"sampled from the posterior",
|
|
171
|
+
FutureWarning,
|
|
172
|
+
)
|
|
166
173
|
khats = khats.flatten()
|
|
167
174
|
xlabels = False
|
|
168
175
|
legend = False
|
|
169
176
|
dims = []
|
|
177
|
+
good_k = None
|
|
170
178
|
else:
|
|
171
179
|
if isinstance(khats, ELPDData):
|
|
180
|
+
good_k = khats.good_k
|
|
172
181
|
khats = khats.pareto_k
|
|
182
|
+
else:
|
|
183
|
+
good_k = None
|
|
184
|
+
warnings.warn(
|
|
185
|
+
"support for DataArrays will be deprecated, please use ELPDData."
|
|
186
|
+
"The reason for this, is that we need to know the numbers of draws"
|
|
187
|
+
"sampled from the posterior",
|
|
188
|
+
FutureWarning,
|
|
189
|
+
)
|
|
173
190
|
if not isinstance(khats, DataArray):
|
|
174
191
|
raise ValueError("Incorrect khat data input. Check the documentation")
|
|
175
192
|
|
|
@@ -190,6 +207,7 @@ def plot_khat(
|
|
|
190
207
|
figsize=figsize,
|
|
191
208
|
xdata=xdata,
|
|
192
209
|
khats=khats,
|
|
210
|
+
good_k=good_k,
|
|
193
211
|
kwargs=kwargs,
|
|
194
212
|
threshold=threshold,
|
|
195
213
|
coord_labels=coord_labels,
|
arviz/plots/lmplot.py
CHANGED
arviz/plots/loopitplot.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Plot LOO-PIT predictive checks of inference data."""
|
|
2
|
+
|
|
2
3
|
import numpy as np
|
|
3
4
|
from scipy import stats
|
|
4
5
|
|
|
@@ -54,7 +55,7 @@ def plot_loo_pit(
|
|
|
54
55
|
In this case, instead of overlaying uniform distributions, the beta ``hdi_prob``
|
|
55
56
|
around the theoretical uniform CDF is shown. This approximation only holds
|
|
56
57
|
for large S and ECDF values not very close to 0 nor 1. For more information, see
|
|
57
|
-
`Vehtari et al. (
|
|
58
|
+
`Vehtari et al. (2021)`, `Appendix G <https://avehtari.github.io/rhat_ess/rhat_ess.html>`_.
|
|
58
59
|
ecdf_fill : bool, optional
|
|
59
60
|
Use :meth:`matplotlib.axes.Axes.fill_between` to mark the area
|
|
60
61
|
inside the credible interval. Otherwise, plot the
|
|
@@ -158,7 +159,7 @@ def plot_loo_pit(
|
|
|
158
159
|
x_vals = None
|
|
159
160
|
|
|
160
161
|
if hdi_prob is None:
|
|
161
|
-
hdi_prob = rcParams["stats.
|
|
162
|
+
hdi_prob = rcParams["stats.ci_prob"]
|
|
162
163
|
elif not 1 >= hdi_prob > 0:
|
|
163
164
|
raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
|
|
164
165
|
|
arviz/plots/mcseplot.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Plot quantile MC standard error."""
|
|
2
|
+
|
|
2
3
|
import numpy as np
|
|
3
4
|
import xarray as xr
|
|
4
5
|
|
|
@@ -108,7 +109,9 @@ def plot_mcse(
|
|
|
108
109
|
|
|
109
110
|
References
|
|
110
111
|
----------
|
|
111
|
-
|
|
112
|
+
.. [1] Vehtari et al. (2021). Rank-normalization, folding, and
|
|
113
|
+
localization: An improved Rhat for assessing convergence of
|
|
114
|
+
MCMC. Bayesian analysis, 16(2):667-718.
|
|
112
115
|
|
|
113
116
|
Examples
|
|
114
117
|
--------
|
arviz/plots/pairplot.py
CHANGED
arviz/plots/parallelplot.py
CHANGED
arviz/plots/plot_utils.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Utilities for plotting."""
|
|
2
|
+
|
|
2
3
|
import importlib
|
|
3
4
|
import warnings
|
|
4
5
|
from typing import Any, Dict
|
|
@@ -244,10 +245,8 @@ def format_coords_as_labels(dataarray, skip_dims=None):
|
|
|
244
245
|
coord_labels = coord_labels.values
|
|
245
246
|
if isinstance(coord_labels[0], tuple):
|
|
246
247
|
fmt = ", ".join(["{}" for _ in coord_labels[0]])
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
coord_labels[:] = [f"{s}" for s in coord_labels]
|
|
250
|
-
return coord_labels
|
|
248
|
+
return np.array([fmt.format(*x) for x in coord_labels])
|
|
249
|
+
return np.array([f"{s}" for s in coord_labels])
|
|
251
250
|
|
|
252
251
|
|
|
253
252
|
def set_xticklabels(ax, coord_labels):
|
arviz/plots/posteriorplot.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Plot posterior densities."""
|
|
2
|
+
|
|
2
3
|
from ..data import convert_to_dataset
|
|
3
4
|
from ..labels import BaseLabeller
|
|
4
5
|
from ..sel_utils import xarray_var_iter
|
|
@@ -236,7 +237,7 @@ def plot_posterior(
|
|
|
236
237
|
labeller = BaseLabeller()
|
|
237
238
|
|
|
238
239
|
if hdi_prob is None:
|
|
239
|
-
hdi_prob = rcParams["stats.
|
|
240
|
+
hdi_prob = rcParams["stats.ci_prob"]
|
|
240
241
|
elif hdi_prob not in (None, "hide"):
|
|
241
242
|
if not 1 >= hdi_prob > 0:
|
|
242
243
|
raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
|