arviz 0.18.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. arviz/__init__.py +2 -1
  2. arviz/data/base.py +18 -7
  3. arviz/data/converters.py +7 -3
  4. arviz/data/inference_data.py +8 -0
  5. arviz/data/io_cmdstan.py +4 -0
  6. arviz/data/io_numpyro.py +1 -1
  7. arviz/plots/backends/bokeh/ecdfplot.py +1 -2
  8. arviz/plots/backends/bokeh/khatplot.py +8 -3
  9. arviz/plots/backends/bokeh/pairplot.py +2 -6
  10. arviz/plots/backends/matplotlib/ecdfplot.py +1 -2
  11. arviz/plots/backends/matplotlib/khatplot.py +7 -3
  12. arviz/plots/backends/matplotlib/traceplot.py +1 -1
  13. arviz/plots/bpvplot.py +2 -2
  14. arviz/plots/compareplot.py +4 -4
  15. arviz/plots/densityplot.py +1 -1
  16. arviz/plots/dotplot.py +2 -2
  17. arviz/plots/ecdfplot.py +213 -89
  18. arviz/plots/essplot.py +2 -2
  19. arviz/plots/forestplot.py +3 -3
  20. arviz/plots/hdiplot.py +2 -2
  21. arviz/plots/kdeplot.py +9 -2
  22. arviz/plots/khatplot.py +23 -6
  23. arviz/plots/loopitplot.py +2 -2
  24. arviz/plots/mcseplot.py +3 -1
  25. arviz/plots/plot_utils.py +2 -4
  26. arviz/plots/posteriorplot.py +1 -1
  27. arviz/plots/rankplot.py +2 -2
  28. arviz/plots/violinplot.py +1 -1
  29. arviz/preview.py +17 -0
  30. arviz/rcparams.py +27 -2
  31. arviz/stats/diagnostics.py +13 -9
  32. arviz/stats/ecdf_utils.py +168 -10
  33. arviz/stats/stats.py +41 -20
  34. arviz/stats/stats_utils.py +8 -6
  35. arviz/tests/base_tests/test_data.py +11 -2
  36. arviz/tests/base_tests/test_data_zarr.py +0 -1
  37. arviz/tests/base_tests/test_diagnostics_numba.py +2 -7
  38. arviz/tests/base_tests/test_helpers.py +2 -2
  39. arviz/tests/base_tests/test_plot_utils.py +5 -13
  40. arviz/tests/base_tests/test_plots_matplotlib.py +95 -2
  41. arviz/tests/base_tests/test_rcparams.py +12 -0
  42. arviz/tests/base_tests/test_stats.py +1 -1
  43. arviz/tests/base_tests/test_stats_ecdf_utils.py +15 -2
  44. arviz/tests/base_tests/test_stats_numba.py +2 -7
  45. arviz/tests/base_tests/test_utils_numba.py +2 -5
  46. arviz/tests/external_tests/test_data_pystan.py +5 -5
  47. arviz/tests/helpers.py +17 -9
  48. arviz/utils.py +4 -0
  49. {arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/METADATA +23 -19
  50. {arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/RECORD +53 -52
  51. {arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/WHEEL +1 -1
  52. {arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/LICENSE +0 -0
  53. {arviz-0.18.0.dist-info → arviz-0.20.0.dist-info}/top_level.txt +0 -0
arviz/plots/ecdfplot.py CHANGED
@@ -1,24 +1,32 @@
1
1
  """Plot ecdf or ecdf-difference plot with confidence bands."""
2
2
 
3
+ import warnings
4
+
3
5
  import numpy as np
4
6
  from scipy.stats import uniform
5
7
 
8
+ try:
9
+ from scipy.stats import ecdf as scipy_ecdf
10
+ except ImportError:
11
+ scipy_ecdf = None
12
+
6
13
  from ..rcparams import rcParams
7
- from ..stats.ecdf_utils import compute_ecdf, ecdf_confidence_band, _get_ecdf_points
14
+ from ..stats.ecdf_utils import ecdf_confidence_band, _get_ecdf_points
15
+ from ..utils import BehaviourChangeWarning
8
16
  from .plot_utils import get_plotting_function
9
17
 
10
18
 
11
19
  def plot_ecdf(
12
20
  values,
13
21
  values2=None,
22
+ eval_points=None,
14
23
  cdf=None,
15
24
  difference=False,
16
- pit=False,
17
- confidence_bands=None,
18
- pointwise=False,
19
- npoints=100,
25
+ confidence_bands=False,
26
+ ci_prob=None,
20
27
  num_trials=500,
21
- fpr=0.05,
28
+ rvs=None,
29
+ random_state=None,
22
30
  figsize=None,
23
31
  fill_band=True,
24
32
  plot_kwargs=None,
@@ -28,15 +36,19 @@ def plot_ecdf(
28
36
  show=None,
29
37
  backend=None,
30
38
  backend_kwargs=None,
39
+ npoints=100,
40
+ pointwise=False,
41
+ fpr=None,
42
+ pit=False,
31
43
  **kwargs,
32
44
  ):
33
45
  r"""Plot ECDF or ECDF-Difference Plot with Confidence bands.
34
46
 
35
- Plots of the empirical CDF estimates of an array. When `values2` argument is provided,
36
- the two empirical CDFs are overlaid with the distribution of `values` on top
37
- (in a darker shade) and confidence bands in a more transparent shade. Optionally, the difference
38
- between the two empirical CDFs can be computed, and the PIT for a single dataset or a comparison
39
- between two samples.
47
+ Plots of the empirical cumulative distribution function (ECDF) of an array. Optionally, a `cdf`
48
+ argument representing a reference CDF may be provided for comparison using a difference ECDF
49
+ plot and/or confidence bands.
50
+
51
+ Alternatively, the PIT for a single dataset may be visualized.
40
52
 
41
53
  Notes
42
54
  -----
@@ -47,26 +59,40 @@ def plot_ecdf(
47
59
  values : array-like
48
60
  Values to plot from an unknown continuous or discrete distribution.
49
61
  values2 : array-like, optional
50
- Values to compare to the original sample.
62
+ Values to compare to the original sample.
63
+
64
+ .. deprecated:: 0.18.0
65
+ Instead use ``cdf=scipy.stats.ecdf(values2).cdf.evaluate``.
51
66
  cdf : callable, optional
52
67
  Cumulative distribution function of the distribution to compare the original sample.
53
68
  The function must take as input a numpy array of draws from the distribution.
54
69
  difference : bool, default False
55
70
  If True then plot ECDF-difference plot otherwise ECDF plot.
56
- pit : bool, default False
57
- If True plots the ECDF or ECDF-diff of PIT of sample.
58
- confidence_bands : bool, default None
59
- If True plots the simultaneous or pointwise confidence bands with `1 - fpr`
60
- confidence level.
61
- pointwise : bool, default False
62
- If True plots pointwise confidence bands otherwise simultaneous bands.
63
- npoints : int, default 100
64
- This denotes the granularity size of our plot i.e the number of evaluation points
65
- for the ecdf or ecdf-difference plots.
71
+ confidence_bands : str or bool
72
+
73
+ - False: No confidence bands are plotted (default).
74
+ - True: Plot bands computed with the default algorithm (subject to change)
75
+ - "pointwise": Compute the pointwise (i.e. marginal) confidence band.
76
+ - "optimized": Use optimization to estimate a simultaneous confidence band.
77
+ - "simulated": Use Monte Carlo simulation to estimate a simultaneous confidence
78
+ band.
79
+
80
+ For simultaneous confidence bands to be correctly calibrated, provide `eval_points` that
81
+ are not dependent on the `values`.
82
+ ci_prob : float, default 0.94
83
+ The probability that the true ECDF lies within the confidence band. If `confidence_bands`
84
+ is "pointwise", this is the marginal probability instead of the joint probability.
85
+ eval_points : array-like, optional
86
+ The points at which to evaluate the ECDF. If None, `npoints` uniformly spaced points
87
+ between the data bounds will be used.
88
+ rvs: callable, optional
89
+ A function that takes an integer `ndraws` and optionally the object passed to
90
+ `random_state` and returns an array of `ndraws` samples from the same distribution
91
+ as the original dataset. Required if `confidence_bands` is "simulated" and variable is discrete.
92
+ random_state : int, numpy.random.Generator or numpy.random.RandomState, optional
66
93
  num_trials : int, default 500
67
- The number of random ECDFs to generate for constructing simultaneous confidence bands.
68
- fpr : float, default 0.05
69
- The type I error rate s.t `1 - fpr` denotes the confidence level of bands.
94
+ The number of random ECDFs to generate for constructing simultaneous confidence bands
95
+ (if `confidence_bands` is "simulated").
70
96
  figsize : (float,float), optional
71
97
  Figure size. If `None` it will be defined automatically.
72
98
  fill_band : bool, default True
@@ -91,6 +117,26 @@ def plot_ecdf(
91
117
  These are kwargs specific to the backend being used, passed to
92
118
  :func:`matplotlib.pyplot.subplots` or :class:`bokeh.plotting.figure`.
93
119
  For additional documentation check the plotting method of the backend.
120
+ npoints : int, default 100
121
+ The number of evaluation points for the ecdf or ecdf-difference plots, if `eval_points` is
122
+ not provided or `pit` is `True`.
123
+
124
+ .. deprecated:: 0.18.0
125
+ Instead specify ``eval_points=np.linspace(np.min(values), np.max(values), npoints)``
126
+ unless `pit` is `True`.
127
+ pointwise : bool, default False
128
+
129
+ .. deprecated:: 0.18.0
130
+ Instead use `confidence_bands="pointwise"`.
131
+ fpr : float, optional
132
+
133
+ .. deprecated:: 0.18.0
134
+ Instead use `ci_prob=1-fpr`.
135
+ pit : bool, default False
136
+ If True plots the ECDF or ECDF-diff of PIT of sample.
137
+
138
+ .. deprecated:: 0.18.0
139
+ See below example instead.
94
140
 
95
141
  Returns
96
142
  -------
@@ -98,135 +144,213 @@ def plot_ecdf(
98
144
 
99
145
  References
100
146
  ----------
101
- .. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A., 2021. Graphical Test for
147
+ .. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A. (2022). Graphical Test for
102
148
  Discrete Uniformity and its Applications in Goodness of Fit Evaluation and
103
- Multiple Sample Comparison. arXiv preprint arXiv:2103.10522.
149
+ Multiple Sample Comparison. Statistics and Computing, 32(32).
104
150
 
105
151
  Examples
106
152
  --------
107
- Plot ecdf plot for a given sample
153
+ In a future release, the default behaviour of ``plot_ecdf`` will change.
154
+ To maintain the original behaviour you should do:
108
155
 
109
156
  .. plot::
110
157
  :context: close-figs
111
158
 
112
159
  >>> import arviz as az
113
- >>> from scipy.stats import uniform, binom, norm
114
-
160
+ >>> import numpy as np
161
+ >>> from scipy.stats import uniform, norm
162
+ >>>
115
163
  >>> sample = norm(0,1).rvs(1000)
116
- >>> az.plot_ecdf(sample)
164
+ >>> npoints = 100
165
+ >>> az.plot_ecdf(sample, eval_points=np.linspace(sample.min(), sample.max(), npoints))
117
166
 
118
- Plot ecdf plot with confidence bands for comparing a given sample w.r.t a given distribution
167
+ However, seeing this warning isn't an indicator of anything being wrong,
168
+ if you are happy to get different behaviour as ArviZ improves and adds
169
+ new algorithms you can ignore it like so:
119
170
 
120
171
  .. plot::
121
172
  :context: close-figs
122
173
 
123
- >>> distribution = norm(0,1)
124
- >>> az.plot_ecdf(sample, cdf = distribution.cdf, confidence_bands = True)
174
+ >>> import warnings
175
+ >>> warnings.filterwarnings("ignore", category=az.utils.BehaviourChangeWarning)
125
176
 
126
- Plot ecdf-difference plot with confidence bands for comparing a given sample
127
- w.r.t a given distribution
177
+ Plot an ECDF plot for a given sample evaluated at the sample points. This will become
178
+ the new behaviour when `eval_points` is not provided:
128
179
 
129
180
  .. plot::
130
181
  :context: close-figs
131
182
 
132
- >>> az.plot_ecdf(sample, cdf = distribution.cdf,
133
- >>> confidence_bands = True, difference = True)
183
+ >>> az.plot_ecdf(sample, eval_points=np.unique(sample))
134
184
 
135
- Plot ecdf plot with confidence bands for PIT of sample for comparing a given sample
136
- w.r.t a given distribution
185
+ Plot an ECDF plot with confidence bands for comparing a given sample to a given distribution.
186
+ We manually specify evaluation points independent of the values so that the confidence bands
187
+ are correctly calibrated.
137
188
 
138
189
  .. plot::
139
190
  :context: close-figs
140
191
 
141
- >>> az.plot_ecdf(sample, cdf = distribution.cdf,
142
- >>> confidence_bands = True, pit = True)
192
+ >>> distribution = norm(0,1)
193
+ >>> eval_points = np.linspace(*distribution.ppf([0.001, 0.999]), 100)
194
+ >>> az.plot_ecdf(
195
+ >>> sample, eval_points=eval_points,
196
+ >>> cdf=distribution.cdf, confidence_bands=True
197
+ >>> )
143
198
 
144
- Plot ecdf-difference plot with confidence bands for PIT of sample for comparing a given
145
- sample w.r.t a given distribution
199
+ Plot an ECDF-difference plot with confidence bands for comparing a given sample
200
+ to a given distribution.
146
201
 
147
202
  .. plot::
148
203
  :context: close-figs
149
204
 
150
- >>> az.plot_ecdf(sample, cdf = distribution.cdf,
151
- >>> confidence_bands = True, difference = True, pit = True)
205
+ >>> az.plot_ecdf(
206
+ >>> sample, cdf=distribution.cdf,
207
+ >>> confidence_bands=True, difference=True
208
+ >>> )
152
209
 
153
- You could also plot the above w.r.t another sample rather than a given distribution.
154
- For eg: Plot ecdf-difference plot with confidence bands for PIT of sample for
155
- comparing a given sample w.r.t a given sample
210
+ Plot an ECDF plot with confidence bands for the probability integral transform (PIT) of a
211
+ continuous sample. If drawn from the reference distribution, the PIT values should be uniformly
212
+ distributed.
156
213
 
157
214
  .. plot::
158
215
  :context: close-figs
159
216
 
160
- >>> sample2 = norm(0,1).rvs(5000)
161
- >>> az.plot_ecdf(sample, sample2, confidence_bands = True, difference = True, pit = True)
162
-
163
- """
164
- if confidence_bands is None:
165
- confidence_bands = (values2 is not None) or (cdf is not None)
166
-
167
- if values2 is None and cdf is None and confidence_bands is True:
168
- raise ValueError("For confidence bands you need to specify values2 or the cdf")
217
+ >>> pit_vals = distribution.cdf(sample)
218
+ >>> uniform_dist = uniform(0, 1)
219
+ >>> az.plot_ecdf(
220
+ >>> pit_vals, cdf=uniform_dist.cdf, confidence_bands=True,
221
+ >>> )
169
222
 
170
- if cdf is not None and values2 is not None:
171
- raise ValueError("To compare sample you need either cdf or values2 and not both")
223
+ Plot an ECDF-difference plot of PIT values.
172
224
 
173
- if values2 is None and cdf is None and pit is True:
174
- raise ValueError("For PIT specify either cdf or values2")
225
+ .. plot::
226
+ :context: close-figs
175
227
 
176
- if values2 is None and cdf is None and difference is True:
177
- raise ValueError("For ECDF difference plot need either cdf or values2")
228
+ >>> az.plot_ecdf(
229
+ >>> pit_vals, cdf = uniform_dist.cdf, confidence_bands = True,
230
+ >>> difference = True
231
+ >>> )
232
+ """
233
+ if confidence_bands is True:
234
+ if pointwise:
235
+ warnings.warn(
236
+ "`pointwise` has been deprecated. Use `confidence_bands='pointwise'` instead.",
237
+ FutureWarning,
238
+ )
239
+ confidence_bands = "pointwise"
240
+ else:
241
+ confidence_bands = "auto"
242
+ # if pointwise specified, confidence_bands must be a bool or 'pointwise'
243
+ elif confidence_bands not in [False, "pointwise"] and pointwise:
244
+ raise ValueError(
245
+ f"Cannot specify both `confidence_bands='{confidence_bands}'` and `pointwise=True`"
246
+ )
247
+
248
+ if fpr is not None:
249
+ warnings.warn(
250
+ "`fpr` has been deprecated. Use `ci_prob=1-fpr` or set `rcParam['stats.ci_prob']` to"
251
+ "`1-fpr`.",
252
+ FutureWarning,
253
+ )
254
+ if ci_prob is not None:
255
+ raise ValueError("Cannot specify both `fpr` and `ci_prob`")
256
+ ci_prob = 1 - fpr
257
+
258
+ if ci_prob is None:
259
+ ci_prob = rcParams["stats.ci_prob"]
178
260
 
179
261
  if values2 is not None:
180
- values2 = np.ravel(values2)
181
- values2.sort()
262
+ if cdf is not None:
263
+ raise ValueError("You cannot specify both `values2` and `cdf`")
264
+ if scipy_ecdf is None:
265
+ raise ValueError(
266
+ "The `values2` argument is deprecated and `scipy.stats.ecdf` is not available. "
267
+ "Please use `cdf` instead."
268
+ )
269
+ warnings.warn(
270
+ "`values2` has been deprecated. Use `cdf=scipy.stats.ecdf(values2).cdf.evaluate` "
271
+ "instead.",
272
+ FutureWarning,
273
+ )
274
+ cdf = scipy_ecdf(np.ravel(values2)).cdf.evaluate
275
+
276
+ if cdf is None:
277
+ if confidence_bands:
278
+ raise ValueError("For confidence bands you must specify cdf")
279
+ if difference is True:
280
+ raise ValueError("For ECDF difference plot you must specify cdf")
281
+ if pit:
282
+ raise ValueError("For PIT plot you must specify cdf")
182
283
 
183
284
  values = np.ravel(values)
184
285
  values.sort()
185
286
 
186
287
  if pit:
187
- eval_points = np.linspace(1 / npoints, 1, npoints)
188
- if cdf:
189
- sample = cdf(values)
190
- else:
191
- sample = compute_ecdf(values2, values) / len(values2)
192
- cdf_at_eval_points = eval_points
288
+ warnings.warn(
289
+ "`pit` has been deprecated. Specify `values=cdf(values)` instead.",
290
+ FutureWarning,
291
+ )
292
+ values = cdf(values)
293
+ cdf = uniform(0, 1).cdf
193
294
  rvs = uniform(0, 1).rvs
194
- else:
295
+ eval_points = np.linspace(1 / npoints, 1, npoints)
296
+
297
+ if eval_points is None:
298
+ warnings.warn(
299
+ "In future versions, if `eval_points` is not provided, then the ECDF will be evaluated"
300
+ " at the unique values of the sample. To keep the current behavior, provide "
301
+ "`eval_points` explicitly.",
302
+ BehaviourChangeWarning,
303
+ )
304
+ if confidence_bands in ["optimized", "simulated"]:
305
+ warnings.warn(
306
+ "For simultaneous bands to be correctly calibrated, specify `eval_points` "
307
+ "independent of the `values`"
308
+ )
195
309
  eval_points = np.linspace(values[0], values[-1], npoints)
196
- sample = values
197
- if confidence_bands or difference:
198
- if cdf:
199
- cdf_at_eval_points = cdf(eval_points)
200
- else:
201
- cdf_at_eval_points = compute_ecdf(values2, eval_points)
202
- else:
203
- cdf_at_eval_points = np.zeros_like(eval_points)
204
- rvs = None
310
+ else:
311
+ eval_points = np.asarray(eval_points)
205
312
 
206
- x_coord, y_coord = _get_ecdf_points(sample, eval_points, difference)
313
+ if difference or confidence_bands:
314
+ cdf_at_eval_points = cdf(eval_points)
315
+ else:
316
+ cdf_at_eval_points = np.zeros_like(eval_points)
317
+
318
+ x_coord, y_coord = _get_ecdf_points(values, eval_points, difference)
207
319
 
208
320
  if difference:
209
321
  y_coord -= cdf_at_eval_points
210
322
 
211
323
  if confidence_bands:
212
324
  ndraws = len(values)
213
- band_kwargs = {"prob": 1 - fpr, "num_trials": num_trials, "rvs": rvs, "random_state": None}
214
- band_kwargs["method"] = "pointwise" if pointwise else "simulated"
215
- lower, higher = ecdf_confidence_band(ndraws, eval_points, cdf_at_eval_points, **band_kwargs)
325
+ if confidence_bands == "auto":
326
+ if ndraws < 200 or num_trials >= 250 * np.sqrt(ndraws):
327
+ confidence_bands = "optimized"
328
+ else:
329
+ confidence_bands = "simulated"
330
+ x_bands = eval_points
331
+ lower, higher = ecdf_confidence_band(
332
+ ndraws,
333
+ eval_points,
334
+ cdf_at_eval_points,
335
+ method=confidence_bands,
336
+ prob=ci_prob,
337
+ num_trials=num_trials,
338
+ rvs=rvs,
339
+ random_state=random_state,
340
+ )
216
341
 
217
342
  if difference:
218
343
  lower -= cdf_at_eval_points
219
344
  higher -= cdf_at_eval_points
220
345
  else:
221
- lower, higher = None, None
346
+ x_bands, lower, higher = None, None, None
222
347
 
223
348
  ecdf_plot_args = dict(
224
349
  x_coord=x_coord,
225
350
  y_coord=y_coord,
226
- x_bands=eval_points,
351
+ x_bands=x_bands,
227
352
  lower=lower,
228
353
  higher=higher,
229
- confidence_bands=confidence_bands,
230
354
  figsize=figsize,
231
355
  fill_band=fill_band,
232
356
  plot_kwargs=plot_kwargs,
arviz/plots/essplot.py CHANGED
@@ -138,9 +138,9 @@ def plot_ess(
138
138
 
139
139
  References
140
140
  ----------
141
- .. [1] Vehtari et al. (2019). Rank-normalization, folding, and
141
+ .. [1] Vehtari et al. (2021). Rank-normalization, folding, and
142
142
  localization: An improved Rhat for assessing convergence of
143
- MCMC https://arxiv.org/abs/1903.08008
143
+ MCMC. Bayesian analysis, 16(2):667-718.
144
144
 
145
145
  Examples
146
146
  --------
arviz/plots/forestplot.py CHANGED
@@ -55,8 +55,8 @@ def plot_forest(
55
55
  Specify the kind of plot:
56
56
 
57
57
  * The ``kind="forestplot"`` generates credible intervals, where the central points are the
58
- estimated posterior means, the thick lines are the central quartiles, and the thin lines
59
- represent the :math:`100\times`(`hdi_prob`)% highest density intervals.
58
+ estimated posterior median, the thick lines are the central quartiles, and the thin lines
59
+ represent the :math:`100\times(hdi\_prob)\%` highest density intervals.
60
60
  * The ``kind="ridgeplot"`` option generates density plots (kernel density estimate or
61
61
  histograms) in the same graph. Ridge plots can be configured to have different overlap,
62
62
  truncation bounds and quantile markers.
@@ -246,7 +246,7 @@ def plot_forest(
246
246
  width_ratios.append(1)
247
247
 
248
248
  if hdi_prob is None:
249
- hdi_prob = rcParams["stats.hdi_prob"]
249
+ hdi_prob = rcParams["stats.ci_prob"]
250
250
  elif not 1 >= hdi_prob > 0:
251
251
  raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
252
252
 
arviz/plots/hdiplot.py CHANGED
@@ -42,7 +42,7 @@ def plot_hdi(
42
42
  hdi_data : array_like, optional
43
43
  Precomputed HDI values to use. Assumed shape is ``(*x.shape, 2)``.
44
44
  hdi_prob : float, optional
45
- Probability for the highest density interval. Defaults to ``stats.hdi_prob`` rcParam.
45
+ Probability for the highest density interval. Defaults to ``stats.ci_prob`` rcParam.
46
46
  See :ref:`this section <common_ hdi_prob>` for usage examples.
47
47
  color : str, default "C1"
48
48
  Color used for the limits of the HDI and fill. Should be a valid matplotlib color.
@@ -155,7 +155,7 @@ def plot_hdi(
155
155
  else:
156
156
  y = np.asarray(y)
157
157
  if hdi_prob is None:
158
- hdi_prob = rcParams["stats.hdi_prob"]
158
+ hdi_prob = rcParams["stats.ci_prob"]
159
159
  elif not 1 >= hdi_prob > 0:
160
160
  raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
161
161
  hdi_data = hdi(y, hdi_prob=hdi_prob, circular=circular, multimodal=False, **hdi_kwargs)
arviz/plots/kdeplot.py CHANGED
@@ -72,7 +72,7 @@ def plot_kde(
72
72
  If True plot the 2D KDE using contours, otherwise plot a smooth 2D KDE.
73
73
  hdi_probs : list, optional
74
74
  Plots highest density credibility regions for the provided probabilities for a 2D KDE.
75
- Defaults to matplotlib chosen levels with no fixed probability associated.
75
+ Defaults to [0.5, 0.8, 0.94].
76
76
  fill_last : bool, default False
77
77
  If True fill the last contour of the 2D KDE plot.
78
78
  figsize : (float, float), optional
@@ -270,6 +270,9 @@ def plot_kde(
270
270
  gridsize = (128, 128) if contour else (256, 256)
271
271
  density, xmin, xmax, ymin, ymax = _fast_kde_2d(values, values2, gridsize=gridsize)
272
272
 
273
+ if hdi_probs is None:
274
+ hdi_probs = [0.5, 0.8, 0.94]
275
+
273
276
  if hdi_probs is not None:
274
277
  # Check hdi probs are within bounds (0, 1)
275
278
  if min(hdi_probs) <= 0 or max(hdi_probs) >= 1:
@@ -289,7 +292,11 @@ def plot_kde(
289
292
  "Using 'hdi_probs' in favor of 'levels'.",
290
293
  UserWarning,
291
294
  )
292
- contour_kwargs["levels"] = contour_level_list
295
+
296
+ if backend == "bokeh":
297
+ contour_kwargs["levels"] = contour_level_list
298
+ elif backend == "matplotlib":
299
+ contour_kwargs["levels"] = contour_level_list[1:]
293
300
 
294
301
  contourf_kwargs = _init_kwargs_dict(contourf_kwargs)
295
302
  if "levels" in contourf_kwargs:
arviz/plots/khatplot.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Pareto tail indices plot."""
2
2
 
3
3
  import logging
4
+ import warnings
4
5
 
5
6
  import numpy as np
6
7
  from xarray import DataArray
@@ -40,10 +41,8 @@ def plot_khat(
40
41
 
41
42
  Parameters
42
43
  ----------
43
- khats : ELPDData or array-like
44
- The input Pareto tail indices to be plotted. It can be an ``ELPDData`` object containing
45
- Pareto shapes or an array. In this second case, all the values in the array are interpreted
46
- as Pareto tail indices.
44
+ khats : ELPDData
45
+ The input Pareto tail indices to be plotted.
47
46
  color : str or array_like, default "C0"
48
47
  Colors of the scatter plot, if color is a str all dots will have the same color,
49
48
  if it is the size of the observations, each dot will have the specified color,
@@ -149,8 +148,9 @@ def plot_khat(
149
148
 
150
149
  References
151
150
  ----------
152
- .. [1] Vehtari, A., Simpson, D., Gelman, A., Yao, Y., Gabry, J.,
153
- 2019. Pareto Smoothed Importance Sampling. arXiv:1507.02646 [stat].
151
+ .. [1] Vehtari, A., Simpson, D., Gelman, A., Yao, Y., Gabry, J. (2024).
152
+ Pareto Smoothed Importance Sampling. Journal of Machine Learning
153
+ Research, 25(72):1-58.
154
154
 
155
155
  """
156
156
  if annotate:
@@ -164,13 +164,29 @@ def plot_khat(
164
164
  color = "C0"
165
165
 
166
166
  if isinstance(khats, np.ndarray):
167
+ warnings.warn(
168
+ "support for arrays will be deprecated, please use ELPDData."
169
+ "The reason for this, is that we need to know the numbers of draws"
170
+ "sampled from the posterior",
171
+ FutureWarning,
172
+ )
167
173
  khats = khats.flatten()
168
174
  xlabels = False
169
175
  legend = False
170
176
  dims = []
177
+ good_k = None
171
178
  else:
172
179
  if isinstance(khats, ELPDData):
180
+ good_k = khats.good_k
173
181
  khats = khats.pareto_k
182
+ else:
183
+ good_k = None
184
+ warnings.warn(
185
+ "support for DataArrays will be deprecated, please use ELPDData."
186
+ "The reason for this, is that we need to know the numbers of draws"
187
+ "sampled from the posterior",
188
+ FutureWarning,
189
+ )
174
190
  if not isinstance(khats, DataArray):
175
191
  raise ValueError("Incorrect khat data input. Check the documentation")
176
192
 
@@ -191,6 +207,7 @@ def plot_khat(
191
207
  figsize=figsize,
192
208
  xdata=xdata,
193
209
  khats=khats,
210
+ good_k=good_k,
194
211
  kwargs=kwargs,
195
212
  threshold=threshold,
196
213
  coord_labels=coord_labels,
arviz/plots/loopitplot.py CHANGED
@@ -55,7 +55,7 @@ def plot_loo_pit(
55
55
  In this case, instead of overlaying uniform distributions, the beta ``hdi_prob``
56
56
  around the theoretical uniform CDF is shown. This approximation only holds
57
57
  for large S and ECDF values not very close to 0 nor 1. For more information, see
58
- `Vehtari et al. (2019)`, `Appendix G <https://avehtari.github.io/rhat_ess/rhat_ess.html>`_.
58
+ `Vehtari et al. (2021)`, `Appendix G <https://avehtari.github.io/rhat_ess/rhat_ess.html>`_.
59
59
  ecdf_fill : bool, optional
60
60
  Use :meth:`matplotlib.axes.Axes.fill_between` to mark the area
61
61
  inside the credible interval. Otherwise, plot the
@@ -159,7 +159,7 @@ def plot_loo_pit(
159
159
  x_vals = None
160
160
 
161
161
  if hdi_prob is None:
162
- hdi_prob = rcParams["stats.hdi_prob"]
162
+ hdi_prob = rcParams["stats.ci_prob"]
163
163
  elif not 1 >= hdi_prob > 0:
164
164
  raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
165
165
 
arviz/plots/mcseplot.py CHANGED
@@ -109,7 +109,9 @@ def plot_mcse(
109
109
 
110
110
  References
111
111
  ----------
112
- * Vehtari et al. (2019) see https://arxiv.org/abs/1903.08008
112
+ .. [1] Vehtari et al. (2021). Rank-normalization, folding, and
113
+ localization: An improved Rhat for assessing convergence of
114
+ MCMC. Bayesian analysis, 16(2):667-718.
113
115
 
114
116
  Examples
115
117
  --------
arviz/plots/plot_utils.py CHANGED
@@ -245,10 +245,8 @@ def format_coords_as_labels(dataarray, skip_dims=None):
245
245
  coord_labels = coord_labels.values
246
246
  if isinstance(coord_labels[0], tuple):
247
247
  fmt = ", ".join(["{}" for _ in coord_labels[0]])
248
- coord_labels[:] = [fmt.format(*x) for x in coord_labels]
249
- else:
250
- coord_labels[:] = [f"{s}" for s in coord_labels]
251
- return coord_labels
248
+ return np.array([fmt.format(*x) for x in coord_labels])
249
+ return np.array([f"{s}" for s in coord_labels])
252
250
 
253
251
 
254
252
  def set_xticklabels(ax, coord_labels):
@@ -237,7 +237,7 @@ def plot_posterior(
237
237
  labeller = BaseLabeller()
238
238
 
239
239
  if hdi_prob is None:
240
- hdi_prob = rcParams["stats.hdi_prob"]
240
+ hdi_prob = rcParams["stats.ci_prob"]
241
241
  elif hdi_prob not in (None, "hide"):
242
242
  if not 1 >= hdi_prob > 0:
243
243
  raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
arviz/plots/rankplot.py CHANGED
@@ -46,8 +46,8 @@ def plot_rank(
46
46
  indicates good mixing of the chains.
47
47
 
48
48
  This plot was introduced by Aki Vehtari, Andrew Gelman, Daniel Simpson, Bob Carpenter,
49
- Paul-Christian Burkner (2019): Rank-normalization, folding, and localization: An improved R-hat
50
- for assessing convergence of MCMC. arXiv preprint https://arxiv.org/abs/1903.08008
49
+ Paul-Christian Burkner (2021): Rank-normalization, folding, and localization:
50
+ An improved R-hat for assessing convergence of MCMC. Bayesian analysis, 16(2):667-718.
51
51
 
52
52
 
53
53
  Parameters
arviz/plots/violinplot.py CHANGED
@@ -152,7 +152,7 @@ def plot_violin(
152
152
  rows, cols = default_grid(len(plotters), grid=grid)
153
153
 
154
154
  if hdi_prob is None:
155
- hdi_prob = rcParams["stats.hdi_prob"]
155
+ hdi_prob = rcParams["stats.ci_prob"]
156
156
  elif not 1 >= hdi_prob > 0:
157
157
  raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
158
158