arviz 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. arviz/__init__.py +2 -1
  2. arviz/data/io_cmdstan.py +4 -0
  3. arviz/data/io_numpyro.py +1 -1
  4. arviz/plots/backends/bokeh/ecdfplot.py +1 -2
  5. arviz/plots/backends/bokeh/khatplot.py +8 -3
  6. arviz/plots/backends/bokeh/pairplot.py +2 -6
  7. arviz/plots/backends/matplotlib/ecdfplot.py +1 -2
  8. arviz/plots/backends/matplotlib/khatplot.py +7 -3
  9. arviz/plots/backends/matplotlib/traceplot.py +1 -1
  10. arviz/plots/bpvplot.py +2 -2
  11. arviz/plots/densityplot.py +1 -1
  12. arviz/plots/dotplot.py +2 -2
  13. arviz/plots/ecdfplot.py +205 -89
  14. arviz/plots/essplot.py +2 -2
  15. arviz/plots/forestplot.py +1 -1
  16. arviz/plots/hdiplot.py +2 -2
  17. arviz/plots/khatplot.py +23 -6
  18. arviz/plots/loopitplot.py +2 -2
  19. arviz/plots/mcseplot.py +3 -1
  20. arviz/plots/plot_utils.py +2 -4
  21. arviz/plots/posteriorplot.py +1 -1
  22. arviz/plots/rankplot.py +2 -2
  23. arviz/plots/violinplot.py +1 -1
  24. arviz/preview.py +17 -0
  25. arviz/rcparams.py +27 -2
  26. arviz/stats/diagnostics.py +13 -9
  27. arviz/stats/ecdf_utils.py +11 -8
  28. arviz/stats/stats.py +31 -16
  29. arviz/stats/stats_utils.py +8 -6
  30. arviz/tests/base_tests/test_data.py +1 -2
  31. arviz/tests/base_tests/test_data_zarr.py +0 -1
  32. arviz/tests/base_tests/test_diagnostics_numba.py +2 -7
  33. arviz/tests/base_tests/test_helpers.py +2 -2
  34. arviz/tests/base_tests/test_plot_utils.py +5 -13
  35. arviz/tests/base_tests/test_plots_matplotlib.py +92 -2
  36. arviz/tests/base_tests/test_rcparams.py +12 -0
  37. arviz/tests/base_tests/test_stats.py +1 -1
  38. arviz/tests/base_tests/test_stats_numba.py +2 -7
  39. arviz/tests/base_tests/test_utils_numba.py +2 -5
  40. arviz/tests/external_tests/test_data_pystan.py +5 -5
  41. arviz/tests/helpers.py +17 -9
  42. arviz/utils.py +4 -0
  43. {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/METADATA +8 -4
  44. {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/RECORD +47 -46
  45. {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/LICENSE +0 -0
  46. {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/WHEEL +0 -0
  47. {arviz-0.18.0.dist-info → arviz-0.19.0.dist-info}/top_level.txt +0 -0
arviz/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # pylint: disable=wildcard-import,invalid-name,wrong-import-position
2
2
  """ArviZ is a library for exploratory analysis of Bayesian models."""
3
- __version__ = "0.18.0"
3
+ __version__ = "0.19.0"
4
4
 
5
5
  import logging
6
6
  import os
@@ -37,6 +37,7 @@ from .stats import *
37
37
  from .rcparams import rc_context, rcParams
38
38
  from .utils import Numba, Dask, interactive_backend
39
39
  from .wrappers import *
40
+ from . import preview
40
41
 
41
42
  # add ArviZ's styles to matplotlib's styles
42
43
  _arviz_style_path = os.path.join(os.path.dirname(__file__), "plots", "styles")
arviz/data/io_cmdstan.py CHANGED
@@ -738,6 +738,7 @@ def _process_configuration(comments):
738
738
  elif "=" in comment:
739
739
  match_int = re.search(r"^(\S+)\s*=\s*([-+]?[0-9]+)$", comment)
740
740
  match_float = re.search(r"^(\S+)\s*=\s*([-+]?[0-9]+\.[0-9]+)$", comment)
741
+ match_str_bool = re.search(r"^(\S+)\s*=\s*(true|false)$", comment)
741
742
  match_str = re.search(r"^(\S+)\s*=\s*(\S+)$", comment)
742
743
  match_empty = re.search(r"^(\S+)\s*=\s*$", comment)
743
744
  if match_int:
@@ -746,6 +747,9 @@ def _process_configuration(comments):
746
747
  elif match_float:
747
748
  key, value = match_float.group(1), match_float.group(2)
748
749
  results[key] = float(value)
750
+ elif match_str_bool:
751
+ key, value = match_str_bool.group(1), match_str_bool.group(2)
752
+ results[key] = int(value == "true")
749
753
  elif match_str:
750
754
  key, value = match_str.group(1), match_str.group(2)
751
755
  results[key] = value
arviz/data/io_numpyro.py CHANGED
@@ -194,7 +194,7 @@ class NumPyroConverter:
194
194
  )
195
195
  for obs_name, log_like in log_likelihood_dict.items():
196
196
  shape = (self.nchains, self.ndraws) + log_like.shape[1:]
197
- data[obs_name] = np.reshape(log_like.copy(), shape)
197
+ data[obs_name] = np.reshape(np.asarray(log_like), shape)
198
198
  return dict_to_dataset(
199
199
  data,
200
200
  library=self.numpyro,
@@ -13,7 +13,6 @@ def plot_ecdf(
13
13
  x_bands,
14
14
  lower,
15
15
  higher,
16
- confidence_bands,
17
16
  plot_kwargs,
18
17
  fill_kwargs,
19
18
  plot_outline_kwargs,
@@ -58,7 +57,7 @@ def plot_ecdf(
58
57
  plot_outline_kwargs.setdefault("color", to_hex("C0"))
59
58
  plot_outline_kwargs.setdefault("alpha", 0.2)
60
59
 
61
- if confidence_bands:
60
+ if x_bands is not None:
62
61
  ax.step(x_coord, y_coord, **plot_kwargs)
63
62
 
64
63
  if fill_band:
@@ -21,6 +21,7 @@ def plot_khat(
21
21
  figsize,
22
22
  xdata,
23
23
  khats,
24
+ good_k,
24
25
  kwargs,
25
26
  threshold,
26
27
  coord_labels,
@@ -53,7 +54,11 @@ def plot_khat(
53
54
 
54
55
  if hlines_kwargs is None:
55
56
  hlines_kwargs = {}
56
- hlines_kwargs.setdefault("hlines", [0, 0.5, 0.7, 1])
57
+
58
+ if good_k is None:
59
+ good_k = 0.7
60
+
61
+ hlines_kwargs.setdefault("hlines", [0, good_k, 1])
57
62
 
58
63
  cmap = None
59
64
  if isinstance(color, str):
@@ -75,7 +80,7 @@ def plot_khat(
75
80
  rgba_c = cmap(color)
76
81
 
77
82
  khats = khats if isinstance(khats, np.ndarray) else khats.values.flatten()
78
- alphas = 0.5 + 0.2 * (khats > 0.5) + 0.3 * (khats > 1)
83
+ alphas = 0.5 + 0.2 * (khats > good_k) + 0.3 * (khats > 1)
79
84
 
80
85
  rgba_c = vectorized_to_hex(rgba_c)
81
86
 
@@ -130,7 +135,7 @@ def plot_khat(
130
135
  xmax = len(khats)
131
136
 
132
137
  if show_bins:
133
- bin_edges = np.array([ymin, 0.5, 0.7, 1, ymax])
138
+ bin_edges = np.array([ymin, good_k, 1, ymax])
134
139
  bin_edges = bin_edges[(bin_edges >= ymin) & (bin_edges <= ymax)]
135
140
  hist, _, _ = histogram(khats, bin_edges)
136
141
  for idx, count in enumerate(hist):
@@ -174,12 +174,8 @@ def plot_pair(
174
174
  source = ColumnDataSource(data=source_dict)
175
175
 
176
176
  if divergences:
177
- source_nondiv = CDSView(
178
- source=source, filters=[GroupFilter(column_name=divergenve_name, group="0")]
179
- )
180
- source_div = CDSView(
181
- source=source, filters=[GroupFilter(column_name=divergenve_name, group="1")]
182
- )
177
+ source_nondiv = CDSView(filter=GroupFilter(column_name=divergenve_name, group="0"))
178
+ source_div = CDSView(filter=GroupFilter(column_name=divergenve_name, group="1"))
183
179
 
184
180
  def get_width_and_height(jointplot, rotate):
185
181
  """Compute subplots dimensions for two or more variables."""
@@ -13,7 +13,6 @@ def plot_ecdf(
13
13
  x_bands,
14
14
  lower,
15
15
  higher,
16
- confidence_bands,
17
16
  plot_kwargs,
18
17
  fill_kwargs,
19
18
  plot_outline_kwargs,
@@ -59,7 +58,7 @@ def plot_ecdf(
59
58
 
60
59
  ax.step(x_coord, y_coord, **plot_kwargs)
61
60
 
62
- if confidence_bands:
61
+ if x_bands is not None:
63
62
  if fill_band:
64
63
  ax.fill_between(x_bands, lower, higher, **fill_kwargs)
65
64
  else:
@@ -20,6 +20,7 @@ def plot_khat(
20
20
  figsize,
21
21
  xdata,
22
22
  khats,
23
+ good_k,
23
24
  kwargs,
24
25
  threshold,
25
26
  coord_labels,
@@ -61,8 +62,11 @@ def plot_khat(
61
62
  backend_kwargs.setdefault("figsize", figsize)
62
63
  backend_kwargs["squeeze"] = True
63
64
 
65
+ if good_k is None:
66
+ good_k = 0.7
67
+
64
68
  hlines_kwargs = matplotlib_kwarg_dealiaser(hlines_kwargs, "hlines")
65
- hlines_kwargs.setdefault("hlines", [0, 0.5, 0.7, 1])
69
+ hlines_kwargs.setdefault("hlines", [0, good_k, 1])
66
70
  hlines_kwargs.setdefault("linestyle", [":", "-.", "--", "-"])
67
71
  hlines_kwargs.setdefault("alpha", 0.7)
68
72
  hlines_kwargs.setdefault("zorder", -1)
@@ -102,7 +106,7 @@ def plot_khat(
102
106
  rgba_c = cmap(norm_fun(color))
103
107
 
104
108
  khats = khats if isinstance(khats, np.ndarray) else khats.values.flatten()
105
- alphas = 0.5 + 0.2 * (khats > 0.5) + 0.3 * (khats > 1)
109
+ alphas = 0.5 + 0.2 * (khats > good_k) + 0.3 * (khats > 1)
106
110
  rgba_c[:, 3] = alphas
107
111
  rgba_c = vectorized_to_hex(rgba_c)
108
112
  kwargs["c"] = rgba_c
@@ -151,7 +155,7 @@ def plot_khat(
151
155
  )
152
156
 
153
157
  if show_bins:
154
- bin_edges = np.array([ymin, 0.5, 0.7, 1, ymax])
158
+ bin_edges = np.array([ymin, good_k, 1, ymax])
155
159
  bin_edges = bin_edges[(bin_edges >= ymin) & (bin_edges <= ymax)]
156
160
  hist, _, _ = histogram(khats, bin_edges)
157
161
  for idx, count in enumerate(hist):
@@ -440,7 +440,7 @@ def plot_trace(
440
440
  [], [], label="combined", **dealiase_sel_kwargs(plot_kwargs, chain_prop, -1)
441
441
  ),
442
442
  )
443
- ax.figure.axes[0].legend(handles=handles, title="chain", loc="upper right")
443
+ ax.figure.axes[1].legend(handles=handles, title="chain", loc="upper right")
444
444
 
445
445
  if axes is None:
446
446
  axes = np.array(ax.figure.axes).reshape(-1, 2)
arviz/plots/bpvplot.py CHANGED
@@ -80,7 +80,7 @@ def plot_bpv(
80
80
  hdi_prob : float, optional
81
81
  Probability for the highest density interval for the analytical reference distribution when
82
82
  ``kind=u_values``. Should be in the interval (0, 1]. Defaults to the
83
- rcParam ``stats.hdi_prob``. See :ref:`this section <common_hdi_prob>` for usage examples.
83
+ rcParam ``stats.ci_prob``. See :ref:`this section <common_hdi_prob>` for usage examples.
84
84
  color : str, optional
85
85
  Matplotlib color
86
86
  grid : tuple, optional
@@ -202,7 +202,7 @@ def plot_bpv(
202
202
  raise TypeError("`reference` argument must be either `analytical`, `samples`, or `None`")
203
203
 
204
204
  if hdi_prob is None:
205
- hdi_prob = rcParams["stats.hdi_prob"]
205
+ hdi_prob = rcParams["stats.ci_prob"]
206
206
  elif not 1 >= hdi_prob > 0:
207
207
  raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
208
208
 
@@ -209,7 +209,7 @@ def plot_density(
209
209
  )
210
210
 
211
211
  if hdi_prob is None:
212
- hdi_prob = rcParams["stats.hdi_prob"]
212
+ hdi_prob = rcParams["stats.ci_prob"]
213
213
  elif not 1 >= hdi_prob > 0:
214
214
  raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
215
215
 
arviz/plots/dotplot.py CHANGED
@@ -67,7 +67,7 @@ def plot_dot(
67
67
  The shape of the marker. Valid for matplotlib backend.
68
68
  hdi_prob : float, optional
69
69
  Valid only when point_interval is True. Plots HDI for chosen percentage of density.
70
- Defaults to ``stats.hdi_prob`` rcParam. See :ref:`this section <common_hdi_prob>`
70
+ Defaults to ``stats.ci_prob`` rcParam. See :ref:`this section <common_hdi_prob>`
71
71
  for usage examples.
72
72
  rotated : bool, default False
73
73
  Whether to rotate the dot plot by 90 degrees.
@@ -151,7 +151,7 @@ def plot_dot(
151
151
  values.sort()
152
152
 
153
153
  if hdi_prob is None:
154
- hdi_prob = rcParams["stats.hdi_prob"]
154
+ hdi_prob = rcParams["stats.ci_prob"]
155
155
  elif not 1 >= hdi_prob > 0:
156
156
  raise ValueError("The value of hdi_prob should be in the interval (0, 1]")
157
157
 
arviz/plots/ecdfplot.py CHANGED
@@ -1,24 +1,32 @@
1
1
  """Plot ecdf or ecdf-difference plot with confidence bands."""
2
2
 
3
+ import warnings
4
+
3
5
  import numpy as np
4
6
  from scipy.stats import uniform
5
7
 
8
+ try:
9
+ from scipy.stats import ecdf as scipy_ecdf
10
+ except ImportError:
11
+ scipy_ecdf = None
12
+
6
13
  from ..rcparams import rcParams
7
- from ..stats.ecdf_utils import compute_ecdf, ecdf_confidence_band, _get_ecdf_points
14
+ from ..stats.ecdf_utils import ecdf_confidence_band, _get_ecdf_points
15
+ from ..utils import BehaviourChangeWarning
8
16
  from .plot_utils import get_plotting_function
9
17
 
10
18
 
11
19
  def plot_ecdf(
12
20
  values,
13
21
  values2=None,
22
+ eval_points=None,
14
23
  cdf=None,
15
24
  difference=False,
16
- pit=False,
17
- confidence_bands=None,
18
- pointwise=False,
19
- npoints=100,
25
+ confidence_bands=False,
26
+ ci_prob=None,
20
27
  num_trials=500,
21
- fpr=0.05,
28
+ rvs=None,
29
+ random_state=None,
22
30
  figsize=None,
23
31
  fill_band=True,
24
32
  plot_kwargs=None,
@@ -28,15 +36,19 @@ def plot_ecdf(
28
36
  show=None,
29
37
  backend=None,
30
38
  backend_kwargs=None,
39
+ npoints=100,
40
+ pointwise=False,
41
+ fpr=None,
42
+ pit=False,
31
43
  **kwargs,
32
44
  ):
33
45
  r"""Plot ECDF or ECDF-Difference Plot with Confidence bands.
34
46
 
35
- Plots of the empirical CDF estimates of an array. When `values2` argument is provided,
36
- the two empirical CDFs are overlaid with the distribution of `values` on top
37
- (in a darker shade) and confidence bands in a more transparent shade. Optionally, the difference
38
- between the two empirical CDFs can be computed, and the PIT for a single dataset or a comparison
39
- between two samples.
47
+ Plots of the empirical cumulative distribution function (ECDF) of an array. Optionally, a `cdf`
48
+ argument representing a reference CDF may be provided for comparison using a difference ECDF
49
+ plot and/or confidence bands.
50
+
51
+ Alternatively, the PIT for a single dataset may be visualized.
40
52
 
41
53
  Notes
42
54
  -----
@@ -47,26 +59,39 @@ def plot_ecdf(
47
59
  values : array-like
48
60
  Values to plot from an unknown continuous or discrete distribution.
49
61
  values2 : array-like, optional
50
- Values to compare to the original sample.
62
+ Values to compare to the original sample.
63
+
64
+ .. deprecated:: 0.18.0
65
+ Instead use ``cdf=scipy.stats.ecdf(values2).cdf.evaluate``.
51
66
  cdf : callable, optional
52
67
  Cumulative distribution function of the distribution to compare the original sample.
53
68
  The function must take as input a numpy array of draws from the distribution.
54
69
  difference : bool, default False
55
70
  If True then plot ECDF-difference plot otherwise ECDF plot.
56
- pit : bool, default False
57
- If True plots the ECDF or ECDF-diff of PIT of sample.
58
- confidence_bands : bool, default None
59
- If True plots the simultaneous or pointwise confidence bands with `1 - fpr`
60
- confidence level.
61
- pointwise : bool, default False
62
- If True plots pointwise confidence bands otherwise simultaneous bands.
63
- npoints : int, default 100
64
- This denotes the granularity size of our plot i.e the number of evaluation points
65
- for the ecdf or ecdf-difference plots.
71
+ confidence_bands : str or bool
72
+
73
+ - False: No confidence bands are plotted (default).
74
+ - True: Plot bands computed with the default algorithm (subject to change)
75
+ - "pointwise": Compute the pointwise (i.e. marginal) confidence band.
76
+ - "simulated": Use Monte Carlo simulation to estimate a simultaneous confidence
77
+ band.
78
+
79
+ For simultaneous confidence bands to be correctly calibrated, provide `eval_points` that
80
+ are not dependent on the `values`.
81
+ ci_prob : float, default 0.94
82
+ The probability that the true ECDF lies within the confidence band. If `confidence_bands`
83
+ is "pointwise", this is the marginal probability instead of the joint probability.
84
+ eval_points : array-like, optional
85
+ The points at which to evaluate the ECDF. If None, `npoints` uniformly spaced points
86
+ between the data bounds will be used.
87
+ rvs : callable, optional
88
+ A function that takes an integer `ndraws` and optionally the object passed to
89
+ `random_state` and returns an array of `ndraws` samples from the same distribution
90
+ as the original dataset. Required if `confidence_bands` is "simulated" and the variable is discrete.
91
+ random_state : int, numpy.random.Generator or numpy.random.RandomState, optional
66
92
  num_trials : int, default 500
67
- The number of random ECDFs to generate for constructing simultaneous confidence bands.
68
- fpr : float, default 0.05
69
- The type I error rate s.t `1 - fpr` denotes the confidence level of bands.
93
+ The number of random ECDFs to generate for constructing simultaneous confidence bands
94
+ (if `confidence_bands` is "simulated").
70
95
  figsize : (float,float), optional
71
96
  Figure size. If `None` it will be defined automatically.
72
97
  fill_band : bool, default True
@@ -91,6 +116,26 @@ def plot_ecdf(
91
116
  These are kwargs specific to the backend being used, passed to
92
117
  :func:`matplotlib.pyplot.subplots` or :class:`bokeh.plotting.figure`.
93
118
  For additional documentation check the plotting method of the backend.
119
+ npoints : int, default 100
120
+ The number of evaluation points for the ecdf or ecdf-difference plots, if `eval_points` is
121
+ not provided or `pit` is `True`.
122
+
123
+ .. deprecated:: 0.18.0
124
+ Instead specify ``eval_points=np.linspace(np.min(values), np.max(values), npoints)``
125
+ unless `pit` is `True`.
126
+ pointwise : bool, default False
127
+
128
+ .. deprecated:: 0.18.0
129
+ Instead use `confidence_bands="pointwise"`.
130
+ fpr : float, optional
131
+
132
+ .. deprecated:: 0.18.0
133
+ Instead use `ci_prob=1-fpr`.
134
+ pit : bool, default False
135
+ If True plots the ECDF or ECDF-diff of PIT of sample.
136
+
137
+ .. deprecated:: 0.18.0
138
+ See the example below instead.
94
139
 
95
140
  Returns
96
141
  -------
@@ -98,135 +143,206 @@ def plot_ecdf(
98
143
 
99
144
  References
100
145
  ----------
101
- .. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A., 2021. Graphical Test for
146
+ .. [1] Säilynoja, T., Bürkner, P.C. and Vehtari, A. (2022). Graphical Test for
102
147
  Discrete Uniformity and its Applications in Goodness of Fit Evaluation and
103
- Multiple Sample Comparison. arXiv preprint arXiv:2103.10522.
148
+ Multiple Sample Comparison. Statistics and Computing, 32(32).
104
149
 
105
150
  Examples
106
151
  --------
107
- Plot ecdf plot for a given sample
152
+ In a future release, the default behaviour of ``plot_ecdf`` will change.
153
+ To maintain the original behaviour you should do:
108
154
 
109
155
  .. plot::
110
156
  :context: close-figs
111
157
 
112
158
  >>> import arviz as az
113
- >>> from scipy.stats import uniform, binom, norm
114
-
159
+ >>> import numpy as np
160
+ >>> from scipy.stats import uniform, norm
161
+ >>>
115
162
  >>> sample = norm(0,1).rvs(1000)
116
- >>> az.plot_ecdf(sample)
163
+ >>> npoints = 100
164
+ >>> az.plot_ecdf(sample, eval_points=np.linspace(sample.min(), sample.max(), npoints))
117
165
 
118
- Plot ecdf plot with confidence bands for comparing a given sample w.r.t a given distribution
166
+ However, seeing this warning isn't an indicator of anything being wrong;
167
+ if you are happy to get different behaviour as ArviZ improves and adds
168
+ new algorithms you can ignore it like so:
119
169
 
120
170
  .. plot::
121
171
  :context: close-figs
122
172
 
123
- >>> distribution = norm(0,1)
124
- >>> az.plot_ecdf(sample, cdf = distribution.cdf, confidence_bands = True)
173
+ >>> import warnings
174
+ >>> warnings.filterwarnings("ignore", category=az.utils.BehaviourChangeWarning)
125
175
 
126
- Plot ecdf-difference plot with confidence bands for comparing a given sample
127
- w.r.t a given distribution
176
+ Plot an ECDF plot for a given sample evaluated at the sample points. This will become
177
+ the new behaviour when `eval_points` is not provided:
128
178
 
129
179
  .. plot::
130
180
  :context: close-figs
131
181
 
132
- >>> az.plot_ecdf(sample, cdf = distribution.cdf,
133
- >>> confidence_bands = True, difference = True)
182
+ >>> az.plot_ecdf(sample, eval_points=np.unique(sample))
134
183
 
135
- Plot ecdf plot with confidence bands for PIT of sample for comparing a given sample
136
- w.r.t a given distribution
184
+ Plot an ECDF plot with confidence bands for comparing a given sample to a given distribution.
185
+ We manually specify evaluation points independent of the values so that the confidence bands
186
+ are correctly calibrated.
137
187
 
138
188
  .. plot::
139
189
  :context: close-figs
140
190
 
141
- >>> az.plot_ecdf(sample, cdf = distribution.cdf,
142
- >>> confidence_bands = True, pit = True)
191
+ >>> distribution = norm(0,1)
192
+ >>> eval_points = np.linspace(*distribution.ppf([0.001, 0.999]), 100)
193
+ >>> az.plot_ecdf(
194
+ >>> sample, eval_points=eval_points,
195
+ >>> cdf=distribution.cdf, confidence_bands=True
196
+ >>> )
143
197
 
144
- Plot ecdf-difference plot with confidence bands for PIT of sample for comparing a given
145
- sample w.r.t a given distribution
198
+ Plot an ECDF-difference plot with confidence bands for comparing a given sample
199
+ to a given distribution.
146
200
 
147
201
  .. plot::
148
202
  :context: close-figs
149
203
 
150
- >>> az.plot_ecdf(sample, cdf = distribution.cdf,
151
- >>> confidence_bands = True, difference = True, pit = True)
204
+ >>> az.plot_ecdf(
205
+ >>> sample, cdf=distribution.cdf,
206
+ >>> confidence_bands=True, difference=True
207
+ >>> )
152
208
 
153
- You could also plot the above w.r.t another sample rather than a given distribution.
154
- For eg: Plot ecdf-difference plot with confidence bands for PIT of sample for
155
- comparing a given sample w.r.t a given sample
209
+ Plot an ECDF plot with confidence bands for the probability integral transform (PIT) of a
210
+ continuous sample. If drawn from the reference distribution, the PIT values should be uniformly
211
+ distributed.
156
212
 
157
213
  .. plot::
158
214
  :context: close-figs
159
215
 
160
- >>> sample2 = norm(0,1).rvs(5000)
161
- >>> az.plot_ecdf(sample, sample2, confidence_bands = True, difference = True, pit = True)
162
-
163
- """
164
- if confidence_bands is None:
165
- confidence_bands = (values2 is not None) or (cdf is not None)
166
-
167
- if values2 is None and cdf is None and confidence_bands is True:
168
- raise ValueError("For confidence bands you need to specify values2 or the cdf")
216
+ >>> pit_vals = distribution.cdf(sample)
217
+ >>> uniform_dist = uniform(0, 1)
218
+ >>> az.plot_ecdf(
219
+ >>> pit_vals, cdf=uniform_dist.cdf,
220
+ >>> rvs=uniform_dist.rvs, confidence_bands=True
221
+ >>> )
169
222
 
170
- if cdf is not None and values2 is not None:
171
- raise ValueError("To compare sample you need either cdf or values2 and not both")
223
+ Plot an ECDF-difference plot of PIT values.
172
224
 
173
- if values2 is None and cdf is None and pit is True:
174
- raise ValueError("For PIT specify either cdf or values2")
225
+ .. plot::
226
+ :context: close-figs
175
227
 
176
- if values2 is None and cdf is None and difference is True:
177
- raise ValueError("For ECDF difference plot need either cdf or values2")
228
+ >>> az.plot_ecdf(
229
+ >>> pit_vals, cdf = uniform_dist.cdf, rvs = uniform_dist.rvs,
230
+ >>> confidence_bands = True, difference = True
231
+ >>> )
232
+ """
233
+ if confidence_bands is True:
234
+ if pointwise:
235
+ warnings.warn(
236
+ "`pointwise` has been deprecated. Use `confidence_bands='pointwise'` instead.",
237
+ FutureWarning,
238
+ )
239
+ confidence_bands = "pointwise"
240
+ else:
241
+ confidence_bands = "simulated"
242
+ elif confidence_bands == "simulated" and pointwise:
243
+ raise ValueError("Cannot specify both `confidence_bands='simulated'` and `pointwise=True`")
244
+
245
+ if fpr is not None:
246
+ warnings.warn(
247
+ "`fpr` has been deprecated. Use `ci_prob=1-fpr` or set `rcParam['stats.ci_prob']` to"
248
+ "`1-fpr`.",
249
+ FutureWarning,
250
+ )
251
+ if ci_prob is not None:
252
+ raise ValueError("Cannot specify both `fpr` and `ci_prob`")
253
+ ci_prob = 1 - fpr
254
+
255
+ if ci_prob is None:
256
+ ci_prob = rcParams["stats.ci_prob"]
178
257
 
179
258
  if values2 is not None:
180
- values2 = np.ravel(values2)
181
- values2.sort()
259
+ if cdf is not None:
260
+ raise ValueError("You cannot specify both `values2` and `cdf`")
261
+ if scipy_ecdf is None:
262
+ raise ValueError(
263
+ "The `values2` argument is deprecated and `scipy.stats.ecdf` is not available. "
264
+ "Please use `cdf` instead."
265
+ )
266
+ warnings.warn(
267
+ "`values2` has been deprecated. Use `cdf=scipy.stats.ecdf(values2).cdf.evaluate` "
268
+ "instead.",
269
+ FutureWarning,
270
+ )
271
+ cdf = scipy_ecdf(np.ravel(values2)).cdf.evaluate
272
+
273
+ if cdf is None:
274
+ if confidence_bands:
275
+ raise ValueError("For confidence bands you must specify cdf")
276
+ if difference is True:
277
+ raise ValueError("For ECDF difference plot you must specify cdf")
278
+ if pit:
279
+ raise ValueError("For PIT plot you must specify cdf")
182
280
 
183
281
  values = np.ravel(values)
184
282
  values.sort()
185
283
 
186
284
  if pit:
187
- eval_points = np.linspace(1 / npoints, 1, npoints)
188
- if cdf:
189
- sample = cdf(values)
190
- else:
191
- sample = compute_ecdf(values2, values) / len(values2)
192
- cdf_at_eval_points = eval_points
285
+ warnings.warn(
286
+ "`pit` has been deprecated. Specify `values=cdf(values)` instead.",
287
+ FutureWarning,
288
+ )
289
+ values = cdf(values)
290
+ cdf = uniform(0, 1).cdf
193
291
  rvs = uniform(0, 1).rvs
194
- else:
292
+ eval_points = np.linspace(1 / npoints, 1, npoints)
293
+
294
+ if eval_points is None:
295
+ warnings.warn(
296
+ "In future versions, if `eval_points` is not provided, then the ECDF will be evaluated"
297
+ " at the unique values of the sample. To keep the current behavior, provide "
298
+ "`eval_points` explicitly.",
299
+ BehaviourChangeWarning,
300
+ )
301
+ if confidence_bands == "simulated":
302
+ warnings.warn(
303
+ "For simultaneous bands to be correctly calibrated, specify `eval_points` "
304
+ "independent of the `values`"
305
+ )
195
306
  eval_points = np.linspace(values[0], values[-1], npoints)
196
- sample = values
197
- if confidence_bands or difference:
198
- if cdf:
199
- cdf_at_eval_points = cdf(eval_points)
200
- else:
201
- cdf_at_eval_points = compute_ecdf(values2, eval_points)
202
- else:
203
- cdf_at_eval_points = np.zeros_like(eval_points)
204
- rvs = None
307
+ else:
308
+ eval_points = np.asarray(eval_points)
309
+
310
+ if difference or confidence_bands:
311
+ cdf_at_eval_points = cdf(eval_points)
312
+ else:
313
+ cdf_at_eval_points = np.zeros_like(eval_points)
205
314
 
206
- x_coord, y_coord = _get_ecdf_points(sample, eval_points, difference)
315
+ x_coord, y_coord = _get_ecdf_points(values, eval_points, difference)
207
316
 
208
317
  if difference:
209
318
  y_coord -= cdf_at_eval_points
210
319
 
211
320
  if confidence_bands:
212
321
  ndraws = len(values)
213
- band_kwargs = {"prob": 1 - fpr, "num_trials": num_trials, "rvs": rvs, "random_state": None}
214
- band_kwargs["method"] = "pointwise" if pointwise else "simulated"
215
- lower, higher = ecdf_confidence_band(ndraws, eval_points, cdf_at_eval_points, **band_kwargs)
322
+ x_bands = eval_points
323
+ lower, higher = ecdf_confidence_band(
324
+ ndraws,
325
+ eval_points,
326
+ cdf_at_eval_points,
327
+ method=confidence_bands,
328
+ prob=ci_prob,
329
+ num_trials=num_trials,
330
+ rvs=rvs,
331
+ random_state=random_state,
332
+ )
216
333
 
217
334
  if difference:
218
335
  lower -= cdf_at_eval_points
219
336
  higher -= cdf_at_eval_points
220
337
  else:
221
- lower, higher = None, None
338
+ x_bands, lower, higher = None, None, None
222
339
 
223
340
  ecdf_plot_args = dict(
224
341
  x_coord=x_coord,
225
342
  y_coord=y_coord,
226
- x_bands=eval_points,
343
+ x_bands=x_bands,
227
344
  lower=lower,
228
345
  higher=higher,
229
- confidence_bands=confidence_bands,
230
346
  figsize=figsize,
231
347
  fill_band=fill_band,
232
348
  plot_kwargs=plot_kwargs,
arviz/plots/essplot.py CHANGED
@@ -138,9 +138,9 @@ def plot_ess(
138
138
 
139
139
  References
140
140
  ----------
141
- .. [1] Vehtari et al. (2019). Rank-normalization, folding, and
141
+ .. [1] Vehtari et al. (2021). Rank-normalization, folding, and
142
142
  localization: An improved Rhat for assessing convergence of
143
- MCMC https://arxiv.org/abs/1903.08008
143
+ MCMC. Bayesian analysis, 16(2):667-718.
144
144
 
145
145
  Examples
146
146
  --------