arviz 0.23.3__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (185)
  1. arviz/__init__.py +52 -367
  2. arviz-1.0.0rc0.dist-info/METADATA +182 -0
  3. arviz-1.0.0rc0.dist-info/RECORD +5 -0
  4. {arviz-0.23.3.dist-info → arviz-1.0.0rc0.dist-info}/WHEEL +1 -2
  5. {arviz-0.23.3.dist-info → arviz-1.0.0rc0.dist-info}/licenses/LICENSE +0 -1
  6. arviz/data/__init__.py +0 -55
  7. arviz/data/base.py +0 -596
  8. arviz/data/converters.py +0 -203
  9. arviz/data/datasets.py +0 -161
  10. arviz/data/example_data/code/radon/radon.json +0 -326
  11. arviz/data/example_data/data/centered_eight.nc +0 -0
  12. arviz/data/example_data/data/non_centered_eight.nc +0 -0
  13. arviz/data/example_data/data_local.json +0 -12
  14. arviz/data/example_data/data_remote.json +0 -58
  15. arviz/data/inference_data.py +0 -2386
  16. arviz/data/io_beanmachine.py +0 -112
  17. arviz/data/io_cmdstan.py +0 -1036
  18. arviz/data/io_cmdstanpy.py +0 -1233
  19. arviz/data/io_datatree.py +0 -23
  20. arviz/data/io_dict.py +0 -462
  21. arviz/data/io_emcee.py +0 -317
  22. arviz/data/io_json.py +0 -54
  23. arviz/data/io_netcdf.py +0 -68
  24. arviz/data/io_numpyro.py +0 -497
  25. arviz/data/io_pyjags.py +0 -378
  26. arviz/data/io_pyro.py +0 -333
  27. arviz/data/io_pystan.py +0 -1095
  28. arviz/data/io_zarr.py +0 -46
  29. arviz/data/utils.py +0 -139
  30. arviz/labels.py +0 -210
  31. arviz/plots/__init__.py +0 -61
  32. arviz/plots/autocorrplot.py +0 -171
  33. arviz/plots/backends/__init__.py +0 -223
  34. arviz/plots/backends/bokeh/__init__.py +0 -166
  35. arviz/plots/backends/bokeh/autocorrplot.py +0 -101
  36. arviz/plots/backends/bokeh/bfplot.py +0 -23
  37. arviz/plots/backends/bokeh/bpvplot.py +0 -193
  38. arviz/plots/backends/bokeh/compareplot.py +0 -167
  39. arviz/plots/backends/bokeh/densityplot.py +0 -239
  40. arviz/plots/backends/bokeh/distcomparisonplot.py +0 -23
  41. arviz/plots/backends/bokeh/distplot.py +0 -183
  42. arviz/plots/backends/bokeh/dotplot.py +0 -113
  43. arviz/plots/backends/bokeh/ecdfplot.py +0 -73
  44. arviz/plots/backends/bokeh/elpdplot.py +0 -203
  45. arviz/plots/backends/bokeh/energyplot.py +0 -155
  46. arviz/plots/backends/bokeh/essplot.py +0 -176
  47. arviz/plots/backends/bokeh/forestplot.py +0 -772
  48. arviz/plots/backends/bokeh/hdiplot.py +0 -54
  49. arviz/plots/backends/bokeh/kdeplot.py +0 -268
  50. arviz/plots/backends/bokeh/khatplot.py +0 -163
  51. arviz/plots/backends/bokeh/lmplot.py +0 -185
  52. arviz/plots/backends/bokeh/loopitplot.py +0 -211
  53. arviz/plots/backends/bokeh/mcseplot.py +0 -184
  54. arviz/plots/backends/bokeh/pairplot.py +0 -328
  55. arviz/plots/backends/bokeh/parallelplot.py +0 -81
  56. arviz/plots/backends/bokeh/posteriorplot.py +0 -324
  57. arviz/plots/backends/bokeh/ppcplot.py +0 -379
  58. arviz/plots/backends/bokeh/rankplot.py +0 -149
  59. arviz/plots/backends/bokeh/separationplot.py +0 -107
  60. arviz/plots/backends/bokeh/traceplot.py +0 -436
  61. arviz/plots/backends/bokeh/violinplot.py +0 -164
  62. arviz/plots/backends/matplotlib/__init__.py +0 -124
  63. arviz/plots/backends/matplotlib/autocorrplot.py +0 -72
  64. arviz/plots/backends/matplotlib/bfplot.py +0 -78
  65. arviz/plots/backends/matplotlib/bpvplot.py +0 -177
  66. arviz/plots/backends/matplotlib/compareplot.py +0 -135
  67. arviz/plots/backends/matplotlib/densityplot.py +0 -194
  68. arviz/plots/backends/matplotlib/distcomparisonplot.py +0 -119
  69. arviz/plots/backends/matplotlib/distplot.py +0 -178
  70. arviz/plots/backends/matplotlib/dotplot.py +0 -116
  71. arviz/plots/backends/matplotlib/ecdfplot.py +0 -70
  72. arviz/plots/backends/matplotlib/elpdplot.py +0 -189
  73. arviz/plots/backends/matplotlib/energyplot.py +0 -113
  74. arviz/plots/backends/matplotlib/essplot.py +0 -180
  75. arviz/plots/backends/matplotlib/forestplot.py +0 -656
  76. arviz/plots/backends/matplotlib/hdiplot.py +0 -48
  77. arviz/plots/backends/matplotlib/kdeplot.py +0 -177
  78. arviz/plots/backends/matplotlib/khatplot.py +0 -241
  79. arviz/plots/backends/matplotlib/lmplot.py +0 -149
  80. arviz/plots/backends/matplotlib/loopitplot.py +0 -144
  81. arviz/plots/backends/matplotlib/mcseplot.py +0 -161
  82. arviz/plots/backends/matplotlib/pairplot.py +0 -355
  83. arviz/plots/backends/matplotlib/parallelplot.py +0 -58
  84. arviz/plots/backends/matplotlib/posteriorplot.py +0 -348
  85. arviz/plots/backends/matplotlib/ppcplot.py +0 -478
  86. arviz/plots/backends/matplotlib/rankplot.py +0 -119
  87. arviz/plots/backends/matplotlib/separationplot.py +0 -97
  88. arviz/plots/backends/matplotlib/traceplot.py +0 -526
  89. arviz/plots/backends/matplotlib/tsplot.py +0 -121
  90. arviz/plots/backends/matplotlib/violinplot.py +0 -148
  91. arviz/plots/bfplot.py +0 -128
  92. arviz/plots/bpvplot.py +0 -308
  93. arviz/plots/compareplot.py +0 -177
  94. arviz/plots/densityplot.py +0 -284
  95. arviz/plots/distcomparisonplot.py +0 -197
  96. arviz/plots/distplot.py +0 -233
  97. arviz/plots/dotplot.py +0 -233
  98. arviz/plots/ecdfplot.py +0 -372
  99. arviz/plots/elpdplot.py +0 -174
  100. arviz/plots/energyplot.py +0 -147
  101. arviz/plots/essplot.py +0 -319
  102. arviz/plots/forestplot.py +0 -304
  103. arviz/plots/hdiplot.py +0 -211
  104. arviz/plots/kdeplot.py +0 -357
  105. arviz/plots/khatplot.py +0 -236
  106. arviz/plots/lmplot.py +0 -380
  107. arviz/plots/loopitplot.py +0 -224
  108. arviz/plots/mcseplot.py +0 -194
  109. arviz/plots/pairplot.py +0 -281
  110. arviz/plots/parallelplot.py +0 -204
  111. arviz/plots/plot_utils.py +0 -599
  112. arviz/plots/posteriorplot.py +0 -298
  113. arviz/plots/ppcplot.py +0 -369
  114. arviz/plots/rankplot.py +0 -232
  115. arviz/plots/separationplot.py +0 -167
  116. arviz/plots/styles/arviz-bluish.mplstyle +0 -1
  117. arviz/plots/styles/arviz-brownish.mplstyle +0 -1
  118. arviz/plots/styles/arviz-colors.mplstyle +0 -2
  119. arviz/plots/styles/arviz-cyanish.mplstyle +0 -1
  120. arviz/plots/styles/arviz-darkgrid.mplstyle +0 -40
  121. arviz/plots/styles/arviz-doc.mplstyle +0 -88
  122. arviz/plots/styles/arviz-docgrid.mplstyle +0 -88
  123. arviz/plots/styles/arviz-grayscale.mplstyle +0 -41
  124. arviz/plots/styles/arviz-greenish.mplstyle +0 -1
  125. arviz/plots/styles/arviz-orangish.mplstyle +0 -1
  126. arviz/plots/styles/arviz-plasmish.mplstyle +0 -1
  127. arviz/plots/styles/arviz-purplish.mplstyle +0 -1
  128. arviz/plots/styles/arviz-redish.mplstyle +0 -1
  129. arviz/plots/styles/arviz-royish.mplstyle +0 -1
  130. arviz/plots/styles/arviz-viridish.mplstyle +0 -1
  131. arviz/plots/styles/arviz-white.mplstyle +0 -40
  132. arviz/plots/styles/arviz-whitegrid.mplstyle +0 -40
  133. arviz/plots/traceplot.py +0 -273
  134. arviz/plots/tsplot.py +0 -440
  135. arviz/plots/violinplot.py +0 -192
  136. arviz/preview.py +0 -58
  137. arviz/py.typed +0 -0
  138. arviz/rcparams.py +0 -606
  139. arviz/sel_utils.py +0 -223
  140. arviz/static/css/style.css +0 -340
  141. arviz/static/html/icons-svg-inline.html +0 -15
  142. arviz/stats/__init__.py +0 -37
  143. arviz/stats/density_utils.py +0 -1013
  144. arviz/stats/diagnostics.py +0 -1013
  145. arviz/stats/ecdf_utils.py +0 -324
  146. arviz/stats/stats.py +0 -2422
  147. arviz/stats/stats_refitting.py +0 -119
  148. arviz/stats/stats_utils.py +0 -609
  149. arviz/tests/__init__.py +0 -1
  150. arviz/tests/base_tests/__init__.py +0 -1
  151. arviz/tests/base_tests/test_data.py +0 -1679
  152. arviz/tests/base_tests/test_data_zarr.py +0 -143
  153. arviz/tests/base_tests/test_diagnostics.py +0 -511
  154. arviz/tests/base_tests/test_diagnostics_numba.py +0 -87
  155. arviz/tests/base_tests/test_helpers.py +0 -18
  156. arviz/tests/base_tests/test_labels.py +0 -69
  157. arviz/tests/base_tests/test_plot_utils.py +0 -342
  158. arviz/tests/base_tests/test_plots_bokeh.py +0 -1288
  159. arviz/tests/base_tests/test_plots_matplotlib.py +0 -2197
  160. arviz/tests/base_tests/test_rcparams.py +0 -317
  161. arviz/tests/base_tests/test_stats.py +0 -925
  162. arviz/tests/base_tests/test_stats_ecdf_utils.py +0 -166
  163. arviz/tests/base_tests/test_stats_numba.py +0 -45
  164. arviz/tests/base_tests/test_stats_utils.py +0 -384
  165. arviz/tests/base_tests/test_utils.py +0 -376
  166. arviz/tests/base_tests/test_utils_numba.py +0 -87
  167. arviz/tests/conftest.py +0 -46
  168. arviz/tests/external_tests/__init__.py +0 -1
  169. arviz/tests/external_tests/test_data_beanmachine.py +0 -78
  170. arviz/tests/external_tests/test_data_cmdstan.py +0 -398
  171. arviz/tests/external_tests/test_data_cmdstanpy.py +0 -496
  172. arviz/tests/external_tests/test_data_emcee.py +0 -166
  173. arviz/tests/external_tests/test_data_numpyro.py +0 -434
  174. arviz/tests/external_tests/test_data_pyjags.py +0 -119
  175. arviz/tests/external_tests/test_data_pyro.py +0 -260
  176. arviz/tests/external_tests/test_data_pystan.py +0 -307
  177. arviz/tests/helpers.py +0 -677
  178. arviz/utils.py +0 -773
  179. arviz/wrappers/__init__.py +0 -13
  180. arviz/wrappers/base.py +0 -236
  181. arviz/wrappers/wrap_pymc.py +0 -36
  182. arviz/wrappers/wrap_stan.py +0 -148
  183. arviz-0.23.3.dist-info/METADATA +0 -264
  184. arviz-0.23.3.dist-info/RECORD +0 -183
  185. arviz-0.23.3.dist-info/top_level.txt +0 -1
@@ -1,1013 +0,0 @@
- # pylint: disable=invalid-name,too-many-lines
- """Density estimation functions for ArviZ."""
- import warnings
-
- import numpy as np
- from scipy.fftpack import fft
- from scipy.optimize import brentq
- from scipy.signal import convolve, convolve2d
- from scipy.signal.windows import gaussian
- from scipy.sparse import coo_matrix
- from scipy.special import ive  # pylint: disable=no-name-in-module
-
- from ..utils import _cov, _dot, _stack, conditional_jit
-
- __all__ = ["kde"]
-
-
- def _bw_scott(x, x_std=None, **kwargs):  # pylint: disable=unused-argument
-     """Scott's Rule."""
-     if x_std is None:
-         x_std = np.std(x)
-     bw = 1.06 * x_std * len(x) ** (-0.2)
-     return bw
-
-
- def _bw_silverman(x, x_std=None, **kwargs):  # pylint: disable=unused-argument
-     """Silverman's Rule."""
-     if x_std is None:
-         x_std = np.std(x)
-     q75, q25 = np.percentile(x, [75, 25])
-     x_iqr = q75 - q25
-     a = min(x_std, x_iqr / 1.34)
-     bw = 0.9 * a * len(x) ** (-0.2)
-     return bw
-
-
- def _bw_isj(x, grid_counts=None, x_std=None, x_range=None):
-     """Improved Sheather-Jones bandwidth estimation.
-
-     Improved Sheather and Jones method as explained in [1]_. This method is used internally by the
-     KDE estimator, resulting in saved computation time as minimums, maximums and the grid are
-     pre-computed.
-
-     References
-     ----------
-     .. [1] Kernel density estimation via diffusion.
-        Z. I. Botev, J. F. Grotowski, and D. P. Kroese.
-        Ann. Statist. 38 (2010), no. 5, 2916--2957.
-     """
-     x_len = len(x)
-     if x_range is None:
-         x_min = np.min(x)
-         x_max = np.max(x)
-         x_range = x_max - x_min
-
-     # Relative frequency per bin
-     if grid_counts is None:
-         x_std = np.std(x)
-         grid_len = 256
-         grid_min = x_min - 0.5 * x_std
-         grid_max = x_max + 0.5 * x_std
-         grid_counts, _, _ = histogram(x, grid_len, (grid_min, grid_max))
-     else:
-         grid_len = len(grid_counts) - 1
-
-     grid_relfreq = grid_counts / x_len
-
-     # Discrete cosine transform of the data
-     a_k = _dct1d(grid_relfreq)
-
-     k_sq = np.arange(1, grid_len) ** 2
-     a_sq = a_k[range(1, grid_len)] ** 2
-
-     t = _root(_fixed_point, x_len, args=(x_len, k_sq, a_sq), x=x)
-     h = t**0.5 * x_range
-     return h
-
-
- def _bw_experimental(x, grid_counts=None, x_std=None, x_range=None):
-     """Experimental bandwidth estimator."""
-     bw_silverman = _bw_silverman(x, x_std=x_std)
-     bw_isj = _bw_isj(x, grid_counts=grid_counts, x_range=x_range)
-     return 0.5 * (bw_silverman + bw_isj)
-
-
- def _bw_taylor(x):
-     """Taylor's rule for circular bandwidth estimation.
-
-     This function implements a rule-of-thumb for choosing the bandwidth of a von Mises kernel
-     density estimator that assumes the underlying distribution is von Mises as introduced in [1]_.
-     It is analogous to Scott's rule for the Gaussian KDE.
-
-     Circular bandwidth has a different scale from linear bandwidth. Unlike linear scale, low
-     bandwidths are associated with oversmoothing and high values with undersmoothing.
-
-     References
-     ----------
-     .. [1] C.C Taylor (2008). Automatic bandwidth selection for circular
-        density estimation.
-        Computational Statistics and Data Analysis, 52, 7, 3493–3500.
-     """
-     x_len = len(x)
-     kappa = _kappa_mle(x)
-     num = 3 * x_len * kappa**2 * ive(2, 2 * kappa)
-     den = 4 * np.pi**0.5 * ive(0, kappa) ** 2
-     return (num / den) ** 0.4
-
-
- _BW_METHODS_LINEAR = {
-     "scott": _bw_scott,
-     "silverman": _bw_silverman,
-     "isj": _bw_isj,
-     "experimental": _bw_experimental,
- }
-
-
- def _get_bw(x, bw, grid_counts=None, x_std=None, x_range=None):
-     """Compute bandwidth for a given data `x` and `bw`.
-
-     Also checks `bw` is correctly specified.
-
-     Parameters
-     ----------
-     x : 1-D numpy array
-         1 dimensional array of sample data from the
-         variable for which a density estimate is desired.
-     bw: int, float or str
-         If numeric, indicates the bandwidth and must be positive.
-         If str, indicates the method to estimate the bandwidth.
-
-     Returns
-     -------
-     bw: float
-         Bandwidth
-     """
-     if isinstance(bw, bool):
-         raise ValueError(
-             (
-                 "`bw` must not be of type `bool`.\n"
-                 "Expected a positive numeric or one of the following strings:\n"
-                 f"{list(_BW_METHODS_LINEAR)}."
-             )
-         )
-     if isinstance(bw, (int, float)):
-         if bw < 0:
-             raise ValueError(f"Numeric `bw` must be positive.\nInput: {bw:.4f}.")
-     elif isinstance(bw, str):
-         bw_lower = bw.lower()
-
-         if bw_lower not in _BW_METHODS_LINEAR:
-             raise ValueError(
-                 "Unrecognized bandwidth method.\n"
-                 f"Input is: {bw_lower}.\n"
-                 f"Expected one of: {list(_BW_METHODS_LINEAR)}."
-             )
-
-         bw_fun = _BW_METHODS_LINEAR[bw_lower]
-         bw = bw_fun(x, grid_counts=grid_counts, x_std=x_std, x_range=x_range)
-     else:
-         raise ValueError(
-             "Unrecognized `bw` argument.\n"
-             "Expected a positive numeric or one of the following strings:\n"
-             f"{list(_BW_METHODS_LINEAR)}."
-         )
-     return bw
-
-
- def _vonmises_pdf(x, mu, kappa):
-     """Calculate vonmises_pdf."""
-     if kappa <= 0:
-         raise ValueError("Argument 'kappa' must be positive.")
-     pdf = 1 / (2 * np.pi * ive(0, kappa)) * np.exp(np.cos(x - mu) - 1) ** kappa
-     return pdf
-
-
- def _a1inv(x):
-     """Compute inverse function.
-
-     Inverse function of the ratio of the first and
-     zeroth order Bessel functions of the first kind.
-
-     Returns the value k, such that a1inv(x) = k, i.e. a1(k) = x.
-     """
-     if 0 <= x < 0.53:
-         return 2 * x + x**3 + (5 * x**5) / 6
-     elif x < 0.85:
-         return -0.4 + 1.39 * x + 0.43 / (1 - x)
-     else:
-         return 1 / (x**3 - 4 * x**2 + 3 * x)
-
-
- def _kappa_mle(x):
-     mean = _circular_mean(x)
-     kappa = _a1inv(np.mean(np.cos(x - mean)))
-     return kappa
-
-
- def _dct1d(x):
-     """Discrete Cosine Transform in 1 Dimension.
-
-     Parameters
-     ----------
-     x : numpy array
-         1 dimensional array of values for which the
-         DCT is desired
-
-     Returns
-     -------
-     output : DCT transformed values
-     """
-     x_len = len(x)
-
-     even_increasing = np.arange(0, x_len, 2)
-     odd_decreasing = np.arange(x_len - 1, 0, -2)
-
-     x = np.concatenate((x[even_increasing], x[odd_decreasing]))
-
-     w_1k = np.r_[1, (2 * np.exp(-(0 + 1j) * (np.arange(1, x_len)) * np.pi / (2 * x_len)))]
-     output = np.real(w_1k * fft(x))
-
-     return output
-
-
- def _fixed_point(t, N, k_sq, a_sq):
-     """Calculate t-zeta*gamma^[l](t).
-
-     Implementation of the function t-zeta*gamma^[l](t) derived from equation (30) in [1].
-
-     References
-     ----------
-     .. [1] Kernel density estimation via diffusion.
-        Z. I. Botev, J. F. Grotowski, and D. P. Kroese.
-        Ann. Statist. 38 (2010), no. 5, 2916--2957.
-     """
-     k_sq = np.asarray(k_sq, dtype=np.float64)
-     a_sq = np.asarray(a_sq, dtype=np.float64)
-
-     l = 7
-     f = np.sum(np.power(k_sq, l) * a_sq * np.exp(-k_sq * np.pi**2 * t))
-     f *= 0.5 * np.pi ** (2.0 * l)
-
-     for j in np.arange(l - 1, 2 - 1, -1):
-         c1 = (1 + 0.5 ** (j + 0.5)) / 3
-         c2 = np.prod(np.arange(1.0, 2 * j + 1, 2, dtype=np.float64))
-         c2 /= (np.pi / 2) ** 0.5
-         t_j = np.power((c1 * (c2 / (N * f))), (2.0 / (3.0 + 2.0 * j)))
-         f = np.sum(k_sq**j * a_sq * np.exp(-k_sq * np.pi**2.0 * t_j))
-         f *= 0.5 * np.pi ** (2 * j)
-
-     out = t - (2 * N * np.pi**0.5 * f) ** (-0.4)
-     return out
-
-
- def _root(function, N, args, x):
-     # The right bound is at most 0.01
-     found = False
-     N = max(min(1050, N), 50)
-     tol = 10e-12 + 0.01 * (N - 50) / 1000
-
-     while not found:
-         try:
-             bw, res = brentq(function, 0, 0.01, args=args, full_output=True, disp=False)
-             found = res.converged
-         except ValueError:
-             bw = 0
-             tol *= 2.0
-             found = False
-         if bw <= 0 or tol >= 1:
-             bw = (_bw_silverman(x) / np.ptp(x)) ** 2
-             return bw
-     return bw
-
-
- def _check_custom_lims(custom_lims, x_min, x_max):
-     """Check if `custom_lims` are of the correct type.
-
-     It accepts numeric lists/tuples of length 2.
-
-     Parameters
-     ----------
-     custom_lims : Object whose type is checked.
-
-     Returns
-     -------
-     custom_lims : list
-         Custom limits with any None entry replaced by the corresponding data bound.
-     """
-     if not isinstance(custom_lims, (list, tuple)):
-         raise TypeError(
-             "`custom_lims` must be a numeric list or tuple of length 2.\n"
-             f"Not an object of {type(custom_lims)}."
-         )
-
-     if len(custom_lims) != 2:
-         raise AttributeError(f"`len(custom_lims)` must be 2, not {len(custom_lims)}.")
-
-     any_bool = any(isinstance(i, bool) for i in custom_lims)
-     if any_bool:
-         raise TypeError("Elements of `custom_lims` must be numeric or None, not bool.")
-
-     custom_lims = list(custom_lims)  # convert to a mutable object
-     if custom_lims[0] is None:
-         custom_lims[0] = x_min
-
-     if custom_lims[1] is None:
-         custom_lims[1] = x_max
-
-     all_numeric = all(isinstance(i, (int, float, np.integer, np.number)) for i in custom_lims)
-     if not all_numeric:
-         raise TypeError(
-             "Elements of `custom_lims` must be numeric or None.\nAt least one of them is not."
-         )
-
-     if not custom_lims[0] < custom_lims[1]:
-         raise ValueError("`custom_lims[0]` must be smaller than `custom_lims[1]`.")
-
-     if custom_lims[0] > x_min or custom_lims[1] < x_max:
-         raise ValueError("Some observations are outside `custom_lims` boundaries.")
-
-     return custom_lims
-
-
- def _get_grid(
-     x_min, x_max, x_std, extend_fct, grid_len, custom_lims, extend=True, bound_correction=False
- ):
-     """Compute the grid that bins the data used to estimate the density function.
-
-     Parameters
-     ----------
-     x_min : float
-         Minimum value of the data
-     x_max: float
-         Maximum value of the data.
-     x_std: float
-         Standard deviation of the data.
-     extend_fct: float
-         Indicates the factor by which `x_std` is multiplied
-         to extend the range of the data.
-     grid_len: int
-         Number of bins
-     custom_lims: tuple or list
-         Custom limits for the domain of the density estimation.
-         Must be numeric of length 2. Overrides `extend`.
-     extend: bool, optional
-         Whether to extend the range of the data or not.
-         Default is True.
-     bound_correction: bool, optional
-         Whether the density estimation performs boundary correction or not.
-         This does not directly impact the output, but it is used to override `extend`.
-         Default is False.
-
-     Returns
-     -------
-     grid_min: float
-         Minimum value of the grid
-     grid_max: float
-         Maximum value of the grid
-     grid_len: int
-         Number of bins
-     """
-     # Set up number of bins.
-     grid_len = max(int(grid_len), 100)
-
-     # Set up domain
-     if custom_lims is not None:
-         custom_lims = _check_custom_lims(custom_lims, x_min, x_max)
-         grid_min = custom_lims[0]
-         grid_max = custom_lims[1]
-     elif extend and not bound_correction:
-         grid_extend = extend_fct * x_std
-         grid_min = x_min - grid_extend
-         grid_max = x_max + grid_extend
-     else:
-         grid_min = x_min
-         grid_max = x_max
-     return grid_min, grid_max, grid_len
-
-
- def kde(x, circular=False, **kwargs):
-     """One dimensional density estimation.
-
-     It is a wrapper around ``kde_linear()`` and ``kde_circular()``.
-
-     Parameters
-     ----------
-     x : 1D numpy array
-         Data used to calculate the density estimation.
-     circular : bool, optional
-         Whether ``x`` is a circular variable or not. Defaults to False.
-     kwargs : dict, optional
-         Arguments passed to ``kde_linear()`` and ``kde_circular()``.
-         See their documentation for more info.
-
-     Returns
-     -------
-     grid : numpy.ndarray
-         Gridded numpy array for the x values.
-     pdf : numpy.ndarray
-         Numpy array for the density estimates.
-     bw : float
-         The estimated bandwidth. Only returned if requested.
-
-     Examples
-     --------
-     Default density estimation for linear data
-
-     .. plot::
-         :context: close-figs
-
-         >>> import numpy as np
-         >>> import matplotlib.pyplot as plt
-         >>> from arviz import kde
-         >>>
-         >>> rng = np.random.default_rng(49)
-         >>> rvs = rng.gamma(shape=1.8, size=1000)
-         >>> grid, pdf = kde(rvs)
-         >>> plt.plot(grid, pdf)
-
-     Density estimation for linear data with Silverman's rule bandwidth
-
-     .. plot::
-         :context: close-figs
-
-         >>> grid, pdf = kde(rvs, bw="silverman")
-         >>> plt.plot(grid, pdf)
-
-     Density estimation for linear data with scaled bandwidth
-
-     .. plot::
-         :context: close-figs
-
-         >>> # bw_fct > 1 means more smoothness.
-         >>> grid, pdf = kde(rvs, bw_fct=2.5)
-         >>> plt.plot(grid, pdf)
-
-     Default density estimation for linear data with extended limits
-
-     .. plot::
-         :context: close-figs
-
-         >>> grid, pdf = kde(rvs, bound_correction=False, extend=True, extend_fct=0.5)
-         >>> plt.plot(grid, pdf)
-
-     Default density estimation for linear data with custom limits
-
-     .. plot::
-         :context: close-figs
-
-         >>> # It accepts tuples and lists of length 2.
-         >>> grid, pdf = kde(rvs, bound_correction=False, custom_lims=(0, 11))
-         >>> plt.plot(grid, pdf)
-
-     Default density estimation for circular data
-
-     .. plot::
-         :context: close-figs
-
-         >>> rvs = np.random.vonmises(mu=np.pi, kappa=1, size=500)
-         >>> grid, pdf = kde(rvs, circular=True)
-         >>> plt.plot(grid, pdf)
-
-     Density estimation for circular data with scaled bandwidth
-
-     .. plot::
-         :context: close-figs
-
-         >>> rvs = np.random.vonmises(mu=np.pi, kappa=1, size=500)
-         >>> # bw_fct > 1 means less smoothness.
-         >>> grid, pdf = kde(rvs, circular=True, bw_fct=3)
-         >>> plt.plot(grid, pdf)
-
-     Density estimation for circular data with custom limits
-
-     .. plot::
-         :context: close-figs
-
-         >>> # This is still experimental, does not always work.
-         >>> rvs = np.random.vonmises(mu=0, kappa=30, size=500)
-         >>> grid, pdf = kde(rvs, circular=True, custom_lims=(-1, 1))
-         >>> plt.plot(grid, pdf)
-
-     See Also
-     --------
-     plot_kde : Compute and plot a kernel density estimate.
-     """
-     x = x[np.isfinite(x)]
-     if x.size == 0 or np.all(x == x[0]):
-         warnings.warn("Your data appears to have a single value or no finite values")
-
-         return np.zeros(2), np.array([np.nan] * 2)
-
-     if circular:
-         if circular == "degrees":
-             x = np.radians(x)
-         kde_fun = _kde_circular
-     else:
-         kde_fun = _kde_linear
-
-     return kde_fun(x, **kwargs)
-
-
- def _kde_linear(
-     x,
-     bw="experimental",
-     adaptive=False,
-     extend=False,
-     bound_correction=True,
-     extend_fct=0,
-     bw_fct=1,
-     bw_return=False,
-     custom_lims=None,
-     cumulative=False,
-     grid_len=512,
-     **kwargs,  # pylint: disable=unused-argument
- ):
-     """One dimensional density estimation for linear data.
-
-     Given an array of data points `x` it returns an estimate of
-     the probability density function that generated the samples in `x`.
-
-     Parameters
-     ----------
-     x : 1D numpy array
-         Data used to calculate the density estimation.
-     bw: int, float or str, optional
-         If numeric, indicates the bandwidth and must be positive.
-         If str, indicates the method to estimate the bandwidth and must be one of "scott",
-         "silverman", "isj" or "experimental". Defaults to "experimental".
-     adaptive: boolean, optional
-         Indicates if the bandwidth is adaptive or not.
-         It is the recommended approach when there are multiple modes with different spread.
-         It is not compatible with convolution. Defaults to False.
-     extend: boolean, optional
-         Whether to extend the observed range for `x` in the estimation.
-         It extends each bound by a multiple of the standard deviation of `x` given by `extend_fct`.
-         Defaults to False.
-     bound_correction: boolean, optional
-         Whether to perform boundary correction on the bounds of `x` or not.
-         Defaults to True.
-     extend_fct: float, optional
-         Number of standard deviations used to widen the lower and upper bounds of `x`.
-         Defaults to 0.5.
-     bw_fct: float, optional
-         A value that multiplies `bw` which enables tuning smoothness by hand.
-         Must be positive. Values below 1 decrease smoothness while values above 1 increase it.
-         Defaults to 1 (no modification).
-     bw_return: bool, optional
-         Whether to return the estimated bandwidth in addition to the other objects.
-         Defaults to False.
-     custom_lims: list or tuple, optional
-         A list or tuple of length 2 indicating custom bounds for the range of `x`.
-         Defaults to None which disables custom bounds.
-     cumulative: bool, optional
-         Whether to return the PDF or the cumulative PDF. Defaults to False.
-     grid_len: int, optional
-         The number of intervals used to bin the data points i.e. the length of the grid used in
-         the estimation. Defaults to 512.
-
-     Returns
-     -------
-     grid : Gridded numpy array for the x values.
-     pdf : Numpy array for the density estimates.
-     bw: optional, the estimated bandwidth.
-     """
-     # Check `bw_fct` is numeric and positive
-     if not isinstance(bw_fct, (int, float, np.integer, np.floating)):
-         raise TypeError(f"`bw_fct` must be a positive number, not an object of {type(bw_fct)}.")
-
-     if bw_fct <= 0:
-         raise ValueError(f"`bw_fct` must be a positive number, not {bw_fct}.")
-
-     # Preliminary calculations
-     x_min = x.min()
-     x_max = x.max()
-     x_std = np.std(x)
-     x_range = x_max - x_min
-
-     # Determine grid
-     grid_min, grid_max, grid_len = _get_grid(
-         x_min, x_max, x_std, extend_fct, grid_len, custom_lims, extend, bound_correction
-     )
-     grid_counts, _, grid_edges = histogram(x, grid_len, (grid_min, grid_max))
-
-     # Bandwidth estimation
-     bw = bw_fct * _get_bw(x, bw, grid_counts, x_std, x_range)
-
-     # Density estimation
-     if adaptive:
-         grid, pdf = _kde_adaptive(x, bw, grid_edges, grid_counts, grid_len, bound_correction)
-     else:
-         grid, pdf = _kde_convolution(x, bw, grid_edges, grid_counts, grid_len, bound_correction)
-
-     if cumulative:
-         pdf = pdf.cumsum() / pdf.sum()
-
-     if bw_return:
-         return grid, pdf, bw
-     else:
-         return grid, pdf
-
-
- def _kde_circular(
-     x,
-     bw="taylor",
-     bw_fct=1,
-     bw_return=False,
-     custom_lims=None,
-     cumulative=False,
-     grid_len=512,
-     **kwargs,  # pylint: disable=unused-argument
- ):
-     """One dimensional density estimation for circular data.
-
-     Given an array of data points `x` measured in radians, it returns an estimate of the
-     probability density function that generated the samples in `x`.
-
-     Parameters
-     ----------
-     x : 1D numpy array
-         Data used to calculate the density estimation.
-     bw: int, float or str, optional
-         If numeric, indicates the bandwidth and must be positive.
-         If str, indicates the method to estimate the bandwidth and must be "taylor" since it is the
-         only option supported so far. Defaults to "taylor".
-     bw_fct: float, optional
-         A value that multiplies `bw` which enables tuning smoothness by hand. Must be positive.
-         Values above 1 decrease smoothness while values below 1 increase it.
-         Defaults to 1 (no modification).
-     bw_return: bool, optional
-         Whether to return the estimated bandwidth in addition to the other objects.
-         Defaults to False.
-     custom_lims: list or tuple, optional
-         A list or tuple of length 2 indicating custom bounds for the range of `x`.
-         Defaults to None which means the estimation limits are [-pi, pi].
-     cumulative: bool, optional
-         Whether to return the PDF or the cumulative PDF. Defaults to False.
-     grid_len: int, optional
-         The number of intervals used to bin the data points i.e. the length of the grid used in the
-         estimation. Defaults to 512.
-     """
-     # All values between -pi and pi
-     x = _normalize_angle(x)
-
-     # Check `bw_fct` is numeric and positive
-     if not isinstance(bw_fct, (int, float, np.integer, np.floating)):
-         raise TypeError(f"`bw_fct` must be a positive number, not an object of {type(bw_fct)}.")
-
-     if bw_fct <= 0:
-         raise ValueError(f"`bw_fct` must be a positive number, not {bw_fct}.")
-
-     # Determine bandwidth
-     if isinstance(bw, bool):
-         raise ValueError("`bw` can't be of type `bool`.\nExpected a positive numeric or 'taylor'")
-     if isinstance(bw, (int, float)) and bw < 0:
-         raise ValueError(f"Numeric `bw` must be positive.\nInput: {bw:.4f}.")
-     if isinstance(bw, str):
-         if bw == "taylor":
-             bw = _bw_taylor(x)
-         else:
-             raise ValueError(f"`bw` must be a positive numeric or `taylor`, not {bw}")
-     bw *= bw_fct
-
-     # Determine grid
-     if custom_lims is not None:
-         custom_lims = _check_custom_lims(custom_lims, x.min(), x.max())
-         grid_min = custom_lims[0]
-         grid_max = custom_lims[1]
-         assert grid_min >= -np.pi, "Lower limit can't be smaller than -pi"
-         assert grid_max <= np.pi, "Upper limit can't be larger than pi"
-     else:
-         grid_min = -np.pi
-         grid_max = np.pi
-
-     bins = np.linspace(grid_min, grid_max, grid_len + 1)
-     bin_counts, _, bin_edges = histogram(x, bins=bins)
-     grid = 0.5 * (bin_edges[1:] + bin_edges[:-1])
-
-     kern = _vonmises_pdf(x=grid, mu=0, kappa=bw)
-     pdf = np.fft.fftshift(np.fft.irfft(np.fft.rfft(kern) * np.fft.rfft(bin_counts)))
-     pdf /= len(x)
-
-     if cumulative:
-         pdf = pdf.cumsum() / pdf.sum()
-
-     if bw_return:
-         return grid, pdf, bw
-     else:
-         return grid, pdf
-
-
- # pylint: disable=unused-argument
- def _kde_convolution(x, bw, grid_edges, grid_counts, grid_len, bound_correction, **kwargs):
-     """Kernel density with convolution.
-
-     One dimensional Gaussian kernel density estimation via convolution of the binned relative
-     frequencies and a Gaussian filter. This is an internal function used by `kde()`.
-     """
-     # Calculate relative frequencies per bin
-     bin_width = grid_edges[1] - grid_edges[0]
-     f = grid_counts / bin_width / len(x)
-
-     # Bandwidth must consider the bin width
-     bw /= bin_width
-
-     # See: https://stackoverflow.com/questions/2773606/gaussian-filter-in-matlab
-
-     grid = (grid_edges[1:] + grid_edges[:-1]) / 2
-
-     kernel_n = int(bw * 2 * np.pi)
-     if kernel_n == 0:
-         kernel_n = 1
-
-     kernel = gaussian(kernel_n, bw)
-
-     if bound_correction:
-         npad = int(grid_len / 5)
-         f = np.concatenate([f[npad - 1 :: -1], f, f[grid_len : grid_len - npad - 1 : -1]])
-         pdf = convolve(f, kernel, mode="same", method="direct")[npad : npad + grid_len]
-     else:
-         pdf = convolve(f, kernel, mode="same", method="direct")
-     pdf /= bw * (2 * np.pi) ** 0.5
-
-     return grid, pdf
-
-
- def _kde_adaptive(x, bw, grid_edges, grid_counts, grid_len, bound_correction, **kwargs):
-     """Compute Adaptive Kernel Density Estimation.
-
-     One dimensional adaptive Gaussian kernel density estimation. The implementation uses the
-     binning technique. Since there is not a unique `bw`, the convolution is not possible. The
-     alternative implemented in this function is known as Abramson's method.
-     This is an internal function used by `kde()`.
-     """
-     # Pilot computations used for bandwidth adjustment
-     pilot_grid, pilot_pdf = _kde_convolution(
-         x, bw, grid_edges, grid_counts, grid_len, bound_correction
-     )
-
-     # Adds to avoid np.log(0) and zero division
-     pilot_pdf += 1e-9
-
-     # Determine the modification factors
-     pdf_interp = np.interp(x, pilot_grid, pilot_pdf)
-     geom_mean = np.exp(np.mean(np.log(pdf_interp)))
-
-     # Power of c = 0.5 -> Abramson's method
-     adj_factor = (geom_mean / pilot_pdf) ** 0.5
-     bw_adj = bw * adj_factor
-
-     # Estimation of Gaussian KDE via binned method (convolution not possible)
-     grid = pilot_grid
-
-     if bound_correction:
-         grid_npad = int(grid_len / 5)
-         grid_width = grid_edges[1] - grid_edges[0]
-         grid_pad = grid_npad * grid_width
-         grid_padded = np.linspace(
-             grid_edges[0] - grid_pad,
-             grid_edges[grid_len - 1] + grid_pad,
-             num=grid_len + 2 * grid_npad,
-         )
-         grid_counts = np.concatenate(
-             [
-                 grid_counts[grid_npad - 1 :: -1],
-                 grid_counts,
-                 grid_counts[grid_len : grid_len - grid_npad - 1 : -1],
-             ]
-         )
-         bw_adj = np.concatenate(
-             [bw_adj[grid_npad - 1 :: -1], bw_adj, bw_adj[grid_len : grid_len - grid_npad - 1 : -1]]
-         )
-         pdf_mat = (grid_padded - grid_padded[:, None]) / bw_adj[:, None]
-         pdf_mat = np.exp(-0.5 * pdf_mat**2) * grid_counts[:, None]
-         pdf_mat /= (2 * np.pi) ** 0.5 * bw_adj[:, None]
-         pdf = np.sum(pdf_mat[:, grid_npad : grid_npad + grid_len], axis=0) / len(x)
-
-     else:
-         pdf_mat = (grid - grid[:, None]) / bw_adj[:, None]
-         pdf_mat = np.exp(-0.5 * pdf_mat**2) * grid_counts[:, None]
-         pdf_mat /= (2 * np.pi) ** 0.5 * bw_adj[:, None]
-         pdf = np.sum(pdf_mat, axis=0) / len(x)
-
-     return grid, pdf
-
-
- def _fast_kde_2d(x, y, gridsize=(128, 128), circular=False):
-     """
-     2D fft-based Gaussian kernel density estimate (KDE).
-
-     The code was adapted from https://github.com/mfouesneau/faststats
-
-     Parameters
-     ----------
-     x : Numpy array or list
-     y : Numpy array or list
-     gridsize : tuple
-         Number of points used to discretize data. Use powers of 2 for fft optimization
-     circular: bool
-         If True use circular boundaries. Defaults to False
-
-     Returns
-     -------
-     grid: A gridded 2D KDE of the input points (x, y)
-     xmin: minimum value of x
-     xmax: maximum value of x
-     ymin: minimum value of y
-     ymax: maximum value of y
-     """
-     x = np.asarray(x, dtype=float)
-     x = x[np.isfinite(x)]
-     y = np.asarray(y, dtype=float)
-     y = y[np.isfinite(y)]
-
-     xmin, xmax = x.min(), x.max()
-     ymin, ymax = y.min(), y.max()
-
-     len_x = len(x)
-     weights = np.ones(len_x)
-     n_x, n_y = gridsize
-
-     d_x = (xmax - xmin) / (n_x - 1)
-     d_y = (ymax - ymin) / (n_y - 1)
-
-     xyi = _stack(x, y).T
-     xyi -= [xmin, ymin]
-     xyi /= [d_x, d_y]
-     xyi = np.floor(xyi, xyi).T
-
-     scotts_factor = len_x ** (-1 / 6)
-     cov = _cov(xyi)
-     std_devs = np.diag(cov) ** 0.5
-     kern_nx, kern_ny = np.round(scotts_factor * 2 * np.pi * std_devs)
-
-     inv_cov = np.linalg.inv(cov * scotts_factor**2)
-
-     x_x = np.arange(kern_nx) - kern_nx / 2
-     y_y = np.arange(kern_ny) - kern_ny / 2
-     x_x, y_y = np.meshgrid(x_x, y_y)
-
-     kernel = _stack(x_x.flatten(), y_y.flatten())
-     kernel = _dot(inv_cov, kernel) * kernel
-     kernel = np.exp(-kernel.sum(axis=0) / 2)
-     kernel = kernel.reshape((int(kern_ny), int(kern_nx)))
-
-     boundary = "wrap" if circular else "symm"
-
-     grid = coo_matrix((weights, xyi), shape=(n_x, n_y)).toarray()
-     grid = convolve2d(grid, kernel, mode="same", boundary=boundary)
-
-     norm_factor = np.linalg.det(2 * np.pi * cov * scotts_factor**2)
-     norm_factor = len_x * d_x * d_y * norm_factor**0.5
-
-     grid /= norm_factor
-
-     return grid, xmin, xmax, ymin, ymax
-
-
- def get_bins(values):
-     """
-     Automatically compute the number of bins for discrete variables.
-
-     Parameters
-     ----------
-     values : numpy array
-         values
-
-     Returns
-     -------
-     array with the bins
-
-     Notes
-     -----
-     Computes the width of the bins by taking the maximum of the Sturges and the Freedman-Diaconis
-     estimators. According to numpy `np.histogram` this provides good all around performance.
-
-     The Sturges estimator is a very simplistic one based on the assumption of normality of the data.
-     This estimator has poor performance for non-normal data, which becomes especially obvious for
-     large data sets. The estimate depends only on the size of the data.
-
-     The Freedman-Diaconis rule uses the interquartile range (IQR) to estimate the binwidth.
-     It is considered a robust version of the Scott rule as the IQR is less affected by outliers
-     than the standard deviation. However, the IQR depends on fewer points than the standard
-     deviation, so it is less accurate, especially for long tailed distributions.
-     """
-     dtype = values.dtype.kind
-
-     if dtype == "i":
-         x_min = values.min().astype(int)
-         x_max = values.max().astype(int)
-     else:
-         x_min = values.min().astype(float)
-         x_max = values.max().astype(float)
-
-     # Sturges histogram bin estimator
-     bins_sturges = (x_max - x_min) / (np.log2(values.size) + 1)
-
-     # The Freedman-Diaconis histogram bin estimator.
-     iqr = np.subtract(*np.percentile(values, [75, 25]))  # pylint: disable=assignment-from-no-return
-     bins_fd = 2 * iqr * values.size ** (-1 / 3)
-
-     if dtype == "i":
-         width = np.round(np.max([1, bins_sturges, bins_fd])).astype(int)
-         bins = np.arange(x_min, x_max + width + 1, width)
-     else:
-         width = np.max([bins_sturges, bins_fd])
-         if np.isclose(x_min, x_max):
-             width = 1e-3
-         bins = np.arange(x_min, x_max + width, width)
-
-     return bins
-
-
- def _sturges_formula(dataset, mult=1):
-     """Use Sturges' formula to determine number of bins.
-
-     See https://en.wikipedia.org/wiki/Histogram#Sturges'_formula
-     or https://doi.org/10.1080%2F01621459.1926.10502161
-
-     Parameters
-     ----------
-     dataset: xarray.Dataset
-         Must have the `draw` dimension
-
-     mult: float
-         Used to scale the number of bins up or down. Default is 1 for Sturges' formula.
-
-     Returns
-     -------
-     int
-         Number of bins to use
-     """
-     return int(np.ceil(mult * np.log2(dataset.draw.size)) + 1)
-
-
- def _circular_mean(x):
-     """Compute mean of circular variable measured in radians.
-
-     The result is between -pi and pi.
-     """
-     sinr = np.sum(np.sin(x))
-     cosr = np.sum(np.cos(x))
-     mean = np.arctan2(sinr, cosr)
-
-     return mean
-
-
- def _normalize_angle(x, zero_centered=True):
-     """Normalize angles.
-
-     Normalize angles in radians to [-pi, pi) or [0, 2 * pi) according to `zero_centered`.
-     """
-     if zero_centered:
-         return (x + np.pi) % (2 * np.pi) - np.pi
-     else:
-         return x % (2 * np.pi)
-
-
- @conditional_jit(cache=True, nopython=True)
- def histogram(data, bins, range_hist=None):
-     """Conditionally jitted histogram.
-
-     Parameters
-     ----------
-     data : array-like
-         Input data. Passed as first positional argument to ``np.histogram``.
-     bins : int or array-like
-         Passed as keyword argument ``bins`` to ``np.histogram``.
-     range_hist : (float, float), optional
-         Passed as keyword argument ``range`` to ``np.histogram``.
-
-     Returns
-     -------
-     hist : array
-         The number of counts per bin.
-     density : array
-         The density corresponding to each bin.
-     bin_edges : array
-         The edges of the bins used.
-     """
-     hist, bin_edges = np.histogram(data, bins=bins, range=range_hist)
-     hist_dens = hist / (hist.sum() * np.diff(bin_edges))
-     return hist, hist_dens, bin_edges
-
-
- def _find_hdi_contours(density, hdi_probs):
-     """
-     Find contours enclosing regions of highest posterior density.
-
-     Parameters
-     ----------
-     density : array-like
-         A 2D KDE on a grid with cells of equal area.
-     hdi_probs : array-like
-         An array of highest density interval confidence probabilities.
-
-     Returns
-     -------
-     contour_levels : array
-         The contour levels corresponding to the given HDI probabilities.
-     """
-     # Using the algorithm from corner.py
-     sorted_density = np.sort(density, axis=None)[::-1]
-     sm = sorted_density.cumsum()
-     sm /= sm[-1]
-
-     contours = np.empty_like(hdi_probs)
-     for idx, hdi_prob in enumerate(hdi_probs):
-         try:
-             contours[idx] = sorted_density[sm <= hdi_prob][-1]
-         except IndexError:
-             contours[idx] = sorted_density[0]
-
-     return contours
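
For context, the module removed above exposed ``kde`` as its only public function. The sketch below is adapted from the docstring example in the removed file and targets the 0.x API shown in this diff, not 1.0.0rc0; it assumes numpy and matplotlib are installed and is illustrative only.

    import numpy as np
    import matplotlib.pyplot as plt
    from arviz import kde  # public entry point of the removed module (0.x series)

    # Linear data: default "experimental" bandwidth, boundary correction enabled by default
    rng = np.random.default_rng(49)
    rvs = rng.gamma(shape=1.8, size=1000)
    grid, pdf = kde(rvs)
    plt.plot(grid, pdf)

    # Circular data in radians: Taylor's rule bandwidth, grid spans [-pi, pi]
    rvs_circ = rng.vonmises(mu=np.pi, kappa=1, size=500)
    grid_c, pdf_c = kde(rvs_circ, circular=True)
    plt.plot(grid_c, pdf_c)
    plt.show()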