arviz 0.23.3__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arviz/__init__.py +52 -367
- arviz-1.0.0rc0.dist-info/METADATA +182 -0
- arviz-1.0.0rc0.dist-info/RECORD +5 -0
- {arviz-0.23.3.dist-info → arviz-1.0.0rc0.dist-info}/WHEEL +1 -2
- {arviz-0.23.3.dist-info → arviz-1.0.0rc0.dist-info}/licenses/LICENSE +0 -1
- arviz/data/__init__.py +0 -55
- arviz/data/base.py +0 -596
- arviz/data/converters.py +0 -203
- arviz/data/datasets.py +0 -161
- arviz/data/example_data/code/radon/radon.json +0 -326
- arviz/data/example_data/data/centered_eight.nc +0 -0
- arviz/data/example_data/data/non_centered_eight.nc +0 -0
- arviz/data/example_data/data_local.json +0 -12
- arviz/data/example_data/data_remote.json +0 -58
- arviz/data/inference_data.py +0 -2386
- arviz/data/io_beanmachine.py +0 -112
- arviz/data/io_cmdstan.py +0 -1036
- arviz/data/io_cmdstanpy.py +0 -1233
- arviz/data/io_datatree.py +0 -23
- arviz/data/io_dict.py +0 -462
- arviz/data/io_emcee.py +0 -317
- arviz/data/io_json.py +0 -54
- arviz/data/io_netcdf.py +0 -68
- arviz/data/io_numpyro.py +0 -497
- arviz/data/io_pyjags.py +0 -378
- arviz/data/io_pyro.py +0 -333
- arviz/data/io_pystan.py +0 -1095
- arviz/data/io_zarr.py +0 -46
- arviz/data/utils.py +0 -139
- arviz/labels.py +0 -210
- arviz/plots/__init__.py +0 -61
- arviz/plots/autocorrplot.py +0 -171
- arviz/plots/backends/__init__.py +0 -223
- arviz/plots/backends/bokeh/__init__.py +0 -166
- arviz/plots/backends/bokeh/autocorrplot.py +0 -101
- arviz/plots/backends/bokeh/bfplot.py +0 -23
- arviz/plots/backends/bokeh/bpvplot.py +0 -193
- arviz/plots/backends/bokeh/compareplot.py +0 -167
- arviz/plots/backends/bokeh/densityplot.py +0 -239
- arviz/plots/backends/bokeh/distcomparisonplot.py +0 -23
- arviz/plots/backends/bokeh/distplot.py +0 -183
- arviz/plots/backends/bokeh/dotplot.py +0 -113
- arviz/plots/backends/bokeh/ecdfplot.py +0 -73
- arviz/plots/backends/bokeh/elpdplot.py +0 -203
- arviz/plots/backends/bokeh/energyplot.py +0 -155
- arviz/plots/backends/bokeh/essplot.py +0 -176
- arviz/plots/backends/bokeh/forestplot.py +0 -772
- arviz/plots/backends/bokeh/hdiplot.py +0 -54
- arviz/plots/backends/bokeh/kdeplot.py +0 -268
- arviz/plots/backends/bokeh/khatplot.py +0 -163
- arviz/plots/backends/bokeh/lmplot.py +0 -185
- arviz/plots/backends/bokeh/loopitplot.py +0 -211
- arviz/plots/backends/bokeh/mcseplot.py +0 -184
- arviz/plots/backends/bokeh/pairplot.py +0 -328
- arviz/plots/backends/bokeh/parallelplot.py +0 -81
- arviz/plots/backends/bokeh/posteriorplot.py +0 -324
- arviz/plots/backends/bokeh/ppcplot.py +0 -379
- arviz/plots/backends/bokeh/rankplot.py +0 -149
- arviz/plots/backends/bokeh/separationplot.py +0 -107
- arviz/plots/backends/bokeh/traceplot.py +0 -436
- arviz/plots/backends/bokeh/violinplot.py +0 -164
- arviz/plots/backends/matplotlib/__init__.py +0 -124
- arviz/plots/backends/matplotlib/autocorrplot.py +0 -72
- arviz/plots/backends/matplotlib/bfplot.py +0 -78
- arviz/plots/backends/matplotlib/bpvplot.py +0 -177
- arviz/plots/backends/matplotlib/compareplot.py +0 -135
- arviz/plots/backends/matplotlib/densityplot.py +0 -194
- arviz/plots/backends/matplotlib/distcomparisonplot.py +0 -119
- arviz/plots/backends/matplotlib/distplot.py +0 -178
- arviz/plots/backends/matplotlib/dotplot.py +0 -116
- arviz/plots/backends/matplotlib/ecdfplot.py +0 -70
- arviz/plots/backends/matplotlib/elpdplot.py +0 -189
- arviz/plots/backends/matplotlib/energyplot.py +0 -113
- arviz/plots/backends/matplotlib/essplot.py +0 -180
- arviz/plots/backends/matplotlib/forestplot.py +0 -656
- arviz/plots/backends/matplotlib/hdiplot.py +0 -48
- arviz/plots/backends/matplotlib/kdeplot.py +0 -177
- arviz/plots/backends/matplotlib/khatplot.py +0 -241
- arviz/plots/backends/matplotlib/lmplot.py +0 -149
- arviz/plots/backends/matplotlib/loopitplot.py +0 -144
- arviz/plots/backends/matplotlib/mcseplot.py +0 -161
- arviz/plots/backends/matplotlib/pairplot.py +0 -355
- arviz/plots/backends/matplotlib/parallelplot.py +0 -58
- arviz/plots/backends/matplotlib/posteriorplot.py +0 -348
- arviz/plots/backends/matplotlib/ppcplot.py +0 -478
- arviz/plots/backends/matplotlib/rankplot.py +0 -119
- arviz/plots/backends/matplotlib/separationplot.py +0 -97
- arviz/plots/backends/matplotlib/traceplot.py +0 -526
- arviz/plots/backends/matplotlib/tsplot.py +0 -121
- arviz/plots/backends/matplotlib/violinplot.py +0 -148
- arviz/plots/bfplot.py +0 -128
- arviz/plots/bpvplot.py +0 -308
- arviz/plots/compareplot.py +0 -177
- arviz/plots/densityplot.py +0 -284
- arviz/plots/distcomparisonplot.py +0 -197
- arviz/plots/distplot.py +0 -233
- arviz/plots/dotplot.py +0 -233
- arviz/plots/ecdfplot.py +0 -372
- arviz/plots/elpdplot.py +0 -174
- arviz/plots/energyplot.py +0 -147
- arviz/plots/essplot.py +0 -319
- arviz/plots/forestplot.py +0 -304
- arviz/plots/hdiplot.py +0 -211
- arviz/plots/kdeplot.py +0 -357
- arviz/plots/khatplot.py +0 -236
- arviz/plots/lmplot.py +0 -380
- arviz/plots/loopitplot.py +0 -224
- arviz/plots/mcseplot.py +0 -194
- arviz/plots/pairplot.py +0 -281
- arviz/plots/parallelplot.py +0 -204
- arviz/plots/plot_utils.py +0 -599
- arviz/plots/posteriorplot.py +0 -298
- arviz/plots/ppcplot.py +0 -369
- arviz/plots/rankplot.py +0 -232
- arviz/plots/separationplot.py +0 -167
- arviz/plots/styles/arviz-bluish.mplstyle +0 -1
- arviz/plots/styles/arviz-brownish.mplstyle +0 -1
- arviz/plots/styles/arviz-colors.mplstyle +0 -2
- arviz/plots/styles/arviz-cyanish.mplstyle +0 -1
- arviz/plots/styles/arviz-darkgrid.mplstyle +0 -40
- arviz/plots/styles/arviz-doc.mplstyle +0 -88
- arviz/plots/styles/arviz-docgrid.mplstyle +0 -88
- arviz/plots/styles/arviz-grayscale.mplstyle +0 -41
- arviz/plots/styles/arviz-greenish.mplstyle +0 -1
- arviz/plots/styles/arviz-orangish.mplstyle +0 -1
- arviz/plots/styles/arviz-plasmish.mplstyle +0 -1
- arviz/plots/styles/arviz-purplish.mplstyle +0 -1
- arviz/plots/styles/arviz-redish.mplstyle +0 -1
- arviz/plots/styles/arviz-royish.mplstyle +0 -1
- arviz/plots/styles/arviz-viridish.mplstyle +0 -1
- arviz/plots/styles/arviz-white.mplstyle +0 -40
- arviz/plots/styles/arviz-whitegrid.mplstyle +0 -40
- arviz/plots/traceplot.py +0 -273
- arviz/plots/tsplot.py +0 -440
- arviz/plots/violinplot.py +0 -192
- arviz/preview.py +0 -58
- arviz/py.typed +0 -0
- arviz/rcparams.py +0 -606
- arviz/sel_utils.py +0 -223
- arviz/static/css/style.css +0 -340
- arviz/static/html/icons-svg-inline.html +0 -15
- arviz/stats/__init__.py +0 -37
- arviz/stats/density_utils.py +0 -1013
- arviz/stats/diagnostics.py +0 -1013
- arviz/stats/ecdf_utils.py +0 -324
- arviz/stats/stats.py +0 -2422
- arviz/stats/stats_refitting.py +0 -119
- arviz/stats/stats_utils.py +0 -609
- arviz/tests/__init__.py +0 -1
- arviz/tests/base_tests/__init__.py +0 -1
- arviz/tests/base_tests/test_data.py +0 -1679
- arviz/tests/base_tests/test_data_zarr.py +0 -143
- arviz/tests/base_tests/test_diagnostics.py +0 -511
- arviz/tests/base_tests/test_diagnostics_numba.py +0 -87
- arviz/tests/base_tests/test_helpers.py +0 -18
- arviz/tests/base_tests/test_labels.py +0 -69
- arviz/tests/base_tests/test_plot_utils.py +0 -342
- arviz/tests/base_tests/test_plots_bokeh.py +0 -1288
- arviz/tests/base_tests/test_plots_matplotlib.py +0 -2197
- arviz/tests/base_tests/test_rcparams.py +0 -317
- arviz/tests/base_tests/test_stats.py +0 -925
- arviz/tests/base_tests/test_stats_ecdf_utils.py +0 -166
- arviz/tests/base_tests/test_stats_numba.py +0 -45
- arviz/tests/base_tests/test_stats_utils.py +0 -384
- arviz/tests/base_tests/test_utils.py +0 -376
- arviz/tests/base_tests/test_utils_numba.py +0 -87
- arviz/tests/conftest.py +0 -46
- arviz/tests/external_tests/__init__.py +0 -1
- arviz/tests/external_tests/test_data_beanmachine.py +0 -78
- arviz/tests/external_tests/test_data_cmdstan.py +0 -398
- arviz/tests/external_tests/test_data_cmdstanpy.py +0 -496
- arviz/tests/external_tests/test_data_emcee.py +0 -166
- arviz/tests/external_tests/test_data_numpyro.py +0 -434
- arviz/tests/external_tests/test_data_pyjags.py +0 -119
- arviz/tests/external_tests/test_data_pyro.py +0 -260
- arviz/tests/external_tests/test_data_pystan.py +0 -307
- arviz/tests/helpers.py +0 -677
- arviz/utils.py +0 -773
- arviz/wrappers/__init__.py +0 -13
- arviz/wrappers/base.py +0 -236
- arviz/wrappers/wrap_pymc.py +0 -36
- arviz/wrappers/wrap_stan.py +0 -148
- arviz-0.23.3.dist-info/METADATA +0 -264
- arviz-0.23.3.dist-info/RECORD +0 -183
- arviz-0.23.3.dist-info/top_level.txt +0 -1
arviz/stats/density_utils.py
DELETED
|
@@ -1,1013 +0,0 @@
|
|
|
1
|
-
# pylint: disable=invalid-name,too-many-lines
|
|
2
|
-
"""Density estimation functions for ArviZ."""
|
|
3
|
-
import warnings
|
|
4
|
-
|
|
5
|
-
import numpy as np
|
|
6
|
-
from scipy.fftpack import fft
|
|
7
|
-
from scipy.optimize import brentq
|
|
8
|
-
from scipy.signal import convolve, convolve2d
|
|
9
|
-
from scipy.signal.windows import gaussian
|
|
10
|
-
from scipy.sparse import coo_matrix
|
|
11
|
-
from scipy.special import ive # pylint: disable=no-name-in-module
|
|
12
|
-
|
|
13
|
-
from ..utils import _cov, _dot, _stack, conditional_jit
|
|
14
|
-
|
|
15
|
-
__all__ = ["kde"]
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def _bw_scott(x, x_std=None, **kwargs): # pylint: disable=unused-argument
|
|
19
|
-
"""Scott's Rule."""
|
|
20
|
-
if x_std is None:
|
|
21
|
-
x_std = np.std(x)
|
|
22
|
-
bw = 1.06 * x_std * len(x) ** (-0.2)
|
|
23
|
-
return bw
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def _bw_silverman(x, x_std=None, **kwargs): # pylint: disable=unused-argument
|
|
27
|
-
"""Silverman's Rule."""
|
|
28
|
-
if x_std is None:
|
|
29
|
-
x_std = np.std(x)
|
|
30
|
-
q75, q25 = np.percentile(x, [75, 25])
|
|
31
|
-
x_iqr = q75 - q25
|
|
32
|
-
a = min(x_std, x_iqr / 1.34)
|
|
33
|
-
bw = 0.9 * a * len(x) ** (-0.2)
|
|
34
|
-
return bw
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def _bw_isj(x, grid_counts=None, x_std=None, x_range=None):
    """Improved Sheather-Jones bandwidth estimation.

    Improved Sheather and Jones method as explained in [1]_. This method is used
    internally by the KDE estimator, resulting in saved computation time as
    minimums, maximums and the grid are pre-computed.

    References
    ----------
    .. [1] Kernel density estimation via diffusion.
       Z. I. Botev, J. F. Grotowski, and D. P. Kroese.
       Ann. Statist. 38 (2010), no. 5, 2916--2957.
    """
    x_len = len(x)
    # Compute the data bounds unconditionally: they are needed both for
    # `x_range` and for building the default grid below. The previous code
    # only set `x_min`/`x_max` when `x_range` was None, which raised a
    # NameError if a caller supplied `x_range` without `grid_counts`.
    x_min = np.min(x)
    x_max = np.max(x)
    if x_range is None:
        x_range = x_max - x_min

    # Relative frequency per bin
    if grid_counts is None:
        x_std = np.std(x)
        grid_len = 256
        grid_min = x_min - 0.5 * x_std
        grid_max = x_max + 0.5 * x_std
        grid_counts, _, _ = histogram(x, grid_len, (grid_min, grid_max))
    else:
        grid_len = len(grid_counts) - 1

    grid_relfreq = grid_counts / x_len

    # Discrete cosine transform of the binned data
    a_k = _dct1d(grid_relfreq)

    k_sq = np.arange(1, grid_len) ** 2
    a_sq = a_k[range(1, grid_len)] ** 2

    # Solve the fixed-point equation for the squared normalized bandwidth,
    # then rescale back to the data range.
    t = _root(_fixed_point, x_len, args=(x_len, k_sq, a_sq), x=x)
    h = t**0.5 * x_range
    return h
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def _bw_experimental(x, grid_counts=None, x_std=None, x_range=None):
    """Experimental bandwidth estimator: mean of the Silverman and ISJ bandwidths."""
    return 0.5 * (
        _bw_silverman(x, x_std=x_std) + _bw_isj(x, grid_counts=grid_counts, x_range=x_range)
    )
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def _bw_taylor(x):
    """Taylor's rule for circular bandwidth estimation.

    Rule-of-thumb bandwidth for a von Mises kernel density estimator that assumes
    the underlying distribution is von Mises, as introduced in [1]_. It is
    analogous to Scott's rule for the Gaussian KDE.

    Circular bandwidth has a different scale from linear bandwidth. Unlike linear
    scale, low bandwidths are associated with oversmoothing and high values with
    undersmoothing.

    References
    ----------
    .. [1] C.C Taylor (2008). Automatic bandwidth selection for circular
       density estimation.
       Computational Statistics and Data Analysis, 52, 7, 3493–3500.
    """
    kappa = _kappa_mle(x)
    # `ive` is the exponentially scaled modified Bessel function; the scaling
    # cancels between numerator and denominator.
    numerator = 3 * len(x) * kappa**2 * ive(2, 2 * kappa)
    denominator = 4 * np.pi**0.5 * ive(0, kappa) ** 2
    return (numerator / denominator) ** 0.4
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
# Dispatch table mapping the accepted `bw` strings (lowercased by `_get_bw`)
# to the bandwidth-selection functions used for linear (non-circular) data.
_BW_METHODS_LINEAR = {
    "scott": _bw_scott,
    "silverman": _bw_silverman,
    "isj": _bw_isj,
    "experimental": _bw_experimental,
}
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def _get_bw(x, bw, grid_counts=None, x_std=None, x_range=None):
|
|
118
|
-
"""Compute bandwidth for a given data `x` and `bw`.
|
|
119
|
-
|
|
120
|
-
Also checks `bw` is correctly specified.
|
|
121
|
-
|
|
122
|
-
Parameters
|
|
123
|
-
----------
|
|
124
|
-
x : 1-D numpy array
|
|
125
|
-
1 dimensional array of sample data from the
|
|
126
|
-
variable for which a density estimate is desired.
|
|
127
|
-
bw: int, float or str
|
|
128
|
-
If numeric, indicates the bandwidth and must be positive.
|
|
129
|
-
If str, indicates the method to estimate the bandwidth.
|
|
130
|
-
|
|
131
|
-
Returns
|
|
132
|
-
-------
|
|
133
|
-
bw: float
|
|
134
|
-
Bandwidth
|
|
135
|
-
"""
|
|
136
|
-
if isinstance(bw, bool):
|
|
137
|
-
raise ValueError(
|
|
138
|
-
(
|
|
139
|
-
"`bw` must not be of type `bool`.\n"
|
|
140
|
-
"Expected a positive numeric or one of the following strings:\n"
|
|
141
|
-
f"{list(_BW_METHODS_LINEAR)}."
|
|
142
|
-
)
|
|
143
|
-
)
|
|
144
|
-
if isinstance(bw, (int, float)):
|
|
145
|
-
if bw < 0:
|
|
146
|
-
raise ValueError(f"Numeric `bw` must be positive.\nInput: {bw:.4f}.")
|
|
147
|
-
elif isinstance(bw, str):
|
|
148
|
-
bw_lower = bw.lower()
|
|
149
|
-
|
|
150
|
-
if bw_lower not in _BW_METHODS_LINEAR:
|
|
151
|
-
raise ValueError(
|
|
152
|
-
"Unrecognized bandwidth method.\n"
|
|
153
|
-
f"Input is: {bw_lower}.\n"
|
|
154
|
-
f"Expected one of: {list(_BW_METHODS_LINEAR)}."
|
|
155
|
-
)
|
|
156
|
-
|
|
157
|
-
bw_fun = _BW_METHODS_LINEAR[bw_lower]
|
|
158
|
-
bw = bw_fun(x, grid_counts=grid_counts, x_std=x_std, x_range=x_range)
|
|
159
|
-
else:
|
|
160
|
-
raise ValueError(
|
|
161
|
-
"Unrecognized `bw` argument.\n"
|
|
162
|
-
"Expected a positive numeric or one of the following strings:\n"
|
|
163
|
-
f"{list(_BW_METHODS_LINEAR)}."
|
|
164
|
-
)
|
|
165
|
-
return bw
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
def _vonmises_pdf(x, mu, kappa):
|
|
169
|
-
"""Calculate vonmises_pdf."""
|
|
170
|
-
if kappa <= 0:
|
|
171
|
-
raise ValueError("Argument 'kappa' must be positive.")
|
|
172
|
-
pdf = 1 / (2 * np.pi * ive(0, kappa)) * np.exp(np.cos(x - mu) - 1) ** kappa
|
|
173
|
-
return pdf
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
def _a1inv(x):
|
|
177
|
-
"""Compute inverse function.
|
|
178
|
-
|
|
179
|
-
Inverse function of the ratio of the first and
|
|
180
|
-
zeroth order Bessel functions of the first kind.
|
|
181
|
-
|
|
182
|
-
Returns the value k, such that a1inv(x) = k, i.e. a1(k) = x.
|
|
183
|
-
"""
|
|
184
|
-
if 0 <= x < 0.53:
|
|
185
|
-
return 2 * x + x**3 + (5 * x**5) / 6
|
|
186
|
-
elif x < 0.85:
|
|
187
|
-
return -0.4 + 1.39 * x + 0.43 / (1 - x)
|
|
188
|
-
else:
|
|
189
|
-
return 1 / (x**3 - 4 * x**2 + 3 * x)
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
def _kappa_mle(x):
    """Maximum-likelihood estimate of the von Mises concentration parameter."""
    centered_cosines = np.cos(x - _circular_mean(x))
    return _a1inv(np.mean(centered_cosines))
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
def _dct1d(x):
|
|
199
|
-
"""Discrete Cosine Transform in 1 Dimension.
|
|
200
|
-
|
|
201
|
-
Parameters
|
|
202
|
-
----------
|
|
203
|
-
x : numpy array
|
|
204
|
-
1 dimensional array of values for which the
|
|
205
|
-
DCT is desired
|
|
206
|
-
|
|
207
|
-
Returns
|
|
208
|
-
-------
|
|
209
|
-
output : DTC transformed values
|
|
210
|
-
"""
|
|
211
|
-
x_len = len(x)
|
|
212
|
-
|
|
213
|
-
even_increasing = np.arange(0, x_len, 2)
|
|
214
|
-
odd_decreasing = np.arange(x_len - 1, 0, -2)
|
|
215
|
-
|
|
216
|
-
x = np.concatenate((x[even_increasing], x[odd_decreasing]))
|
|
217
|
-
|
|
218
|
-
w_1k = np.r_[1, (2 * np.exp(-(0 + 1j) * (np.arange(1, x_len)) * np.pi / (2 * x_len)))]
|
|
219
|
-
output = np.real(w_1k * fft(x))
|
|
220
|
-
|
|
221
|
-
return output
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
def _fixed_point(t, N, k_sq, a_sq):
|
|
225
|
-
"""Calculate t-zeta*gamma^[l](t).
|
|
226
|
-
|
|
227
|
-
Implementation of the function t-zeta*gamma^[l](t) derived from equation (30) in [1].
|
|
228
|
-
|
|
229
|
-
References
|
|
230
|
-
----------
|
|
231
|
-
.. [1] Kernel density estimation via diffusion.
|
|
232
|
-
Z. I. Botev, J. F. Grotowski, and D. P. Kroese.
|
|
233
|
-
Ann. Statist. 38 (2010), no. 5, 2916--2957.
|
|
234
|
-
"""
|
|
235
|
-
k_sq = np.asarray(k_sq, dtype=np.float64)
|
|
236
|
-
a_sq = np.asarray(a_sq, dtype=np.float64)
|
|
237
|
-
|
|
238
|
-
l = 7
|
|
239
|
-
f = np.sum(np.power(k_sq, l) * a_sq * np.exp(-k_sq * np.pi**2 * t))
|
|
240
|
-
f *= 0.5 * np.pi ** (2.0 * l)
|
|
241
|
-
|
|
242
|
-
for j in np.arange(l - 1, 2 - 1, -1):
|
|
243
|
-
c1 = (1 + 0.5 ** (j + 0.5)) / 3
|
|
244
|
-
c2 = np.prod(np.arange(1.0, 2 * j + 1, 2, dtype=np.float64))
|
|
245
|
-
c2 /= (np.pi / 2) ** 0.5
|
|
246
|
-
t_j = np.power((c1 * (c2 / (N * f))), (2.0 / (3.0 + 2.0 * j)))
|
|
247
|
-
f = np.sum(k_sq**j * a_sq * np.exp(-k_sq * np.pi**2.0 * t_j))
|
|
248
|
-
f *= 0.5 * np.pi ** (2 * j)
|
|
249
|
-
|
|
250
|
-
out = t - (2 * N * np.pi**0.5 * f) ** (-0.4)
|
|
251
|
-
return out
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
def _root(function, N, args, x):
|
|
255
|
-
# The right bound is at most 0.01
|
|
256
|
-
found = False
|
|
257
|
-
N = max(min(1050, N), 50)
|
|
258
|
-
tol = 10e-12 + 0.01 * (N - 50) / 1000
|
|
259
|
-
|
|
260
|
-
while not found:
|
|
261
|
-
try:
|
|
262
|
-
bw, res = brentq(function, 0, 0.01, args=args, full_output=True, disp=False)
|
|
263
|
-
found = res.converged
|
|
264
|
-
except ValueError:
|
|
265
|
-
bw = 0
|
|
266
|
-
tol *= 2.0
|
|
267
|
-
found = False
|
|
268
|
-
if bw <= 0 or tol >= 1:
|
|
269
|
-
bw = (_bw_silverman(x) / np.ptp(x)) ** 2
|
|
270
|
-
return bw
|
|
271
|
-
return bw
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
def _check_custom_lims(custom_lims, x_min, x_max):
|
|
275
|
-
"""Check if `custom_lims` are of the correct type.
|
|
276
|
-
|
|
277
|
-
It accepts numeric lists/tuples of length 2.
|
|
278
|
-
|
|
279
|
-
Parameters
|
|
280
|
-
----------
|
|
281
|
-
custom_lims : Object whose type is checked.
|
|
282
|
-
|
|
283
|
-
Returns
|
|
284
|
-
-------
|
|
285
|
-
None: Object of type None
|
|
286
|
-
"""
|
|
287
|
-
if not isinstance(custom_lims, (list, tuple)):
|
|
288
|
-
raise TypeError(
|
|
289
|
-
"`custom_lims` must be a numeric list or tuple of length 2.\n"
|
|
290
|
-
f"Not an object of {type(custom_lims)}."
|
|
291
|
-
)
|
|
292
|
-
|
|
293
|
-
if len(custom_lims) != 2:
|
|
294
|
-
raise AttributeError(f"`len(custom_lims)` must be 2, not {len(custom_lims)}.")
|
|
295
|
-
|
|
296
|
-
any_bool = any(isinstance(i, bool) for i in custom_lims)
|
|
297
|
-
if any_bool:
|
|
298
|
-
raise TypeError("Elements of `custom_lims` must be numeric or None, not bool.")
|
|
299
|
-
|
|
300
|
-
custom_lims = list(custom_lims) # convert to a mutable object
|
|
301
|
-
if custom_lims[0] is None:
|
|
302
|
-
custom_lims[0] = x_min
|
|
303
|
-
|
|
304
|
-
if custom_lims[1] is None:
|
|
305
|
-
custom_lims[1] = x_max
|
|
306
|
-
|
|
307
|
-
all_numeric = all(isinstance(i, (int, float, np.integer, np.number)) for i in custom_lims)
|
|
308
|
-
if not all_numeric:
|
|
309
|
-
raise TypeError(
|
|
310
|
-
"Elements of `custom_lims` must be numeric or None.\nAt least one of them is not."
|
|
311
|
-
)
|
|
312
|
-
|
|
313
|
-
if not custom_lims[0] < custom_lims[1]:
|
|
314
|
-
raise ValueError("`custom_lims[0]` must be smaller than `custom_lims[1]`.")
|
|
315
|
-
|
|
316
|
-
if custom_lims[0] > x_min or custom_lims[1] < x_max:
|
|
317
|
-
raise ValueError("Some observations are outside `custom_lims` boundaries.")
|
|
318
|
-
|
|
319
|
-
return custom_lims
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
def _get_grid(
|
|
323
|
-
x_min, x_max, x_std, extend_fct, grid_len, custom_lims, extend=True, bound_correction=False
|
|
324
|
-
):
|
|
325
|
-
"""Compute the grid that bins the data used to estimate the density function.
|
|
326
|
-
|
|
327
|
-
Parameters
|
|
328
|
-
----------
|
|
329
|
-
x_min : float
|
|
330
|
-
Minimum value of the data
|
|
331
|
-
x_max: float
|
|
332
|
-
Maximum value of the data.
|
|
333
|
-
x_std: float
|
|
334
|
-
Standard deviation of the data.
|
|
335
|
-
extend_fct: bool
|
|
336
|
-
Indicates the factor by which `x_std` is multiplied
|
|
337
|
-
to extend the range of the data.
|
|
338
|
-
grid_len: int
|
|
339
|
-
Number of bins
|
|
340
|
-
custom_lims: tuple or list
|
|
341
|
-
Custom limits for the domain of the density estimation.
|
|
342
|
-
Must be numeric of length 2. Overrides `extend`.
|
|
343
|
-
extend: bool, optional
|
|
344
|
-
Whether to extend the range of the data or not.
|
|
345
|
-
Default is True.
|
|
346
|
-
bound_correction: bool, optional
|
|
347
|
-
Whether the density estimations performs boundary correction or not.
|
|
348
|
-
This does not impacts directly in the output, but is used
|
|
349
|
-
to override `extend`. Overrides `extend`.
|
|
350
|
-
Default is False.
|
|
351
|
-
|
|
352
|
-
Returns
|
|
353
|
-
-------
|
|
354
|
-
grid_len: int
|
|
355
|
-
Number of bins
|
|
356
|
-
grid_min: float
|
|
357
|
-
Minimum value of the grid
|
|
358
|
-
grid_max: float
|
|
359
|
-
Maximum value of the grid
|
|
360
|
-
"""
|
|
361
|
-
# Set up number of bins.
|
|
362
|
-
grid_len = max(int(grid_len), 100)
|
|
363
|
-
|
|
364
|
-
# Set up domain
|
|
365
|
-
if custom_lims is not None:
|
|
366
|
-
custom_lims = _check_custom_lims(custom_lims, x_min, x_max)
|
|
367
|
-
grid_min = custom_lims[0]
|
|
368
|
-
grid_max = custom_lims[1]
|
|
369
|
-
elif extend and not bound_correction:
|
|
370
|
-
grid_extend = extend_fct * x_std
|
|
371
|
-
grid_min = x_min - grid_extend
|
|
372
|
-
grid_max = x_max + grid_extend
|
|
373
|
-
else:
|
|
374
|
-
grid_min = x_min
|
|
375
|
-
grid_max = x_max
|
|
376
|
-
return grid_min, grid_max, grid_len
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
def kde(x, circular=False, **kwargs):
    """One dimensional density estimation.

    It is a wrapper around ``kde_linear()`` and ``kde_circular()``.

    Parameters
    ----------
    x : 1D numpy array
        Data used to calculate the density estimation.
    circular : bool, optional
        Whether ``x`` is a circular variable or not. Defaults to False.
        The string ``"degrees"`` additionally converts the data to radians.
    kwargs : dict, optional
        Arguments passed to ``kde_linear()`` and ``kde_circular()``.
        See their documentation for more info.

    Returns
    -------
    grid : numpy.ndarray
        Gridded numpy array for the x values.
    pdf : numpy.ndarray
        Numpy array for the density estimates.
    bw : float
        The estimated bandwidth. Only returned if requested.

    Examples
    --------
    Default density estimation for linear data

    .. plot::
        :context: close-figs

        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> from arviz import kde
        >>>
        >>> rng = np.random.default_rng(49)
        >>> rvs = rng.gamma(shape=1.8, size=1000)
        >>> grid, pdf = kde(rvs)
        >>> plt.plot(grid, pdf)

    Density estimation for linear data with Silverman's rule bandwidth

    .. plot::
        :context: close-figs

        >>> grid, pdf = kde(rvs, bw="silverman")
        >>> plt.plot(grid, pdf)

    Default density estimation for circular data

    .. plot::
        :context: close-figs

        >>> rvs = np.random.vonmises(mu=np.pi, kappa=1, size=500)
        >>> grid, pdf = kde(rvs, circular=True)
        >>> plt.plot(grid, pdf)

    See Also
    --------
    plot_kde : Compute and plot a kernel density estimate.
    """
    # Drop NaN/inf entries before estimating anything.
    x = x[np.isfinite(x)]
    if x.size == 0 or np.all(x == x[0]):
        # Degenerate input: nothing left, or no spread to estimate from.
        warnings.warn("Your data appears to have a single value or no finite values")

        return np.zeros(2), np.array([np.nan] * 2)

    if not circular:
        return _kde_linear(x, **kwargs)

    # "degrees" is a truthy value of `circular` that also asks for a unit
    # conversion before the circular estimator runs.
    if circular == "degrees":
        x = np.radians(x)
    return _kde_circular(x, **kwargs)
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
def _kde_linear(
    x,
    bw="experimental",
    adaptive=False,
    extend=False,
    bound_correction=True,
    extend_fct=0,
    bw_fct=1,
    bw_return=False,
    custom_lims=None,
    cumulative=False,
    grid_len=512,
    **kwargs,  # pylint: disable=unused-argument
):
    """One dimensional density estimation for linear data.

    Given an array of data points `x` it returns an estimate of
    the probability density function that generated the samples in `x`.

    Parameters
    ----------
    x : 1D numpy array
        Data used to calculate the density estimation.
    bw: int, float or str, optional
        If numeric, indicates the bandwidth and must be positive.
        If str, indicates the method to estimate the bandwidth and must be one of "scott",
        "silverman", "isj" or "experimental". Defaults to "experimental".
    adaptive: boolean, optional
        Indicates if the bandwidth is adaptive or not.
        It is the recommended approach when there are multiple modes with different spread.
        It is not compatible with convolution. Defaults to False.
    extend: boolean, optional
        Whether to extend the observed range for `x` in the estimation.
        It extends each bound by a multiple of the standard deviation of `x` given by `extend_fct`.
        Defaults to False.
    bound_correction: boolean, optional
        Whether to perform boundary correction on the bounds of `x` or not.
        Defaults to True.
    extend_fct: float, optional
        Number of standard deviations used to widen the lower and upper bounds of `x`.
        Defaults to 0.
    bw_fct: float, optional
        A value that multiplies `bw` which enables tuning smoothness by hand.
        Must be positive. Values below 1 decrease smoothness while values above 1 increase it.
        Defaults to 1 (no modification).
    bw_return: bool, optional
        Whether to return the estimated bandwidth in addition to the other objects.
        Defaults to False.
    custom_lims: list or tuple, optional
        A list or tuple of length 2 indicating custom bounds for the range of `x`.
        Defaults to None which disables custom bounds.
    cumulative: bool, optional
        Whether return the PDF or the cumulative PDF. Defaults to False.
    grid_len: int, optional
        The number of intervals used to bin the data points i.e. the length of the grid used in
        the estimation. Defaults to 512.

    Returns
    -------
    grid : Gridded numpy array for the x values.
    pdf : Numpy array for the density estimates.
    bw: optional, the estimated bandwidth.
    """
    # Check `bw_fct` is numeric and positive
    if not isinstance(bw_fct, (int, float, np.integer, np.floating)):
        raise TypeError(f"`bw_fct` must be a positive number, not an object of {type(bw_fct)}.")

    if bw_fct <= 0:
        raise ValueError(f"`bw_fct` must be a positive number, not {bw_fct}.")

    # Preliminary calculations
    x_min = x.min()
    x_max = x.max()
    x_std = np.std(x)
    x_range = x_max - x_min

    # Determine grid
    grid_min, grid_max, grid_len = _get_grid(
        x_min, x_max, x_std, extend_fct, grid_len, custom_lims, extend, bound_correction
    )
    grid_counts, _, grid_edges = histogram(x, grid_len, (grid_min, grid_max))

    # Bandwidth estimation: the pre-computed counts/std/range are forwarded so
    # the string-based estimators can skip recomputing them.
    bw = bw_fct * _get_bw(x, bw, grid_counts, x_std, x_range)

    # Density estimation
    if adaptive:
        grid, pdf = _kde_adaptive(x, bw, grid_edges, grid_counts, grid_len, bound_correction)
    else:
        grid, pdf = _kde_convolution(x, bw, grid_edges, grid_counts, grid_len, bound_correction)

    if cumulative:
        # Normalized running sum turns the PDF into an (empirical) CDF.
        pdf = pdf.cumsum() / pdf.sum()

    if bw_return:
        return grid, pdf, bw
    else:
        return grid, pdf
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
def _kde_circular(
    x,
    bw="taylor",
    bw_fct=1,
    bw_return=False,
    custom_lims=None,
    cumulative=False,
    grid_len=512,
    **kwargs,  # pylint: disable=unused-argument
):
    """One dimensional density estimation for circular data.

    Given an array of data points `x` measured in radians, it returns an estimate of the
    probability density function that generated the samples in `x`.

    Parameters
    ----------
    x : 1D numpy array
        Data used to calculate the density estimation.
    bw: int, float or str, optional
        If numeric, indicates the bandwidth and must be positive.
        If str, indicates the method to estimate the bandwidth and must be "taylor" since it is the
        only option supported so far. Defaults to "taylor".
    bw_fct: float, optional
        A value that multiplies `bw` which enables tuning smoothness by hand. Must be positive.
        Values above 1 decrease smoothness while values below 1 decrease it.
        Defaults to 1 (no modification).
    bw_return: bool, optional
        Whether to return the estimated bandwidth in addition to the other objects.
        Defaults to False.
    custom_lims: list or tuple, optional
        A list or tuple of length 2 indicating custom bounds for the range of `x`.
        Defaults to None which means the estimation limits are [-pi, pi].
    cumulative: bool, optional
        Whether return the PDF or the cumulative PDF. Defaults to False.
    grid_len: int, optional
        The number of intervals used to bin the data point i.e. the length of the grid used in the
        estimation. Defaults to 512.

    Returns
    -------
    grid : Gridded numpy array for the x values.
    pdf : Numpy array for the density estimates.
    bw : The bandwidth used; only returned when `bw_return` is True.

    Raises
    ------
    TypeError
        If `bw_fct` is not numeric.
    ValueError
        If `bw_fct` or a numeric `bw` is not positive, if a string `bw` is not "taylor",
        or if `custom_lims` falls outside [-pi, pi].
    """
    # All values between -pi and pi
    x = _normalize_angle(x)

    # Check `bw_fct` is numeric and positive
    if not isinstance(bw_fct, (int, float, np.integer, np.floating)):
        raise TypeError(f"`bw_fct` must be a positive number, not an object of {type(bw_fct)}.")

    if bw_fct <= 0:
        raise ValueError(f"`bw_fct` must be a positive number, not {bw_fct}.")

    # Determine bandwidth. `bool` is checked first because it is a subclass of `int`.
    if isinstance(bw, bool):
        raise ValueError("`bw` can't be of type `bool`.\nExpected a positive numeric or 'taylor'")
    # `<= 0` (not `< 0`): a zero bandwidth is as invalid as a negative one,
    # matching the "must be positive" contract in the error message.
    if isinstance(bw, (int, float)) and bw <= 0:
        raise ValueError(f"Numeric `bw` must be positive.\nInput: {bw:.4f}.")
    if isinstance(bw, str):
        if bw == "taylor":
            bw = _bw_taylor(x)
        else:
            raise ValueError(f"`bw` must be a positive numeric or `taylor`, not {bw}")
    bw *= bw_fct

    # Determine grid
    if custom_lims is not None:
        custom_lims = _check_custom_lims(custom_lims, x.min(), x.max())
        grid_min = custom_lims[0]
        grid_max = custom_lims[1]
        # Raise instead of `assert` so the bounds are still validated when
        # Python runs with optimizations enabled (`python -O` strips asserts).
        if grid_min < -np.pi:
            raise ValueError("Lower limit can't be smaller than -pi")
        if grid_max > np.pi:
            raise ValueError("Upper limit can't be larger than pi")
    else:
        grid_min = -np.pi
        grid_max = np.pi

    bins = np.linspace(grid_min, grid_max, grid_len + 1)
    bin_counts, _, bin_edges = histogram(x, bins=bins)
    grid = 0.5 * (bin_edges[1:] + bin_edges[:-1])

    # Von Mises kernel on the grid; `bw` plays the role of the concentration
    # parameter kappa. The density is the circular convolution of the kernel
    # with the bin counts, done via FFT because the domain is periodic.
    kern = _vonmises_pdf(x=grid, mu=0, kappa=bw)
    pdf = np.fft.fftshift(np.fft.irfft(np.fft.rfft(kern) * np.fft.rfft(bin_counts)))
    pdf /= len(x)

    if cumulative:
        pdf = pdf.cumsum() / pdf.sum()

    if bw_return:
        return grid, pdf, bw
    else:
        return grid, pdf
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
# pylint: disable=unused-argument
|
|
692
|
-
def _kde_convolution(x, bw, grid_edges, grid_counts, grid_len, bound_correction, **kwargs):
|
|
693
|
-
"""Kernel density with convolution.
|
|
694
|
-
|
|
695
|
-
One dimensional Gaussian kernel density estimation via convolution of the binned relative
|
|
696
|
-
frequencies and a Gaussian filter. This is an internal function used by `kde()`.
|
|
697
|
-
"""
|
|
698
|
-
# Calculate relative frequencies per bin
|
|
699
|
-
bin_width = grid_edges[1] - grid_edges[0]
|
|
700
|
-
f = grid_counts / bin_width / len(x)
|
|
701
|
-
|
|
702
|
-
# Bandwidth must consider the bin width
|
|
703
|
-
bw /= bin_width
|
|
704
|
-
|
|
705
|
-
# See: https://stackoverflow.com/questions/2773606/gaussian-filter-in-matlab
|
|
706
|
-
|
|
707
|
-
grid = (grid_edges[1:] + grid_edges[:-1]) / 2
|
|
708
|
-
|
|
709
|
-
kernel_n = int(bw * 2 * np.pi)
|
|
710
|
-
if kernel_n == 0:
|
|
711
|
-
kernel_n = 1
|
|
712
|
-
|
|
713
|
-
kernel = gaussian(kernel_n, bw)
|
|
714
|
-
|
|
715
|
-
if bound_correction:
|
|
716
|
-
npad = int(grid_len / 5)
|
|
717
|
-
f = np.concatenate([f[npad - 1 :: -1], f, f[grid_len : grid_len - npad - 1 : -1]])
|
|
718
|
-
pdf = convolve(f, kernel, mode="same", method="direct")[npad : npad + grid_len]
|
|
719
|
-
else:
|
|
720
|
-
pdf = convolve(f, kernel, mode="same", method="direct")
|
|
721
|
-
pdf /= bw * (2 * np.pi) ** 0.5
|
|
722
|
-
|
|
723
|
-
return grid, pdf
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
def _kde_adaptive(x, bw, grid_edges, grid_counts, grid_len, bound_correction, **kwargs):
    """Compute Adaptive Kernel Density Estimation.

    One dimensional adaptive Gaussian kernel density estimation. The implementation uses the binning
    technique. Since there is not an unique `bw`, the convolution is not possible. The alternative
    implemented in this function is known as Abramson's method.
    This is an internal function used by `kde()`.

    Parameters
    ----------
    x : 1D numpy array
        Data used to calculate the density estimation.
    bw : float
        Global (pilot) bandwidth.
    grid_edges : numpy array
        Bin edges; assumed to have length `grid_len + 1` — matches what
        `histogram()` returns for `grid_len` bins.
    grid_counts : numpy array
        Number of observations per bin, of length `grid_len`.
    grid_len : int
        Number of bins.
    bound_correction : bool
        Whether to correct for boundaries by reflecting at both edges.
    **kwargs
        Ignored; accepted for signature compatibility with `_kde_convolution`.

    Returns
    -------
    grid : numpy array with the x values (bin midpoints from the pilot estimate).
    pdf : numpy array with the density estimates.
    """
    # Pilot computations used for bandwidth adjustment
    pilot_grid, pilot_pdf = _kde_convolution(
        x, bw, grid_edges, grid_counts, grid_len, bound_correction
    )

    # Adds to avoid np.log(0) and zero division
    pilot_pdf += 1e-9

    # Determine the modification factors
    # Pilot density interpolated at the data points; its geometric mean is the
    # reference level for the local bandwidth adjustment.
    pdf_interp = np.interp(x, pilot_grid, pilot_pdf)
    geom_mean = np.exp(np.mean(np.log(pdf_interp)))

    # Power of c = 0.5 -> Abramson's method
    # Lower pilot density -> larger local bandwidth (more smoothing in the tails).
    adj_factor = (geom_mean / pilot_pdf) ** 0.5
    bw_adj = bw * adj_factor

    # Estimation of Gaussian KDE via binned method (convolution not possible)
    grid = pilot_grid

    if bound_correction:
        # Mirror the grid, the counts and the per-bin bandwidths at both
        # boundaries, evaluate on the padded grid, then crop the padding off.
        grid_npad = int(grid_len / 5)
        grid_width = grid_edges[1] - grid_edges[0]
        grid_pad = grid_npad * grid_width
        grid_padded = np.linspace(
            grid_edges[0] - grid_pad,
            grid_edges[grid_len - 1] + grid_pad,
            num=grid_len + 2 * grid_npad,
        )
        grid_counts = np.concatenate(
            [
                grid_counts[grid_npad - 1 :: -1],
                grid_counts,
                grid_counts[grid_len : grid_len - grid_npad - 1 : -1],
            ]
        )
        bw_adj = np.concatenate(
            [bw_adj[grid_npad - 1 :: -1], bw_adj, bw_adj[grid_len : grid_len - grid_npad - 1 : -1]]
        )
        # Pairwise standardized distances between padded grid points: one
        # Gaussian per bin, each with its own bandwidth, weighted by counts.
        pdf_mat = (grid_padded - grid_padded[:, None]) / bw_adj[:, None]
        pdf_mat = np.exp(-0.5 * pdf_mat**2) * grid_counts[:, None]
        pdf_mat /= (2 * np.pi) ** 0.5 * bw_adj[:, None]
        # Sum the kernels, keeping only the columns of the original grid.
        pdf = np.sum(pdf_mat[:, grid_npad : grid_npad + grid_len], axis=0) / len(x)

    else:
        # Same per-bin Gaussian mixture, without padding.
        pdf_mat = (grid - grid[:, None]) / bw_adj[:, None]
        pdf_mat = np.exp(-0.5 * pdf_mat**2) * grid_counts[:, None]
        pdf_mat /= (2 * np.pi) ** 0.5 * bw_adj[:, None]
        pdf = np.sum(pdf_mat, axis=0) / len(x)

    return grid, pdf
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
def _fast_kde_2d(x, y, gridsize=(128, 128), circular=False):
    """
    2D fft-based Gaussian kernel density estimate (KDE).

    The code was adapted from https://github.com/mfouesneau/faststats

    Parameters
    ----------
    x : Numpy array or list
    y : Numpy array or list
    gridsize : tuple
        Number of points used to discretize data. Use powers of 2 for fft optimization
    circular: bool
        If True use circular boundaries. Defaults to False

    Returns
    -------
    grid: A gridded 2D KDE of the input points (x, y)
    xmin: minimum value of x
    xmax: maximum value of x
    ymin: minimum value of y
    ymax: maximum value of y
    """
    # Drop non-finite values independently in each coordinate.
    # NOTE(review): filtering x and y separately assumes non-finite entries
    # occur at the same positions in both arrays; otherwise pairs get
    # misaligned — confirm with callers.
    x = np.asarray(x, dtype=float)
    x = x[np.isfinite(x)]
    y = np.asarray(y, dtype=float)
    y = y[np.isfinite(y)]

    xmin, xmax = x.min(), x.max()
    ymin, ymax = y.min(), y.max()

    len_x = len(x)
    weights = np.ones(len_x)  # unit weight per observation
    n_x, n_y = gridsize

    # Cell sizes of the evaluation grid.
    d_x = (xmax - xmin) / (n_x - 1)
    d_y = (ymax - ymin) / (n_y - 1)

    # Map each (x, y) pair to grid-cell coordinates (floored in place).
    xyi = _stack(x, y).T
    xyi -= [xmin, ymin]
    xyi /= [d_x, d_y]
    xyi = np.floor(xyi, xyi).T

    # Scott's rule bandwidth factor for 2D data: n ** (-1 / (d + 4)) with d = 2.
    scotts_factor = len_x ** (-1 / 6)
    cov = _cov(xyi)
    std_devs = np.diag(cov) ** 0.5
    # Kernel extent, in grid cells, along each axis.
    kern_nx, kern_ny = np.round(scotts_factor * 2 * np.pi * std_devs)

    inv_cov = np.linalg.inv(cov * scotts_factor**2)

    # Evaluate the Gaussian kernel on a grid of offsets centered at zero.
    x_x = np.arange(kern_nx) - kern_nx / 2
    y_y = np.arange(kern_ny) - kern_ny / 2
    x_x, y_y = np.meshgrid(x_x, y_y)

    # Quadratic form offset' * inv_cov * offset, exponentiated to a Gaussian.
    kernel = _stack(x_x.flatten(), y_y.flatten())
    kernel = _dot(inv_cov, kernel) * kernel
    kernel = np.exp(-kernel.sum(axis=0) / 2)
    kernel = kernel.reshape((int(kern_ny), int(kern_nx)))

    # "wrap" gives periodic (circular) boundaries; "symm" reflects at edges.
    boundary = "wrap" if circular else "symm"

    # 2D histogram accumulated as a sparse matrix, then smoothed by
    # convolution with the Gaussian kernel.
    grid = coo_matrix((weights, xyi), shape=(n_x, n_y)).toarray()
    grid = convolve2d(grid, kernel, mode="same", boundary=boundary)

    # Normalize so the density integrates to ~1 over the data range.
    norm_factor = np.linalg.det(2 * np.pi * cov * scotts_factor**2)
    norm_factor = len_x * d_x * d_y * norm_factor**0.5

    grid /= norm_factor

    return grid, xmin, xmax, ymin, ymax
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
def get_bins(values):
    """
    Automatically compute the number of bins for discrete variables.

    Parameters
    ----------
    values : numpy array
        Data to be binned.

    Returns
    -------
    array with the bins

    Notes
    -----
    The bin width is the maximum of the Sturges and the Freedman-Diaconis estimators,
    which according to numpy's `np.histogram` gives good all-around performance.

    Sturges is a very simplistic estimator built on a normality assumption; it performs
    poorly for non-normal data (especially for large samples) and depends only on the
    sample size.

    Freedman-Diaconis uses the interquartile range (IQR) to set the bin width. It is a
    robust version of the Scott rule, since the IQR is less affected by outliers than the
    standard deviation; relying on fewer points, however, makes it less accurate for
    long-tailed distributions.
    """
    is_integer = values.dtype.kind == "i"

    if is_integer:
        low, high = values.min().astype(int), values.max().astype(int)
    else:
        low, high = values.min().astype(float), values.max().astype(float)

    n_obs = values.size

    # Sturges histogram bin estimator.
    width_sturges = (high - low) / (np.log2(n_obs) + 1)

    # The Freedman-Diaconis histogram bin estimator.
    quartile_range = np.subtract(*np.percentile(values, [75, 25]))
    width_fd = 2 * quartile_range * n_obs ** (-1 / 3)

    if is_integer:
        # Integer data: at least width 1, rounded to an integer.
        width = np.round(np.max([1, width_sturges, width_fd])).astype(int)
        return np.arange(low, high + width + 1, width)

    width = np.max([width_sturges, width_fd])
    if np.isclose(low, high):
        # Degenerate (constant) data: fall back to a small fixed width.
        width = 1e-3
    return np.arange(low, high + width, width)
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
def _sturges_formula(dataset, mult=1):
|
|
914
|
-
"""Use Sturges' formula to determine number of bins.
|
|
915
|
-
|
|
916
|
-
See https://en.wikipedia.org/wiki/Histogram#Sturges'_formula
|
|
917
|
-
or https://doi.org/10.1080%2F01621459.1926.10502161
|
|
918
|
-
|
|
919
|
-
Parameters
|
|
920
|
-
----------
|
|
921
|
-
dataset: xarray.DataSet
|
|
922
|
-
Must have the `draw` dimension
|
|
923
|
-
|
|
924
|
-
mult: float
|
|
925
|
-
Used to scale the number of bins up or down. Default is 1 for Sturges' formula.
|
|
926
|
-
|
|
927
|
-
Returns
|
|
928
|
-
-------
|
|
929
|
-
int
|
|
930
|
-
Number of bins to use
|
|
931
|
-
"""
|
|
932
|
-
return int(np.ceil(mult * np.log2(dataset.draw.size)) + 1)
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
def _circular_mean(x):
|
|
936
|
-
"""Compute mean of circular variable measured in radians.
|
|
937
|
-
|
|
938
|
-
The result is between -pi and pi.
|
|
939
|
-
"""
|
|
940
|
-
sinr = np.sum(np.sin(x))
|
|
941
|
-
cosr = np.sum(np.cos(x))
|
|
942
|
-
mean = np.arctan2(sinr, cosr)
|
|
943
|
-
|
|
944
|
-
return mean
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
def _normalize_angle(x, zero_centered=True):
|
|
948
|
-
"""Normalize angles.
|
|
949
|
-
|
|
950
|
-
Normalize angles in radians to [-pi, pi) or [0, 2 * pi) according to `zero_centered`.
|
|
951
|
-
"""
|
|
952
|
-
if zero_centered:
|
|
953
|
-
return (x + np.pi) % (2 * np.pi) - np.pi
|
|
954
|
-
else:
|
|
955
|
-
return x % (2 * np.pi)
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
@conditional_jit(cache=True, nopython=True)
def histogram(data, bins, range_hist=None):
    """Conditionally jitted histogram.

    Parameters
    ----------
    data : array-like
        Input data. Passed as first positional argument to ``np.histogram``.
    bins : int or array-like
        Passed as keyword argument ``bins`` to ``np.histogram``.
    range_hist : (float, float), optional
        Passed as keyword argument ``range`` to ``np.histogram``.

    Returns
    -------
    hist : array
        The number of counts per bin.
    density : array
        The density corresponding to each bin.
    bin_edges : array
        The edges of the bins used.
    """
    # Body kept to plain numpy calls so it stays numba (nopython) compatible.
    counts, bin_edges = np.histogram(data, bins=bins, range=range_hist)
    bin_widths = np.diff(bin_edges)
    densities = counts / (counts.sum() * bin_widths)
    return counts, densities, bin_edges
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
def _find_hdi_contours(density, hdi_probs):
|
|
986
|
-
"""
|
|
987
|
-
Find contours enclosing regions of highest posterior density.
|
|
988
|
-
|
|
989
|
-
Parameters
|
|
990
|
-
----------
|
|
991
|
-
density : array-like
|
|
992
|
-
A 2D KDE on a grid with cells of equal area.
|
|
993
|
-
hdi_probs : array-like
|
|
994
|
-
An array of highest density interval confidence probabilities.
|
|
995
|
-
|
|
996
|
-
Returns
|
|
997
|
-
-------
|
|
998
|
-
contour_levels : array
|
|
999
|
-
The contour levels corresponding to the given HDI probabilities.
|
|
1000
|
-
"""
|
|
1001
|
-
# Using the algorithm from corner.py
|
|
1002
|
-
sorted_density = np.sort(density, axis=None)[::-1]
|
|
1003
|
-
sm = sorted_density.cumsum()
|
|
1004
|
-
sm /= sm[-1]
|
|
1005
|
-
|
|
1006
|
-
contours = np.empty_like(hdi_probs)
|
|
1007
|
-
for idx, hdi_prob in enumerate(hdi_probs):
|
|
1008
|
-
try:
|
|
1009
|
-
contours[idx] = sorted_density[sm <= hdi_prob][-1]
|
|
1010
|
-
except IndexError:
|
|
1011
|
-
contours[idx] = sorted_density[0]
|
|
1012
|
-
|
|
1013
|
-
return contours
|