arviz 0.23.3__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arviz/__init__.py +52 -367
- arviz-1.0.0rc0.dist-info/METADATA +182 -0
- arviz-1.0.0rc0.dist-info/RECORD +5 -0
- {arviz-0.23.3.dist-info → arviz-1.0.0rc0.dist-info}/WHEEL +1 -2
- {arviz-0.23.3.dist-info → arviz-1.0.0rc0.dist-info}/licenses/LICENSE +0 -1
- arviz/data/__init__.py +0 -55
- arviz/data/base.py +0 -596
- arviz/data/converters.py +0 -203
- arviz/data/datasets.py +0 -161
- arviz/data/example_data/code/radon/radon.json +0 -326
- arviz/data/example_data/data/centered_eight.nc +0 -0
- arviz/data/example_data/data/non_centered_eight.nc +0 -0
- arviz/data/example_data/data_local.json +0 -12
- arviz/data/example_data/data_remote.json +0 -58
- arviz/data/inference_data.py +0 -2386
- arviz/data/io_beanmachine.py +0 -112
- arviz/data/io_cmdstan.py +0 -1036
- arviz/data/io_cmdstanpy.py +0 -1233
- arviz/data/io_datatree.py +0 -23
- arviz/data/io_dict.py +0 -462
- arviz/data/io_emcee.py +0 -317
- arviz/data/io_json.py +0 -54
- arviz/data/io_netcdf.py +0 -68
- arviz/data/io_numpyro.py +0 -497
- arviz/data/io_pyjags.py +0 -378
- arviz/data/io_pyro.py +0 -333
- arviz/data/io_pystan.py +0 -1095
- arviz/data/io_zarr.py +0 -46
- arviz/data/utils.py +0 -139
- arviz/labels.py +0 -210
- arviz/plots/__init__.py +0 -61
- arviz/plots/autocorrplot.py +0 -171
- arviz/plots/backends/__init__.py +0 -223
- arviz/plots/backends/bokeh/__init__.py +0 -166
- arviz/plots/backends/bokeh/autocorrplot.py +0 -101
- arviz/plots/backends/bokeh/bfplot.py +0 -23
- arviz/plots/backends/bokeh/bpvplot.py +0 -193
- arviz/plots/backends/bokeh/compareplot.py +0 -167
- arviz/plots/backends/bokeh/densityplot.py +0 -239
- arviz/plots/backends/bokeh/distcomparisonplot.py +0 -23
- arviz/plots/backends/bokeh/distplot.py +0 -183
- arviz/plots/backends/bokeh/dotplot.py +0 -113
- arviz/plots/backends/bokeh/ecdfplot.py +0 -73
- arviz/plots/backends/bokeh/elpdplot.py +0 -203
- arviz/plots/backends/bokeh/energyplot.py +0 -155
- arviz/plots/backends/bokeh/essplot.py +0 -176
- arviz/plots/backends/bokeh/forestplot.py +0 -772
- arviz/plots/backends/bokeh/hdiplot.py +0 -54
- arviz/plots/backends/bokeh/kdeplot.py +0 -268
- arviz/plots/backends/bokeh/khatplot.py +0 -163
- arviz/plots/backends/bokeh/lmplot.py +0 -185
- arviz/plots/backends/bokeh/loopitplot.py +0 -211
- arviz/plots/backends/bokeh/mcseplot.py +0 -184
- arviz/plots/backends/bokeh/pairplot.py +0 -328
- arviz/plots/backends/bokeh/parallelplot.py +0 -81
- arviz/plots/backends/bokeh/posteriorplot.py +0 -324
- arviz/plots/backends/bokeh/ppcplot.py +0 -379
- arviz/plots/backends/bokeh/rankplot.py +0 -149
- arviz/plots/backends/bokeh/separationplot.py +0 -107
- arviz/plots/backends/bokeh/traceplot.py +0 -436
- arviz/plots/backends/bokeh/violinplot.py +0 -164
- arviz/plots/backends/matplotlib/__init__.py +0 -124
- arviz/plots/backends/matplotlib/autocorrplot.py +0 -72
- arviz/plots/backends/matplotlib/bfplot.py +0 -78
- arviz/plots/backends/matplotlib/bpvplot.py +0 -177
- arviz/plots/backends/matplotlib/compareplot.py +0 -135
- arviz/plots/backends/matplotlib/densityplot.py +0 -194
- arviz/plots/backends/matplotlib/distcomparisonplot.py +0 -119
- arviz/plots/backends/matplotlib/distplot.py +0 -178
- arviz/plots/backends/matplotlib/dotplot.py +0 -116
- arviz/plots/backends/matplotlib/ecdfplot.py +0 -70
- arviz/plots/backends/matplotlib/elpdplot.py +0 -189
- arviz/plots/backends/matplotlib/energyplot.py +0 -113
- arviz/plots/backends/matplotlib/essplot.py +0 -180
- arviz/plots/backends/matplotlib/forestplot.py +0 -656
- arviz/plots/backends/matplotlib/hdiplot.py +0 -48
- arviz/plots/backends/matplotlib/kdeplot.py +0 -177
- arviz/plots/backends/matplotlib/khatplot.py +0 -241
- arviz/plots/backends/matplotlib/lmplot.py +0 -149
- arviz/plots/backends/matplotlib/loopitplot.py +0 -144
- arviz/plots/backends/matplotlib/mcseplot.py +0 -161
- arviz/plots/backends/matplotlib/pairplot.py +0 -355
- arviz/plots/backends/matplotlib/parallelplot.py +0 -58
- arviz/plots/backends/matplotlib/posteriorplot.py +0 -348
- arviz/plots/backends/matplotlib/ppcplot.py +0 -478
- arviz/plots/backends/matplotlib/rankplot.py +0 -119
- arviz/plots/backends/matplotlib/separationplot.py +0 -97
- arviz/plots/backends/matplotlib/traceplot.py +0 -526
- arviz/plots/backends/matplotlib/tsplot.py +0 -121
- arviz/plots/backends/matplotlib/violinplot.py +0 -148
- arviz/plots/bfplot.py +0 -128
- arviz/plots/bpvplot.py +0 -308
- arviz/plots/compareplot.py +0 -177
- arviz/plots/densityplot.py +0 -284
- arviz/plots/distcomparisonplot.py +0 -197
- arviz/plots/distplot.py +0 -233
- arviz/plots/dotplot.py +0 -233
- arviz/plots/ecdfplot.py +0 -372
- arviz/plots/elpdplot.py +0 -174
- arviz/plots/energyplot.py +0 -147
- arviz/plots/essplot.py +0 -319
- arviz/plots/forestplot.py +0 -304
- arviz/plots/hdiplot.py +0 -211
- arviz/plots/kdeplot.py +0 -357
- arviz/plots/khatplot.py +0 -236
- arviz/plots/lmplot.py +0 -380
- arviz/plots/loopitplot.py +0 -224
- arviz/plots/mcseplot.py +0 -194
- arviz/plots/pairplot.py +0 -281
- arviz/plots/parallelplot.py +0 -204
- arviz/plots/plot_utils.py +0 -599
- arviz/plots/posteriorplot.py +0 -298
- arviz/plots/ppcplot.py +0 -369
- arviz/plots/rankplot.py +0 -232
- arviz/plots/separationplot.py +0 -167
- arviz/plots/styles/arviz-bluish.mplstyle +0 -1
- arviz/plots/styles/arviz-brownish.mplstyle +0 -1
- arviz/plots/styles/arviz-colors.mplstyle +0 -2
- arviz/plots/styles/arviz-cyanish.mplstyle +0 -1
- arviz/plots/styles/arviz-darkgrid.mplstyle +0 -40
- arviz/plots/styles/arviz-doc.mplstyle +0 -88
- arviz/plots/styles/arviz-docgrid.mplstyle +0 -88
- arviz/plots/styles/arviz-grayscale.mplstyle +0 -41
- arviz/plots/styles/arviz-greenish.mplstyle +0 -1
- arviz/plots/styles/arviz-orangish.mplstyle +0 -1
- arviz/plots/styles/arviz-plasmish.mplstyle +0 -1
- arviz/plots/styles/arviz-purplish.mplstyle +0 -1
- arviz/plots/styles/arviz-redish.mplstyle +0 -1
- arviz/plots/styles/arviz-royish.mplstyle +0 -1
- arviz/plots/styles/arviz-viridish.mplstyle +0 -1
- arviz/plots/styles/arviz-white.mplstyle +0 -40
- arviz/plots/styles/arviz-whitegrid.mplstyle +0 -40
- arviz/plots/traceplot.py +0 -273
- arviz/plots/tsplot.py +0 -440
- arviz/plots/violinplot.py +0 -192
- arviz/preview.py +0 -58
- arviz/py.typed +0 -0
- arviz/rcparams.py +0 -606
- arviz/sel_utils.py +0 -223
- arviz/static/css/style.css +0 -340
- arviz/static/html/icons-svg-inline.html +0 -15
- arviz/stats/__init__.py +0 -37
- arviz/stats/density_utils.py +0 -1013
- arviz/stats/diagnostics.py +0 -1013
- arviz/stats/ecdf_utils.py +0 -324
- arviz/stats/stats.py +0 -2422
- arviz/stats/stats_refitting.py +0 -119
- arviz/stats/stats_utils.py +0 -609
- arviz/tests/__init__.py +0 -1
- arviz/tests/base_tests/__init__.py +0 -1
- arviz/tests/base_tests/test_data.py +0 -1679
- arviz/tests/base_tests/test_data_zarr.py +0 -143
- arviz/tests/base_tests/test_diagnostics.py +0 -511
- arviz/tests/base_tests/test_diagnostics_numba.py +0 -87
- arviz/tests/base_tests/test_helpers.py +0 -18
- arviz/tests/base_tests/test_labels.py +0 -69
- arviz/tests/base_tests/test_plot_utils.py +0 -342
- arviz/tests/base_tests/test_plots_bokeh.py +0 -1288
- arviz/tests/base_tests/test_plots_matplotlib.py +0 -2197
- arviz/tests/base_tests/test_rcparams.py +0 -317
- arviz/tests/base_tests/test_stats.py +0 -925
- arviz/tests/base_tests/test_stats_ecdf_utils.py +0 -166
- arviz/tests/base_tests/test_stats_numba.py +0 -45
- arviz/tests/base_tests/test_stats_utils.py +0 -384
- arviz/tests/base_tests/test_utils.py +0 -376
- arviz/tests/base_tests/test_utils_numba.py +0 -87
- arviz/tests/conftest.py +0 -46
- arviz/tests/external_tests/__init__.py +0 -1
- arviz/tests/external_tests/test_data_beanmachine.py +0 -78
- arviz/tests/external_tests/test_data_cmdstan.py +0 -398
- arviz/tests/external_tests/test_data_cmdstanpy.py +0 -496
- arviz/tests/external_tests/test_data_emcee.py +0 -166
- arviz/tests/external_tests/test_data_numpyro.py +0 -434
- arviz/tests/external_tests/test_data_pyjags.py +0 -119
- arviz/tests/external_tests/test_data_pyro.py +0 -260
- arviz/tests/external_tests/test_data_pystan.py +0 -307
- arviz/tests/helpers.py +0 -677
- arviz/utils.py +0 -773
- arviz/wrappers/__init__.py +0 -13
- arviz/wrappers/base.py +0 -236
- arviz/wrappers/wrap_pymc.py +0 -36
- arviz/wrappers/wrap_stan.py +0 -148
- arviz-0.23.3.dist-info/METADATA +0 -264
- arviz-0.23.3.dist-info/RECORD +0 -183
- arviz-0.23.3.dist-info/top_level.txt +0 -1
arviz/stats/ecdf_utils.py
DELETED
|
@@ -1,324 +0,0 @@
|
|
|
1
|
-
"""Functions for evaluating ECDFs and their confidence bands."""
|
|
2
|
-
|
|
3
|
-
import math
|
|
4
|
-
from typing import Any, Callable, Optional, Tuple
|
|
5
|
-
import warnings
|
|
6
|
-
|
|
7
|
-
import numpy as np
|
|
8
|
-
from scipy.stats import uniform, binom
|
|
9
|
-
from scipy.optimize import minimize_scalar
|
|
10
|
-
|
|
11
|
-
try:
    from numba import jit, vectorize
except ImportError:
    # numba is optional: without it, provide decorator factories that accept
    # any options and hand the decorated function back untouched.

    def jit(*args, **kwargs):  # pylint: disable=unused-argument
        """Stand-in for ``numba.jit`` that ignores its options."""

        def _unchanged(func):
            return func

        return _unchanged

    def vectorize(*args, **kwargs):  # pylint: disable=unused-argument
        """Stand-in for ``numba.vectorize`` that ignores its options."""

        def _unchanged(func):
            return func

        return _unchanged
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
from ..utils import Numba
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def compute_ecdf(sample: np.ndarray, eval_points: np.ndarray) -> np.ndarray:
    """Evaluate the empirical CDF of `sample` at `eval_points`.

    `sample` must already be sorted in ascending order; the result is the
    fraction of observations that are less than or equal to each point.
    """
    # side="right" makes the insertion index equal to the number of
    # observations <= the evaluation point.
    n_at_or_below = np.searchsorted(sample, eval_points, side="right")
    return n_at_or_below / len(sample)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def _get_ecdf_points(
    sample: np.ndarray, eval_points: np.ndarray, difference: bool
) -> Tuple[np.ndarray, np.ndarray]:
    """Return the (x, y) coordinates of the ECDF evaluated with `compute_ecdf`.

    When `difference` is false and the ECDF is already positive at the first
    evaluation point, a leading ``(eval_points[0], 0)`` coordinate is prepended.
    """
    ecdf_values = compute_ecdf(sample, eval_points)
    needs_origin = (not difference) and ecdf_values[0] > 0
    if not needs_origin:
        return eval_points, ecdf_values
    x_coords = np.insert(eval_points, 0, eval_points[0])
    y_coords = np.insert(ecdf_values, 0, 0)
    return x_coords, y_coords
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _call_rvs(rvs, ndraws, random_state):
    """Call `rvs` for `ndraws` samples, forwarding `random_state` only when it is given."""
    # Samplers that do not accept a `random_state` keyword keep working as
    # long as no random state is requested.
    extra = {} if random_state is None else {"random_state": random_state}
    return rvs(ndraws, **extra)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def _simulate_ecdf(
    ndraws: int,
    eval_points: np.ndarray,
    rvs: Callable[[int, Optional[Any]], np.ndarray],
    random_state: Optional[Any] = None,
) -> np.ndarray:
    """Simulate an ECDF at `eval_points` from `ndraws` draws of the sampler `rvs`."""
    draws = _call_rvs(rvs, ndraws, random_state)
    # Sorted in place: compute_ecdf expects its sample in ascending order.
    draws.sort()
    simulated_ecdf = compute_ecdf(draws, eval_points)
    return simulated_ecdf
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def _fit_pointwise_band_probability(
    ndraws: int,
    ecdf_at_eval_points: np.ndarray,
    cdf_at_eval_points: np.ndarray,
) -> float:
    """Compute the smallest marginal probability of a pointwise confidence band that
    contains the ECDF.

    Arguments
    ---------
    ndraws : int
        Number of draws the ECDF was computed from.
    ecdf_at_eval_points : np.ndarray
        ECDF values (multiples of ``1 / ndraws``) at the evaluation points.
    cdf_at_eval_points : np.ndarray
        CDF values at the same evaluation points.

    Returns
    -------
    float
        ``1 - 2 * min(lower tail, upper tail)``, where the tails are the smallest
        binomial lower/upper tail probabilities of the observed counts.
    """
    # ECDF values are k / ndraws. Scaling back can land just below the integer
    # k (e.g. 49 * (1 / 49) < 1), so round to the nearest count rather than
    # truncating, which would silently turn k into k - 1.
    ecdf_scaled = np.rint(ndraws * ecdf_at_eval_points).astype(int)
    prob_lower_tail = np.amin(binom.cdf(ecdf_scaled, ndraws, cdf_at_eval_points))
    # sf(k - 1) is P(X >= k), the upper tail including the observed count.
    prob_upper_tail = np.amin(binom.sf(ecdf_scaled - 1, ndraws, cdf_at_eval_points))
    prob_pointwise = 1 - 2 * min(prob_lower_tail, prob_upper_tail)
    return prob_pointwise
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def _get_pointwise_confidence_band(
    prob: float, ndraws: int, cdf_at_eval_points: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """Return the lower and upper limits of the `prob`-level pointwise band.

    The limits are the binomial interval for ``ndraws`` trials with success
    probability ``cdf_at_eval_points``, rescaled from counts to proportions.
    """
    count_bounds = binom.interval(prob, ndraws, cdf_at_eval_points)
    prob_lower, prob_upper = (bound / ndraws for bound in count_bounds)
    return prob_lower, prob_upper
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def ecdf_confidence_band(
    ndraws: int,
    eval_points: np.ndarray,
    cdf_at_eval_points: np.ndarray,
    prob: float = 0.95,
    method="optimized",
    **kwargs,
) -> Tuple[np.ndarray, np.ndarray]:
    """Compute the `prob`-level confidence band for the ECDF.

    Arguments
    ---------
    ndraws : int
        Number of samples in the original dataset.
    eval_points : np.ndarray
        Points at which the ECDF is evaluated. If these are dependent on the sample
        values, simultaneous confidence bands may not be correctly calibrated.
    cdf_at_eval_points : np.ndarray
        CDF values at the evaluation points.
    prob : float, default 0.95
        The target probability that a true ECDF lies within the confidence band.
        Must satisfy ``0 < prob < 1``.
    method : string, default "optimized"
        The method used to compute the confidence band. Valid options are:
        - "pointwise": Compute the pointwise (i.e. marginal) confidence band.
        - "optimized": Use optimization to estimate a simultaneous confidence band.
        - "simulated": Use Monte Carlo simulation to estimate a simultaneous confidence band.
    rvs: callable, optional
        A function that takes an integer `ndraws` and optionally the object passed to
        `random_state` and returns an array of `ndraws` samples from the same distribution
        as the original dataset. Only used if `method` is "simulated"; it should be
        provided when the variable is discrete, otherwise the variable is assumed to be
        continuous and a warning is emitted.
    num_trials : int, default 500
        The number of random ECDFs to generate for constructing simultaneous confidence bands
        (if `method` is "simulated").
    random_state : int, numpy.random.Generator or numpy.random.RandomState, optional
        Seed or random number generator used for the simulated draws
        (if `method` is "simulated").

    Returns
    -------
    prob_lower : np.ndarray
        Lower confidence band for the ECDF at the evaluation points.
    prob_upper : np.ndarray
        Upper confidence band for the ECDF at the evaluation points.

    Raises
    ------
    ValueError
        If `prob` is not strictly between 0 and 1, or `method` is not one of the
        valid options.
    """
    if not 0 < prob < 1:
        raise ValueError(f"Invalid value for `prob`. Expected 0 < prob < 1, but got {prob}.")

    # Every method reduces to a pointwise band; the simultaneous methods only
    # differ in how they calibrate the pointwise probability that is used.
    if method == "pointwise":
        prob_pointwise = prob
    elif method == "optimized":
        prob_pointwise = _optimize_simultaneous_ecdf_band_probability(
            ndraws, eval_points, cdf_at_eval_points, prob=prob, **kwargs
        )
    elif method == "simulated":
        prob_pointwise = _simulate_simultaneous_ecdf_band_probability(
            ndraws, eval_points, cdf_at_eval_points, prob=prob, **kwargs
        )
    else:
        raise ValueError(
            f"Unknown method {method}. Valid options are 'pointwise', 'optimized', or 'simulated'."
        )

    prob_lower, prob_upper = _get_pointwise_confidence_band(
        prob_pointwise, ndraws, cdf_at_eval_points
    )

    return prob_lower, prob_upper
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def _update_ecdf_band_interior_probabilities(
    prob_left: np.ndarray,
    interval_left: np.ndarray,
    interval_right: np.ndarray,
    p: float,
    ndraws: int,
) -> np.ndarray:
    """Propagate the in-envelope probabilities from the previous point to the current one.

    Arguments
    ---------
    prob_left : np.ndarray
        For each count in the interior at the previous point, the joint probability
        that it and all points before are in the interior.
    interval_left : np.ndarray
        The counts in the interior at the previous point.
    interval_right : np.ndarray
        The counts in the interior at the current point.
    p : float
        The probability of any given draw falling between the previous point and
        the current one.
    ndraws : int
        Number of draws in the original dataset.

    Returns
    -------
    prob_right : np.ndarray
        For each count in the interior at the current point, the joint probability
        that it and all previous points are in the interior.
    """
    # Column vector so that broadcasting against `interval_right` yields a
    # (len(interval_left), len(interval_right)) matrix of transition probabilities.
    counts_before = interval_left[:, np.newaxis]
    # Moving from count a to count b needs b - a successes among the
    # ndraws - a draws that have not been counted yet.
    transition = binom.pmf(interval_right, ndraws - counts_before, p, loc=counts_before)
    return prob_left.dot(transition)
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
@vectorize(["float64(int64, int64, float64, int64)"])
def _binom_pmf(k, n, p, loc):
    """Binomial probability mass of `k` for `n` trials with success probability `p`,
    with the support shifted by `loc`."""
    successes = k - loc
    # Outside the support.
    if successes < 0 or successes > n:
        return 0.0
    # Degenerate success probabilities put all mass on a single count.
    if p == 0:
        return 1.0 if successes == 0 else 0.0
    if p == 1:
        return 1.0 if successes == n else 0.0
    # Boundary counts need no binomial coefficient.
    if successes == 0:
        return (1 - p) ** n
    if successes == n:
        return p**n
    failures = n - successes
    # Work in log space: lgamma gives the log binomial coefficient.
    log_coeff = math.lgamma(n + 1) - math.lgamma(successes + 1) - math.lgamma(failures + 1)
    return np.exp(log_coeff + successes * np.log(p) + failures * np.log1p(-p))
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
@jit(nopython=True)
def _update_ecdf_band_interior_probabilities_numba(
    prob_left: np.ndarray,
    interval_left: np.ndarray,
    interval_right: np.ndarray,
    p: float,
    ndraws: int,
) -> np.ndarray:
    """Counterpart of `_update_ecdf_band_interior_probabilities` that uses `_binom_pmf`
    in place of `scipy.stats.binom.pmf`."""
    # Column vector so that broadcasting against `interval_right` yields one
    # row of transition probabilities per previous count.
    counts_before = interval_left[:, np.newaxis]
    transition = _binom_pmf(interval_right, ndraws - counts_before, p, counts_before)
    return prob_left.dot(transition)
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
def _ecdf_band_interior_probability(prob_between_points, ndraws, lower_count, upper_count):
    """Return the probability that the ECDF count lies in
    ``[lower_count[i], upper_count[i])`` at every evaluation point ``i``."""
    # Start from a single reachable count, 0, held with probability 1.
    reachable_counts = np.arange(1)
    joint_prob = np.ones(1)
    for idx in range(prob_between_points.shape[0]):
        candidate_counts = np.arange(lower_count[idx], upper_count[idx])
        joint_prob = _update_ecdf_band_interior_probabilities(
            joint_prob, reachable_counts, candidate_counts, prob_between_points[idx], ndraws
        )
        reachable_counts = candidate_counts
    return joint_prob.sum()
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
@jit(nopython=True)
def _ecdf_band_interior_probability_numba(prob_between_points, ndraws, lower_count, upper_count):
    """Counterpart of `_ecdf_band_interior_probability` that calls the numba update step."""
    # Start from a single reachable count, 0, held with probability 1.
    reachable_counts = np.arange(1)
    joint_prob = np.ones(1)
    for idx in range(prob_between_points.shape[0]):
        candidate_counts = np.arange(lower_count[idx], upper_count[idx])
        joint_prob = _update_ecdf_band_interior_probabilities_numba(
            joint_prob, reachable_counts, candidate_counts, prob_between_points[idx], ndraws
        )
        reachable_counts = candidate_counts
    return joint_prob.sum()
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
def _ecdf_band_optimization_objective(
    prob_pointwise: float,
    cdf_at_eval_points: np.ndarray,
    ndraws: int,
    prob_target: float,
) -> float:
    """Objective function for optimizing the simultaneous confidence band probability.

    Arguments
    ---------
    prob_pointwise : float
        Candidate probability of the pointwise confidence band.
    cdf_at_eval_points : np.ndarray
        CDF values at the evaluation points.
    ndraws : int
        Number of draws in the original dataset.
    prob_target : float
        Desired simultaneous coverage probability.

    Returns
    -------
    float
        Absolute difference between the probability that an ECDF stays inside the
        candidate band at every evaluation point and `prob_target`.
    """
    lower, upper = _get_pointwise_confidence_band(prob_pointwise, ndraws, cdf_at_eval_points)
    # The band limits are integer counts divided by ndraws; scaling them back can
    # fall marginally below the integer (e.g. 49 * (1 / 49) < 1), so round to the
    # nearest count instead of truncating, which would shift the band by one.
    lower_count = np.rint(lower * ndraws).astype(int)
    # +1 because the interior counts are later enumerated as a half-open range.
    upper_count = np.rint(upper * ndraws).astype(int) + 1
    cdf_with_zero = np.insert(cdf_at_eval_points[:-1], 0, 0)
    # Increment in CDF since the previous point, relative to the mass remaining
    # beyond the previous point.
    prob_between_points = (cdf_at_eval_points - cdf_with_zero) / (1 - cdf_with_zero)
    if Numba.numba_flag:
        prob_interior = _ecdf_band_interior_probability_numba(
            prob_between_points, ndraws, lower_count, upper_count
        )
    else:
        prob_interior = _ecdf_band_interior_probability(
            prob_between_points, ndraws, lower_count, upper_count
        )
    return abs(prob_interior - prob_target)
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
def _optimize_simultaneous_ecdf_band_probability(
    ndraws: int,
    eval_points: np.ndarray,  # pylint: disable=unused-argument
    cdf_at_eval_points: np.ndarray,
    prob: float = 0.95,
    **kwargs,  # pylint: disable=unused-argument
):
    """Estimate probability for simultaneous confidence band using optimization.

    This function optimizes the pointwise probability needed to construct pointwise
    confidence bands that form a `prob`-level confidence envelope for the ECDF of a
    sample. `eval_points` and any extra keyword arguments are accepted but not used.

    Returns
    -------
    float
        The pointwise probability, searched for within ``(prob, 1)``.
    """
    # np.unique sorts the CDF values and drops duplicates before the search.
    unique_cdf = np.unique(cdf_at_eval_points)

    def _objective(candidate_prob):
        return _ecdf_band_optimization_objective(candidate_prob, unique_cdf, ndraws, prob)

    prob_pointwise = minimize_scalar(_objective, bounds=(prob, 1), method="bounded").x
    return prob_pointwise
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
def _simulate_simultaneous_ecdf_band_probability(
    ndraws: int,
    eval_points: np.ndarray,
    cdf_at_eval_points: np.ndarray,
    prob: float = 0.95,
    rvs: Optional[Callable[[int, Optional[Any]], np.ndarray]] = None,
    num_trials: int = 500,
    random_state: Optional[Any] = None,
) -> float:
    """Estimate probability for simultaneous confidence band using simulation.

    This function simulates the pointwise probability needed to construct pointwise
    confidence bands that form a `prob`-level confidence envelope for the ECDF
    of a sample.

    Arguments
    ---------
    ndraws : int
        Number of draws in each simulated sample.
    eval_points : np.ndarray
        Points at which simulated ECDFs are evaluated when `rvs` is given.
    cdf_at_eval_points : np.ndarray
        CDF values at the evaluation points.
    prob : float, default 0.95
        Target simultaneous coverage probability.
    rvs : callable, optional
        Sampler taking `ndraws` and optionally `random_state`. If omitted, the
        variable is assumed continuous and a standard uniform sampler is used.
    num_trials : int, default 500
        Number of simulated ECDFs.
    random_state : int, numpy.random.Generator or numpy.random.RandomState, optional
        Seed or generator for the simulated draws. An integer seed is converted
        once into a ``numpy.random.RandomState`` that is shared by all trials.

    Returns
    -------
    float
        The `prob`-quantile of the per-trial pointwise probabilities.
    """
    if rvs is None:
        warnings.warn(
            "Assuming variable is continuous for calibration of pointwise bands. "
            "If the variable is discrete, specify random variable sampler `rvs`.",
            UserWarning,
        )
        # if variable continuous, we can calibrate the confidence band using a uniform
        # distribution
        rvs = uniform(0, 1).rvs
        eval_points_sim = cdf_at_eval_points
    else:
        eval_points_sim = eval_points

    # SciPy samplers build a fresh RandomState from an integer seed on every
    # call, so passing the raw seed to each trial would produce `num_trials`
    # identical samples. Convert it once into a stateful generator that advances
    # between trials (the first trial still matches the raw-seed draw).
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    probs_pointwise = np.empty(num_trials)
    for i in range(num_trials):
        ecdf_at_eval_points = _simulate_ecdf(
            ndraws, eval_points_sim, rvs, random_state=random_state
        )
        prob_pointwise = _fit_pointwise_band_probability(
            ndraws, ecdf_at_eval_points, cdf_at_eval_points
        )
        probs_pointwise[i] = prob_pointwise
    return np.quantile(probs_pointwise, prob)
|