bootstrapx-lib 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bootstrapx_lib-0.1.3/LICENSE +21 -0
- bootstrapx_lib-0.1.3/PKG-INFO +158 -0
- bootstrapx_lib-0.1.3/README.md +115 -0
- bootstrapx_lib-0.1.3/pyproject.toml +63 -0
- bootstrapx_lib-0.1.3/setup.cfg +4 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/__init__.py +8 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/api.py +218 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/engine/__init__.py +0 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/engine/backend.py +114 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/generators/__init__.py +0 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/generators/hierarchical.py +35 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/generators/iid.py +63 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/generators/timeseries.py +237 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/stats/__init__.py +0 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/stats/confidence.py +95 -0
- bootstrapx_lib-0.1.3/src/bootstrapx/utils.py +27 -0
- bootstrapx_lib-0.1.3/src/bootstrapx_lib.egg-info/PKG-INFO +158 -0
- bootstrapx_lib-0.1.3/src/bootstrapx_lib.egg-info/SOURCES.txt +26 -0
- bootstrapx_lib-0.1.3/src/bootstrapx_lib.egg-info/dependency_links.txt +1 -0
- bootstrapx_lib-0.1.3/src/bootstrapx_lib.egg-info/requires.txt +22 -0
- bootstrapx_lib-0.1.3/src/bootstrapx_lib.egg-info/top_level.txt +1 -0
- bootstrapx_lib-0.1.3/tests/test_backend.py +9 -0
- bootstrapx_lib-0.1.3/tests/test_confidence.py +25 -0
- bootstrapx_lib-0.1.3/tests/test_edge_cases.py +20 -0
- bootstrapx_lib-0.1.3/tests/test_hierarchical.py +19 -0
- bootstrapx_lib-0.1.3/tests/test_iid.py +52 -0
- bootstrapx_lib-0.1.3/tests/test_timeseries.py +40 -0
- bootstrapx_lib-0.1.3/tests/test_validation.py +21 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Artem Erokhin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bootstrapx-lib
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Production-grade uncertainty estimation for Python.
|
|
5
|
+
Author-email: Artem Erokhin <artyerokhin@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/artyerokhin/bootstrapx
|
|
8
|
+
Project-URL: Documentation, https://artyerokhin.github.io/bootstrapx
|
|
9
|
+
Project-URL: Repository, https://github.com/artyerokhin/bootstrapx
|
|
10
|
+
Project-URL: Issues, https://github.com/artyerokhin/bootstrapx/issues
|
|
11
|
+
Keywords: bootstrap,statistics,confidence-interval,resampling,numba
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.23
|
|
25
|
+
Requires-Dist: scipy>=1.10
|
|
26
|
+
Requires-Dist: numba>=0.57
|
|
27
|
+
Requires-Dist: joblib>=1.3
|
|
28
|
+
Provides-Extra: cuda
|
|
29
|
+
Requires-Dist: numba>=0.57; extra == "cuda"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
32
|
+
Requires-Dist: hypothesis; extra == "dev"
|
|
33
|
+
Requires-Dist: coverage; extra == "dev"
|
|
34
|
+
Requires-Dist: mypy; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff; extra == "dev"
|
|
36
|
+
Provides-Extra: docs
|
|
37
|
+
Requires-Dist: mkdocs-material; extra == "docs"
|
|
38
|
+
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
|
39
|
+
Provides-Extra: bench
|
|
40
|
+
Requires-Dist: asv; extra == "bench"
|
|
41
|
+
Requires-Dist: virtualenv; extra == "bench"
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
|
|
44
|
+
<div align="center">
|
|
45
|
+
|
|
46
|
+
# bootstrapx
|
|
47
|
+
|
|
48
|
+
**Production-grade uncertainty estimation for Python.**
|
|
49
|
+
|
|
50
|
+
[](https://github.com/artyerokhin/bootstrapx/actions)
|
|
51
|
+
[](https://pypi.org/project/bootstrapx/)
|
|
52
|
+
[](https://pypi.org/project/bootstrapx/)
|
|
53
|
+
[](LICENSE)
|
|
54
|
+
[](https://artyerokhin.github.io/bootstrapx)
|
|
55
|
+
|
|
56
|
+
*14 bootstrap methods · Numba JIT · Optional CUDA GPU · Memory-safe batching*
|
|
57
|
+
|
|
58
|
+
</div>
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Why bootstrapx?
|
|
63
|
+
|
|
64
|
+
`scipy.stats.bootstrap` supports only 3 CI methods and has no time-series support.
|
|
65
|
+
The R `boot` package is comprehensive but not accessible from Python.
|
|
66
|
+
**bootstrapx** bridges this gap with 14 methods, Numba acceleration, and a clean API.
|
|
67
|
+
|
|
68
|
+
| Feature | `scipy` | R `boot` | **bootstrapx** |
|
|
69
|
+
|---|:---:|:---:|:---:|
|
|
70
|
+
| BCa interval | ✅ | ✅ | ✅ |
|
|
71
|
+
| Studentized (bootstrap-t) | ❌ | ✅ | ✅ |
|
|
72
|
+
| Poisson / Bernoulli weights | ❌ | ❌ | ✅ |
|
|
73
|
+
| Time-series (MBB, CBB, Stationary, Sieve, Tapered, Wild) | ❌ | Partial | ✅ |
|
|
74
|
+
| Cluster / Stratified | ❌ | Partial | ✅ |
|
|
75
|
+
| Numba JIT | ❌ | N/A | ✅ |
|
|
76
|
+
| CUDA GPU | ❌ | ❌ | ✅ |
|
|
77
|
+
| Generator batching (constant memory) | ❌ | ❌ | ✅ |
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Installation
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
pip install bootstrapx-lib
|
|
85
|
+
|
|
86
|
+
# With GPU
|
|
87
|
+
pip install "bootstrapx-lib[cuda]"
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Quick Start
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
import numpy as np
|
|
96
|
+
from bootstrapx import bootstrap
|
|
97
|
+
|
|
98
|
+
data = np.random.default_rng(42).normal(5, 2, size=200)
|
|
99
|
+
|
|
100
|
+
result = bootstrap(data, np.mean)
|
|
101
|
+
print(result)
|
|
102
|
+
# BootstrapResult(method='bca', theta_hat=4.94, se=0.13, CI=[4.70, 5.19])
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Performance
|
|
108
|
+
|
|
109
|
+
bootstrapx is significantly faster than `scipy.stats.bootstrap` for large datasets:
|
|
110
|
+
|
|
111
|
+
| N | Method | Scipy | Bootstrapx | Speedup |
|
|
112
|
+
|---|---|---|---|---|
|
|
113
|
+
| 5,000 | BCa | 0.80s | 0.27s | **3.0x** |
|
|
114
|
+
| 50,000 | Percentile | 7.29s | 2.01s | **3.6x** |
|
|
115
|
+
| 100,000 | Percentile | 54.34s | 3.99s | **13.6x** |
|
|
116
|
+
|
|
117
|
+
*Benchmark on Apple M1, Python 3.12. See [Benchmarks](https://artyerokhin.github.io/bootstrapx/benchmarks/) for details.*
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Coverage Accuracy
|
|
122
|
+
|
|
123
|
+
Monte Carlo simulation ($N=1000$, 500 runs) confirms correct statistical coverage:
|
|
124
|
+
|
|
125
|
+
| Distribution | BCa Coverage (Nominal 95%) |
|
|
126
|
+
|---|---|
|
|
127
|
+
| Normal | **94.8%** |
|
|
128
|
+
| Skewed (Exponential) | **95.0%** |
|
|
129
|
+
| Heavy-Tailed (t-dist) | **94.0%** |
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Documentation
|
|
134
|
+
|
|
135
|
+
📖 **Full docs:** [artyerokhin.github.io/bootstrapx](https://artyerokhin.github.io/bootstrapx)
|
|
136
|
+
|
|
137
|
+
- [Getting Started](https://artyerokhin.github.io/bootstrapx/getting-started/)
|
|
138
|
+
- [Methods Guide](https://artyerokhin.github.io/bootstrapx/methods/) — math behind each method
|
|
139
|
+
- [API Reference](https://artyerokhin.github.io/bootstrapx/reference/)
|
|
140
|
+
- [Benchmarks](https://artyerokhin.github.io/bootstrapx/benchmarks/)
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Contributing
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
git clone https://github.com/artyerokhin/bootstrapx.git
|
|
148
|
+
cd bootstrapx
|
|
149
|
+
pip install -e ".[dev,docs]"
|
|
150
|
+
pytest tests/ -v
|
|
151
|
+
mkdocs serve
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## License
|
|
157
|
+
|
|
158
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# bootstrapx
|
|
4
|
+
|
|
5
|
+
**Production-grade uncertainty estimation for Python.**
|
|
6
|
+
|
|
7
|
+
[CI](https://github.com/artyerokhin/bootstrapx/actions)
|
|
8
|
+
[PyPI version](https://pypi.org/project/bootstrapx-lib/)
|
|
9
|
+
[Python versions](https://pypi.org/project/bootstrapx-lib/)
|
|
10
|
+
[License: MIT](LICENSE)
|
|
11
|
+
[Documentation](https://artyerokhin.github.io/bootstrapx)
|
|
12
|
+
|
|
13
|
+
*14 bootstrap methods · Numba JIT · Optional CUDA GPU · Memory-safe batching*
|
|
14
|
+
|
|
15
|
+
</div>
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Why bootstrapx?
|
|
20
|
+
|
|
21
|
+
`scipy.stats.bootstrap` supports only 3 CI methods and has no time-series support.
|
|
22
|
+
The R `boot` package is comprehensive but not accessible from Python.
|
|
23
|
+
**bootstrapx** bridges this gap with 14 methods, Numba acceleration, and a clean API.
|
|
24
|
+
|
|
25
|
+
| Feature | `scipy` | R `boot` | **bootstrapx** |
|
|
26
|
+
|---|:---:|:---:|:---:|
|
|
27
|
+
| BCa interval | ✅ | ✅ | ✅ |
|
|
28
|
+
| Studentized (bootstrap-t) | ❌ | ✅ | ✅ |
|
|
29
|
+
| Poisson / Bernoulli weights | ❌ | ❌ | ✅ |
|
|
30
|
+
| Time-series (MBB, CBB, Stationary, Sieve, Tapered, Wild) | ❌ | Partial | ✅ |
|
|
31
|
+
| Cluster / Stratified | ❌ | Partial | ✅ |
|
|
32
|
+
| Numba JIT | ❌ | N/A | ✅ |
|
|
33
|
+
| CUDA GPU | ❌ | ❌ | ✅ |
|
|
34
|
+
| Generator batching (constant memory) | ❌ | ❌ | ✅ |
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install bootstrapx-lib
|
|
42
|
+
|
|
43
|
+
# With GPU
|
|
44
|
+
pip install "bootstrapx-lib[cuda]"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
import numpy as np
|
|
53
|
+
from bootstrapx import bootstrap
|
|
54
|
+
|
|
55
|
+
data = np.random.default_rng(42).normal(5, 2, size=200)
|
|
56
|
+
|
|
57
|
+
result = bootstrap(data, np.mean)
|
|
58
|
+
print(result)
|
|
59
|
+
# BootstrapResult(method='bca', theta_hat=4.94, se=0.13, CI=[4.70, 5.19])
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Performance
|
|
65
|
+
|
|
66
|
+
bootstrapx is significantly faster than `scipy.stats.bootstrap` for large datasets:
|
|
67
|
+
|
|
68
|
+
| N | Method | Scipy | Bootstrapx | Speedup |
|
|
69
|
+
|---|---|---|---|---|
|
|
70
|
+
| 5,000 | BCa | 0.80s | 0.27s | **3.0x** |
|
|
71
|
+
| 50,000 | Percentile | 7.29s | 2.01s | **3.6x** |
|
|
72
|
+
| 100,000 | Percentile | 54.34s | 3.99s | **13.6x** |
|
|
73
|
+
|
|
74
|
+
*Benchmark on Apple M1, Python 3.12. See [Benchmarks](https://artyerokhin.github.io/bootstrapx/benchmarks/) for details.*
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Coverage Accuracy
|
|
79
|
+
|
|
80
|
+
Monte Carlo simulation ($N=1000$, 500 runs) confirms correct statistical coverage:
|
|
81
|
+
|
|
82
|
+
| Distribution | BCa Coverage (Nominal 95%) |
|
|
83
|
+
|---|---|
|
|
84
|
+
| Normal | **94.8%** |
|
|
85
|
+
| Skewed (Exponential) | **95.0%** |
|
|
86
|
+
| Heavy-Tailed (t-dist) | **94.0%** |
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Documentation
|
|
91
|
+
|
|
92
|
+
📖 **Full docs:** [artyerokhin.github.io/bootstrapx](https://artyerokhin.github.io/bootstrapx)
|
|
93
|
+
|
|
94
|
+
- [Getting Started](https://artyerokhin.github.io/bootstrapx/getting-started/)
|
|
95
|
+
- [Methods Guide](https://artyerokhin.github.io/bootstrapx/methods/) — math behind each method
|
|
96
|
+
- [API Reference](https://artyerokhin.github.io/bootstrapx/reference/)
|
|
97
|
+
- [Benchmarks](https://artyerokhin.github.io/bootstrapx/benchmarks/)
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Contributing
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
git clone https://github.com/artyerokhin/bootstrapx.git
|
|
105
|
+
cd bootstrapx
|
|
106
|
+
pip install -e ".[dev,docs]"
|
|
107
|
+
pytest tests/ -v
|
|
108
|
+
mkdocs serve
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## License
|
|
114
|
+
|
|
115
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "bootstrapx-lib"
|
|
7
|
+
version = "0.1.3"
|
|
8
|
+
description = "Production-grade uncertainty estimation for Python."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [{name = "Artem Erokhin", email = "artyerokhin@gmail.com"}]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"Intended Audience :: Science/Research",
|
|
16
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.9",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
]
|
|
24
|
+
keywords = ["bootstrap", "statistics", "confidence-interval", "resampling", "numba"]
|
|
25
|
+
|
|
26
|
+
dependencies = [
|
|
27
|
+
"numpy>=1.23",
|
|
28
|
+
"scipy>=1.10",
|
|
29
|
+
"numba>=0.57",
|
|
30
|
+
"joblib>=1.3",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/artyerokhin/bootstrapx"
|
|
35
|
+
Documentation = "https://artyerokhin.github.io/bootstrapx"
|
|
36
|
+
Repository = "https://github.com/artyerokhin/bootstrapx"
|
|
37
|
+
Issues = "https://github.com/artyerokhin/bootstrapx/issues"
|
|
38
|
+
|
|
39
|
+
[project.optional-dependencies]
|
|
40
|
+
cuda = ["numba>=0.57"]
|
|
41
|
+
dev = ["pytest>=7", "hypothesis", "coverage", "mypy", "ruff"]
|
|
42
|
+
docs = ["mkdocs-material", "mkdocstrings[python]"]
|
|
43
|
+
bench = ["asv", "virtualenv"]
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.packages.find]
|
|
46
|
+
where = ["src"]
|
|
47
|
+
|
|
48
|
+
[tool.mypy]
|
|
49
|
+
python_version = "3.10"
|
|
50
|
+
warn_return_any = true
|
|
51
|
+
warn_unused_configs = true
|
|
52
|
+
ignore_missing_imports = true
|
|
53
|
+
|
|
54
|
+
[tool.ruff]
|
|
55
|
+
line-length = 100
|
|
56
|
+
target-version = "py39"
|
|
57
|
+
|
|
58
|
+
[tool.ruff.lint]
|
|
59
|
+
select = ["E", "W", "F", "I", "UP", "B"]
|
|
60
|
+
|
|
61
|
+
[tool.pytest.ini_options]
|
|
62
|
+
pythonpath = ["src"]
|
|
63
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""bootstrapx — Production-grade uncertainty estimation for Python."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
# Use relative import to work without installation
|
|
5
|
+
from .api import bootstrap, BootstrapResult
|
|
6
|
+
|
|
7
|
+
# Keep in sync with ``version`` in pyproject.toml (the distribution is released as 0.1.3).
__version__ = "0.1.3"
|
|
8
|
+
__all__ = ["bootstrap", "BootstrapResult"]
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any, Callable
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from bootstrapx.engine.backend import resolve_backend, apply_statistic_batched
|
|
9
|
+
from bootstrapx.generators.iid import (
|
|
10
|
+
basic_resample,
|
|
11
|
+
bernoulli_resample,
|
|
12
|
+
poisson_resample,
|
|
13
|
+
subsampling_resample,
|
|
14
|
+
)
|
|
15
|
+
from bootstrapx.generators.timeseries import (
|
|
16
|
+
cbb_resample,
|
|
17
|
+
mbb_resample,
|
|
18
|
+
sieve_resample,
|
|
19
|
+
stationary_resample,
|
|
20
|
+
tapered_block_resample,
|
|
21
|
+
wild_resample,
|
|
22
|
+
)
|
|
23
|
+
from bootstrapx.generators.hierarchical import cluster_resample, strata_resample
|
|
24
|
+
from bootstrapx.stats.confidence import (
|
|
25
|
+
ConfidenceInterval,
|
|
26
|
+
basic_interval,
|
|
27
|
+
bca_interval,
|
|
28
|
+
percentile_interval,
|
|
29
|
+
studentized_interval,
|
|
30
|
+
)
|
|
31
|
+
from bootstrapx.utils import auto_batch_size, validate_data
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class BootstrapResult:
    """Outcome of a single :func:`bootstrap` call.

    Attributes
    ----------
    confidence_interval
        Interval object; ``__repr__`` reads its ``low`` and ``high`` attributes.
    bootstrap_distribution
        The statistic evaluated on each resample.
    theta_hat
        The statistic evaluated on the original data.
    standard_error
        Spread of ``bootstrap_distribution`` as reported by the caller.
    n_resamples
        Number of bootstrap replicates behind the result.
    method
        Name of the bootstrap method that produced the result.
    extra
        Free-form dictionary for additional outputs; empty by default.
    """

    confidence_interval: ConfidenceInterval
    bootstrap_distribution: np.ndarray
    theta_hat: float
    standard_error: float
    n_resamples: int
    method: str
    extra: dict[str, Any] = field(default_factory=dict)

    def __repr__(self) -> str:
        """Return a compact one-line summary with numbers at 6 significant digits."""
        bounds = self.confidence_interval
        pieces = (
            f"BootstrapResult(method={self.method!r}, ",
            f"theta_hat={self.theta_hat:.6g}, ",
            f"se={self.standard_error:.6g}, ",
            f"CI=[{bounds.low:.6g}, {bounds.high:.6g}])",
        )
        return "".join(pieces)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _apply_weights(data_ref: np.ndarray, w: np.ndarray) -> np.ndarray:
    """Materialise one weighted resample of ``data_ref`` from the weight row ``w``."""
    w = np.asarray(w)
    positive = w > 0
    if not positive.any():
        # Nothing was drawn; fall back to the full data set rather than handing
        # an empty array to the statistic.
        return data_ref

    is_count = np.issubdtype(w.dtype, np.integer) or (
        np.issubdtype(w.dtype, np.floating)
        and bool(np.all(np.isfinite(w)))
        and bool(np.all(w == np.rint(w)))
    )
    if is_count:
        # Integer weights are multiplicities (e.g. Poisson counts): an observation
        # with weight k must appear k times, not once.
        counts = np.where(positive, w, 0).astype(np.intp)
        return np.repeat(data_ref, counts, axis=0)

    # Boolean or fractional weights: keep every observation with a positive weight.
    return data_ref[positive]


def _collect(gen, statistic: Callable[[np.ndarray], float]) -> list[float]:
    """Evaluate ``statistic`` on every resample produced by ``gen``.

    Each item yielded by ``gen`` may be one of three shapes:

    * a tuple ``(data, weights)`` where each row of ``weights`` describes one
      resample of ``data``; integer-valued weights are treated as repetition
      counts, any other weights as an inclusion mask (``weight > 0``);
    * a list of arrays, one resample per element;
    * anything else is treated as an array-like batch whose first axis indexes
      resamples.

    Parameters
    ----------
    gen
        Iterable of batches as described above.
    statistic
        Callable applied to each resample; its result is converted with ``float``.

    Returns
    -------
    list[float]
        One value per resample, in generation order.
    """
    results: list[float] = []
    for batch in gen:
        if isinstance(batch, tuple):
            data_ref, weights = batch
            for w in weights:
                results.append(float(statistic(_apply_weights(data_ref, w))))
        else:
            # A list of arrays and a 2-D array batch are both iterated along
            # their first axis, one resample at a time.
            results.extend(float(statistic(sample)) for sample in batch)
    return results
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Method names grouped by resampling family. ``bootstrap`` validates the
# requested method against the union of the three groups.
_IID_METHODS = {"percentile", "basic", "bca", "studentized", "poisson", "bernoulli", "subsampling"}
_TS_METHODS = {
    "mbb",
    "cbb",
    "stationary",
    "tapered",
    "sieve",
    "wild",
}
_HIER_METHODS = {
    "cluster",
    "strata",
}
_ALL_METHODS = set().union(_IID_METHODS, _TS_METHODS, _HIER_METHODS)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _studentized_resample(
    arr: np.ndarray,
    statistic: Callable[[np.ndarray], float],
    n_resamples: int,
    n_inner: int,
    rng: np.random.Generator,
) -> tuple[np.ndarray, np.ndarray]:
    """Return paired ``(boot_stats, boot_se)`` arrays for the bootstrap-t interval."""
    n = arr.shape[0]
    boot_stats = np.empty(n_resamples, dtype=np.float64)
    boot_se = np.empty(n_resamples, dtype=np.float64)
    for b in range(n_resamples):
        # The statistic and its nested-bootstrap standard error must come from
        # the SAME outer resample, otherwise the pairing is meaningless.
        # Indices are drawn per iteration so memory stays O(n_inner * n).
        sample = arr[rng.integers(0, n, size=n)]
        boot_stats[b] = statistic(sample)
        inner_idx = rng.integers(0, n, size=(n_inner, n))
        inner_vals = [statistic(sample[row]) for row in inner_idx]
        boot_se[b] = np.std(inner_vals, ddof=1)
    return boot_stats, boot_se


def _build_generator(
    method: str,
    arr: np.ndarray,
    n_resamples: int,
    batch_size: int,
    rng: np.random.Generator,
    kwargs: dict[str, Any],
):
    """Create the resample generator for a generator-based ``method``."""
    if method == "cluster":
        cids = kwargs.get("cluster_ids")
        if cids is None:
            raise ValueError("cluster method requires `cluster_ids` kwarg.")
        return cluster_resample(arr, np.asarray(cids), n_resamples, batch_size, rng)
    if method == "strata":
        sids = kwargs.get("strata_ids")
        if sids is None:
            raise ValueError("strata method requires `strata_ids` kwarg.")
        return strata_resample(arr, np.asarray(sids), n_resamples, batch_size, rng)
    if method == "poisson":
        return poisson_resample(arr, n_resamples, batch_size, rng)
    if method == "bernoulli":
        return bernoulli_resample(
            arr, n_resamples, batch_size, rng, prob=kwargs.get("prob", 0.5)
        )
    if method == "subsampling":
        return subsampling_resample(
            arr, n_resamples, batch_size, rng, subsample_size=kwargs.get("subsample_size")
        )
    if method == "mbb":
        return mbb_resample(
            arr, n_resamples, batch_size, rng, block_length=kwargs.get("block_length", 10)
        )
    if method == "cbb":
        return cbb_resample(
            arr, n_resamples, batch_size, rng, block_length=kwargs.get("block_length", 10)
        )
    if method == "stationary":
        return stationary_resample(
            arr, n_resamples, batch_size, rng, mean_block=kwargs.get("mean_block", 10.0)
        )
    if method == "tapered":
        return tapered_block_resample(
            arr,
            n_resamples,
            batch_size,
            rng,
            block_length=kwargs.get("block_length", 10),
            taper=kwargs.get("taper", "tukey"),
        )
    if method == "sieve":
        return sieve_resample(
            arr, n_resamples, batch_size, rng, ar_order=kwargs.get("ar_order")
        )
    if method == "wild":
        return wild_resample(
            arr,
            n_resamples,
            batch_size,
            rng,
            fitted=kwargs.get("fitted"),
            distribution=kwargs.get("distribution", "rademacher"),
        )
    # Should not happen: the caller validates ``method`` against _ALL_METHODS.
    raise ValueError(f"Method {method} not implemented in dispatcher.")


def bootstrap(
    data: Any,
    statistic: Callable[[np.ndarray], float],
    *,
    method: str = "bca",
    n_resamples: int = 9999,
    batch_size: int | None = None,
    confidence_level: float = 0.95,
    backend: str = "auto",
    random_state: int | np.random.Generator | None = None,
    n_jobs: int = 1,
    **kwargs: Any,
) -> BootstrapResult:
    """Estimate the sampling distribution of ``statistic`` by bootstrap resampling.

    Parameters
    ----------
    data
        Input observations, checked with ``validate_data``. Two-dimensional
        input is only allowed for the ``"cluster"`` and ``"strata"`` methods.
    statistic
        Callable evaluated on the original data and on every resample; its
        result is converted with ``float``.
    method
        Case-insensitive method name (surrounding whitespace is ignored). One
        of ``percentile``, ``basic``, ``bca``, ``studentized``, ``poisson``,
        ``bernoulli``, ``subsampling``, ``mbb``, ``cbb``, ``stationary``,
        ``tapered``, ``sieve``, ``wild``, ``cluster``, ``strata``.
    n_resamples
        Number of bootstrap replicates requested.
    batch_size
        Resamples per batch; when ``None`` it is chosen by ``auto_batch_size``.
    confidence_level
        Level passed to the interval routine.
    backend
        Backend name resolved by ``resolve_backend``. The resolved backend is
        only used by the ``percentile``, ``basic`` and ``bca`` methods.
    random_state
        A ``numpy.random.Generator`` is used as-is; anything else is passed to
        ``numpy.random.default_rng``.
    n_jobs
        Accepted for API compatibility; not used by this function.
    **kwargs
        Method-specific options, read only by the relevant method:
        ``n_inner`` (studentized, default 50), ``cluster_ids`` (cluster,
        required), ``strata_ids`` (strata, required), ``prob`` (bernoulli,
        default 0.5), ``subsample_size`` (subsampling), ``block_length``
        (mbb/cbb/tapered, default 10), ``mean_block`` (stationary, default
        10.0), ``taper`` (tapered, default ``"tukey"``), ``ar_order`` (sieve),
        ``fitted`` and ``distribution`` (wild, default ``"rademacher"``).
        Unrecognised keys are ignored.

    Returns
    -------
    BootstrapResult
        Interval, bootstrap distribution, point estimate, and the sample
        standard deviation (``ddof=1``) of the bootstrap distribution.

    Raises
    ------
    ValueError
        If ``method`` is unknown, a required kwarg is missing, or ``n_inner``
        is smaller than 2 for the studentized method.

    Notes
    -----
    Every method other than ``basic``, ``bca`` and ``studentized`` reports a
    percentile interval of its bootstrap distribution.
    """
    method = method.lower().strip()
    if method not in _ALL_METHODS:
        valid = sorted(_ALL_METHODS)
        raise ValueError(f"Unknown method {method!r}. Choose from {valid}.")

    arr = validate_data(data, allow_2d=(method in _HIER_METHODS))
    n = arr.shape[0]

    if isinstance(random_state, np.random.Generator):
        rng = random_state
    else:
        rng = np.random.default_rng(random_state)

    if batch_size is None:
        batch_size = auto_batch_size(n, n_resamples)

    # Resolved for every method so an invalid ``backend`` is handled the same
    # way regardless of which method was requested.
    backend_kind = resolve_backend(backend)
    theta_hat = float(statistic(arr))

    if method == "studentized":
        n_inner = kwargs.get("n_inner", 50)
        if n_inner < 2:
            # np.std(..., ddof=1) of fewer than two values is NaN.
            raise ValueError("studentized method requires `n_inner` >= 2.")
        # NOTE(review): studentized_interval presumably pairs boot_stats[b]
        # with boot_se[b]; both are therefore computed from one resample.
        boot_stats, boot_se = _studentized_resample(arr, statistic, n_resamples, n_inner, rng)
        ci = studentized_interval(
            arr, statistic, theta_hat, boot_stats, boot_se, confidence_level
        )
    elif method in {"percentile", "basic", "bca"}:
        boot_stats = apply_statistic_batched(
            arr, statistic, batch_size, n_resamples, backend_kind, rng
        )
        if method == "percentile":
            ci = percentile_interval(boot_stats, confidence_level)
        elif method == "basic":
            ci = basic_interval(boot_stats, theta_hat, confidence_level)
        else:
            ci = bca_interval(boot_stats, arr, statistic, theta_hat, confidence_level)
    else:
        # Generator-based methods: stream batches and reduce each resample.
        gen = _build_generator(method, arr, n_resamples, batch_size, rng, kwargs)
        boot_stats = np.array(_collect(gen, statistic), dtype=np.float64)
        ci = percentile_interval(boot_stats, confidence_level)

    return BootstrapResult(
        confidence_interval=ci,
        bootstrap_distribution=boot_stats,
        theta_hat=theta_hat,
        standard_error=float(np.std(boot_stats, ddof=1)),
        n_resamples=len(boot_stats),
        method=method,
    )
|
|
File without changes
|