diff-diff 2.5.0__tar.gz → 2.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-2.5.0 → diff_diff-2.6.1}/PKG-INFO +4 -1
- {diff_diff-2.5.0 → diff_diff-2.6.1}/README.md +3 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/__init__.py +38 -1
- diff_diff-2.6.1/diff_diff/bootstrap_utils.py +279 -0
- diff_diff-2.6.1/diff_diff/continuous_did.py +1155 -0
- diff_diff-2.6.1/diff_diff/continuous_did_bspline.py +188 -0
- diff_diff-2.6.1/diff_diff/continuous_did_results.py +353 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/prep.py +1 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/prep_dgp.py +156 -1
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/staggered.py +3 -1
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/staggered_bootstrap.py +23 -251
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/sun_abraham.py +9 -41
- {diff_diff-2.5.0 → diff_diff-2.6.1}/pyproject.toml +1 -1
- {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/Cargo.lock +24 -24
- {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/Cargo.toml +1 -1
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/_backend.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/bacon.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/datasets.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/estimators.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/imputation.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/imputation_bootstrap.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/imputation_results.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/linalg.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/power.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/results.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/stacked_did.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/stacked_did_results.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/staggered_aggregation.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/staggered_results.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/trop.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/trop_results.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/twfe.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/two_stage.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/two_stage_bootstrap.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/two_stage_results.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/utils.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/visualization.py +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/build.rs +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/lib.rs +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/linalg.rs +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/trop.rs +0 -0
- {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/weights.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diff-diff
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.6.1
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -138,6 +138,9 @@ We provide Jupyter notebook tutorials in `docs/tutorials/`:
|
|
|
138
138
|
| `08_triple_diff.ipynb` | Triple Difference (DDD) estimation with proper covariate handling |
|
|
139
139
|
| `09_real_world_examples.ipynb` | Real-world data examples (Card-Krueger, Castle Doctrine, Divorce Laws) |
|
|
140
140
|
| `10_trop.ipynb` | Triply Robust Panel (TROP) estimation with factor model adjustment |
|
|
141
|
+
| `11_imputation_did.ipynb` | Imputation DiD (Borusyak et al. 2024), pre-trend test, efficiency comparison |
|
|
142
|
+
| `12_two_stage_did.ipynb` | Two-Stage DiD (Gardner 2022), GMM sandwich variance, per-observation effects |
|
|
143
|
+
| `13_stacked_did.ipynb` | Stacked DiD (Wing et al. 2024), Q-weights, sub-experiment inspection, trimming, clean control definitions |
|
|
141
144
|
|
|
142
145
|
## Data Preparation
|
|
143
146
|
|
|
@@ -100,6 +100,9 @@ We provide Jupyter notebook tutorials in `docs/tutorials/`:
|
|
|
100
100
|
| `08_triple_diff.ipynb` | Triple Difference (DDD) estimation with proper covariate handling |
|
|
101
101
|
| `09_real_world_examples.ipynb` | Real-world data examples (Card-Krueger, Castle Doctrine, Divorce Laws) |
|
|
102
102
|
| `10_trop.ipynb` | Triply Robust Panel (TROP) estimation with factor model adjustment |
|
|
103
|
+
| `11_imputation_did.ipynb` | Imputation DiD (Borusyak et al. 2024), pre-trend test, efficiency comparison |
|
|
104
|
+
| `12_two_stage_did.ipynb` | Two-Stage DiD (Gardner 2022), GMM sandwich variance, per-observation effects |
|
|
105
|
+
| `13_stacked_did.ipynb` | Stacked DiD (Wing et al. 2024), Q-weights, sub-experiment inspection, trimming, clean control definitions |
|
|
103
106
|
|
|
104
107
|
## Data Preparation
|
|
105
108
|
|
|
@@ -70,6 +70,7 @@ from diff_diff.prep import (
|
|
|
70
70
|
aggregate_to_cohorts,
|
|
71
71
|
balance_panel,
|
|
72
72
|
create_event_time,
|
|
73
|
+
generate_continuous_did_data,
|
|
73
74
|
generate_did_data,
|
|
74
75
|
generate_ddd_data,
|
|
75
76
|
generate_event_study_data,
|
|
@@ -122,6 +123,11 @@ from diff_diff.triple_diff import (
|
|
|
122
123
|
TripleDifferenceResults,
|
|
123
124
|
triple_difference,
|
|
124
125
|
)
|
|
126
|
+
from diff_diff.continuous_did import (
|
|
127
|
+
ContinuousDiD,
|
|
128
|
+
ContinuousDiDResults,
|
|
129
|
+
DoseResponseCurve,
|
|
130
|
+
)
|
|
125
131
|
from diff_diff.trop import (
|
|
126
132
|
TROP,
|
|
127
133
|
TROPResults,
|
|
@@ -153,7 +159,21 @@ from diff_diff.datasets import (
|
|
|
153
159
|
load_mpdta,
|
|
154
160
|
)
|
|
155
161
|
|
|
156
|
-
|
|
162
|
+
# Estimator aliases — short names for convenience
|
|
163
|
+
DiD = DifferenceInDifferences
|
|
164
|
+
TWFE = TwoWayFixedEffects
|
|
165
|
+
EventStudy = MultiPeriodDiD
|
|
166
|
+
SDiD = SyntheticDiD
|
|
167
|
+
CS = CallawaySantAnna
|
|
168
|
+
CDiD = ContinuousDiD
|
|
169
|
+
SA = SunAbraham
|
|
170
|
+
BJS = ImputationDiD
|
|
171
|
+
Gardner = TwoStageDiD
|
|
172
|
+
DDD = TripleDifference
|
|
173
|
+
Stacked = StackedDiD
|
|
174
|
+
Bacon = BaconDecomposition
|
|
175
|
+
|
|
176
|
+
__version__ = "2.6.1"
|
|
157
177
|
__all__ = [
|
|
158
178
|
# Estimators
|
|
159
179
|
"DifferenceInDifferences",
|
|
@@ -161,12 +181,26 @@ __all__ = [
|
|
|
161
181
|
"MultiPeriodDiD",
|
|
162
182
|
"SyntheticDiD",
|
|
163
183
|
"CallawaySantAnna",
|
|
184
|
+
"ContinuousDiD",
|
|
164
185
|
"SunAbraham",
|
|
165
186
|
"ImputationDiD",
|
|
166
187
|
"TwoStageDiD",
|
|
167
188
|
"TripleDifference",
|
|
168
189
|
"TROP",
|
|
169
190
|
"StackedDiD",
|
|
191
|
+
# Estimator aliases (short names)
|
|
192
|
+
"DiD",
|
|
193
|
+
"TWFE",
|
|
194
|
+
"EventStudy",
|
|
195
|
+
"SDiD",
|
|
196
|
+
"CS",
|
|
197
|
+
"CDiD",
|
|
198
|
+
"SA",
|
|
199
|
+
"BJS",
|
|
200
|
+
"Gardner",
|
|
201
|
+
"DDD",
|
|
202
|
+
"Stacked",
|
|
203
|
+
"Bacon",
|
|
170
204
|
# Bacon Decomposition
|
|
171
205
|
"BaconDecomposition",
|
|
172
206
|
"BaconDecompositionResults",
|
|
@@ -181,6 +215,8 @@ __all__ = [
|
|
|
181
215
|
"CallawaySantAnnaResults",
|
|
182
216
|
"CSBootstrapResults",
|
|
183
217
|
"GroupTimeEffect",
|
|
218
|
+
"ContinuousDiDResults",
|
|
219
|
+
"DoseResponseCurve",
|
|
184
220
|
"SunAbrahamResults",
|
|
185
221
|
"SABootstrapResults",
|
|
186
222
|
"ImputationDiDResults",
|
|
@@ -228,6 +264,7 @@ __all__ = [
|
|
|
228
264
|
"generate_ddd_data",
|
|
229
265
|
"generate_panel_data",
|
|
230
266
|
"generate_event_study_data",
|
|
267
|
+
"generate_continuous_did_data",
|
|
231
268
|
"create_event_time",
|
|
232
269
|
"aggregate_to_cohorts",
|
|
233
270
|
"rank_control_units",
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared bootstrap utilities for multiplier bootstrap inference.
|
|
3
|
+
|
|
4
|
+
Provides weight generation, percentile CI, and p-value helpers used by
|
|
5
|
+
both CallawaySantAnna and ContinuousDiD estimators.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import warnings
|
|
9
|
+
from typing import Optional, Tuple
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from diff_diff._backend import HAS_RUST_BACKEND, _rust_bootstrap_weights
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"generate_bootstrap_weights",
|
|
17
|
+
"generate_bootstrap_weights_batch",
|
|
18
|
+
"generate_bootstrap_weights_batch_numpy",
|
|
19
|
+
"compute_percentile_ci",
|
|
20
|
+
"compute_bootstrap_pvalue",
|
|
21
|
+
"compute_effect_bootstrap_stats",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def generate_bootstrap_weights(
    n_units: int,
    weight_type: str,
    rng: np.random.Generator,
) -> np.ndarray:
    """
    Draw one vector of multiplier-bootstrap weights.

    Parameters
    ----------
    n_units : int
        Number of units (clusters) to draw weights for.
    weight_type : str
        Weight distribution: "rademacher", "mammen", or "webb".
    rng : np.random.Generator
        Source of randomness.

    Returns
    -------
    np.ndarray
        Weights with shape (n_units,).

    Raises
    ------
    ValueError
        If ``weight_type`` is not one of the supported distributions.
    """
    if weight_type == "rademacher":
        # Symmetric two-point distribution on {-1, +1}.
        return rng.choice([-1.0, 1.0], size=n_units)

    if weight_type == "mammen":
        # Mammen two-point distribution (mean 0, variance 1).
        root5 = np.sqrt(5)
        low = -(root5 - 1) / 2
        high = (root5 + 1) / 2
        prob_low = (root5 + 1) / (2 * root5)
        return rng.choice([low, high], size=n_units, p=[prob_low, 1 - prob_low])

    if weight_type == "webb":
        # Webb six-point distribution: +/- sqrt(k/2) for k in {1, 2, 3}.
        support = np.array([
            -np.sqrt(3 / 2), -np.sqrt(2 / 2), -np.sqrt(1 / 2),
            np.sqrt(1 / 2), np.sqrt(2 / 2), np.sqrt(3 / 2)
        ])
        return rng.choice(support, size=n_units)

    raise ValueError(
        f"weight_type must be 'rademacher', 'mammen', or 'webb', "
        f"got '{weight_type}'"
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def generate_bootstrap_weights_batch(
    n_bootstrap: int,
    n_units: int,
    weight_type: str,
    rng: np.random.Generator,
) -> np.ndarray:
    """
    Draw the weights for every bootstrap replication in a single call.

    Dispatches to the Rust backend when it is available; otherwise falls
    back to the pure-NumPy implementation.

    Parameters
    ----------
    n_bootstrap : int
        Number of bootstrap iterations.
    n_units : int
        Number of units (clusters) per iteration.
    weight_type : str
        Weight distribution: "rademacher", "mammen", or "webb".
    rng : np.random.Generator
        Source of randomness (used directly, or to seed the Rust RNG).

    Returns
    -------
    np.ndarray
        Weights with shape (n_bootstrap, n_units).
    """
    use_rust = HAS_RUST_BACKEND and _rust_bootstrap_weights is not None
    if not use_rust:
        return generate_bootstrap_weights_batch_numpy(
            n_bootstrap, n_units, weight_type, rng
        )
    # Seed the Rust generator from the caller's rng so a fixed seed still
    # yields reproducible results across backends' invocations.
    seed = rng.integers(0, 2**63 - 1)
    return _rust_bootstrap_weights(n_bootstrap, n_units, weight_type, seed)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def generate_bootstrap_weights_batch_numpy(
    n_bootstrap: int,
    n_units: int,
    weight_type: str,
    rng: np.random.Generator,
) -> np.ndarray:
    """
    NumPy fallback implementation of :func:`generate_bootstrap_weights_batch`.

    Parameters
    ----------
    n_bootstrap : int
        Number of bootstrap iterations.
    n_units : int
        Number of units (clusters) per iteration.
    weight_type : str
        Weight distribution: "rademacher", "mammen", or "webb".
    rng : np.random.Generator
        Source of randomness.

    Returns
    -------
    np.ndarray
        Weights with shape (n_bootstrap, n_units).

    Raises
    ------
    ValueError
        If ``weight_type`` is not one of the supported distributions.
    """
    shape = (n_bootstrap, n_units)

    if weight_type == "rademacher":
        # Symmetric two-point distribution on {-1, +1}.
        return rng.choice([-1.0, 1.0], size=shape)

    if weight_type == "mammen":
        # Mammen two-point distribution (mean 0, variance 1).
        root5 = np.sqrt(5)
        low = -(root5 - 1) / 2
        high = (root5 + 1) / 2
        prob_low = (root5 + 1) / (2 * root5)
        return rng.choice([low, high], size=shape, p=[prob_low, 1 - prob_low])

    if weight_type == "webb":
        # Webb six-point distribution: +/- sqrt(k/2) for k in {1, 2, 3}.
        support = np.array([
            -np.sqrt(3 / 2), -np.sqrt(2 / 2), -np.sqrt(1 / 2),
            np.sqrt(1 / 2), np.sqrt(2 / 2), np.sqrt(3 / 2)
        ])
        return rng.choice(support, size=shape)

    raise ValueError(
        f"weight_type must be 'rademacher', 'mammen', or 'webb', "
        f"got '{weight_type}'"
    )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def compute_percentile_ci(
    boot_dist: np.ndarray,
    alpha: float,
) -> Tuple[float, float]:
    """
    Percentile confidence interval from a bootstrap distribution.

    Parameters
    ----------
    boot_dist : np.ndarray
        Bootstrap distribution (1-D array).
    alpha : float
        Significance level (e.g., 0.05 for a 95% CI).

    Returns
    -------
    tuple of float
        ``(lower, upper)`` confidence interval bounds.
    """
    # np.quantile with the default (linear) method matches np.percentile.
    bounds = np.quantile(boot_dist, [alpha / 2, 1 - alpha / 2])
    return (float(bounds[0]), float(bounds[1]))
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def compute_bootstrap_pvalue(
    original_effect: float,
    boot_dist: np.ndarray,
    n_valid: Optional[int] = None,
) -> float:
    """
    Compute two-sided bootstrap p-value using the percentile method.

    Parameters
    ----------
    original_effect : float
        Original point estimate.
    boot_dist : np.ndarray
        Bootstrap distribution of the effect.
    n_valid : int, optional
        Number of valid bootstrap samples for p-value floor.
        If None, uses ``len(boot_dist)``.

    Returns
    -------
    float
        Two-sided bootstrap p-value, or NaN if ``boot_dist`` is empty.
    """
    boot_dist = np.asarray(boot_dist)
    # Guard: an empty distribution carries no information. Without this,
    # np.mean on an empty array emits a RuntimeWarning and the resulting
    # NaN only survives min()/max() by accident of comparison semantics.
    if boot_dist.size == 0:
        return float("nan")

    # One-sided tail probability on the side opposite the estimate's sign.
    if original_effect >= 0:
        p_one_sided = np.mean(boot_dist <= 0)
    else:
        p_one_sided = np.mean(boot_dist >= 0)

    # Double for a two-sided test, capped at 1.
    p_value = min(2 * p_one_sided, 1.0)
    # Floor at 1/(B+1): B resamples cannot resolve smaller p-values.
    n_for_floor = n_valid if n_valid is not None else len(boot_dist)
    p_value = max(p_value, 1 / (n_for_floor + 1))
    return float(p_value)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def compute_effect_bootstrap_stats(
    original_effect: float,
    boot_dist: np.ndarray,
    alpha: float = 0.05,
    context: str = "bootstrap distribution",
) -> Tuple[float, Tuple[float, float], float]:
    """
    Summarize the bootstrap distribution of a single effect.

    Non-finite samples are filtered out first. Every statistic comes back
    as NaN when the point estimate is non-finite, when fewer than half of
    the samples are finite, or when the bootstrap SE is degenerate.

    Parameters
    ----------
    original_effect : float
        Original point estimate.
    boot_dist : np.ndarray
        Bootstrap distribution of the effect.
    alpha : float, default=0.05
        Significance level.
    context : str, optional
        Description for warning messages.

    Returns
    -------
    se : float
        Bootstrap standard error.
    ci : tuple of float
        Percentile confidence interval.
    p_value : float
        Bootstrap p-value.
    """
    nan_stats = (np.nan, (np.nan, np.nan), np.nan)

    if not np.isfinite(original_effect):
        return nan_stats

    finite_mask = np.isfinite(boot_dist)
    n_total = len(boot_dist)
    n_valid = np.sum(finite_mask)

    if n_valid < n_total:
        warnings.warn(
            f"Dropping {n_total - n_valid}/{n_total} non-finite bootstrap samples "
            f"in {context}. Bootstrap estimates based on remaining valid samples.",
            RuntimeWarning,
            stacklevel=3,
        )

    # Majority rule: with less than half the draws usable, any inference
    # from the remainder would be misleading — signal with NaN.
    if n_valid < n_total * 0.5:
        warnings.warn(
            f"Too few valid bootstrap samples ({n_valid}/{n_total}) in {context}. "
            "Returning NaN for SE/CI/p-value to signal invalid inference.",
            RuntimeWarning,
            stacklevel=3,
        )
        return nan_stats

    valid_dist = boot_dist[finite_mask]
    se = float(np.std(valid_dist, ddof=1))

    # A non-finite or zero SE means the distribution is degenerate; all
    # inference fields must be NaN in that case.
    if not np.isfinite(se) or se <= 0:
        warnings.warn(
            f"Bootstrap SE is non-finite or zero (n_valid={n_valid}) in {context}. "
            "Returning NaN for SE/CI/p-value.",
            RuntimeWarning,
            stacklevel=3,
        )
        return nan_stats

    ci = compute_percentile_ci(valid_dist, alpha)
    p_value = compute_bootstrap_pvalue(
        original_effect, valid_dist, n_valid=len(valid_dist)
    )
    return se, ci, p_value
|