skxperiments 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skxperiments/__init__.py +5 -0
- skxperiments/core/__init__.py +42 -0
- skxperiments/core/assignment.py +589 -0
- skxperiments/core/base.py +512 -0
- skxperiments/core/exceptions.py +145 -0
- skxperiments/core/potential_outcomes.py +168 -0
- skxperiments/core/results.py +624 -0
- skxperiments/design/__init__.py +22 -0
- skxperiments/design/balance.py +182 -0
- skxperiments/design/blocked_crd.py +157 -0
- skxperiments/design/crd.py +162 -0
- skxperiments/design/factorial.py +174 -0
- skxperiments/design/power.py +233 -0
- skxperiments/design/rerandomized_crd.py +319 -0
- skxperiments/diagnostics/__init__.py +21 -0
- skxperiments/diagnostics/aa_test.py +277 -0
- skxperiments/diagnostics/balance_report.py +224 -0
- skxperiments/diagnostics/srm.py +327 -0
- skxperiments/estimators/__init__.py +23 -0
- skxperiments/estimators/blocked_difference_in_means.py +197 -0
- skxperiments/estimators/cuped.py +280 -0
- skxperiments/estimators/difference_in_means.py +161 -0
- skxperiments/estimators/factorial_estimator.py +213 -0
- skxperiments/estimators/lin_estimator.py +298 -0
- skxperiments/inference/__init__.py +17 -0
- skxperiments/inference/bootstrap.py +450 -0
- skxperiments/inference/multiple.py +365 -0
- skxperiments/inference/neyman.py +386 -0
- skxperiments/inference/randomization_test.py +319 -0
- skxperiments/pipeline.py +366 -0
- skxperiments/reporting/__init__.py +30 -0
- skxperiments/reporting/plots.py +411 -0
- skxperiments/reporting/summary.py +185 -0
- skxperiments-0.1.0.dev0.dist-info/METADATA +272 -0
- skxperiments-0.1.0.dev0.dist-info/RECORD +36 -0
- skxperiments-0.1.0.dev0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
"""Neyman variance-based confidence intervals for finite-population inference.
|
|
2
|
+
|
|
3
|
+
Implements ``NeymanCI``: wraps a fitted scalar estimator
|
|
4
|
+
(``DifferenceInMeans`` or ``BlockedDifferenceInMeans``), computes the
|
|
5
|
+
Neyman conservative variance for CRD or the stratified variance for
|
|
6
|
+
blocked designs, and constructs a two-sided Wald confidence interval and
|
|
7
|
+
p-value under the normal approximation.
|
|
8
|
+
|
|
9
|
+
References
|
|
10
|
+
----------
|
|
11
|
+
Neyman, J. (1923/1990). On the application of probability theory to
|
|
12
|
+
agricultural experiments. Statistical Science, 5(4), 465-472.
|
|
13
|
+
Imbens, G. W., & Rubin, D. B. (2015). Causal Inference for Statistics,
|
|
14
|
+
Social, and Biomedical Sciences. Cambridge University Press.
|
|
15
|
+
Chapters 6 (CRD) and 9 (stratified/blocked).
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
from scipy.stats import norm
|
|
20
|
+
|
|
21
|
+
from skxperiments.core.assignment import (
|
|
22
|
+
BlockedAssignment,
|
|
23
|
+
CRDAssignment,
|
|
24
|
+
)
|
|
25
|
+
from skxperiments.core.base import BaseEstimator, BaseInference
|
|
26
|
+
from skxperiments.core.exceptions import (
|
|
27
|
+
DesignEstimatorMismatch,
|
|
28
|
+
InsufficientDataError,
|
|
29
|
+
InvalidDesignError,
|
|
30
|
+
)
|
|
31
|
+
from skxperiments.core.results import Results
|
|
32
|
+
from skxperiments.estimators.blocked_difference_in_means import (
|
|
33
|
+
BlockedDifferenceInMeans,
|
|
34
|
+
)
|
|
35
|
+
from skxperiments.estimators.difference_in_means import DifferenceInMeans
|
|
36
|
+
|
|
37
|
+
# Estimator classes that NeymanCI accepts; NeymanCI.__init__ checks the
# supplied estimator against this tuple with isinstance() and raises
# DesignEstimatorMismatch for anything else.
_ACCEPTED_ESTIMATORS = (DifferenceInMeans, BlockedDifferenceInMeans)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class NeymanCI(BaseInference):
    """Neyman conservative confidence intervals for finite-population inference.

    Wraps a scalar estimator and computes a two-sided Wald CI using
    Neyman's variance estimator, dispatched by the assignment type:

    **CRD** (Neyman 1923):

        V_hat = s_t^2 / n_t + s_c^2 / n_c

    where ``s_t^2`` and ``s_c^2`` are the sample variances (``ddof=1``) of
    the outcome in the treated and control arms.

    **Blocked** (stratified, consistent with the size-weighted ATE of
    ``BlockedDifferenceInMeans``):

        V_hat = sum_b (N_b / N)^2 * V_hat_b,
        V_hat_b = s_{t,b}^2 / n_{t,b} + s_{c,b}^2 / n_{c,b}

    The confidence interval is ``ATE_hat +/- z_{1 - alpha/2} * SE`` and the
    p-value is the two-sided Wald test ``z = ATE_hat / SE``,
    ``p = 2 * (1 - Phi(|z|))``, both under the normal approximation. The
    tail probability is evaluated with the normal survival function so
    that it does not round to zero for large ``|z|``.

    Parameters
    ----------
    estimator : DifferenceInMeans or BlockedDifferenceInMeans
        Causal estimator producing a scalar ATE (``Results.ate``). Need
        not be pre-fitted: ``fit`` refits it on the supplied assignment.
        Any other type raises ``DesignEstimatorMismatch`` at construction.
        ``CUPED`` and ``LinEstimator`` support is planned for a future
        sub-phase (see ROADMAP).
    alpha : float, optional
        Significance level for the confidence interval, by default 0.05.
        The CI is two-sided: a ``(1 - alpha) * 100%`` interval.

    Attributes
    ----------
    assignment_ : CRDAssignment or BlockedAssignment
        The assignment passed to ``fit``.
    variance_ : float
        Estimated Neyman variance ``V_hat``.

    Notes
    -----
    **Finite-population scope.** ``NeymanCI`` v1 targets finite-population
    inference. The Neyman variance formula is numerically identical under
    the superpopulation interpretation, so the restriction is a scope
    choice rather than a mathematical limitation: for superpopulation
    inference use ``BootstrapCI`` (Phase 4.4). If a wrapped estimator
    reports ``inference_mode="superpopulation"`` in its ``Results.extra``,
    ``fit`` raises ``InvalidDesignError`` redirecting to ``BootstrapCI``.

    **Conservative variance.** Neyman's estimator is conservative
    (upward-biased) when individual treatment effects vary across units,
    and exact when the effect is constant. Empirical CI coverage is
    therefore ``>= (1 - alpha)``.

    **Rerandomization.** A ``CRDAssignment`` produced by ``ReRandomizedCRD``
    is accepted: the variance formula is the same as for plain CRD.
    Rerandomization improves covariate balance; Neyman's estimator remains
    valid (and conservative) under it.

    **Estimator compatibility.** ``NeymanCI`` accepts both ``CRDAssignment``
    and ``BlockedAssignment``, but each wrapped estimator enforces its own
    assignment contract in ``fit``: pairing ``DifferenceInMeans`` with a
    ``BlockedAssignment`` (or vice versa) raises ``DesignEstimatorMismatch``
    from the estimator. Only the matching pairs
    (``DifferenceInMeans`` + CRD, ``BlockedDifferenceInMeans`` + blocked)
    proceed to variance computation.

    Examples
    --------
    >>> from skxperiments.design.crd import CRD
    >>> from skxperiments.estimators.difference_in_means import (
    ...     DifferenceInMeans,
    ... )
    >>> from skxperiments.inference import NeymanCI
    >>> design = CRD(p=0.5, seed=42)
    >>> assignment = design.randomize(df)  # doctest: +SKIP
    >>> dim = DifferenceInMeans(outcome_col="y")
    >>> ci = NeymanCI(estimator=dim, alpha=0.05)
    >>> result = ci.fit(assignment).estimate()  # doctest: +SKIP
    >>> result.ci  # doctest: +SKIP
    """

    def __init__(
        self,
        estimator: BaseEstimator,
        alpha: float = 0.05,
    ) -> None:
        """Validate and store the wrapped estimator and significance level.

        Raises
        ------
        DesignEstimatorMismatch
            If ``estimator`` is not one of the accepted estimator types.
        InvalidDesignError
            If ``alpha`` is not a real number (booleans are rejected
            explicitly) or lies outside the open interval ``(0, 1)``.
        """
        if not isinstance(estimator, _ACCEPTED_ESTIMATORS):
            accepted_names = " or ".join(
                t.__name__ for t in _ACCEPTED_ESTIMATORS
            )
            raise DesignEstimatorMismatch(
                estimator_name=type(self).__name__,
                received_type=type(estimator).__name__,
                expected_type=accepted_names,
                suggestion=(
                    f"{accepted_names}. CUPED and LinEstimator support is "
                    f"planned for a future sub-phase (see ROADMAP)."
                ),
            )

        # bool is a subclass of int, so it has to be excluded by hand.
        if not isinstance(alpha, (int, float)) or isinstance(alpha, bool):
            raise InvalidDesignError(
                f"alpha must be a float in (0, 1), got "
                f"{type(alpha).__name__}."
            )
        # NaN fails both comparisons and is therefore rejected here too.
        if not (0.0 < alpha < 1.0):
            raise InvalidDesignError(
                f"alpha must be in (0, 1), got {alpha}."
            )

        self.estimator = estimator
        self.alpha = alpha

    def fit(
        self,
        assignment: CRDAssignment | BlockedAssignment,
    ) -> "NeymanCI":
        """Refit the estimator and compute the Neyman variance.

        All fitted state (public and private) is written only after every
        computation has succeeded. A ``fit`` call that raises therefore
        leaves the attributes of a previous successful ``fit`` untouched,
        and ``estimate`` never combines values from two different fits.
        The wrapped ``self.estimator`` is still refitted as a side effect,
        even when this method raises afterwards.

        Parameters
        ----------
        assignment : CRDAssignment or BlockedAssignment
            Observed assignment. ``FactorialAssignment`` is rejected with
            ``DesignEstimatorMismatch``.

        Returns
        -------
        NeymanCI
            Returns self.

        Raises
        ------
        DesignEstimatorMismatch
            If ``assignment`` is not a ``CRDAssignment`` or
            ``BlockedAssignment``.
        InvalidDesignError
            If the estimator returns a multi-effect ``Results``
            (``Results.ate is None``; v1 supports only scalar estimands),
            or if it reports ``inference_mode="superpopulation"`` (use
            ``BootstrapCI`` instead).
        InsufficientDataError
            If any arm (CRD) or any arm within a block (blocked) has fewer
            than 2 observations, so the sample variance is undefined.
        """
        self._validate_assignment_type(
            assignment, (CRDAssignment, BlockedAssignment)
        )

        # Refit on the supplied assignment to obtain the point estimate.
        # Any prior fit state of self.estimator is discarded.
        self.estimator.fit(assignment)
        base_results = self.estimator.estimate()

        if base_results.ate is None:
            raise InvalidDesignError(
                "NeymanCI v1 supports only estimators producing a scalar "
                "ATE (Results.ate). The supplied estimator "
                f"({type(self.estimator).__name__}) produced a multi-effect "
                "Results (Results.effects). Multi-effect support is planned "
                "for v2."
            )

        # inference_mode defaults to finite_population; only LinEstimator
        # currently writes this key, but the guard is enforced for any
        # whitelisted estimator that may emit it.
        extra = base_results.extra
        if extra is None:
            inference_mode = "finite_population"
        else:
            inference_mode = extra.get("inference_mode", "finite_population")

        if inference_mode == "superpopulation":
            raise InvalidDesignError(
                "NeymanCI v1 targets finite-population inference. The "
                "supplied estimator reported "
                "inference_mode='superpopulation'. The Neyman variance "
                "formula is identical under both interpretations; this "
                "restriction is a scope choice, not a mathematical "
                "limitation. For superpopulation inference use BootstrapCI "
                "(Phase 4.4)."
            )

        ate = float(base_results.ate)

        # Dispatch the variance computation by assignment type. The
        # helpers may raise InsufficientDataError, which is why nothing
        # has been stored on self yet.
        if isinstance(assignment, BlockedAssignment):
            variance = self._neyman_variance_blocked(assignment)
            variance_type = "neyman_stratified"
        else:
            variance = self._neyman_variance_crd(assignment)
            variance_type = "neyman"

        # Commit: everything succeeded, so publish the new fit in one go.
        # estimate() reads only these attributes, never self.estimator.
        self._n_obs = base_results.n_obs
        self._n_treated = base_results.n_treated
        self._n_control = base_results.n_control
        self._estimator_name = base_results.estimator_name
        self._design_name = base_results.design_name
        self._ate = ate
        self._inference_mode = inference_mode
        self._variance_type = variance_type
        self.assignment_: CRDAssignment | BlockedAssignment = assignment
        self.variance_: float = float(variance)

        return self

    @staticmethod
    def _two_arm_variance(y_t, y_c, where: str = "") -> float:
        """Return ``s_t^2 / n_t + s_c^2 / n_c`` for one treated/control pair.

        Parameters
        ----------
        y_t, y_c : array-like
            Outcomes of the treated and control units.
        where : str, optional
            Text appended to the arm name in the error context, e.g.
            ``" in block 'A'"``. Empty for an unblocked design.

        Raises
        ------
        InsufficientDataError
            If either arm has fewer than 2 observations (the treated arm
            is checked first), since ``ddof=1`` variance is undefined.
        """
        n_t = len(y_t)
        n_c = len(y_c)

        if n_t < 2:
            raise InsufficientDataError(
                context=f"NeymanCI variance (treated arm{where})",
                minimum=2,
                received=n_t,
            )
        if n_c < 2:
            raise InsufficientDataError(
                context=f"NeymanCI variance (control arm{where})",
                minimum=2,
                received=n_c,
            )

        s2_t = float(np.var(y_t, ddof=1))
        s2_c = float(np.var(y_c, ddof=1))
        return s2_t / n_t + s2_c / n_c

    def _neyman_variance_crd(self, assignment: CRDAssignment) -> float:
        """Compute the Neyman conservative variance for CRD.

        Reads the outcome column named by ``self.estimator.outcome_col``
        from ``assignment.data_`` and splits it with
        ``assignment.treated_ids()`` / ``assignment.control_ids()``.
        """
        y = assignment.data_[self.estimator.outcome_col].values
        # NOTE(review): the ids are used to index a positional NumPy array,
        # so they are presumably positions or boolean masks rather than
        # index labels - confirm against CRDAssignment.
        y_t = y[assignment.treated_ids()]
        y_c = y[assignment.control_ids()]
        return self._two_arm_variance(y_t, y_c)

    def _neyman_variance_blocked(self, assignment: BlockedAssignment) -> float:
        """Compute the stratified Neyman variance for a blocked design.

        For every block listed in ``assignment.block_sizes_`` the
        within-block two-arm variance is weighted by ``(N_b / N)^2`` and
        the weighted terms are summed.
        """
        data = assignment.data_
        y_col = self.estimator.outcome_col
        treat_col = assignment.treatment_col_
        block_col = assignment.block_col_
        n_total = assignment.n_units_

        variance_total = 0.0

        for block_val, n_b in assignment.block_sizes_.items():
            block_data = data.loc[data[block_col] == block_val]
            block_treat = block_data[treat_col].values
            block_y = block_data[y_col].values

            # Treatment is compared against the literals 1 (treated) and
            # 0 (control); rows with any other value fall in neither arm.
            v_b = self._two_arm_variance(
                block_y[block_treat == 1],
                block_y[block_treat == 0],
                where=f" in block '{block_val}'",
            )
            weight = n_b / n_total
            variance_total += weight**2 * v_b

        return variance_total

    def estimate(self) -> Results:
        """Return a Results object with the ATE, SE, CI, and p-value.

        Returns
        -------
        Results
            Results with:

            - ``ate`` set to the observed point estimate;
            - ``se`` set to ``sqrt(V_hat)``;
            - ``ci`` set to the two-sided ``(1 - alpha) * 100%`` Wald CI;
            - ``p_value`` set to the two-sided Wald p-value;
            - ``alpha`` set to ``self.alpha``;
            - ``inference_name="NeymanCI"``;
            - ``extra`` containing ``variance_type`` and ``inference_mode``.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called.
        InvalidDesignError
            If the standard error is zero (degenerate case: constant
            outcomes within each arm).
        """
        self._check_is_fitted()

        se = float(np.sqrt(self.variance_))

        if se == 0.0:
            raise InvalidDesignError(
                "NeymanCI: the estimated standard error is zero, indicating "
                "a degenerate dataset (constant outcomes within each arm). "
                "Cannot compute a confidence interval or p-value."
            )

        z_crit = float(norm.ppf(1.0 - self.alpha / 2.0))
        ci_lower = self._ate - z_crit * se
        ci_upper = self._ate + z_crit * se

        z_stat = self._ate / se
        # norm.sf(x) equals 1 - norm.cdf(x) mathematically, but it does not
        # suffer catastrophic cancellation: 1 - cdf rounds to exactly 0.0
        # once |z| exceeds roughly 8.3, whereas sf stays positive.
        p_value = float(2.0 * norm.sf(abs(z_stat)))
        # Clamp to [0, 1] for numerical safety.
        p_value = max(0.0, min(1.0, p_value))

        return Results(
            ate=self._ate,
            se=se,
            ci=(ci_lower, ci_upper),
            p_value=p_value,
            alpha=self.alpha,
            n_obs=self._n_obs,
            n_treated=self._n_treated,
            n_control=self._n_control,
            estimator_name=self._estimator_name,
            design_name=self._design_name,
            inference_name=type(self).__name__,
            extra={
                "variance_type": self._variance_type,
                "inference_mode": self._inference_mode,
            },
        )
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
"""Randomization-based inference via Fisher's sharp null hypothesis.
|
|
2
|
+
|
|
3
|
+
Implements ``RandomizationTest``: for each of ``n_permutations``
|
|
4
|
+
draws, generates a fresh ``Assignment`` via ``BaseAssignment.draw()``
|
|
5
|
+
(which respects the original randomization mechanism — including
|
|
6
|
+
rerandomization Mahalanobis criteria and within-block proportions —
|
|
7
|
+
because each Assignment subclass routes ``draw()`` through its
|
|
8
|
+
generating design), refits the estimator, and collects the resulting
|
|
9
|
+
ATE under the sharp null of no individual treatment effect.
|
|
10
|
+
|
|
11
|
+
The p-value is computed with the Phipson & Smyth (2010) continuity
|
|
12
|
+
correction, ``(1 + n_extreme) / (1 + n_permutations)``, which
|
|
13
|
+
guarantees a valid Monte Carlo p-value bounded away from zero.
|
|
14
|
+
|
|
15
|
+
References
|
|
16
|
+
----------
|
|
17
|
+
Fisher, R. A. (1935). The Design of Experiments. Oliver and Boyd.
|
|
18
|
+
Phipson, B., & Smyth, G. K. (2010). Permutation P-values should never
|
|
19
|
+
be zero: calculating exact P-values when permutations are randomly
|
|
20
|
+
drawn. Statistical Applications in Genetics and Molecular Biology,
|
|
21
|
+
9(1), Article 39.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
|
|
26
|
+
from skxperiments.core.assignment import (
|
|
27
|
+
BlockedAssignment,
|
|
28
|
+
CRDAssignment,
|
|
29
|
+
)
|
|
30
|
+
from skxperiments.core.base import BaseEstimator, BaseInference
|
|
31
|
+
from skxperiments.core.exceptions import InvalidDesignError
|
|
32
|
+
from skxperiments.core.results import Results
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class RandomizationTest(BaseInference):
    """Fisher randomization test via Monte Carlo permutations.

    Tests Fisher's sharp null hypothesis,
    ``H0: Y_i(1) = Y_i(0)`` for all i, by generating
    ``n_permutations`` fresh assignments from the same randomization
    mechanism that produced the observed assignment, refitting the
    estimator on each, and comparing the observed ATE to the resulting
    null distribution.

    The p-value uses the Phipson & Smyth (2010) continuity correction:

        p = (1 + n_extreme) / (1 + n_permutations)

    which guarantees a valid Monte Carlo p-value strictly greater than
    zero.

    Parameters
    ----------
    estimator : BaseEstimator
        Causal estimator producing a scalar ATE (``Results.ate``).
        Need not be pre-fitted: ``RandomizationTest.fit`` will refit
        it on the supplied assignment. Estimators producing
        multi-effect ``Results`` (e.g., ``FactorialEstimator``) are
        not supported in v1; ``fit`` will raise ``InvalidDesignError``
        if the estimator returns ``Results.ate is None``.
    n_permutations : int, optional
        Number of Monte Carlo permutations, by default 10_000. Must
        be a positive integer.
    alternative : {"two-sided", "greater", "less"}, optional
        Alternative hypothesis, by default ``"two-sided"``.

        - ``"two-sided"`` uses the criterion ``|T_perm| >= |T_obs|``.
          Valid under any null distribution shape but most natural
          when the null is approximately symmetric around zero (the
          typical case under CRD with Fisher's sharp null and
          balanced sample sizes). Under ``BlockedAssignment`` with
          highly unequal blocks or ``ReRandomizedCRD`` with a tight
          threshold, the null distribution may be slightly asymmetric;
          the absolute-value criterion remains valid and slightly
          conservative. For directional hypotheses with strong
          expected asymmetry, prefer ``"greater"`` or ``"less"``.
        - ``"greater"`` uses ``T_perm >= T_obs``.
        - ``"less"`` uses ``T_perm <= T_obs``.

    seed : int or None, optional
        Random seed for reproducibility. The same ``seed`` produces
        the same ``null_distribution_``. Internally, a single
        ``np.random.default_rng(seed)`` pre-generates one seed per
        permutation, which is then passed to ``Assignment.draw``.
        By default None.

    Attributes
    ----------
    assignment_ : CRDAssignment or BlockedAssignment
        The assignment passed to ``fit``.
    observed_statistic_ : float
        ATE estimated by the estimator on the original assignment,
        captured before the permutation loop runs.
    null_distribution_ : np.ndarray
        Array of permuted ATEs under the sharp null. Length equals
        ``n_permutations``.
    p_value_ : float
        Monte Carlo p-value computed with the Phipson & Smyth
        continuity correction.

    Notes
    -----
    **Sharp null vs. Neyman null.** This class tests Fisher's sharp
    null of no individual treatment effect, not Neyman's null of zero
    average treatment effect. ``BootstrapCI`` (Phase 4.4) will offer
    superpopulation inference.

    **Rerandomization.** When ``assignment`` is a ``CRDAssignment``
    produced by ``ReRandomizedCRD``, each permutation respects the
    Mahalanobis acceptance criterion automatically: ``CRDAssignment.draw``
    routes through ``ReRandomizedCRD._randomize_with_cached_cov``, which
    reuses the cached covariance matrix without recomputation.

    **Blocking.** When ``assignment`` is a ``BlockedAssignment``, each
    permutation rerandomizes within blocks, preserving the within-block
    treatment proportion. This is the correct null distribution for the
    blocked design.

    **Estimator state after `fit`.** The permutation loop refits
    ``self.estimator`` ``n_permutations`` times. After ``fit``
    completes, ``self.estimator`` is in the state of the *last*
    permutation, not the original assignment. To inspect the estimator
    on the original assignment, refit manually:
    ``rt.estimator.fit(rt.assignment_)``. The ``Results`` returned by
    ``estimate()`` is unaffected: it uses the observed statistic and
    metadata captured during ``fit`` before the loop runs.

    **Refit semantics.** Any prior fit state of ``estimator`` is
    discarded. Passing an estimator already fitted on a different
    dataset is allowed; it will be silently refitted on the assignment
    passed to ``fit``.

    **Future work (v2).** A ``"two-sided-conservative"`` alternative
    using ``2 * min(p_greater, p_less)`` may be added for cases with
    strong null asymmetry. Exact enumeration of all permutations for
    small N is also deferred to v2.

    Examples
    --------
    >>> from skxperiments.design.crd import CRD
    >>> from skxperiments.estimators.difference_in_means import (
    ...     DifferenceInMeans,
    ... )
    >>> from skxperiments.inference import RandomizationTest
    >>> design = CRD(p=0.5, seed=42)
    >>> assignment = design.randomize(df)  # doctest: +SKIP
    >>> dim = DifferenceInMeans(outcome_col="y")
    >>> rt = RandomizationTest(estimator=dim, n_permutations=10_000, seed=0)
    >>> result = rt.fit(assignment).estimate()  # doctest: +SKIP
    >>> result.p_value  # doctest: +SKIP
    """

    _VALID_ALTERNATIVES = ("two-sided", "greater", "less")

    def __init__(
        self,
        estimator: BaseEstimator,
        n_permutations: int = 10_000,
        alternative: str = "two-sided",
        seed: int | None = None,
    ) -> None:
        """Validate and store the test configuration.

        Raises
        ------
        InvalidDesignError
            If ``estimator`` is not a ``BaseEstimator``, if
            ``n_permutations`` is not a positive ``int`` (booleans are
            rejected explicitly), or if ``alternative`` is not one of
            ``_VALID_ALTERNATIVES``. ``seed`` is stored without validation.
        """
        if not isinstance(estimator, BaseEstimator):
            raise InvalidDesignError(
                f"estimator must be an instance of BaseEstimator, got "
                f"{type(estimator).__name__}."
            )

        # bool is a subclass of int, so it has to be excluded by hand.
        if not isinstance(n_permutations, int) or isinstance(
            n_permutations, bool
        ):
            raise InvalidDesignError(
                f"n_permutations must be an integer, got "
                f"{type(n_permutations).__name__}."
            )
        if n_permutations <= 0:
            raise InvalidDesignError(
                f"n_permutations must be > 0, got {n_permutations}."
            )

        if alternative not in self._VALID_ALTERNATIVES:
            raise InvalidDesignError(
                f"alternative must be one of {self._VALID_ALTERNATIVES}, "
                f"got {alternative!r}."
            )

        self.estimator = estimator
        self.n_permutations = n_permutations
        self.alternative = alternative
        self.seed = seed

    def fit(
        self,
        assignment: CRDAssignment | BlockedAssignment,
    ) -> "RandomizationTest":
        """Run the permutation loop and compute the p-value.

        All fitted state (public and private) is written only after the
        permutation loop has finished. If the loop raises or is
        interrupted, the attributes of a previous successful ``fit`` are
        left untouched, so ``estimate`` never combines metadata from one
        fit with the statistic and p-value of another. ``self.estimator``
        is still refitted as a side effect.

        Parameters
        ----------
        assignment : CRDAssignment or BlockedAssignment
            Observed assignment. ``FactorialAssignment`` is rejected
            with ``DesignEstimatorMismatch``.

        Returns
        -------
        RandomizationTest
            Returns self.

        Raises
        ------
        DesignEstimatorMismatch
            If ``assignment`` is not a ``CRDAssignment`` or
            ``BlockedAssignment``.
        InvalidDesignError
            If the estimator produces a multi-effect ``Results``
            (i.e., ``Results.ate is None``); v1 supports only scalar
            estimands.
        """
        self._validate_assignment_type(
            assignment, (CRDAssignment, BlockedAssignment)
        )

        # Refit on the original assignment to compute the observed
        # statistic. Any prior fit state of self.estimator is discarded.
        self.estimator.fit(assignment)
        base_results = self.estimator.estimate()

        if base_results.ate is None:
            raise InvalidDesignError(
                "RandomizationTest v1 supports only estimators producing "
                "a scalar ATE (Results.ate). The supplied estimator "
                f"({type(self.estimator).__name__}) produced a "
                "multi-effect Results (Results.effects). Multi-effect "
                "support is planned for v2."
            )

        observed_statistic = float(base_results.ate)

        # Snapshot the metadata into locals BEFORE the loop runs, so the
        # values describe the original assignment regardless of what the
        # repeated refits below do to self.estimator.
        n_obs = base_results.n_obs
        n_treated = base_results.n_treated
        n_control = base_results.n_control
        estimator_name = base_results.estimator_name
        design_name = base_results.design_name

        # Pre-generate one seed per permutation for reproducibility:
        # same self.seed -> same null_distribution_.
        rng = np.random.default_rng(self.seed)
        permutation_seeds = rng.integers(
            0, 2**32, size=self.n_permutations
        )

        null_distribution = np.empty(self.n_permutations, dtype=float)
        for i, perm_seed in enumerate(permutation_seeds):
            perm_assignment = assignment.draw(seed=int(perm_seed))
            self.estimator.fit(perm_assignment)
            null_distribution[i] = self.estimator.estimate().ate

        # Count permuted statistics at least as extreme as the observed
        # one, in the direction selected by self.alternative.
        if self.alternative == "greater":
            is_extreme = null_distribution >= observed_statistic
        elif self.alternative == "less":
            is_extreme = null_distribution <= observed_statistic
        else:  # "two-sided"
            is_extreme = np.abs(null_distribution) >= abs(observed_statistic)
        n_extreme = int(np.sum(is_extreme))

        # Phipson & Smyth (2010) continuity correction.
        p_value = (1 + n_extreme) / (1 + self.n_permutations)

        # Commit: the loop succeeded, so publish the new fit in one go.
        self._n_obs = n_obs
        self._n_treated = n_treated
        self._n_control = n_control
        self._estimator_name = estimator_name
        self._design_name = design_name
        self.assignment_: CRDAssignment | BlockedAssignment = assignment
        self.observed_statistic_: float = observed_statistic
        self.null_distribution_: np.ndarray = null_distribution
        self.p_value_: float = float(p_value)

        return self

    def estimate(self) -> Results:
        """Return a Results object with the observed ATE and p-value.

        Reads metadata from attributes captured during ``fit`` (before
        the permutation loop ran), not from ``self.estimator``, which
        is in the state of the last permutation after ``fit`` completes.

        Returns
        -------
        Results
            Results with:

            - ``ate`` set to the observed statistic;
            - ``p_value`` set to the Monte Carlo p-value;
            - ``inference_name="RandomizationTest"``;
            - ``extra`` containing ``n_permutations``,
              ``null_distribution``, ``alternative``;
            - ``se`` and ``ci`` not passed (RandomizationTest produces
              only a p-value).

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called.
        """
        self._check_is_fitted()

        return Results(
            ate=self.observed_statistic_,
            p_value=self.p_value_,
            n_obs=self._n_obs,
            n_treated=self._n_treated,
            n_control=self._n_control,
            estimator_name=self._estimator_name,
            design_name=self._design_name,
            inference_name=type(self).__name__,
            extra={
                "n_permutations": self.n_permutations,
                # Same array object as self.null_distribution_ (not a copy).
                "null_distribution": self.null_distribution_,
                "alternative": self.alternative,
            },
        )
|