skxperiments 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. skxperiments/__init__.py +5 -0
  2. skxperiments/core/__init__.py +42 -0
  3. skxperiments/core/assignment.py +589 -0
  4. skxperiments/core/base.py +512 -0
  5. skxperiments/core/exceptions.py +145 -0
  6. skxperiments/core/potential_outcomes.py +168 -0
  7. skxperiments/core/results.py +624 -0
  8. skxperiments/design/__init__.py +22 -0
  9. skxperiments/design/balance.py +182 -0
  10. skxperiments/design/blocked_crd.py +157 -0
  11. skxperiments/design/crd.py +162 -0
  12. skxperiments/design/factorial.py +174 -0
  13. skxperiments/design/power.py +233 -0
  14. skxperiments/design/rerandomized_crd.py +319 -0
  15. skxperiments/diagnostics/__init__.py +21 -0
  16. skxperiments/diagnostics/aa_test.py +277 -0
  17. skxperiments/diagnostics/balance_report.py +224 -0
  18. skxperiments/diagnostics/srm.py +327 -0
  19. skxperiments/estimators/__init__.py +23 -0
  20. skxperiments/estimators/blocked_difference_in_means.py +197 -0
  21. skxperiments/estimators/cuped.py +280 -0
  22. skxperiments/estimators/difference_in_means.py +161 -0
  23. skxperiments/estimators/factorial_estimator.py +213 -0
  24. skxperiments/estimators/lin_estimator.py +298 -0
  25. skxperiments/inference/__init__.py +17 -0
  26. skxperiments/inference/bootstrap.py +450 -0
  27. skxperiments/inference/multiple.py +365 -0
  28. skxperiments/inference/neyman.py +386 -0
  29. skxperiments/inference/randomization_test.py +319 -0
  30. skxperiments/pipeline.py +366 -0
  31. skxperiments/reporting/__init__.py +30 -0
  32. skxperiments/reporting/plots.py +411 -0
  33. skxperiments/reporting/summary.py +185 -0
  34. skxperiments-0.1.0.dev0.dist-info/METADATA +272 -0
  35. skxperiments-0.1.0.dev0.dist-info/RECORD +36 -0
  36. skxperiments-0.1.0.dev0.dist-info/WHEEL +4 -0
@@ -0,0 +1,386 @@
1
+ """Neyman variance-based confidence intervals for finite-population inference.
2
+
3
+ Implements ``NeymanCI``: wraps a fitted scalar estimator
4
+ (``DifferenceInMeans`` or ``BlockedDifferenceInMeans``), computes the
5
+ Neyman conservative variance for CRD or the stratified variance for
6
+ blocked designs, and constructs a two-sided Wald confidence interval and
7
+ p-value under the normal approximation.
8
+
9
+ References
10
+ ----------
11
+ Neyman, J. (1923/1990). On the application of probability theory to
12
+ agricultural experiments. Statistical Science, 5(4), 465-472.
13
+ Imbens, G. W., & Rubin, D. B. (2015). Causal Inference for Statistics,
14
+ Social, and Biomedical Sciences. Cambridge University Press.
15
+ Chapters 6 (CRD) and 9 (stratified/blocked).
16
+ """
17
+
18
+ import numpy as np
19
+ from scipy.stats import norm
20
+
21
+ from skxperiments.core.assignment import (
22
+ BlockedAssignment,
23
+ CRDAssignment,
24
+ )
25
+ from skxperiments.core.base import BaseEstimator, BaseInference
26
+ from skxperiments.core.exceptions import (
27
+ DesignEstimatorMismatch,
28
+ InsufficientDataError,
29
+ InvalidDesignError,
30
+ )
31
+ from skxperiments.core.results import Results
32
+ from skxperiments.estimators.blocked_difference_in_means import (
33
+ BlockedDifferenceInMeans,
34
+ )
35
+ from skxperiments.estimators.difference_in_means import DifferenceInMeans
36
+
37
# Estimator types NeymanCI accepts; anything else is rejected in
# ``NeymanCI.__init__`` with ``DesignEstimatorMismatch``.
_ACCEPTED_ESTIMATORS = (DifferenceInMeans, BlockedDifferenceInMeans)


class NeymanCI(BaseInference):
    """Neyman conservative confidence intervals for finite-population inference.

    Wraps a scalar estimator and computes a two-sided Wald CI using
    Neyman's variance estimator, dispatched by the assignment type:

    **CRD** (Neyman 1923):

        V_hat = s_t^2 / n_t + s_c^2 / n_c

    where ``s_t^2`` and ``s_c^2`` are the sample variances (``ddof=1``) of
    the outcome in the treated and control arms.

    **Blocked** (stratified, consistent with the size-weighted ATE of
    ``BlockedDifferenceInMeans``):

        V_hat   = sum_b (N_b / N)^2 * V_hat_b,
        V_hat_b = s_{t,b}^2 / n_{t,b} + s_{c,b}^2 / n_{c,b}

    The confidence interval is ``ATE_hat +/- z_{1 - alpha/2} * SE`` and the
    p-value is the two-sided Wald test ``z = ATE_hat / SE``,
    ``p = 2 * (1 - Phi(|z|))``, both under the normal approximation.

    Parameters
    ----------
    estimator : DifferenceInMeans or BlockedDifferenceInMeans
        Causal estimator producing a scalar ATE (``Results.ate``). Need
        not be pre-fitted: ``fit`` refits it on the supplied assignment.
        Any other type raises ``DesignEstimatorMismatch`` at construction.
        ``CUPED`` and ``LinEstimator`` support is planned for a future
        sub-phase (see ROADMAP).
    alpha : float, optional
        Significance level for the confidence interval, by default 0.05.
        The CI is two-sided: a ``(1 - alpha) * 100%`` interval.

    Attributes
    ----------
    assignment_ : CRDAssignment or BlockedAssignment
        The assignment passed to the last successful ``fit``.
    variance_ : float
        Estimated Neyman variance ``V_hat``.

    Notes
    -----
    **Finite-population scope.** ``NeymanCI`` v1 targets finite-population
    inference. The Neyman variance formula is numerically identical under
    the superpopulation interpretation, so the restriction is a scope
    choice rather than a mathematical limitation: for superpopulation
    inference use ``BootstrapCI`` (Phase 4.4). If a wrapped estimator
    reports ``inference_mode="superpopulation"`` in its ``Results.extra``,
    ``fit`` raises ``InvalidDesignError`` redirecting to ``BootstrapCI``.

    **Conservative variance.** Neyman's estimator is conservative
    (upward-biased) when individual treatment effects vary across units,
    and unbiased when the effect is constant. Under the normal
    approximation, CI coverage is therefore at least ``1 - alpha``.

    **Rerandomization.** A ``CRDAssignment`` produced by ``ReRandomizedCRD``
    is accepted: the variance formula is the same as for plain CRD.
    Rerandomization improves covariate balance; Neyman's estimator remains
    valid (and conservative) under it.

    **Estimator compatibility.** ``NeymanCI`` accepts both ``CRDAssignment``
    and ``BlockedAssignment``, but each wrapped estimator enforces its own
    assignment contract in ``fit``: pairing ``DifferenceInMeans`` with a
    ``BlockedAssignment`` (or vice versa) raises ``DesignEstimatorMismatch``
    from the estimator. Only the matching pairs
    (``DifferenceInMeans`` + CRD, ``BlockedDifferenceInMeans`` + blocked)
    proceed to variance computation.

    **Failed refits.** ``fit`` stores its results only after every
    validation and the variance computation have succeeded. If ``fit``
    raises on an already-fitted instance, the previously stored results
    are left untouched (the wrapped estimator itself has still been
    refitted).

    Examples
    --------
    >>> from skxperiments.design.crd import CRD
    >>> from skxperiments.estimators.difference_in_means import (
    ...     DifferenceInMeans,
    ... )
    >>> from skxperiments.inference import NeymanCI
    >>> design = CRD(p=0.5, seed=42)
    >>> assignment = design.randomize(df)  # doctest: +SKIP
    >>> dim = DifferenceInMeans(outcome_col="y")
    >>> ci = NeymanCI(estimator=dim, alpha=0.05)
    >>> result = ci.fit(assignment).estimate()  # doctest: +SKIP
    >>> result.ci  # doctest: +SKIP
    """

    def __init__(
        self,
        estimator: BaseEstimator,
        alpha: float = 0.05,
    ) -> None:
        """Validate and store the wrapped estimator and significance level.

        Raises
        ------
        DesignEstimatorMismatch
            If ``estimator`` is not one of the accepted estimator types.
        InvalidDesignError
            If ``alpha`` is not a real number strictly between 0 and 1.
        """
        if not isinstance(estimator, _ACCEPTED_ESTIMATORS):
            accepted_names = " or ".join(
                t.__name__ for t in _ACCEPTED_ESTIMATORS
            )
            raise DesignEstimatorMismatch(
                estimator_name=type(self).__name__,
                received_type=type(estimator).__name__,
                expected_type=accepted_names,
                suggestion=(
                    f"{accepted_names}. CUPED and LinEstimator support is "
                    f"planned for a future sub-phase (see ROADMAP)."
                ),
            )

        # bool is a subclass of int, so it must be excluded explicitly.
        if not isinstance(alpha, (int, float)) or isinstance(alpha, bool):
            raise InvalidDesignError(
                f"alpha must be a float in (0, 1), got "
                f"{type(alpha).__name__}."
            )
        if not (0.0 < alpha < 1.0):
            raise InvalidDesignError(
                f"alpha must be in (0, 1), got {alpha}."
            )

        self.estimator = estimator
        self.alpha = alpha

    def fit(
        self,
        assignment: CRDAssignment | BlockedAssignment,
    ) -> "NeymanCI":
        """Refit the estimator and compute the Neyman variance.

        All fitted state is written in one step at the very end, after
        every check and the variance computation have succeeded, so a
        failing call never leaves a half-updated instance behind.

        Parameters
        ----------
        assignment : CRDAssignment or BlockedAssignment
            Observed assignment. ``FactorialAssignment`` is rejected with
            ``DesignEstimatorMismatch``.

        Returns
        -------
        NeymanCI
            Returns self.

        Raises
        ------
        DesignEstimatorMismatch
            If ``assignment`` is not a ``CRDAssignment`` or
            ``BlockedAssignment``.
        InvalidDesignError
            If the estimator returns a multi-effect ``Results``
            (``Results.ate is None``; v1 supports only scalar estimands),
            or if it reports ``inference_mode="superpopulation"`` (use
            ``BootstrapCI`` instead).
        InsufficientDataError
            If any arm (CRD) or any arm within a block (blocked) has fewer
            than 2 observations, so the sample variance is undefined.
        """
        self._validate_assignment_type(
            assignment, (CRDAssignment, BlockedAssignment)
        )

        # Refit on the original assignment to obtain the point estimate.
        # Any prior fit state of self.estimator is discarded.
        self.estimator.fit(assignment)
        base_results = self.estimator.estimate()

        if base_results.ate is None:
            raise InvalidDesignError(
                "NeymanCI v1 supports only estimators producing a scalar "
                "ATE (Results.ate). The supplied estimator "
                f"({type(self.estimator).__name__}) produced a multi-effect "
                "Results (Results.effects). Multi-effect support is planned "
                "for v2."
            )

        # inference_mode defaults to finite_population; only LinEstimator
        # currently writes this key, but the guard is enforced for any
        # whitelisted estimator that may emit it.
        if base_results.extra is not None:
            inference_mode = base_results.extra.get(
                "inference_mode", "finite_population"
            )
        else:
            inference_mode = "finite_population"

        if inference_mode == "superpopulation":
            raise InvalidDesignError(
                "NeymanCI v1 targets finite-population inference. The "
                "supplied estimator reported "
                "inference_mode='superpopulation'. The Neyman variance "
                "formula is identical under both interpretations; this "
                "restriction is a scope choice, not a mathematical "
                "limitation. For superpopulation inference use BootstrapCI "
                "(Phase 4.4)."
            )

        # Snapshot everything into locals first. Nothing is stored on
        # self until the variance (which may raise
        # InsufficientDataError) has been computed.
        ate = float(base_results.ate)
        n_obs = base_results.n_obs
        n_treated = base_results.n_treated
        n_control = base_results.n_control
        estimator_name = base_results.estimator_name
        design_name = base_results.design_name

        # Dispatch the variance computation by assignment type.
        if isinstance(assignment, BlockedAssignment):
            variance = float(self._neyman_variance_blocked(assignment))
            variance_type = "neyman_stratified"
        else:
            variance = float(self._neyman_variance_crd(assignment))
            variance_type = "neyman"

        # Commit: from here on no statement can raise, so the instance
        # moves from the old fitted state to the new one as a whole.
        self._n_obs = n_obs
        self._n_treated = n_treated
        self._n_control = n_control
        self._estimator_name = estimator_name
        self._design_name = design_name
        self._ate = ate
        self._inference_mode = inference_mode
        self._variance_type = variance_type
        self.assignment_: CRDAssignment | BlockedAssignment = assignment
        self.variance_: float = variance

        return self

    @staticmethod
    def _two_arm_variance(
        y_t: np.ndarray,
        y_c: np.ndarray,
        treated_context: str,
        control_context: str,
    ) -> float:
        """Return ``s_t^2 / n_t + s_c^2 / n_c`` for one pair of arms.

        Both arm sizes are checked (treated first, then control) before
        any variance is computed.

        Raises
        ------
        InsufficientDataError
            If either arm has fewer than 2 observations; ``context`` is
            the matching ``*_context`` string.
        """
        n_t = len(y_t)
        n_c = len(y_c)

        if n_t < 2:
            raise InsufficientDataError(
                context=treated_context,
                minimum=2,
                received=n_t,
            )
        if n_c < 2:
            raise InsufficientDataError(
                context=control_context,
                minimum=2,
                received=n_c,
            )

        s2_t = float(np.var(y_t, ddof=1))
        s2_c = float(np.var(y_c, ddof=1))

        return s2_t / n_t + s2_c / n_c

    def _neyman_variance_crd(self, assignment: CRDAssignment) -> float:
        """Compute the Neyman conservative variance for CRD."""
        y = assignment.data_[self.estimator.outcome_col].values
        # NOTE(review): the id arrays are used to index the raw value
        # array positionally; this presumably relies on treated_ids() /
        # control_ids() returning positions rather than index labels.
        y_t = y[assignment.treated_ids()]
        y_c = y[assignment.control_ids()]

        return self._two_arm_variance(
            y_t,
            y_c,
            treated_context="NeymanCI variance (treated arm)",
            control_context="NeymanCI variance (control arm)",
        )

    def _neyman_variance_blocked(self, assignment: BlockedAssignment) -> float:
        """Compute the stratified Neyman variance for a blocked design."""
        data = assignment.data_
        y_col = self.estimator.outcome_col
        treat_col = assignment.treatment_col_
        block_col = assignment.block_col_
        n_total = assignment.n_units_

        variance_total = 0.0

        for block_val, n_b in assignment.block_sizes_.items():
            block_data = data.loc[data[block_col] == block_val]
            block_treat = block_data[treat_col].values
            block_y = block_data[y_col].values

            v_b = self._two_arm_variance(
                block_y[block_treat == 1],
                block_y[block_treat == 0],
                treated_context=(
                    f"NeymanCI variance (treated arm in block "
                    f"'{block_val}')"
                ),
                control_context=(
                    f"NeymanCI variance (control arm in block "
                    f"'{block_val}')"
                ),
            )

            # Squared block-share weight, per the stratified formula.
            weight = n_b / n_total
            variance_total += weight**2 * v_b

        return variance_total

    def estimate(self) -> Results:
        """Return a Results object with the ATE, SE, CI, and p-value.

        Returns
        -------
        Results
            Results with:

            - ``ate`` set to the observed point estimate;
            - ``se`` set to ``sqrt(V_hat)``;
            - ``ci`` set to the two-sided ``(1 - alpha) * 100%`` Wald CI;
            - ``p_value`` set to the two-sided Wald p-value;
            - ``alpha`` set to ``self.alpha``;
            - ``inference_name="NeymanCI"``;
            - ``extra`` containing ``variance_type`` and ``inference_mode``.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called.
        InvalidDesignError
            If the standard error is zero (degenerate case: constant
            outcomes within each arm).
        """
        self._check_is_fitted()

        se = float(np.sqrt(self.variance_))

        if se == 0.0:
            raise InvalidDesignError(
                "NeymanCI: the estimated standard error is zero, indicating "
                "a degenerate dataset (constant outcomes within each arm). "
                "Cannot compute a confidence interval or p-value."
            )

        z_crit = float(norm.ppf(1.0 - self.alpha / 2.0))
        ci_lower = self._ate - z_crit * se
        ci_upper = self._ate + z_crit * se

        z_stat = self._ate / se
        # Use the survival function rather than 1 - cdf: for large |z|
        # the cdf rounds to 1.0 and the subtraction would collapse the
        # p-value to exactly 0.0.
        p_value = float(2.0 * norm.sf(abs(z_stat)))
        # Clamp to [0, 1] for numerical safety.
        p_value = max(0.0, min(1.0, p_value))

        return Results(
            ate=self._ate,
            se=se,
            ci=(ci_lower, ci_upper),
            p_value=p_value,
            alpha=self.alpha,
            n_obs=self._n_obs,
            n_treated=self._n_treated,
            n_control=self._n_control,
            estimator_name=self._estimator_name,
            design_name=self._design_name,
            inference_name=type(self).__name__,
            extra={
                "variance_type": self._variance_type,
                "inference_mode": self._inference_mode,
            },
        )
@@ -0,0 +1,319 @@
1
+ """Randomization-based inference via Fisher's sharp null hypothesis.
2
+
3
+ Implements ``RandomizationTest``: for each of ``n_permutations``
4
+ draws, generates a fresh ``Assignment`` via ``BaseAssignment.draw()``
5
+ (which respects the original randomization mechanism — including
6
+ rerandomization Mahalanobis criteria and within-block proportions —
7
+ because each Assignment subclass routes ``draw()`` through its
8
+ generating design), refits the estimator, and collects the resulting
9
+ ATE under the sharp null of no individual treatment effect.
10
+
11
+ The p-value is computed with the Phipson & Smyth (2010) continuity
12
+ correction, ``(1 + n_extreme) / (1 + n_permutations)``, which
13
+ guarantees a valid Monte Carlo p-value bounded away from zero.
14
+
15
+ References
16
+ ----------
17
+ Fisher, R. A. (1935). The Design of Experiments. Oliver and Boyd.
18
+ Phipson, B., & Smyth, G. K. (2010). Permutation P-values should never
19
+ be zero: calculating exact P-values when permutations are randomly
20
+ drawn. Statistical Applications in Genetics and Molecular Biology,
21
+ 9(1), Article 39.
22
+ """
23
+
24
+ import numpy as np
25
+
26
+ from skxperiments.core.assignment import (
27
+ BlockedAssignment,
28
+ CRDAssignment,
29
+ )
30
+ from skxperiments.core.base import BaseEstimator, BaseInference
31
+ from skxperiments.core.exceptions import InvalidDesignError
32
+ from skxperiments.core.results import Results
33
+
34
+
35
class RandomizationTest(BaseInference):
    """Fisher randomization test via Monte Carlo permutations.

    Tests Fisher's sharp null hypothesis,
    ``H0: Y_i(1) = Y_i(0)`` for all i, by generating
    ``n_permutations`` fresh assignments from the same randomization
    mechanism that produced the observed assignment, refitting the
    estimator on each, and comparing the observed ATE to the resulting
    null distribution.

    The p-value uses the Phipson & Smyth (2010) continuity correction:

        p = (1 + n_extreme) / (1 + n_permutations)

    which guarantees a valid Monte Carlo p-value strictly greater than
    zero.

    Parameters
    ----------
    estimator : BaseEstimator
        Causal estimator producing a scalar ATE (``Results.ate``).
        Need not be pre-fitted: ``RandomizationTest.fit`` will refit
        it on the supplied assignment. Estimators producing
        multi-effect ``Results`` (e.g., ``FactorialEstimator``) are
        not supported in v1; ``fit`` will raise ``InvalidDesignError``
        if the estimator returns ``Results.ate is None``.
    n_permutations : int, optional
        Number of Monte Carlo permutations, by default 10_000. Must
        be a positive integer.
    alternative : {"two-sided", "greater", "less"}, optional
        Alternative hypothesis, by default ``"two-sided"``.

        - ``"two-sided"`` uses the criterion ``|T_perm| >= |T_obs|``.
          Valid under any null distribution shape but most natural
          when the null is approximately symmetric around zero (the
          typical case under CRD with Fisher's sharp null and
          balanced sample sizes). Under ``BlockedAssignment`` with
          highly unequal blocks or ``ReRandomizedCRD`` with a tight
          threshold, the null distribution may be slightly asymmetric;
          the absolute-value criterion remains valid and slightly
          conservative. For directional hypotheses with strong
          expected asymmetry, prefer ``"greater"`` or ``"less"``.
        - ``"greater"`` uses ``T_perm >= T_obs``.
        - ``"less"`` uses ``T_perm <= T_obs``.

    seed : int or None, optional
        Random seed for reproducibility. The same ``seed`` produces
        the same ``null_distribution_``. Internally, a single
        ``np.random.default_rng(seed)`` pre-generates one seed per
        permutation, which is then passed to ``Assignment.draw``.
        By default None.

    Attributes
    ----------
    assignment_ : CRDAssignment or BlockedAssignment
        The assignment passed to the last successful ``fit``.
    observed_statistic_ : float
        ATE estimated by the estimator on the original assignment,
        captured before the permutation loop runs.
    null_distribution_ : np.ndarray
        Array of permuted ATEs under the sharp null. Length equals
        ``n_permutations``.
    p_value_ : float
        Monte Carlo p-value computed with the Phipson & Smyth
        continuity correction.

    Notes
    -----
    **Sharp null vs. Neyman null.** This class tests Fisher's sharp
    null of no individual treatment effect, not Neyman's null of zero
    average treatment effect. ``BootstrapCI`` (Phase 4.4) will offer
    superpopulation inference.

    **Rerandomization.** When ``assignment`` is a ``CRDAssignment``
    produced by ``ReRandomizedCRD``, each permutation respects the
    Mahalanobis acceptance criterion automatically: ``CRDAssignment.draw``
    routes through ``ReRandomizedCRD._randomize_with_cached_cov``, which
    reuses the cached covariance matrix without recomputation.

    **Blocking.** When ``assignment`` is a ``BlockedAssignment``, each
    permutation rerandomizes within blocks, preserving the within-block
    treatment proportion. This is the correct null distribution for the
    blocked design.

    **Estimator state after `fit`.** The permutation loop refits
    ``self.estimator`` ``n_permutations`` times. After ``fit``
    completes, ``self.estimator`` is in the state of the *last*
    permutation, not the original assignment. To inspect the estimator
    on the original assignment, refit manually:
    ``rt.estimator.fit(rt.assignment_)``. The ``Results`` returned by
    ``estimate()`` is unaffected: it uses the observed statistic and
    metadata captured during ``fit`` before the loop runs.

    **Refit semantics.** Any prior fit state of ``estimator`` is
    discarded. Passing an estimator already fitted on a different
    dataset is allowed; it will be silently refitted on the assignment
    passed to ``fit``.

    **Failed refits.** ``fit`` stores its results only after the whole
    permutation loop has completed. If ``fit`` raises on an
    already-fitted instance, the previously stored results are left
    untouched (the wrapped estimator itself has still been refitted).

    **Future work (v2).** A ``"two-sided-conservative"`` alternative
    using ``2 * min(p_greater, p_less)`` may be added for cases with
    strong null asymmetry. Exact enumeration of all permutations for
    small N is also deferred to v2.

    Examples
    --------
    >>> from skxperiments.design.crd import CRD
    >>> from skxperiments.estimators.difference_in_means import (
    ...     DifferenceInMeans,
    ... )
    >>> from skxperiments.inference import RandomizationTest
    >>> design = CRD(p=0.5, seed=42)
    >>> assignment = design.randomize(df)  # doctest: +SKIP
    >>> dim = DifferenceInMeans(outcome_col="y")
    >>> rt = RandomizationTest(estimator=dim, n_permutations=10_000, seed=0)
    >>> result = rt.fit(assignment).estimate()  # doctest: +SKIP
    >>> result.p_value  # doctest: +SKIP
    """

    _VALID_ALTERNATIVES = ("two-sided", "greater", "less")

    def __init__(
        self,
        estimator: BaseEstimator,
        n_permutations: int = 10_000,
        alternative: str = "two-sided",
        seed: int | None = None,
    ) -> None:
        """Validate and store the test configuration.

        Raises
        ------
        InvalidDesignError
            If ``estimator`` is not a ``BaseEstimator``, if
            ``n_permutations`` is not a positive integer, or if
            ``alternative`` is not one of the supported values.
        """
        if not isinstance(estimator, BaseEstimator):
            raise InvalidDesignError(
                f"estimator must be an instance of BaseEstimator, got "
                f"{type(estimator).__name__}."
            )

        # bool is a subclass of int, so it must be excluded explicitly.
        if not isinstance(n_permutations, int) or isinstance(
            n_permutations, bool
        ):
            raise InvalidDesignError(
                f"n_permutations must be an integer, got "
                f"{type(n_permutations).__name__}."
            )
        if n_permutations <= 0:
            raise InvalidDesignError(
                f"n_permutations must be > 0, got {n_permutations}."
            )

        if alternative not in self._VALID_ALTERNATIVES:
            raise InvalidDesignError(
                f"alternative must be one of {self._VALID_ALTERNATIVES}, "
                f"got {alternative!r}."
            )

        self.estimator = estimator
        self.n_permutations = n_permutations
        self.alternative = alternative
        self.seed = seed

    def fit(
        self,
        assignment: CRDAssignment | BlockedAssignment,
    ) -> "RandomizationTest":
        """Run the permutation loop and compute the p-value.

        All fitted state is written in one step at the very end, after
        the permutation loop has completed, so a failing call never
        leaves a half-updated instance behind.

        Parameters
        ----------
        assignment : CRDAssignment or BlockedAssignment
            Observed assignment. ``FactorialAssignment`` is rejected
            with ``DesignEstimatorMismatch``.

        Returns
        -------
        RandomizationTest
            Returns self.

        Raises
        ------
        DesignEstimatorMismatch
            If ``assignment`` is not a ``CRDAssignment`` or
            ``BlockedAssignment``.
        InvalidDesignError
            If the estimator produces a multi-effect ``Results``
            (i.e., ``Results.ate is None``); v1 supports only scalar
            estimands.
        """
        self._validate_assignment_type(
            assignment, (CRDAssignment, BlockedAssignment)
        )

        # Refit on the original assignment to compute the observed
        # statistic. Any prior fit state of self.estimator is discarded.
        self.estimator.fit(assignment)
        base_results = self.estimator.estimate()

        if base_results.ate is None:
            raise InvalidDesignError(
                "RandomizationTest v1 supports only estimators producing "
                "a scalar ATE (Results.ate). The supplied estimator "
                f"({type(self.estimator).__name__}) produced a "
                "multi-effect Results (Results.effects). Multi-effect "
                "support is planned for v2."
            )

        observed_statistic = float(base_results.ate)

        # Snapshot the metadata into locals BEFORE the loop runs: the
        # loop refits self.estimator, and nothing is stored on self
        # until the loop has finished without error.
        n_obs = base_results.n_obs
        n_treated = base_results.n_treated
        n_control = base_results.n_control
        estimator_name = base_results.estimator_name
        design_name = base_results.design_name

        # Pre-generate one seed per permutation for reproducibility:
        # same self.seed -> same null_distribution_.
        rng = np.random.default_rng(self.seed)
        permutation_seeds = rng.integers(
            0, 2**32, size=self.n_permutations
        )

        null_distribution = np.empty(self.n_permutations, dtype=float)
        for i, perm_seed in enumerate(permutation_seeds):
            perm_assignment = assignment.draw(seed=int(perm_seed))
            self.estimator.fit(perm_assignment)
            null_distribution[i] = self.estimator.estimate().ate

        # Mark the permuted statistics at least as extreme as the
        # observed one, in the direction given by ``alternative``.
        if self.alternative == "greater":
            is_extreme = null_distribution >= observed_statistic
        elif self.alternative == "less":
            is_extreme = null_distribution <= observed_statistic
        else:  # "two-sided"
            is_extreme = np.abs(null_distribution) >= abs(observed_statistic)
        n_extreme = int(np.count_nonzero(is_extreme))

        # Phipson & Smyth (2010) continuity correction.
        p_value = (1 + n_extreme) / (1 + self.n_permutations)

        # Commit: from here on no statement can raise, so the instance
        # moves from the old fitted state to the new one as a whole.
        self._n_obs = n_obs
        self._n_treated = n_treated
        self._n_control = n_control
        self._estimator_name = estimator_name
        self._design_name = design_name
        self.assignment_: CRDAssignment | BlockedAssignment = assignment
        self.observed_statistic_: float = observed_statistic
        self.null_distribution_: np.ndarray = null_distribution
        self.p_value_: float = float(p_value)

        return self

    def estimate(self) -> Results:
        """Return a Results object with the observed ATE and p-value.

        Reads metadata from attributes captured during ``fit`` (before
        the permutation loop ran), not from ``self.estimator``, which
        is in the state of the last permutation after ``fit`` completes.

        Returns
        -------
        Results
            Results with:

            - ``ate`` set to the observed statistic;
            - ``p_value`` set to the Monte Carlo p-value;
            - ``inference_name="RandomizationTest"``;
            - ``extra`` containing ``n_permutations``,
              ``null_distribution``, ``alternative``;
            - ``se`` and ``ci`` not passed to ``Results``
              (RandomizationTest produces only a p-value).

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called.
        """
        self._check_is_fitted()

        return Results(
            ate=self.observed_statistic_,
            p_value=self.p_value_,
            n_obs=self._n_obs,
            n_treated=self._n_treated,
            n_control=self._n_control,
            estimator_name=self._estimator_name,
            design_name=self._design_name,
            inference_name=type(self).__name__,
            extra={
                "n_permutations": self.n_permutations,
                # The fitted array itself, not a copy.
                "null_distribution": self.null_distribution_,
                "alternative": self.alternative,
            },
        )