skxperiments 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. skxperiments/__init__.py +5 -0
  2. skxperiments/core/__init__.py +42 -0
  3. skxperiments/core/assignment.py +589 -0
  4. skxperiments/core/base.py +512 -0
  5. skxperiments/core/exceptions.py +145 -0
  6. skxperiments/core/potential_outcomes.py +168 -0
  7. skxperiments/core/results.py +624 -0
  8. skxperiments/design/__init__.py +22 -0
  9. skxperiments/design/balance.py +182 -0
  10. skxperiments/design/blocked_crd.py +157 -0
  11. skxperiments/design/crd.py +162 -0
  12. skxperiments/design/factorial.py +174 -0
  13. skxperiments/design/power.py +233 -0
  14. skxperiments/design/rerandomized_crd.py +319 -0
  15. skxperiments/diagnostics/__init__.py +21 -0
  16. skxperiments/diagnostics/aa_test.py +277 -0
  17. skxperiments/diagnostics/balance_report.py +224 -0
  18. skxperiments/diagnostics/srm.py +327 -0
  19. skxperiments/estimators/__init__.py +23 -0
  20. skxperiments/estimators/blocked_difference_in_means.py +197 -0
  21. skxperiments/estimators/cuped.py +280 -0
  22. skxperiments/estimators/difference_in_means.py +161 -0
  23. skxperiments/estimators/factorial_estimator.py +213 -0
  24. skxperiments/estimators/lin_estimator.py +298 -0
  25. skxperiments/inference/__init__.py +17 -0
  26. skxperiments/inference/bootstrap.py +450 -0
  27. skxperiments/inference/multiple.py +365 -0
  28. skxperiments/inference/neyman.py +386 -0
  29. skxperiments/inference/randomization_test.py +319 -0
  30. skxperiments/pipeline.py +366 -0
  31. skxperiments/reporting/__init__.py +30 -0
  32. skxperiments/reporting/plots.py +411 -0
  33. skxperiments/reporting/summary.py +185 -0
  34. skxperiments-0.1.0.dev0.dist-info/METADATA +272 -0
  35. skxperiments-0.1.0.dev0.dist-info/RECORD +36 -0
  36. skxperiments-0.1.0.dev0.dist-info/WHEEL +4 -0
@@ -0,0 +1,233 @@
1
+ """Power analysis for two-sample experiments with continuous outcomes.
2
+
3
+ Provides ``power_analysis``, a standalone function that solves for any
4
+ one of (sample size, minimum detectable effect, power) given the other
5
+ two, under the normal-approximation framework for the difference of
6
+ two means.
7
+
8
+ Scope (v1):
9
+ - Two groups (treated and control), continuous outcome.
10
+ - Test of mean difference, two-sided by default.
11
+ - Asymptotic normal approximation; valid for large n. Does not use
12
+ the t distribution.
13
+ - Designs other than two-arm CRD (blocked, factorial, cluster,
14
+ sequential) are out of scope; binary outcomes are out of scope.
15
+ """
16
+
17
+ import math
18
+ from dataclasses import dataclass
19
+
20
+ from scipy.stats import norm
21
+
22
+ from skxperiments.core.exceptions import InvalidDesignError
23
+
24
+
25
@dataclass
class PowerResult:
    """Container for the outcome of a ``power_analysis`` call.

    Every field is filled in, regardless of whether it was supplied by
    the caller or solved for. A single instance therefore describes the
    whole analysis and can be logged or reused without extra context.

    Attributes
    ----------
    n_total : int
        Total number of units across both groups.
    n_treated : int
        Units assigned to the treatment group.
    n_control : int
        Units assigned to the control group; by construction
        ``n_treated + n_control == n_total``.
    mde : float
        Minimum detectable effect, reported as a positive number.
    power : float
        Statistical power (1 - beta), a value in (0, 1).
    alpha : float
        Significance level, a value in (0, 1).
    std : float
        Outcome standard deviation, taken to be the same in both groups.
    allocation : float
        Share of units assigned to treatment.
    two_sided : bool
        ``True`` when the test is two-sided.
    """

    # Sample sizes.
    n_total: int
    n_treated: int
    n_control: int

    # Effect size and error rates.
    mde: float
    power: float
    alpha: float

    # Design assumptions carried over from the call.
    std: float
    allocation: float
    two_sided: bool
65
+
66
+
67
def power_analysis(
    *,
    n: int | None = None,
    mde: float | None = None,
    power: float | None = None,
    std: float,
    alpha: float = 0.05,
    allocation: float = 0.5,
    two_sided: bool = True,
) -> PowerResult:
    """Solve for sample size, MDE, or power in a two-sample experiment.

    Exactly one of ``n``, ``mde``, ``power`` must be ``None`` — that
    is the quantity to be resolved. The other two must be provided.

    Parameters
    ----------
    n : int or None, optional
        Total sample size. Pass None to solve for n. By default None.
    mde : float or None, optional
        Minimum detectable effect (mean difference). Only its absolute
        value is used. Pass None to solve for MDE. By default None.
    power : float or None, optional
        Desired power (1 - beta). Pass None to solve for power. By
        default None.
    std : float
        Standard deviation of the outcome, assumed equal across
        groups. Required.
    alpha : float, optional
        Significance level, by default 0.05.
    allocation : float, optional
        Proportion of units allocated to treatment, in (0, 1), by
        default 0.5.
    two_sided : bool, optional
        Whether the test is two-sided, by default True.

    Returns
    -------
    PowerResult
        Self-describing result with all fields populated.

    Raises
    ------
    InvalidDesignError
        If more than one (or none) of ``n``, ``mde``, ``power`` is
        None; if ``alpha`` or ``allocation`` are not in (0, 1); if
        ``power`` (when provided) is not in (0, 1), or is not above
        ``alpha / 2`` (two-sided) / ``alpha`` (one-sided); if ``std``
        is not a finite number > 0; if ``mde`` is zero or not finite
        when provided; if ``n`` is not a positive integer when
        provided.

    Notes
    -----
    Under the normal approximation for the difference of two
    independent means with common variance ``std**2``:

        sigma_eff = std * sqrt(1/allocation + 1/(1 - allocation))

    With ``z_alpha = Phi^{-1}(1 - alpha/2)`` for two-sided tests
    (or ``Phi^{-1}(1 - alpha)`` for one-sided) and
    ``z_beta = Phi^{-1}(power)``:

        n_total = ceil(((z_alpha + z_beta) * sigma_eff / mde)**2)
        mde     = (z_alpha + z_beta) * sigma_eff / sqrt(n_total)
        power   = Phi(sqrt(n_total) * |mde| / sigma_eff - z_alpha)

    The power expression only counts rejections in the direction of
    the effect, so at zero effect it equals ``alpha / 2`` (two-sided)
    or ``alpha`` (one-sided). A requested power at or below that floor
    makes ``z_alpha + z_beta <= 0`` and has no positive solution for
    ``n`` or ``mde``; such requests are rejected instead of being
    squared into a spurious answer.

    See Cohen (1988), *Statistical Power Analysis for the Behavioral
    Sciences*, for derivations.

    Examples
    --------
    >>> result = power_analysis(
    ...     mde=0.2, power=0.8, std=1.0, alpha=0.05
    ... )
    >>> result.n_total  # total over both arms (~392.4 per arm)
    785
    """
    # --- Validate which target to solve for -----------------------
    targets_none = sum(x is None for x in (n, mde, power))
    if targets_none != 1:
        raise InvalidDesignError(
            f"power_analysis requires exactly one of n, mde, power to be "
            f"None; received {targets_none} None values. "
            f"n={n!r}, mde={mde!r}, power={power!r}."
        )

    # --- Validate other parameters --------------------------------
    # Chained comparisons are written so that NaN fails them.
    if not (0.0 < alpha < 1.0):
        raise InvalidDesignError(
            f"alpha must be in (0, 1), but received {alpha}."
        )

    if not (0.0 < allocation < 1.0):
        raise InvalidDesignError(
            f"allocation must be in (0, 1), but received {allocation}."
        )

    # ``std <= 0`` alone would let NaN and inf through.
    if not (std > 0 and math.isfinite(std)):
        raise InvalidDesignError(
            f"std must be > 0 and finite, but received {std}."
        )

    if power is not None and not (0.0 < power < 1.0):
        raise InvalidDesignError(
            f"power must be in (0, 1), but received {power}."
        )

    if mde is not None:
        if mde == 0:
            raise InvalidDesignError(
                "mde must be non-zero when provided; received 0."
            )
        if not math.isfinite(mde):
            raise InvalidDesignError(
                f"mde must be finite when provided; received {mde}."
            )

    if n is not None:
        # bool is a subclass of int; True/False are not sample sizes.
        if isinstance(n, bool) or not isinstance(n, int) or n <= 0:
            raise InvalidDesignError(
                f"n must be a positive integer when provided, "
                f"but received {n!r}."
            )

    # --- Compute z-quantiles --------------------------------------
    tail_alpha = alpha / 2.0 if two_sided else alpha
    z_alpha = float(norm.ppf(1.0 - tail_alpha))

    sigma_eff = std * math.sqrt(1.0 / allocation + 1.0 / (1.0 - allocation))

    # --- Solve for the missing target -----------------------------
    if power is None:
        # Solve for power given n, mde.
        resolved_mde = abs(mde)
        n_total = n
        z_beta = math.sqrt(n_total) * resolved_mde / sigma_eff - z_alpha
        resolved_power = float(norm.cdf(z_beta))
    else:
        z_sum = z_alpha + float(norm.ppf(power))
        # At or below the zero-effect rejection rate the formulas have
        # no positive solution; squaring / abs() would hide that.
        if power <= tail_alpha or z_sum <= 0.0:
            raise InvalidDesignError(
                f"power must be greater than {tail_alpha} for "
                f"alpha={alpha} (two_sided={two_sided}), but received "
                f"{power}. No positive sample size or effect size "
                f"attains a power at or below that level."
            )
        resolved_power = power

        if n is None:
            # Solve for n_total given mde, power.
            resolved_mde = abs(mde)
            # max(1, ...) guards against floating-point underflow to 0.
            n_total = max(
                1, math.ceil((z_sum * sigma_eff / resolved_mde) ** 2)
            )
        else:
            # Solve for mde given n, power (positive because z_sum > 0).
            n_total = n
            resolved_mde = z_sum * sigma_eff / math.sqrt(n_total)

    # --- Build allocation counts that sum exactly to n_total ------
    n_treated = int(round(n_total * allocation))
    n_control = n_total - n_treated

    return PowerResult(
        n_total=n_total,
        n_treated=n_treated,
        n_control=n_control,
        mde=resolved_mde,
        power=resolved_power,
        alpha=alpha,
        std=std,
        allocation=allocation,
        two_sided=two_sided,
    )
@@ -0,0 +1,319 @@
1
+ """Rerandomized Completely Randomized Design (Morgan & Rubin 2012).
2
+
3
+ Repeatedly proposes CRD assignments and accepts the first one whose
4
+ between-group Mahalanobis distance on the specified covariates is
5
+ below a fixed threshold. Improves covariate balance over plain CRD
6
+ while preserving the validity of randomization-based inference, as
7
+ long as the same acceptance criterion is applied when generating
8
+ permutations under the null.
9
+ """
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ from pandas.api.types import is_numeric_dtype
14
+
15
+ from skxperiments.core.assignment import CRDAssignment
16
+ from skxperiments.core.base import BaseDesign
17
+ from skxperiments.core.exceptions import (
18
+ InsufficientDataError,
19
+ InvalidDesignError,
20
+ )
21
+
22
+
23
class ReRandomizedCRD(BaseDesign):
    """Rerandomized CRD with Mahalanobis acceptance criterion.

    Treatment is randomized completely at random (as in CRD), but only
    realizations whose Mahalanobis distance between treated and control
    means on ``covariates`` is at most ``threshold`` are accepted.

    Parameters
    ----------
    covariates : list of str
        Names of covariates used to compute the Mahalanobis distance.
        Must be non-empty, all numeric, and contain no NaN.
    threshold : float
        Maximum Mahalanobis distance for acceptance. Must be > 0.
        Has the interpretation of a chi-squared quantile with ``k``
        degrees of freedom, where ``k = len(covariates)`` (e.g.,
        ``scipy.stats.chi2.ppf(0.01, df=k)`` accepts approximately
        1% of CRD randomizations under regularity conditions).
    n_treated : int or None, optional
        Number of treated units. Provide either ``n_treated`` or ``p``,
        not both. By default None.
    p : float or None, optional
        Treatment proportion in (0, 1). Provide either ``n_treated`` or
        ``p``, not both. By default None.
    seed : int or None, optional
        Random seed for reproducibility, by default None.
    treatment_col : str, optional
        Name of the treatment column added to the output, by default
        ``"treatment"``.
    max_attempts : int, optional
        Maximum number of randomizations attempted before giving up,
        by default 10_000.

    Notes
    -----
    The Mahalanobis distance between treated and control means is
    defined as (Morgan & Rubin 2012):

        M = d^T [(1/n_T + 1/n_C) * S_X]^(-1) d

    where ``d = mean_treated - mean_control`` and ``S_X`` is the sample
    covariance (``ddof=1``) of the covariates over the full DataFrame.
    The covariance matrix is computed once in ``randomize`` and reused
    in every ``draw`` call to ensure the null distribution generated
    by ``RandomizationTest`` respects the same acceptance criterion.

    The number of treated units is always derived from the DataFrame
    handed to the acceptance loop, never from state left behind by an
    earlier ``randomize`` call, so one design instance can be applied
    to DataFrames of different sizes.

    References
    ----------
    Morgan, K. L., & Rubin, D. B. (2012). Rerandomization to improve
    covariate balance in experiments. Annals of Statistics, 40(2).
    """

    def __init__(
        self,
        covariates: list[str],
        threshold: float,
        n_treated: int | None = None,
        p: float | None = None,
        seed: int | None = None,
        treatment_col: str = "treatment",
        max_attempts: int = 10_000,
    ) -> None:
        # n_treated XOR p
        if n_treated is None and p is None:
            raise InvalidDesignError(
                "ReRandomizedCRD requires exactly one of n_treated or p; "
                "both are None."
            )
        if n_treated is not None and p is not None:
            raise InvalidDesignError(
                "ReRandomizedCRD requires exactly one of n_treated or p; "
                "both were provided."
            )

        if not isinstance(covariates, list) or len(covariates) == 0:
            raise InvalidDesignError(
                "ReRandomizedCRD requires a non-empty list of covariates."
            )

        # ``not threshold > 0`` (rather than ``threshold <= 0``) also
        # rejects NaN, which could otherwise never be satisfied and
        # would only surface after max_attempts failed draws.
        if not isinstance(threshold, (int, float)) or not threshold > 0:
            raise InvalidDesignError(
                f"threshold must be > 0, but received {threshold}."
            )

        if p is not None and not (0.0 < p < 1.0):
            raise InvalidDesignError(
                f"Treatment proportion p must be in (0, 1), but received {p}."
            )

        if not isinstance(max_attempts, int) or max_attempts < 1:
            raise InvalidDesignError(
                f"max_attempts must be a positive integer, but received "
                f"{max_attempts}."
            )

        self.covariates = covariates
        self.threshold = threshold
        self.n_treated = n_treated
        self.p = p
        self.seed = seed
        self.treatment_col = treatment_col
        self.max_attempts = max_attempts

    def _resolve_n_treated(self, n_total: int) -> int:
        """Return the validated number of treated units for ``n_total`` units.

        Parameters
        ----------
        n_total : int
            Number of rows in the DataFrame being randomized.

        Returns
        -------
        int
            ``self.n_treated`` when it was given, otherwise
            ``round(self.p * n_total)``; always strictly between 0 and
            ``n_total``.

        Raises
        ------
        InsufficientDataError
            When ``n_total < self.n_treated``.
        InvalidDesignError
            When the resolved count is not strictly between 0 and
            ``n_total`` (one group would be empty).
        """
        if self.n_treated is not None:
            if n_total < self.n_treated:
                raise InsufficientDataError(
                    context="ReRandomizedCRD randomization",
                    minimum=self.n_treated,
                    received=n_total,
                )
            n_treated = self.n_treated
        else:
            n_treated = int(round(self.p * n_total))

        if n_treated <= 0 or n_treated >= n_total:
            raise InvalidDesignError(
                f"Resolved n_treated={n_treated} for N={n_total}; must "
                f"be strictly between 0 and N. Adjust n_treated or p."
            )
        return n_treated

    def randomize(self, df: pd.DataFrame) -> CRDAssignment:
        """Perform rerandomization and return a CRDAssignment.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with experimental units. Must contain all
            ``covariates`` and must not contain ``treatment_col``.

        Returns
        -------
        CRDAssignment
            Assignment whose Mahalanobis distance is at most
            ``threshold``. ``rerandomization_metadata`` is populated.

        Raises
        ------
        InvalidDesignError
            If ``treatment_col`` already exists; if a covariate is
            missing, non-numeric or contains NaN; if the resolved
            number of treated units is not strictly between 0 and
            ``len(df)``; if the covariance matrix is singular; or when
            ``max_attempts`` is reached.
        InsufficientDataError
            When ``len(df) < n_treated``.
        """
        n_total = len(df)

        if self.treatment_col in df.columns:
            raise InvalidDesignError(
                f"Treatment column '{self.treatment_col}' already exists "
                f"in DataFrame. Drop or rename it before calling randomize()."
            )

        # Validate covariates exist
        missing = [c for c in self.covariates if c not in df.columns]
        if missing:
            raise InvalidDesignError(
                f"Covariates not found in DataFrame: {missing}. "
                f"Available columns: {list(df.columns)}."
            )

        # Validate numeric dtype
        non_numeric = [
            c for c in self.covariates if not is_numeric_dtype(df[c])
        ]
        if non_numeric:
            raise InvalidDesignError(
                f"Covariates must be numeric: {non_numeric} are not."
            )

        # Validate no NaN
        cols_with_nan = [c for c in self.covariates if df[c].isna().any()]
        if cols_with_nan:
            raise InvalidDesignError(
                f"Covariates contain NaN values: {cols_with_nan}. "
                f"Impute or drop NaN before calling randomize()."
            )

        # Resolve and validate n_treated for this DataFrame
        n_treated = self._resolve_n_treated(n_total)

        # Compute covariance matrix once (ddof=1)
        cov_matrix = df[self.covariates].cov(ddof=1).values

        # Check for singularity
        k = len(self.covariates)
        rank = np.linalg.matrix_rank(cov_matrix)
        if rank < k:
            raise InvalidDesignError(
                f"Covariance matrix of covariates is singular "
                f"(rank {rank} < {k}). Covariates are likely collinear; "
                f"remove redundant variables."
            )

        rng = np.random.default_rng(self.seed)

        # Still recorded for any external reader of this attribute; the
        # acceptance loop no longer consults it, because it would go
        # stale as soon as the design is applied to another DataFrame.
        self._resolved_n_treated = n_treated

        return self._randomize_with_cached_cov(
            df=df,
            cov_matrix=cov_matrix,
            rng=rng,
            n_treated=n_treated,
        )

    def _randomize_with_cached_cov(
        self,
        df: pd.DataFrame,
        cov_matrix: np.ndarray,
        rng: np.random.Generator,
        n_treated: int | None = None,
    ) -> CRDAssignment:
        """Acceptance/rejection loop with a pre-computed covariance matrix.

        Used by ``randomize`` and reused by ``CRDAssignment.draw`` to
        avoid recomputing the covariance matrix.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame without the treatment column.
        cov_matrix : np.ndarray
            Sample covariance matrix of covariates (ddof=1), already
            computed once.
        rng : np.random.Generator
            Random generator driving the loop.
        n_treated : int or None, optional
            Already-validated number of treated units. When None (the
            default), it is derived from ``self.n_treated`` /
            ``self.p`` and ``len(df)`` and validated.

        Returns
        -------
        CRDAssignment
            Accepted assignment with metadata populated.

        Raises
        ------
        InvalidDesignError
            If ``max_attempts`` is reached without acceptance, if the
            scaled covariance matrix cannot be inverted, or if the
            derived number of treated units is not strictly between 0
            and ``len(df)``.
        InsufficientDataError
            When ``n_treated`` is derived here and
            ``len(df) < self.n_treated``.
        """
        n_total = len(df)

        # Derive the treated count from *this* DataFrame rather than
        # from an attribute cached by the most recent randomize() call,
        # which may belong to a DataFrame of a different size.
        if n_treated is None:
            n_treated = self._resolve_n_treated(n_total)

        n_control = n_total - n_treated
        scaling_factor = 1.0 / n_treated + 1.0 / n_control

        scaled_cov = scaling_factor * cov_matrix
        try:
            inv_scaled_cov = np.linalg.inv(scaled_cov)
        except np.linalg.LinAlgError as exc:
            raise InvalidDesignError(
                f"Failed to invert scaled covariance matrix: {exc}. "
                f"Covariates may be collinear."
            ) from None

        cov_values = df[self.covariates].values

        for attempt in range(1, self.max_attempts + 1):
            # One rng.choice call per attempt: the sequence of RNG
            # calls determines which assignment a given seed yields.
            treatment = np.zeros(n_total, dtype=int)
            chosen = rng.choice(n_total, size=n_treated, replace=False)
            treatment[chosen] = 1

            treated_mask = treatment == 1
            mean_t = cov_values[treated_mask].mean(axis=0)
            mean_c = cov_values[~treated_mask].mean(axis=0)
            d = mean_t - mean_c

            distance = float(d @ inv_scaled_cov @ d)

            if distance <= self.threshold:
                df_out = df.copy()
                df_out[self.treatment_col] = treatment

                metadata = {
                    "covariates": list(self.covariates),
                    "threshold": float(self.threshold),
                    "cov_matrix": cov_matrix,
                    "attempts": attempt,
                    "scaling_factor": scaling_factor,
                }

                return CRDAssignment(
                    data=df_out,
                    treatment_col=self.treatment_col,
                    design=self,
                    seed=self.seed,
                    rerandomization_metadata=metadata,
                )

        raise InvalidDesignError(
            f"ReRandomizedCRD failed to find an assignment with "
            f"Mahalanobis distance <= {self.threshold} after "
            f"{self.max_attempts} attempts. Increase threshold or "
            f"max_attempts."
        )
@@ -0,0 +1,21 @@
1
+ """Diagnostics module for checking design and estimation assumptions.
2
+
3
+ This module contains tools for balance checks, covariate diagnostics,
4
+ and other pre- and post-estimation diagnostics.
5
+ """
6
+
7
+ from skxperiments.diagnostics.aa_test import AAResult, AATest
8
+ from skxperiments.diagnostics.balance_report import (
9
+ BalanceReport,
10
+ BalanceResult,
11
+ )
12
+ from skxperiments.diagnostics.srm import SRMResult, SRMTest
13
+
14
+ __all__ = [
15
+ "AAResult",
16
+ "AATest",
17
+ "BalanceReport",
18
+ "BalanceResult",
19
+ "SRMResult",
20
+ "SRMTest",
21
+ ]