skxperiments 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skxperiments/__init__.py +5 -0
- skxperiments/core/__init__.py +42 -0
- skxperiments/core/assignment.py +589 -0
- skxperiments/core/base.py +512 -0
- skxperiments/core/exceptions.py +145 -0
- skxperiments/core/potential_outcomes.py +168 -0
- skxperiments/core/results.py +624 -0
- skxperiments/design/__init__.py +22 -0
- skxperiments/design/balance.py +182 -0
- skxperiments/design/blocked_crd.py +157 -0
- skxperiments/design/crd.py +162 -0
- skxperiments/design/factorial.py +174 -0
- skxperiments/design/power.py +233 -0
- skxperiments/design/rerandomized_crd.py +319 -0
- skxperiments/diagnostics/__init__.py +21 -0
- skxperiments/diagnostics/aa_test.py +277 -0
- skxperiments/diagnostics/balance_report.py +224 -0
- skxperiments/diagnostics/srm.py +327 -0
- skxperiments/estimators/__init__.py +23 -0
- skxperiments/estimators/blocked_difference_in_means.py +197 -0
- skxperiments/estimators/cuped.py +280 -0
- skxperiments/estimators/difference_in_means.py +161 -0
- skxperiments/estimators/factorial_estimator.py +213 -0
- skxperiments/estimators/lin_estimator.py +298 -0
- skxperiments/inference/__init__.py +17 -0
- skxperiments/inference/bootstrap.py +450 -0
- skxperiments/inference/multiple.py +365 -0
- skxperiments/inference/neyman.py +386 -0
- skxperiments/inference/randomization_test.py +319 -0
- skxperiments/pipeline.py +366 -0
- skxperiments/reporting/__init__.py +30 -0
- skxperiments/reporting/plots.py +411 -0
- skxperiments/reporting/summary.py +185 -0
- skxperiments-0.1.0.dev0.dist-info/METADATA +272 -0
- skxperiments-0.1.0.dev0.dist-info/RECORD +36 -0
- skxperiments-0.1.0.dev0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Power analysis for two-sample experiments with continuous outcomes.
|
|
2
|
+
|
|
3
|
+
Provides ``power_analysis``, a standalone function that solves for any
|
|
4
|
+
one of (sample size, minimum detectable effect, power) given the other
|
|
5
|
+
two, under the normal-approximation framework for the difference of
|
|
6
|
+
two means.
|
|
7
|
+
|
|
8
|
+
Scope (v1):
|
|
9
|
+
- Two groups (treated and control), continuous outcome.
|
|
10
|
+
- Test of mean difference, two-sided by default.
|
|
11
|
+
- Asymptotic normal approximation; valid for large n. Does not use
|
|
12
|
+
the t distribution.
|
|
13
|
+
- Designs other than two-arm CRD (blocked, factorial, cluster,
|
|
14
|
+
sequential) are out of scope; binary outcomes are out of scope.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import math
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
|
|
20
|
+
from scipy.stats import norm
|
|
21
|
+
|
|
22
|
+
from skxperiments.core.exceptions import InvalidDesignError
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class PowerResult:
    """Self-describing outcome of a two-arm power calculation.

    Every field is filled in, whether it was supplied by the caller or
    solved for, so a single instance can be logged or passed on without
    also carrying the original call arguments around.

    Attributes
    ----------
    n_total : int
        Total number of experimental units.
    n_treated : int
        Units assigned to the treatment arm.
    n_control : int
        Units assigned to the control arm. ``power_analysis`` builds it
        as ``n_total - n_treated``, so the two arms add up to
        ``n_total``; the dataclass itself does not enforce this.
    mde : float
        Minimum detectable effect, reported as a positive number.
    power : float
        Statistical power (1 - beta).
    alpha : float
        Significance level of the test.
    std : float
        Outcome standard deviation, taken to be the same in both arms.
    allocation : float
        Share of units assigned to treatment.
    two_sided : bool
        ``True`` for a two-sided test, ``False`` for one-sided.
    """

    # Sample sizes.
    n_total: int
    n_treated: int
    n_control: int

    # Effect size and error rates.
    mde: float
    power: float
    alpha: float

    # Design inputs echoed back to the caller.
    std: float
    allocation: float
    two_sided: bool
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def power_analysis(
    *,
    n: int | None = None,
    mde: float | None = None,
    power: float | None = None,
    std: float,
    alpha: float = 0.05,
    allocation: float = 0.5,
    two_sided: bool = True,
) -> PowerResult:
    """Solve for sample size, MDE, or power in a two-sample experiment.

    Exactly one of ``n``, ``mde``, ``power`` must be ``None``; that is
    the quantity to be resolved. The other two must be provided.

    Parameters
    ----------
    n : int or None, optional
        Total sample size (both arms together). Any integral type is
        accepted (e.g. NumPy integers); booleans are rejected. Pass None
        to solve for n. By default None.
    mde : float or None, optional
        Minimum detectable effect (mean difference). Only its magnitude
        is used. Pass None to solve for MDE. By default None.
    power : float or None, optional
        Desired power (1 - beta). Pass None to solve for power. By
        default None.
    std : float
        Standard deviation of the outcome, assumed equal across
        groups. Required.
    alpha : float, optional
        Significance level, by default 0.05.
    allocation : float, optional
        Proportion of units allocated to treatment, in (0, 1), by
        default 0.5.
    two_sided : bool, optional
        Whether the test is two-sided, by default True.

    Returns
    -------
    PowerResult
        Self-describing result with all fields populated. ``mde`` is
        always reported as a positive number. When ``n_total >= 2`` both
        arms contain at least one unit, so for extreme ``allocation``
        values the realised split can differ from the nominal one.

    Raises
    ------
    InvalidDesignError
        If more than one (or none) of ``n``, ``mde``, ``power`` is
        None; if ``alpha`` or ``allocation`` are not in (0, 1); if
        ``power`` (when provided) is not in (0, 1) or is not attainable
        (see Notes); if ``std`` is not a finite number > 0; if ``mde``
        is zero or not finite when provided; if ``n`` is not a positive
        integer when provided; if the required sample size is too large
        to represent.

    Notes
    -----
    Under the normal approximation for the difference of two
    independent means with common variance ``std**2``:

        sigma_eff = std * sqrt(1/allocation + 1/(1 - allocation))

    With ``z_alpha = Phi^{-1}(1 - alpha/2)`` for two-sided tests
    (or ``Phi^{-1}(1 - alpha)`` for one-sided) and
    ``z_beta = Phi^{-1}(power)``:

        n_total = ceil(((z_alpha + z_beta) * sigma_eff / mde)**2)
        mde     = (z_alpha + z_beta) * sigma_eff / sqrt(n_total)
        power   = Phi(sqrt(n_total) * |mde| / sigma_eff - z_alpha)

    The power expression is strictly greater than ``Phi(-z_alpha)``
    (``alpha/2`` two-sided, ``alpha`` one-sided) for every non-zero
    effect and positive sample size. A requested ``power`` at or below
    that floor therefore has no solution and is rejected rather than
    silently mapped onto a different power level.

    See Cohen (1988), *Statistical Power Analysis for the Behavioral
    Sciences*, for derivations.

    Examples
    --------
    >>> result = power_analysis(
    ...     mde=0.2, power=0.8, std=1.0, alpha=0.05
    ... )
    >>> result.n_total  # total over both arms (about 393 per arm)
    785
    >>> (result.n_treated, result.n_control)
    (392, 393)
    """
    # Imported locally: nothing else in this function's module needs it.
    from numbers import Integral

    # --- Validate which target to solve for -----------------------
    targets_none = sum(x is None for x in (n, mde, power))
    if targets_none != 1:
        raise InvalidDesignError(
            f"power_analysis requires exactly one of n, mde, power to be "
            f"None; received {targets_none} None values. "
            f"n={n!r}, mde={mde!r}, power={power!r}."
        )

    # --- Validate other parameters --------------------------------
    if not (0.0 < alpha < 1.0):
        raise InvalidDesignError(
            f"alpha must be in (0, 1), but received {alpha}."
        )

    if not (0.0 < allocation < 1.0):
        raise InvalidDesignError(
            f"allocation must be in (0, 1), but received {allocation}."
        )

    # ``not std > 0`` (instead of ``std <= 0``) also rejects NaN.
    if not std > 0:
        raise InvalidDesignError(
            f"std must be > 0, but received {std}."
        )
    if not math.isfinite(std):
        raise InvalidDesignError(
            f"std must be finite, but received {std}."
        )

    if power is not None and not (0.0 < power < 1.0):
        raise InvalidDesignError(
            f"power must be in (0, 1), but received {power}."
        )

    if mde is not None:
        if mde == 0:
            raise InvalidDesignError(
                "mde must be non-zero when provided; received 0."
            )
        if not math.isfinite(mde):
            raise InvalidDesignError(
                f"mde must be finite, but received {mde}."
            )

    if n is not None:
        # bool is a subclass of int; treating True as n=1 is never intended.
        if isinstance(n, bool) or not isinstance(n, Integral) or n <= 0:
            raise InvalidDesignError(
                f"n must be a positive integer when provided, "
                f"but received {n!r}."
            )
        # Normalise NumPy integers so the result carries a plain int.
        n = int(n)

    # --- Compute z-quantiles --------------------------------------
    if two_sided:
        z_alpha = float(norm.ppf(1.0 - alpha / 2.0))
    else:
        z_alpha = float(norm.ppf(1.0 - alpha))

    sigma_eff = std * math.sqrt(1.0 / allocation + 1.0 / (1.0 - allocation))

    # When power is an input, make sure it is reachable at all. Without
    # this check a non-positive (z_alpha + z_beta) is hidden by the
    # squaring / abs() below and yields a wrong n or MDE.
    z_total = None
    if power is not None:
        z_total = z_alpha + float(norm.ppf(power))
        if z_total <= 0.0:
            floor = alpha / 2.0 if two_sided else alpha
            raise InvalidDesignError(
                f"power={power} is not attainable: under the normal "
                f"approximation, power exceeds {floor} for any non-zero "
                f"effect and sample size. Request a power above {floor}."
            )

    # --- Solve for the missing target -----------------------------
    if n is None:
        # Solve for n_total given mde, power.
        resolved_mde = abs(mde)
        try:
            required = math.ceil((z_total * sigma_eff / resolved_mde) ** 2)
        except OverflowError:
            raise InvalidDesignError(
                f"Required sample size is too large to represent for "
                f"mde={mde!r}, std={std!r}; increase mde or reduce std."
            ) from None
        # A two-arm design needs at least one unit in each arm.
        n_total = max(2, required)
        resolved_power = power

    elif mde is None:
        # Solve for mde given n, power. z_total > 0, so this is positive.
        resolved_mde = z_total * sigma_eff / math.sqrt(n)
        n_total = n
        resolved_power = power

    else:
        # Solve for power given n, mde.
        resolved_mde = abs(mde)
        z_beta = math.sqrt(n) * resolved_mde / sigma_eff - z_alpha
        resolved_power = float(norm.cdf(z_beta))
        n_total = n

    # --- Build allocation counts that sum exactly to n_total ------
    n_treated = int(round(n_total * allocation))
    if n_total >= 2:
        # Rounding can empty an arm for extreme allocations; keep both
        # arms populated whenever the total allows it.
        n_treated = min(max(n_treated, 1), n_total - 1)
    n_control = n_total - n_treated

    return PowerResult(
        n_total=n_total,
        n_treated=n_treated,
        n_control=n_control,
        mde=resolved_mde,
        power=resolved_power,
        alpha=alpha,
        std=std,
        allocation=allocation,
        two_sided=two_sided,
    )
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
"""Rerandomized Completely Randomized Design (Morgan & Rubin 2012).
|
|
2
|
+
|
|
3
|
+
Repeatedly proposes CRD assignments and accepts the first one whose
|
|
4
|
+
between-group Mahalanobis distance on the specified covariates is
|
|
5
|
+
below a fixed threshold. Improves covariate balance over plain CRD
|
|
6
|
+
while preserving the validity of randomization-based inference, as
|
|
7
|
+
long as the same acceptance criterion is applied when generating
|
|
8
|
+
permutations under the null.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
from pandas.api.types import is_numeric_dtype
|
|
14
|
+
|
|
15
|
+
from skxperiments.core.assignment import CRDAssignment
|
|
16
|
+
from skxperiments.core.base import BaseDesign
|
|
17
|
+
from skxperiments.core.exceptions import (
|
|
18
|
+
InsufficientDataError,
|
|
19
|
+
InvalidDesignError,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ReRandomizedCRD(BaseDesign):
    """Rerandomized CRD with Mahalanobis acceptance criterion.

    Treatment is randomized completely at random (as in CRD), but only
    realizations whose Mahalanobis distance between treated and control
    means on ``covariates`` is at most ``threshold`` are accepted.

    Parameters
    ----------
    covariates : list of str
        Names of covariates used to compute the Mahalanobis distance.
        Must be non-empty, all numeric, and contain no NaN.
    threshold : float
        Maximum Mahalanobis distance for acceptance. Must be > 0
        (NaN is rejected). Has the interpretation of a chi-squared
        quantile with ``k`` degrees of freedom, where
        ``k = len(covariates)`` (e.g.,
        ``scipy.stats.chi2.ppf(0.01, df=k)`` accepts approximately
        1% of CRD randomizations under regularity conditions).
    n_treated : int or None, optional
        Number of treated units. Provide either ``n_treated`` or ``p``,
        not both. By default None.
    p : float or None, optional
        Treatment proportion in (0, 1). Provide either ``n_treated`` or
        ``p``, not both. By default None.
    seed : int or None, optional
        Random seed for reproducibility, by default None.
    treatment_col : str, optional
        Name of the treatment column added to the output, by default
        ``"treatment"``.
    max_attempts : int, optional
        Maximum number of randomizations attempted before giving up,
        by default 10_000.

    Raises
    ------
    InvalidDesignError
        If neither or both of ``n_treated`` and ``p`` are given; if
        ``covariates`` is not a non-empty list; if ``threshold`` is not
        a number > 0; if ``p`` is outside (0, 1); if ``max_attempts``
        is not a positive integer.

    Notes
    -----
    The Mahalanobis distance between treated and control means is
    defined as (Morgan & Rubin 2012):

        M = d^T [(1/n_T + 1/n_C) * S_X]^(-1) d

    where ``d = mean_treated - mean_control`` and ``S_X`` is the sample
    covariance (``ddof=1``) of the covariates over the full DataFrame.
    The covariance matrix is computed once in ``randomize`` and handed
    to the acceptance loop, so repeated draws apply the same acceptance
    criterion.

    The treated-group size is derived from ``n_treated`` / ``p`` and the
    length of the DataFrame actually passed in, on every call. It is
    never taken from state left behind by an earlier ``randomize`` call,
    so one design object can safely serve DataFrames of different sizes.

    References
    ----------
    Morgan, K. L., & Rubin, D. B. (2012). Rerandomization to improve
    covariate balance in experiments. Annals of Statistics, 40(2).
    """

    def __init__(
        self,
        covariates: list[str],
        threshold: float,
        n_treated: int | None = None,
        p: float | None = None,
        seed: int | None = None,
        treatment_col: str = "treatment",
        max_attempts: int = 10_000,
    ) -> None:
        # n_treated XOR p
        if n_treated is None and p is None:
            raise InvalidDesignError(
                "ReRandomizedCRD requires exactly one of n_treated or p; "
                "both are None."
            )
        if n_treated is not None and p is not None:
            raise InvalidDesignError(
                "ReRandomizedCRD requires exactly one of n_treated or p; "
                "both were provided."
            )

        if not isinstance(covariates, list) or len(covariates) == 0:
            raise InvalidDesignError(
                "ReRandomizedCRD requires a non-empty list of covariates."
            )

        # ``not threshold > 0`` (rather than ``threshold <= 0``) also
        # rejects NaN, which could otherwise never be satisfied and would
        # only fail after exhausting max_attempts.
        if not isinstance(threshold, (int, float)) or not threshold > 0:
            raise InvalidDesignError(
                f"threshold must be > 0, but received {threshold}."
            )

        if p is not None and not (0.0 < p < 1.0):
            raise InvalidDesignError(
                f"Treatment proportion p must be in (0, 1), but received {p}."
            )

        if not isinstance(max_attempts, int) or max_attempts < 1:
            raise InvalidDesignError(
                f"max_attempts must be a positive integer, but received "
                f"{max_attempts}."
            )

        # Arguments are stored exactly as received (no copies/coercion).
        self.covariates = covariates
        self.threshold = threshold
        self.n_treated = n_treated
        self.p = p
        self.seed = seed
        self.treatment_col = treatment_col
        self.max_attempts = max_attempts

    def _resolve_n_treated(self, n_total: int) -> int:
        """Return the treated-group size for a population of ``n_total``.

        Parameters
        ----------
        n_total : int
            Number of units in the DataFrame being randomized.

        Returns
        -------
        int
            ``self.n_treated`` when it was given, otherwise
            ``round(self.p * n_total)``; always strictly between 0 and
            ``n_total``.

        Raises
        ------
        InsufficientDataError
            When ``n_total < self.n_treated``.
        InvalidDesignError
            When ``self.n_treated`` is not an integer, or the resolved
            size would leave the treated or control group empty.
        """
        if self.n_treated is not None:
            # Without this check a non-integer would only surface later
            # as a TypeError from the random generator.
            if not isinstance(self.n_treated, (int, np.integer)):
                raise InvalidDesignError(
                    f"n_treated must be an integer, but received "
                    f"{self.n_treated!r}."
                )
            if n_total < self.n_treated:
                raise InsufficientDataError(
                    context="ReRandomizedCRD randomization",
                    minimum=self.n_treated,
                    received=n_total,
                )
            n_treated = int(self.n_treated)
        else:
            n_treated = int(round(self.p * n_total))

        if n_treated <= 0 or n_treated >= n_total:
            raise InvalidDesignError(
                f"Resolved n_treated={n_treated} for N={n_total}; must "
                f"be strictly between 0 and N. Adjust n_treated or p."
            )
        return n_treated

    def randomize(self, df: pd.DataFrame) -> CRDAssignment:
        """Perform rerandomization and return a CRDAssignment.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame with experimental units. Must contain all
            ``covariates`` and must not contain ``treatment_col``.

        Returns
        -------
        CRDAssignment
            Assignment whose Mahalanobis distance is at most
            ``threshold``. ``rerandomization_metadata`` is populated.

        Raises
        ------
        InvalidDesignError
            If ``treatment_col`` already exists; if a covariate is
            missing, non-numeric or contains NaN; if the resolved
            treated-group size is not strictly between 0 and
            ``len(df)``; if the covariance matrix is singular; or when
            ``max_attempts`` is reached.
        InsufficientDataError
            When ``len(df) < n_treated``.
        """
        n_total = len(df)

        if self.treatment_col in df.columns:
            raise InvalidDesignError(
                f"Treatment column '{self.treatment_col}' already exists "
                f"in DataFrame. Drop or rename it before calling randomize()."
            )

        # Validate covariates exist
        missing = [c for c in self.covariates if c not in df.columns]
        if missing:
            raise InvalidDesignError(
                f"Covariates not found in DataFrame: {missing}. "
                f"Available columns: {list(df.columns)}."
            )

        # Validate numeric dtype
        non_numeric = [
            c for c in self.covariates if not is_numeric_dtype(df[c])
        ]
        if non_numeric:
            raise InvalidDesignError(
                f"Covariates must be numeric: {non_numeric} are not."
            )

        # Validate no NaN
        cols_with_nan = [c for c in self.covariates if df[c].isna().any()]
        if cols_with_nan:
            raise InvalidDesignError(
                f"Covariates contain NaN values: {cols_with_nan}. "
                f"Impute or drop NaN before calling randomize()."
            )

        # Validate the group sizes before doing any linear algebra.
        n_treated = self._resolve_n_treated(n_total)

        # Compute covariance matrix once (ddof=1)
        cov_matrix = df[self.covariates].cov(ddof=1).values

        # Check for singularity
        k = len(self.covariates)
        rank = np.linalg.matrix_rank(cov_matrix)
        if rank < k:
            raise InvalidDesignError(
                f"Covariance matrix of covariates is singular "
                f"(rank {rank} < {k}). Covariates are likely collinear; "
                f"remove redundant variables."
            )

        rng = np.random.default_rng(self.seed)

        # NOTE(review): the acceptance loop no longer reads this attribute
        # (it re-derives the value from the DataFrame it is given). It is
        # still set in case code outside this class inspects it; confirm
        # and remove if nothing does.
        self._resolved_n_treated = n_treated

        return self._randomize_with_cached_cov(
            df=df,
            cov_matrix=cov_matrix,
            rng=rng,
        )

    def _randomize_with_cached_cov(
        self,
        df: pd.DataFrame,
        cov_matrix: np.ndarray,
        rng: np.random.Generator,
    ) -> CRDAssignment:
        """Acceptance/rejection loop with a pre-computed covariance matrix.

        Called by ``randomize``; it takes the covariance matrix as an
        argument so that a caller redrawing assignments can reuse the
        matrix instead of recomputing it.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame without the treatment column.
        cov_matrix : np.ndarray
            Sample covariance matrix of covariates (ddof=1), already
            computed once.
        rng : np.random.Generator
            Random generator driving the loop.

        Returns
        -------
        CRDAssignment
            Accepted assignment with metadata populated.

        Raises
        ------
        InvalidDesignError
            If the treated-group size resolved for ``df`` is not
            strictly between 0 and ``len(df)``, if the scaled covariance
            matrix cannot be inverted, or if ``max_attempts`` is reached
            without acceptance.
        InsufficientDataError
            When ``len(df) < n_treated``.
        """
        n_total = len(df)

        # Always derive the group size from *this* DataFrame. Reading a
        # value cached on self by the latest randomize() call would be
        # wrong whenever the design has since been applied to a frame of
        # a different size, and skipping the bounds check could divide
        # by zero just below.
        n_treated = self._resolve_n_treated(n_total)
        n_control = n_total - n_treated
        scaling_factor = 1.0 / n_treated + 1.0 / n_control

        scaled_cov = scaling_factor * cov_matrix
        try:
            inv_scaled_cov = np.linalg.inv(scaled_cov)
        except np.linalg.LinAlgError as exc:
            # The original error text is embedded in the message, so the
            # chained traceback is suppressed on purpose.
            raise InvalidDesignError(
                f"Failed to invert scaled covariance matrix: {exc}. "
                f"Covariates may be collinear."
            ) from None

        cov_values = df[self.covariates].values

        for attempt in range(1, self.max_attempts + 1):
            chosen = rng.choice(n_total, size=n_treated, replace=False)

            # A boolean mask is all the distance computation needs; the
            # integer treatment vector is only built for an accepted draw.
            treated_mask = np.zeros(n_total, dtype=bool)
            treated_mask[chosen] = True

            mean_t = cov_values[treated_mask].mean(axis=0)
            mean_c = cov_values[~treated_mask].mean(axis=0)
            d = mean_t - mean_c

            distance = float(d @ inv_scaled_cov @ d)
            if distance > self.threshold:
                continue

            df_out = df.copy()
            df_out[self.treatment_col] = treated_mask.astype(int)

            metadata = {
                "covariates": list(self.covariates),
                "threshold": float(self.threshold),
                "cov_matrix": cov_matrix,
                "attempts": attempt,
                "scaling_factor": scaling_factor,
            }

            return CRDAssignment(
                data=df_out,
                treatment_col=self.treatment_col,
                design=self,
                seed=self.seed,
                rerandomization_metadata=metadata,
            )

        raise InvalidDesignError(
            f"ReRandomizedCRD failed to find an assignment with "
            f"Mahalanobis distance <= {self.threshold} after "
            f"{self.max_attempts} attempts. Increase threshold or "
            f"max_attempts."
        )
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Diagnostics module for checking design and estimation assumptions.
|
|
2
|
+
|
|
3
|
+
This module contains tools for balance checks, covariate diagnostics,
|
|
4
|
+
and other pre- and post-estimation diagnostics.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from skxperiments.diagnostics.aa_test import AAResult, AATest
|
|
8
|
+
from skxperiments.diagnostics.balance_report import (
|
|
9
|
+
BalanceReport,
|
|
10
|
+
BalanceResult,
|
|
11
|
+
)
|
|
12
|
+
from skxperiments.diagnostics.srm import SRMResult, SRMTest
|
|
13
|
+
|
|
14
|
+
# Names re-exported by the diagnostics package; each one is imported
# above from the submodule noted beside it.
__all__ = [
    "AAResult",  # aa_test
    "AATest",  # aa_test
    "BalanceReport",  # balance_report
    "BalanceResult",  # balance_report
    "SRMResult",  # srm
    "SRMTest",  # srm
]
|