skxperiments 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. skxperiments/__init__.py +5 -0
  2. skxperiments/core/__init__.py +42 -0
  3. skxperiments/core/assignment.py +589 -0
  4. skxperiments/core/base.py +512 -0
  5. skxperiments/core/exceptions.py +145 -0
  6. skxperiments/core/potential_outcomes.py +168 -0
  7. skxperiments/core/results.py +624 -0
  8. skxperiments/design/__init__.py +22 -0
  9. skxperiments/design/balance.py +182 -0
  10. skxperiments/design/blocked_crd.py +157 -0
  11. skxperiments/design/crd.py +162 -0
  12. skxperiments/design/factorial.py +174 -0
  13. skxperiments/design/power.py +233 -0
  14. skxperiments/design/rerandomized_crd.py +319 -0
  15. skxperiments/diagnostics/__init__.py +21 -0
  16. skxperiments/diagnostics/aa_test.py +277 -0
  17. skxperiments/diagnostics/balance_report.py +224 -0
  18. skxperiments/diagnostics/srm.py +327 -0
  19. skxperiments/estimators/__init__.py +23 -0
  20. skxperiments/estimators/blocked_difference_in_means.py +197 -0
  21. skxperiments/estimators/cuped.py +280 -0
  22. skxperiments/estimators/difference_in_means.py +161 -0
  23. skxperiments/estimators/factorial_estimator.py +213 -0
  24. skxperiments/estimators/lin_estimator.py +298 -0
  25. skxperiments/inference/__init__.py +17 -0
  26. skxperiments/inference/bootstrap.py +450 -0
  27. skxperiments/inference/multiple.py +365 -0
  28. skxperiments/inference/neyman.py +386 -0
  29. skxperiments/inference/randomization_test.py +319 -0
  30. skxperiments/pipeline.py +366 -0
  31. skxperiments/reporting/__init__.py +30 -0
  32. skxperiments/reporting/plots.py +411 -0
  33. skxperiments/reporting/summary.py +185 -0
  34. skxperiments-0.1.0.dev0.dist-info/METADATA +272 -0
  35. skxperiments-0.1.0.dev0.dist-info/RECORD +36 -0
  36. skxperiments-0.1.0.dev0.dist-info/WHEEL +4 -0
@@ -0,0 +1,272 @@
1
+ Metadata-Version: 2.4
2
+ Name: skxperiments
3
+ Version: 0.1.0.dev0
4
+ Summary: Randomization-based experimental design and causal inference, sklearn-style.
5
+ Project-URL: Homepage, https://github.com/gusbruschi13/skxperiments
6
+ Project-URL: Repository, https://github.com/gusbruschi13/skxperiments
7
+ Project-URL: Issues, https://github.com/gusbruschi13/skxperiments/issues
8
+ Author-email: Gustavo Bruschi <gustavo.bruschi@riachuelo.com.br>
9
+ License-Expression: MIT
10
+ Keywords: ab-testing,causal-inference,design-of-experiments,experiments,potential-outcomes,randomization
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: numpy
21
+ Requires-Dist: pandas
22
+ Requires-Dist: scipy
23
+ Provides-Extra: dev
24
+ Requires-Dist: black; extra == 'dev'
25
+ Requires-Dist: hypothesis; extra == 'dev'
26
+ Requires-Dist: ipykernel; extra == 'dev'
27
+ Requires-Dist: matplotlib; extra == 'dev'
28
+ Requires-Dist: mypy; extra == 'dev'
29
+ Requires-Dist: nbmake; extra == 'dev'
30
+ Requires-Dist: pre-commit; extra == 'dev'
31
+ Requires-Dist: pytest; extra == 'dev'
32
+ Requires-Dist: pytest-cov; extra == 'dev'
33
+ Requires-Dist: ruff; extra == 'dev'
34
+ Provides-Extra: viz
35
+ Requires-Dist: matplotlib; extra == 'viz'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # skxperiments
39
+
40
+ > Randomization-based experimental design and causal inference, sklearn-style.
41
+
42
+ ![CI](https://github.com/gusbruschi13/skxperiments/actions/workflows/ci.yml/badge.svg)
43
+ ![Python](https://img.shields.io/badge/python-3.10%2B-blue)
44
+ ![Status](https://img.shields.io/badge/status-alpha-orange)
45
+
46
+ A Python library for designing randomized experiments and estimating causal effects under the
47
+ potential outcomes framework (Rubin Causal Model). Treatment assignment is the starting point;
48
+ statistical models come second.
49
+
50
+ ## Status
51
+
52
+ The v1 feature set is complete: Phases 0–7 are done (sequential testing is
53
+ deferred to v2). See [Project status](#project-status) below for details.
54
+
55
+ ## Installation
56
+
57
+ ```bash
58
+ pip install skxperiments
59
+ ```
60
+
61
+ Requires Python 3.10+. Dependencies: `numpy`, `pandas`, `scipy`.
62
+
63
+ ## Quick start
64
+
65
+ ```python
66
+ import numpy as np
67
+ import pandas as pd
68
+ from skxperiments.design.crd import CRD
69
+ from skxperiments.estimators.difference_in_means import DifferenceInMeans
70
+ from skxperiments.inference import RandomizationTest
71
+
72
+ # 1. Generate a synthetic dataset
73
+ rng = np.random.default_rng(42)
74
+ df = pd.DataFrame({
75
+ "x": rng.normal(0.0, 1.0, 200),
76
+ "y": rng.normal(0.0, 1.0, 200),
77
+ })
78
+
79
+ # 2. Design: completely randomized assignment, 50/50 split
80
+ design = CRD(p=0.5, seed=42)
81
+ assignment = design.randomize(df)
82
+
83
+ # 3. Point estimate of the ATE
84
+ estimator = DifferenceInMeans(outcome_col="y")
85
+ result = estimator.fit(assignment).estimate()
86
+ print(result.ate)
87
+
88
+ # 4. Randomization-based p-value (Fisher's sharp null)
89
+ rt = RandomizationTest(estimator=estimator, n_permutations=10_000, seed=0)
90
+ result = rt.fit(assignment).estimate()
91
+ print(result.ate, result.p_value)
92
+ ```
93
+
94
+ For variance reduction with covariates, use `LinEstimator` (Lin 2013) or `CUPED` (Deng et al.
95
+ 2013). For blocked or factorial designs, use `BlockedCRD` + `BlockedDifferenceInMeans` or
96
+ `FactorialDesign` + `FactorialEstimator`. For rerandomization on Mahalanobis distance,
97
+ use `ReRandomizedCRD` (Morgan & Rubin 2012). `RandomizationTest` works with all of these
98
+ except factorial designs (`FactorialAssignment` is not supported in v1). To control the family-wise error rate or false
99
+ discovery rate when reporting multiple effects, wrap the result in
100
+ `MultipleTestingCorrection`.
101
+
102
+ ## Documentation and tutorials
103
+
104
+ Learning-path notebooks (bilingual, Portuguese and English) live in
105
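+ [`examples/for_starters/`](https://github.com/gusbruschi13/skxperiments/tree/HEAD/examples/for_starters/); conceptual docs (a glossary
105
+ and a "how to choose" guide) are in [`docs/`](https://github.com/gusbruschi13/skxperiments/blob/HEAD/docs/README.md).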
107
+
108
+ ## Design philosophy
109
+
110
+ 1. **The assignment mechanism is primary**, not the statistical model.
111
+ 2. **API in scikit-learn style**: parameters in `__init__`, data in `fit()`, learned attributes
112
+ end with `_`.
113
+ 3. **`Assignment` is the contract** between designs and estimators — estimators receive
114
+ `Assignment` objects, not loose DataFrames.
115
+ 4. **Randomization-based inference is the default**; classical t-tests are not.
116
+ 5. **Finite-population and superpopulation inference are distinguished explicitly.**
117
+ 6. **Fail fast** with clear messages when designs and estimators are incompatible.
118
+ 7. **No side effects**: `fit()` and `randomize()` never mutate input DataFrames.
119
+
120
+ ## Project status
121
+
122
+ | Phase | Module | Status |
123
+ |---|---|---|
124
+ | 0 | Scaffold, exceptions, CI | ✓ Complete |
125
+ | 1 | Core (`Assignment`, `Results`, base classes) | ✓ Complete |
126
+ | 2 | Designs (CRD, BlockedCRD, ReRandomizedCRD, FactorialDesign, balance, power) | ✓ Complete |
127
+ | 3 | Estimators (DIM, BlockedDIM, Factorial, Lin, CUPED) | ✓ Complete |
128
+ | 4 | Inference (RandomizationTest, MultipleTestingCorrection, NeymanCI, BootstrapCI) | ✓ Complete (4.1–4.4; sequential → v2) |
129
+ | 5 | Diagnostics (SRMTest, AATest, BalanceReport) | ✓ Complete |
130
+ | 6 | Pipeline (ExperimentPipeline, ExperimentComparison) | ✓ Complete |
131
+ | 7 | Visualization and reporting (plots, ExperimentReport) | ✓ Complete |
132
+
133
+ Test suite: 720 tests, all passing on CI.
134
+
135
+ See [`ROADMAP.md`](https://github.com/gusbruschi13/skxperiments/blob/HEAD/ROADMAP.md) for deferred features and v2 plans, and `CHANGELOG.md` for the
136
+ full history of changes.
137
+
138
+ ## What's implemented
139
+
140
+ ### Designs (`skxperiments.design`)
141
+
142
+ - **`CRD`** — Completely randomized design.
143
+ - **`BlockedCRD`** — Independent randomization within blocks.
144
+ - **`ReRandomizedCRD`** — Mahalanobis acceptance criterion with cached covariance matrix; loop with `max_attempts`.
145
+ - **`FactorialDesign`** — 2^K factorial design with equal cell sizes; little-endian cell encoding.
146
+ - **`check_balance(assignment, covariates)`** — Standardized mean differences (SMD), pooled std with `ddof=1`.
147
+ - **`power_analysis(...)`** — Sample size, MDE, or power for two-sample mean comparisons.
148
+
149
+ ### Estimators (`skxperiments.estimators`)
150
+
151
+ - **`DifferenceInMeans`** — Simple ATE for `CRDAssignment`.
152
+ - **`BlockedDifferenceInMeans`** — Size-weighted ATE for `BlockedAssignment`.
153
+ - **`FactorialEstimator`** — All 2^K − 1 effects (main effects and interactions of all orders) for `FactorialAssignment`. Returns `Results` in multi-effect mode.
154
+ - **`LinEstimator`** — Covariate-adjusted ATE via OLS with treatment-covariate interaction (Lin 2013).
155
+ - **`CUPED`** — Variance reduction with a pre-experiment covariate (Deng et al. 2013).
156
+
157
+ All estimators return `Results` with point estimates only; standard errors and confidence
158
+ intervals come from inference classes in `skxperiments.inference`.
159
+
160
+ ### Inference (`skxperiments.inference`)
161
+
162
+ - **`RandomizationTest`** — Fisher's sharp null hypothesis test via Monte Carlo permutations.
163
+ Uses `Assignment.draw()` to respect the original randomization mechanism (including
164
+ rerandomization Mahalanobis criterion and within-block proportions). P-value via the
165
+ Phipson & Smyth (2010) continuity correction. Three alternatives: `"two-sided"`,
166
+ `"greater"`, `"less"`. Works with `DifferenceInMeans`, `BlockedDifferenceInMeans`,
167
+ `LinEstimator`, and `CUPED`.
168
+ - **`MultipleTestingCorrection`** — Bonferroni, Holm (FWER) and Benjamini-Hochberg (FDR)
169
+ correction over a family of p-values. Accepts a multi-effect `Results` (typical from
170
+ `FactorialEstimator` after inference) or a list of scalar `Results` (for comparing
171
+ independent experiments). Clips corrected p-values to `[0, 1]`; preserves originals
172
+ in `Results.extra["original_p_values"]`. Default method is Holm.
173
+ - **`NeymanCI`** — Neyman variance-based two-sided Wald confidence interval and p-value
174
+ for finite-population inference. Conservative variance for `CRDAssignment` (including
175
+ rerandomized) and stratified variance for `BlockedAssignment`, consistent with the
176
+ size-weighted ATE of `BlockedDifferenceInMeans`. Wraps `DifferenceInMeans` or
177
+ `BlockedDifferenceInMeans`; rejects superpopulation mode (use `BootstrapCI`).
178
+ - **`BootstrapCI`** — Bootstrap confidence interval (percentile or BCa) for
179
+ superpopulation inference. Resamples units within each arm (within each
180
+ block-by-arm stratum for blocked designs) and refits the estimator, so it
181
+ works with any scalar estimator (`DifferenceInMeans`, `BlockedDifferenceInMeans`,
182
+ `LinEstimator`, `CUPED`).
183
+
184
+ ### Diagnostics (`skxperiments.diagnostics`)
185
+
186
+ - **`SRMTest`** — Sample Ratio Mismatch via chi-squared: observed vs. intended arm/cell
187
+ allocation, flagged when the p-value falls below a threshold (default 0.001). Two-arm and factorial.
188
+ - **`BalanceReport`** — Standardized mean differences (SMD) per covariate, flagging
189
+ `|SMD| > 0.1`. Consumes `check_balance`; `to_dataframe()` feeds the Phase 7 Love plot.
190
+ - **`AATest`** — A/A calibration: re-randomizes a design on fixed data, runs a wrapped
191
+ inference, and checks the false-positive rate (exact binomial test) and p-value
192
+ uniformity (KS).
193
+
194
+ Each returns a dedicated result with `to_diagnostics_report()` for pipeline aggregation.
195
+
196
+ ### Pipeline (`skxperiments.pipeline`)
197
+
198
+ - **`ExperimentPipeline`** — Composes an inference (with its estimator) and diagnostics,
199
+ runs them on an `Assignment`, and bundles the result. Runs `SRMTest` automatically;
200
+ diagnostics are best-effort and flags are surfaced without halting (opt-in
201
+ `raise_on_flag`).
202
+ - **`ExperimentComparison`** — Compares independent experiments, applying
203
+ `MultipleTestingCorrection` across the family. Returns a comparison table ready for the
204
+ forest plot. Subgroup comparison is deferred to v2.
205
+
206
+ ### Reporting (`skxperiments.reporting`)
207
+
208
+ Requires the optional `viz` extra (`pip install "skxperiments[viz]"`).
209
+
210
+ - **Plots** — diagnostic (`plot_balance`, `plot_srm`, `plot_null_distribution`) and result
211
+ (`plot_effect`, `plot_forest`, `plot_interaction`, `plot_power_curve`). Each returns a
212
+ matplotlib `Axes` and accepts an optional `ax`.
213
+ - **`ExperimentReport`** — Renders a `PipelineResult` as a self-contained static HTML page
214
+ (results table, diagnostics, embedded plots). `include_plots=False` skips the optional
215
+ dependency.
216
+
217
+ ## What's next (v2)
218
+
219
+ The v1 feature set is complete. Deferred items live in [`ROADMAP.md`](https://github.com/gusbruschi13/skxperiments/blob/HEAD/ROADMAP.md):
220
+ `SequentialTest` (mSPRT/always-valid), Benjamini-Yekutieli correction, covariate-adjusted
221
+ variance in `NeymanCI`, studentized and block-resampling bootstrap, subgroup comparison,
222
+ a plotly backend, and interactive dashboards.
223
+
224
+ ## Contributing
225
+
226
+ Contributions are welcome. Please open an issue to discuss substantial changes before submitting
227
+ a pull request. The architecture has documented design decisions that should be respected — see
228
+ [`ROADMAP.md`](https://github.com/gusbruschi13/skxperiments/blob/HEAD/ROADMAP.md), the project notes in `CHANGELOG.md`, and the docstrings of base
229
+ classes (`BaseAssignment`, `BaseEstimator`, `Results`) for the contracts new code must follow.
230
+
231
+ Run the test suite with:
232
+
233
+ ```bash
234
+ pytest tests/ -v
235
+ ```
236
+
237
+ Skip slow statistical tests:
238
+
239
+ ```bash
240
+ pytest tests/ -v -m "not slow"
241
+ ```
242
+
243
+ ## License
244
+
245
+ MIT.
246
+
247
+ ## References
248
+
249
+ The implementations follow standard textbook formulations:
250
+
251
+ - Imbens, G. W., & Rubin, D. B. (2015). *Causal inference for statistics, social, and biomedical
252
+ sciences: An introduction.* Cambridge University Press.
253
+ - Lin, W. (2013). Agnostic notes on regression adjustments to experimental data: Reexamining
254
+ Freedman's critique. *Annals of Applied Statistics*, 7(1), 295–318.
255
+ - Morgan, K. L., & Rubin, D. B. (2012). Rerandomization to improve covariate balance in
256
+ experiments. *Annals of Statistics*, 40(2), 1263–1282.
257
+ - Deng, A., Xu, Y., Kohavi, R., & Walker, T. (2013). Improving the sensitivity of online
258
+ controlled experiments by utilizing pre-experiment data. *WSDM 2013*.
259
+ - Box, G. E. P., Hunter, J. S., & Hunter, W. G. (2005). *Statistics for experimenters: Design,
260
+ innovation, and discovery* (2nd ed.). Wiley.
261
+ - Cohen, J. (1988). *Statistical power analysis for the behavioral sciences* (2nd ed.). Routledge.
262
+ - Austin, P. C. (2009). Balance diagnostics for comparing the distribution of baseline covariates
263
+ between treatment groups in propensity-score matched samples. *Statistics in Medicine*, 28(25), 3083–3107.
264
+ - Phipson, B., & Smyth, G. K. (2010). Permutation P-values should never be zero: calculating
265
+ exact P-values when permutations are randomly drawn. *Statistical Applications in Genetics
266
+ and Molecular Biology*, 9(1).
267
+ - Fisher, R. A. (1935). *The Design of Experiments*. Oliver and Boyd.
268
+ - Holm, S. (1979). A simple sequentially rejective multiple test procedure. *Scandinavian
269
+ Journal of Statistics*, 6(2), 65–70.
270
+ - Benjamini, Y., & Hochberg, Y. (1995). Controlling the false discovery rate: a practical and
271
+ powerful approach to multiple testing. *Journal of the Royal Statistical Society: Series B*,
272
+ 57(1), 289–300.
@@ -0,0 +1,36 @@
1
+ skxperiments/__init__.py,sha256=zQDBS0g4SM9rsIDgYxaKbWzQqatRFQpWpkEg9j9esgs,139
2
+ skxperiments/pipeline.py,sha256=2njPU2qxu0vdeQFi2i0RLBm81zc6xDFn8v2HXMNp6k8,13343
3
+ skxperiments/core/__init__.py,sha256=V53NjlelwN8lSwHq_f0UhgVIneL2Q2dNSYxSyz-B-5w,1270
4
+ skxperiments/core/assignment.py,sha256=gq96xZD9_b7jDSizAdRyX4jOymzGZj2UwJkaoz1dHj8,20559
5
+ skxperiments/core/base.py,sha256=jQ3yc2wzmvm1wKXtbdolw3Li60KY4l1COcIXZPwhAUw,15829
6
+ skxperiments/core/exceptions.py,sha256=6WpfdCNDER9w5NQgHu-2LQNM2JdDuB2Bb03L2SMzY9A,4135
7
+ skxperiments/core/potential_outcomes.py,sha256=yvcZYT_mjLoX_ijuZHGSmrj3wqcTVFMcht_QcsYSBjs,4794
8
+ skxperiments/core/results.py,sha256=ECQsJ4nIO8PFuO-Ac9si474E-1xB4hZYm0Lm4mScDmE,24173
9
+ skxperiments/design/__init__.py,sha256=So_cHRb285y3UzGebwnw6zRswKOcxpNyiZHgE2CeCc0,692
10
+ skxperiments/design/balance.py,sha256=B6BKhkJfenwKjc_0bLNrbXRB-Gcqp3R4n2tBtLmZZis,6102
11
+ skxperiments/design/blocked_crd.py,sha256=5letYzEMkOF_F7XRMkj0cpF5fFmQry_lUaCBr_NCh3w,5491
12
+ skxperiments/design/crd.py,sha256=hfYAhWRo13z5SEkx_raYlg-UyzrHTTqxYAS8H1atMq0,5452
13
+ skxperiments/design/factorial.py,sha256=MZ9CLbHl4eDyirIQt6detjaRwhYn7NLTov_EJvz50FQ,5968
14
+ skxperiments/design/power.py,sha256=hbbr9d7lvptKyvb33vLFj369izQ6kr7P-B1T0Hr1lcI,7619
15
+ skxperiments/design/rerandomized_crd.py,sha256=d7hwIXVpDRqoQag8dmZIul6KAPff9TyJ16kWEtjSezM,11768
16
+ skxperiments/diagnostics/__init__.py,sha256=IMqNGzyNDErXECJDAe614wyDCthtXUENF4026AGw9nE,548
17
+ skxperiments/diagnostics/aa_test.py,sha256=ny7jyYEro2Q7qgwN_v5hdkzdUIBIJexe3PefGeLuA_I,10876
18
+ skxperiments/diagnostics/balance_report.py,sha256=5RxraXUSPxNOGcXi1Hbqm7qzG8-hxh4CbGRiYZFvQZI,7938
19
+ skxperiments/diagnostics/srm.py,sha256=lVU3M0VU0SX5_uCg2ATeNZ1NcYzVR1sTa1A5szS_jgE,12287
20
+ skxperiments/estimators/__init__.py,sha256=q8ONWo11PPkuUwkDFG-wbTbJaTbQppNknPle4rJvhO4,752
21
+ skxperiments/estimators/blocked_difference_in_means.py,sha256=9Wve756AlOziJVGjkObTxnEfcjDYSZZ31Gd4hkKOiGk,7538
22
+ skxperiments/estimators/cuped.py,sha256=5qHSutrQxtdLfyj4KtLjTL4ENYmsnVuZJ7K3sQ-T4XM,10642
23
+ skxperiments/estimators/difference_in_means.py,sha256=ZE-cP42hImaBMSidxwo-7cSw9l_VUubIndAJwSDBJM0,5662
24
+ skxperiments/estimators/factorial_estimator.py,sha256=tPxRI2g1i3BZdQX-Am68MDDA3bOJV0vWZ5AWdl7rap8,7754
25
+ skxperiments/estimators/lin_estimator.py,sha256=D6hiSmwd2Uyx5PJl7k0hCHOC2lNDl1u0wayMgKheNbk,11001
26
+ skxperiments/inference/__init__.py,sha256=rs_MMI7O3zhrWYoqGABy7JO9qKbL7dbReRkMQJJGrjY,545
27
+ skxperiments/inference/bootstrap.py,sha256=PPFvOjBSt5C6l0iMVtOQXnFBbH7LEj9aauitsh3kQlo,17893
28
+ skxperiments/inference/multiple.py,sha256=y9KiW693iYIdqK58K1sBv-42Y3Iw39X9KSL3s21m4GA,13573
29
+ skxperiments/inference/neyman.py,sha256=wTNZmnpLNEevd0apOU-GUWqF4kGM_wVlPNriMkxazzw,14903
30
+ skxperiments/inference/randomization_test.py,sha256=M4uvNQDaweinNx5Jg2hvFFdO6UYQy8ltaGzcc9WvitA,13123
31
+ skxperiments/reporting/__init__.py,sha256=njiThCyqBGT0JUaWa0MCDvOY8K07L3Bw8Wwc2XoHX_g,844
32
+ skxperiments/reporting/plots.py,sha256=JPzLmoZu46Vv0gpeyDnEWiMX4AMuZBp4rzEx8w9fVnI,12731
33
+ skxperiments/reporting/summary.py,sha256=XcEUVPN8MxVHXdQR25PBc9DIGtIZM7LhDw0_oVnvM2Y,6759
34
+ skxperiments-0.1.0.dev0.dist-info/METADATA,sha256=mV0bukgv6p_FOnRnf0dyzpihzxepoihAcacKF0_ZV5o,12924
35
+ skxperiments-0.1.0.dev0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
36
+ skxperiments-0.1.0.dev0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any