skxperiments 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. skxperiments/__init__.py +5 -0
  2. skxperiments/core/__init__.py +42 -0
  3. skxperiments/core/assignment.py +589 -0
  4. skxperiments/core/base.py +512 -0
  5. skxperiments/core/exceptions.py +145 -0
  6. skxperiments/core/potential_outcomes.py +168 -0
  7. skxperiments/core/results.py +624 -0
  8. skxperiments/design/__init__.py +22 -0
  9. skxperiments/design/balance.py +182 -0
  10. skxperiments/design/blocked_crd.py +157 -0
  11. skxperiments/design/crd.py +162 -0
  12. skxperiments/design/factorial.py +174 -0
  13. skxperiments/design/power.py +233 -0
  14. skxperiments/design/rerandomized_crd.py +319 -0
  15. skxperiments/diagnostics/__init__.py +21 -0
  16. skxperiments/diagnostics/aa_test.py +277 -0
  17. skxperiments/diagnostics/balance_report.py +224 -0
  18. skxperiments/diagnostics/srm.py +327 -0
  19. skxperiments/estimators/__init__.py +23 -0
  20. skxperiments/estimators/blocked_difference_in_means.py +197 -0
  21. skxperiments/estimators/cuped.py +280 -0
  22. skxperiments/estimators/difference_in_means.py +161 -0
  23. skxperiments/estimators/factorial_estimator.py +213 -0
  24. skxperiments/estimators/lin_estimator.py +298 -0
  25. skxperiments/inference/__init__.py +17 -0
  26. skxperiments/inference/bootstrap.py +450 -0
  27. skxperiments/inference/multiple.py +365 -0
  28. skxperiments/inference/neyman.py +386 -0
  29. skxperiments/inference/randomization_test.py +319 -0
  30. skxperiments/pipeline.py +366 -0
  31. skxperiments/reporting/__init__.py +30 -0
  32. skxperiments/reporting/plots.py +411 -0
  33. skxperiments/reporting/summary.py +185 -0
  34. skxperiments-0.1.0.dev0.dist-info/METADATA +272 -0
  35. skxperiments-0.1.0.dev0.dist-info/RECORD +36 -0
  36. skxperiments-0.1.0.dev0.dist-info/WHEEL +4 -0
@@ -0,0 +1,168 @@
1
+ """Potential outcomes framework for unit-level causal quantities.
2
+
3
+ This module provides the PotentialOutcomes class, which represents
4
+ Y(0) and Y(1) for each unit. In real experiments, both potential
5
+ outcomes are never simultaneously observed; this class is used in
6
+ synthetic data generators and property-based tests.
7
+ """
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from skxperiments.core.exceptions import InsufficientDataError
13
+
14
+
15
+ class PotentialOutcomes:
16
+ """Represents unit-level potential outcomes Y(0) and Y(1).
17
+
18
+ Parameters
19
+ ----------
20
+ y0 : array-like
21
+ Potential outcomes under control Y(0).
22
+ y1 : array-like
23
+ Potential outcomes under treatment Y(1).
24
+ unit_ids : array-like or None, optional
25
+ Identifiers for each unit, by default None.
26
+
27
+ Raises
28
+ ------
29
+ InsufficientDataError
30
+ If y0 and y1 have different lengths, are empty, or unit_ids
31
+ has a different length.
32
+
33
+ Examples
34
+ --------
35
+ >>> import numpy as np
36
+ >>> po = PotentialOutcomes(
37
+ ... y0=np.array([1.0, 2.0, 3.0]),
38
+ ... y1=np.array([2.0, 3.0, 5.0]),
39
+ ... )
40
+ >>> po.ate
41
+ 1.3333333333333333
42
+ >>> po.ite
43
+ array([1., 1., 2.])
44
+ """
45
+
46
+ def __init__(
47
+ self,
48
+ y0: np.ndarray | list,
49
+ y1: np.ndarray | list,
50
+ unit_ids: np.ndarray | list | None = None,
51
+ ) -> None:
52
+ self._y0 = np.asarray(y0, dtype=float)
53
+ self._y1 = np.asarray(y1, dtype=float)
54
+
55
+ if self._y0.size == 0:
56
+ raise InsufficientDataError(
57
+ context="PotentialOutcomes",
58
+ minimum=1,
59
+ received=0,
60
+ )
61
+
62
+ if self._y1.size == 0:
63
+ raise InsufficientDataError(
64
+ context="PotentialOutcomes",
65
+ minimum=1,
66
+ received=0,
67
+ )
68
+
69
+ if len(self._y0) != len(self._y1):
70
+ raise InsufficientDataError(
71
+ context="PotentialOutcomes (y0 and y1 must have the same length)",
72
+ minimum=len(self._y0),
73
+ received=len(self._y1),
74
+ )
75
+
76
+ if unit_ids is not None:
77
+ self._unit_ids: np.ndarray | None = np.asarray(unit_ids)
78
+ if len(self._unit_ids) != len(self._y0):
79
+ raise InsufficientDataError(
80
+ context="PotentialOutcomes (unit_ids must match y0 length)",
81
+ minimum=len(self._y0),
82
+ received=len(self._unit_ids),
83
+ )
84
+ else:
85
+ self._unit_ids = None
86
+
87
+ @property
88
+ def ite(self) -> np.ndarray:
89
+ """Individual Treatment Effect for each unit.
90
+
91
+ Returns
92
+ -------
93
+ np.ndarray
94
+ Array of individual treatment effects (y1 - y0).
95
+ """
96
+ return self._y1 - self._y0
97
+
98
+ @property
99
+ def ate(self) -> float:
100
+ """Average Treatment Effect across all units.
101
+
102
+ Returns
103
+ -------
104
+ float
105
+ Mean of individual treatment effects.
106
+ """
107
+ return float(np.mean(self.ite))
108
+
109
+ @property
110
+ def n(self) -> int:
111
+ """Number of units.
112
+
113
+ Returns
114
+ -------
115
+ int
116
+ Total number of units.
117
+ """
118
+ return len(self._y0)
119
+
120
+ def to_dataframe(self) -> pd.DataFrame:
121
+ """Convert potential outcomes to a pandas DataFrame.
122
+
123
+ Returns
124
+ -------
125
+ pd.DataFrame
126
+ DataFrame with columns y0, y1, ite. If unit_ids were
127
+ provided, includes unit_id as the first column.
128
+ """
129
+ data: dict[str, np.ndarray] = {}
130
+
131
+ if self._unit_ids is not None:
132
+ data["unit_id"] = self._unit_ids
133
+
134
+ data["y0"] = self._y0
135
+ data["y1"] = self._y1
136
+ data["ite"] = self.ite
137
+
138
+ return pd.DataFrame(data)
139
+
140
+ def summary(self) -> str:
141
+ """Generate a text summary of the potential outcomes.
142
+
143
+ Returns
144
+ -------
145
+ str
146
+ Formatted summary string with key statistics.
147
+ """
148
+ ite = self.ite
149
+ lines = [
150
+ "PotentialOutcomes Summary",
151
+ "-------------------------",
152
+ f"N units : {self.n}",
153
+ f"ATE : {self.ate:.4f}",
154
+ f"ITE std : {float(np.std(ite, ddof=0)):.4f}",
155
+ f"ITE min : {float(np.min(ite)):.4f}",
156
+ f"ITE max : {float(np.max(ite)):.4f}",
157
+ ]
158
+ return "\n".join(lines)
159
+
160
+ def __repr__(self) -> str:
161
+ """Return string representation.
162
+
163
+ Returns
164
+ -------
165
+ str
166
+ Compact representation with n and ate.
167
+ """
168
+ return f"PotentialOutcomes(n={self.n}, ate={self.ate:.4f})"