skxperiments 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skxperiments/__init__.py +5 -0
- skxperiments/core/__init__.py +42 -0
- skxperiments/core/assignment.py +589 -0
- skxperiments/core/base.py +512 -0
- skxperiments/core/exceptions.py +145 -0
- skxperiments/core/potential_outcomes.py +168 -0
- skxperiments/core/results.py +624 -0
- skxperiments/design/__init__.py +22 -0
- skxperiments/design/balance.py +182 -0
- skxperiments/design/blocked_crd.py +157 -0
- skxperiments/design/crd.py +162 -0
- skxperiments/design/factorial.py +174 -0
- skxperiments/design/power.py +233 -0
- skxperiments/design/rerandomized_crd.py +319 -0
- skxperiments/diagnostics/__init__.py +21 -0
- skxperiments/diagnostics/aa_test.py +277 -0
- skxperiments/diagnostics/balance_report.py +224 -0
- skxperiments/diagnostics/srm.py +327 -0
- skxperiments/estimators/__init__.py +23 -0
- skxperiments/estimators/blocked_difference_in_means.py +197 -0
- skxperiments/estimators/cuped.py +280 -0
- skxperiments/estimators/difference_in_means.py +161 -0
- skxperiments/estimators/factorial_estimator.py +213 -0
- skxperiments/estimators/lin_estimator.py +298 -0
- skxperiments/inference/__init__.py +17 -0
- skxperiments/inference/bootstrap.py +450 -0
- skxperiments/inference/multiple.py +365 -0
- skxperiments/inference/neyman.py +386 -0
- skxperiments/inference/randomization_test.py +319 -0
- skxperiments/pipeline.py +366 -0
- skxperiments/reporting/__init__.py +30 -0
- skxperiments/reporting/plots.py +411 -0
- skxperiments/reporting/summary.py +185 -0
- skxperiments-0.1.0.dev0.dist-info/METADATA +272 -0
- skxperiments-0.1.0.dev0.dist-info/RECORD +36 -0
- skxperiments-0.1.0.dev0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Potential outcomes framework for unit-level causal quantities.
|
|
2
|
+
|
|
3
|
+
This module provides the PotentialOutcomes class, which represents
|
|
4
|
+
Y(0) and Y(1) for each unit. In real experiments, both potential
|
|
5
|
+
outcomes are never simultaneously observed; this class is used in
|
|
6
|
+
synthetic data generators and property-based tests.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from skxperiments.core.exceptions import InsufficientDataError
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PotentialOutcomes:
|
|
16
|
+
"""Represents unit-level potential outcomes Y(0) and Y(1).
|
|
17
|
+
|
|
18
|
+
Parameters
|
|
19
|
+
----------
|
|
20
|
+
y0 : array-like
|
|
21
|
+
Potential outcomes under control Y(0).
|
|
22
|
+
y1 : array-like
|
|
23
|
+
Potential outcomes under treatment Y(1).
|
|
24
|
+
unit_ids : array-like or None, optional
|
|
25
|
+
Identifiers for each unit, by default None.
|
|
26
|
+
|
|
27
|
+
Raises
|
|
28
|
+
------
|
|
29
|
+
InsufficientDataError
|
|
30
|
+
If y0 and y1 have different lengths, are empty, or unit_ids
|
|
31
|
+
has a different length.
|
|
32
|
+
|
|
33
|
+
Examples
|
|
34
|
+
--------
|
|
35
|
+
>>> import numpy as np
|
|
36
|
+
>>> po = PotentialOutcomes(
|
|
37
|
+
... y0=np.array([1.0, 2.0, 3.0]),
|
|
38
|
+
... y1=np.array([2.0, 3.0, 5.0]),
|
|
39
|
+
... )
|
|
40
|
+
>>> po.ate
|
|
41
|
+
1.3333333333333333
|
|
42
|
+
>>> po.ite
|
|
43
|
+
array([1., 1., 2.])
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
def __init__(
|
|
47
|
+
self,
|
|
48
|
+
y0: np.ndarray | list,
|
|
49
|
+
y1: np.ndarray | list,
|
|
50
|
+
unit_ids: np.ndarray | list | None = None,
|
|
51
|
+
) -> None:
|
|
52
|
+
self._y0 = np.asarray(y0, dtype=float)
|
|
53
|
+
self._y1 = np.asarray(y1, dtype=float)
|
|
54
|
+
|
|
55
|
+
if self._y0.size == 0:
|
|
56
|
+
raise InsufficientDataError(
|
|
57
|
+
context="PotentialOutcomes",
|
|
58
|
+
minimum=1,
|
|
59
|
+
received=0,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
if self._y1.size == 0:
|
|
63
|
+
raise InsufficientDataError(
|
|
64
|
+
context="PotentialOutcomes",
|
|
65
|
+
minimum=1,
|
|
66
|
+
received=0,
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
if len(self._y0) != len(self._y1):
|
|
70
|
+
raise InsufficientDataError(
|
|
71
|
+
context="PotentialOutcomes (y0 and y1 must have the same length)",
|
|
72
|
+
minimum=len(self._y0),
|
|
73
|
+
received=len(self._y1),
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
if unit_ids is not None:
|
|
77
|
+
self._unit_ids: np.ndarray | None = np.asarray(unit_ids)
|
|
78
|
+
if len(self._unit_ids) != len(self._y0):
|
|
79
|
+
raise InsufficientDataError(
|
|
80
|
+
context="PotentialOutcomes (unit_ids must match y0 length)",
|
|
81
|
+
minimum=len(self._y0),
|
|
82
|
+
received=len(self._unit_ids),
|
|
83
|
+
)
|
|
84
|
+
else:
|
|
85
|
+
self._unit_ids = None
|
|
86
|
+
|
|
87
|
+
@property
|
|
88
|
+
def ite(self) -> np.ndarray:
|
|
89
|
+
"""Individual Treatment Effect for each unit.
|
|
90
|
+
|
|
91
|
+
Returns
|
|
92
|
+
-------
|
|
93
|
+
np.ndarray
|
|
94
|
+
Array of individual treatment effects (y1 - y0).
|
|
95
|
+
"""
|
|
96
|
+
return self._y1 - self._y0
|
|
97
|
+
|
|
98
|
+
@property
|
|
99
|
+
def ate(self) -> float:
|
|
100
|
+
"""Average Treatment Effect across all units.
|
|
101
|
+
|
|
102
|
+
Returns
|
|
103
|
+
-------
|
|
104
|
+
float
|
|
105
|
+
Mean of individual treatment effects.
|
|
106
|
+
"""
|
|
107
|
+
return float(np.mean(self.ite))
|
|
108
|
+
|
|
109
|
+
@property
|
|
110
|
+
def n(self) -> int:
|
|
111
|
+
"""Number of units.
|
|
112
|
+
|
|
113
|
+
Returns
|
|
114
|
+
-------
|
|
115
|
+
int
|
|
116
|
+
Total number of units.
|
|
117
|
+
"""
|
|
118
|
+
return len(self._y0)
|
|
119
|
+
|
|
120
|
+
def to_dataframe(self) -> pd.DataFrame:
|
|
121
|
+
"""Convert potential outcomes to a pandas DataFrame.
|
|
122
|
+
|
|
123
|
+
Returns
|
|
124
|
+
-------
|
|
125
|
+
pd.DataFrame
|
|
126
|
+
DataFrame with columns y0, y1, ite. If unit_ids were
|
|
127
|
+
provided, includes unit_id as the first column.
|
|
128
|
+
"""
|
|
129
|
+
data: dict[str, np.ndarray] = {}
|
|
130
|
+
|
|
131
|
+
if self._unit_ids is not None:
|
|
132
|
+
data["unit_id"] = self._unit_ids
|
|
133
|
+
|
|
134
|
+
data["y0"] = self._y0
|
|
135
|
+
data["y1"] = self._y1
|
|
136
|
+
data["ite"] = self.ite
|
|
137
|
+
|
|
138
|
+
return pd.DataFrame(data)
|
|
139
|
+
|
|
140
|
+
def summary(self) -> str:
|
|
141
|
+
"""Generate a text summary of the potential outcomes.
|
|
142
|
+
|
|
143
|
+
Returns
|
|
144
|
+
-------
|
|
145
|
+
str
|
|
146
|
+
Formatted summary string with key statistics.
|
|
147
|
+
"""
|
|
148
|
+
ite = self.ite
|
|
149
|
+
lines = [
|
|
150
|
+
"PotentialOutcomes Summary",
|
|
151
|
+
"-------------------------",
|
|
152
|
+
f"N units : {self.n}",
|
|
153
|
+
f"ATE : {self.ate:.4f}",
|
|
154
|
+
f"ITE std : {float(np.std(ite, ddof=0)):.4f}",
|
|
155
|
+
f"ITE min : {float(np.min(ite)):.4f}",
|
|
156
|
+
f"ITE max : {float(np.max(ite)):.4f}",
|
|
157
|
+
]
|
|
158
|
+
return "\n".join(lines)
|
|
159
|
+
|
|
160
|
+
def __repr__(self) -> str:
|
|
161
|
+
"""Return string representation.
|
|
162
|
+
|
|
163
|
+
Returns
|
|
164
|
+
-------
|
|
165
|
+
str
|
|
166
|
+
Compact representation with n and ate.
|
|
167
|
+
"""
|
|
168
|
+
return f"PotentialOutcomes(n={self.n}, ate={self.ate:.4f})"
|