abtestwise 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
abtestwise/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ """abtestwise: a lightweight toolkit for binary A/B experiment analysis.
2
+
3
+ Version 0.1 combines frequentist and Bayesian summaries for binary proportions
4
+ using aggregate count data.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from .binary import BinaryABTest
10
+ from .result import BinaryABResult
11
+
12
+ __version__ = "0.1.0"
13
+
14
+ __all__ = ["BinaryABTest", "BinaryABResult", "__version__"]
abtestwise/bayesian.py ADDED
@@ -0,0 +1,77 @@
1
+ """Bayesian analysis: beta-binomial posterior simulation.
2
+
3
+ For a binary metric with a Beta prior, the posterior for each group's true
4
+ success rate is conjugate and also Beta:
5
+
6
+ posterior = Beta(prior_alpha + successes, prior_beta + failures)
7
+
8
+ We draw samples from each group's posterior, form the lift distribution
9
+ (Treatment B - Control A), and summarize it using NumPy operations.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import numpy as np
15
+
16
+
17
def posterior_samples(
    successes: int,
    total: int,
    prior_alpha: float,
    prior_beta: float,
    n_simulations: int,
    rng: np.random.Generator,
) -> np.ndarray:
    """Draw posterior samples of a group's true success rate.

    The posterior is ``Beta(prior_alpha + successes, prior_beta + failures)``
    where ``failures = total - successes``.

    Args:
        successes: Number of observed successes in the group.
        total: Number of observed trials in the group.
        prior_alpha: Alpha parameter of the Beta prior.
        prior_beta: Beta parameter of the Beta prior.
        n_simulations: Number of posterior draws to return.
        rng: NumPy random generator that supplies the draws.

    Returns:
        Array of ``n_simulations`` draws from the Beta posterior.
    """
    failures = total - successes
    return rng.beta(
        prior_alpha + successes,
        prior_beta + failures,
        size=n_simulations,
    )
30
+
31
+
32
def simulate_lift_samples(
    control_successes: int,
    control_total: int,
    treatment_successes: int,
    treatment_total: int,
    prior_alpha: float,
    prior_beta: float,
    n_simulations: int,
    rng: np.random.Generator,
) -> np.ndarray:
    """Simulate the posterior of the lift, defined as treatment rate - control rate.

    Both arms share the same prior and the same generator. The control arm is
    sampled first and the treatment arm second, so the draw order from ``rng``
    is fixed.
    """
    shared = (prior_alpha, prior_beta, n_simulations, rng)
    control_draws = posterior_samples(control_successes, control_total, *shared)
    treatment_draws = posterior_samples(
        treatment_successes, treatment_total, *shared
    )
    return treatment_draws - control_draws
55
+
56
+
57
def credible_interval_bounds(
    lift_samples: np.ndarray, credible_interval: float
) -> tuple[float, float]:
    """Return the equal-tailed credible interval of the lift samples.

    The probability mass outside the interval is split evenly between the two
    tails, e.g. a 0.95 level yields the 2.5th and 97.5th percentiles.
    """
    tail_mass = (1.0 - credible_interval) / 2.0
    low, high = (
        float(np.quantile(lift_samples, level))
        for level in (tail_mass, 1.0 - tail_mass)
    )
    return low, high
68
+
69
+
70
def expected_loss_treatment(lift_samples: np.ndarray) -> float:
    """Average shortfall incurred by shipping treatment: mean(max(-lift, 0))."""
    shortfall = np.maximum(-lift_samples, 0.0)
    return float(np.mean(shortfall))
73
+
74
+
75
def expected_loss_control(lift_samples: np.ndarray) -> float:
    """Average shortfall incurred by keeping control: mean(max(lift, 0))."""
    missed_gain = np.maximum(lift_samples, 0.0)
    return float(np.mean(missed_gain))
abtestwise/binary.py ADDED
@@ -0,0 +1,143 @@
1
+ """Run binary A/B tests from aggregate count data.
2
+
3
+ This module validates inputs, sets up the random number generator so results are
4
+ reproducible from a seed, and combines the frequentist and Bayesian helpers to
5
+ build a :class:`BinaryABResult`.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import math
11
+ from dataclasses import dataclass
12
+
13
+ import numpy as np
14
+
15
+ from . import bayesian, frequentist, validation
16
+ from .result import BinaryABResult
17
+
18
+
19
@dataclass(frozen=True)
class BinaryABTest:
    """Immutable description of a two-arm binary experiment given as counts.

    Instances are normally built with :meth:`from_counts`, which validates
    every setting; :meth:`run` then performs the analysis.
    """

    # Observed aggregate counts for Control (A) and Treatment (B).
    control_successes: int
    control_total: int
    treatment_successes: int
    treatment_total: int
    # Parameters of the Beta prior shared by both arms.
    prior_alpha: float
    prior_beta: float
    # Posterior simulation settings.
    n_simulations: int
    credible_interval: float
    seed: int | None

    @classmethod
    def from_counts(
        cls,
        control_successes: int,
        control_total: int,
        treatment_successes: int,
        treatment_total: int,
        *,
        prior_alpha: float = 1.0,
        prior_beta: float = 1.0,
        n_simulations: int = 100_000,
        credible_interval: float = 0.95,
        seed: int | None = None,
    ) -> "BinaryABTest":
        """Validate the inputs and build a test from aggregate counts.

        The four counts may be passed positionally; every other setting must
        be passed by keyword. The default prior is Beta(1, 1). Each check is
        delegated to the ``validation`` module.
        """
        arms = (
            ("control_successes", control_successes, "control_total", control_total),
            (
                "treatment_successes",
                treatment_successes,
                "treatment_total",
                treatment_total,
            ),
        )
        # Type/range checks for every count come first, then the cross-field
        # "successes <= total" checks.
        for successes_name, successes, total_name, total in arms:
            validation.validate_count(successes_name, successes)
            validation.validate_total(total_name, total)
        for arm in arms:
            validation.validate_successes_le_total(*arm)

        priors = (("prior_alpha", prior_alpha), ("prior_beta", prior_beta))
        for prior_name, prior_value in priors:
            validation.validate_prior(prior_name, prior_value)
        validation.validate_n_simulations(n_simulations)
        validation.validate_credible_interval(credible_interval)
        validation.validate_seed(seed)

        settings = {
            "control_successes": control_successes,
            "control_total": control_total,
            "treatment_successes": treatment_successes,
            "treatment_total": treatment_total,
            # Priors are stored as floats even when given as ints.
            "prior_alpha": float(prior_alpha),
            "prior_beta": float(prior_beta),
            "n_simulations": n_simulations,
            "credible_interval": credible_interval,
            "seed": seed,
        }
        return cls(**settings)

    def run(self) -> BinaryABResult:
        """Compute the frequentist and Bayesian summaries for this test."""
        counts = (
            self.control_successes,
            self.control_total,
            self.treatment_successes,
            self.treatment_total,
        )

        rate_control = self.control_successes / self.control_total
        rate_treatment = self.treatment_successes / self.treatment_total
        lift_abs = rate_treatment - rate_control

        # A zero control rate makes the relative lift undefined; report NaN.
        if rate_control != 0:
            lift_rel = lift_abs / rate_control
        else:
            lift_rel = math.nan

        # Frequentist part.
        z_score, p_val = frequentist.two_proportion_z_test(*counts)

        # Bayesian part: one generator, seeded once, feeds both arms.
        generator = np.random.default_rng(self.seed)
        draws = bayesian.simulate_lift_samples(
            *counts,
            self.prior_alpha,
            self.prior_beta,
            self.n_simulations,
            generator,
        )
        ci_low, ci_high = bayesian.credible_interval_bounds(
            draws, self.credible_interval
        )

        return BinaryABResult(
            control_successes=self.control_successes,
            control_total=self.control_total,
            treatment_successes=self.treatment_successes,
            treatment_total=self.treatment_total,
            prior_alpha=self.prior_alpha,
            prior_beta=self.prior_beta,
            n_simulations=self.n_simulations,
            credible_interval=self.credible_interval,
            seed=self.seed,
            control_rate=rate_control,
            treatment_rate=rate_treatment,
            absolute_lift=lift_abs,
            relative_lift=lift_rel,
            z_statistic=z_score,
            p_value=p_val,
            posterior_mean_lift=float(np.mean(draws)),
            posterior_median_lift=float(np.median(draws)),
            prob_treatment_better=float(np.mean(draws > 0)),
            prob_control_better=float(np.mean(draws < 0)),
            credible_interval_bounds=(ci_low, ci_high),
            expected_loss_treatment=bayesian.expected_loss_treatment(draws),
            expected_loss_control=bayesian.expected_loss_control(draws),
            lift_samples=draws,
        )
@@ -0,0 +1,48 @@
1
+ """Frequentist two-sided pooled two-proportion z-test."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+
7
+ from scipy.stats import norm
8
+
9
+
10
def two_proportion_z_test(
    control_successes: int,
    control_total: int,
    treatment_successes: int,
    treatment_total: int,
) -> tuple[float, float]:
    """Run a two-sided pooled two-proportion z-test.

    The pooled estimate combines both arms under the null hypothesis that the
    two proportions are equal:

        p_pool = (x_c + x_t) / (n_c + n_t)
        se     = sqrt(p_pool * (1 - p_pool) * (1/n_c + 1/n_t))
        z      = (rate_t - rate_c) / se
        p      = 2 * (1 - Phi(|z|))

    Args:
        control_successes: Successes observed in the control arm.
        control_total: Trials observed in the control arm (must be non-zero).
        treatment_successes: Successes observed in the treatment arm.
        treatment_total: Trials observed in the treatment arm (must be non-zero).

    Returns:
        ``(z_statistic, p_value)``. If the standard error is zero, the result
        is ``(0.0, 1.0)`` to avoid dividing by zero.
    """
    control_rate = control_successes / control_total
    treatment_rate = treatment_successes / treatment_total

    pooled_rate = (control_successes + treatment_successes) / (
        control_total + treatment_total
    )
    standard_error = math.sqrt(
        pooled_rate
        * (1.0 - pooled_rate)
        * (1.0 / control_total + 1.0 / treatment_total)
    )

    if standard_error == 0.0:
        return 0.0, 1.0

    z_statistic = (treatment_rate - control_rate) / standard_error
    # Use the survival function rather than 1 - cdf: for large |z| the cdf
    # rounds to exactly 1.0 and the subtraction would collapse the p-value to
    # 0.0, whereas sf keeps the tail probability's precision.
    p_value = 2.0 * norm.sf(abs(z_statistic))

    return float(z_statistic), float(p_value)
abtestwise/plotting.py ADDED
@@ -0,0 +1,143 @@
1
+ """Plotting for binary A/B test results.
2
+
3
+ Lift is always Treatment B - Control A. Plots show the lift in percentage points,
4
+ so a raw lift of 0.025 will be displayed as +2.5 percentage points.
5
+
6
+ These functions return matplotlib Axes objects. Note that they do not call plt.show()
7
+ or save files automatically.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import TYPE_CHECKING
13
+
14
+ import numpy as np
15
+
16
+ if TYPE_CHECKING: # pragma: no cover - typing only
17
+ from matplotlib.axes import Axes
18
+
19
+ # Raw lift is a proportion difference; multiply by 100 to get percentage points.
20
+ _PCT_POINTS = 100.0
21
+
22
+
23
def plot_lift_distribution(
    lift_samples: np.ndarray,
    median_lift: float,
    credible_interval_bounds: tuple[float, float],
    credible_interval: float,
    *,
    ax: "Axes | None" = None,
    bins: int = 50,
    density: bool = True,
    title: str | None = "Posterior Distribution of Lift",
) -> "Axes":
    """Draw a histogram of the posterior lift (Treatment B - Control A).

    Vertical lines mark zero, the posterior median, and both credible-interval
    bounds. Every lift value is converted to percentage points before
    plotting. A new figure is created only when ``ax`` is not supplied; the
    axes that were drawn on are returned.
    """
    import matplotlib.pyplot as plt

    if ax is None:
        _, ax = plt.subplots(figsize=(8, 5))

    # Convert raw proportion differences to percentage points.
    scaled_samples = np.asarray(lift_samples) * _PCT_POINTS
    scaled_median = median_lift * _PCT_POINTS
    scaled_low = credible_interval_bounds[0] * _PCT_POINTS
    scaled_high = credible_interval_bounds[1] * _PCT_POINTS
    level_pct = credible_interval * 100

    histogram_style = {
        "color": "#4C72B0",
        "alpha": 0.7,
        "edgecolor": "white",
        "linewidth": 0.5,
    }
    ax.hist(scaled_samples, bins=bins, density=density, **histogram_style)

    # Reference line where the two groups perform equally.
    ax.axvline(
        0.0, color="#444444", linestyle="--", linewidth=1.5, label="No difference"
    )

    # Posterior median of the lift.
    ax.axvline(
        scaled_median,
        color="#C44E52",
        linestyle="-",
        linewidth=2.0,
        label=f"Median {scaled_median:+.2f} pp",
    )

    # Both interval bounds share a style; only the lower one carries the
    # legend label so the interval appears once in the legend.
    bound_style = {"color": "#55A868", "linestyle": ":", "linewidth": 1.8}
    ax.axvline(
        scaled_low,
        label=f"{level_pct:g}% CI [{scaled_low:+.2f}, {scaled_high:+.2f}] pp",
        **bound_style,
    )
    ax.axvline(scaled_high, **bound_style)

    ax.set_xlabel("Lift: Treatment B - Control A (percentage points)")
    if density:
        ax.set_ylabel("Density")
    else:
        ax.set_ylabel("Frequency")
    if title is not None:
        ax.set_title(title)
    ax.legend(loc="best", frameon=True, fontsize=9)
    ax.margins(x=0.02)

    return ax
88
+
89
+
90
def plot_probability_bar(
    prob_treatment_better: float,
    prob_control_better: float,
    *,
    ax: "Axes | None" = None,
    title: str | None = "Posterior Probability of Being Better",
) -> "Axes":
    """Draw two bars: P(Treatment B better) next to P(Control A better).

    A new figure is created only when ``ax`` is not supplied; the axes that
    were drawn on are returned.
    """
    import matplotlib.pyplot as plt

    if ax is None:
        _, ax = plt.subplots(figsize=(6, 5))

    probabilities = [prob_treatment_better, prob_control_better]
    bar_container = ax.bar(
        ["Treatment B better", "Control A better"],
        probabilities,
        color=["#4C72B0", "#C44E52"],
        width=0.6,
        edgecolor="white",
    )

    # A bar close to the top of the axis gets its percentage written inside
    # the bar in white so the text does not collide with the plot border;
    # any other bar gets dark text just above it.
    tall_cutoff = 0.9
    for rect, probability in zip(bar_container, probabilities):
        center_x = rect.get_x() + rect.get_width() / 2
        inside = probability >= tall_cutoff
        ax.text(
            center_x,
            probability - 0.03 if inside else probability + 0.02,
            f"{probability:.1%}",
            ha="center",
            va="top" if inside else "bottom",
            fontsize=11,
            fontweight="bold",
            color="white" if inside else "#222222",
        )

    ax.set_ylim(0.0, 1.0)
    ax.set_ylabel("Posterior probability")
    if title is not None:
        ax.set_title(title)

    return ax
abtestwise/result.py ADDED
@@ -0,0 +1,209 @@
1
+ """Result object for binary A/B tests."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ import math
9
+
10
+ import numpy as np
11
+
12
+ from . import validation
13
+
14
+
15
+ def _format_percent(x: float, digits: int = 2) -> str:
16
+ """Format a proportion as a percent string."""
17
+ if x is None or (isinstance(x, float) and math.isnan(x)):
18
+ return "undefined"
19
+ return f"{x * 100:.{digits}f}%"
20
+
21
+
22
+ def _format_pp(x: float, digits: int = 2) -> str:
23
+ """Format a proportion difference as signed percentage points."""
24
+ if x is None or (isinstance(x, float) and math.isnan(x)):
25
+ return "undefined"
26
+ return f"{x * 100:+.{digits}f}"
27
+
28
+
29
+ def _format_probability(x: float, digits: int = 1) -> str:
30
+ """Format a probability as a percent string."""
31
+ if x is None or (isinstance(x, float) and math.isnan(x)):
32
+ return "undefined"
33
+ return f"{x * 100:.{digits}f}%"
34
+
35
+
36
@dataclass(frozen=True)
class BinaryABResult:
    """Results from a binary A/B test.

    Lift is always Treatment B - Control A.

    Equality and hashing are based on the scalar fields only. The raw
    ``lift_samples`` array is excluded because NumPy arrays are neither
    hashable nor comparable to a single boolean, which would otherwise make
    ``==`` and ``hash()`` raise on result objects.
    """

    # Inputs echoed back from the test configuration.
    control_successes: int
    control_total: int
    treatment_successes: int
    treatment_total: int
    prior_alpha: float
    prior_beta: float
    n_simulations: int
    credible_interval: float
    seed: int | None

    # Observed and frequentist results.
    control_rate: float
    treatment_rate: float
    absolute_lift: float
    relative_lift: float  # NaN when the control rate is zero
    z_statistic: float
    p_value: float

    # Bayesian results.
    posterior_mean_lift: float
    posterior_median_lift: float
    prob_treatment_better: float
    prob_control_better: float
    credible_interval_bounds: tuple[float, float]
    expected_loss_treatment: float
    expected_loss_control: float

    # Posterior samples of the lift; kept out of repr, equality, and hashing.
    lift_samples: np.ndarray = field(repr=False, compare=False)

    def prob_lift_above(self, threshold: float) -> float:
        """Return the posterior probability that lift is above a threshold.

        ``threshold`` is in raw decimal units (``0.01`` is one percentage
        point). The comparison is strict: ``P(lift > threshold | data)``.
        """
        return float(np.mean(self.lift_samples > threshold))

    def prob_no_harm(self, margin: float = 0.0) -> float:
        """Posterior probability that Treatment B does no harm beyond ``margin``.

        Computes ``P(lift >= -margin | data)``, where lift is
        ``treatment_rate - control_rate``. ``margin`` is in raw decimal units, so
        ``margin=0.005`` means "Treatment B is not worse than Control A by more
        than 0.5 percentage points". With ``margin=0.0`` this is the probability
        that lift is at least zero.

        The margin is checked with ``validation.validate_margin`` before use.
        """
        validation.validate_margin(margin)
        return float(np.mean(self.lift_samples >= -margin))

    def prob_harm_above(self, margin: float = 0.0) -> float:
        """Posterior probability that Treatment B is harmful beyond ``margin``.

        Computes ``P(lift < -margin | data)``, the exact complement of
        :meth:`prob_no_harm` for the same ``margin``.

        The margin is checked with ``validation.validate_margin`` before use.
        """
        validation.validate_margin(margin)
        return float(np.mean(self.lift_samples < -margin))

    def plot_lift_distribution(
        self,
        ax: Any = None,
        *,
        bins: int = 50,
        density: bool = True,
        title: str | None = "Posterior Distribution of Lift",
    ) -> Any:
        """Plot the posterior lift distribution.

        Delegates to ``plotting.plot_lift_distribution`` and returns whatever
        it returns.
        """
        # Imported lazily so the plotting module is only loaded when a plot
        # is requested.
        from . import plotting

        return plotting.plot_lift_distribution(
            self.lift_samples,
            self.posterior_median_lift,
            self.credible_interval_bounds,
            self.credible_interval,
            ax=ax,
            bins=bins,
            density=density,
            title=title,
        )

    def plot_probability_bar(
        self,
        ax: Any = None,
        *,
        title: str | None = "Posterior Probability of Being Better",
    ) -> Any:
        """Plot the probability that each group is better.

        Delegates to ``plotting.plot_probability_bar`` and returns whatever
        it returns.
        """
        from . import plotting

        return plotting.plot_probability_bar(
            self.prob_treatment_better,
            self.prob_control_better,
            ax=ax,
            title=title,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return result fields as a dictionary.

        Every field except ``lift_samples`` is included, in declaration order.
        """
        return {
            "control_successes": self.control_successes,
            "control_total": self.control_total,
            "treatment_successes": self.treatment_successes,
            "treatment_total": self.treatment_total,
            "prior_alpha": self.prior_alpha,
            "prior_beta": self.prior_beta,
            "n_simulations": self.n_simulations,
            "credible_interval": self.credible_interval,
            "seed": self.seed,
            "control_rate": self.control_rate,
            "treatment_rate": self.treatment_rate,
            "absolute_lift": self.absolute_lift,
            "relative_lift": self.relative_lift,
            "z_statistic": self.z_statistic,
            "p_value": self.p_value,
            "posterior_mean_lift": self.posterior_mean_lift,
            "posterior_median_lift": self.posterior_median_lift,
            "prob_treatment_better": self.prob_treatment_better,
            "prob_control_better": self.prob_control_better,
            "credible_interval_bounds": self.credible_interval_bounds,
            "expected_loss_treatment": self.expected_loss_treatment,
            "expected_loss_control": self.expected_loss_control,
        }

    def summary(self) -> str:
        """Return a formatted, multi-line, human-readable summary.

        Lifts, interval bounds, and expected losses are shown in percentage
        points; the relative lift is shown as a percentage, or "undefined"
        when it is NaN.
        """
        ci_pct = self.credible_interval * 100
        lower, upper = self.credible_interval_bounds

        # Relative lift is a ratio, not percentage points.
        if math.isnan(self.relative_lift):
            relative_lift_str = "undefined"
        else:
            relative_lift_str = f"{self.relative_lift * 100:+.2f}%"

        lines = [
            "Binary A/B test result",
            "=" * 40,
            "Observed (lift is always Treatment B - Control A)",
            f" Control (A): {self.control_successes} / {self.control_total} "
            f"= {_format_percent(self.control_rate)}",
            f" Treatment (B): {self.treatment_successes} / {self.treatment_total} "
            f"= {_format_percent(self.treatment_rate)}",
            f" Observed lift (B - A): {_format_pp(self.absolute_lift)} "
            "percentage points",
            f" Relative lift: {relative_lift_str}",
            "",
            "Frequentist (two-sided pooled z-test)",
            f" z statistic: {self.z_statistic:+.4f}",
            f" p-value: {self.p_value:.4f}",
            "",
            f"Bayesian (Beta({self.prior_alpha:g}, {self.prior_beta:g}) prior, "
            f"{self.n_simulations:,} sims)",
            f" Posterior mean lift: {_format_pp(self.posterior_mean_lift)} "
            "percentage points",
            f" Posterior median lift: {_format_pp(self.posterior_median_lift)} "
            "percentage points",
            f" P(Treatment B > Control A): "
            f"{_format_probability(self.prob_treatment_better)}",
            f" P(Control A > Treatment B): "
            f"{_format_probability(self.prob_control_better)}",
            f" {ci_pct:g}% credible interval for lift: "
            f"[{_format_pp(lower)}, {_format_pp(upper)}] percentage points",
            " Expected loss",
            f" Choosing treatment B: "
            f"{self.expected_loss_treatment * 100:.2f} percentage points",
            f" Choosing control A: "
            f"{self.expected_loss_control * 100:.2f} percentage points",
        ]
        return "\n".join(lines)
@@ -0,0 +1,80 @@
1
+ """Input validation helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+
7
+
8
def validate_count(name: str, value: int) -> None:
    """Ensure ``value`` is an integer (bools excluded) that is not negative.

    Raises:
        ValueError: If ``value`` is not an ``int``, is a ``bool``, or is < 0.
    """
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int:
        raise ValueError(f"{name} must be an integer, got {value!r}.")
    if value < 0:
        raise ValueError(f"{name} must be >= 0, got {value}.")
14
+
15
+
16
def validate_total(name: str, value: int) -> None:
    """Ensure ``value`` is an integer (bools excluded) that is strictly positive.

    Raises:
        ValueError: If ``value`` is not an ``int``, is a ``bool``, or is <= 0.
    """
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int:
        raise ValueError(f"{name} must be an integer, got {value!r}.")
    if value <= 0:
        raise ValueError(f"{name} must be > 0, got {value}.")
22
+
23
+
24
def validate_successes_le_total(
    successes_name: str,
    successes: int,
    total_name: str,
    total: int,
) -> None:
    """Ensure a group's success count is no larger than its total.

    Raises:
        ValueError: If ``successes`` is greater than ``total``.
    """
    too_many = successes > total
    if too_many:
        message = (
            f"{successes_name} ({successes}) cannot exceed {total_name} ({total})."
        )
        raise ValueError(message)
35
+
36
+
37
def validate_prior(name: str, value: float) -> None:
    """Validate a positive, finite Beta prior parameter.

    Args:
        name: Parameter name used in error messages.
        value: Candidate prior parameter.

    Raises:
        ValueError: If ``value`` is not an ``int``/``float`` (bools are
            rejected), is not strictly positive, or is NaN or infinite.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        raise ValueError(f"{name} must be a number, got {value!r}.")
    if value <= 0:
        raise ValueError(f"{name} must be > 0, got {value}.")
    # NaN compares False against everything and +inf is "> 0", so both slip
    # past the positivity check above; neither is a usable Beta parameter.
    # Only floats can be non-finite, and skipping ints avoids an
    # OverflowError from math.isfinite on very large integers.
    if isinstance(value, float) and not math.isfinite(value):
        raise ValueError(f"{name} must be finite, got {value}.")
43
+
44
+
45
def validate_credible_interval(value: float) -> None:
    """Ensure the credible-interval level is a number strictly inside (0, 1).

    Raises:
        ValueError: If ``value`` is not an ``int``/``float`` (bools are
            rejected) or does not satisfy ``0 < value < 1``.
    """
    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
    if not is_number:
        raise ValueError(f"credible_interval must be a number, got {value!r}.")
    inside_unit_interval = 0.0 < value < 1.0
    if inside_unit_interval:
        return
    raise ValueError(
        f"credible_interval must be strictly between 0 and 1, got {value}."
    )
53
+
54
+
55
def validate_n_simulations(value: int) -> None:
    """Ensure the simulation count is an integer (bools excluded) above zero.

    Raises:
        ValueError: If ``value`` is not an ``int``, is a ``bool``, or is <= 0.
    """
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int:
        raise ValueError(f"n_simulations must be an integer, got {value!r}.")
    if value <= 0:
        raise ValueError(f"n_simulations must be > 0, got {value}.")
61
+
62
+
63
def validate_seed(value: int | None) -> None:
    """Ensure the seed is either ``None`` or a non-negative integer.

    Raises:
        ValueError: If ``value`` is not ``None`` and is not an ``int``, is a
            ``bool``, or is negative.
    """
    if value is None:
        return
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int:
        raise ValueError(f"seed must be None or an integer, got {value!r}.")
    if value < 0:
        raise ValueError(f"seed must be >= 0, got {value}.")
71
+
72
+
73
def validate_margin(value: float) -> None:
    """Ensure a do-no-harm margin is a finite number that is not negative.

    Raises:
        ValueError: If ``value`` is not an ``int``/``float`` (bools are
            rejected), is NaN or infinite, or is below zero.
    """
    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
    if not is_number:
        raise ValueError(f"margin must be a number, got {value!r}.")
    if not math.isfinite(value):
        raise ValueError(f"margin must be finite, got {value}.")
    if value < 0:
        raise ValueError(f"margin must be >= 0, got {value}.")
@@ -0,0 +1,131 @@
1
+ Metadata-Version: 2.4
2
+ Name: abtestwise
3
+ Version: 0.1.0
4
+ Summary: Lightweight toolkit for binary A/B experiment analysis using aggregate counts.
5
+ License: MIT
6
+ Keywords: ab-testing,bayesian,experimentation,frequentist,statistics
7
+ Requires-Python: >=3.10
8
+ Requires-Dist: matplotlib>=3.5
9
+ Requires-Dist: numpy>=1.22
10
+ Requires-Dist: scipy>=1.8
11
+ Provides-Extra: dev
12
+ Requires-Dist: pytest>=7.0; extra == 'dev'
13
+ Description-Content-Type: text/markdown
14
+
15
+ # abtestwise
16
+
17
+ A lightweight Python toolkit for **binary A/B experiment analysis** using
18
+ aggregate count data. Version 0.1 combines frequentist and Bayesian summaries
19
+ for binary proportions.
20
+
21
+ ## Install
22
+
23
+ Install from PyPI:
24
+
25
+ ```bash
26
+ pip install abtestwise
27
+ ```
28
+
29
+ ## Development install
30
+
31
+ To work on the package locally (with the test dependencies):
32
+
33
+ ```bash
34
+ pip install -e ".[dev]"
35
+ ```
36
+
37
+ ## Quickstart
38
+
39
+ ```python
40
+ from abtestwise import BinaryABTest
41
+
42
+ test = BinaryABTest.from_counts(
43
+ control_successes=120,
44
+ control_total=1000,
45
+ treatment_successes=145,
46
+ treatment_total=1000,
47
+ prior_alpha=1,
48
+ prior_beta=1,
49
+ n_simulations=100_000,
50
+ credible_interval=0.95,
51
+ seed=42,
52
+ )
53
+
54
+ result = test.run()
55
+
56
+ print(result.summary())
57
+ print(result.prob_lift_above(0.01))
58
+ ```
59
+
60
+ `prob_lift_above(0.01)` gives the posterior probability that Treatment B improves
61
+ the metric by more than 1 percentage point.
62
+
63
+ ### Do-no-harm checks
64
+
65
+ `prob_no_harm(margin)` gives the posterior probability that Treatment B is **not**
66
+ worse than Control A by more than `margin` (in raw decimal units, so `0.005` means
67
+ 0.5 percentage points). `prob_harm_above(margin)` is its complement.
68
+
69
+ ```python
70
+ result.prob_no_harm(0.005) # P(lift >= -0.005): B is not worse by more than 0.5pp
71
+ result.prob_harm_above(0.005) # P(lift < -0.005): B is worse by more than 0.5pp
72
+ ```
73
+
74
+ Raw result values are also available:
75
+
76
+ ```python
77
+ result.to_dict()
78
+ ```
79
+
80
+ ## Plotting
81
+
82
+ ```python
83
+ import matplotlib.pyplot as plt
84
+
85
+ result.plot_lift_distribution()
86
+ result.plot_probability_bar()
87
+
88
+ plt.show()
89
+ ```
90
+
91
+ The lift distribution plot shows posterior lift in percentage points.
92
+
93
+ The probability bar plot shows:
94
+
95
+ ```text
96
+ P(Treatment B > Control A)
97
+ P(Control A > Treatment B)
98
+ ```
99
+
100
+ ## Groups and sign convention
101
+
102
+ In product A/B testing terms:
103
+
104
+ - **Control (A)** is the baseline group.
105
+ - **Treatment (B)** is the test group or variant B.
106
+ - **Lift is always Treatment B - Control A.**
107
+ - **Positive lift means Treatment B is better than Control A.**
108
+ - **Negative lift means Control A is better than Treatment B.**
109
+
110
+ ## Scope
111
+
112
+ Current package scope:
113
+
114
+ - Binary proportions only.
115
+ - Aggregate counts only.
116
+ - Two groups only.
117
+ - Frequentist: two-sided pooled two-proportion z-test.
118
+ - Bayesian: beta-binomial posterior simulation with default prior `Beta(1, 1)`.
119
+ - Equal-tailed credible interval.
120
+ - Expected loss.
121
+ - Practical lift thresholds.
122
+ - Do-no-harm probabilities using a user-defined harm margin.
123
+ - Simple plots.
124
+
125
+ ## Development
126
+
127
+ Run tests with:
128
+
129
+ ```bash
130
+ python -m pytest -q
131
+ ```
@@ -0,0 +1,10 @@
1
+ abtestwise/__init__.py,sha256=S-NlVV4h2eFs3ClUNaEqYkMrjLr2m8IZ43j379THHXA,388
2
+ abtestwise/bayesian.py,sha256=2Cz9Q9V170MOt275lM6Cy15JRsoG7q4CH87RXClfzUM,2350
3
+ abtestwise/binary.py,sha256=2krX5pqcMP8yVMfnKPSY-FoPvFSPVx0DIV9D1DbcLcY,5132
4
+ abtestwise/frequentist.py,sha256=gSkvwlA9-FZYyEcarRPCyLmdsfpkMsFTNK2-HhpmI7I,1377
5
+ abtestwise/plotting.py,sha256=WVXFkT9oKCmnFPnBW8HtIglUgsNkWgZGEdxqroZVcO8,4243
6
+ abtestwise/result.py,sha256=FHiPYjEBVI4vlBPrxpUBVMSOBALCCfVLoE7CwbBzCpo,7519
7
+ abtestwise/validation.py,sha256=vu5n4TcXjPz2eKfuKVqM-J08MQ-z1nDZZcOj0CEKU4E,2883
8
+ abtestwise-0.1.0.dist-info/METADATA,sha256=X4NoGjL2eKwTWNKwwIoxyvkHplk1Xukz7etUmpdPD5o,2952
9
+ abtestwise-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ abtestwise-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any