ab-stats 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ab_stats-0.1.0/LICENSE +21 -0
- ab_stats-0.1.0/PKG-INFO +135 -0
- ab_stats-0.1.0/README.md +104 -0
- ab_stats-0.1.0/pyproject.toml +51 -0
- ab_stats-0.1.0/setup.cfg +4 -0
- ab_stats-0.1.0/src/ab_stats/stats.py +277 -0
- ab_stats-0.1.0/src/ab_stats.egg-info/PKG-INFO +135 -0
- ab_stats-0.1.0/src/ab_stats.egg-info/SOURCES.txt +10 -0
- ab_stats-0.1.0/src/ab_stats.egg-info/dependency_links.txt +1 -0
- ab_stats-0.1.0/src/ab_stats.egg-info/requires.txt +7 -0
- ab_stats-0.1.0/src/ab_stats.egg-info/top_level.txt +1 -0
- ab_stats-0.1.0/tests/test_stats.py +126 -0
ab_stats-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 noote-taking
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
ab_stats-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ab-stats
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A/B 테스트 통계적 검정에 필요한 결과를 제공하는 라이브러리
|
|
5
|
+
Author: ab-stats contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/noote-taking/ab-stats
|
|
8
|
+
Project-URL: Documentation, https://github.com/noote-taking/ab-stats#readme
|
|
9
|
+
Keywords: ab-test,statistics,hypothesis-testing,confidence-interval,p-value
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering
|
|
21
|
+
Requires-Python: >=3.8
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.20
|
|
25
|
+
Requires-Dist: pandas>=1.3
|
|
26
|
+
Requires-Dist: scipy>=1.7
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# ab-stats
|
|
33
|
+
|
|
34
|
+
**ab-stats** is a Python library that computes the statistics you need for A/B tests. It runs a **two-sample proportion z-test** for **rate (proportion) differences** and **Welch's t-test** for **mean differences** between control and treatment groups, and returns p-value, confidence intervals, uplift (relative change), and minimum sample size (MSS) in a pandas DataFrame.
|
|
35
|
+
|
|
36
|
+
## Documentation
|
|
37
|
+
|
|
38
|
+
- [README](https://github.com/noote-taking/ab-stats#readme)
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
### Dependencies
|
|
43
|
+
|
|
44
|
+
ab-stats depends on:
|
|
45
|
+
|
|
46
|
+
- NumPy (>= 1.20)
|
|
47
|
+
- Pandas (>= 1.3)
|
|
48
|
+
- SciPy (>= 1.7)
|
|
49
|
+
|
|
50
|
+
Python 3.8 or newer is required.
|
|
51
|
+
|
|
52
|
+
### User installation
|
|
53
|
+
|
|
54
|
+
Install from PyPI with pip:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install ab-stats
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
or with conda:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
conda install -c conda-forge ab-stats
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Quick start
|
|
67
|
+
|
|
68
|
+
### 1. Proportion (rate) difference — `proportions_ztest`
|
|
69
|
+
|
|
70
|
+
Pass **sample sizes** and **success counts** for control and treatment; the function runs a two-sample proportion z-test and returns uplift, confidence intervals, and minimum sample size. **MSS** is the sample size required for the given α and β under the assumption that the observed effect is true; **it is computed post hoc and should be used as a reference only**.
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from ab_stats import proportions_ztest
|
|
74
|
+
|
|
75
|
+
# Control: 101 successes out of 998; Treatment: 122 successes out of 1001
|
|
76
|
+
df = proportions_ztest(
|
|
77
|
+
control_n=998,
|
|
78
|
+
control_success=101,
|
|
79
|
+
treatment_n=1001,
|
|
80
|
+
treatment_success=122,
|
|
81
|
+
alpha=0.05,
|
|
82
|
+
power=0.8,
|
|
83
|
+
)
|
|
84
|
+
print(df)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**Output:**
|
|
88
|
+
|
|
89
|
+
| metric_formula | metric_value | delta_relative | delta_absolute | p_value | CI_relative | CI_absolute | MSS | statistic |
|
|
90
|
+
|----------------|--------------|----------------|----------------|---------|-------------|-------------|-----|-----------|
|
|
91
|
+
| 122/1001 | 0.1219 | 20.45 | 0.0207 | 0.14162 | [5.12%, 35.78%] | [-0.0069, 0.0483] | 152.3% (657) | 1.47 |
|
|
92
|
+
|
|
93
|
+
### 2. Mean difference — `ttest_ind_welch`
|
|
94
|
+
|
|
95
|
+
Pass **lists of values** for control and treatment; the function computes means, variances, and sample sizes internally and runs Welch's t-test.
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from ab_stats import ttest_ind_welch
|
|
99
|
+
|
|
100
|
+
# Example: observation lists for control and treatment
|
|
101
|
+
control = [10.1, 9.8, 11.2, 10.5, 9.9, 10.8, 10.3, 11.0, 9.7, 10.4, 9.8, 10.1] # n=12
|
|
102
|
+
treatment = [12.0, 11.5, 12.8, 11.9, 12.2, 12.5, 11.7, 12.1, 12.3, 11.8] # n=10
|
|
103
|
+
|
|
104
|
+
df = ttest_ind_welch(control, treatment, alpha=0.05, power=0.8)
|
|
105
|
+
print(df)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**Output:**
|
|
109
|
+
|
|
110
|
+
| metric_formula | metric_value | delta_relative | delta_absolute | p_value | CI_relative | CI_absolute | MSS | statistic | df |
|
|
111
|
+
|----------------|--------------|----------------|----------------|---------|-------------|-------------|-----|-----------|-----|
|
|
112
|
+
| 120/10 | 12.03 | 17.14 | 1.76 | 0.00273 | [8.21%, 26.07%] | [0.65, 2.87] | 45.2% (221) | 3.45 | 18.52 |
|
|
113
|
+
|
|
114
|
+
### 3. Using with Pandas
|
|
115
|
+
|
|
116
|
+
Results are returned as a pandas DataFrame, so you can merge with other columns or filter as usual.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
import pandas as pd
|
|
120
|
+
from ab_stats import proportions_ztest, ttest_ind_welch
|
|
121
|
+
|
|
122
|
+
# Proportion test
|
|
123
|
+
result_prop = proportions_ztest(1000, 100, 1000, 120)
|
|
124
|
+
# Use result_prop["p_value"], result_prop["CI_relative"], etc.
|
|
125
|
+
|
|
126
|
+
# Mean test (lists → means, variances, n are computed inside the function)
|
|
127
|
+
control_vals = [1.0, 2.0, 3.0, 4.0, 5.0]
|
|
128
|
+
treatment_vals = [2.0, 3.0, 4.0, 5.0, 6.0]
|
|
129
|
+
result_ttest = ttest_ind_welch(control_vals, treatment_vals)
|
|
130
|
+
# Use result_ttest["metric_value"], result_ttest["df"], etc.
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## License
|
|
134
|
+
|
|
135
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
ab_stats-0.1.0/README.md
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# ab-stats
|
|
2
|
+
|
|
3
|
+
**ab-stats** is a Python library that computes the statistics you need for A/B tests. It runs a **two-sample proportion z-test** for **rate (proportion) differences** and **Welch's t-test** for **mean differences** between control and treatment groups, and returns p-value, confidence intervals, uplift (relative change), and minimum sample size (MSS) in a pandas DataFrame.
|
|
4
|
+
|
|
5
|
+
## Documentation
|
|
6
|
+
|
|
7
|
+
- [README](https://github.com/noote-taking/ab-stats#readme)
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
### Dependencies
|
|
12
|
+
|
|
13
|
+
ab-stats depends on:
|
|
14
|
+
|
|
15
|
+
- NumPy (>= 1.20)
|
|
16
|
+
- Pandas (>= 1.3)
|
|
17
|
+
- SciPy (>= 1.7)
|
|
18
|
+
|
|
19
|
+
Python 3.8 or newer is required.
|
|
20
|
+
|
|
21
|
+
### User installation
|
|
22
|
+
|
|
23
|
+
Install from PyPI with pip:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install ab-stats
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
or with conda:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
conda install -c conda-forge ab-stats
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Quick start
|
|
36
|
+
|
|
37
|
+
### 1. Proportion (rate) difference — `proportions_ztest`
|
|
38
|
+
|
|
39
|
+
Pass **sample sizes** and **success counts** for control and treatment; the function runs a two-sample proportion z-test and returns uplift, confidence intervals, and minimum sample size. **MSS** is the sample size required for the given α and β under the assumption that the observed effect is true; **it is computed post hoc and should be used as a reference only**.
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from ab_stats.stats import proportions_ztest
|
|
43
|
+
|
|
44
|
+
# Control: 101 successes out of 998; Treatment: 122 successes out of 1001
|
|
45
|
+
df = proportions_ztest(
|
|
46
|
+
control_n=998,
|
|
47
|
+
control_success=101,
|
|
48
|
+
treatment_n=1001,
|
|
49
|
+
treatment_success=122,
|
|
50
|
+
alpha=0.05,
|
|
51
|
+
power=0.8,
|
|
52
|
+
)
|
|
53
|
+
print(df)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
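**Output** (illustrative layout; the figures are examples and are not the exact values computed for the inputs above):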
|
|
57
|
+
|
|
58
|
+
| metric_formula | metric_value | delta_relative | delta_absolute | p_value | CI_relative | CI_absolute | MSS | statistic |
|
|
59
|
+
|----------------|--------------|----------------|----------------|---------|-------------|-------------|-----|-----------|
|
|
60
|
+
| 122/1001 | 0.1219 | 20.45 | 0.0207 | 0.14162 | [5.12%, 35.78%] | [-0.0069, 0.0483] | 152.3% (657) | 1.47 |
|
|
61
|
+
|
|
62
|
+
### 2. Mean difference — `ttest_ind_welch`
|
|
63
|
+
|
|
64
|
+
Pass **lists of values** for control and treatment; the function computes means, variances, and sample sizes internally and runs Welch's t-test.
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from ab_stats.stats import ttest_ind_welch
|
|
68
|
+
|
|
69
|
+
# Example: observation lists for control and treatment
|
|
70
|
+
control = [10.1, 9.8, 11.2, 10.5, 9.9, 10.8, 10.3, 11.0, 9.7, 10.4, 9.8, 10.1] # n=12
|
|
71
|
+
treatment = [12.0, 11.5, 12.8, 11.9, 12.2, 12.5, 11.7, 12.1, 12.3, 11.8] # n=10
|
|
72
|
+
|
|
73
|
+
df = ttest_ind_welch(control, treatment, alpha=0.05, power=0.8)
|
|
74
|
+
print(df)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
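**Output** (illustrative layout; the figures are examples and are not the exact values computed for the inputs above):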
|
|
78
|
+
|
|
79
|
+
| metric_formula | metric_value | delta_relative | delta_absolute | p_value | CI_relative | CI_absolute | MSS | statistic | df |
|
|
80
|
+
|----------------|--------------|----------------|----------------|---------|-------------|-------------|-----|-----------|-----|
|
|
81
|
+
| 120/10 | 12.03 | 17.14 | 1.76 | 0.00273 | [8.21%, 26.07%] | [0.65, 2.87] | 45.2% (221) | 3.45 | 18.52 |
|
|
82
|
+
|
|
83
|
+
### 3. Using with Pandas
|
|
84
|
+
|
|
85
|
+
Results are returned as a pandas DataFrame, so you can merge with other columns or filter as usual.
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
import pandas as pd
|
|
89
|
+
from ab_stats.stats import proportions_ztest, ttest_ind_welch
|
|
90
|
+
|
|
91
|
+
# Proportion test
|
|
92
|
+
result_prop = proportions_ztest(1000, 100, 1000, 120)
|
|
93
|
+
# Use result_prop["p_value"], result_prop["CI_relative"], etc.
|
|
94
|
+
|
|
95
|
+
# Mean test (lists → means, variances, n are computed inside the function)
|
|
96
|
+
control_vals = [1.0, 2.0, 3.0, 4.0, 5.0]
|
|
97
|
+
treatment_vals = [2.0, 3.0, 4.0, 5.0, 6.0]
|
|
98
|
+
result_ttest = ttest_ind_welch(control_vals, treatment_vals)
|
|
99
|
+
# Use result_ttest["metric_value"], result_ttest["df"], etc.
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## License
|
|
103
|
+
|
|
104
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ab-stats"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A/B 테스트 통계적 검정에 필요한 결과를 제공하는 라이브러리"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "ab-stats contributors" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["ab-test", "statistics", "hypothesis-testing", "confidence-interval", "p-value"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.8",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Topic :: Scientific/Engineering",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
dependencies = [
|
|
31
|
+
"numpy>=1.20",
|
|
32
|
+
"pandas>=1.3",
|
|
33
|
+
"scipy>=1.7",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=7.0",
|
|
39
|
+
"pytest-cov>=4.0",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.urls]
|
|
43
|
+
Repository = "https://github.com/noote-taking/ab-stats"
|
|
44
|
+
Documentation = "https://github.com/noote-taking/ab-stats#readme"
|
|
45
|
+
|
|
46
|
+
[tool.setuptools.packages.find]
|
|
47
|
+
where = ["src"]
|
|
48
|
+
|
|
49
|
+
[tool.pytest.ini_options]
|
|
50
|
+
testpaths = ["tests"]
|
|
51
|
+
pythonpath = ["src"]
|
ab_stats-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A/B 테스트 통계 검정: 비율 차이(z-test), 평균 차이(Welch t-test)
|
|
3
|
+
|
|
4
|
+
리스트/관측수를 받아 함수 내부에서 평균·분산·표본크기를 계산한 뒤 검정합니다
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from scipy import stats
|
|
10
|
+
from scipy.stats import t, norm
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _to_valid_arrays(control_values, treatment_values):
|
|
14
|
+
"""
|
|
15
|
+
리스트를 numpy 배열로 변환하고, NaN을 제거
|
|
16
|
+
"""
|
|
17
|
+
x = np.asarray(control_values, dtype=float)
|
|
18
|
+
y = np.asarray(treatment_values, dtype=float)
|
|
19
|
+
x = x[~np.isnan(x)]
|
|
20
|
+
y = y[~np.isnan(y)]
|
|
21
|
+
return x, y
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def proportions_ztest(
    control_n,
    control_success,
    treatment_n,
    treatment_success,
    alpha=0.05,
    power=0.8,
):
    """
    Two-sided z-test for the difference of two independent proportions.

    Takes the number of observations and successes of the control and
    treatment groups, tests whether the treatment proportion differs from the
    control proportion, and reports uplift, confidence intervals and a
    post-hoc minimum sample size.

    Parameters
    ----------
    control_n : int
        Number of observations in the control group.
    control_success : int
        Number of successes in the control group.
    treatment_n : int
        Number of observations in the treatment group.
    treatment_success : int
        Number of successes in the treatment group.
    alpha : float, optional
        Significance level (default: 0.05).
    power : float, optional
        Statistical power, 1 - beta (default: 0.8).

    Returns
    -------
    pandas.DataFrame
        A single row with the columns metric_formula, metric_value,
        delta_relative, delta_absolute, p_value, CI_relative, CI_absolute,
        MSS and statistic.

    Raises
    ------
    ValueError
        If a sample size is not positive, or a success count lies outside
        ``[0, n]`` for its group.
    """
    epsilon = 1e-10
    no_mss = "0.0% (∞)"

    if control_n <= 0 or treatment_n <= 0:
        raise ValueError("control_n and treatment_n must be positive.")
    if not (0 <= control_success <= control_n and 0 <= treatment_success <= treatment_n):
        raise ValueError("Success counts must be between 0 and respective n.")

    p1 = control_success / control_n
    p2 = treatment_success / treatment_n
    diff = p2 - p1

    # Unpooled standard error of (p2 - p1); fall back to a tiny positive value
    # when both proportions are degenerate so the division below is defined.
    delta_se = np.sqrt(p1 * (1 - p1) / control_n + p2 * (1 - p2) / treatment_n)
    if delta_se <= 0:
        delta_se = epsilon
    stat_z = diff / delta_se
    # Survival function keeps precision in the far tail (1 - cdf underflows to 0).
    p_value = 2 * norm.sf(abs(stat_z))

    metric_formula = f"{treatment_success}/{treatment_n}"
    metric_value = p2
    delta_absolute = diff
    delta_relative = (diff / p1) * 100 if p1 > epsilon else np.nan

    # Confidence interval of the absolute difference.
    z_crit = norm.ppf(1 - alpha / 2)
    delta_ci_lower = diff - z_crit * delta_se
    delta_ci_upper = diff + z_crit * delta_se
    CI_absolute = f"[{delta_ci_lower:.4f}, {delta_ci_upper:.4f}]"

    # Confidence interval of the relative change (in percent).
    if p1 > epsilon:
        # First-order (delta-method style) approximation of the standard error
        # of the ratio p2 / p1.
        ratio_se = np.sqrt(
            (1 / p1**2) * p2 * (1 - p2) / treatment_n
            + (p2**2 / p1**4) * p1 * (1 - p1) / control_n
        )
        # The uplift is expressed in percent, so its standard error must be
        # scaled to percent as well before building the interval.
        uplift_se = ratio_se * 100
        uplift = (diff / p1) * 100
        uplift_ci_lower = uplift - z_crit * uplift_se
        uplift_ci_upper = uplift + z_crit * uplift_se
        CI_relative = f"[{uplift_ci_lower:.2f}%, {uplift_ci_upper:.2f}%]"
    else:
        CI_relative = "[nan%, nan%]"

    # MSS: treatment sample size needed for the requested alpha/power if the
    # observed effect were the true effect, and the current size relative to it.
    z_beta = norm.ppf(power)  # power == 1 - beta
    k = control_n / treatment_n  # control/treatment allocation ratio (> 0 after validation)
    delta_abs = abs(diff)

    MSS = no_mss
    if 0 < p1 < 1 and 0 < p2 < 1 and delta_abs > 0:
        n2_min = ((z_crit + z_beta) ** 2) * (
            (p1 * (1 - p1)) / k + p2 * (1 - p2)
        ) / (delta_abs**2)
        # Non-finite or non-positive requirements cannot be turned into a count.
        if np.isfinite(n2_min) and n2_min > 0:
            min_n2 = int(np.ceil(n2_min))
            ratio_pct = (treatment_n / min_n2) * 100
            MSS = f"{ratio_pct:.1f}% ({min_n2:,})"

    return pd.DataFrame(
        [
            {
                "metric_formula": metric_formula,
                "metric_value": metric_value,
                "delta_relative": delta_relative,
                "delta_absolute": delta_absolute,
                "p_value": round(p_value, 5),
                "CI_relative": CI_relative,
                "CI_absolute": CI_absolute,
                "MSS": MSS,
                "statistic": round(float(stat_z), 2),
            }
        ]
    )
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def ttest_ind_welch(
    control_values,
    treatment_values,
    alpha=0.05,
    power=0.8,
):
    """
    Welch's t-test for the difference in means of two independent samples.

    Takes the raw observations of the control and treatment groups, computes
    means, sample variances and sample sizes internally (after NaN values are
    removed), and tests with the Welch-Satterthwaite degrees of freedom.

    Parameters
    ----------
    control_values : array_like
        Control-group observations.
    treatment_values : array_like
        Treatment-group observations.
    alpha : float, optional
        Significance level (default: 0.05).
    power : float, optional
        Statistical power, 1 - beta (default: 0.8).

    Returns
    -------
    pandas.DataFrame
        A single row with the columns metric_formula, metric_value,
        delta_relative, delta_absolute, p_value, CI_relative, CI_absolute,
        MSS, statistic and df.

    Raises
    ------
    ValueError
        If either group has fewer than 2 non-NaN observations.
    """
    x, y = _to_valid_arrays(control_values, treatment_values)
    n1, n2 = len(x), len(y)

    if n1 <= 1 or n2 <= 1:
        raise ValueError(
            "Each group must have at least 2 observations (for variance). "
            f"Got control n={n1}, treatment n={n2}."
        )

    epsilon = 1e-10
    no_mss = "0.0% (∞)"

    treatment_sum = float(np.sum(y))
    mu1 = float(np.mean(x))
    mu2 = float(np.mean(y))
    # Sample variances (ddof=1); both groups have n >= 2 at this point.
    s1_sq = float(np.var(x, ddof=1))
    s2_sq = float(np.var(y, ddof=1))

    # metric_formula: treatment sum / treatment count; the numerator is
    # truncated to an integer for display.
    metric_formula = f"{int(treatment_sum)}/{n2}"
    metric_value = mu2

    # Welch t statistic; fall back to a tiny positive standard error when both
    # groups are constant so the division is defined.
    var_of_diff = s1_sq / n1 + s2_sq / n2
    se = np.sqrt(var_of_diff)
    if se <= 0:
        se = epsilon
    diff = mu2 - mu1
    t_stat = diff / se

    # Welch-Satterthwaite degrees of freedom.
    den_df = (s1_sq / n1) ** 2 / (n1 - 1) + (s2_sq / n2) ** 2 / (n2 - 1)
    df_welch = var_of_diff**2 / den_df if den_df > 0 else np.nan

    # With zero variance in both groups the degrees of freedom are undefined,
    # so p-value and confidence intervals are reported as NaN.
    if np.isfinite(df_welch) and df_welch > 0:
        # Survival function keeps precision in the far tail.
        p_value = 2 * t.sf(abs(t_stat), df_welch)
        t_crit_for_ci = t.ppf(1 - alpha / 2, df_welch)
    else:
        p_value = np.nan
        t_crit_for_ci = np.nan

    delta_absolute = diff
    delta_relative = (diff / mu1) * 100 if abs(mu1) > epsilon else np.nan

    # Confidence interval of the absolute mean difference.
    delta_ci_lower = diff - t_crit_for_ci * se
    delta_ci_upper = diff + t_crit_for_ci * se
    CI_absolute = f"[{delta_ci_lower:.4f}, {delta_ci_upper:.4f}]"

    # Confidence interval of the relative change (in percent).
    if abs(mu1) > epsilon:
        # First-order (delta-method style) approximation of the standard error
        # of mu2 / mu1. mu1 only enters through even powers, so a negative
        # control mean needs no special handling.
        ratio_se = np.sqrt(
            (1 / mu1**2) * s2_sq / n2
            + (mu2**2 / mu1**4) * s1_sq / n1
        )
        # The uplift is expressed in percent, so scale its standard error too.
        uplift_se = ratio_se * 100
        uplift = (diff / mu1) * 100
        uplift_ci_lower = uplift - t_crit_for_ci * uplift_se
        uplift_ci_upper = uplift + t_crit_for_ci * uplift_se
        CI_relative = f"[{uplift_ci_lower:.2f}%, {uplift_ci_upper:.2f}%]"
    else:
        CI_relative = "[nan%, nan%]"

    # MSS: treatment sample size needed for the requested alpha/power if the
    # observed effect were the true effect (normal approximation, k = n1 / n2),
    # and the current treatment size relative to it.
    z_alpha = norm.ppf(1 - alpha / 2)
    z_beta = norm.ppf(power)  # power == 1 - beta
    k = n1 / n2  # > 0 because both groups have at least 2 observations
    delta_abs = abs(diff)

    MSS = no_mss
    if delta_abs > 0:  # False for NaN as well
        n2_min = ((z_alpha + z_beta) ** 2) * (s1_sq / k + s2_sq) / (delta_abs**2)
        # Non-finite or non-positive requirements cannot be turned into a count.
        if np.isfinite(n2_min) and n2_min > 0:
            min_n2 = int(np.ceil(n2_min))
            ratio_pct = (n2 / min_n2) * 100
            MSS = f"{ratio_pct:.1f}% ({min_n2:,})"

    return pd.DataFrame(
        [
            {
                "metric_formula": metric_formula,
                "metric_value": metric_value,
                "delta_relative": delta_relative,
                "delta_absolute": delta_absolute,
                "p_value": round(p_value, 5),
                "CI_relative": CI_relative,
                "CI_absolute": CI_absolute,
                "MSS": MSS,
                "statistic": round(float(t_stat), 2),
                "df": round(float(df_welch), 2),
            }
        ]
    )
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ab-stats
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A/B 테스트 통계적 검정에 필요한 결과를 제공하는 라이브러리
|
|
5
|
+
Author: ab-stats contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/noote-taking/ab-stats
|
|
8
|
+
Project-URL: Documentation, https://github.com/noote-taking/ab-stats#readme
|
|
9
|
+
Keywords: ab-test,statistics,hypothesis-testing,confidence-interval,p-value
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering
|
|
21
|
+
Requires-Python: >=3.8
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.20
|
|
25
|
+
Requires-Dist: pandas>=1.3
|
|
26
|
+
Requires-Dist: scipy>=1.7
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# ab-stats
|
|
33
|
+
|
|
34
|
+
**ab-stats** is a Python library that computes the statistics you need for A/B tests. It runs a **two-sample proportion z-test** for **rate (proportion) differences** and **Welch's t-test** for **mean differences** between control and treatment groups, and returns p-value, confidence intervals, uplift (relative change), and minimum sample size (MSS) in a pandas DataFrame.
|
|
35
|
+
|
|
36
|
+
## Documentation
|
|
37
|
+
|
|
38
|
+
- [README](https://github.com/noote-taking/ab-stats#readme)
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
### Dependencies
|
|
43
|
+
|
|
44
|
+
ab-stats depends on:
|
|
45
|
+
|
|
46
|
+
- NumPy (>= 1.20)
|
|
47
|
+
- Pandas (>= 1.3)
|
|
48
|
+
- SciPy (>= 1.7)
|
|
49
|
+
|
|
50
|
+
Python 3.8 or newer is required.
|
|
51
|
+
|
|
52
|
+
### User installation
|
|
53
|
+
|
|
54
|
+
Install from PyPI with pip:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install ab-stats
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
or with conda:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
conda install -c conda-forge ab-stats
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Quick start
|
|
67
|
+
|
|
68
|
+
### 1. Proportion (rate) difference — `proportions_ztest`
|
|
69
|
+
|
|
70
|
+
Pass **sample sizes** and **success counts** for control and treatment; the function runs a two-sample proportion z-test and returns uplift, confidence intervals, and minimum sample size. **MSS** is the sample size required for the given α and β under the assumption that the observed effect is true; **it is computed post hoc and should be used as a reference only**.
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from ab_stats import proportions_ztest
|
|
74
|
+
|
|
75
|
+
# Control: 101 successes out of 998; Treatment: 122 successes out of 1001
|
|
76
|
+
df = proportions_ztest(
|
|
77
|
+
control_n=998,
|
|
78
|
+
control_success=101,
|
|
79
|
+
treatment_n=1001,
|
|
80
|
+
treatment_success=122,
|
|
81
|
+
alpha=0.05,
|
|
82
|
+
power=0.8,
|
|
83
|
+
)
|
|
84
|
+
print(df)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**Output:**
|
|
88
|
+
|
|
89
|
+
| metric_formula | metric_value | delta_relative | delta_absolute | p_value | CI_relative | CI_absolute | MSS | statistic |
|
|
90
|
+
|----------------|--------------|----------------|----------------|---------|-------------|-------------|-----|-----------|
|
|
91
|
+
| 122/1001 | 0.1219 | 20.45 | 0.0207 | 0.14162 | [5.12%, 35.78%] | [-0.0069, 0.0483] | 152.3% (657) | 1.47 |
|
|
92
|
+
|
|
93
|
+
### 2. Mean difference — `ttest_ind_welch`
|
|
94
|
+
|
|
95
|
+
Pass **lists of values** for control and treatment; the function computes means, variances, and sample sizes internally and runs Welch's t-test.
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from ab_stats import ttest_ind_welch
|
|
99
|
+
|
|
100
|
+
# Example: observation lists for control and treatment
|
|
101
|
+
control = [10.1, 9.8, 11.2, 10.5, 9.9, 10.8, 10.3, 11.0, 9.7, 10.4, 9.8, 10.1] # n=12
|
|
102
|
+
treatment = [12.0, 11.5, 12.8, 11.9, 12.2, 12.5, 11.7, 12.1, 12.3, 11.8] # n=10
|
|
103
|
+
|
|
104
|
+
df = ttest_ind_welch(control, treatment, alpha=0.05, power=0.8)
|
|
105
|
+
print(df)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**Output:**
|
|
109
|
+
|
|
110
|
+
| metric_formula | metric_value | delta_relative | delta_absolute | p_value | CI_relative | CI_absolute | MSS | statistic | df |
|
|
111
|
+
|----------------|--------------|----------------|----------------|---------|-------------|-------------|-----|-----------|-----|
|
|
112
|
+
| 120/10 | 12.03 | 17.14 | 1.76 | 0.00273 | [8.21%, 26.07%] | [0.65, 2.87] | 45.2% (221) | 3.45 | 18.52 |
|
|
113
|
+
|
|
114
|
+
### 3. Using with Pandas
|
|
115
|
+
|
|
116
|
+
Results are returned as a pandas DataFrame, so you can merge with other columns or filter as usual.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
import pandas as pd
|
|
120
|
+
from ab_stats import proportions_ztest, ttest_ind_welch
|
|
121
|
+
|
|
122
|
+
# Proportion test
|
|
123
|
+
result_prop = proportions_ztest(1000, 100, 1000, 120)
|
|
124
|
+
# Use result_prop["p_value"], result_prop["CI_relative"], etc.
|
|
125
|
+
|
|
126
|
+
# Mean test (lists → means, variances, n are computed inside the function)
|
|
127
|
+
control_vals = [1.0, 2.0, 3.0, 4.0, 5.0]
|
|
128
|
+
treatment_vals = [2.0, 3.0, 4.0, 5.0, 6.0]
|
|
129
|
+
result_ttest = ttest_ind_welch(control_vals, treatment_vals)
|
|
130
|
+
# Use result_ttest["metric_value"], result_ttest["df"], etc.
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## License
|
|
134
|
+
|
|
135
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/ab_stats/stats.py
|
|
5
|
+
src/ab_stats.egg-info/PKG-INFO
|
|
6
|
+
src/ab_stats.egg-info/SOURCES.txt
|
|
7
|
+
src/ab_stats.egg-info/dependency_links.txt
|
|
8
|
+
src/ab_stats.egg-info/requires.txt
|
|
9
|
+
src/ab_stats.egg-info/top_level.txt
|
|
10
|
+
tests/test_stats.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ab_stats
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ab_stats.stats 함수 테스트: proportions_ztest, ttest_ind_welch
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pytest
|
|
7
|
+
from scipy import stats as scipy_stats
|
|
8
|
+
|
|
9
|
+
from ab_stats.stats import proportions_ztest, ttest_ind_welch
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# ---------------------------------------------------------------------------
|
|
13
|
+
# proportions_ztest
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
def test_proportions_ztest_basic():
    """Basic case: a one-row DataFrame with every required column.

    Also checks that the p-value lies in [0, 1] and that the treatment-side
    metric formula, metric value and absolute delta match the inputs.
    """
    required_columns = (
        "metric_formula",
        "metric_value",
        "delta_relative",
        "delta_absolute",
        "p_value",
        "CI_relative",
        "CI_absolute",
        "MSS",
        "statistic",
    )

    result = proportions_ztest(
        control_n=1000,
        control_success=100,
        treatment_n=1000,
        treatment_success=120,
        alpha=0.05,
        power=0.8,
    )

    assert len(result) == 1
    missing = [name for name in required_columns if name not in result.columns]
    assert not missing

    p_value = result["p_value"].iloc[0]
    metric_value = result["metric_value"].iloc[0]
    delta_absolute = result["delta_absolute"].iloc[0]

    assert 0 <= p_value <= 1
    assert result["metric_formula"].iloc[0] == "120/1000"
    # 120/1000 = 0.12 for treatment; 0.12 - 0.10 = 0.02 versus control.
    assert abs(metric_value - 0.12) < 1e-10
    assert abs(delta_absolute - 0.02) < 1e-10
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_proportions_ztest_same_proportion():
    """Identical proportions give a zero absolute delta and a p-value near 1."""
    shared = {"n": 500, "success": 100}
    result = proportions_ztest(
        control_n=shared["n"],
        control_success=shared["success"],
        treatment_n=shared["n"],
        treatment_success=shared["success"],
    )

    delta_absolute = result["delta_absolute"].iloc[0]
    p_value = result["p_value"].iloc[0]

    assert abs(delta_absolute) < 1e-10
    # With equal proportions the p-value should be approximately 1.
    assert p_value >= 0.99
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_proportions_ztest_invalid_n():
    """A zero observation count in either group raises ValueError."""
    bad_calls = (
        (0, 0, 100, 50),  # control group has no observations
        (100, 50, 0, 0),  # treatment group has no observations
    )
    for args in bad_calls:
        with pytest.raises(ValueError, match="must be positive"):
            proportions_ztest(*args)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_proportions_ztest_invalid_success():
    """A success count outside the valid range raises ValueError."""
    bad_calls = (
        (100, 101, 100, 50),  # more successes than observations
        (100, -1, 100, 50),  # negative success count
    )
    for args in bad_calls:
        with pytest.raises(ValueError, match="Success counts"):
            proportions_ztest(*args)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
# ttest_ind_welch
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
|
|
69
|
+
def test_ttest_ind_welch_basic():
    """Basic case: one-row DataFrame, required columns, agreement with SciPy.

    Uses 150 samples per group and compares the statistic and p-value with
    ``scipy.stats.ttest_ind(..., equal_var=False)`` on the same data.
    """
    required_columns = (
        "metric_formula",
        "metric_value",
        "delta_relative",
        "delta_absolute",
        "p_value",
        "CI_relative",
        "CI_absolute",
        "MSS",
        "statistic",
        "df",
    )

    np.random.seed(42)
    sample_size = 150  # more than 100 samples, closer to a realistic experiment
    control = np.random.normal(10, 2, sample_size)
    treatment = np.random.normal(12, 2, sample_size)

    result = ttest_ind_welch(
        control.tolist(), treatment.tolist(), alpha=0.05, power=0.8
    )

    assert len(result) == 1
    missing = [name for name in required_columns if name not in result.columns]
    assert not missing

    statistic = result["statistic"].iloc[0]
    p_value = result["p_value"].iloc[0]

    # Reference Welch t-test from SciPy on the same data (treatment first).
    reference = scipy_stats.ttest_ind(treatment, control, equal_var=False)
    t_scipy, p_scipy = reference
    # Per the original test note, the reported statistic is rounded to two
    # decimals, hence the 0.01 tolerance.
    assert abs(statistic - t_scipy) < 0.01
    assert abs(p_value - p_scipy) < 1e-5

    assert 0 <= p_value <= 1
    assert result["metric_value"].iloc[0] == pytest.approx(
        np.mean(treatment), rel=1e-10
    )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_ttest_ind_welch_metric_formula():
    """metric_formula is '<treatment sum>/<treatment count>'."""
    control_values = [1.0, 2.0, 3.0]
    treatment_values = [0, 0, 0, 200, 500]  # sum=700, n=5

    result = ttest_ind_welch(control_values, treatment_values)

    formula = result["metric_formula"].iloc[0]
    metric_value = result["metric_value"].iloc[0]

    assert formula == "700/5"
    assert metric_value == pytest.approx(700 / 5, rel=1e-10)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_ttest_ind_welch_too_few_observations():
    """Fewer than two observations in either group raises ValueError."""
    undersized_pairs = (
        ([1.0], [1.0, 2.0]),  # control too small
        ([1.0, 2.0], [3.0]),  # treatment too small
    )
    for control_values, treatment_values in undersized_pairs:
        with pytest.raises(ValueError, match="at least 2 observations"):
            ttest_ind_welch(control_values, treatment_values)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_ttest_ind_welch_constant_groups():
    """Two constant groups (zero variance) give a NaN p-value and a NaN CI."""
    control_values = [5.0] * 3
    treatment_values = [7.0] * 3

    result = ttest_ind_welch(control_values, treatment_values)

    p_value = result["p_value"].iloc[0]
    ci_absolute_text = result["CI_absolute"].iloc[0]

    assert np.isnan(p_value)
    # CI_absolute is a formatted string; it should mention "nan".
    assert "nan" in ci_absolute_text.lower()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_ttest_ind_welch_nan_removal():
    """NaN observations are dropped before computing (listwise deletion).

    The control group carries one NaN; the result must be finite and must
    match the result obtained from the same data with the NaN removed, i.e.
    only the three valid control values are used.
    """
    treatment = [2.0, 3.0, 4.0]
    clean_control = [1.0, 2.0, 3.0]
    control_with_nan = clean_control + [np.nan]

    result = ttest_ind_welch(control_with_nan, treatment)
    expected = ttest_ind_welch(clean_control, treatment)

    assert len(result) == 1

    p_value = result["p_value"].iloc[0]
    # The previous check (`isfinite(p) or p is not None`) was always true,
    # because a value read from a DataFrame cell is never the `None`
    # singleton. Require a finite p-value so a propagated NaN fails the test.
    assert np.isfinite(p_value)

    # Only the three non-NaN control values should contribute.
    assert p_value == pytest.approx(expected["p_value"].iloc[0])
    assert result["statistic"].iloc[0] == pytest.approx(
        expected["statistic"].iloc[0]
    )
|