abtoolkit 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abtoolkit-1.0.0/LICENSE +7 -0
- abtoolkit-1.0.0/PKG-INFO +113 -0
- abtoolkit-1.0.0/README.md +92 -0
- abtoolkit-1.0.0/abtoolkit/__init__.py +0 -0
- abtoolkit-1.0.0/abtoolkit/continuous/__init__.py +0 -0
- abtoolkit-1.0.0/abtoolkit/continuous/simulation.py +269 -0
- abtoolkit-1.0.0/abtoolkit/continuous/stattests.py +192 -0
- abtoolkit-1.0.0/abtoolkit/continuous/utils.py +87 -0
- abtoolkit-1.0.0/abtoolkit/discrete/__init__.py +0 -0
- abtoolkit-1.0.0/abtoolkit/discrete/utils.py +30 -0
- abtoolkit-1.0.0/abtoolkit.egg-info/PKG-INFO +113 -0
- abtoolkit-1.0.0/abtoolkit.egg-info/SOURCES.txt +34 -0
- abtoolkit-1.0.0/abtoolkit.egg-info/dependency_links.txt +1 -0
- abtoolkit-1.0.0/abtoolkit.egg-info/requires.txt +6 -0
- abtoolkit-1.0.0/abtoolkit.egg-info/top_level.txt +6 -0
- abtoolkit-1.0.0/examples/continuous_var_analysis.py +51 -0
- abtoolkit-1.0.0/pyproject.toml +34 -0
- abtoolkit-1.0.0/setup.cfg +4 -0
- abtoolkit-1.0.0/tests/__init__.py +0 -0
- abtoolkit-1.0.0/tests/test_continuous_simulation.py +63 -0
- abtoolkit-1.0.0/tests/test_continuous_tests.py +87 -0
- abtoolkit-1.0.0/tests/test_continuous_utils.py +18 -0
- abtoolkit-1.0.0/tests/test_discrete_utils.py +16 -0
- abtoolkit-1.0.0/venv/bin/activate_this.py +31 -0
- abtoolkit-1.0.0/venv/bin/rst2html.py +23 -0
- abtoolkit-1.0.0/venv/bin/rst2html4.py +26 -0
- abtoolkit-1.0.0/venv/bin/rst2html5.py +33 -0
- abtoolkit-1.0.0/venv/bin/rst2latex.py +26 -0
- abtoolkit-1.0.0/venv/bin/rst2man.py +27 -0
- abtoolkit-1.0.0/venv/bin/rst2odt.py +28 -0
- abtoolkit-1.0.0/venv/bin/rst2odt_prepstyles.py +20 -0
- abtoolkit-1.0.0/venv/bin/rst2pseudoxml.py +23 -0
- abtoolkit-1.0.0/venv/bin/rst2s5.py +24 -0
- abtoolkit-1.0.0/venv/bin/rst2xetex.py +27 -0
- abtoolkit-1.0.0/venv/bin/rst2xml.py +23 -0
- abtoolkit-1.0.0/venv/bin/rstpep2html.py +25 -0
abtoolkit-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright 2024 Nikita Altukhov
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
abtoolkit-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: abtoolkit
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Package with tools for AB testing
|
|
5
|
+
Author-email: Nikita Altukhov <altuxov.nikita@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/nikitosl/abtoolkit
|
|
7
|
+
Project-URL: Issues, https://github.com/nikitosl/abtoolkit/issues
|
|
8
|
+
Keywords: ab_test,cuped,did,ttest
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.8
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: pandas>=2.2.1
|
|
16
|
+
Requires-Dist: numpy>=1.26.4
|
|
17
|
+
Requires-Dist: statsmodels>=0.14.1
|
|
18
|
+
Requires-Dist: scipy>=1.12.0
|
|
19
|
+
Requires-Dist: linearmodels>=5.4
|
|
20
|
+
Requires-Dist: tqdm>=4.66.2
|
|
21
|
+
|
|
22
|
+
# ABToolkit
|
|
23
|
+
Set of tools for AA and AB tests, sample size estimation, confidence intervals estimation.
|
|
24
|
+
For continuous and discrete variables.
|
|
25
|
+
|
|
26
|
+
## Install using pip:
|
|
27
|
+
```pip install abtoolkit```
|
|
28
|
+
|
|
29
|
+
## Continuous variables analysis
|
|
30
|
+
#### Sample size estimation:
|
|
31
|
+
```
|
|
32
|
+
from abtoolkit.continuous.utils import calculate_sample_size_by_mde
|
|
33
|
+
calculate_sample_size_by_mde(
|
|
34
|
+
std=variable.std(),
|
|
35
|
+
alpha=alpha_level,
|
|
36
|
+
power=power,
|
|
37
|
+
mde=mde
|
|
38
|
+
)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
#### AA and AB tests simulation:
|
|
42
|
+
Using ```abtoolkit.continuous.simulation.StatTestsSimulation``` class you can simulate and check different stat-test,
|
|
43
|
+
compare them in terms of stat test power to choose the best test for your data. As result of simulation for each
|
|
44
|
+
stat test you will get the 1-st Type error estimation with confidence interval, 2-nd Type error estimation with
|
|
45
|
+
confidence interval and plot of p-value distribution for different tests.
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
from abtoolkit.continuous.simulation import StatTestsSimulation
|
|
49
|
+
simulation = StatTestsSimulation(
|
|
50
|
+
control,
|
|
51
|
+
test,
|
|
52
|
+
stattests_list=["ttest", "regression_test", "cuped_ttest", "did_regression_test", "additional_vars_regression_test"],
|
|
53
|
+
experiments_num=experiments_num,
|
|
54
|
+
sample_size=sample_size,
|
|
55
|
+
mde=mde,
|
|
56
|
+
alpha_level=alpha_level,
|
|
57
|
+
|
|
58
|
+
control_previous_values=control_previous_value,
|
|
59
|
+
test_previous_values=test_previous_value,
|
|
60
|
+
control_cuped_covariant=control_previous_value,
|
|
61
|
+
test_cuped_covariant=test_previous_value,
|
|
62
|
+
control_additional_vars=[control_previous_value],
|
|
63
|
+
test_additional_vars=[test_previous_value],
|
|
64
|
+
)
|
|
65
|
+
simulation.run() # Run simulation
|
|
66
|
+
simulation.print_results() # Print results of simulation
|
|
67
|
+
simulation.plot_p_values() # Plot p-values distribution
|
|
68
|
+
```
|
|
69
|
+
Output:
|
|
70
|
+

|
|
71
|
+

|
|
72
|
+
|
|
73
|
+
Full example of usage you can find in ```examples/continuous_var_analysis.py``` script.
|
|
74
|
+
|
|
75
|
+
#### Next stat tests implemented for treatment effect estimation:
|
|
76
|
+
- ***T-Test*** - estimates treatment effect by comparing variables between test and control groups.
|
|
77
|
+
- ***Difference T-Test*** - estimates treatment effect by comparing difference between actual and previous values
|
|
78
|
+
of variables in test and control groups.
|
|
79
|
+
- ***Regression Test*** - estimates treatment effect using linear regression by tested predicting variable.
|
|
80
|
+
The fact of treatment is represented in the model as a binary flag (treated or not). The weight for this flag shows the significance
|
|
81
|
+
of treatment impact.
|
|
82
|
+
```y = bias + w * treated```
|
|
83
|
+
- ***Regression Difference-in-Difference Test*** - estimates treatment effect using linear regression by predicting
|
|
84
|
+
difference between test and control groups, which is represented as the difference between the current variable value and
|
|
85
|
+
previous period variable value (two differences). Weight for treated and current variable values shows
|
|
86
|
+
significant of treatment. ```y = bias + w0 * treated + w1 * after + w2 * treated * after```
|
|
87
|
+
- ***CUPED*** - estimates treatment effect by comparing variables between test and control groups and uses covariant
|
|
88
|
+
to reduce variance and speedup test. ```y = y - Q * covariant```, where ```Q = cov(y, covariant) / var(covariant)```.
|
|
89
|
+
Cuped variable has same mean value (unbiased), but smaller variance, that speedup test.
|
|
90
|
+
- ***Regression with Additional Variables*** - estimates treatment effect using linear regression by predicting
|
|
91
|
+
tested variable with additional variables, which describe part of main variable variance and speedup test.
|
|
92
|
+
Fact of treatment represented in model as binary flag (treated or not). Weight for this flag show significant
|
|
93
|
+
of treatment impact.
|
|
94
|
+
```y = bias + w0 * treated + w1 * additional_variable1 + w2 * additional_variable2 + ...```
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
## Discrete variables analysis
|
|
98
|
+
#### Sample size estimation:
|
|
99
|
+
```
|
|
100
|
+
from abtoolkit.discrete.utils import estimate_ci_binomial
|
|
101
|
+
estimate_ci_binomial(
|
|
102
|
+
p,
|
|
103
|
+
sample_size,
|
|
104
|
+
alpha=0.05
|
|
105
|
+
)
|
|
106
|
+
```
|
|
107
|
+
#### AA and AB tests simulation:
|
|
108
|
+
To Be Done
|
|
109
|
+
#### Next stat tests implemented for treatment effect estimation:
|
|
110
|
+
To Be Done
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
You can find examples of toolkit usage in [examples/](https://github.com/nikitosl/abtoolkit/tree/master/examples) directory.
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# ABToolkit
|
|
2
|
+
Set of tools for AA and AB tests, sample size estimation, confidence intervals estimation.
|
|
3
|
+
For continuous and discrete variables.
|
|
4
|
+
|
|
5
|
+
## Install using pip:
|
|
6
|
+
```pip install abtoolkit```
|
|
7
|
+
|
|
8
|
+
## Continuous variables analysis
|
|
9
|
+
#### Sample size estimation:
|
|
10
|
+
```
|
|
11
|
+
from abtoolkit.continuous.utils import calculate_sample_size_by_mde
|
|
12
|
+
calculate_sample_size_by_mde(
|
|
13
|
+
std=variable.std(),
|
|
14
|
+
alpha=alpha_level,
|
|
15
|
+
power=power,
|
|
16
|
+
mde=mde
|
|
17
|
+
)
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
#### AA and AB tests simulation:
|
|
21
|
+
Using ```abtoolkit.continuous.simulation.StatTestsSimulation``` class you can simulate and check different stat-test,
|
|
22
|
+
compare them in terms of stat test power to choose the best test for your data. As result of simulation for each
|
|
23
|
+
stat test you will get the 1-st Type error estimation with confidence interval, 2-nd Type error estimation with
|
|
24
|
+
confidence interval and plot of p-value distribution for different tests.
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
from abtoolkit.continuous.simulation import StatTestsSimulation
|
|
28
|
+
simulation = StatTestsSimulation(
|
|
29
|
+
control,
|
|
30
|
+
test,
|
|
31
|
+
stattests_list=["ttest", "regression_test", "cuped_ttest", "did_regression_test", "additional_vars_regression_test"],
|
|
32
|
+
experiments_num=experiments_num,
|
|
33
|
+
sample_size=sample_size,
|
|
34
|
+
mde=mde,
|
|
35
|
+
alpha_level=alpha_level,
|
|
36
|
+
|
|
37
|
+
control_previous_values=control_previous_value,
|
|
38
|
+
test_previous_values=test_previous_value,
|
|
39
|
+
control_cuped_covariant=control_previous_value,
|
|
40
|
+
test_cuped_covariant=test_previous_value,
|
|
41
|
+
control_additional_vars=[control_previous_value],
|
|
42
|
+
test_additional_vars=[test_previous_value],
|
|
43
|
+
)
|
|
44
|
+
simulation.run() # Run simulation
|
|
45
|
+
simulation.print_results() # Print results of simulation
|
|
46
|
+
simulation.plot_p_values() # Plot p-values distribution
|
|
47
|
+
```
|
|
48
|
+
Output:
|
|
49
|
+

|
|
50
|
+

|
|
51
|
+
|
|
52
|
+
Full example of usage you can find in ```examples/continuous_var_analysis.py``` script.
|
|
53
|
+
|
|
54
|
+
#### Next stat tests implemented for treatment effect estimation:
|
|
55
|
+
- ***T-Test*** - estimates treatment effect by comparing variables between test and control groups.
|
|
56
|
+
- ***Difference T-Test*** - estimates treatment effect by comparing difference between actual and previous values
|
|
57
|
+
of variables in test and control groups.
|
|
58
|
+
- ***Regression Test*** - estimates treatment effect using linear regression by tested predicting variable.
|
|
59
|
+
Fact of treatment represented in model as binary flag (treated or not). Weight for this flag show significant
|
|
60
|
+
of treatment impact.
|
|
61
|
+
```y = bias + w * treated```
|
|
62
|
+
- ***Regression Difference-in-Difference Test*** - estimates treatment effect using linear regression by predicting
|
|
63
|
+
difference between test and control groups, which is represented as the difference between the current variable value and
|
|
64
|
+
previous period variable value (two differences). Weight for treated and current variable values shows
|
|
65
|
+
significant of treatment. ```y = bias + w0 * treated + w1 * after + w2 * treated * after```
|
|
66
|
+
- ***CUPED*** - estimates treatment effect by comparing variables between test and control groups and uses covariant
|
|
67
|
+
to reduce variance and speedup test. ```y = y - Q * covariant```, where ```Q = cov(y, covariant) / var(covariant)```.
|
|
68
|
+
Cuped variable has same mean value (unbiased), but smaller variance, that speedup test.
|
|
69
|
+
- ***Regression with Additional Variables*** - estimates treatment effect using linear regression by predicting
|
|
70
|
+
tested variable with additional variables, which describe part of main variable variance and speedup test.
|
|
71
|
+
Fact of treatment represented in model as binary flag (treated or not). Weight for this flag show significant
|
|
72
|
+
of treatment impact.
|
|
73
|
+
```y = bias + w0 * treated + w1 * additional_variable1 + w2 * additional_variable2 + ...```
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
## Discrete variables analysis
|
|
77
|
+
#### Sample size estimation:
|
|
78
|
+
```
|
|
79
|
+
from abtoolkit.discrete.utils import estimate_ci_binomial
|
|
80
|
+
estimate_ci_binomial(
|
|
81
|
+
p,
|
|
82
|
+
sample_size,
|
|
83
|
+
alpha=0.05
|
|
84
|
+
)
|
|
85
|
+
```
|
|
86
|
+
#### AA and AB tests simulation:
|
|
87
|
+
To Be Done
|
|
88
|
+
#### Next stat tests implemented for treatment effect estimation:
|
|
89
|
+
To Be Done
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
You can find examples of toolkit usage in [examples/](https://github.com/nikitosl/abtoolkit/tree/master/examples) directory.
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
|
|
8
|
+
from abtoolkit.continuous.stattests import additional_vars_regression_test
|
|
9
|
+
from abtoolkit.continuous.stattests import cuped_ttest
|
|
10
|
+
from abtoolkit.continuous.stattests import did_regression_test
|
|
11
|
+
from abtoolkit.continuous.stattests import regression_test
|
|
12
|
+
from abtoolkit.continuous.stattests import ttest
|
|
13
|
+
from abtoolkit.discrete.utils import estimate_ci_binomial
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class StatTestsSimulation:
    """
    Monte-Carlo simulation of AA and AB experiments for a set of statistical tests.

    For every requested test the simulator repeatedly samples from the control/test
    series, runs the test without an effect (AA) and with an injected effect of size
    ``mde`` (AB), and accumulates p-values.  From those it estimates the 1st-type
    error (alpha) and the power, each with a binomial confidence interval, and can
    plot the AB p-value distributions.
    """

    def __init__(
        self,
        control: pd.Series,
        test: pd.Series,
        stattests_list: List[str],
        sample_size: int,
        experiments_num: int,
        mde: float,
        alpha_level: float = 0.05,
        power: float = 0.8,
        control_previous_values: pd.Series = None,
        test_previous_values: pd.Series = None,
        control_cuped_covariant: pd.Series = None,
        test_cuped_covariant: pd.Series = None,
        control_additional_vars: List[pd.Series] = None,
        test_additional_vars: List[pd.Series] = None,
    ):
        """
        Simulates AA and AB tests for given stat-tests. Prints result (alpha and power) for each test
        and builds plot for p-value distributions.

        :param control: control variable
        :param test: test variable
        :param stattests_list: list of stat-tests for estimation
        :param sample_size: number of examples to sample from variables in each iteration
        :param experiments_num: number of experiments to perform for each stat-test
        :param mde: minimal detectable effect, used to perform AB test (add to test variable)
        :param alpha_level: test alpha-level
        :param power: test power
        :param control_previous_values: previous values of control variable used to reduce variance and speedup
        test in difference-in-difference test
        :param test_previous_values: previous values of test variable used to reduce variance and speedup test
        in difference-in-difference test
        :param control_cuped_covariant: covariant for control group variable used to reduce variance and speedup test
        in cuped test
        :param test_cuped_covariant: covariant for test group variable used to reduce variance and speedup test
        in cuped test
        :param control_additional_vars: list of additional variables for control group variable used to
        reduce variance and speedup test in 'regression_with_additional_variables' test
        :param test_additional_vars: list of additional variables for test group variable used to
        reduce variance and speedup test in 'regression_with_additional_variables' test
        """

        self.control = control
        self.test = test

        self.stattests_list = stattests_list
        self.experiments_num = experiments_num
        self.sample_size = sample_size
        self.mde = mde
        self.alpha_level = alpha_level
        self.power = power

        # Dispatch table: stat-test name -> bound simulation method.
        # NOTE(review): "diff_ttest" is routed to simulate_difference_ttest, which
        # internally calls cuped_ttest rather than stattests.difference_ttest —
        # confirm this is intentional.
        self.stattests_func_map = {
            "ttest": self.simulate_ttest,
            "diff_ttest": self.simulate_difference_ttest,
            "cuped_ttest": self.simulate_cuped,
            "regression_test": self.simulate_reg,
            "did_regression_test": self.simulate_reg_did,
            "additional_vars_regression_test": self.simulate_reg_add,
        }
        # Per-test simulation results, filled by run() / simulate_test_by_name().
        self.info = {}

        # Optional auxiliary series; only required by the tests that use them.
        self.control_previous_values = control_previous_values
        self.test_previous_values = test_previous_values
        self.control_cuped_covariant = control_cuped_covariant
        self.test_cuped_covariant = test_cuped_covariant
        self.control_additional_vars = control_additional_vars
        self.test_additional_vars = test_additional_vars

    def plot_p_values(self):
        """
        Plot p-values distribution for each test
        :return: None
        """
        if len(self.info) == 0:
            # Nothing simulated yet - nothing to plot.
            return

        X = np.linspace(0, 1, 1000)
        for test, test_info in self.info.items():
            ab_pvalues = np.array(test_info["ab_pvalues"])
            # Empirical CDF of AB p-values: fraction of p-values below each threshold.
            Y = [np.mean(ab_pvalues < x) for x in X]
            plt.plot(X, Y, label=test)

        # Reference lines: chosen alpha level (vertical) and target power (horizontal).
        plt.plot([self.alpha_level, self.alpha_level], [0, 1], '--k', alpha=0.8)
        plt.plot([0, 1], [self.power, self.power], '--k', alpha=0.8)
        plt.title('P-Value Distribution for AB Simulation', size=12)
        plt.xlabel('p-value', size=10)
        plt.legend(fontsize=10)
        plt.grid()
        plt.show()

    def print_results(self):
        """
        Print simulation results for each test (alpha and power + confidence intervals)
        :return: None
        """
        for test in self.info:
            a, p = self.info[test]['alpha'], self.info[test]['power']
            aci1, aci2 = round(self.info[test]['alpha_ci'][0], 4), round(self.info[test]['alpha_ci'][1], 4)
            pci1, pci2 = round(self.info[test]['power_ci'][0], 4), round(self.info[test]['power_ci'][1], 4)

            # Red if the test fails requirements (alpha CI above the alpha level, or
            # power CI entirely below the target power); green otherwise.
            if (aci1 > self.alpha_level) or (self.power > pci2):
                print('\033[91m' + f"'{test}'; alpha={a} ci[{aci1}; {aci2}], power={p} [{pci1}; {pci2}]" + '\033[0m')
            else:
                print('\033[92m' + f"'{test}'; alpha={a} ci[{aci1}; {aci2}], power={p} [{pci1}; {pci2}]" + '\033[0m')

    def run(self):
        """
        Simulate all tests from 'self.stattests_list' by given data and save information to 'info' dictionary
        :return: dict with per-test simulation results
        """
        self.info = {}
        for stattest in self.stattests_list:
            self.simulate_test_by_name(stattest)
        return self.info

    def simulate_test_by_name(self, test_name: str):
        """
        Simulate AA and AB test and save results to 'info' dictionary
        :param test_name: name of test for simulation (ttest | cuped_ttest | regression_test | did_regression_test
        | additional_vars_regression_test)
        :return: None
        """

        assert test_name in self.stattests_func_map, f"Given test_name {test_name} not found"
        stattest_func = self.stattests_func_map[test_name]

        test_success_no_effect_cnt = 0  # AA rejections -> alpha estimate
        test_pvalues_no_effect = []
        test_success_effect_cnt = 0  # AB rejections -> power estimate
        test_pvalues_effect = []

        for _ in tqdm(range(self.experiments_num), desc=f"Simulation test '{test_name}'"):
            # AA experiment: no effect injected; rejections here are false positives.
            p_value = stattest_func(mde=0)
            test_pvalues_no_effect.append(p_value)
            if p_value < self.alpha_level:
                test_success_no_effect_cnt += 1

            # AB experiment: effect of size mde injected; rejections are true positives.
            p_value = stattest_func(mde=self.mde)
            test_pvalues_effect.append(p_value)
            if p_value < self.alpha_level:
                test_success_effect_cnt += 1

        alpha = test_success_no_effect_cnt / self.experiments_num
        power = test_success_effect_cnt / self.experiments_num

        alpha_ci = estimate_ci_binomial(alpha, self.experiments_num, alpha=0.05)
        power_ci = estimate_ci_binomial(power, self.experiments_num, alpha=0.05)

        # Plain assignment overwrites any previous entry; no need to delete first.
        self.info[test_name] = {
            "alpha": alpha,
            "alpha_ci": alpha_ci,
            "power": power,
            "power_ci": power_ci,
            "aa_pvalues": test_pvalues_no_effect,
            "ab_pvalues": test_pvalues_effect,
        }

    def simulate_ttest(self, mde: float) -> float:
        """
        Simulate ttest
        :param mde: minimal detectable effect, to sum with test variable
        :return: p_value
        """
        control_sample = self.control.sample(self.sample_size, replace=True)
        test_sample = self.test.sample(self.sample_size, replace=True)
        test_sample += mde

        return ttest(control_sample, test_sample)

    def simulate_difference_ttest(self, mde: float) -> float:
        """
        Simulate ttest for difference between actual variable value and previous period variable value
        :param mde: minimal detectable effect, to sum with test variable
        :return: p_value
        """
        # Sample positions, then align current and previous values through the index
        # so each sampled observation keeps its own previous-period value.
        control_index_sample = self.control.index[np.random.randint(0, len(self.control), size=self.sample_size)]
        test_index_sample = self.test.index[np.random.randint(0, len(self.test), size=self.sample_size)]

        control_sample = self.control.loc[control_index_sample]
        control_pre_sample = self.control_previous_values.loc[control_index_sample]
        test_sample = self.test.loc[test_index_sample]
        test_pre_sample = self.test_previous_values.loc[test_index_sample]
        test_sample += mde

        return cuped_ttest(control_sample, control_pre_sample, test_sample, test_pre_sample)

    def simulate_cuped(self, mde: float) -> float:
        """
        Simulate CUPED ttest
        :param mde: minimal detectable effect, to sum with test variable
        :return: p_value
        """
        control_index_sample = self.control.index[np.random.randint(0, len(self.control), size=self.sample_size)]
        test_index_sample = self.test.index[np.random.randint(0, len(self.test), size=self.sample_size)]

        control_sample = self.control.loc[control_index_sample]
        control_covariant_sample = self.control_cuped_covariant.loc[control_index_sample]
        test_sample = self.test.loc[test_index_sample]
        test_covariant_sample = self.test_cuped_covariant.loc[test_index_sample]
        test_sample += mde

        return cuped_ttest(control_sample, control_covariant_sample, test_sample, test_covariant_sample)

    def simulate_reg(self, mde: float) -> float:
        """
        Simulate test using regression
        :param mde: minimal detectable effect, to sum with test variable
        :return: p_value
        """
        control_sample = self.control.sample(self.sample_size, replace=True)
        test_sample = self.test.sample(self.sample_size, replace=True)
        test_sample += mde

        return regression_test(control_sample, test_sample)

    def simulate_reg_did(self, mde: float) -> float:
        """
        Simulate test using regression with difference-in-difference technique
        :param mde: minimal detectable effect, to sum with test variable
        :return: p_value
        """
        control_index_sample = self.control.index[np.random.randint(0, len(self.control), size=self.sample_size)]
        test_index_sample = self.test.index[np.random.randint(0, len(self.test), size=self.sample_size)]

        control_sample = self.control.loc[control_index_sample]
        control_previous_sample = self.control_previous_values.loc[control_index_sample]
        test_sample = self.test.loc[test_index_sample]
        test_previous_sample = self.test_previous_values.loc[test_index_sample]
        test_sample += mde

        return did_regression_test(control_sample, control_previous_sample, test_sample, test_previous_sample)

    def simulate_reg_add(self, mde: float) -> float:
        """
        Simulate test using regression with additional variables
        :param mde: minimal detectable effect, to sum with test variable
        :return: p_value
        """
        control_index_sample = self.control.index[np.random.randint(0, len(self.control), size=self.sample_size)]
        test_index_sample = self.test.index[np.random.randint(0, len(self.test), size=self.sample_size)]

        control_sample = self.control.loc[control_index_sample]
        control_add_samples = [a.loc[control_index_sample] for a in self.control_additional_vars]
        test_sample = self.test.loc[test_index_sample]
        test_add_samples = [a.loc[test_index_sample] for a in self.test_additional_vars]
        test_sample += mde

        return additional_vars_regression_test(control_sample, control_add_samples, test_sample, test_add_samples)
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
import linearmodels as lm
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from scipy.stats import ttest_ind
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def ttest(
    control: pd.Series,
    test: pd.Series,
) -> float:
    """
    One-sided t-test (alternative: mean(control) < mean(test)).

    Note: the original docstring described this as a "two-side t-test", but
    ``alternative="less"`` makes it one-sided; the documentation is corrected
    here while the behavior is unchanged.

    :param control: pd.Series for control sample
    :param test: pd.Series for test sample
    :return: p-value
    """

    return ttest_ind(control, test, alternative="less").pvalue
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def difference_ttest(
    control: pd.Series,
    control_pre: pd.Series,
    test: pd.Series,
    test_pre: pd.Series,
) -> float:
    """
    Estimate the treatment effect by t-testing period-over-period differences.

    Subtracts the previous-period value from the current value in each group and
    runs the module's t-test on the two difference series.

    :param control: pd.Series, control sample
    :param control_pre: pd.Series, control previous period value
    :param test: pd.Series, test sample
    :param test_pre: pd.Series, test previous period value
    :return: p-value
    """
    control_diff = control - control_pre
    test_diff = test - test_pre

    return ttest(control_diff, test_diff)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def cuped_ttest(
    control: pd.Series,
    control_covariant: pd.Series,
    test: pd.Series,
    test_covariant: pd.Series,
) -> float:
    """
    Estimate the treatment effect with a t-test on CUPED-adjusted values.

    Theta is the pooled covariance of (covariant, value) over the pooled
    covariant variance; subtracting ``theta * covariant`` keeps the mean
    unbiased while reducing variance, which increases test power.

    :param control: pd.Series, control sample
    :param control_covariant: pd.Series, control sample covariant
    :param test: pd.Series, test sample
    :param test_covariant: pd.Series, test sample covariant
    :return: p-value
    """

    pooled_values = pd.concat(
        [control.rename("value"), test.rename("value")],
        axis=0,
    )
    pooled_covariants = pd.concat(
        [control_covariant.rename("covariant"), test_covariant.rename("covariant")],
        axis=0,
    )

    # theta = cov(covariant, value) / var(covariant), estimated on the pooled data.
    theta = np.cov(pooled_covariants, pooled_values)[0, 1] / pooled_covariants.var()

    adjusted_test = test - theta * test_covariant
    adjusted_control = control - theta * control_covariant

    return ttest(adjusted_control, adjusted_test)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def regression_test(
    control: pd.Series,
    test: pd.Series,
) -> float:
    """
    Treatment effect estimation using linear regression.

    Fits ``value ~ bias + treated`` with PanelOLS, where ``treated`` is a binary
    group flag; the p-value of the ``treated`` coefficient is returned.

    :param control: pd.Series with index [entity, dt], where dt could be int of datetime. Control sample
    :param test: pd.Series with index [entity, dt], where dt could be int of datetime. Test sample
    :return: p-value
    """
    control_frame = control.rename("value").to_frame().assign(treated=0)
    test_frame = test.rename("value").to_frame().assign(treated=1)
    df = pd.concat([control_frame, test_frame], axis=0)
    df["bias"] = 1

    # PanelOLS requires a two-level (entity, time) index; synthesize one if absent.
    if not isinstance(df.index, pd.MultiIndex):
        df["index1"] = 0
        df["index2"] = 1
        df = df.set_index(["index1", "index2"])

    fitted = lm.PanelOLS.from_formula("value ~ bias + treated", data=df).fit()
    return fitted.pvalues["treated"]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def did_regression_test(
    control: pd.Series,
    control_pre: pd.Series,
    test: pd.Series,
    test_pre: pd.Series,
) -> float:
    """
    Difference-in-Difference treatment effect estimation using linear regression.
    Calculates difference between current and last values in test and control groups and then
    calculates difference between differences to increase test power
    :param control_pre: pd.Series with index [entity, dt], where dt could be int of datetime.
    Control sample before treatment
    :param control: pd.Series with index [entity, dt], where dt could be int of datetime.
    Control sample after treatment
    :param test_pre: pd.Series with index [entity, dt], where dt could be int of datetime. Test sample before treatment
    :param test: pd.Series with index [entity, dt], where dt could be int of datetime. Test sample after treatment
    :return: p-value
    """
    df = pd.concat([
        control_pre.rename("value").to_frame().assign(treated=0).assign(after=0),
        control.rename("value").to_frame().assign(treated=0).assign(after=1),
        test_pre.rename("value").to_frame().assign(treated=1).assign(after=0),
        test.rename("value").to_frame().assign(treated=1).assign(after=1),
    ], axis=0)

    df["bias"] = 1

    # PanelOLS requires a two-level (entity, time) index; synthesize one if absent.
    if not isinstance(df.index, pd.MultiIndex):
        df["index1"] = 0
        df["index2"] = 1
        df = df.set_index(["index1", "index2"])

    # Fixed: formula previously contained a doubled operator ("bias + + after").
    # The DiD effect is the coefficient of the treated*after interaction.
    mod = lm.PanelOLS.from_formula("value ~ bias + after + treated + treated*after", data=df)
    result = mod.fit()
    return result.pvalues["treated:after"]
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def additional_vars_regression_test(
    control: pd.Series,
    control_additional_vars: List[pd.Series],
    test: pd.Series,
    test_additional_vars: List[pd.Series],
) -> float:
    """
    Treatment effect estimation using additional variables in linear regression. Additional
    variables should reduce deviation of target variable and increase test power
    :param control: pd.Series with index [entity, dt], where dt could be int of datetime.
    Control sample
    :param control_additional_vars: List of pd.Series with index [entity, dt], where dt could be int of datetime.
    Additional variables which can describe some deviation of tested variable
    :param test: pd.Series with index [entity, dt], where dt could be int of datetime.
    Test sample
    :param test_additional_vars: List of pd.Series with index [entity, dt], where dt could be int of datetime.
    Additional variables which can describe some deviation of tested variable
    :return: p-value
    """

    assert len(test_additional_vars) > 0, "No additional vars for 'additional_vars_regression_test' test given"

    # Both groups must provide the same set of additional variables (matched by name),
    # since the names are used verbatim in the regression formula below.
    additional_vars_names_test = [v.name for v in test_additional_vars]
    additional_vars_names_control = [v.name for v in control_additional_vars]
    assert set(additional_vars_names_test) == set(additional_vars_names_control), \
        (f"Lists of control and test additional vars should be the same. "
         f"Got {set(additional_vars_names_test)} vars for test "
         f"and {set(additional_vars_names_control)} vars for control")

    control_df = pd.concat([control.rename("value").to_frame()] + control_additional_vars, axis=1)
    test_df = pd.concat([test.rename("value").to_frame()] + test_additional_vars, axis=1)

    # NOTE(review): this overwrites the control index with the test index, which
    # assumes both frames have equal length and comparable ordering — confirm
    # against callers (the simulator samples equal-size groups).
    control_df.index = test_df.index
    df = pd.concat([
        control_df.assign(treated=0),
        test_df.assign(treated=1),
    ], axis=0)

    df["bias"] = 1

    # PanelOLS requires a two-level (entity, time) index; synthesize one if absent.
    if not isinstance(df.index, pd.MultiIndex):
        df["index1"] = 0
        df["index2"] = 1
        df = df.set_index(["index1", "index2"])

    additional_vars_formula = " + ".join(map(str, additional_vars_names_test))

    formula = f"value ~ bias + treated + {additional_vars_formula}"
    mod = lm.PanelOLS.from_formula(formula, data=df)
    result = mod.fit()
    return result.pvalues["treated"]
|