abanalyzer 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +11 -0
- data/.travis.yml +3 -5
- data/Gemfile +1 -2
- data/README.rdoc +1 -1
- data/Rakefile +9 -6
- data/abanalyzer.gemspec +15 -14
- data/lib/abanalyzer/abtest.rb +17 -22
- data/lib/abanalyzer/matrix.rb +15 -17
- data/lib/abanalyzer/sample.rb +5 -7
- data/lib/abanalyzer/version.rb +1 -1
- data/test/abtest_test.rb +14 -13
- data/test/matrix_test.rb +6 -5
- metadata +36 -22
- data/test/helper.rb +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9cbf5e6bc121a608cab28ea506580a3cbae02998
|
4
|
+
data.tar.gz: f897ff9d1f557e831d779d7ee6f87937dc414ec3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f78dfa9350b44b072a34c4d6d230dd415d3b84eb53a9e0d5b1f1e7bf7f5bf146858a2e03ecabe528a26bb52f4f7a80c0458c82948f830454cddc93c050069ef7
|
7
|
+
data.tar.gz: 5d54f21901a039d64e8aec4d1c3c32b56e8edd2b29e7a7fbeb54c258ca3b6f8ed5afa1546a8a7a8338f99acc182ad2176deb806ce5d5a23855304013ddb591d8
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.rdoc
CHANGED
@@ -55,7 +55,7 @@ You can additionally get the actual score for either a Chi-Square test for indep
|
|
55
55
|
|
56
56
|
|
57
57
|
== Sample Size Calculations
|
58
|
-
Let's say you want to determine how large your sample size needs to be for an A/B test. Let's say your baseline is 10%, and you want to be able to determine if there's at least a 10% relative lift (1% absolute) to 11%. Let's assume you want a power[http://en.wikipedia.org/wiki/Statistical_power] of 0.8 and a {significance level}[http://en.wikipedia.org/wiki/Statistical_significance] of 0.05 (that is, an 80% chance of that you'll
|
58
|
+
Let's say you want to determine how large your sample size needs to be for an A/B test. Let's say your baseline is 10%, and you want to be able to determine if there's at least a 10% relative lift (1% absolute) to 11%. Let's assume you want a power[http://en.wikipedia.org/wiki/Statistical_power] of 0.8 and a {significance level}[http://en.wikipedia.org/wiki/Statistical_significance] of 0.05 (that is, an 80% chance that you'll succeed in recognizing a difference when there is one, and a 5% chance of a false positive).
|
59
59
|
|
60
60
|
...
|
61
61
|
ABAnalyzer.calculate_size(0.1, 0.11, 0.05, 0.8)
|
data/Rakefile
CHANGED
@@ -1,18 +1,21 @@
|
|
1
|
+
require 'rake/testtask'
|
1
2
|
require 'bundler/gem_tasks'
|
2
3
|
require 'rdoc/task'
|
4
|
+
require 'rubocop/rake_task'
|
5
|
+
|
6
|
+
RuboCop::RakeTask.new
|
3
7
|
|
4
|
-
RDoc::Task.new(
|
5
|
-
rdoc.title =
|
8
|
+
RDoc::Task.new('doc') do |rdoc|
|
9
|
+
rdoc.title = 'ABAnalyzer - A/B test analysis library for Ruby'
|
6
10
|
rdoc.rdoc_dir = 'docs'
|
7
11
|
rdoc.rdoc_files.include('README.rdoc')
|
8
12
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
9
|
-
|
10
|
-
|
11
|
-
require 'rake/testtask'
|
13
|
+
end
|
12
14
|
|
13
15
|
Rake::TestTask.new do |t|
|
16
|
+
t.libs << 'test'
|
14
17
|
t.test_files = FileList['test/*_test.rb']
|
15
18
|
t.verbose = true
|
16
19
|
end
|
17
20
|
|
18
|
-
task :
|
21
|
+
task default: %i[rubocop test]
|
data/abanalyzer.gemspec
CHANGED
@@ -1,20 +1,21 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require
|
1
|
+
$LOAD_PATH.push File.expand_path('../lib', __FILE__)
|
2
|
+
require 'abanalyzer/version'
|
3
|
+
require 'date'
|
4
4
|
|
5
5
|
Gem::Specification.new do |s|
|
6
|
-
s.name =
|
6
|
+
s.name = 'abanalyzer'
|
7
7
|
s.version = ABAnalyzer::VERSION
|
8
|
-
s.authors = [
|
8
|
+
s.authors = ['Brian Muller']
|
9
|
+
s.license = 'GPL-3.0'
|
9
10
|
s.date = Date.today.to_s
|
10
|
-
s.
|
11
|
-
s.
|
12
|
-
s.
|
13
|
-
s.files = `git ls-files`.split($/)
|
11
|
+
s.summary = 'A/B test analysis library for Ruby'
|
12
|
+
s.email = 'bamuller@gmail.com'
|
13
|
+
s.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
14
14
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
15
|
-
s.homepage =
|
16
|
-
s.require_paths = [
|
17
|
-
s.add_development_dependency(
|
18
|
-
s.add_development_dependency(
|
19
|
-
s.
|
15
|
+
s.homepage = 'https://github.com/bmuller/abanalyzer'
|
16
|
+
s.require_paths = ['lib']
|
17
|
+
s.add_development_dependency('rake', '~> 12.1')
|
18
|
+
s.add_development_dependency('minitest', '~> 5.10')
|
19
|
+
s.add_development_dependency('rubocop', '~> 0.50')
|
20
|
+
s.add_dependency('statistics2', '= 0.54')
|
20
21
|
end
|
data/lib/abanalyzer/abtest.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'statistics2'
|
2
2
|
|
3
3
|
module ABAnalyzer
|
4
|
-
|
5
4
|
class ABTest
|
6
5
|
# values should be hash of hashes, with top level hash the group names:
|
7
6
|
# { :groupa => { :yes => 20, :no => 10 }, :groupb => { :yes => 18, :no => 8 } }
|
@@ -9,42 +8,43 @@ module ABAnalyzer
|
|
9
8
|
@values = Matrix.new values
|
10
9
|
end
|
11
10
|
|
12
|
-
def different?(sig=0.05)
|
11
|
+
def different?(sig = 0.05)
|
13
12
|
gtest_p < sig
|
14
13
|
end
|
15
|
-
|
14
|
+
|
16
15
|
def chisquare_score
|
17
|
-
sum=0
|
18
|
-
@values.each_cell
|
16
|
+
sum = 0
|
17
|
+
@values.each_cell do |colname, rowname, value|
|
19
18
|
ex = expected(colname, rowname)
|
20
19
|
test_sufficient_data(colname, rowname, ex, value)
|
21
|
-
sum += ((value - ex)
|
22
|
-
|
23
|
-
|
20
|
+
sum += ((value - ex)**2) / ex
|
21
|
+
end
|
22
|
+
sum
|
24
23
|
end
|
25
|
-
|
24
|
+
|
26
25
|
def gtest_score
|
27
|
-
sum=0
|
28
|
-
@values.each_cell
|
26
|
+
sum = 0
|
27
|
+
@values.each_cell do |colname, rowname, value|
|
29
28
|
ex = expected(colname, rowname)
|
30
29
|
test_sufficient_data(colname, rowname, ex, value)
|
31
30
|
sum += value * Math.log(value / ex)
|
32
|
-
|
33
|
-
|
31
|
+
end
|
32
|
+
sum
|
34
33
|
end
|
35
34
|
|
36
35
|
def chisquare_p
|
37
|
-
|
36
|
+
1 - Statistics2.chi2dist(df, chisquare_score)
|
38
37
|
end
|
39
38
|
|
40
39
|
def gtest_p
|
41
|
-
|
40
|
+
1 - Statistics2.chi2dist(df, 2 * gtest_score)
|
42
41
|
end
|
43
|
-
|
42
|
+
|
44
43
|
private
|
44
|
+
|
45
45
|
def test_sufficient_data(colname, rowname, expected, value)
|
46
46
|
msg = "Insufficient data size for column #{colname} row #{rowname}. Expected value must be >= 5, and value must be > 0."
|
47
|
-
raise InsufficientDataError, msg if expected < 5
|
47
|
+
raise InsufficientDataError, msg if (expected < 5) || (value <= 0)
|
48
48
|
end
|
49
49
|
|
50
50
|
def expected(colname, rowname)
|
@@ -54,10 +54,5 @@ module ABAnalyzer
|
|
54
54
|
def df
|
55
55
|
(@values.columns.length - 1) * (@values.rows.length - 1)
|
56
56
|
end
|
57
|
-
|
58
|
-
def self.chi2dist(degrees, score)
|
59
|
-
1 - Statistics2.chi2dist(degrees, score)
|
60
|
-
end
|
61
57
|
end
|
62
|
-
|
63
58
|
end
|
data/lib/abanalyzer/matrix.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module ABAnalyzer
|
2
|
-
|
3
2
|
class Matrix
|
4
3
|
attr_reader :columns, :rows
|
5
4
|
def initialize(values)
|
@@ -10,14 +9,14 @@ module ABAnalyzer
|
|
10
9
|
end
|
11
10
|
|
12
11
|
def validate
|
13
|
-
@values.each
|
14
|
-
if column.keys.map
|
12
|
+
@values.each do |colname, column|
|
13
|
+
if column.keys.map(&:to_s).sort != @rows.map(&:to_s).sort
|
15
14
|
raise MatrixFormatError, "Column #{colname} has row names that don't match the first column's."
|
16
15
|
end
|
17
|
-
|
18
|
-
coltotal = @columns.map { |col| column_sum(col) }.inject { |a,b| a+b }
|
19
|
-
rowtotal = @rows.map { |col| row_sum(col) }.inject { |a,b| a+b }
|
20
|
-
raise MatrixFormatError,
|
16
|
+
end
|
17
|
+
coltotal = @columns.map { |col| column_sum(col) }.inject { |a, b| a + b }
|
18
|
+
rowtotal = @rows.map { |col| row_sum(col) }.inject { |a, b| a + b }
|
19
|
+
raise MatrixFormatError, 'Column sums do not equal row sums' if coltotal != rowtotal
|
21
20
|
end
|
22
21
|
|
23
22
|
def get_column(name)
|
@@ -25,17 +24,17 @@ module ABAnalyzer
|
|
25
24
|
end
|
26
25
|
|
27
26
|
def get_row(name)
|
28
|
-
@values.map
|
27
|
+
@values.map do |_colname, rows|
|
29
28
|
rows[name]
|
30
|
-
|
29
|
+
end
|
31
30
|
end
|
32
31
|
|
33
32
|
def each_cell
|
34
|
-
@columns.each
|
35
|
-
@rows.each
|
33
|
+
@columns.each do |colname|
|
34
|
+
@rows.each do |rowname|
|
36
35
|
yield colname, rowname, get(colname, rowname)
|
37
|
-
|
38
|
-
|
36
|
+
end
|
37
|
+
end
|
39
38
|
end
|
40
39
|
|
41
40
|
def get(colname, rowname)
|
@@ -43,16 +42,15 @@ module ABAnalyzer
|
|
43
42
|
end
|
44
43
|
|
45
44
|
def column_sum(name)
|
46
|
-
get_column(name).inject { |a,b| a+b }
|
45
|
+
get_column(name).inject { |a, b| a + b }
|
47
46
|
end
|
48
47
|
|
49
48
|
def row_sum(name)
|
50
|
-
get_row(name).inject { |a,b| a+b }
|
49
|
+
get_row(name).inject { |a, b| a + b }
|
51
50
|
end
|
52
51
|
|
53
52
|
def total_sum
|
54
|
-
@columns.map { |col| column_sum(col) }.inject { |a,b| a+b }
|
53
|
+
@columns.map { |col| column_sum(col) }.inject { |a, b| a + b }
|
55
54
|
end
|
56
55
|
end
|
57
|
-
|
58
56
|
end
|
data/lib/abanalyzer/sample.rb
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
require 'statistics2'
|
2
2
|
|
3
3
|
module ABAnalyzer
|
4
|
-
|
5
4
|
# Calculate the minimum sample size (per group) based on the desire to detect
|
6
5
|
# an increase from proportion p1 to proportion p2. Significance is generally
|
7
6
|
# safe at 0.05 (why? just because) and a power of 0.8 (why? just because)
|
8
7
|
def self.calculate_size(p1, p2, significance, power)
|
9
|
-
[
|
10
|
-
raise
|
11
|
-
|
8
|
+
[p1, p2, significance, power].each do |a|
|
9
|
+
raise 'All arguments to calculate_size must be Floats' unless a.is_a?(Float)
|
10
|
+
end
|
12
11
|
|
13
12
|
pbar = (p1 + p2) / 2.0
|
14
13
|
sides = 2.0
|
@@ -16,8 +15,8 @@ module ABAnalyzer
|
|
16
15
|
zcrit = Statistics2.pnormaldist(1 - (significance / sides))
|
17
16
|
zpow = Statistics2.pnormaldist(power)
|
18
17
|
|
19
|
-
numerator = (zcrit * Math.sqrt(2 * pbar * (1 - pbar)) + zpow * Math.sqrt(p2 * (1 - p2) + p1 * (1 - p1)))
|
20
|
-
denominator = (p2 - p1)
|
18
|
+
numerator = (zcrit * Math.sqrt(2 * pbar * (1 - pbar)) + zpow * Math.sqrt(p2 * (1 - p2) + p1 * (1 - p1)))**2
|
19
|
+
denominator = (p2 - p1)**2
|
21
20
|
(numerator / denominator).ceil
|
22
21
|
end
|
23
22
|
|
@@ -39,5 +38,4 @@ module ABAnalyzer
|
|
39
38
|
ci = confidence_interval(successes, trials, confidence)
|
40
39
|
[(ci.first - compared_proportion) / compared_proportion, (ci.last - compared_proportion) / compared_proportion]
|
41
40
|
end
|
42
|
-
|
43
41
|
end
|
data/lib/abanalyzer/version.rb
CHANGED
data/test/abtest_test.rb
CHANGED
@@ -1,24 +1,25 @@
|
|
1
|
-
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'abanalyzer'
|
2
3
|
|
3
|
-
class ABTestTest < Test
|
4
|
+
class ABTestTest < MiniTest::Test
|
4
5
|
def setup
|
5
|
-
@values = { :
|
6
|
+
@values = { rep: { male: 200, female: 250 }, dem: { male: 150, female: 300 }, ind: { male: 50, female: 50 } }
|
6
7
|
end
|
7
8
|
|
8
9
|
def test_test_creation
|
9
|
-
|
10
|
-
m = ABAnalyzer::ABTest.new(
|
11
|
-
|
10
|
+
assert_raises ABAnalyzer::InsufficientDataError do
|
11
|
+
m = ABAnalyzer::ABTest.new(one: { a: 10, b: 20 }, two: { a: 5, b: 0 })
|
12
|
+
m.gtest_p
|
12
13
|
end
|
13
14
|
|
14
|
-
|
15
|
-
m = ABAnalyzer::ABTest.new(
|
16
|
-
|
15
|
+
assert_raises ABAnalyzer::InsufficientDataError do
|
16
|
+
m = ABAnalyzer::ABTest.new(one: { a: 10, b: 20 }, two: { a: 5, b: -6 })
|
17
|
+
m.gtest_p
|
17
18
|
end
|
18
19
|
|
19
|
-
|
20
|
-
m = ABAnalyzer::ABTest.new(
|
21
|
-
|
20
|
+
assert_raises ABAnalyzer::InsufficientDataError do
|
21
|
+
m = ABAnalyzer::ABTest.new(one: { a: 1, b: 1 }, two: { a: 1, b: 1 })
|
22
|
+
m.gtest_p
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
@@ -28,7 +29,7 @@ class ABTestTest < Test::Unit::TestCase
|
|
28
29
|
chisquare = 1 - Statistics2.chi2dist(2, 16.2037037037037)
|
29
30
|
assert_equal abt.chisquare_p, chisquare
|
30
31
|
|
31
|
-
gtest = 1 - Statistics2.chi2dist(2, 2*8.13286375180066)
|
32
|
+
gtest = 1 - Statistics2.chi2dist(2, 2 * 8.13286375180066)
|
32
33
|
assert_equal abt.gtest_p, gtest
|
33
34
|
end
|
34
35
|
end
|
data/test/matrix_test.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
|
-
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'abanalyzer'
|
2
3
|
|
3
|
-
class
|
4
|
+
class TestMatrix < MiniTest::Test
|
4
5
|
def setup
|
5
|
-
@values = { :
|
6
|
+
@values = { rep: { male: 200, female: 250 }, dem: { male: 150, female: 300 }, ind: { male: 50, female: 50 } }
|
6
7
|
end
|
7
8
|
|
8
9
|
def test_matrix_creation
|
9
|
-
|
10
|
-
ABAnalyzer::Matrix.new(
|
10
|
+
assert_raises ABAnalyzer::MatrixFormatError do
|
11
|
+
ABAnalyzer::Matrix.new(one: { a: 10, b: 20 }, two: { a: 5 })
|
11
12
|
end
|
12
13
|
end
|
13
14
|
|
metadata
CHANGED
@@ -1,65 +1,80 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: abanalyzer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Muller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '12.1'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '12.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: minitest
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '5.10'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '5.10'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rubocop
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.50'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.50'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: statistics2
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
|
-
- - '
|
59
|
+
- - '='
|
46
60
|
- !ruby/object:Gem::Version
|
47
61
|
version: '0.54'
|
48
62
|
type: :runtime
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
|
-
- - '
|
66
|
+
- - '='
|
53
67
|
- !ruby/object:Gem::Version
|
54
68
|
version: '0.54'
|
55
|
-
description:
|
69
|
+
description:
|
56
70
|
email: bamuller@gmail.com
|
57
71
|
executables: []
|
58
72
|
extensions: []
|
59
73
|
extra_rdoc_files: []
|
60
74
|
files:
|
61
|
-
- .gitignore
|
62
|
-
- .
|
75
|
+
- ".gitignore"
|
76
|
+
- ".rubocop.yml"
|
77
|
+
- ".travis.yml"
|
63
78
|
- Gemfile
|
64
79
|
- LICENSE
|
65
80
|
- README.rdoc
|
@@ -72,10 +87,10 @@ files:
|
|
72
87
|
- lib/abanalyzer/sample.rb
|
73
88
|
- lib/abanalyzer/version.rb
|
74
89
|
- test/abtest_test.rb
|
75
|
-
- test/helper.rb
|
76
90
|
- test/matrix_test.rb
|
77
91
|
homepage: https://github.com/bmuller/abanalyzer
|
78
|
-
licenses:
|
92
|
+
licenses:
|
93
|
+
- GPL-3.0
|
79
94
|
metadata: {}
|
80
95
|
post_install_message:
|
81
96
|
rdoc_options: []
|
@@ -83,21 +98,20 @@ require_paths:
|
|
83
98
|
- lib
|
84
99
|
required_ruby_version: !ruby/object:Gem::Requirement
|
85
100
|
requirements:
|
86
|
-
- -
|
101
|
+
- - ">="
|
87
102
|
- !ruby/object:Gem::Version
|
88
103
|
version: '0'
|
89
104
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
105
|
requirements:
|
91
|
-
- -
|
106
|
+
- - ">="
|
92
107
|
- !ruby/object:Gem::Version
|
93
108
|
version: '0'
|
94
109
|
requirements: []
|
95
110
|
rubyforge_project:
|
96
|
-
rubygems_version: 2.
|
111
|
+
rubygems_version: 2.6.13
|
97
112
|
signing_key:
|
98
113
|
specification_version: 4
|
99
114
|
summary: A/B test analysis library for Ruby
|
100
115
|
test_files:
|
101
116
|
- test/abtest_test.rb
|
102
|
-
- test/helper.rb
|
103
117
|
- test/matrix_test.rb
|