abanalyzer 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +11 -0
- data/.travis.yml +3 -5
- data/Gemfile +1 -2
- data/README.rdoc +1 -1
- data/Rakefile +9 -6
- data/abanalyzer.gemspec +15 -14
- data/lib/abanalyzer/abtest.rb +17 -22
- data/lib/abanalyzer/matrix.rb +15 -17
- data/lib/abanalyzer/sample.rb +5 -7
- data/lib/abanalyzer/version.rb +1 -1
- data/test/abtest_test.rb +14 -13
- data/test/matrix_test.rb +6 -5
- metadata +36 -22
- data/test/helper.rb +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9cbf5e6bc121a608cab28ea506580a3cbae02998
|
4
|
+
data.tar.gz: f897ff9d1f557e831d779d7ee6f87937dc414ec3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f78dfa9350b44b072a34c4d6d230dd415d3b84eb53a9e0d5b1f1e7bf7f5bf146858a2e03ecabe528a26bb52f4f7a80c0458c82948f830454cddc93c050069ef7
|
7
|
+
data.tar.gz: 5d54f21901a039d64e8aec4d1c3c32b56e8edd2b29e7a7fbeb54c258ca3b6f8ed5afa1546a8a7a8338f99acc182ad2176deb806ce5d5a23855304013ddb591d8
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.rdoc
CHANGED
@@ -55,7 +55,7 @@ You can additionally get the actual score for either a Chi-Square test for indep
|
|
55
55
|
|
56
56
|
|
57
57
|
== Sample Size Calculations
|
58
|
-
Let's say you want to determine how large your sample size needs to be for an A/B test. Let's say your baseline is 10%, and you want to be able to determine if there's at least a 10% relative lift (1% absolute) to 11%. Let's assume you want a power[http://en.wikipedia.org/wiki/Statistical_power] of 0.8 and a {significance level}[http://en.wikipedia.org/wiki/Statistical_significance] of 0.05 (that is, an 80% chance of that you'll
|
58
|
+
Let's say you want to determine how large your sample size needs to be for an A/B test. Let's say your baseline is 10%, and you want to be able to determine if there's at least a 10% relative lift (1% absolute) to 11%. Let's assume you want a power[http://en.wikipedia.org/wiki/Statistical_power] of 0.8 and a {significance level}[http://en.wikipedia.org/wiki/Statistical_significance] of 0.05 (that is, an 80% chance that you'll succeed in recognizing a difference when there is one, and a 5% chance of a false positive).
|
59
59
|
|
60
60
|
...
|
61
61
|
ABAnalyzer.calculate_size(0.1, 0.11, 0.05, 0.8)
|
data/Rakefile
CHANGED
@@ -1,18 +1,21 @@
|
|
1
|
+
require 'rake/testtask'
|
1
2
|
require 'bundler/gem_tasks'
|
2
3
|
require 'rdoc/task'
|
4
|
+
require 'rubocop/rake_task'
|
5
|
+
|
6
|
+
RuboCop::RakeTask.new
|
3
7
|
|
4
|
-
RDoc::Task.new(
|
5
|
-
rdoc.title =
|
8
|
+
RDoc::Task.new('doc') do |rdoc|
|
9
|
+
rdoc.title = 'ABAnalyzer - A/B test analysis library for Ruby'
|
6
10
|
rdoc.rdoc_dir = 'docs'
|
7
11
|
rdoc.rdoc_files.include('README.rdoc')
|
8
12
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
9
|
-
|
10
|
-
|
11
|
-
require 'rake/testtask'
|
13
|
+
end
|
12
14
|
|
13
15
|
Rake::TestTask.new do |t|
|
16
|
+
t.libs << 'test'
|
14
17
|
t.test_files = FileList['test/*_test.rb']
|
15
18
|
t.verbose = true
|
16
19
|
end
|
17
20
|
|
18
|
-
task :
|
21
|
+
task default: %i[rubocop test]
|
data/abanalyzer.gemspec
CHANGED
@@ -1,20 +1,21 @@
|
|
1
|
-
|
2
|
-
require
|
3
|
-
require
|
1
|
+
$LOAD_PATH.push File.expand_path('../lib', __FILE__)
|
2
|
+
require 'abanalyzer/version'
|
3
|
+
require 'date'
|
4
4
|
|
5
5
|
Gem::Specification.new do |s|
|
6
|
-
s.name =
|
6
|
+
s.name = 'abanalyzer'
|
7
7
|
s.version = ABAnalyzer::VERSION
|
8
|
-
s.authors = [
|
8
|
+
s.authors = ['Brian Muller']
|
9
|
+
s.license = 'GPL-3.0'
|
9
10
|
s.date = Date.today.to_s
|
10
|
-
s.
|
11
|
-
s.
|
12
|
-
s.
|
13
|
-
s.files = `git ls-files`.split($/)
|
11
|
+
s.summary = 'A/B test analysis library for Ruby'
|
12
|
+
s.email = 'bamuller@gmail.com'
|
13
|
+
s.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
14
14
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
15
|
-
s.homepage =
|
16
|
-
s.require_paths = [
|
17
|
-
s.add_development_dependency(
|
18
|
-
s.add_development_dependency(
|
19
|
-
s.
|
15
|
+
s.homepage = 'https://github.com/bmuller/abanalyzer'
|
16
|
+
s.require_paths = ['lib']
|
17
|
+
s.add_development_dependency('rake', '~> 12.1')
|
18
|
+
s.add_development_dependency('minitest', '~> 5.10')
|
19
|
+
s.add_development_dependency('rubocop', '~> 0.50')
|
20
|
+
s.add_dependency('statistics2', '= 0.54')
|
20
21
|
end
|
data/lib/abanalyzer/abtest.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'statistics2'
|
2
2
|
|
3
3
|
module ABAnalyzer
|
4
|
-
|
5
4
|
class ABTest
|
6
5
|
# values should be hash of hashes, with top level hash the group names:
|
7
6
|
# { :groupa => { :yes => 20, :no => 10 }, :groupb => { :yes => 18, :no => 8 } }
|
@@ -9,42 +8,43 @@ module ABAnalyzer
|
|
9
8
|
@values = Matrix.new values
|
10
9
|
end
|
11
10
|
|
12
|
-
def different?(sig=0.05)
|
11
|
+
def different?(sig = 0.05)
|
13
12
|
gtest_p < sig
|
14
13
|
end
|
15
|
-
|
14
|
+
|
16
15
|
def chisquare_score
|
17
|
-
sum=0
|
18
|
-
@values.each_cell
|
16
|
+
sum = 0
|
17
|
+
@values.each_cell do |colname, rowname, value|
|
19
18
|
ex = expected(colname, rowname)
|
20
19
|
test_sufficient_data(colname, rowname, ex, value)
|
21
|
-
sum += ((value - ex)
|
22
|
-
|
23
|
-
|
20
|
+
sum += ((value - ex)**2) / ex
|
21
|
+
end
|
22
|
+
sum
|
24
23
|
end
|
25
|
-
|
24
|
+
|
26
25
|
def gtest_score
|
27
|
-
sum=0
|
28
|
-
@values.each_cell
|
26
|
+
sum = 0
|
27
|
+
@values.each_cell do |colname, rowname, value|
|
29
28
|
ex = expected(colname, rowname)
|
30
29
|
test_sufficient_data(colname, rowname, ex, value)
|
31
30
|
sum += value * Math.log(value / ex)
|
32
|
-
|
33
|
-
|
31
|
+
end
|
32
|
+
sum
|
34
33
|
end
|
35
34
|
|
36
35
|
def chisquare_p
|
37
|
-
|
36
|
+
1 - Statistics2.chi2dist(df, chisquare_score)
|
38
37
|
end
|
39
38
|
|
40
39
|
def gtest_p
|
41
|
-
|
40
|
+
1 - Statistics2.chi2dist(df, 2 * gtest_score)
|
42
41
|
end
|
43
|
-
|
42
|
+
|
44
43
|
private
|
44
|
+
|
45
45
|
def test_sufficient_data(colname, rowname, expected, value)
|
46
46
|
msg = "Insufficient data size for column #{colname} row #{rowname}. Expected value must be >= 5, and value must be > 0."
|
47
|
-
raise InsufficientDataError, msg if expected < 5
|
47
|
+
raise InsufficientDataError, msg if (expected < 5) || (value <= 0)
|
48
48
|
end
|
49
49
|
|
50
50
|
def expected(colname, rowname)
|
@@ -54,10 +54,5 @@ module ABAnalyzer
|
|
54
54
|
def df
|
55
55
|
(@values.columns.length - 1) * (@values.rows.length - 1)
|
56
56
|
end
|
57
|
-
|
58
|
-
def self.chi2dist(degrees, score)
|
59
|
-
1 - Statistics2.chi2dist(degrees, score)
|
60
|
-
end
|
61
57
|
end
|
62
|
-
|
63
58
|
end
|
data/lib/abanalyzer/matrix.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module ABAnalyzer
|
2
|
-
|
3
2
|
class Matrix
|
4
3
|
attr_reader :columns, :rows
|
5
4
|
def initialize(values)
|
@@ -10,14 +9,14 @@ module ABAnalyzer
|
|
10
9
|
end
|
11
10
|
|
12
11
|
def validate
|
13
|
-
@values.each
|
14
|
-
if column.keys.map
|
12
|
+
@values.each do |colname, column|
|
13
|
+
if column.keys.map(&:to_s).sort != @rows.map(&:to_s).sort
|
15
14
|
raise MatrixFormatError, "Column #{colname} has row names that don't match the first column's."
|
16
15
|
end
|
17
|
-
|
18
|
-
coltotal = @columns.map { |col| column_sum(col) }.inject { |a,b| a+b }
|
19
|
-
rowtotal = @rows.map { |col| row_sum(col) }.inject { |a,b| a+b }
|
20
|
-
raise MatrixFormatError,
|
16
|
+
end
|
17
|
+
coltotal = @columns.map { |col| column_sum(col) }.inject { |a, b| a + b }
|
18
|
+
rowtotal = @rows.map { |col| row_sum(col) }.inject { |a, b| a + b }
|
19
|
+
raise MatrixFormatError, 'Column sums do not equal row sums' if coltotal != rowtotal
|
21
20
|
end
|
22
21
|
|
23
22
|
def get_column(name)
|
@@ -25,17 +24,17 @@ module ABAnalyzer
|
|
25
24
|
end
|
26
25
|
|
27
26
|
def get_row(name)
|
28
|
-
@values.map
|
27
|
+
@values.map do |_colname, rows|
|
29
28
|
rows[name]
|
30
|
-
|
29
|
+
end
|
31
30
|
end
|
32
31
|
|
33
32
|
def each_cell
|
34
|
-
@columns.each
|
35
|
-
@rows.each
|
33
|
+
@columns.each do |colname|
|
34
|
+
@rows.each do |rowname|
|
36
35
|
yield colname, rowname, get(colname, rowname)
|
37
|
-
|
38
|
-
|
36
|
+
end
|
37
|
+
end
|
39
38
|
end
|
40
39
|
|
41
40
|
def get(colname, rowname)
|
@@ -43,16 +42,15 @@ module ABAnalyzer
|
|
43
42
|
end
|
44
43
|
|
45
44
|
def column_sum(name)
|
46
|
-
get_column(name).inject { |a,b| a+b }
|
45
|
+
get_column(name).inject { |a, b| a + b }
|
47
46
|
end
|
48
47
|
|
49
48
|
def row_sum(name)
|
50
|
-
get_row(name).inject { |a,b| a+b }
|
49
|
+
get_row(name).inject { |a, b| a + b }
|
51
50
|
end
|
52
51
|
|
53
52
|
def total_sum
|
54
|
-
@columns.map { |col| column_sum(col) }.inject { |a,b| a+b }
|
53
|
+
@columns.map { |col| column_sum(col) }.inject { |a, b| a + b }
|
55
54
|
end
|
56
55
|
end
|
57
|
-
|
58
56
|
end
|
data/lib/abanalyzer/sample.rb
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
require 'statistics2'
|
2
2
|
|
3
3
|
module ABAnalyzer
|
4
|
-
|
5
4
|
# Calculate the minimum sample size (per group) based on the desire to detect
|
6
5
|
# an increase from proportion p1 to proportion p2. Significance is generally
|
7
6
|
# safe at 0.05 (why? just because) and a power of 0.8 (why? just because)
|
8
7
|
def self.calculate_size(p1, p2, significance, power)
|
9
|
-
[
|
10
|
-
raise
|
11
|
-
|
8
|
+
[p1, p2, significance, power].each do |a|
|
9
|
+
raise 'All arguments to calculate_size must be Floats' unless a.is_a?(Float)
|
10
|
+
end
|
12
11
|
|
13
12
|
pbar = (p1 + p2) / 2.0
|
14
13
|
sides = 2.0
|
@@ -16,8 +15,8 @@ module ABAnalyzer
|
|
16
15
|
zcrit = Statistics2.pnormaldist(1 - (significance / sides))
|
17
16
|
zpow = Statistics2.pnormaldist(power)
|
18
17
|
|
19
|
-
numerator = (zcrit * Math.sqrt(2 * pbar * (1 - pbar)) + zpow * Math.sqrt(p2 * (1 - p2) + p1 * (1 - p1)))
|
20
|
-
denominator = (p2 - p1)
|
18
|
+
numerator = (zcrit * Math.sqrt(2 * pbar * (1 - pbar)) + zpow * Math.sqrt(p2 * (1 - p2) + p1 * (1 - p1)))**2
|
19
|
+
denominator = (p2 - p1)**2
|
21
20
|
(numerator / denominator).ceil
|
22
21
|
end
|
23
22
|
|
@@ -39,5 +38,4 @@ module ABAnalyzer
|
|
39
38
|
ci = confidence_interval(successes, trials, confidence)
|
40
39
|
[(ci.first - compared_proportion) / compared_proportion, (ci.last - compared_proportion) / compared_proportion]
|
41
40
|
end
|
42
|
-
|
43
41
|
end
|
data/lib/abanalyzer/version.rb
CHANGED
data/test/abtest_test.rb
CHANGED
@@ -1,24 +1,25 @@
|
|
1
|
-
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'abanalyzer'
|
2
3
|
|
3
|
-
class ABTestTest < Test
|
4
|
+
class ABTestTest < MiniTest::Test
|
4
5
|
def setup
|
5
|
-
@values = { :
|
6
|
+
@values = { rep: { male: 200, female: 250 }, dem: { male: 150, female: 300 }, ind: { male: 50, female: 50 } }
|
6
7
|
end
|
7
8
|
|
8
9
|
def test_test_creation
|
9
|
-
|
10
|
-
m = ABAnalyzer::ABTest.new(
|
11
|
-
|
10
|
+
assert_raises ABAnalyzer::InsufficientDataError do
|
11
|
+
m = ABAnalyzer::ABTest.new(one: { a: 10, b: 20 }, two: { a: 5, b: 0 })
|
12
|
+
m.gtest_p
|
12
13
|
end
|
13
14
|
|
14
|
-
|
15
|
-
m = ABAnalyzer::ABTest.new(
|
16
|
-
|
15
|
+
assert_raises ABAnalyzer::InsufficientDataError do
|
16
|
+
m = ABAnalyzer::ABTest.new(one: { a: 10, b: 20 }, two: { a: 5, b: -6 })
|
17
|
+
m.gtest_p
|
17
18
|
end
|
18
19
|
|
19
|
-
|
20
|
-
m = ABAnalyzer::ABTest.new(
|
21
|
-
|
20
|
+
assert_raises ABAnalyzer::InsufficientDataError do
|
21
|
+
m = ABAnalyzer::ABTest.new(one: { a: 1, b: 1 }, two: { a: 1, b: 1 })
|
22
|
+
m.gtest_p
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
@@ -28,7 +29,7 @@ class ABTestTest < Test::Unit::TestCase
|
|
28
29
|
chisquare = 1 - Statistics2.chi2dist(2, 16.2037037037037)
|
29
30
|
assert_equal abt.chisquare_p, chisquare
|
30
31
|
|
31
|
-
gtest = 1 - Statistics2.chi2dist(2, 2*8.13286375180066)
|
32
|
+
gtest = 1 - Statistics2.chi2dist(2, 2 * 8.13286375180066)
|
32
33
|
assert_equal abt.gtest_p, gtest
|
33
34
|
end
|
34
35
|
end
|
data/test/matrix_test.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
|
-
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'abanalyzer'
|
2
3
|
|
3
|
-
class
|
4
|
+
class TestMatrix < MiniTest::Test
|
4
5
|
def setup
|
5
|
-
@values = { :
|
6
|
+
@values = { rep: { male: 200, female: 250 }, dem: { male: 150, female: 300 }, ind: { male: 50, female: 50 } }
|
6
7
|
end
|
7
8
|
|
8
9
|
def test_matrix_creation
|
9
|
-
|
10
|
-
ABAnalyzer::Matrix.new(
|
10
|
+
assert_raises ABAnalyzer::MatrixFormatError do
|
11
|
+
ABAnalyzer::Matrix.new(one: { a: 10, b: 20 }, two: { a: 5 })
|
11
12
|
end
|
12
13
|
end
|
13
14
|
|
metadata
CHANGED
@@ -1,65 +1,80 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: abanalyzer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Muller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '12.1'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '12.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: minitest
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '5.10'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '5.10'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rubocop
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.50'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.50'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: statistics2
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
|
-
- - '
|
59
|
+
- - '='
|
46
60
|
- !ruby/object:Gem::Version
|
47
61
|
version: '0.54'
|
48
62
|
type: :runtime
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
|
-
- - '
|
66
|
+
- - '='
|
53
67
|
- !ruby/object:Gem::Version
|
54
68
|
version: '0.54'
|
55
|
-
description:
|
69
|
+
description:
|
56
70
|
email: bamuller@gmail.com
|
57
71
|
executables: []
|
58
72
|
extensions: []
|
59
73
|
extra_rdoc_files: []
|
60
74
|
files:
|
61
|
-
- .gitignore
|
62
|
-
- .
|
75
|
+
- ".gitignore"
|
76
|
+
- ".rubocop.yml"
|
77
|
+
- ".travis.yml"
|
63
78
|
- Gemfile
|
64
79
|
- LICENSE
|
65
80
|
- README.rdoc
|
@@ -72,10 +87,10 @@ files:
|
|
72
87
|
- lib/abanalyzer/sample.rb
|
73
88
|
- lib/abanalyzer/version.rb
|
74
89
|
- test/abtest_test.rb
|
75
|
-
- test/helper.rb
|
76
90
|
- test/matrix_test.rb
|
77
91
|
homepage: https://github.com/bmuller/abanalyzer
|
78
|
-
licenses:
|
92
|
+
licenses:
|
93
|
+
- GPL-3.0
|
79
94
|
metadata: {}
|
80
95
|
post_install_message:
|
81
96
|
rdoc_options: []
|
@@ -83,21 +98,20 @@ require_paths:
|
|
83
98
|
- lib
|
84
99
|
required_ruby_version: !ruby/object:Gem::Requirement
|
85
100
|
requirements:
|
86
|
-
- -
|
101
|
+
- - ">="
|
87
102
|
- !ruby/object:Gem::Version
|
88
103
|
version: '0'
|
89
104
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
105
|
requirements:
|
91
|
-
- -
|
106
|
+
- - ">="
|
92
107
|
- !ruby/object:Gem::Version
|
93
108
|
version: '0'
|
94
109
|
requirements: []
|
95
110
|
rubyforge_project:
|
96
|
-
rubygems_version: 2.
|
111
|
+
rubygems_version: 2.6.13
|
97
112
|
signing_key:
|
98
113
|
specification_version: 4
|
99
114
|
summary: A/B test analysis library for Ruby
|
100
115
|
test_files:
|
101
116
|
- test/abtest_test.rb
|
102
|
-
- test/helper.rb
|
103
117
|
- test/matrix_test.rb
|