vanity 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -0
- data/bin/vanity +45 -0
- data/lib/vanity/commands/report.rb +27 -0
- data/lib/vanity/commands.rb +1 -0
- data/lib/vanity/experiment/ab_test.rb +116 -31
- data/lib/vanity/experiment/base.rb +16 -3
- data/lib/vanity/playground.rb +1 -0
- data/lib/vanity/report.erb +1 -1
- data/lib/vanity.rb +1 -0
- data/test/ab_test_test.rb +230 -30
- data/test/experiments/null_abc.rb +4 -0
- data/vanity.gemspec +2 -1
- metadata +9 -5
data/CHANGELOG
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
0.2.2 (2009-11-12)
|
2
|
+
* Added: vanity binary, with single command for generating a report.
|
3
|
+
* Added: return alternative by value from experiment.alternative(val) method.
|
4
|
+
* Added: reset an experiment by calling reset!.
|
5
|
+
* Added: experiment alternative name (option 1, option 2, etc).
|
6
|
+
* Added: new scoring algorithm: use experiment.score instead of
|
7
|
+
alternative.z_score/confidence.
|
8
|
+
* Added: experiment.conclusion for plain English results.
|
9
|
+
|
1
10
|
0.2.1 (2009-11-11)
|
2
11
|
* Added: z-score and confidence level for A/B test alternatives.
|
3
12
|
* Added: test auto-completion and auto-outcome (complete_it, outcome_is).
|
data/bin/vanity
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
path = File.expand_path("../lib", File.dirname(__FILE__))
|
3
|
+
$LOAD_PATH.unshift path unless $LOAD_PATH.include?(path)
|
4
|
+
|
5
|
+
require "vanity"
|
6
|
+
require "optparse"
|
7
|
+
|
8
|
+
playground = Vanity.playground
|
9
|
+
options = Struct.new(:output).new
|
10
|
+
OptionParser.new("", 24, " ") do |opts|
|
11
|
+
opts.banner = "Usage: #{File.basename($0)} [options]\n"
|
12
|
+
|
13
|
+
opts.separator ""
|
14
|
+
opts.separator "General options:"
|
15
|
+
opts.on("--path PATH", "Path to experiments directory (default: #{playground.load_path})") { |v| playground.load_path = v }
|
16
|
+
opts.on("--output FILE", "Write report to this file (default: stdout)") { |v| options.output = v }
|
17
|
+
|
18
|
+
opts.separator ""
|
19
|
+
opts.separator "Redis options:"
|
20
|
+
opts.on("--host HOST", "Redis server host (default: #{playground.host})") { |v| playground.host = v }
|
21
|
+
opts.on("--port PORT", "Redis server port (default: #{playground.port})") { |v| playground.port = v }
|
22
|
+
opts.on("--db DB", "Redis database (default: #{playground.db})") { |v| playground.db = v }
|
23
|
+
opts.on("--password PWD", "Redis database password") { |v| playground.password = v }
|
24
|
+
opts.on("--namespace NS", "Redis namespace (default: #{playground.namespace})") { |v| playground.namespace = v }
|
25
|
+
|
26
|
+
opts.separator ""
|
27
|
+
opts.separator "Common options:"
|
28
|
+
opts.on_tail "-h", "-H", "--help", "Show this message" do
|
29
|
+
puts opts.to_s.gsub(/^.*DEPRECATED.*$/s, '')
|
30
|
+
exit
|
31
|
+
end
|
32
|
+
opts.on_tail "-v", "--version", "Show version" do
|
33
|
+
puts "Vanity #{Vanity::Version::STRING}"
|
34
|
+
exit
|
35
|
+
end
|
36
|
+
end.parse!(ARGV)
|
37
|
+
|
38
|
+
cmds = ARGV.empty? ? ["report"] : ARGV
|
39
|
+
cmds.each do |cmd|
|
40
|
+
case cmd
|
41
|
+
when "report"
|
42
|
+
Vanity::Commands.report options.output
|
43
|
+
else fail "No such command: #{cmd}"
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require "erb"
|
2
|
+
require "cgi"
|
3
|
+
|
4
|
+
module Vanity
|
5
|
+
module Commands
|
6
|
+
class << self
|
7
|
+
|
8
|
+
# Generate a report with all available tests. Outputs to the named file,
|
9
|
+
# or stdout with no arguments.
|
10
|
+
def report(output = nil)
|
11
|
+
require "erb"
|
12
|
+
erb = ERB.new(File.read("lib/vanity/report.erb"), nil, '<')
|
13
|
+
experiments = Vanity.playground.experiments
|
14
|
+
html = erb.result(binding)
|
15
|
+
if output
|
16
|
+
File.open output, 'w' do |file|
|
17
|
+
file.write html
|
18
|
+
end
|
19
|
+
puts "New report available in #{output}"
|
20
|
+
else
|
21
|
+
$stdout.write html
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "commands/report")
|
@@ -7,12 +7,16 @@ module Vanity
|
|
7
7
|
def initialize(experiment, id, value) #:nodoc:
|
8
8
|
@experiment = experiment
|
9
9
|
@id = id
|
10
|
+
@name = "option #{(@id + 1)}"
|
10
11
|
@value = value
|
11
12
|
end
|
12
13
|
|
13
14
|
# Alternative id, only unique for this experiment.
|
14
15
|
attr_reader :id
|
15
16
|
|
17
|
+
# Alternative name (option 1, option 2, etc).
|
18
|
+
attr_reader :name
|
19
|
+
|
16
20
|
# Alternative value.
|
17
21
|
attr_reader :value
|
18
22
|
|
@@ -28,12 +32,13 @@ module Vanity
|
|
28
32
|
|
29
33
|
# Number of conversions for this alternative (same participant may be counted more than once).
|
30
34
|
def conversions
|
31
|
-
redis
|
35
|
+
redis[key("conversions")].to_i
|
32
36
|
end
|
33
37
|
|
34
38
|
# Conversion rate calculated as converted/participants.
|
35
39
|
def conversion_rate
|
36
|
-
converted.to_f
|
40
|
+
c, p = converted.to_f, participants.to_f
|
41
|
+
p > 0 ? c/p : 0.0
|
37
42
|
end
|
38
43
|
|
39
44
|
def <=>(other)
|
@@ -51,33 +56,20 @@ module Vanity
|
|
51
56
|
end
|
52
57
|
end
|
53
58
|
|
54
|
-
# Z-score this alternativet related to the base alternative. This
|
55
|
-
# alternative is better than base if it receives a positive z-score,
|
56
|
-
# worse if z-score is negative. Call #confident if you need confidence
|
57
|
-
# level (percentage).
|
58
|
-
def z_score
|
59
|
-
return 0 if base == self
|
60
|
-
pc = base.conversion_rate
|
61
|
-
nc = base.participants
|
62
|
-
p = conversion_rate
|
63
|
-
n = participants
|
64
|
-
(p - pc) / Math.sqrt((p * (1-p)/n) + (pc * (1-pc)/nc))
|
65
|
-
end
|
66
|
-
|
67
|
-
# How confident are we in this alternative being an improvement over the
|
68
|
-
# base alternative. Returns 0, 90, 95, 99 or 99.9 (percentage).
|
69
|
-
def confidence
|
70
|
-
score = z_score
|
71
|
-
confidence = AbTest::Z_TO_CONFIDENCE.find { |z,p| score >= z }
|
72
|
-
confidence ? confidence.last : 0
|
73
|
-
end
|
74
|
-
|
75
59
|
def destroy #:nodoc:
|
76
60
|
redis.del key("participants")
|
77
61
|
redis.del key("converted")
|
78
62
|
redis.del key("conversions")
|
79
63
|
end
|
80
64
|
|
65
|
+
def to_s #:nodoc:
|
66
|
+
name
|
67
|
+
end
|
68
|
+
|
69
|
+
def inspect #:nodoc:
|
70
|
+
"#{name}: #{value} #{converted}/#{participants}"
|
71
|
+
end
|
72
|
+
|
81
73
|
protected
|
82
74
|
|
83
75
|
def key(name)
|
@@ -97,6 +89,15 @@ module Vanity
|
|
97
89
|
|
98
90
|
# The meat.
|
99
91
|
class AbTest < Base
|
92
|
+
class << self
|
93
|
+
|
94
|
+
def confidence(score) #:nodoc:
|
95
|
+
score = score.abs
|
96
|
+
confidence = AbTest::Z_TO_CONFIDENCE.find { |z,p| score >= z }
|
97
|
+
confidence ? confidence.last : 0
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
100
101
|
def initialize(*args) #:nodoc:
|
101
102
|
super
|
102
103
|
end
|
@@ -125,6 +126,11 @@ module Vanity
|
|
125
126
|
alternatives
|
126
127
|
end
|
127
128
|
|
129
|
+
# Returns an Alternative with the specified value.
|
130
|
+
def alternative(value)
|
131
|
+
alternatives.find { |alt| alt.value == value }
|
132
|
+
end
|
133
|
+
|
128
134
|
# Sets this test to two alternatives: false and true.
|
129
135
|
def false_true
|
130
136
|
alternatives false, true
|
@@ -194,11 +200,84 @@ module Vanity
|
|
194
200
|
|
195
201
|
# -- Reporting --
|
196
202
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
203
|
+
# Returns an object with the following attributes:
|
204
|
+
# [:alts] List of alternatives as structures (see below).
|
205
|
+
# [:best] Best alternative.
|
206
|
+
# [:base] Second best alternative.
|
207
|
+
# [:choice] Choice alternative, either selected outcome or best alternative (with confidence).
|
208
|
+
#
|
209
|
+
# Each alternative is an object with the following attributes:
|
210
|
+
# [:id] Identifier.
|
211
|
+
# [:conv] Conversion rate (0.0 to 1.0, rounded to 3 places).
|
212
|
+
# [:pop] Population size (participants).
|
213
|
+
# [:diff] Difference from least performant alternative (percentage).
|
214
|
+
# [:z] Z-score compared to base (above).
|
215
|
+
# [:conf] Confidence based on z-score (0, 90, 95, 99, 99.9).
|
216
|
+
def score
|
217
|
+
struct = Struct.new(:id, :conv, :pop, :diff, :z, :conf)
|
218
|
+
alts = alternatives.map { |alt| struct.new(alt.id, alt.conversion_rate.round(3), alt.participants) }
|
219
|
+
# sort by conversion rate to find best and second best
|
220
|
+
sorted = alts.sort_by(&:conv)
|
221
|
+
base = sorted[-2]
|
222
|
+
# calculate z-score
|
223
|
+
pc = base.conv
|
224
|
+
nc = base.pop
|
225
|
+
alts.each do |alt|
|
226
|
+
p = alt.conv
|
227
|
+
n = alt.pop
|
228
|
+
alt.z = (p - pc) / ((p * (1-p)/n) + (pc * (1-pc)/nc)).abs ** 0.5
|
229
|
+
alt.conf = AbTest.confidence(alt.z)
|
230
|
+
end
|
231
|
+
# difference is measured from least performant
|
232
|
+
if least = sorted.find { |alt| alt.conv > 0 }
|
233
|
+
alts.each do |alt|
|
234
|
+
alt.diff = (alt.conv - least.conv) / least.conv * 100 if alt.conv > least.conv
|
235
|
+
end
|
236
|
+
end
|
237
|
+
# best alternative is one with highest conversion rate (best shot).
|
238
|
+
# choice alternative can only pick best if we have high confidence (>90%).
|
239
|
+
best = sorted.last if sorted.last.conv > 0
|
240
|
+
choice = outcome ? alts[outcome.id] : (best && best.conf >= 90 ? best : nil)
|
241
|
+
Struct.new(:alts, :best, :base, :choice).new(alts, best, base, choice)
|
242
|
+
end
|
243
|
+
|
244
|
+
# Use the score returned by #score to derive a conclusion. Returns an
|
245
|
+
# array of claims.
|
246
|
+
def conclusion(score = score)
|
247
|
+
claims = []
|
248
|
+
# find name from alt structure returned from score
|
249
|
+
name = ->(alt){ alternatives[alt.id].name }
|
250
|
+
# only interested in sorted alternatives with conversion
|
251
|
+
sorted = score.alts.select { |alt| alt.conv > 0.0 }.sort_by(&:conv).reverse
|
252
|
+
if sorted.size > 1
|
253
|
+
# start with alternatives that have conversion, from best to worst,
|
254
|
+
# then alternatives with no conversion.
|
255
|
+
sorted |= score.alts
|
256
|
+
# we want a result that's clearly better than 2nd best.
|
257
|
+
best, second = sorted[0], sorted[1]
|
258
|
+
if best.conv > second.conv
|
259
|
+
diff = ((best.conv - second.conv) / second.conv * 100).round
|
260
|
+
better = " (%d%% better than %s)" % [diff, name[second]] if diff > 0
|
261
|
+
claims << "The best choice is %s: it converted at %.1f%%%s." % [name[best], best.conv * 100, better]
|
262
|
+
if best.conf >= 90
|
263
|
+
claims << "With %d%% probability this result is statistically significant." % score.best.conf
|
264
|
+
else
|
265
|
+
claims << "This result is not statistically significant, suggest you continue this experiment."
|
266
|
+
end
|
267
|
+
sorted.delete best
|
268
|
+
end
|
269
|
+
sorted.each do |alt|
|
270
|
+
if alt.conv > 0.0
|
271
|
+
claims << "%s converted at %.1f%%." % [name[alt].capitalize, alt.conv * 100]
|
272
|
+
else
|
273
|
+
claims << "%s did not convert." % name[alt].capitalize
|
274
|
+
end
|
275
|
+
end
|
276
|
+
else
|
277
|
+
claims << "This experiment did not run long enough to find a clear winner."
|
278
|
+
end
|
279
|
+
claims << "#{name[score.choice].capitalize} selected as the best alternative." if score.choice
|
280
|
+
claims
|
202
281
|
end
|
203
282
|
|
204
283
|
def humanize
|
@@ -228,7 +307,7 @@ module Vanity
|
|
228
307
|
|
229
308
|
# Alternative chosen when this experiment was completed.
|
230
309
|
def outcome
|
231
|
-
outcome = redis
|
310
|
+
outcome = redis[key("outcome")]
|
232
311
|
outcome && alternatives[outcome.to_i]
|
233
312
|
end
|
234
313
|
|
@@ -242,8 +321,8 @@ module Vanity
|
|
242
321
|
end
|
243
322
|
end
|
244
323
|
unless outcome
|
245
|
-
|
246
|
-
outcome =
|
324
|
+
best = score.best
|
325
|
+
outcome = best.id if best
|
247
326
|
end
|
248
327
|
# TODO: logging
|
249
328
|
redis.setnx key("outcome"), outcome
|
@@ -257,6 +336,12 @@ module Vanity
|
|
257
336
|
super
|
258
337
|
end
|
259
338
|
|
339
|
+
def reset! #:nodoc:
|
340
|
+
redis.del key(:outcome)
|
341
|
+
alternatives.each(&:destroy)
|
342
|
+
super
|
343
|
+
end
|
344
|
+
|
260
345
|
def destroy #:nodoc:
|
261
346
|
redis.del key(:outcome)
|
262
347
|
alternatives.each(&:destroy)
|
@@ -19,7 +19,7 @@ module Vanity
|
|
19
19
|
@id, @name = id.to_sym, name
|
20
20
|
@namespace = "#{@playground.namespace}:#{@id}"
|
21
21
|
redis.setnx key(:created_at), Time.now.to_i
|
22
|
-
@created_at = Time.at(redis
|
22
|
+
@created_at = Time.at(redis[key(:created_at)].to_i)
|
23
23
|
@identify_block = ->(context){ context.vanity_identity }
|
24
24
|
end
|
25
25
|
|
@@ -34,6 +34,11 @@ module Vanity
|
|
34
34
|
|
35
35
|
# Experiment completion timestamp.
|
36
36
|
attr_reader :completed_at
|
37
|
+
|
38
|
+
# Returns the type of this class as a symbol (e.g. ab_test).
|
39
|
+
def type
|
40
|
+
self.class.type
|
41
|
+
end
|
37
42
|
|
38
43
|
# Call this method with no argument or block to return an identity. Call
|
39
44
|
# this method with a block to define how to obtain an identity for the
|
@@ -117,12 +122,13 @@ module Vanity
|
|
117
122
|
|
118
123
|
# Time stamp when experiment was completed.
|
119
124
|
def completed_at
|
120
|
-
|
125
|
+
time = redis[key(:completed_at)]
|
126
|
+
time && Time.at(time.to_i)
|
121
127
|
end
|
122
128
|
|
123
129
|
# Returns true if experiment active, false if completed.
|
124
130
|
def active?
|
125
|
-
redis
|
131
|
+
redis[key(:completed_at)].nil?
|
126
132
|
end
|
127
133
|
|
128
134
|
|
@@ -145,6 +151,13 @@ module Vanity
|
|
145
151
|
def save #:nodoc:
|
146
152
|
end
|
147
153
|
|
154
|
+
# Reset experiment.
|
155
|
+
def reset!
|
156
|
+
@created_at = Time.now
|
157
|
+
redis[key(:created_at)] = @created_at.to_i
|
158
|
+
redis.del key(:completed_at)
|
159
|
+
end
|
160
|
+
|
148
161
|
# Get rid of all experiment data.
|
149
162
|
def destroy
|
150
163
|
redis.del key(:created_at)
|
data/lib/vanity/playground.rb
CHANGED
@@ -15,6 +15,7 @@ module Vanity
|
|
15
15
|
# Creates a new Playground. Unless you need to, use the global Vanity.playground.
|
16
16
|
def initialize
|
17
17
|
@experiments = {}
|
18
|
+
@host, @port, @db = "127.0.0.1", 6379, 0
|
18
19
|
@namespace = "vanity:#{Vanity::Version::MAJOR}"
|
19
20
|
@load_path = "experiments"
|
20
21
|
end
|
data/lib/vanity/report.erb
CHANGED
@@ -15,7 +15,7 @@
|
|
15
15
|
<li class="experiment" id="experiment_<%= CGI.escape exp.id.to_s %>">
|
16
16
|
<h3><%= CGI.escape_html exp.name %></h3>
|
17
17
|
<blockquote><%= CGI.escape_html exp.description.to_s %></blockquote>
|
18
|
-
<%= exp.
|
18
|
+
<%= exp.conclusion.join(" ") %>
|
19
19
|
<p class="meta"><%= exp.humanize %> started <%= exp.created_at.strftime("%a, %b %-d %Y") %></p>
|
20
20
|
</li>
|
21
21
|
<% end %>
|
data/lib/vanity.rb
CHANGED
@@ -21,3 +21,4 @@ require File.join(File.dirname(__FILE__), "vanity/playground")
|
|
21
21
|
require File.join(File.dirname(__FILE__), "vanity/experiment/base")
|
22
22
|
require File.join(File.dirname(__FILE__), "vanity/experiment/ab_test")
|
23
23
|
require File.join(File.dirname(__FILE__), "vanity/rails") if defined?(Rails)
|
24
|
+
Vanity.autoload :Commands, File.join(File.dirname(__FILE__), "vanity/commands")
|
data/test/ab_test_test.rb
CHANGED
@@ -32,12 +32,12 @@ class AbTestTest < ActionController::TestCase
|
|
32
32
|
|
33
33
|
# -- Experiment definition --
|
34
34
|
|
35
|
-
def
|
35
|
+
def test_uses_ab_test_when_type_is_ab_test
|
36
36
|
experiment(:ab, type: :ab_test) { }
|
37
37
|
assert_instance_of Vanity::Experiment::AbTest, experiment(:ab)
|
38
38
|
end
|
39
39
|
|
40
|
-
def
|
40
|
+
def test_requires_at_least_two_alternatives_per_experiment
|
41
41
|
assert_raises RuntimeError do
|
42
42
|
experiment :none, type: :ab_test do
|
43
43
|
alternatives []
|
@@ -52,11 +52,27 @@ class AbTestTest < ActionController::TestCase
|
|
52
52
|
alternatives "foo", "bar"
|
53
53
|
end
|
54
54
|
end
|
55
|
+
|
56
|
+
def test_returning_alternative_by_value
|
57
|
+
experiment :abcd do
|
58
|
+
alternatives :a, :b, :c, :d
|
59
|
+
end
|
60
|
+
assert_equal experiment(:abcd).alternatives[1], experiment(:abcd).alternative(:b)
|
61
|
+
assert_equal experiment(:abcd).alternatives[3], experiment(:abcd).alternative(:d)
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_alternative_name
|
65
|
+
experiment :abcd do
|
66
|
+
alternatives :a, :b
|
67
|
+
end
|
68
|
+
assert_equal "option 1", experiment(:abcd).alternative(:a).name
|
69
|
+
assert_equal "option 2", experiment(:abcd).alternative(:b).name
|
70
|
+
end
|
55
71
|
|
56
72
|
|
57
73
|
# -- Running experiment --
|
58
74
|
|
59
|
-
def
|
75
|
+
def test_returns_the_same_alternative_consistently
|
60
76
|
experiment :foobar do
|
61
77
|
alternatives "foo", "bar"
|
62
78
|
identify { "6e98ec" }
|
@@ -68,7 +84,7 @@ class AbTestTest < ActionController::TestCase
|
|
68
84
|
end
|
69
85
|
end
|
70
86
|
|
71
|
-
def
|
87
|
+
def test_returns_different_alternatives_for_each_participant
|
72
88
|
experiment :foobar do
|
73
89
|
alternatives "foo", "bar"
|
74
90
|
identify { rand(1000).to_s }
|
@@ -78,7 +94,7 @@ class AbTestTest < ActionController::TestCase
|
|
78
94
|
assert_in_delta alts.select { |a| a == "foo" }.count, 500, 100 # this may fail, such is propability
|
79
95
|
end
|
80
96
|
|
81
|
-
def
|
97
|
+
def test_records_all_participants_in_each_alternative
|
82
98
|
ids = (Array.new(200) { |i| i.to_s } * 5).shuffle
|
83
99
|
experiment :foobar do
|
84
100
|
alternatives "foo", "bar"
|
@@ -90,7 +106,7 @@ class AbTestTest < ActionController::TestCase
|
|
90
106
|
assert_in_delta alts.first.participants, 100, 20
|
91
107
|
end
|
92
108
|
|
93
|
-
def
|
109
|
+
def test_records_each_converted_participant_only_once
|
94
110
|
ids = (Array.new(100) { |i| i.to_s } * 5).shuffle
|
95
111
|
test = self
|
96
112
|
experiment :foobar do
|
@@ -123,6 +139,26 @@ class AbTestTest < ActionController::TestCase
|
|
123
139
|
assert_equal 100, alts.inject(0) { |t,a| t + a.converted }
|
124
140
|
end
|
125
141
|
|
142
|
+
def test_reset_experiment
|
143
|
+
experiment :simple do
|
144
|
+
identify { "me" }
|
145
|
+
complete_if { alternatives.map(&:converted).sum >= 1 }
|
146
|
+
outcome_is { alternative(true) }
|
147
|
+
end
|
148
|
+
experiment(:simple).choose
|
149
|
+
experiment(:simple).conversion!
|
150
|
+
refute experiment(:simple).active?
|
151
|
+
assert_equal true, experiment(:simple).outcome.value
|
152
|
+
|
153
|
+
experiment(:simple).reset!
|
154
|
+
assert experiment(:simple).active?
|
155
|
+
assert_nil experiment(:simple).outcome
|
156
|
+
assert_nil experiment(:simple).completed_at
|
157
|
+
assert_equal 0, experiment(:simple).alternatives.map(&:participants).sum
|
158
|
+
assert_equal 0, experiment(:simple).alternatives.map(&:conversions).sum
|
159
|
+
assert_equal 0, experiment(:simple).alternatives.map(&:converted).sum
|
160
|
+
end
|
161
|
+
|
126
162
|
|
127
163
|
# -- A/B helper methods --
|
128
164
|
|
@@ -190,34 +226,187 @@ class AbTestTest < ActionController::TestCase
|
|
190
226
|
end
|
191
227
|
|
192
228
|
|
193
|
-
# --
|
229
|
+
# -- Scoring --
|
194
230
|
|
195
|
-
def
|
196
|
-
experiment
|
197
|
-
alternatives :a, :b, :c, :d
|
198
|
-
end
|
199
|
-
alts = experiment(:abcd).alternatives
|
231
|
+
def test_scoring
|
232
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
200
233
|
# participating, conversions, rate, z-score
|
201
234
|
# Control: 182 35 19.23% N/A
|
202
|
-
182.times { |i|
|
203
|
-
35.times
|
235
|
+
182.times { |i| experiment(:abcd).alternative(:a).participating!(i) }
|
236
|
+
35.times { |i| experiment(:abcd).alternative(:a).conversion!(i) }
|
204
237
|
# Treatment A: 180 45 25.00% 1.33
|
205
|
-
180.times { |i|
|
206
|
-
45.times
|
207
|
-
#
|
208
|
-
189.times { |i|
|
209
|
-
28.times
|
210
|
-
#
|
211
|
-
188.times { |i|
|
212
|
-
61.times
|
238
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
239
|
+
45.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
240
|
+
# treatment B: 189 28 14.81% -1.13
|
241
|
+
189.times { |i| experiment(:abcd).alternative(:c).participating!(i) }
|
242
|
+
28.times { |i| experiment(:abcd).alternative(:c).conversion!(i) }
|
243
|
+
# treatment C: 188 61 32.45% 2.94
|
244
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
245
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
246
|
+
|
247
|
+
z_scores = experiment(:abcd).score.alts.map { |alt| "%.2f" % alt.z }
|
248
|
+
assert_equal %w{-1.33 0.00 -2.47 1.58}, z_scores
|
249
|
+
confidences = experiment(:abcd).score.alts.map(&:conf)
|
250
|
+
assert_equal [90, 0, 99, 90], confidences
|
251
|
+
|
252
|
+
diff = experiment(:abcd).score.alts.map { |alt| alt.diff && alt.diff.round }
|
253
|
+
assert_equal [30, 69, nil, 119], diff
|
254
|
+
assert_equal 3, experiment(:abcd).score.best.id
|
255
|
+
assert_equal 3, experiment(:abcd).score.choice.id
|
256
|
+
end
|
213
257
|
|
214
|
-
|
215
|
-
|
258
|
+
def test_scoring_with_no_performers
|
259
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
260
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.z.nan? }
|
261
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.conf == 0 }
|
262
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.diff.nil? }
|
263
|
+
assert_nil experiment(:abcd).score.best
|
264
|
+
assert_nil experiment(:abcd).score.choice
|
265
|
+
end
|
216
266
|
|
217
|
-
|
218
|
-
|
267
|
+
def test_scoring_with_one_performer
|
268
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
269
|
+
10.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
270
|
+
8.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
271
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.z.nan? }
|
272
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.conf == 0 }
|
273
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.diff.nil? }
|
274
|
+
assert 1, experiment(:abcd).score.best.id
|
275
|
+
assert_nil experiment(:abcd).score.choice
|
276
|
+
end
|
277
|
+
|
278
|
+
def test_scoring_with_some_performers
|
279
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
280
|
+
10.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
281
|
+
8.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
282
|
+
12.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
283
|
+
5.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
284
|
+
|
285
|
+
z_scores = experiment(:abcd).score.alts.map { |alt| "%.2f" % alt.z }
|
286
|
+
assert_equal %w{NaN 2.01 NaN 0.00}, z_scores
|
287
|
+
confidences = experiment(:abcd).score.alts.map(&:conf)
|
288
|
+
assert_equal [0, 95, 0, 0], confidences
|
289
|
+
diff = experiment(:abcd).score.alts.map { |alt| alt.diff && alt.diff.round }
|
290
|
+
assert_equal [nil, 92, nil, nil], diff
|
291
|
+
assert_equal 1, experiment(:abcd).score.best.id
|
292
|
+
assert_equal 1, experiment(:abcd).score.choice.id
|
293
|
+
end
|
294
|
+
|
295
|
+
|
296
|
+
# -- Conclusion --
|
297
|
+
|
298
|
+
def test_conclusion
|
299
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
300
|
+
# participating, conversions, rate, z-score
|
301
|
+
# Control: 182 35 19.23% N/A
|
302
|
+
182.times { |i| experiment(:abcd).alternative(:a).participating!(i) }
|
303
|
+
35.times { |i| experiment(:abcd).alternative(:a).conversion!(i) }
|
304
|
+
# Treatment A: 180 45 25.00% 1.33
|
305
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
306
|
+
45.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
307
|
+
# treatment B: 189 28 14.81% -1.13
|
308
|
+
189.times { |i| experiment(:abcd).alternative(:c).participating!(i) }
|
309
|
+
28.times { |i| experiment(:abcd).alternative(:c).conversion!(i) }
|
310
|
+
# treatment C: 188 61 32.45% 2.94
|
311
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
312
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
313
|
+
|
314
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
315
|
+
The best choice is option 4: it converted at 32.4% (30% better than option 2).
|
316
|
+
With 90% probability this result is statistically significant.
|
317
|
+
Option 2 converted at 25.0%.
|
318
|
+
Option 1 converted at 19.2%.
|
319
|
+
Option 3 converted at 14.8%.
|
320
|
+
Option 4 selected as the best alternative.
|
321
|
+
TEXT
|
219
322
|
end
|
220
|
-
|
323
|
+
|
324
|
+
def test_conclusion_with_some_performers
|
325
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
326
|
+
# Treatment A: 180 45 25.00% 1.33
|
327
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
328
|
+
45.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
329
|
+
# treatment C: 188 61 32.45% 2.94
|
330
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
331
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
332
|
+
|
333
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
334
|
+
The best choice is option 4: it converted at 32.4% (30% better than option 2).
|
335
|
+
With 90% probability this result is statistically significant.
|
336
|
+
Option 2 converted at 25.0%.
|
337
|
+
Option 1 did not convert.
|
338
|
+
Option 3 did not convert.
|
339
|
+
Option 4 selected as the best alternative.
|
340
|
+
TEXT
|
341
|
+
end
|
342
|
+
|
343
|
+
def test_conclusion_without_clear_winner
|
344
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
345
|
+
# Treatment A: 180 45 25.00% 1.33
|
346
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
347
|
+
58.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
348
|
+
# treatment C: 188 61 32.45% 2.94
|
349
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
350
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
351
|
+
|
352
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
353
|
+
The best choice is option 4: it converted at 32.4% (1% better than option 2).
|
354
|
+
This result is not statistically significant, suggest you continue this experiment.
|
355
|
+
Option 2 converted at 32.2%.
|
356
|
+
Option 1 did not convert.
|
357
|
+
Option 3 did not convert.
|
358
|
+
TEXT
|
359
|
+
end
|
360
|
+
|
361
|
+
def test_conclusion_without_close_performers
|
362
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
363
|
+
# Treatment A: 180 45 25.00% 1.33
|
364
|
+
186.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
365
|
+
60.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
366
|
+
# treatment C: 188 61 32.45% 2.94
|
367
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
368
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
369
|
+
|
370
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
371
|
+
The best choice is option 4: it converted at 32.4%.
|
372
|
+
This result is not statistically significant, suggest you continue this experiment.
|
373
|
+
Option 2 converted at 32.3%.
|
374
|
+
Option 1 did not convert.
|
375
|
+
Option 3 did not convert.
|
376
|
+
TEXT
|
377
|
+
end
|
378
|
+
|
379
|
+
def test_conclusion_without_equal_performers
|
380
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
381
|
+
# Treatment A: 180 45 25.00% 1.33
|
382
|
+
188.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
383
|
+
61.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
384
|
+
# treatment C: 188 61 32.45% 2.94
|
385
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
386
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
387
|
+
|
388
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
389
|
+
Option 4 converted at 32.4%.
|
390
|
+
Option 2 converted at 32.4%.
|
391
|
+
Option 1 did not convert.
|
392
|
+
Option 3 did not convert.
|
393
|
+
TEXT
|
394
|
+
end
|
395
|
+
|
396
|
+
def test_conclusion_with_one_performers
|
397
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
398
|
+
# Treatment A: 180 45 25.00% 1.33
|
399
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
400
|
+
45.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
401
|
+
|
402
|
+
assert_equal "This experiment did not run long enough to find a clear winner.", experiment(:abcd).conclusion.join("\n")
|
403
|
+
end
|
404
|
+
|
405
|
+
def test_conclusion_with_no_performers
|
406
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
407
|
+
assert_equal "This experiment did not run long enough to find a clear winner.", experiment(:abcd).conclusion.join("\n")
|
408
|
+
end
|
409
|
+
|
221
410
|
|
222
411
|
# -- Completion --
|
223
412
|
|
@@ -332,19 +521,30 @@ class AbTestTest < ActionController::TestCase
|
|
332
521
|
assert_equal experiment(:quick).alternatives[1], experiment(:quick).outcome
|
333
522
|
end
|
334
523
|
|
335
|
-
def
|
524
|
+
def test_outcome_only_performing_alternative
|
525
|
+
experiment :quick do
|
526
|
+
end
|
527
|
+
2.times do |i|
|
528
|
+
experiment(:quick).alternatives[1].participating!(i)
|
529
|
+
experiment(:quick).alternatives[1].conversion!(i)
|
530
|
+
end
|
531
|
+
experiment(:quick).complete!
|
532
|
+
assert_equal experiment(:quick).alternatives[1], experiment(:quick).outcome
|
533
|
+
end
|
534
|
+
|
535
|
+
def test_outcome_choosing_equal_alternatives
|
336
536
|
experiment :quick do
|
337
537
|
end
|
338
538
|
8.times do |i|
|
339
539
|
experiment(:quick).alternatives[0].participating!(i)
|
340
540
|
experiment(:quick).alternatives[0].conversion!(i)
|
341
541
|
end
|
342
|
-
|
542
|
+
8.times do |i|
|
343
543
|
experiment(:quick).alternatives[1].participating!(i)
|
344
544
|
experiment(:quick).alternatives[1].conversion!(i)
|
345
545
|
end
|
346
546
|
experiment(:quick).complete!
|
347
|
-
assert_equal experiment(:quick).alternatives[
|
547
|
+
assert_equal experiment(:quick).alternatives[1], experiment(:quick).outcome
|
348
548
|
end
|
349
549
|
|
350
550
|
end
|
data/vanity.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "vanity"
|
3
|
-
spec.version = "0.2.
|
3
|
+
spec.version = "0.2.2"
|
4
4
|
spec.author = "Assaf Arkin"
|
5
5
|
spec.email = "assaf@labnotes.org"
|
6
6
|
spec.homepage = "http://github.com/assaf/vanity"
|
@@ -9,6 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
#spec.post_install_message = "To get started run vanity --help"
|
10
10
|
|
11
11
|
spec.files = Dir["{bin,lib,rails,test}/**/*", "CHANGELOG", "README.rdoc", "vanity.gemspec"]
|
12
|
+
spec.executable = "vanity"
|
12
13
|
|
13
14
|
spec.has_rdoc = true
|
14
15
|
spec.extra_rdoc_files = "README.rdoc", "CHANGELOG"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vanity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Assaf Arkin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-11-
|
12
|
+
date: 2009-11-12 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -24,14 +24,17 @@ dependencies:
|
|
24
24
|
version:
|
25
25
|
description: ""
|
26
26
|
email: assaf@labnotes.org
|
27
|
-
executables:
|
28
|
-
|
27
|
+
executables:
|
28
|
+
- vanity
|
29
29
|
extensions: []
|
30
30
|
|
31
31
|
extra_rdoc_files:
|
32
32
|
- README.rdoc
|
33
33
|
- CHANGELOG
|
34
34
|
files:
|
35
|
+
- bin/vanity
|
36
|
+
- lib/vanity/commands/report.rb
|
37
|
+
- lib/vanity/commands.rb
|
35
38
|
- lib/vanity/experiment/ab_test.rb
|
36
39
|
- lib/vanity/experiment/base.rb
|
37
40
|
- lib/vanity/playground.rb
|
@@ -42,6 +45,7 @@ files:
|
|
42
45
|
- lib/vanity.rb
|
43
46
|
- test/ab_test_test.rb
|
44
47
|
- test/experiment_test.rb
|
48
|
+
- test/experiments/null_abc.rb
|
45
49
|
- test/playground_test.rb
|
46
50
|
- test/rails_test.rb
|
47
51
|
- test/test_helper.rb
|
@@ -55,7 +59,7 @@ licenses: []
|
|
55
59
|
post_install_message:
|
56
60
|
rdoc_options:
|
57
61
|
- --title
|
58
|
-
- Vanity 0.2.
|
62
|
+
- Vanity 0.2.2
|
59
63
|
- --main
|
60
64
|
- README.rdoc
|
61
65
|
- --webcvs
|