vanity 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/bin/vanity +45 -0
- data/lib/vanity/commands/report.rb +27 -0
- data/lib/vanity/commands.rb +1 -0
- data/lib/vanity/experiment/ab_test.rb +116 -31
- data/lib/vanity/experiment/base.rb +16 -3
- data/lib/vanity/playground.rb +1 -0
- data/lib/vanity/report.erb +1 -1
- data/lib/vanity.rb +1 -0
- data/test/ab_test_test.rb +230 -30
- data/test/experiments/null_abc.rb +4 -0
- data/vanity.gemspec +2 -1
- metadata +9 -5
data/CHANGELOG
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
0.2.2 (2009-11-12)
|
|
2
|
+
* Added: vanity binary, with single command for generating a report.
|
|
3
|
+
* Added: return alternative by value from experiment.alternative(val) method.
|
|
4
|
+
* Added: reset an experiment by calling reset!.
|
|
5
|
+
* Added: experiment alternative name (option 1, option 2, etc).
|
|
6
|
+
* Added: new scoring algorithm: use experiment.score instead of
|
|
7
|
+
alternative.z_score/confidence.
|
|
8
|
+
* Added: experiment.conclusion for plain English results.
|
|
9
|
+
|
|
1
10
|
0.2.1 (2009-11-11)
|
|
2
11
|
* Added: z-score and confidence level for A/B test alternatives.
|
|
3
12
|
* Added: test auto-completion and auto-outcome (complete_it, outcome_is).
|
data/bin/vanity
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
path = File.expand_path("../lib", File.dirname(__FILE__))
|
|
3
|
+
$LOAD_PATH.unshift path unless $LOAD_PATH.include?(path)
|
|
4
|
+
|
|
5
|
+
require "vanity"
|
|
6
|
+
require "optparse"
|
|
7
|
+
|
|
8
|
+
playground = Vanity.playground
|
|
9
|
+
options = Struct.new(:output).new
|
|
10
|
+
OptionParser.new("", 24, " ") do |opts|
|
|
11
|
+
opts.banner = "Usage: #{File.basename($0)} [options]\n"
|
|
12
|
+
|
|
13
|
+
opts.separator ""
|
|
14
|
+
opts.separator "General options:"
|
|
15
|
+
opts.on("--path PATH", "Path to experiments directory (default: #{playground.load_path})") { |v| playground.load_path = v }
|
|
16
|
+
opts.on("--output FILE", "Write report to this file (default: stdout)") { |v| options.output = v }
|
|
17
|
+
|
|
18
|
+
opts.separator ""
|
|
19
|
+
opts.separator "Redis options:"
|
|
20
|
+
opts.on("--host HOST", "Redis server host (default: #{playground.host})") { |v| playground.host = v }
|
|
21
|
+
opts.on("--port PORT", "Redis server port (default: #{playground.port})") { |v| playground.port = v }
|
|
22
|
+
opts.on("--db DB", "Redis database (default: #{playground.db})") { |v| playground.db = v }
|
|
23
|
+
opts.on("--password PWD", "Redis database password") { |v| playground.password = v }
|
|
24
|
+
opts.on("--namespace NS", "Redis namespace (default: #{playground.namespace})") { |v| playground.namespace = v }
|
|
25
|
+
|
|
26
|
+
opts.separator ""
|
|
27
|
+
opts.separator "Common options:"
|
|
28
|
+
opts.on_tail "-h", "-H", "--help", "Show this message" do
|
|
29
|
+
puts opts.to_s.gsub(/^.*DEPRECATED.*$/s, '')
|
|
30
|
+
exit
|
|
31
|
+
end
|
|
32
|
+
opts.on_tail "-v", "--version", "Show version" do
|
|
33
|
+
puts "Vanity #{Vanity::Version::STRING}"
|
|
34
|
+
exit
|
|
35
|
+
end
|
|
36
|
+
end.parse!(ARGV)
|
|
37
|
+
|
|
38
|
+
cmds = ARGV.empty? ? ["report"] : ARGV
|
|
39
|
+
cmds.each do |cmd|
|
|
40
|
+
case cmd
|
|
41
|
+
when "report"
|
|
42
|
+
Vanity::Commands.report options.output
|
|
43
|
+
else fail "No such command: #{cmd}"
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require "erb"
|
|
2
|
+
require "cgi"
|
|
3
|
+
|
|
4
|
+
module Vanity
|
|
5
|
+
module Commands
|
|
6
|
+
class << self
|
|
7
|
+
|
|
8
|
+
# Generate a report with all available tests. Outputs to the named file,
|
|
9
|
+
# or stdout with no arguments.
|
|
10
|
+
def report(output = nil)
|
|
11
|
+
require "erb"
|
|
12
|
+
erb = ERB.new(File.read("lib/vanity/report.erb"), nil, '<')
|
|
13
|
+
experiments = Vanity.playground.experiments
|
|
14
|
+
html = erb.result(binding)
|
|
15
|
+
if output
|
|
16
|
+
File.open output, 'w' do |file|
|
|
17
|
+
file.write html
|
|
18
|
+
end
|
|
19
|
+
puts "New report available in #{output}"
|
|
20
|
+
else
|
|
21
|
+
$stdout.write html
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__), "commands/report")
|
|
@@ -7,12 +7,16 @@ module Vanity
|
|
|
7
7
|
def initialize(experiment, id, value) #:nodoc:
|
|
8
8
|
@experiment = experiment
|
|
9
9
|
@id = id
|
|
10
|
+
@name = "option #{(@id + 1)}"
|
|
10
11
|
@value = value
|
|
11
12
|
end
|
|
12
13
|
|
|
13
14
|
# Alternative id, only unique for this experiment.
|
|
14
15
|
attr_reader :id
|
|
15
16
|
|
|
17
|
+
# Alternative name (option 1, option 2, etc).
|
|
18
|
+
attr_reader :name
|
|
19
|
+
|
|
16
20
|
# Alternative value.
|
|
17
21
|
attr_reader :value
|
|
18
22
|
|
|
@@ -28,12 +32,13 @@ module Vanity
|
|
|
28
32
|
|
|
29
33
|
# Number of conversions for this alternative (same participant may be counted more than once).
|
|
30
34
|
def conversions
|
|
31
|
-
redis
|
|
35
|
+
redis[key("conversions")].to_i
|
|
32
36
|
end
|
|
33
37
|
|
|
34
38
|
# Conversion rate calculated as converted/participants.
|
|
35
39
|
def conversion_rate
|
|
36
|
-
converted.to_f
|
|
40
|
+
c, p = converted.to_f, participants.to_f
|
|
41
|
+
p > 0 ? c/p : 0.0
|
|
37
42
|
end
|
|
38
43
|
|
|
39
44
|
def <=>(other)
|
|
@@ -51,33 +56,20 @@ module Vanity
|
|
|
51
56
|
end
|
|
52
57
|
end
|
|
53
58
|
|
|
54
|
-
# Z-score this alternativet related to the base alternative. This
|
|
55
|
-
# alternative is better than base if it receives a positive z-score,
|
|
56
|
-
# worse if z-score is negative. Call #confident if you need confidence
|
|
57
|
-
# level (percentage).
|
|
58
|
-
def z_score
|
|
59
|
-
return 0 if base == self
|
|
60
|
-
pc = base.conversion_rate
|
|
61
|
-
nc = base.participants
|
|
62
|
-
p = conversion_rate
|
|
63
|
-
n = participants
|
|
64
|
-
(p - pc) / Math.sqrt((p * (1-p)/n) + (pc * (1-pc)/nc))
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
# How confident are we in this alternative being an improvement over the
|
|
68
|
-
# base alternative. Returns 0, 90, 95, 99 or 99.9 (percentage).
|
|
69
|
-
def confidence
|
|
70
|
-
score = z_score
|
|
71
|
-
confidence = AbTest::Z_TO_CONFIDENCE.find { |z,p| score >= z }
|
|
72
|
-
confidence ? confidence.last : 0
|
|
73
|
-
end
|
|
74
|
-
|
|
75
59
|
def destroy #:nodoc:
|
|
76
60
|
redis.del key("participants")
|
|
77
61
|
redis.del key("converted")
|
|
78
62
|
redis.del key("conversions")
|
|
79
63
|
end
|
|
80
64
|
|
|
65
|
+
def to_s #:nodoc:
|
|
66
|
+
name
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def inspect #:nodoc:
|
|
70
|
+
"#{name}: #{value} #{converted}/#{participants}"
|
|
71
|
+
end
|
|
72
|
+
|
|
81
73
|
protected
|
|
82
74
|
|
|
83
75
|
def key(name)
|
|
@@ -97,6 +89,15 @@ module Vanity
|
|
|
97
89
|
|
|
98
90
|
# The meat.
|
|
99
91
|
class AbTest < Base
|
|
92
|
+
class << self
|
|
93
|
+
|
|
94
|
+
def confidence(score) #:nodoc:
|
|
95
|
+
score = score.abs
|
|
96
|
+
confidence = AbTest::Z_TO_CONFIDENCE.find { |z,p| score >= z }
|
|
97
|
+
confidence ? confidence.last : 0
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
100
101
|
def initialize(*args) #:nodoc:
|
|
101
102
|
super
|
|
102
103
|
end
|
|
@@ -125,6 +126,11 @@ module Vanity
|
|
|
125
126
|
alternatives
|
|
126
127
|
end
|
|
127
128
|
|
|
129
|
+
# Returns an Alternative with the specified value.
|
|
130
|
+
def alternative(value)
|
|
131
|
+
alternatives.find { |alt| alt.value == value }
|
|
132
|
+
end
|
|
133
|
+
|
|
128
134
|
# Sets this test to two alternatives: false and true.
|
|
129
135
|
def false_true
|
|
130
136
|
alternatives false, true
|
|
@@ -194,11 +200,84 @@ module Vanity
|
|
|
194
200
|
|
|
195
201
|
# -- Reporting --
|
|
196
202
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
203
|
+
# Returns an object with the following attributes:
|
|
204
|
+
# [:alts] List of alternatives as structures (see below).
|
|
205
|
+
# [:best] Best alternative.
|
|
206
|
+
# [:base] Second best alternative.
|
|
207
|
+
# [:choice] Choice alternative, either selected outcome or best alternative (with confidence).
|
|
208
|
+
#
|
|
209
|
+
# Each alternative is an object with the following attributes:
|
|
210
|
+
# [:id] Identifier.
|
|
211
|
+
# [:conv] Conversion rate (0.0 to 1.0, rounded to 3 places).
|
|
212
|
+
# [:pop] Population size (participants).
|
|
213
|
+
# [:diff] Difference from least performant alternative (percentage).
|
|
214
|
+
# [:z] Z-score compared to base (above).
|
|
215
|
+
# [:conf] Confidence based on z-score (0, 90, 95, 99, 99.9).
|
|
216
|
+
def score
|
|
217
|
+
struct = Struct.new(:id, :conv, :pop, :diff, :z, :conf)
|
|
218
|
+
alts = alternatives.map { |alt| struct.new(alt.id, alt.conversion_rate.round(3), alt.participants) }
|
|
219
|
+
# sort by conversion rate to find best and second best
|
|
220
|
+
sorted = alts.sort_by(&:conv)
|
|
221
|
+
base = sorted[-2]
|
|
222
|
+
# calculate z-score
|
|
223
|
+
pc = base.conv
|
|
224
|
+
nc = base.pop
|
|
225
|
+
alts.each do |alt|
|
|
226
|
+
p = alt.conv
|
|
227
|
+
n = alt.pop
|
|
228
|
+
alt.z = (p - pc) / ((p * (1-p)/n) + (pc * (1-pc)/nc)).abs ** 0.5
|
|
229
|
+
alt.conf = AbTest.confidence(alt.z)
|
|
230
|
+
end
|
|
231
|
+
# difference is measured from least performant
|
|
232
|
+
if least = sorted.find { |alt| alt.conv > 0 }
|
|
233
|
+
alts.each do |alt|
|
|
234
|
+
alt.diff = (alt.conv - least.conv) / least.conv * 100 if alt.conv > least.conv
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
# best alternative is one with highest conversion rate (best shot).
|
|
238
|
+
# choice alternative can only pick best if we have high confidence (>90%).
|
|
239
|
+
best = sorted.last if sorted.last.conv > 0
|
|
240
|
+
choice = outcome ? alts[outcome.id] : (best && best.conf >= 90 ? best : nil)
|
|
241
|
+
Struct.new(:alts, :best, :base, :choice).new(alts, best, base, choice)
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Use the score returned by #score to derive a conclusion. Returns an
|
|
245
|
+
# array of claims.
|
|
246
|
+
def conclusion(score = score)
|
|
247
|
+
claims = []
|
|
248
|
+
# find name from alt structure returned from score
|
|
249
|
+
name = ->(alt){ alternatives[alt.id].name }
|
|
250
|
+
# only interested in sorted alternatives with conversion
|
|
251
|
+
sorted = score.alts.select { |alt| alt.conv > 0.0 }.sort_by(&:conv).reverse
|
|
252
|
+
if sorted.size > 1
|
|
253
|
+
# start with alternatives that have conversion, from best to worst,
|
|
254
|
+
# then alternatives with no conversion.
|
|
255
|
+
sorted |= score.alts
|
|
256
|
+
# we want a result that's clearly better than 2nd best.
|
|
257
|
+
best, second = sorted[0], sorted[1]
|
|
258
|
+
if best.conv > second.conv
|
|
259
|
+
diff = ((best.conv - second.conv) / second.conv * 100).round
|
|
260
|
+
better = " (%d%% better than %s)" % [diff, name[second]] if diff > 0
|
|
261
|
+
claims << "The best choice is %s: it converted at %.1f%%%s." % [name[best], best.conv * 100, better]
|
|
262
|
+
if best.conf >= 90
|
|
263
|
+
claims << "With %d%% probability this result is statistically significant." % score.best.conf
|
|
264
|
+
else
|
|
265
|
+
claims << "This result is not statistically significant, suggest you continue this experiment."
|
|
266
|
+
end
|
|
267
|
+
sorted.delete best
|
|
268
|
+
end
|
|
269
|
+
sorted.each do |alt|
|
|
270
|
+
if alt.conv > 0.0
|
|
271
|
+
claims << "%s converted at %.1f%%." % [name[alt].capitalize, alt.conv * 100]
|
|
272
|
+
else
|
|
273
|
+
claims << "%s did not convert." % name[alt].capitalize
|
|
274
|
+
end
|
|
275
|
+
end
|
|
276
|
+
else
|
|
277
|
+
claims << "This experiment did not run long enough to find a clear winner."
|
|
278
|
+
end
|
|
279
|
+
claims << "#{name[score.choice].capitalize} selected as the best alternative." if score.choice
|
|
280
|
+
claims
|
|
202
281
|
end
|
|
203
282
|
|
|
204
283
|
def humanize
|
|
@@ -228,7 +307,7 @@ module Vanity
|
|
|
228
307
|
|
|
229
308
|
# Alternative chosen when this experiment was completed.
|
|
230
309
|
def outcome
|
|
231
|
-
outcome = redis
|
|
310
|
+
outcome = redis[key("outcome")]
|
|
232
311
|
outcome && alternatives[outcome.to_i]
|
|
233
312
|
end
|
|
234
313
|
|
|
@@ -242,8 +321,8 @@ module Vanity
|
|
|
242
321
|
end
|
|
243
322
|
end
|
|
244
323
|
unless outcome
|
|
245
|
-
|
|
246
|
-
outcome =
|
|
324
|
+
best = score.best
|
|
325
|
+
outcome = best.id if best
|
|
247
326
|
end
|
|
248
327
|
# TODO: logging
|
|
249
328
|
redis.setnx key("outcome"), outcome
|
|
@@ -257,6 +336,12 @@ module Vanity
|
|
|
257
336
|
super
|
|
258
337
|
end
|
|
259
338
|
|
|
339
|
+
def reset! #:nodoc:
|
|
340
|
+
redis.del key(:outcome)
|
|
341
|
+
alternatives.each(&:destroy)
|
|
342
|
+
super
|
|
343
|
+
end
|
|
344
|
+
|
|
260
345
|
def destroy #:nodoc:
|
|
261
346
|
redis.del key(:outcome)
|
|
262
347
|
alternatives.each(&:destroy)
|
|
@@ -19,7 +19,7 @@ module Vanity
|
|
|
19
19
|
@id, @name = id.to_sym, name
|
|
20
20
|
@namespace = "#{@playground.namespace}:#{@id}"
|
|
21
21
|
redis.setnx key(:created_at), Time.now.to_i
|
|
22
|
-
@created_at = Time.at(redis
|
|
22
|
+
@created_at = Time.at(redis[key(:created_at)].to_i)
|
|
23
23
|
@identify_block = ->(context){ context.vanity_identity }
|
|
24
24
|
end
|
|
25
25
|
|
|
@@ -34,6 +34,11 @@ module Vanity
|
|
|
34
34
|
|
|
35
35
|
# Experiment completion timestamp.
|
|
36
36
|
attr_reader :completed_at
|
|
37
|
+
|
|
38
|
+
# Returns the type of this class as a symbol (e.g. ab_test).
|
|
39
|
+
def type
|
|
40
|
+
self.class.type
|
|
41
|
+
end
|
|
37
42
|
|
|
38
43
|
# Call this method with no argument or block to return an identity. Call
|
|
39
44
|
# this method with a block to define how to obtain an identity for the
|
|
@@ -117,12 +122,13 @@ module Vanity
|
|
|
117
122
|
|
|
118
123
|
# Time stamp when experiment was completed.
|
|
119
124
|
def completed_at
|
|
120
|
-
|
|
125
|
+
time = redis[key(:completed_at)]
|
|
126
|
+
time && Time.at(time.to_i)
|
|
121
127
|
end
|
|
122
128
|
|
|
123
129
|
# Returns true if experiment active, false if completed.
|
|
124
130
|
def active?
|
|
125
|
-
redis
|
|
131
|
+
redis[key(:completed_at)].nil?
|
|
126
132
|
end
|
|
127
133
|
|
|
128
134
|
|
|
@@ -145,6 +151,13 @@ module Vanity
|
|
|
145
151
|
def save #:nodoc:
|
|
146
152
|
end
|
|
147
153
|
|
|
154
|
+
# Reset experiment.
|
|
155
|
+
def reset!
|
|
156
|
+
@created_at = Time.now
|
|
157
|
+
redis[key(:created_at)] = @created_at.to_i
|
|
158
|
+
redis.del key(:completed_at)
|
|
159
|
+
end
|
|
160
|
+
|
|
148
161
|
# Get rid of all experiment data.
|
|
149
162
|
def destroy
|
|
150
163
|
redis.del key(:created_at)
|
data/lib/vanity/playground.rb
CHANGED
|
@@ -15,6 +15,7 @@ module Vanity
|
|
|
15
15
|
# Created new Playground. Unless you need to, use the global Vanity.playground.
|
|
16
16
|
def initialize
|
|
17
17
|
@experiments = {}
|
|
18
|
+
@host, @port, @db = "127.0.0.1", 6379, 0
|
|
18
19
|
@namespace = "vanity:#{Vanity::Version::MAJOR}"
|
|
19
20
|
@load_path = "experiments"
|
|
20
21
|
end
|
data/lib/vanity/report.erb
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
<li class="experiment" id="experiment_<%= CGI.escape exp.id.to_s %>">
|
|
16
16
|
<h3><%= CGI.escape_html exp.name %></h3>
|
|
17
17
|
<blockquote><%= CGI.escape_html exp.description.to_s %></blockquote>
|
|
18
|
-
<%= exp.
|
|
18
|
+
<%= exp.conclusion.join(" ") %>
|
|
19
19
|
<p class="meta"><%= exp.humanize %> started <%= exp.created_at.strftime("%a, %b %-d %Y") %></p>
|
|
20
20
|
</li>
|
|
21
21
|
<% end %>
|
data/lib/vanity.rb
CHANGED
|
@@ -21,3 +21,4 @@ require File.join(File.dirname(__FILE__), "vanity/playground")
|
|
|
21
21
|
require File.join(File.dirname(__FILE__), "vanity/experiment/base")
|
|
22
22
|
require File.join(File.dirname(__FILE__), "vanity/experiment/ab_test")
|
|
23
23
|
require File.join(File.dirname(__FILE__), "vanity/rails") if defined?(Rails)
|
|
24
|
+
Vanity.autoload :Commands, File.join(File.dirname(__FILE__), "vanity/commands")
|
data/test/ab_test_test.rb
CHANGED
|
@@ -32,12 +32,12 @@ class AbTestTest < ActionController::TestCase
|
|
|
32
32
|
|
|
33
33
|
# -- Experiment definition --
|
|
34
34
|
|
|
35
|
-
def
|
|
35
|
+
def test_uses_ab_test_when_type_is_ab_test
|
|
36
36
|
experiment(:ab, type: :ab_test) { }
|
|
37
37
|
assert_instance_of Vanity::Experiment::AbTest, experiment(:ab)
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
-
def
|
|
40
|
+
def test_requires_at_least_two_alternatives_per_experiment
|
|
41
41
|
assert_raises RuntimeError do
|
|
42
42
|
experiment :none, type: :ab_test do
|
|
43
43
|
alternatives []
|
|
@@ -52,11 +52,27 @@ class AbTestTest < ActionController::TestCase
|
|
|
52
52
|
alternatives "foo", "bar"
|
|
53
53
|
end
|
|
54
54
|
end
|
|
55
|
+
|
|
56
|
+
def test_returning_alternative_by_value
|
|
57
|
+
experiment :abcd do
|
|
58
|
+
alternatives :a, :b, :c, :d
|
|
59
|
+
end
|
|
60
|
+
assert_equal experiment(:abcd).alternatives[1], experiment(:abcd).alternative(:b)
|
|
61
|
+
assert_equal experiment(:abcd).alternatives[3], experiment(:abcd).alternative(:d)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def test_alternative_name
|
|
65
|
+
experiment :abcd do
|
|
66
|
+
alternatives :a, :b
|
|
67
|
+
end
|
|
68
|
+
assert_equal "option 1", experiment(:abcd).alternative(:a).name
|
|
69
|
+
assert_equal "option 2", experiment(:abcd).alternative(:b).name
|
|
70
|
+
end
|
|
55
71
|
|
|
56
72
|
|
|
57
73
|
# -- Running experiment --
|
|
58
74
|
|
|
59
|
-
def
|
|
75
|
+
def test_returns_the_same_alternative_consistently
|
|
60
76
|
experiment :foobar do
|
|
61
77
|
alternatives "foo", "bar"
|
|
62
78
|
identify { "6e98ec" }
|
|
@@ -68,7 +84,7 @@ class AbTestTest < ActionController::TestCase
|
|
|
68
84
|
end
|
|
69
85
|
end
|
|
70
86
|
|
|
71
|
-
def
|
|
87
|
+
def test_returns_different_alternatives_for_each_participant
|
|
72
88
|
experiment :foobar do
|
|
73
89
|
alternatives "foo", "bar"
|
|
74
90
|
identify { rand(1000).to_s }
|
|
@@ -78,7 +94,7 @@ class AbTestTest < ActionController::TestCase
|
|
|
78
94
|
assert_in_delta alts.select { |a| a == "foo" }.count, 500, 100 # this may fail, such is propability
|
|
79
95
|
end
|
|
80
96
|
|
|
81
|
-
def
|
|
97
|
+
def test_records_all_participants_in_each_alternative
|
|
82
98
|
ids = (Array.new(200) { |i| i.to_s } * 5).shuffle
|
|
83
99
|
experiment :foobar do
|
|
84
100
|
alternatives "foo", "bar"
|
|
@@ -90,7 +106,7 @@ class AbTestTest < ActionController::TestCase
|
|
|
90
106
|
assert_in_delta alts.first.participants, 100, 20
|
|
91
107
|
end
|
|
92
108
|
|
|
93
|
-
def
|
|
109
|
+
def test_records_each_converted_participant_only_once
|
|
94
110
|
ids = (Array.new(100) { |i| i.to_s } * 5).shuffle
|
|
95
111
|
test = self
|
|
96
112
|
experiment :foobar do
|
|
@@ -123,6 +139,26 @@ class AbTestTest < ActionController::TestCase
|
|
|
123
139
|
assert_equal 100, alts.inject(0) { |t,a| t + a.converted }
|
|
124
140
|
end
|
|
125
141
|
|
|
142
|
+
def test_reset_experiment
|
|
143
|
+
experiment :simple do
|
|
144
|
+
identify { "me" }
|
|
145
|
+
complete_if { alternatives.map(&:converted).sum >= 1 }
|
|
146
|
+
outcome_is { alternative(true) }
|
|
147
|
+
end
|
|
148
|
+
experiment(:simple).choose
|
|
149
|
+
experiment(:simple).conversion!
|
|
150
|
+
refute experiment(:simple).active?
|
|
151
|
+
assert_equal true, experiment(:simple).outcome.value
|
|
152
|
+
|
|
153
|
+
experiment(:simple).reset!
|
|
154
|
+
assert experiment(:simple).active?
|
|
155
|
+
assert_nil experiment(:simple).outcome
|
|
156
|
+
assert_nil experiment(:simple).completed_at
|
|
157
|
+
assert_equal 0, experiment(:simple).alternatives.map(&:participants).sum
|
|
158
|
+
assert_equal 0, experiment(:simple).alternatives.map(&:conversions).sum
|
|
159
|
+
assert_equal 0, experiment(:simple).alternatives.map(&:converted).sum
|
|
160
|
+
end
|
|
161
|
+
|
|
126
162
|
|
|
127
163
|
# -- A/B helper methods --
|
|
128
164
|
|
|
@@ -190,34 +226,187 @@ class AbTestTest < ActionController::TestCase
|
|
|
190
226
|
end
|
|
191
227
|
|
|
192
228
|
|
|
193
|
-
# --
|
|
229
|
+
# -- Scoring --
|
|
194
230
|
|
|
195
|
-
def
|
|
196
|
-
experiment
|
|
197
|
-
alternatives :a, :b, :c, :d
|
|
198
|
-
end
|
|
199
|
-
alts = experiment(:abcd).alternatives
|
|
231
|
+
def test_scoring
|
|
232
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
200
233
|
# participating, conversions, rate, z-score
|
|
201
234
|
# Control: 182 35 19.23% N/A
|
|
202
|
-
182.times { |i|
|
|
203
|
-
35.times
|
|
235
|
+
182.times { |i| experiment(:abcd).alternative(:a).participating!(i) }
|
|
236
|
+
35.times { |i| experiment(:abcd).alternative(:a).conversion!(i) }
|
|
204
237
|
# Treatment A: 180 45 25.00% 1.33
|
|
205
|
-
180.times { |i|
|
|
206
|
-
45.times
|
|
207
|
-
#
|
|
208
|
-
189.times { |i|
|
|
209
|
-
28.times
|
|
210
|
-
#
|
|
211
|
-
188.times { |i|
|
|
212
|
-
61.times
|
|
238
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
|
239
|
+
45.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
|
240
|
+
# treatment B: 189 28 14.81% -1.13
|
|
241
|
+
189.times { |i| experiment(:abcd).alternative(:c).participating!(i) }
|
|
242
|
+
28.times { |i| experiment(:abcd).alternative(:c).conversion!(i) }
|
|
243
|
+
# treatment C: 188 61 32.45% 2.94
|
|
244
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
|
245
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
|
246
|
+
|
|
247
|
+
z_scores = experiment(:abcd).score.alts.map { |alt| "%.2f" % alt.z }
|
|
248
|
+
assert_equal %w{-1.33 0.00 -2.47 1.58}, z_scores
|
|
249
|
+
confidences = experiment(:abcd).score.alts.map(&:conf)
|
|
250
|
+
assert_equal [90, 0, 99, 90], confidences
|
|
251
|
+
|
|
252
|
+
diff = experiment(:abcd).score.alts.map { |alt| alt.diff && alt.diff.round }
|
|
253
|
+
assert_equal [30, 69, nil, 119], diff
|
|
254
|
+
assert_equal 3, experiment(:abcd).score.best.id
|
|
255
|
+
assert_equal 3, experiment(:abcd).score.choice.id
|
|
256
|
+
end
|
|
213
257
|
|
|
214
|
-
|
|
215
|
-
|
|
258
|
+
def test_scoring_with_no_performers
|
|
259
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
260
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.z.nan? }
|
|
261
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.conf == 0 }
|
|
262
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.diff.nil? }
|
|
263
|
+
assert_nil experiment(:abcd).score.best
|
|
264
|
+
assert_nil experiment(:abcd).score.choice
|
|
265
|
+
end
|
|
216
266
|
|
|
217
|
-
|
|
218
|
-
|
|
267
|
+
def test_scoring_with_one_performer
|
|
268
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
269
|
+
10.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
|
270
|
+
8.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
|
271
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.z.nan? }
|
|
272
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.conf == 0 }
|
|
273
|
+
assert experiment(:abcd).score.alts.all? { |alt| alt.diff.nil? }
|
|
274
|
+
assert 1, experiment(:abcd).score.best.id
|
|
275
|
+
assert_nil experiment(:abcd).score.choice
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
def test_scoring_with_some_performers
|
|
279
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
280
|
+
10.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
|
281
|
+
8.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
|
282
|
+
12.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
|
283
|
+
5.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
|
284
|
+
|
|
285
|
+
z_scores = experiment(:abcd).score.alts.map { |alt| "%.2f" % alt.z }
|
|
286
|
+
assert_equal %w{NaN 2.01 NaN 0.00}, z_scores
|
|
287
|
+
confidences = experiment(:abcd).score.alts.map(&:conf)
|
|
288
|
+
assert_equal [0, 95, 0, 0], confidences
|
|
289
|
+
diff = experiment(:abcd).score.alts.map { |alt| alt.diff && alt.diff.round }
|
|
290
|
+
assert_equal [nil, 92, nil, nil], diff
|
|
291
|
+
assert_equal 1, experiment(:abcd).score.best.id
|
|
292
|
+
assert_equal 1, experiment(:abcd).score.choice.id
|
|
293
|
+
end
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# -- Conclusion --
|
|
297
|
+
|
|
298
|
+
def test_conclusion
|
|
299
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
300
|
+
# participating, conversions, rate, z-score
|
|
301
|
+
# Control: 182 35 19.23% N/A
|
|
302
|
+
182.times { |i| experiment(:abcd).alternative(:a).participating!(i) }
|
|
303
|
+
35.times { |i| experiment(:abcd).alternative(:a).conversion!(i) }
|
|
304
|
+
# Treatment A: 180 45 25.00% 1.33
|
|
305
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
|
306
|
+
45.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
|
307
|
+
# treatment B: 189 28 14.81% -1.13
|
|
308
|
+
189.times { |i| experiment(:abcd).alternative(:c).participating!(i) }
|
|
309
|
+
28.times { |i| experiment(:abcd).alternative(:c).conversion!(i) }
|
|
310
|
+
# treatment C: 188 61 32.45% 2.94
|
|
311
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
|
312
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
|
313
|
+
|
|
314
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
|
315
|
+
The best choice is option 4: it converted at 32.4% (30% better than option 2).
|
|
316
|
+
With 90% probability this result is statistically significant.
|
|
317
|
+
Option 2 converted at 25.0%.
|
|
318
|
+
Option 1 converted at 19.2%.
|
|
319
|
+
Option 3 converted at 14.8%.
|
|
320
|
+
Option 4 selected as the best alternative.
|
|
321
|
+
TEXT
|
|
219
322
|
end
|
|
220
|
-
|
|
323
|
+
|
|
324
|
+
def test_conclusion_with_some_performers
|
|
325
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
326
|
+
# Treatment A: 180 45 25.00% 1.33
|
|
327
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
|
328
|
+
45.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
|
329
|
+
# treatment C: 188 61 32.45% 2.94
|
|
330
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
|
331
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
|
332
|
+
|
|
333
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
|
334
|
+
The best choice is option 4: it converted at 32.4% (30% better than option 2).
|
|
335
|
+
With 90% probability this result is statistically significant.
|
|
336
|
+
Option 2 converted at 25.0%.
|
|
337
|
+
Option 1 did not convert.
|
|
338
|
+
Option 3 did not convert.
|
|
339
|
+
Option 4 selected as the best alternative.
|
|
340
|
+
TEXT
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
def test_conclusion_without_clear_winner
|
|
344
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
345
|
+
# Treatment A: 180 45 25.00% 1.33
|
|
346
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
|
347
|
+
58.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
|
348
|
+
# treatment C: 188 61 32.45% 2.94
|
|
349
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
|
350
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
|
351
|
+
|
|
352
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
|
353
|
+
The best choice is option 4: it converted at 32.4% (1% better than option 2).
|
|
354
|
+
This result is not statistically significant, suggest you continue this experiment.
|
|
355
|
+
Option 2 converted at 32.2%.
|
|
356
|
+
Option 1 did not convert.
|
|
357
|
+
Option 3 did not convert.
|
|
358
|
+
TEXT
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
def test_conclusion_without_close_performers
|
|
362
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
363
|
+
# Treatment A: 180 45 25.00% 1.33
|
|
364
|
+
186.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
|
365
|
+
60.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
|
366
|
+
# treatment C: 188 61 32.45% 2.94
|
|
367
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
|
368
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
|
369
|
+
|
|
370
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
|
371
|
+
The best choice is option 4: it converted at 32.4%.
|
|
372
|
+
This result is not statistically significant, suggest you continue this experiment.
|
|
373
|
+
Option 2 converted at 32.3%.
|
|
374
|
+
Option 1 did not convert.
|
|
375
|
+
Option 3 did not convert.
|
|
376
|
+
TEXT
|
|
377
|
+
end
|
|
378
|
+
|
|
379
|
+
def test_conclusion_without_equal_performers
|
|
380
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
381
|
+
# Treatment A: 180 45 25.00% 1.33
|
|
382
|
+
188.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
|
383
|
+
61.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
|
384
|
+
# treatment C: 188 61 32.45% 2.94
|
|
385
|
+
188.times { |i| experiment(:abcd).alternative(:d).participating!(i) }
|
|
386
|
+
61.times { |i| experiment(:abcd).alternative(:d).conversion!(i) }
|
|
387
|
+
|
|
388
|
+
assert_equal <<-TEXT, experiment(:abcd).conclusion.join("\n") << "\n"
|
|
389
|
+
Option 4 converted at 32.4%.
|
|
390
|
+
Option 2 converted at 32.4%.
|
|
391
|
+
Option 1 did not convert.
|
|
392
|
+
Option 3 did not convert.
|
|
393
|
+
TEXT
|
|
394
|
+
end
|
|
395
|
+
|
|
396
|
+
def test_conclusion_with_one_performers
|
|
397
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
398
|
+
# Treatment A: 180 45 25.00% 1.33
|
|
399
|
+
180.times { |i| experiment(:abcd).alternative(:b).participating!(i) }
|
|
400
|
+
45.times { |i| experiment(:abcd).alternative(:b).conversion!(i) }
|
|
401
|
+
|
|
402
|
+
assert_equal "This experiment did not run long enough to find a clear winner.", experiment(:abcd).conclusion.join("\n")
|
|
403
|
+
end
|
|
404
|
+
|
|
405
|
+
def test_conclusion_with_no_performers
|
|
406
|
+
experiment(:abcd) { alternatives :a, :b, :c, :d }
|
|
407
|
+
assert_equal "This experiment did not run long enough to find a clear winner.", experiment(:abcd).conclusion.join("\n")
|
|
408
|
+
end
|
|
409
|
+
|
|
221
410
|
|
|
222
411
|
# -- Completion --
|
|
223
412
|
|
|
@@ -332,19 +521,30 @@ class AbTestTest < ActionController::TestCase
|
|
|
332
521
|
assert_equal experiment(:quick).alternatives[1], experiment(:quick).outcome
|
|
333
522
|
end
|
|
334
523
|
|
|
335
|
-
def
|
|
524
|
+
def test_outcome_only_performing_alternative
|
|
525
|
+
experiment :quick do
|
|
526
|
+
end
|
|
527
|
+
2.times do |i|
|
|
528
|
+
experiment(:quick).alternatives[1].participating!(i)
|
|
529
|
+
experiment(:quick).alternatives[1].conversion!(i)
|
|
530
|
+
end
|
|
531
|
+
experiment(:quick).complete!
|
|
532
|
+
assert_equal experiment(:quick).alternatives[1], experiment(:quick).outcome
|
|
533
|
+
end
|
|
534
|
+
|
|
535
|
+
def test_outcome_choosing_equal_alternatives
|
|
336
536
|
experiment :quick do
|
|
337
537
|
end
|
|
338
538
|
8.times do |i|
|
|
339
539
|
experiment(:quick).alternatives[0].participating!(i)
|
|
340
540
|
experiment(:quick).alternatives[0].conversion!(i)
|
|
341
541
|
end
|
|
342
|
-
|
|
542
|
+
8.times do |i|
|
|
343
543
|
experiment(:quick).alternatives[1].participating!(i)
|
|
344
544
|
experiment(:quick).alternatives[1].conversion!(i)
|
|
345
545
|
end
|
|
346
546
|
experiment(:quick).complete!
|
|
347
|
-
assert_equal experiment(:quick).alternatives[
|
|
547
|
+
assert_equal experiment(:quick).alternatives[1], experiment(:quick).outcome
|
|
348
548
|
end
|
|
349
549
|
|
|
350
550
|
end
|
data/vanity.gemspec
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Gem::Specification.new do |spec|
|
|
2
2
|
spec.name = "vanity"
|
|
3
|
-
spec.version = "0.2.
|
|
3
|
+
spec.version = "0.2.2"
|
|
4
4
|
spec.author = "Assaf Arkin"
|
|
5
5
|
spec.email = "assaf@labnotes.org"
|
|
6
6
|
spec.homepage = "http://github.com/assaf/vanity"
|
|
@@ -9,6 +9,7 @@ Gem::Specification.new do |spec|
|
|
|
9
9
|
#spec.post_install_message = "To get started run vanity --help"
|
|
10
10
|
|
|
11
11
|
spec.files = Dir["{bin,lib,rails,test}/**/*", "CHANGELOG", "README.rdoc", "vanity.gemspec"]
|
|
12
|
+
spec.executable = "vanity"
|
|
12
13
|
|
|
13
14
|
spec.has_rdoc = true
|
|
14
15
|
spec.extra_rdoc_files = "README.rdoc", "CHANGELOG"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: vanity
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Assaf Arkin
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2009-11-
|
|
12
|
+
date: 2009-11-12 00:00:00 -08:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
@@ -24,14 +24,17 @@ dependencies:
|
|
|
24
24
|
version:
|
|
25
25
|
description: ""
|
|
26
26
|
email: assaf@labnotes.org
|
|
27
|
-
executables:
|
|
28
|
-
|
|
27
|
+
executables:
|
|
28
|
+
- vanity
|
|
29
29
|
extensions: []
|
|
30
30
|
|
|
31
31
|
extra_rdoc_files:
|
|
32
32
|
- README.rdoc
|
|
33
33
|
- CHANGELOG
|
|
34
34
|
files:
|
|
35
|
+
- bin/vanity
|
|
36
|
+
- lib/vanity/commands/report.rb
|
|
37
|
+
- lib/vanity/commands.rb
|
|
35
38
|
- lib/vanity/experiment/ab_test.rb
|
|
36
39
|
- lib/vanity/experiment/base.rb
|
|
37
40
|
- lib/vanity/playground.rb
|
|
@@ -42,6 +45,7 @@ files:
|
|
|
42
45
|
- lib/vanity.rb
|
|
43
46
|
- test/ab_test_test.rb
|
|
44
47
|
- test/experiment_test.rb
|
|
48
|
+
- test/experiments/null_abc.rb
|
|
45
49
|
- test/playground_test.rb
|
|
46
50
|
- test/rails_test.rb
|
|
47
51
|
- test/test_helper.rb
|
|
@@ -55,7 +59,7 @@ licenses: []
|
|
|
55
59
|
post_install_message:
|
|
56
60
|
rdoc_options:
|
|
57
61
|
- --title
|
|
58
|
-
- Vanity 0.2.
|
|
62
|
+
- Vanity 0.2.2
|
|
59
63
|
- --main
|
|
60
64
|
- README.rdoc
|
|
61
65
|
- --webcvs
|