benchmark-lab 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +29 -0
- data/.pullreview.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +674 -0
- data/README.md +103 -0
- data/Rakefile +10 -0
- data/benchmark-lab.gemspec +26 -0
- data/lib/benchmark/lab.rb +147 -0
- data/lib/benchmark/lab/descriptive_statistics.rb +63 -0
- data/lib/benchmark/lab/mann_whitney_u_test.rb +114 -0
- data/lib/benchmark/lab/version.rb +5 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/unit/descriptive_statistics_spec.rb +45 -0
- data/spec/unit/experiment_spec.rb +101 -0
- data/spec/unit/mann_whitney_u_test_spec.rb +32 -0
- metadata +134 -0
data/README.md
ADDED
@@ -0,0 +1,103 @@
[![PullReview stats](https://www.pullreview.com/github/toch/benchmark-lab/badges/master.svg?)](https://www.pullreview.com/github/toch/benchmark-lab/reviews/master)

# Benchmark Lab

Run Real Experiments and Calculate Non-Parametric Statistics.

## Installation

Install it yourself as:

    $ gem install benchmark-lab

## Usage

There are two ways to use it:

1. classic: as `Benchmark.bm` does
2. iterative: collects and measures separately, stores the results into
   different JSON files, then puts everything together and ranks them

### Classic Usage

```Ruby
require 'benchmark/lab'

n = 5_000_000
cases = {
  'for:'   => proc { for i in 1..n; a = "1"; end },
  'times:' => proc { n.times do ; a = "1"; end },
  'upto:'  => proc { 1.upto(n) do ; a = "1"; end }
}

# How many times do you run the function
# 20 is a good minimum number
nbr_of_samples = 20

Benchmark.experiment(nbr_of_samples) do |x|
  cases.each { |label, blk| x.report(label, &blk) }
end
```

The output looks like the following:

```
          user              system            total             real
for:      [0.77,0.77,0.78]  [0.00,0.00,0.00]  [0.77,0.77,0.78]  [0.77,0.77,0.78]
times:    [0.74,0.74,0.74]  [0.00,0.00,0.00]  [0.74,0.74,0.74]  [0.74,0.74,0.74]
upto:     [0.75,0.75,0.75]  [0.00,0.00,0.00]  [0.75,0.75,0.75]  [0.75,0.75,0.75]
The best "times:" is significantly (95%) better (total time).
```

### Iterative Usage

```Ruby
require 'benchmark/lab'

n = 5_000_000

# How many times do you run the function
# 20 is a good minimum number
nbr_of_samples = 20

jsons = []

jsons << Benchmark.observe_and_summarize(nbr_of_samples) do |x|
  x.report('for') { for i in 1..n; a = "1"; end }
end

jsons << Benchmark.observe_and_summarize(nbr_of_samples) do |x|
  x.report('times') { n.times do ; a = "1"; end }
end

jsons << Benchmark.observe_and_summarize(nbr_of_samples) do |x|
  x.report('upto') { 1.upto(n) do ; a = "1"; end }
end

best, is_h0_rejected = Benchmark.aggregate_and_rank(jsons.map { |json| JSON.parse(json) })

puts best
puts is_h0_rejected
```

The output looks like the following:

```
{"name"=>"total", "sample"=>[0.6899999999999977, 0.6899999999999977, 0.6899999999999977, 0.6899999999999977, 0.6900000000000013, 0.6900000000000048, 0.6900000000000048, 0.6999999999999957, 0.6999999999999957, 0.6999999999999957, 0.6999999999999957, 0.6999999999999957, 0.6999999999999993, 0.6999999999999993, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028], "sample_size"=>20, "minimum"=>0.6899999999999977, "maximum"=>0.7000000000000028, "first_quartile"=>0.690000000000003, "third_quartile"=>0.7000000000000028, "median"=>0.6999999999999957, "interquartile_range"=>0.009999999999999787, "label"=>"upto"}
true
```

## Ideas

* compare two different implementations of the same function
  1. get the stats, then compare
  2. use git (commit, branch)
  3. use tests to check there is no performance regression at the same time
  4. annotate the tests you want to check
* decide the sample size automatically (based on the power you want to reach)
* explain correctly why we should do that

## Contributing

1. Fork it ( https://github.com/toch/benchmark-lab/fork )
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Add some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create a new Pull Request
data/benchmark-lab.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8

# Make the gem's own lib/ directory loadable so we can read the version
# constant straight from the source tree.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'benchmark/lab/version'

Gem::Specification.new do |s|
  # Identity
  s.name    = 'benchmark-lab'
  s.version = Benchmark::Experiment::VERSION
  s.authors = ['Christophe Philemotte']
  s.email   = ['christophe.philemotte@8thcolor.com']

  # Description shown on rubygems.org
  s.summary     = 'Run Real Experiment and Calculate Non-Parametric Statistics.'
  s.description = 'Run Real Experiment and Calculate Non-Parametric Statistics.'
  s.homepage    = 'https://github.com/toch/benchmark-lab'
  s.license     = 'GPLv3'

  # Package exactly the files tracked by git.
  s.files         = `git ls-files -z`.split("\x0")
  s.executables   = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ['lib']

  # Development-only dependencies
  s.add_development_dependency 'bundler', '~> 1.6'
  s.add_development_dependency 'rake'
  s.add_development_dependency 'minitest', '4.5.0'
  s.add_development_dependency 'turn', '~> 0.9'

  # Runtime dependency: normal CDF for the Mann-Whitney U test.
  s.add_runtime_dependency 'distribution'
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require 'benchmark/lab/descriptive_statistics'
|
3
|
+
require 'benchmark/lab/mann_whitney_u_test'
|
4
|
+
require 'benchmark/lab/version'
|
5
|
+
|
6
|
+
require 'json'
|
7
|
+
|
8
|
+
module Benchmark
|
9
|
+
# An ordered collection of raw benchmark observations (one Benchmark::Tms
# per run) for a single benchmarked case.
class Sample
  include Enumerable

  def initialize
    @observations = []
  end

  # Appends one observation to the sample.
  def <<(observation)
    @observations << observation
  end

  # Yields each observation in insertion order.
  #
  # The original re-checked `block_given?` inside the loop and fell back to
  # `yield`, a branch that could never succeed (no block means `yield`
  # raises LocalJumpError). Simplified to delegate directly, and to return
  # an Enumerator when called without a block, per the Enumerable contract.
  def each(&block)
    return to_enum(:each) unless block_given?

    @observations.each(&block)
  end
end
|
30
|
+
|
31
|
+
# Monkey-patch of the stdlib Benchmark::Job: each reported case additionally
# carries a Sample (raw observations) and a stats array, so every entry in
# the private @list becomes [label, block, sample, stats].
# NOTE(review): this depends on Benchmark::Job's internal @list layout
# (stdlib #item appends [label, block]) — verify against the targeted
# Ruby stdlib version.
class Job
  # Keep the original #item reachable so the override can delegate to it.
  alias old_item item

  # Registers a case like stdlib #item, then extends the newly appended
  # @list entry with a fresh Sample and an empty stats array.
  def item(label = '', &blk)
    old_item(label, &blk)
    @list.last << Sample.new
    @list.last << []
    self
  end

  # Runs every registered case `sample_size` times, collecting one
  # Benchmark.measure result per run into its sample, then summarizes each
  # measured time (utime/stime/total/real) into DescriptiveStatistics.
  def observe_and_summarize(sample_size)
    @list.each do |label, item, sample, stats|
      sample_size.times.each do
        sample << Benchmark.measure(label, &item)
      end

      # One statistics object per measured time, in MEASURED_TIMES order;
      # sample.map(&time_name) extracts that time from each Tms observation.
      Experiment::MEASURED_TIMES.keys.each do |time_name|
        stats << Benchmark::Experiment::DescriptiveStatistics.new(sample.map(&time_name), time_name)
      end
    end
  end

  # Mirror stdlib Benchmark::Job, where #report is an alias of #item.
  alias report item
end
|
54
|
+
|
55
|
+
# Public entry points of benchmark-lab: run repeated measurements,
# summarize them, print a table, and rank the cases with a Mann-Whitney
# U test on the 'total' time.
module Experiment
  # Column headers for #print_stats, keyed by the Benchmark::Tms accessor
  # used to extract each measured time from an observation.
  MEASURED_TIMES =
    {
      utime: 'user',
      stime: 'system',
      total: 'total',
      real: 'real'
    }.freeze

  # Runs every reported case `sample_size` times and returns a JSON string
  # mapping each label to its array of descriptive statistics.
  def observe_and_summarize(sample_size, &blk)
    job = Job.new(0)
    yield(job)
    job.observe_and_summarize(sample_size)
    all_stats = job.list.map { |label, _, _, stats| [label, stats] }.to_h
    all_stats.to_json
  end

  # Merges the parsed JSON hashes produced by #observe_and_summarize and
  # ranks the result. Returns nil for an empty input.
  def aggregate_and_rank(jsons)
    return if jsons.empty?

    # `stats.merge(acc)` (not `acc.merge(stats)`) so that on a duplicate
    # label the EARLIER entry wins, preserving the original precedence.
    all_stats = jsons.inject({}) { |acc, stats| stats.merge(acc) }
    rank(all_stats)
  end

  # Classic, Benchmark.bm-like entry point: measures, prints the summary
  # table, ranks the cases, and reports whether the best one is
  # statistically significantly better. Returns the parsed statistics.
  def experiment(sample_size, &blk)
    all_stats = JSON.parse(observe_and_summarize(sample_size, &blk))
    print_stats(all_stats)

    best, is_the_best_significative = rank(all_stats)

    puts "The best \"#{best['label']}\" is #{is_the_best_significative ? '' : 'not '}significantly (95%) better (total time)."

    all_stats
  end

  # Sorts the cases by median 'total' time; when there is more than one
  # case, runs a Mann-Whitney U test between the two best medians.
  # Returns [best_total_stats_hash, h0_rejected_boolean].
  def rank(all_stats, alpha = 0.05)
    ranked = all_stats.map do |label, stats|
      total = stats.find { |stat| stat['name'] == 'total' }
      total['label'] = label
      total
    end.sort_by { |stat| stat['median'] }

    is_h0_rejected = true
    if all_stats.size > 1
      z = Benchmark::Experiment::MannWhitneyUTest::calculate_z(ranked.first['sample'], ranked[1]['sample'])
      p_value = Benchmark::Experiment::MannWhitneyUTest::calculate_probability_z(z)
      is_h0_rejected = Benchmark::Experiment::MannWhitneyUTest::is_null_hypothesis_rejected?(p_value, alpha)
    end

    return ranked.first, is_h0_rejected
  end

  # Placeholder — not implemented yet.
  def iterative_experiment
  end

  private

  # Prints an aligned table: one row per case, one column per measured
  # time, each cell rendered as [Q1,median,Q3].
  def print_stats(all_stats)
    width = label_width(all_stats)

    lines = []
    spacing = [0] * MEASURED_TIMES.size
    tab = ' ' * 4

    all_stats.each do |label, stats|
      line = ''
      line << label.ljust(width)

      stats.each_with_index do |stat, index|
        value = "#{tab}[#{'%.2f' % stat['first_quartile']},#{'%.2f' % stat['median']},#{'%.2f' % stat['third_quartile']}]"
        # Track the widest cell per column so headers line up
        # (was `[a, b].minmax.last`, i.e. a roundabout `.max`).
        spacing[index] = [spacing[index], value.length].max
        line << value
      end
      line << "\n"
      lines << line
    end

    print ''.ljust(width)
    MEASURED_TIMES.values.each_with_index do |head, index|
      print "#{tab}#{head}".ljust(spacing[index])
    end
    print "\n"

    lines.each { |line| print line }
  end

  # Width of the longest label, used to left-align the table rows.
  def label_width(all_stats)
    all_stats.map { |label, _| label.to_s.length }.max
  end
end
|
145
|
+
|
146
|
+
extend Benchmark::Experiment
|
147
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Benchmark
  module Experiment
    # Order statistics (min/max, quartiles, median, IQR) of a numeric
    # sample, with JSON serialization for the iterative workflow.
    class DescriptiveStatistics
      # sample: a non-empty array of comparable numbers.
      # name:   labels the measured time ('user', 'total', ...).
      # Raises ArgumentError on an empty sample (implements the original
      # "raise exception if empty sample" TODO, which was never coded).
      def initialize(sample, name = '')
        raise ArgumentError, 'sample must not be empty' if sample.empty?

        @name = name
        @sample = sample.sort
        @minimum, @maximum = @sample.minmax
        @median = calculate_median_of(@sample)
        @first_quartile = calculate_first_quartile_of(@sample)
        @third_quartile = calculate_third_quartile_of(@sample)
      end

      attr_reader :name, :sample, :minimum, :maximum, :first_quartile, :third_quartile, :median

      def sample_size
        sample.size
      end

      def interquartile_range
        @third_quartile - @first_quartile
      end

      # Serializes every statistic with string keys — the structure that
      # Benchmark::Experiment#rank consumes after JSON.parse.
      def to_json(options = {})
        {
          'name' => name,
          'sample' => sample,
          'sample_size' => sample_size,
          'minimum' => minimum,
          'maximum' => maximum,
          'first_quartile' => first_quartile,
          'third_quartile' => third_quartile,
          'median' => median,
          'interquartile_range' => interquartile_range
        }.to_json
      end

      private

      # https://en.wikipedia.org/wiki/Median
      # Assumes `data` is already sorted (always true: callers pass @sample
      # or a slice of it).
      def calculate_median_of(data)
        return data[data.size / 2] if data.size.odd?

        (data[(data.size - 1) / 2] + data[data.size / 2]) / 2.0
      end

      # http://mathworld.wolfram.com/Quartile.html
      # https://en.wikipedia.org/wiki/Quartile
      # For odd sizes the lower half includes the median element.
      def calculate_first_quartile_of(data)
        return calculate_median_of(data[0..(data.size / 2)]) if data.size.odd?

        calculate_median_of(data[0..((data.size - 1) / 2)])
      end

      # Both parities use the same upper slice — the original had an
      # identical, redundant odd-size branch, collapsed here.
      def calculate_third_quartile_of(data)
        calculate_median_of(data[(data.size / 2)..-1])
      end
    end
  end
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'distribution'
|
2
|
+
|
3
|
+
module Benchmark
  module Experiment
    # Mann-Whitney U test (Wilcoxon rank-sum test): a non-parametric test
    # of whether two independent samples come from the same distribution.
    module MannWhitneyUTest
      # Returns [U_x, U_y], the U statistic of each sample.
      def self.calculate_U(x, y)
        ranked = concatenate_and_label(x, y)

        rank!(ranked)

        adjust_ties!(ranked)

        r_x = rank_sum(ranked, :x)
        r_y = rank_sum(ranked, :y)

        n_x = x.size
        n_y = y.size

        u_x = calculate_u_sample(n_x, n_y, r_x)
        u_y = calculate_u_sample(n_y, n_x, r_y)

        [u_x, u_y]
      end

      # Normal approximation: z-score of the smaller U, using a
      # tie-corrected standard deviation when ties are present.
      def self.calculate_z(x, y)
        n_x = x.size.to_f
        n_y = y.size.to_f
        n = n_x + n_y
        n_xy = n_x * n_y

        # Smaller of the two U statistics (was `.minmax.first`).
        u = calculate_U(x, y).min.to_f

        has_ties, tie_correction = ties?(x, y)

        mu_u = n_xy / 2.0

        sigma_u =
          if has_ties
            Math::sqrt(n_xy / (n * (n + 1)) * ((n**3 - n) / 12.0 - tie_correction))
          else
            Math::sqrt(n_xy * (n + 1.0) / 12.0)
          end

        (u - mu_u) / sigma_u
      end

      # Converts a z-score into a p-value (two-sided by default).
      def self.calculate_probability_z(z, two_sided = true)
        prob = (1.0 - Distribution::Normal.cdf(z.abs()))
        prob *= 2.0 if two_sided
        prob
      end

      def self.is_null_hypothesis_rejected?(pvalue, significance_level)
        pvalue < significance_level
      end

      # NOTE(review): `private` has no effect on `def self.` singleton
      # methods, so the helpers below are technically still public; left
      # as-is to avoid breaking any specs that may call them directly.
      private

      # Returns [ties_found?, correction] where correction is
      # sum((t^3 - t) / 12) over every group of t tied values.
      def self.ties?(x, y)
        pooled = x + y
        tie_groups = pooled.group_by { |e| e }.reject { |_, members| members.size < 2 }

        # BUG FIX: the original injected over [value, members] pairs, so
        # `v.size` was always 2 and groups of 3+ ties were mis-corrected.
        correction = tie_groups.values.inject(0) do |sum, members|
          sum + (members.size**3 - members.size) / 12.0
        end

        [tie_groups.size > 0, correction]
      end

      # Tags each observation with its sample of origin: [value, :x|:y].
      def self.concatenate_and_label(x, y)
        ranked = []

        ranked += x.map { |e| [e, :x] }
        ranked += y.map { |e| [e, :y] }
      end

      # Sorts the pooled observations and appends a 1-based rank to each.
      def self.rank!(ranked)
        ranked.sort!

        ranked.inject(1) do |rank, elem|
          elem << rank
          rank + 1
        end
      end

      # Sum of the ranks belonging to the given sample label.
      def self.rank_sum(ranked, label)
        ranked
          .select { |elem| elem[1] == label }
          .inject(0) { |rank_sum, elem| rank_sum + elem.last }
      end

      # Replaces the rank of tied observations with the mean rank of their
      # tie group, as the test requires.
      def self.adjust_ties!(ranked)
        ties = {}

        ranked
          .group_by { |e| e.first }
          .reject { |_, v| v.size < 2 }
          .each do |score, data|
            ties[score] = data.inject(0) do |sum, elem|
              sum + elem.last
            end / data.size.to_f
          end

        ranked.map! do |elem|
          elem[-1] = ties[elem.first] if ties.key? elem.first
          elem
        end if ties.keys.size > 0
      end

      # U for one sample: n * n_other + n(n+1)/2 - rank_sum.
      def self.calculate_u_sample(n, n_other, r)
        n * n_other + n * (n + 1) / 2.0 - r
      end
    end
  end
end
|