benchmark-lab 0.0.1

data/README.md ADDED
@@ -0,0 +1,103 @@
+ [![PullReview stats](https://www.pullreview.com/github/toch/benchmark-lab/badges/master.svg?)](https://www.pullreview.com/github/toch/benchmark-lab/reviews/master)
+
+ # Benchmark Lab
+
+ Run real experiments and calculate non-parametric statistics.
+
+ ## Installation
+
+ Install it yourself as:
+
+     $ gem install benchmark-lab
+
+ ## Usage
+
+ There are two ways to use it:
+ 1. classic: works like `Benchmark.bm`
+ 2. iterative: collect and measure each case separately, store the results in
+    different JSON files, then put everything together and rank the cases
+    (see the file-based sketch after the iterative example below)
+
+ ### Classic Usage
+
+ ```Ruby
+ require 'benchmark/lab'
+
+ n = 5_000_000
+ cases = {
+   'for:'   => proc { for i in 1..n; a = "1"; end },
+   'times:' => proc { n.times do ; a = "1"; end },
+   'upto:'  => proc { 1.upto(n) do ; a = "1"; end }
+ }
+
+ # Number of times each case is run.
+ # 20 is a good minimum.
+ nbr_of_samples = 20
+
+ Benchmark.experiment(nbr_of_samples) do |x|
+   cases.each { |label, blk| x.report(label, &blk) }
+ end
+ ```
+
+ The output looks like the following. Each cell shows the first quartile, median,
+ and third quartile of the measured times:
+ ```
+           user                system              total               real
+ for:      [0.77,0.77,0.78]    [0.00,0.00,0.00]    [0.77,0.77,0.78]    [0.77,0.77,0.78]
+ times:    [0.74,0.74,0.74]    [0.00,0.00,0.00]    [0.74,0.74,0.74]    [0.74,0.74,0.74]
+ upto:     [0.75,0.75,0.75]    [0.00,0.00,0.00]    [0.75,0.75,0.75]    [0.75,0.75,0.75]
+ The best "times:" is significantly (95%) better (total time).
+ ```
+
+ ### Iterative Usage
+
+ ```Ruby
+ require 'benchmark/lab'
+
+ n = 5_000_000
+
+ # Number of times each case is run.
+ # 20 is a good minimum.
+ nbr_of_samples = 20
+
+ jsons = []
+
+ jsons << Benchmark.observe_and_summarize(nbr_of_samples) do |x|
+   x.report('for') { for i in 1..n; a = "1"; end }
+ end
+
+ jsons << Benchmark.observe_and_summarize(nbr_of_samples) do |x|
+   x.report('times') { n.times do ; a = "1"; end }
+ end
+
+ jsons << Benchmark.observe_and_summarize(nbr_of_samples) do |x|
+   x.report('upto') { 1.upto(n) do ; a = "1"; end }
+ end
+
+ best, is_h0_rejected = Benchmark.aggregate_and_rank(jsons.map { |json| JSON.parse(json) })
+
+ puts best
+ puts is_h0_rejected
+ ```
+
+ The output looks like the following. `best` is the summary of the case with the
+ lowest median total time, and `is_h0_rejected` tells whether it is significantly
+ (95%) better than the runner-up:
+ ```
+ {"name"=>"total", "sample"=>[0.6899999999999977, 0.6899999999999977, 0.6899999999999977, 0.6899999999999977, 0.6900000000000013, 0.6900000000000048, 0.6900000000000048, 0.6999999999999957, 0.6999999999999957, 0.6999999999999957, 0.6999999999999957, 0.6999999999999957, 0.6999999999999993, 0.6999999999999993, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028], "sample_size"=>20, "minimum"=>0.6899999999999977, "maximum"=>0.7000000000000028, "first_quartile"=>0.690000000000003, "third_quartile"=>0.7000000000000028, "median"=>0.6999999999999957, "interquartile_range"=>0.009999999999999787, "label"=>"upto"}
+ true
+ ```
+
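+ Because `observe_and_summarize` returns a plain JSON string, each case can also be
+ measured in a separate run and its summary stored on disk. A minimal sketch of that
+ file-based workflow (the file names are just an example):
+
+ ```Ruby
+ require 'benchmark/lab'
+
+ # One run, one case (e.g. one implementation or one commit):
+ json = Benchmark.observe_and_summarize(20) do |x|
+   x.report('times') { 5_000_000.times do ; a = "1"; end }
+ end
+ File.write('times.json', json)
+
+ # Later, once every case has its own JSON file:
+ jsons = Dir['*.json'].map { |path| JSON.parse(File.read(path)) }
+ best, is_h0_rejected = Benchmark.aggregate_and_rank(jsons)
+ puts best['label']
+ puts is_h0_rejected
+ ```
+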
+ ## Ideas
+
+ * compare two different implementations of the same function (see the sketch below)
+   1. get the stats, then compare them
+   2. use git (commits, branches)
+   3. use tests to check for performance regressions at the same time
+   4. annotate the tests you want to check
+ * choose the sample size automatically (based on the statistical power you want to reach)
+ * explain properly why we should do this
+
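+ A minimal sketch of the first idea with the current API (`sum_with_inject` and
+ `sum_with_while` are hypothetical implementations to compare):
+
+ ```Ruby
+ require 'benchmark/lab'
+
+ def sum_with_inject(n)
+   (1..n).inject(0) { |acc, i| acc + i }
+ end
+
+ def sum_with_while(n)
+   acc = 0
+   i = 1
+   while i <= n
+     acc += i
+     i += 1
+   end
+   acc
+ end
+
+ Benchmark.experiment(20) do |x|
+   x.report('inject') { sum_with_inject(1_000_000) }
+   x.report('while')  { sum_with_while(1_000_000) }
+ end
+ ```
+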
+ ## Contributing
+
+ 1. Fork it ( https://github.com/toch/benchmark-lab/fork )
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
+ require 'bundler/gem_tasks'
+
+ require 'rake/testtask'
+
+ Rake::TestTask.new do |t|
+   t.libs << 'spec'
+   t.test_files = FileList['spec/*/*_spec.rb']
+ end
+
+ task default: :test
data/benchmark-lab.gemspec ADDED
@@ -0,0 +1,26 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'benchmark/lab/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = 'benchmark-lab'
+   spec.version       = Benchmark::Experiment::VERSION
+   spec.authors       = ['Christophe Philemotte']
+   spec.email         = ['christophe.philemotte@8thcolor.com']
+   spec.summary       = %q{Run real experiments and calculate non-parametric statistics.}
+   spec.description   = %q{Run real experiments and calculate non-parametric statistics.}
+   spec.homepage      = 'https://github.com/toch/benchmark-lab'
+   spec.license       = 'GPLv3'
+
+   spec.files         = `git ls-files -z`.split("\x0")
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ['lib']
+
+   spec.add_development_dependency 'bundler', '~> 1.6'
+   spec.add_development_dependency 'rake'
+   spec.add_development_dependency 'minitest', '4.5.0'
+   spec.add_development_dependency 'turn', '~> 0.9'
+   spec.add_runtime_dependency 'distribution'
+ end
data/lib/benchmark/lab.rb ADDED
@@ -0,0 +1,147 @@
+ require 'benchmark'
+ require 'benchmark/lab/descriptive_statistics'
+ require 'benchmark/lab/mann_whitney_u_test'
+ require 'benchmark/lab/version'
+
+ require 'json'
+
+ module Benchmark
+   # A collection of repeated measurements of one benchmarked case.
+   class Sample
+     include Enumerable
+
+     def initialize
+       @observations = []
+     end
+
+     def <<(observation)
+       @observations << observation
+     end
+
+     def each(&block)
+       @observations.each(&block)
+     end
+   end
+
+   class Job
+     # Extend Benchmark::Job items with a Sample and a stats array:
+     # each entry becomes [label, block, sample, stats].
+     alias old_item item
+     def item(label = '', &blk)
+       old_item(label, &blk)
+       @list.last << Sample.new
+       @list.last << []
+       self
+     end
+
+     # Measure every registered case sample_size times and summarize
+     # each measured time (user, system, total, real).
+     def observe_and_summarize(sample_size)
+       @list.each do |label, item, sample, stats|
+         sample_size.times.each do
+           sample << Benchmark.measure(label, &item)
+         end
+
+         Experiment::MEASURED_TIMES.keys.each do |time_name|
+           stats << Benchmark::Experiment::DescriptiveStatistics.new(sample.map(&time_name), time_name)
+         end
+       end
+     end
+
+     alias report item
+   end
+
+   module Experiment
+     MEASURED_TIMES =
+       {
+         utime: 'user',
+         stime: 'system',
+         total: 'total',
+         real:  'real'
+       }
+
+     # Run the experiment and return the descriptive statistics as JSON.
+     def observe_and_summarize(sample_size, &blk)
+       job = Job.new(0)
+       yield(job)
+       job.observe_and_summarize(sample_size)
+       all_stats = job.list.map { |label, _, _, stats| [label, stats] }.to_h
+       all_stats.to_json
+     end
+
+     # Merge the parsed JSON summaries of several runs and rank them.
+     def aggregate_and_rank(jsons)
+       return if jsons.empty?
+       all_stats = jsons.inject({}) { |merged, json| json.merge(merged) }
+       rank(all_stats)
+     end
+
+     def experiment(sample_size, &blk)
+       all_stats = JSON.parse(observe_and_summarize(sample_size, &blk))
+       print_stats(all_stats)
+
+       best, is_the_best_significant = rank(all_stats)
+
+       puts "The best \"#{best['label']}\" is #{is_the_best_significant ? '' : 'not '}significantly (95%) better (total time)."
+
+       all_stats
+     end
+
+     # Rank the cases by their median total time and run a Mann-Whitney U
+     # test between the two best ones.
+     def rank(all_stats, alpha = 0.05)
+       ranked = all_stats.map do |label, stats|
+         total = stats.select { |stat| stat['name'] == 'total' }.first
+         total['label'] = label
+         total
+       end.sort_by { |stat| stat['median'] }
+       is_h0_rejected = true
+       if all_stats.size > 1
+         z = Benchmark::Experiment::MannWhitneyUTest::calculate_z(ranked.first['sample'], ranked[1]['sample'])
+         p_value = Benchmark::Experiment::MannWhitneyUTest::calculate_probability_z(z)
+         is_h0_rejected = Benchmark::Experiment::MannWhitneyUTest::is_null_hypothesis_rejected?(p_value, alpha)
+       end
+
+       return ranked.first, is_h0_rejected
+     end
+
+     def iterative_experiment
+     end
+
+     private
+
+     def print_stats(all_stats)
+       width = label_width(all_stats)
+
+       lines = []
+       spacing = [0] * MEASURED_TIMES.size
+       tab = ' ' * 4
+
+       all_stats.each do |label, stats|
+         line = ''
+         line << label.ljust(width)
+
+         stats.each_with_index do |stat, index|
+           value = "#{tab}[#{'%.2f' % stat['first_quartile']},#{'%.2f' % stat['median']},#{'%.2f' % stat['third_quartile']}]"
+           spacing[index] = [spacing[index], value.length].max
+           line << value
+         end
+         line << "\n"
+         lines << line
+       end
+
+       print ''.ljust(width)
+       MEASURED_TIMES.values.each_with_index do |head, index|
+         print "#{tab}#{head}".ljust(spacing[index])
+       end
+       print "\n"
+
+       lines.each { |line| print line }
+     end
+
+     def label_width(all_stats)
+       all_stats.map { |label, _| label.to_s.length }.max
+     end
+   end
+
+   extend Benchmark::Experiment
+ end
data/lib/benchmark/lab/descriptive_statistics.rb ADDED
@@ -0,0 +1,63 @@
+ require 'json'
+
+ module Benchmark
+   module Experiment
+     # Summarizes a sample with its five-number summary (minimum, quartiles,
+     # maximum) and interquartile range.
+     class DescriptiveStatistics
+       def initialize(sample, name = '')
+         # raise exception if empty sample
+         @name = name
+         @sample = sample.sort
+         @minimum, @maximum = @sample.minmax
+         @median = calculate_median_of(@sample)
+         @first_quartile = calculate_first_quartile_of(@sample)
+         @third_quartile = calculate_third_quartile_of(@sample)
+       end
+
+       attr_reader :name, :sample, :minimum, :maximum, :first_quartile, :third_quartile, :median
+
+       def sample_size
+         sample.size
+       end
+
+       def interquartile_range
+         @third_quartile - @first_quartile
+       end
+
+       def to_json(options = {})
+         {
+           'name' => name,
+           'sample' => sample,
+           'sample_size' => sample_size,
+           'minimum' => minimum,
+           'maximum' => maximum,
+           'first_quartile' => first_quartile,
+           'third_quartile' => third_quartile,
+           'median' => median,
+           'interquartile_range' => interquartile_range
+         }.to_json
+       end
+
+       private
+
+       # https://en.wikipedia.org/wiki/Median
+       def calculate_median_of(data)
+         return data[data.size / 2] if data.size.odd?
+
+         (data[(data.size - 1) / 2] + data[data.size / 2]) / 2.0
+       end
+
+       # http://mathworld.wolfram.com/Quartile.html
+       # https://en.wikipedia.org/wiki/Quartile
+       # The quartiles are the medians of the lower and upper halves of the
+       # sorted sample; for odd-sized samples the overall median belongs to
+       # both halves.
+       def calculate_first_quartile_of(data)
+         return calculate_median_of(data[0..(data.size / 2)]) if data.size.odd?
+
+         calculate_median_of(data[0..((data.size - 1) / 2)])
+       end
+
+       def calculate_third_quartile_of(data)
+         calculate_median_of(data[(data.size / 2)..-1])
+       end
+     end
+   end
+ end
data/lib/benchmark/lab/mann_whitney_u_test.rb ADDED
@@ -0,0 +1,114 @@
+ require 'distribution'
+
+ module Benchmark
+   module Experiment
+     # Mann-Whitney U test (normal approximation) used to compare two samples.
+     module MannWhitneyUTest
+       # Returns the U statistics [U_x, U_y] of the two samples.
+       def self.calculate_U(x, y)
+         ranked = concatenate_and_label(x, y)
+
+         rank!(ranked)
+
+         adjust_ties!(ranked)
+
+         r_x = rank_sum(ranked, :x)
+         r_y = rank_sum(ranked, :y)
+
+         n_x = x.size
+         n_y = y.size
+
+         u_x = calculate_u_sample(n_x, n_y, r_x)
+         u_y = calculate_u_sample(n_y, n_x, r_y)
+
+         [u_x, u_y]
+       end
+
+       # z-score of the smaller U under the normal approximation, with a
+       # tie-corrected standard deviation when there are ties.
+       def self.calculate_z(x, y)
+         n_x = x.size.to_f
+         n_y = y.size.to_f
+         n = n_x + n_y
+         n_xy = n_x * n_y
+
+         u = calculate_U(x, y).min.to_f
+
+         t = ties?(x, y)
+
+         mu_u = n_xy / 2.0
+
+         if !t.first
+           sigma_u = Math::sqrt(n_xy * (n + 1.0) / 12.0)
+         else
+           sigma_u = Math::sqrt(n_xy / (n * (n - 1)) * ((n**3 - n) / 12.0 - t.last))
+         end
+
+         (u - mu_u) / sigma_u
+       end
+
+       def self.calculate_probability_z(z, two_sided = true)
+         prob = (1.0 - Distribution::Normal.cdf(z.abs))
+         prob *= 2.0 if two_sided
+         prob
+       end
+
+       def self.is_null_hypothesis_rejected?(pvalue, significance_level)
+         pvalue < significance_level
+       end
+
+       private
+
+       # Returns [ties_found, sum of (t**3 - t) / 12 over the tie groups].
+       def self.ties?(x, y)
+         all = x + y
+         ties = all.group_by { |e| e }.reject { |_, v| v.size < 2 }
+
+         found_ties = ties.size > 0
+         [
+           found_ties,
+           ties.values.inject(0) { |a, v| a + (v.size**3 - v.size) / 12.0 }
+         ]
+       end
+
+       def self.concatenate_and_label(x, y)
+         ranked = []
+
+         ranked += x.map { |e| [e, :x] }
+         ranked += y.map { |e| [e, :y] }
+       end
+
+       def self.rank!(ranked)
+         ranked.sort!
+
+         ranked.inject(1) do |rank, elem|
+           elem << rank
+           rank + 1
+         end
+       end
+
+       def self.rank_sum(ranked, label)
+         ranked
+           .select { |elem| elem[1] == label }
+           .inject(0) { |rank_sum, elem| rank_sum + elem.last }
+       end
+
+       # Replace the rank of tied observations with the mean rank of their group.
+       def self.adjust_ties!(ranked)
+         ties = {}
+
+         ranked
+           .group_by { |e| e.first }
+           .reject { |_, v| v.size < 2 }
+           .each do |score, data|
+             ties[score] = data.inject(0) do |sum, elem|
+               sum + elem.last
+             end / data.size.to_f
+           end
+
+         ranked.map! do |elem|
+           elem[-1] = ties[elem.first] if ties.keys.include? elem.first
+           elem
+         end if ties.keys.size > 0
+       end
+
+       def self.calculate_u_sample(n, n_other, r)
+         n * n_other + n * (n + 1) / 2.0 - r
+       end
+     end
+   end
+ end