benchmark-lab 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,103 @@
+ [![PullReview stats](https://www.pullreview.com/github/toch/benchmark-lab/badges/master.svg?)](https://www.pullreview.com/github/toch/benchmark-lab/reviews/master)
+ 
+ # Benchmark Lab
+ 
+ Run Real Experiments and Calculate Non-Parametric Statistics.
+ 
+ ## Installation
+ 
+ Install it yourself as:
+ 
+     $ gem install benchmark-lab
+ 
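+ Or add it to your application's `Gemfile` and install it with Bundler (a minimal sketch of the usual Bundler workflow):
+ 
+ ```Ruby
+ # Gemfile
+ source 'https://rubygems.org'
+ 
+ gem 'benchmark-lab'
+ ```
+ 
+ Then run `bundle install`.
+ 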
+ ## Usage
+ 
+ There are two ways to use it:
+ 1. classic: as Benchmark.bm does
+ 2. iterative: collect and measure each case separately, store the summaries in
+    separate JSON files, then put everything together and rank them
+ 
+ ### Classic Usage
+ 
+ ```Ruby
+ require 'benchmark/lab'
+ 
+ n = 5_000_000
+ cases = {
+   'for:' => proc { for i in 1..n; a = "1"; end },
+   'times:' => proc { n.times do ; a = "1"; end },
+   'upto:' => proc { 1.upto(n) do ; a = "1"; end }
+ }
+ 
+ # How many times to run each block
+ # 20 is a good minimum
+ nbr_of_samples = 20
+ 
+ Benchmark.experiment(nbr_of_samples) do |x|
+   cases.each { |label, blk| x.report(label, &blk) }
+ end
+ ```
+ 
+ The output looks like the following:
+ ```
+           user                system              total               real
+ for:      [0.77,0.77,0.78]    [0.00,0.00,0.00]    [0.77,0.77,0.78]    [0.77,0.77,0.78]
+ times:    [0.74,0.74,0.74]    [0.00,0.00,0.00]    [0.74,0.74,0.74]    [0.74,0.74,0.74]
+ upto:     [0.75,0.75,0.75]    [0.00,0.00,0.00]    [0.75,0.75,0.75]    [0.75,0.75,0.75]
+ The best "times:" is significantly (95%) better (total time).
+ ```
+ 
+ ### Iterative Usage
+ 
+ ```Ruby
+ require 'benchmark/lab'
+ 
+ n = 5_000_000
+ 
+ # How many times to run each block
+ # 20 is a good minimum
+ nbr_of_samples = 20
+ 
+ jsons = []
+ 
+ jsons << Benchmark.observe_and_summarize(nbr_of_samples) do |x|
+   x.report('for') { for i in 1..n; a = "1"; end }
+ end
+ 
+ jsons << Benchmark.observe_and_summarize(nbr_of_samples) do |x|
+   x.report('times') { n.times do ; a = "1"; end }
+ end
+ 
+ jsons << Benchmark.observe_and_summarize(nbr_of_samples) do |x|
+   x.report('upto') { 1.upto(n) do ; a = "1"; end }
+ end
+ 
+ best, is_h0_rejected = Benchmark.aggregate_and_rank(jsons.map { |json| JSON.parse(json) })
+ 
+ puts best
+ puts is_h0_rejected
+ ```
+ 
+ The output looks like the following:
+ ```
+ {"name"=>"total", "sample"=>[0.6899999999999977, 0.6899999999999977, 0.6899999999999977, 0.6899999999999977, 0.6900000000000013, 0.6900000000000048, 0.6900000000000048, 0.6999999999999957, 0.6999999999999957, 0.6999999999999957, 0.6999999999999957, 0.6999999999999957, 0.6999999999999993, 0.6999999999999993, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028, 0.7000000000000028], "sample_size"=>20, "minimum"=>0.6899999999999977, "maximum"=>0.7000000000000028, "first_quartile"=>0.690000000000003, "third_quartile"=>0.7000000000000028, "median"=>0.6999999999999957, "interquartile_range"=>0.009999999999999787, "label"=>"upto"}
+ true
+ ```
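+ 
+ Because each case is observed and summarized on its own, the JSON summaries can also be written to disk and aggregated in a later run (for example, after switching commit or branch). A minimal sketch, with illustrative file names:
+ 
+ ```Ruby
+ require 'benchmark/lab'
+ require 'json'
+ 
+ n = 5_000_000
+ nbr_of_samples = 20
+ 
+ # Run this once per case and keep the summary on disk.
+ json = Benchmark.observe_and_summarize(nbr_of_samples) do |x|
+   x.report('times') { n.times do ; a = "1"; end }
+ end
+ File.write('times.json', json)
+ 
+ # Later, load every stored summary and rank them together.
+ jsons = Dir.glob('*.json').map { |path| JSON.parse(File.read(path)) }
+ best, is_h0_rejected = Benchmark.aggregate_and_rank(jsons)
+ 
+ puts best['label']
+ puts is_h0_rejected
+ ```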
+ 
+ ## Ideas
+ 
+ * compare two different implementations of the same function
+   1. get the stats, then compare
+   2. use git (commit, branch)
+   3. use tests to check for performance regressions at the same time
+   4. annotate the tests you want to check
+ * decide the sample size automatically (based on the statistical power you want to reach)
+ * explain clearly why we should do this
+ 
+ ## Contributing
+ 
+ 1. Fork it ( https://github.com/toch/benchmark-lab/fork )
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
+ require 'bundler/gem_tasks'
+ 
+ require 'rake/testtask'
+ 
+ Rake::TestTask.new do |t|
+   t.libs << 'spec'
+   t.test_files = FileList['spec/*/*_spec.rb']
+ end
+ 
+ task default: :test
data/benchmark-lab.gemspec ADDED
@@ -0,0 +1,26 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'benchmark/lab/version'
+ 
+ Gem::Specification.new do |spec|
+   spec.name = 'benchmark-lab'
+   spec.version = Benchmark::Experiment::VERSION
+   spec.authors = ['Christophe Philemotte']
+   spec.email = ['christophe.philemotte@8thcolor.com']
+   spec.summary = %q{Run Real Experiment and Calculate Non-Parametric Statistics.}
+   spec.description = %q{Run Real Experiment and Calculate Non-Parametric Statistics.}
+   spec.homepage = 'https://github.com/toch/benchmark-lab'
+   spec.license = 'GPLv3'
+ 
+   spec.files = `git ls-files -z`.split("\x0")
+   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ['lib']
+ 
+   spec.add_development_dependency 'bundler', '~> 1.6'
+   spec.add_development_dependency 'rake'
+   spec.add_development_dependency 'minitest', '4.5.0'
+   spec.add_development_dependency 'turn', '~> 0.9'
+   spec.add_runtime_dependency 'distribution'
+ end
data/lib/benchmark/lab.rb ADDED
@@ -0,0 +1,147 @@
+ require 'benchmark'
+ require 'benchmark/lab/descriptive_statistics'
+ require 'benchmark/lab/mann_whitney_u_test'
+ require 'benchmark/lab/version'
+ 
+ require 'json'
+ 
+ module Benchmark
+   class Sample
+     include Enumerable
+ 
+     def initialize
+       @observations = []
+     end
+ 
+     def <<(observation)
+       @observations << observation
+     end
+ 
+     def each(&block)
+       @observations.each do |observation|
+         if block_given?
+           block.call observation
+         else
+           yield observation
+         end
+       end
+     end
+   end
+ 
+   class Job
+     alias old_item item
+     def item(label = '', &blk)
+       old_item(label, &blk)
+       @list.last << Sample.new
+       @list.last << []
+       self
+     end
+ 
+     def observe_and_summarize(sample_size)
+       @list.each do |label, item, sample, stats|
+         sample_size.times.each do
+           sample << Benchmark.measure(label, &item)
+         end
+ 
+         Experiment::MEASURED_TIMES.keys.each do |time_name|
+           stats << Benchmark::Experiment::DescriptiveStatistics.new(sample.map(&time_name), time_name)
+         end
+       end
+     end
+ 
+     alias report item
+   end
+ 
+   module Experiment
+     MEASURED_TIMES =
+       {
+         utime: 'user',
+         stime: 'system',
+         total: 'total',
+         real: 'real'
+       }
+ 
+     def observe_and_summarize(sample_size, &blk)
+       job = Job.new(0)
+       yield(job)
+       job.observe_and_summarize(sample_size)
+       all_stats = job.list.map { |label, _, _, stats| [label, stats] }.to_h
+       all_stats.to_json
+     end
+ 
+     def aggregate_and_rank(jsons)
+       return if jsons.empty?
+       all_stats = jsons.inject({}) { |acc, hsh| hsh.merge(acc) }
+       rank(all_stats)
+     end
+ 
+     def experiment(sample_size, &blk)
+       all_stats = JSON.parse(observe_and_summarize(sample_size, &blk))
+       print_stats(all_stats)
+ 
+       best, is_the_best_significant = rank(all_stats)
+ 
+       puts "The best \"#{best['label']}\" is #{is_the_best_significant ? '' : 'not '}significantly (95%) better (total time)."
+ 
+       all_stats
+     end
+ 
+     def rank(all_stats, alpha = 0.05)
+       ranked = all_stats.map do |label, stats|
+         total = stats.select { |stat| stat['name'] == 'total' }.first
+         total['label'] = label
+         total
+       end.sort_by { |stat| stat['median'] }
+       is_h0_rejected = true
+       if all_stats.size > 1
+         z = Benchmark::Experiment::MannWhitneyUTest::calculate_z(ranked.first['sample'], ranked[1]['sample'])
+         p_value = Benchmark::Experiment::MannWhitneyUTest::calculate_probability_z(z)
+         is_h0_rejected = Benchmark::Experiment::MannWhitneyUTest::is_null_hypothesis_rejected?(p_value, alpha)
+       end
+ 
+       return ranked.first, is_h0_rejected
+     end
+ 
+     def iterative_experiment
+     end
+ 
+     private
+ 
+     def print_stats(all_stats)
+       width = label_width(all_stats)
+ 
+       lines = []
+       spacing = [0] * MEASURED_TIMES.size
+       tab = ' ' * 4
+ 
+       all_stats.each do |label, stats|
+         line = ''
+         line << label.ljust(width)
+ 
+         stats.each_with_index do |stat, index|
+           value = "#{tab}[#{'%.2f' % stat['first_quartile']},#{'%.2f' % stat['median']},#{'%.2f' % stat['third_quartile']}]"
+           spacing[index] = [spacing[index], value.length].max
+           line << value
+         end
+         line << "\n"
+         lines << line
+       end
+ 
+       print ''.ljust(width)
+       MEASURED_TIMES.values.each_with_index do |head, index|
+         print "#{tab}#{head}".ljust(spacing[index])
+       end
+       print "\n"
+ 
+       lines.each { |line| print line }
+     end
+ 
+     def label_width(all_stats)
+       label_widths = all_stats.map { |label, _| label.to_s.length }
+       label_widths.max
+     end
+ 
+   end
+ 
+   extend Benchmark::Experiment
+ end
data/lib/benchmark/lab/descriptive_statistics.rb ADDED
@@ -0,0 +1,63 @@
+ module Benchmark
+   module Experiment
+     class DescriptiveStatistics
+       def initialize(sample, name = '')
+         # raise exception if empty sample
+         @name = name
+         @sample = sample.sort
+         @minimum, @maximum = @sample.minmax
+         @median = calculate_median_of(@sample)
+         @first_quartile = calculate_first_quartile_of(@sample)
+         @third_quartile = calculate_third_quartile_of(@sample)
+       end
+ 
+       attr_reader :name, :sample, :minimum, :maximum, :first_quartile, :third_quartile, :median
+ 
+       def sample_size
+         sample.size
+       end
+ 
+       def interquartile_range
+         @third_quartile - @first_quartile
+       end
+ 
+       def to_json(options = {})
+         {
+           'name' => name,
+           'sample' => sample,
+           'sample_size' => sample_size,
+           'minimum' => minimum,
+           'maximum' => maximum,
+           'first_quartile' => first_quartile,
+           'third_quartile' => third_quartile,
+           'median' => median,
+           'interquartile_range' => interquartile_range
+         }.to_json
+       end
+ 
+       private
+ 
+       # https://en.wikipedia.org/wiki/Median
+       def calculate_median_of(data)
+         return data[data.size / 2] if data.size.odd?
+ 
+         (data[(data.size - 1) / 2] + data[data.size / 2]) / 2.0
+       end
+ 
+       # http://mathworld.wolfram.com/Quartile.html
+       # https://en.wikipedia.org/wiki/Quartile
+       def calculate_first_quartile_of(data)
+         return calculate_median_of(data[0..(data.size / 2)]) if data.size.odd?
+ 
+         calculate_median_of(data[0..((data.size - 1) / 2)])
+       end
+ 
+       def calculate_third_quartile_of(data)
+         return calculate_median_of(data[(data.size / 2)..-1]) if data.size.odd?
+ 
+         calculate_median_of(data[(data.size / 2)..-1])
+       end
+ 
+     end
+   end
+ end
data/lib/benchmark/lab/mann_whitney_u_test.rb ADDED
@@ -0,0 +1,114 @@
+ require 'distribution'
+ 
+ module Benchmark
+   module Experiment
+     module MannWhitneyUTest
+       def self.calculate_U(x, y)
+         ranked = concatenate_and_label(x, y)
+ 
+         rank!(ranked)
+ 
+         adjust_ties!(ranked)
+ 
+         r_x = rank_sum(ranked, :x)
+         r_y = rank_sum(ranked, :y)
+ 
+         n_x = x.size
+         n_y = y.size
+ 
+         u_x = calculate_u_sample(n_x, n_y, r_x)
+         u_y = calculate_u_sample(n_y, n_x, r_y)
+ 
+         [u_x, u_y]
+       end
+ 
+       def self.calculate_z(x, y)
+         n_x = x.size.to_f
+         n_y = y.size.to_f
+         n = n_x + n_y
+         n_xy = n_x * n_y
+ 
+         u = calculate_U(x, y).min.to_f
+ 
+         t = ties?(x, y)
+ 
+         mu_u = n_xy / 2.0
+ 
+         if !t.first
+           sigma_u = Math::sqrt(n_xy * (n + 1.0) / 12.0)
+         else
+           # tie-corrected variance: n_x n_y / (n (n - 1)) * ((n^3 - n) / 12 - sum((t^3 - t) / 12))
+           sigma_u = Math::sqrt(n_xy / (n * (n - 1)) * ((n**3 - n) / 12.0 - t.last))
+         end
+ 
+         (u - mu_u) / sigma_u
+       end
+ 
+       def self.calculate_probability_z(z, two_sided = true)
+         prob = (1.0 - Distribution::Normal.cdf(z.abs))
+         prob *= 2.0 if two_sided
+         prob
+       end
+ 
+       def self.is_null_hypothesis_rejected?(pvalue, significance_level)
+         pvalue < significance_level
+       end
+ 
+       private
+ 
+       def self.ties?(x, y)
+         all = x + y
+         ties = all.group_by { |e| e }.reject { |_, v| v.size < 2 }
+ 
+         found_ties = ties.size > 0
+         [
+           found_ties,
+           ties.inject(0) { |a, (_, group)| a + (group.size**3 - group.size) / 12.0 }
+         ]
+       end
+ 
+       def self.concatenate_and_label(x, y)
+         ranked = []
+ 
+         ranked += x.map { |e| [e, :x] }
+         ranked += y.map { |e| [e, :y] }
+       end
+ 
+       def self.rank!(ranked)
+         ranked.sort!
+ 
+         ranked.inject(1) do |rank, elem|
+           elem << rank
+           rank + 1
+         end
+       end
+ 
+       def self.rank_sum(ranked, label)
+         ranked
+           .select { |elem| elem[1] == label }
+           .inject(0) { |rank_sum, elem| rank_sum + elem.last }
+       end
+ 
+       def self.adjust_ties!(ranked)
+         ties = {}
+ 
+         ranked
+           .group_by { |e| e.first }
+           .reject { |_, v| v.size < 2 }
+           .each do |score, data|
+             ties[score] = data.inject(0) do |sum, elem|
+               sum + elem.last
+             end / data.size.to_f
+           end
+ 
+         ranked.map! do |elem|
+           elem[-1] = ties[elem.first] if ties.keys.include? elem.first
+           elem
+         end if ties.keys.size > 0
+       end
+ 
+       def self.calculate_u_sample(n, n_other, r)
+         n * n_other + n * (n + 1) / 2.0 - r
+       end
+     end
+   end
+ end