ppbench 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
#!/bin/bash
# Fail fast: -e aborts on any error, -u errors on unset variables,
# -o pipefail propagates failures through pipelines.
set -euo pipefail
# Restrict word splitting to newlines and tabs (safer handling of
# file names containing spaces).
IFS=$'\n\t'

# Install the gem dependencies declared in the Gemfile.
bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,536 @@
1
+ require "ppbench/version"
2
+ require "parallel"
3
+ require "csv"
4
+ require "httpclient"
5
+ require "progressbar"
6
+ require "thread"
7
+ require "json"
8
+ require "timeout"
9
+ require "descriptive_statistics"
10
+
11
+ module Ppbench
12
+
13
# Installs the naming map (parsed JSON hash with optional 'machines' and
# 'experiments' sub-hashes) used to translate tags into human readable names.
def self.naming=(json)
  @naming = json
end

# Translates a machine tag into its human readable name.
# Falls back to the tag itself when no naming map was installed, the map
# is empty, the 'machines' section is missing, or the tag is unknown.
def self.machine(key)
  return key if @naming.nil? || @naming.empty?
  name = (@naming['machines'] || {})[key]
  name.nil? ? key : name
end

# Translates an experiment tag into its human readable name.
# Falls back to the tag itself when no naming map was installed, the map
# is empty, the 'experiments' section is missing, or the tag is unknown.
def self.experiment(key)
  return key if @naming.nil? || @naming.empty?
  name = (@naming['experiments'] || {})[key]
  name.nil? ? key : name
end
28
+
29
# Global sampling precision (number of message-length buckets) used by
# bands() and add_comparisonplot(); set once from the CLI --precision flag.
class << self
  attr_accessor :precision
end
36
+
37
# Builds the error message printed when a message-length bucket contains no
# samples, i.e. the collected data is too sparse for the configured
# precision. Used by bands() and add_comparisonplot() just before exiting.
#
# length:: approximate message length (bytes) of the empty bucket
def self.precision_error(length)
  <<~MESSAGE

    Sorry, we have not enough data for messages of about #{length} byte length.
    You may want to reduce the precision with the global --precision flag.
    Current precision is #{Ppbench::precision}.
    So you could collect more data (preferred) or reduce the precision value.
  MESSAGE
end
45
+
46
# Palette of RGB component triples (interpolated into R's rgb()) used to
# color successive data series.
# NOTE(review): plotter/comparison_plotter as written assign this array to
# a local and call shift on it, which mutates this constant across calls —
# verify those call sites before adding .freeze here.
R_COLORS = [
  '0.5,0.5,0.5',
  '0.96,0.26,0.21',
  '0.25,0.31,0.71',
  '0.13,0.59,0.95',
  '0,0.59,0.53',
  '0.30,0.69,0.31',
  '0.8,0.86,0.22',
  '1,0.6,0.03',
  '1,0.6,0',
  '1,0.34,0.13'
]

# R plot symbol (pch code) used for all series when points are suppressed.
R_NO_SYMBOL = "16"

# R vector literal of pch symbol codes, one per data series.
R_SYMBOLS = "c(1,2,3,4,5,6,7,8,9,10)"

# Column headers of the CSV benchmark log written by run_bench and read
# back by load_data.
LOG_HEADER = [
  "Machine Tag",
  "Experiment Tag",
  "Document Path",
  "Failed requests",
  "Concurrency Level",
  "Total transferred",
  "Time per request",
  "Transfer rate",
  "Requests per second",
  "Retries",
  "Response Code"
]
76
+
77
# Runs a benchmark against a host and stores benchmark data in a log file.
#
# Samples (max - min) * coverage random message lengths in min..max,
# requests /mping/<length> from the host `repetitions` times each (with
# `concurrency` threads in parallel) and appends one CSV row per sampled
# length to `log` (columns: LOG_HEADER).
#
# host::           base URL of the ppbench web service
# log::            path of the CSV log file to (over)write
# machine_tag::    machine label written into every row
# experiment_tag:: experiment label written into every row
# timeout::        per-request timeout in seconds
def self.run_bench(host, log, machine_tag: '', experiment_tag: '', timeout: 60, repetitions: 10, coverage: 0.1, min: 1, max: 500000, concurrency: 10)
  rounds = ((max - min) * coverage).to_i

  CSV.open(log, 'w', write_headers: true, headers: Ppbench::LOG_HEADER, force_quotes: true) do |logger|

    logfile = Mutex.new
    progress = ProgressBar.new("Running", rounds)

    webclient = HTTPClient.new

    Parallel.each(1.upto(rounds), in_threads: concurrency) do |_|

      length = Random.rand(min..max)
      document = "/mping/#{length}"

      results = {
        duration: [],
        length: [],
        code: [],
        retries: [],
        fails: []
      }
      begin
        1.upto(repetitions) do
          answer = {}
          Timeout.timeout(timeout) do
            response = webclient.get("#{host}#{document}").body
            answer = JSON.parse(response)
          end
          results[:duration] << answer['duration']
          results[:length] << answer['length']
          results[:code] << answer['code']
          results[:retries] << answer['retries']
          results[:fails] << (answer['code'] == 200 ? 0 : 1)
        end
      rescue StandardError => e
        # Best effort: report the failed request and keep benchmarking.
        # (Was `rescue Exception`, which would also have swallowed signals,
        # SystemExit and NoMemoryError.)
        print("Timeout of '#{host}#{document}'")
        print("#{e}")
      end

      unless results[:duration].empty?
        time_taken = results[:duration].mean  # in milliseconds
        length = results[:length].median      # message length
        transfer_rate = results[:length].sum * 1000 / results[:duration].sum
        code = results[:code].first           # HTTP response code
        retries = results[:retries].sum       # Amount of retries
        failed = results[:fails].sum          # Amount of fails

        requests_per_second = 1000 / time_taken

        # Progress bar and CSV writer are shared across worker threads.
        logfile.synchronize do
          progress.inc

          logger << [
            "#{machine_tag}",
            "#{experiment_tag}",
            "#{document}",
            "#{failed}",
            "#{concurrency}",
            "#{length}",
            "#{time_taken}",
            "#{transfer_rate}",
            "#{requests_per_second}",
            "#{retries}",
            "#{code}"
          ]
        end
      end
    end
  end
end
151
+
152
# Load CSV files and convert them to a better analyzable format:
# a flat list of symbol-keyed hashes, one per logged benchmark row.
# Columns absent from a file map to nil.
def self.load_data(files)
  # field => [CSV header, conversion method (nil keeps the raw string)]
  spec = {
    :experiment    => ['Experiment Tag', nil],
    :machine       => ['Machine Tag', nil],
    :document      => ['Document Path', nil],
    :length        => ['Total transferred', :to_i],
    :failed        => ['Failed requests', :to_i],
    :tpr           => ['Time per request', :to_f],
    :transfer_rate => ['Transfer rate', :to_f],
    :rps           => ['Requests per second', :to_f],
    :retries       => ['Retries', :to_i],
    :response_code => ['Response Code', :to_i]
  }

  files.flat_map do |file|
    CSV.read(file, headers: true).map do |row|
      spec.each_with_object({}) do |(field, (header, cast)), record|
        record[field] =
          if row.key?(header)
            cast ? row[header].send(cast) : row[header]
          else
            nil
          end
      end
    end
  end
end
174
+
175
# Filter benchmark data.
#
# Keeps only entries with a positive time-per-request, at most `fails`
# failed requests and a message length of at most `maxsize`. When
# `machines` / `experiments` are non-empty, an entry must additionally
# belong to one of the listed machines / experiments.
def self.filter(data, maxsize: 2 ** 64, experiments: [], machines: [], fails: 0)
  data.select do |entry|
    entry[:tpr] > 0 &&
      entry[:failed] <= fails &&
      entry[:length] <= maxsize &&
      (machines.empty? || machines.include?(entry[:machine])) &&
      (experiments.empty? || experiments.include?(entry[:experiment]))
  end
end
184
+
185
# Aggregate benchmark data into a two-level lookup keyed by experiment
# tag, then machine tag:
# {
#   'weave'  => { 'm.large' => [{ machine:, experiment:, document:, ... }, ...] },
#   'docker' => { ... },
#   'bare'   => { ... }
# }
def self.aggregate(data)
  data.group_by { |entry| entry[:experiment] }
      .transform_values { |entries| entries.group_by { |entry| entry[:machine] } }
end
204
+
205
# Determines the biggest value of the `of` field across aggregated data
# (as produced by aggregate). Returns 0 for empty data and never returns
# less than 0 (matching the original accumulator seeded with 0).
def self.maximum(data, of: :tpr)
  # Flatten the experiment -> machine -> [entries] nesting into one list.
  entries = data.values.flat_map { |machines| machines.values.flatten(1) }
  entries.reduce(0) { |best, entry| entry[of] > best ? entry[of] : best }
end
217
+
218
# Prepares a plot to present absolute values: emits the R preamble that
# opens an empty plot with fixed axis limits and, when the TCP receive
# window fits into the plotted length range, dashed vertical lines at
# every multiple of the receive window.
def self.prepare_plot(
  maxy,
  receive_window: 87380,
  length: 500000,
  xaxis_title: "Message Length",
  xaxis_unit: "kB",
  yaxis_title: "Transfer Rate",
  yaxis_unit: "MB/sec",
  title: "Data Transfer Rates",
  subtitle: ""
)
  window_marks =
    if receive_window == 0 || receive_window >= length
      ''
    else
      "abline(v = seq(#{receive_window}, #{length}, by=#{receive_window}), lty='dashed')"
    end

  """
  plot(x=c(0), y=c(0), xlim=c(0, #{length}), ylim=c(0, #{maxy}), main='#{title}\\n(#{subtitle})', xlab='#{xaxis_title} (#{xaxis_unit})', ylab='#{yaxis_title} (#{yaxis_unit})', xaxt='n', yaxt='n', pch=NA)
  #{window_marks}
  """
end
238
+
239
# Prepares a plot to present relative comparisons: same R preamble as
# prepare_plot but with defaults tuned for ratio plots (y axis in percent
# of a reference experiment).
def self.prepare_comparisonplot(
  maxy,
  receive_window: 87300,
  length: 50000,
  xaxis_title: "Message Length (kB)",
  xaxis_unit: "kB",
  yaxis_title: "Relative performance compared with reference experiment (%)",
  yaxis_unit: "%",
  title: "Relative performance (Data Transfer Rate)",
  subtitle: ""
)
  window_marks =
    if receive_window == 0 || receive_window >= length
      ''
    else
      "abline(v = seq(#{receive_window}, #{length}, by=#{receive_window}), lty='dashed')"
    end

  """
  plot(x=c(0), y=c(0), xlim=c(0, #{length}), ylim=c(0, #{maxy}), main='#{title}\\n(#{subtitle})', xlab='#{xaxis_title} (#{xaxis_unit})', ylab='#{yaxis_title} (#{yaxis_unit})', xaxt='n', yaxt='n', pch=NA)
  #{window_marks}
  """
end
259
+
260
# Adds a serie to a plot: a scatter layer (suppressed by no_points) plus
# an optional median line with confidence bands (enabled by with_bands).
# Returns the combined R snippet; a disabled layer contributes nothing.
def self.add_series(
  data,
  to_plot: :tpr,
  color: 'grey',
  symbol: 1,
  alpha: 0.15,
  length: 500000,
  confidence: 90,
  no_points: false,
  with_bands: false
)
  scatter_layer = no_points ? nil : points(data, to_plot: to_plot, color: color, symbol: symbol, alpha: alpha)
  band_layer = with_bands ? bands(data, to_plot: to_plot, color: color, length: length, confidence: confidence) : nil

  """
  #{scatter_layer}
  #{band_layer}
  """
end
278
+
279
# Adds a compare line to a comparison plot.
#
# Buckets both the reference serie and the compared serie into n
# equal-width message-length intervals, takes the median of each bucket,
# and emits an R snippet drawing the smoothed ratio serie/reference.
# Exits the process (after printing precision_error) when any bucket is
# empty.
def self.add_comparisonplot(
  reference,
  serie,
  to_plot: :tpr,
  color: 'grey',
  symbol: 1,
  length: 500000,
  n: Ppbench::precision,
  nknots: 20
)
  step = length / n
  ref_values = median_per_interval(reference, to_plot, n, step)
  serie_values = median_per_interval(serie, to_plot, n, step)

  xs = []
  ys = []

  # Ratio of the compared serie to the reference, interval by interval.
  ref_values.each do |x, y|
    if serie_values.key? x
      xs << x
      ys << serie_values[x] / y
    end
  end

  """
  xs=c(#{ xs * ',' })
  ys=c(#{ ys * ',' })
  median <- smooth.spline(xs, ys, nknots=#{nknots})
  lines(median, lwd=2, col=rgb(#{color}))
  """
end

# Internal helper: maps every interval upper bound (i * step for i = 1..n)
# to the median of the to_plot values whose message length falls into
# [(i-1)*step, i*step). Prints precision_error and exits on an empty
# interval. (Array#median comes from the descriptive_statistics gem.)
def self.median_per_interval(entries, to_plot, n, step)
  pairs = entries.map { |v| [v[:length], v[to_plot]] }
  1.upto(n).map do |i|
    vs = pairs.select { |p| p[0] < i * step && p[0] >= (i - 1) * step }.map { |p| p[1] }

    if vs.empty?
      $stderr.puts precision_error(i * step)
      exit!
    end

    [
      i * step,
      vs.median
    ]
  end.to_h
end
339
+
340
# Generates scatter plot of points for plots: emits an R snippet that
# draws one point per entry at (message length, to_plot value) in the
# given color, transparency and pch symbol.
def self.points(data, to_plot: :tpr, color: 'grey', alpha: 0.15, symbol: 1)
  x_values = data.map { |entry| entry[:length] }
  y_values = data.map { |entry| entry[to_plot] }

  """
  xs = c(#{x_values.join(',')})
  ys = c(#{y_values.join(',')})
  points(x=xs,y=ys, col=rgb(#{color},alpha=#{ alpha }), pch=#{ symbol })
  """
end
353
+
354
# Generates median lines and confidence bands for plots.
#
# Buckets data into n equal-width message-length intervals, computes the
# median and four percentile bounds per interval, and emits an R snippet
# that draws two smoothed confidence polygons plus a median line.
# Prints precision_error and exits the process if any interval is empty.
# Array#median / Array#percentile come from the descriptive_statistics gem.
def self.bands(data, to_plot: :tpr, n: Ppbench::precision, length: 500000, color: 'grey', confidence: 90, nknots: 15)

  step = length / n
  points = data.map { |v| [v[:length], v[to_plot]] }
  # [interval upper bound, values with length in [(i-1)*step, i*step)] for i = 1..n
  values = 1.upto(n).map do |i|
    [
      i * step,
      points.select { |p| p[0] < i * step && p[0] >= (i - 1) * step }.map { |p| p[1] }
    ]
  end

  # Percentile bounds for the outer and inner ("semi") bands.
  # NOTE(review): this is integer arithmetic when confidence is an Integer
  # (e.g. 90 -> 95/5 and 73/27) — confirm that is intended before changing.
  upper_confidence = 100 - (100 - confidence) / 2
  semi_upper_confidence = 100 - (100 - confidence / 2) / 2
  lower_confidence = (100 - confidence) / 2
  semi_lower_confidence = (100 - confidence / 2) / 2

  summary = values.map do |x,vs|

    if vs.empty?
      $stderr.puts precision_error(x)
      exit!
    end

    {
      :x => x,
      :lower => vs.percentile(lower_confidence),
      :semi_lower => vs.percentile(semi_lower_confidence),
      :median => vs.median,
      :semi_upper => vs.percentile(semi_upper_confidence),
      :upper => vs.percentile(upper_confidence)
    }
  end

  # Render each statistic as an R numeric vector literal.
  xs = "c(#{summary.map { |v| v[:x] } * ','})"
  medians = "c(#{summary.map { |v| v[:median] } * ','})"
  lowers = "c(#{summary.map { |v| v[:lower] } * ','})"
  semi_lowers = "c(#{summary.map { |v| v[:semi_lower] } * ','})"
  uppers = "c(#{summary.map { |v| v[:upper] } * ','})"
  semi_uppers = "c(#{summary.map { |v| v[:semi_upper] } * ','})"

  """
  xs = #{xs}
  medians = #{medians}
  lowers = #{lowers}
  semi_lowers = #{semi_lowers}
  uppers = #{uppers}
  semi_uppers = #{semi_uppers}

  low <- smooth.spline(xs, lowers, nknots=#{nknots})
  semi_low <- smooth.spline(xs, semi_lowers, nknots=#{nknots})
  up <- smooth.spline(xs, uppers, nknots=#{nknots})
  semi_up <- smooth.spline(xs, semi_uppers, nknots=#{nknots})
  median <- smooth.spline(xs, medians, nknots=#{nknots})
  polygon(c(low$x, rev(up$x)), c(low$y, rev(up$y)), col = rgb(#{color},alpha=0.10), border=NA)
  polygon(c(semi_low$x, rev(semi_up$x)), c(semi_low$y, rev(semi_up$y)), col = rgb(#{color},alpha=0.15), border=NA)
  lines(median, lwd=2, col=rgb(#{color}))
  lines(low, col=rgb(#{color},alpha=0.50), lty='dashed', lwd=0.5)
  lines(up, col=rgb(#{color},alpha=0.50), lty='dashed', lwd=0.5)
  """


end
418
+
419
# Generates an R plot output script which can be used for plotting benchmark data
# as scatter plot with optional confidence bands.
#
# data must be in the aggregated form produced by Ppbench.aggregate; one
# series is added per (experiment, machine) combination present in data,
# iterated in the order of the experiments/machines arguments.
def self.plotter(
  data,
  to_plot: :tpr,
  machines: [],
  experiments: [],
  receive_window: 87380,
  xaxis_max: 500000,
  confidence: 90,
  no_points: false,
  with_bands: false,
  yaxis_max: 10000000,
  yaxis_steps: 10,
  xaxis_steps: 10,
  xaxis_title: "",
  xaxis_unit: "",
  xaxis_divisor: 1000,
  yaxis_title: "",
  yaxis_unit: "",
  yaxis_divisor: 1000000,
  title: "",
  subtitle: "",
  legend_position: "topright"
)
  series_data = []
  series_names = []
  # dup is essential: shift below would otherwise mutate the R_COLORS
  # constant and exhaust the palette on every subsequent plotter call.
  series_colors = R_COLORS.dup

  experiments.each do |exp|
    machines.each do |machine|
      next unless (data.include? exp) && (data[exp].include? machine)
      series_data << data[exp][machine]
      series_names << "'#{Ppbench::experiment(exp)} on #{Ppbench::machine(machine)}'"
    end
  end

  colors = "c(#{series_colors.map { |c| "rgb(#{c})" } * ','})"

  sym = 1
  r = "#{prepare_plot(yaxis_max, receive_window: receive_window, length: xaxis_max, title: title, xaxis_title: xaxis_title, xaxis_unit: xaxis_unit, yaxis_title: yaxis_title, yaxis_unit: yaxis_unit, subtitle: subtitle)}\n"

  series_data.each do |serie|
    r += add_series(serie, to_plot: to_plot, with_bands: with_bands, no_points: no_points, color: series_colors.shift, symbol: sym, length: xaxis_max, confidence: confidence)
    sym += 1
  end

  symbols = no_points ? R_NO_SYMBOL : R_SYMBOLS

  r + """
  xa = seq(0, #{xaxis_max}, by=#{xaxis_max/xaxis_steps})
  ya = seq(0, #{yaxis_max}, by=#{yaxis_max/yaxis_steps})
  axis(1, at = xa, labels = paste(xa/#{xaxis_divisor}, '#{xaxis_unit}', sep = ' ' ))
  axis(2, at = ya, labels = paste(ya/#{yaxis_divisor}, '#{yaxis_unit}', sep = ' ' ))
  legend('#{legend_position}', cex=0.9, pch=#{symbols}, col=#{colors}, c(#{series_names * ',' }),box.col=rgb(1,1,1,0), bg=rgb(1,1,1,0.75))
  """
end
478
+
479
# Generates an R plot output script which can be used for plotting comparison plots
# of benchmark data.
#
# data must be in the aggregated form produced by Ppbench.aggregate. The
# first (experiment, machine) combination actually present in data becomes
# the reference serie: every serie (including itself) is plotted as a
# ratio against it, and its legend entry is prefixed with 'Reference: '.
def self.comparison_plotter(
  data,
  yaxis_max: 1.5,
  to_plot: :transfer_rate,
  machines: [],
  experiments: [],
  receive_window: 87380,
  xaxis_max: 500000,
  xaxis_steps: 10,
  xaxis_title: "",
  xaxis_unit: "",
  xaxis_divisor: 1000,
  yaxis_title: "",
  yaxis_unit: "%",
  title: "",
  subtitle: "",
  legend_position: "topright"
)
  series_data = []
  series_names = []
  # dup is essential: shift below would otherwise mutate the R_COLORS
  # constant and exhaust the palette on subsequent calls.
  series_colors = R_COLORS.dup

  first_series = true
  experiments.each do |exp|
    machines.each do |machine|
      next unless (data.include? exp) && (data[exp].include? machine)
      # Flip the flag only when a series is actually added, so the
      # 'Reference: ' label always marks series_data.first (previously the
      # flag was cleared even for combinations absent from data).
      prefix = first_series ? 'Reference: ' : ''
      first_series = false
      series_data << data[exp][machine]
      series_names << "'#{prefix}#{Ppbench::experiment(exp)} on #{Ppbench::machine(machine)}'"
    end
  end

  colors = "c(#{series_colors.map { |c| "rgb(#{c})" } * ','})"

  sym = 1
  r = "#{prepare_comparisonplot(yaxis_max, receive_window: receive_window, length: xaxis_max, title: title, subtitle: subtitle, xaxis_title: xaxis_title, xaxis_unit: xaxis_unit, yaxis_title: yaxis_title, yaxis_unit: yaxis_unit)}\n"

  reference = series_data.first

  series_data.each do |serie|
    r += add_comparisonplot(reference, serie, to_plot: to_plot, color: series_colors.shift, symbol: sym, length: xaxis_max)
    sym += 1
  end

  r + """
  xa = seq(0, #{xaxis_max}, by=#{xaxis_max/xaxis_steps})
  ya = seq(0, #{yaxis_max}, by=#{0.1})
  axis(1, at = xa, labels = paste(xa/#{xaxis_divisor}, '#{xaxis_unit}', sep = '' ))
  axis(2, at = ya, labels = paste(ya * 100, '#{yaxis_unit}', sep = '' ))
  legend('#{legend_position}', cex=0.9, pch=c(#{R_NO_SYMBOL}), col=#{colors}, c(#{series_names * ',' }),box.col=rgb(1,1,1,0), bg=rgb(1,1,1,0.75))
  """
end
536
+ end