ppbench 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
#!/bin/bash
# Project setup script: installs the gem dependencies for a fresh checkout.
# -e: exit on any command failure; -u: error on unset variables;
# -o pipefail: a pipeline fails if any stage fails.
set -euo pipefail
# Restrict word splitting to newlines and tabs (safer handling of paths with spaces).
IFS=$'\n\t'

# Install the gems declared in the Gemfile.
bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,536 @@
1
+ require "ppbench/version"
2
+ require "parallel"
3
+ require "csv"
4
+ require "httpclient"
5
+ require "progressbar"
6
+ require "thread"
7
+ require "json"
8
+ require "timeout"
9
+ require "descriptive_statistics"
10
+
11
+ module Ppbench
12
+
13
+ def self.naming=(json)
14
+ @naming = json
15
+ end
16
+
17
+ def self.machine(key)
18
+ return key if @naming.empty?
19
+ name = @naming['machines'][key]
20
+ name == nil ? key : name
21
+ end
22
+
23
+ def self.experiment(key)
24
+ return key if @naming.empty?
25
+ name = @naming['experiments'][key]
26
+ name == nil ? key : name
27
+ end
28
+
29
+ def self.precision=(v)
30
+ @precision = v
31
+ end
32
+
33
+ def self.precision
34
+ @precision
35
+ end
36
+
37
+ def self.precision_error(length)
38
+ """
39
+ Sorry, we have not enough data for messages of about #{length} byte length.
40
+ You may want to reduce the precision with the global --precision flag.
41
+ Current precison is #{Ppbench::precision}.
42
+ So you could collect more data (preferred) or reduce the precision value.
43
+ """
44
+ end
45
+
46
  # RGB triples (components in 0..1) passed to R's rgb() to colour data series.
  # NOTE(review): plotter/comparison_plotter assign this array directly and then
  # #shift from it — without a dup the constant is emptied after one plot; any
  # caller consuming colours destructively must duplicate it first.
  R_COLORS = [
    '0.5,0.5,0.5',
    '0.96,0.26,0.21',
    '0.25,0.31,0.71',
    '0.13,0.59,0.95',
    '0,0.59,0.53',
    '0.30,0.69,0.31',
    '0.8,0.86,0.22',
    '1,0.6,0.03',
    '1,0.6,0',
    '1,0.34,0.13'
  ]

  # R plotting symbol (pch code) used when per-sample points are suppressed.
  R_NO_SYMBOL = "16"

  # R vector of plotting symbols (pch codes), one per data series.
  R_SYMBOLS = "c(1,2,3,4,5,6,7,8,9,10)"

  # Column headers of the CSV log written by run_bench and parsed by load_data.
  LOG_HEADER = [
    "Machine Tag",
    "Experiment Tag",
    "Document Path",
    "Failed requests",
    "Concurrency Level",
    "Total transferred",
    "Time per request",
    "Transfer rate",
    "Requests per second",
    "Retries",
    "Response Code"
  ]
76
+
77
+ # Runs a benchmark against a host and stores benchmark data in a log file.
78
+ #
79
+ def self.run_bench(host, log, machine_tag: '', experiment_tag: '', timeout: 60, repetitions: 10, coverage: 0.1, min: 1, max: 500000, concurrency: 10)
80
+ rounds = ((max - min) * coverage).to_i
81
+
82
+ CSV.open(log, 'w', write_headers: true, headers: Ppbench::LOG_HEADER, force_quotes: true) do |logger|
83
+
84
+ logfile = Mutex.new
85
+ progress = ProgressBar.new("Running", rounds)
86
+
87
+ webclient = HTTPClient.new
88
+
89
+ Parallel.each(1.upto(rounds), in_threads: concurrency) do |_|
90
+
91
+ length = Random.rand(min..max)
92
+ document = "/mping/#{length}"
93
+
94
+ results = {
95
+ duration: [],
96
+ length: [],
97
+ code: [],
98
+ retries: [],
99
+ fails: []
100
+ }
101
+ begin
102
+ #uri = URI("#{host}#{document}")
103
+ 1.upto(repetitions) do
104
+ answer = {}
105
+ Timeout::timeout(timeout) do
106
+ response = webclient.get("#{host}#{document}").body
107
+ answer = JSON.parse(response)
108
+ end
109
+ results[:duration] << answer['duration']
110
+ results[:length] << answer['length']
111
+ results[:code] << answer['code']
112
+ results[:retries] << answer['retries']
113
+ results[:fails] << (answer['code'] == 200 ? 0 : 1)
114
+ end
115
+ rescue Exception => e
116
+ print ("Timeout of '#{host}#{document}'")
117
+ print ("#{e}")
118
+ end
119
+
120
+ unless results[:duration].empty?
121
+ time_taken = results[:duration].mean # in milliseconds
122
+ length = results[:length].median # message length
123
+ transfer_rate = results[:length].sum * 1000 / results[:duration].sum
124
+ code = results[:code].first # HTTP response code
125
+ retries = results[:retries].sum # Amount of retries
126
+ failed = results[:fails].sum # Amount of fails
127
+
128
+ requests_per_second = 1000 / time_taken
129
+
130
+ logfile.synchronize do
131
+ progress.inc
132
+
133
+ logger << [
134
+ "#{machine_tag}",
135
+ "#{experiment_tag}",
136
+ "#{document}",
137
+ "#{failed}",
138
+ "#{concurrency}",
139
+ "#{length}",
140
+ "#{time_taken}",
141
+ "#{transfer_rate}",
142
+ "#{requests_per_second}",
143
+ "#{retries}",
144
+ "#{code}"
145
+ ]
146
+ end
147
+ end
148
+ end
149
+ end
150
+ end
151
+
152
+ # Load CSV files and conversion to better analyzable format (List of hashes)
153
+ #
154
+ def self.load_data(files)
155
+ files.map do |file|
156
+ rows = CSV.read(file, headers: true)
157
+
158
+ rows.map do |row|
159
+ {
160
+ :experiment => row.key?('Experiment Tag') ? row['Experiment Tag'] : nil,
161
+ :machine => row.key?('Machine Tag') ? row['Machine Tag'] : nil,
162
+ :document => row.key?('Document Path') ? row['Document Path'] : nil,
163
+ :length => row.key?('Total transferred') ? row['Total transferred'].to_i : nil,
164
+ :failed => row.key?('Failed requests') ? row['Failed requests'].to_i : nil,
165
+ :tpr => row.key?('Time per request') ? row['Time per request'].to_f : nil,
166
+ :transfer_rate => row.key?('Transfer rate') ? row['Transfer rate'].to_f : nil,
167
+ :rps => row.key?('Requests per second') ? row['Requests per second'].to_f : nil,
168
+ :retries => row.key?('Retries') ? row['Retries'].to_i : nil,
169
+ :response_code => row.key?('Response Code') ? row['Response Code'].to_i : nil
170
+ }
171
+ end
172
+ end.flatten
173
+ end
174
+
175
+ # Filter benchmark data.
176
+ #
177
+ def self.filter(data, maxsize: 2 ** 64, experiments: [], machines: [], fails: 0)
178
+ data.select { |entry| entry[:tpr] > 0 }
179
+ .select { |entry| entry[:failed] <= fails }
180
+ .select { |entry| entry[:length] <= maxsize }
181
+ .select { |entry| machines.include?(entry[:machine]) || machines.empty? }
182
+ .select { |entry| experiments.include?(entry[:experiment]) || experiments.empty? }
183
+ end
184
+
185
+ # Aggregate benchmark data.
186
+ # {
187
+ # 'weave': {
188
+ # 'm.large': [{ machine: String, experiment: String, document: String, length: value, tpr: Integer, ... }]
189
+ # }, ...
190
+ # },
191
+ # 'docker': { ... },
192
+ # 'bare': { ... }
193
+ # }
194
+ def self.aggregate(data)
195
+ experiments = data.group_by { |entry| entry[:experiment] }
196
+ experiments.map do |experiment, values|
197
+ machines = values.group_by { |entry| entry[:machine] }
198
+ [
199
+ experiment,
200
+ machines
201
+ ]
202
+ end.to_h
203
+ end
204
+
205
+ # Determines biggest value of aggregated data.
206
+ #
207
+ def self.maximum(data, of: :tpr)
208
+ y = 0
209
+ for experiment, machines in data
210
+ for machine, values in machines
211
+ m = values.max_by { |e| e[of] }
212
+ y = (y > m[of] ? y : m[of])
213
+ end
214
+ end
215
+ y
216
+ end
217
+
218
+ # Prepares a plot to present absolute values.
219
+ #
220
+ def self.prepare_plot(
221
+ maxy,
222
+ receive_window: 87380,
223
+ length: 500000,
224
+ xaxis_title: "Message Length",
225
+ xaxis_unit: "kB",
226
+ yaxis_title: "Transfer Rate",
227
+ yaxis_unit: "MB/sec",
228
+ title: "Data Transfer Rates",
229
+ subtitle: ""
230
+ )
231
+ recwindow = receive_window == 0 ? '' : "abline(v = seq(#{receive_window}, #{length}, by=#{receive_window}), lty='dashed')"
232
+
233
+ """
234
+ plot(x=c(0), y=c(0), xlim=c(0, #{length}), ylim=c(0, #{maxy}), main='#{title}\\n(#{subtitle})', xlab='#{xaxis_title} (#{xaxis_unit})', ylab='#{yaxis_title} (#{yaxis_unit})', xaxt='n', yaxt='n', pch=NA)
235
+ #{recwindow if receive_window < length }
236
+ """
237
+ end
238
+
239
+ # Prepares a plot to present relative comparisons.
240
+ #
241
+ def self.prepare_comparisonplot(
242
+ maxy,
243
+ receive_window: 87300,
244
+ length: 50000,
245
+ xaxis_title: "Message Length (kB)",
246
+ xaxis_unit: "kB",
247
+ yaxis_title: "Relative performance compared with reference experiment (%)",
248
+ yaxis_unit: "%",
249
+ title: "Relative performance (Data Transfer Rate)",
250
+ subtitle: ""
251
+ )
252
+ recwindow = receive_window == 0 ? '' : "abline(v = seq(#{receive_window}, #{length}, by=#{receive_window}), lty='dashed')"
253
+
254
+ """
255
+ plot(x=c(0), y=c(0), xlim=c(0, #{length}), ylim=c(0, #{maxy}), main='#{title}\\n(#{subtitle})', xlab='#{xaxis_title} (#{xaxis_unit})', ylab='#{yaxis_title} (#{yaxis_unit})', xaxt='n', yaxt='n', pch=NA)
256
+ #{recwindow if receive_window < length}
257
+ """
258
+ end
259
+
260
+ # Adds a serie to a plot.
261
+ #
262
+ def self.add_series(
263
+ data,
264
+ to_plot: :tpr,
265
+ color: 'grey',
266
+ symbol: 1,
267
+ alpha: 0.15,
268
+ length: 500000,
269
+ confidence: 90,
270
+ no_points: false,
271
+ with_bands: false
272
+ )
273
+ """
274
+ #{points(data, to_plot: to_plot, color: color, symbol: symbol, alpha: alpha) unless no_points }
275
+ #{bands(data, to_plot: to_plot, color: color, length: length, confidence: confidence) if with_bands }
276
+ """
277
+ end
278
+
279
+ # Adds a compare line to a comparison plot.
280
+ #
281
+ def self.add_comparisonplot(
282
+ reference,
283
+ serie,
284
+ to_plot: :tpr,
285
+ color: 'grey',
286
+ symbol: 1,
287
+ length: 500000,
288
+ n: Ppbench::precision,
289
+ nknots: 20
290
+ )
291
+ step = length / n
292
+ references = reference.map { |v| [v[:length], v[to_plot]] }
293
+ ref_values = 1.upto(n).map do |i|
294
+ vs = references.select { |p| p[0] < i * step && p[0] >= (i - 1) * step }.map { |p| p[1] }
295
+
296
+ if vs.empty?
297
+ $stderr.puts precision_error(i * step)
298
+ exit!
299
+ end
300
+
301
+ [
302
+ i * step,
303
+ vs.median
304
+ ]
305
+ end.to_h
306
+
307
+ series = serie.map { |v| [v[:length], v[to_plot]] }
308
+ serie_values = 1.upto(n).map do |i|
309
+ vs = series.select { |p| p[0] < i * step && p[0] >= (i - 1) * step }.map { |p| p[1] }
310
+
311
+ if vs.empty?
312
+ $stderr.puts precision_error(i * step)
313
+ exit!
314
+ end
315
+
316
+ [
317
+ i * step,
318
+ vs.median
319
+ ]
320
+ end.to_h
321
+
322
+ xs = []
323
+ ys = []
324
+
325
+ ref_values.each do |x, y|
326
+ if serie_values.key? x
327
+ xs << x
328
+ ys << serie_values[x] / y
329
+ end
330
+ end
331
+
332
+ """
333
+ xs=c(#{ xs * ',' })
334
+ ys=c(#{ ys * ',' })
335
+ median <- smooth.spline(xs, ys, nknots=#{nknots})
336
+ lines(median, lwd=2, col=rgb(#{color}))
337
+ """
338
+ end
339
+
340
+ # Generates scatter plot of points for plots.
341
+ #
342
+ def self.points(data, to_plot: :tpr, color: 'grey', alpha: 0.15, symbol: 1)
343
+ points = data.map { |v| [v[:length], v[to_plot]] }
344
+ xs = "c(#{points.map { |e| e[0] } * ','})"
345
+ ys = "c(#{points.map { |e| e[1] } * ','})"
346
+
347
+ """
348
+ xs = #{xs}
349
+ ys = #{ys}
350
+ points(x=xs,y=ys, col=rgb(#{color},alpha=#{ alpha }), pch=#{ symbol })
351
+ """
352
+ end
353
+
354
+ # Generates median lines and confidence bands for plots.
355
+ #
356
+ def self.bands(data, to_plot: :tpr, n: Ppbench::precision, length: 500000, color: 'grey', confidence: 90, nknots: 15)
357
+
358
+ step = length / n
359
+ points = data.map { |v| [v[:length], v[to_plot]] }
360
+ values = 1.upto(n).map do |i|
361
+ [
362
+ i * step,
363
+ points.select { |p| p[0] < i * step && p[0] >= (i - 1) * step }.map { |p| p[1] }
364
+ ]
365
+ end
366
+
367
+ upper_confidence = 100 - (100 - confidence) / 2
368
+ semi_upper_confidence = 100 - (100 - confidence / 2) / 2
369
+ lower_confidence = (100 - confidence) / 2
370
+ semi_lower_confidence = (100 - confidence / 2) / 2
371
+
372
+ summary = values.map do |x,vs|
373
+
374
+ if vs.empty?
375
+ $stderr.puts precision_error(x)
376
+ exit!
377
+ end
378
+
379
+ {
380
+ :x => x,
381
+ :lower => vs.percentile(lower_confidence),
382
+ :semi_lower => vs.percentile(semi_lower_confidence),
383
+ :median => vs.median,
384
+ :semi_upper => vs.percentile(semi_upper_confidence),
385
+ :upper => vs.percentile(upper_confidence)
386
+ }
387
+ end
388
+
389
+ xs = "c(#{summary.map { |v| v[:x] } * ','})"
390
+ medians = "c(#{summary.map { |v| v[:median] } * ','})"
391
+ lowers = "c(#{summary.map { |v| v[:lower] } * ','})"
392
+ semi_lowers = "c(#{summary.map { |v| v[:semi_lower] } * ','})"
393
+ uppers = "c(#{summary.map { |v| v[:upper] } * ','})"
394
+ semi_uppers = "c(#{summary.map { |v| v[:semi_upper] } * ','})"
395
+
396
+ """
397
+ xs = #{xs}
398
+ medians = #{medians}
399
+ lowers = #{lowers}
400
+ semi_lowers = #{semi_lowers}
401
+ uppers = #{uppers}
402
+ semi_uppers = #{semi_uppers}
403
+
404
+ low <- smooth.spline(xs, lowers, nknots=#{nknots})
405
+ semi_low <- smooth.spline(xs, semi_lowers, nknots=#{nknots})
406
+ up <- smooth.spline(xs, uppers, nknots=#{nknots})
407
+ semi_up <- smooth.spline(xs, semi_uppers, nknots=#{nknots})
408
+ median <- smooth.spline(xs, medians, nknots=#{nknots})
409
+ polygon(c(low$x, rev(up$x)), c(low$y, rev(up$y)), col = rgb(#{color},alpha=0.10), border=NA)
410
+ polygon(c(semi_low$x, rev(semi_up$x)), c(semi_low$y, rev(semi_up$y)), col = rgb(#{color},alpha=0.15), border=NA)
411
+ lines(median, lwd=2, col=rgb(#{color}))
412
+ lines(low, col=rgb(#{color},alpha=0.50), lty='dashed', lwd=0.5)
413
+ lines(up, col=rgb(#{color},alpha=0.50), lty='dashed', lwd=0.5)
414
+ """
415
+
416
+
417
+ end
418
+
419
+ # Generates an R plot output script which can be used for plotting benchmark data
420
+ # as scatter plot with optional confidence bands.
421
+ #
422
+ def self.plotter(
423
+ data,
424
+ to_plot: :tpr,
425
+ machines: [],
426
+ experiments: [],
427
+ receive_window: 87380,
428
+ xaxis_max: 500000,
429
+ confidence: 90,
430
+ no_points: false,
431
+ with_bands: false,
432
+ yaxis_max: 10000000,
433
+ yaxis_steps: 10,
434
+ xaxis_steps: 10,
435
+ xaxis_title: "",
436
+ xaxis_unit: "",
437
+ xaxis_divisor: 1000,
438
+ yaxis_title: "",
439
+ yaxis_unit: "",
440
+ yaxis_divisor: 1000000,
441
+ title: "",
442
+ subtitle: "",
443
+ legend_position: "topright"
444
+ )
445
+ series_data = []
446
+ series_names = []
447
+ series_colors = R_COLORS
448
+
449
+ for exp in experiments
450
+ for machine in machines
451
+ if (data.include? exp) && (data[exp].include? machine)
452
+ series_data << data[exp][machine]
453
+ series_names << "'#{Ppbench::experiment(exp)} on #{Ppbench::machine(machine)}'"
454
+ end
455
+ end
456
+ end
457
+
458
+ colors = "c(#{series_colors.map { |c| "rgb(#{c})" } * ','})"
459
+
460
+ sym = 1;
461
+ r = "#{prepare_plot(yaxis_max, receive_window: receive_window, length: xaxis_max, title: title, xaxis_title: xaxis_title, xaxis_unit: xaxis_unit, yaxis_title: yaxis_title, yaxis_unit: yaxis_unit, subtitle: subtitle)}\n"
462
+
463
+ for serie in series_data
464
+ r += add_series(serie, to_plot: to_plot, with_bands: with_bands, no_points: no_points, color: series_colors.shift, symbol: sym, length: xaxis_max, confidence: confidence)
465
+ sym = sym + 1
466
+ end
467
+
468
+ symbols = no_points ? R_NO_SYMBOL : R_SYMBOLS
469
+
470
+ r + """
471
+ xa = seq(0, #{xaxis_max}, by=#{xaxis_max/xaxis_steps})
472
+ ya = seq(0, #{yaxis_max}, by=#{yaxis_max/yaxis_steps})
473
+ axis(1, at = xa, labels = paste(xa/#{xaxis_divisor}, '#{xaxis_unit}', sep = ' ' ))
474
+ axis(2, at = ya, labels = paste(ya/#{yaxis_divisor}, '#{yaxis_unit}', sep = ' ' ))
475
+ legend('#{legend_position}', cex=0.9, pch=#{symbols}, col=#{colors}, c(#{series_names * ',' }),box.col=rgb(1,1,1,0), bg=rgb(1,1,1,0.75))
476
+ """
477
+ end
478
+
479
+ # Generates an R plot output script which can be used for plotting comparison plots
480
+ # of benchmark data.
481
+ #
482
+ def self.comparison_plotter(
483
+ data,
484
+ yaxis_max: 1.5,
485
+ to_plot: :transfer_rate,
486
+ machines: [],
487
+ experiments: [],
488
+ receive_window: 87380,
489
+ xaxis_max: 500000,
490
+ xaxis_steps: 10,
491
+ xaxis_title: "",
492
+ xaxis_unit: "",
493
+ xaxis_divisor: 1000,
494
+ yaxis_title: "",
495
+ yaxis_unit: "%",
496
+ title: "",
497
+ subtitle: "",
498
+ legend_position: "topright"
499
+ )
500
+ series_data = []
501
+ series_names = []
502
+ series_colors = R_COLORS
503
+
504
+ ref = true
505
+ for exp in experiments
506
+ for machine in machines
507
+ reference = ref ? 'Reference: ' : ''
508
+ ref = false
509
+ if (data.include? exp) && (data[exp].include? machine)
510
+ series_data << data[exp][machine]
511
+ series_names << "'#{reference}#{Ppbench::experiment(exp)} on #{Ppbench::machine(machine)}'"
512
+ end
513
+ end
514
+ end
515
+
516
+ colors = "c(#{series_colors.map { |c| "rgb(#{c})" } * ','})"
517
+
518
+ sym = 1;
519
+ r = "#{prepare_comparisonplot(yaxis_max, receive_window: receive_window, length: xaxis_max, title: title, subtitle: subtitle, xaxis_title: xaxis_title, xaxis_unit: xaxis_unit, yaxis_title: yaxis_title, yaxis_unit: yaxis_unit)}\n"
520
+
521
+ reference = series_data.first
522
+
523
+ for serie in series_data
524
+ r += add_comparisonplot(reference, serie, to_plot: to_plot, color: series_colors.shift, symbol: sym, length: xaxis_max)
525
+ sym = sym + 1
526
+ end
527
+
528
+ r + """
529
+ xa = seq(0, #{xaxis_max}, by=#{xaxis_max/xaxis_steps})
530
+ ya = seq(0, #{yaxis_max}, by=#{0.1})
531
+ axis(1, at = xa, labels = paste(xa/#{xaxis_divisor}, '#{xaxis_unit}', sep = '' ))
532
+ axis(2, at = ya, labels = paste(ya * 100, '#{yaxis_unit}', sep = '' ))
533
+ legend('#{legend_position}', cex=0.9, pch=c(#{R_NO_SYMBOL}), col=#{colors}, c(#{series_names * ',' }),box.col=rgb(1,1,1,0), bg=rgb(1,1,1,0.75))
534
+ """
535
+ end
536
+ end