compare_compressors 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +132 -0
- data/bin/compare_compressors +6 -0
- data/lib/compare_compressors.rb +40 -0
- data/lib/compare_compressors/command_line_interface.rb +223 -0
- data/lib/compare_compressors/comparer.rb +70 -0
- data/lib/compare_compressors/compressor.rb +150 -0
- data/lib/compare_compressors/compressors/brotli_compressor.rb +43 -0
- data/lib/compare_compressors/compressors/bzip2_compressor.rb +37 -0
- data/lib/compare_compressors/compressors/gzip_compressor.rb +34 -0
- data/lib/compare_compressors/compressors/seven_zip_compressor.rb +43 -0
- data/lib/compare_compressors/compressors/xz_compressor.rb +37 -0
- data/lib/compare_compressors/compressors/zstd_compressor.rb +37 -0
- data/lib/compare_compressors/cost_model.rb +55 -0
- data/lib/compare_compressors/costed_group_result.rb +87 -0
- data/lib/compare_compressors/group_result.rb +62 -0
- data/lib/compare_compressors/plotter.rb +164 -0
- data/lib/compare_compressors/plotters/cost_plotter.rb +90 -0
- data/lib/compare_compressors/plotters/raw_plotter.rb +61 -0
- data/lib/compare_compressors/plotters/size_plotter.rb +76 -0
- data/lib/compare_compressors/result.rb +81 -0
- data/lib/compare_compressors/version.rb +8 -0
- data/test/compare_compressors/compare_compressors_test.rb +271 -0
- metadata +101 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  # These constants are defined explicitly at module level. A constant assigned
  # inside a `Struct.new ... do` block is bound to the lexically enclosing
  # module, not to the struct, so this is where they have always lived; the
  # explicit placement just makes that visible.

  # Multiplier applied to the averaged time and size figures when no scale is
  # given to `GroupResult.group`.
  DEFAULT_SCALE = 1.0

  HOUR = 3600 # seconds
  GIGABYTE = 1024**3 # bytes (binary gigabyte, i.e. one GiB)

  #
  # A single compressor-level result averaged over the targets.
  #
  # Times are reported in hours and sizes in GiB; both are multiplied by the
  # scale factor. The maximum RSS figures and the compression ratio are not
  # scaled.
  #
  GroupResult = Struct.new(
    :compressor_name,
    :compressor_level,
    :mean_compression_elapsed_hours,
    :mean_compression_cpu_hours,
    :max_compression_max_rss,
    :mean_compressed_gibytes,
    :mean_compression_delta_gibytes,
    :geomean_compression_ratio,
    :mean_decompression_elapsed_hours,
    :mean_decompression_cpu_hours,
    :max_decompression_max_rss
  ) do
    #
    # Create a GroupResult for a group of Results for the same compressor
    # and level (but possibly multiple targets).
    #
    # @param [String] compressor_name
    # @param [Object] compressor_level
    # @param [Array.<Result>] results the results to aggregate
    # @param [Numeric] scale factor applied to the mean times and sizes
    # @return [GroupResult]
    #
    def self.new_from_results(compressor_name, compressor_level, results, scale)
      new(
        compressor_name,
        compressor_level,
        scale * Result.mean(results, :compression_elapsed_time) / HOUR,
        scale * Result.mean(results, :compression_cpu_time) / HOUR,
        results.map(&:compression_max_rss).max,
        scale * Result.mean(results, :size) / GIGABYTE,
        scale * Result.mean(results, :compression_delta) / GIGABYTE,
        Result.geomean(results, :compression_ratio),
        scale * Result.mean(results, :decompression_elapsed_time) / HOUR,
        scale * Result.mean(results, :decompression_cpu_time) / HOUR,
        results.map(&:decompression_max_rss).max
      )
    end

    #
    # Group individual results to average across targets in the sample.
    # Results are grouped by their `group_key`; one GroupResult is produced
    # per group, in order of first appearance.
    #
    # @param [Array.<Result>] results
    # @param [Numeric] scale factor applied to the mean times and sizes
    # @return [Array.<GroupResult>]
    #
    def self.group(results, scale: DEFAULT_SCALE)
      results.group_by(&:group_key).map do |_, group_results|
        GroupResult.new_from_results(
          group_results.first.compressor_name,
          group_results.first.compressor_level,
          group_results,
          scale
        )
      end
    end
  end
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  #
  # Plot compression results to gnuplot.
  #
  # This base class writes a gnuplot script to an IO object: the
  # terminal/output preamble, one inline data block per compressor, the axis
  # labels, the style settings and finally the plot command. Subclasses are
  # expected to override `column_names` (the result columns shown on the
  # axes) and may override `write_labels`, `write_style`, `splots` and
  # `write_plots`.
  #
  class Plotter
    DEFAULT_TERMINAL = 'png size 640, 480'
    DEFAULT_OUTPUT = 'compare_compressors.png'
    DEFAULT_LOGSCALE_SIZE = false
    DEFAULT_AUTOSCALE_FIX = false
    DEFAULT_SHOW_LABELS = true
    DEFAULT_LMARGIN = nil
    DEFAULT_TITLE = nil
    DEFAULT_USE_CPU_TIME = false

    #
    # @param [String] terminal argument for gnuplot's `set terminal`
    # @param [String] output file name for gnuplot's `set output`
    # @param [Boolean] logscale_size emit `set logscale y` when true
    # @param [Boolean] autoscale_fix emit `set autoscale fix` when true
    # @param [Boolean] show_labels whether subclasses should label points
    # @param [Object, nil] lmargin argument for `set lmargin`; omitted if nil
    # @param [String, nil] title plot title; omitted if nil
    # @param [Boolean] use_cpu_time report CPU time rather than elapsed time
    #
    def initialize(
      terminal:, output:, logscale_size:, autoscale_fix:,
      show_labels:, lmargin:, title:, use_cpu_time:
    )
      @terminal = terminal
      @output = output
      @logscale_size = logscale_size
      @autoscale_fix = autoscale_fix
      @show_labels = show_labels
      @lmargin = lmargin
      @title = title
      @use_cpu_time = use_cpu_time

      # Set per call to `plot`.
      @group_results = nil
      @io = nil
    end

    attr_reader :terminal
    attr_reader :output
    attr_reader :logscale_size
    attr_reader :autoscale_fix
    attr_reader :show_labels
    attr_reader :lmargin
    attr_reader :title
    attr_reader :use_cpu_time

    attr_reader :group_results
    attr_reader :io

    #
    # Write a gnuplot script for the given results.
    #
    # @param [Array.<Struct>] group_results the results to plot
    # @param [Boolean] pareto_only plot only the non-dominated results
    # @param [IO] io where to write the script
    #
    def plot(group_results, pareto_only:, io: STDOUT)
      group_results = find_non_dominated(group_results) if pareto_only
      @group_results = group_results
      @io = io
      write
    end

    private

    def write
      write_preamble
      write_data
      write_labels
      write_style
      write_plots
    end

    def write_preamble
      io.puts "set terminal #{terminal}"
      io.puts "set output '#{output}'"
    end

    #
    # Write one inline gnuplot data block per compressor, named after the
    # compressor, with one space-separated row per result.
    #
    def write_data
      group_results_by_name = group_results.group_by(&:compressor_name)
      group_results_by_name.each do |name, name_results|
        io.puts "$#{name} << EOD"
        name_results.each do |name_result|
          io.puts name_result.to_a.join(' ')
        end
        io.puts 'EOD'
      end
    end

    def write_style
      io.puts "set title #{escape(title)}" if title
      io.puts 'set key outside'
      io.puts "set lmargin #{lmargin}" if lmargin

      io.puts 'set logscale y' if logscale_size
      io.puts 'set autoscale fix' if autoscale_fix
    end

    def write_labels
      # Subclasses can label the axes.
    end

    def column_names
      # Subclasses can declare their column names.
    end

    def splots
      []
    end

    def write_plots
      io.puts "splot #{splots.join(", \\\n ")}"
    end

    def time_unit
      if use_cpu_time
        '(CPU Hours)'
      else
        '(Hours)'
      end
    end

    def compressor_names
      group_results.map(&:compressor_name).uniq.sort
    end

    #
    # The 1-based number used for both the line color and the point type of a
    # compressor. Known compressors are numbered by their position in
    # COMPRESSORS, so each keeps the same style from plot to plot.
    #
    # Compressors that are not in COMPRESSORS are numbered after the known
    # ones, in sorted name order, instead of failing on `nil + 1`; this
    # matches `find_display_name`, which also tolerates unknown names.
    #
    def compressor_number(compressor_name)
      known = COMPRESSORS.index { |c| c.name == compressor_name }
      return known + 1 if known

      unknown_names = compressor_names.reject do |name|
        COMPRESSORS.any? { |c| c.name == name }
      end
      offset = unknown_names.index(compressor_name) || unknown_names.size
      COMPRESSORS.count + offset + 1
    end

    def point_style(name)
      number = compressor_number(name)
      "linecolor #{number} pointtype #{number}"
    end

    #
    # The display name of the named compressor, falling back to the name
    # itself when the compressor is unknown or has no display name.
    #
    def find_display_name(compressor_name)
      compressor = COMPRESSORS.find { |c| c.name == compressor_name }
      compressor&.display_name || compressor_name
    end

    #
    # Translate result member names into 1-based gnuplot column numbers, based
    # on the member order of the struct class of the first result.
    #
    def column_numbers(names = column_names)
      struct = @group_results.first.class
      names.map { |name| struct.members.index(name) + 1 }
    end

    #
    # Find points on the Pareto frontier using the axes shown in the graph.
    #
    # @param [Array.<Struct>] points
    # @return [Array.<Struct>]
    #
    def find_non_dominated(points)
      points.reject do |point0|
        points.any? do |point1|
          dominates?(point1, point0)
        end
      end
    end

    #
    # Check whether `point1` dominates `point0`. Here we assume that we are
    # minimizing on all columns.
    #
    # `point1` dominates when it is no worse on every column and strictly
    # better on at least one. Requiring a strict improvement on every column
    # would keep points that tie on one axis but are worse on another, and
    # those are not on the Pareto frontier. A point never dominates itself or
    # an exact duplicate, so duplicates on the frontier are all kept.
    #
    def dominates?(point1, point0)
      names = column_names
      names.all? { |name| point1[name] <= point0[name] } &&
        names.any? { |name| point1[name] < point0[name] }
    end

    #
    # Make at least some attempt to escape double quotes.
    #
    def escape(str)
      str.dump
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  #
  # Plots results in a two-dimensional cost space for gnuplot: the x axis is
  # the time cost and the y axis is the size cost, optionally overlaid with
  # contours of `x + y`.
  #
  class CostPlotter < Plotter
    DEFAULT_SHOW_COST_CONTOURS = true

    #
    # @param cost_model the model whose `currency` is shown on the axis labels
    # @param options the Plotter options, plus an optional
    #   `:show_cost_contours` flag
    #
    def initialize(cost_model, **options)
      @cost_model = cost_model
      # An explicitly passed value (even nil or false) wins; the default is
      # used only when the key is absent.
      @show_cost_contours = options.delete(:show_cost_contours) do
        DEFAULT_SHOW_COST_CONTOURS
      end
      super(**options)
    end

    attr_reader :cost_model, :show_cost_contours

    private

    # All values of one column across the results being plotted.
    def column_values(column_name)
      group_results.map { |result| result.public_send(column_name) }
    end

    # Range for the contour palette: from half the sum of the per-column
    # minima up to the sum of the per-column maxima.
    def color_palette_range
      lowest = column_names.map { |name| column_values(name).min }
      highest = column_names.map { |name| column_values(name).max }
      [lowest.reduce(:+) / 2.0, highest.reduce(:+)]
    end

    def write_style
      super

      [
        'set view map',
        'set contour',
        'set palette gray',
        'set cntrlabel font ",10"',
        'set style textbox opaque noborder'
      ].each { |setting| io.puts setting }
      io.puts "set cbrange [#{color_palette_range.join(':')}]"
      io.puts 'unset colorbox'
    end

    def write_labels
      io.puts "set xlabel 'Time Cost (#{cost_model.currency})'"
      io.puts "set ylabel 'Size Cost (#{cost_model.currency})'"
    end

    def column_names
      %i[hour_cost gibyte_cost]
    end

    # Contours first (when enabled), then the points, then the point labels
    # (when enabled).
    def splots
      series = show_cost_contours ? contour_splots : []
      series += points_splots
      series += point_label_splots if show_labels
      series
    end

    def points_splots
      compressor_names.map do |name|
        using = [*column_numbers, 0].join(':')
        [
          "'$#{name}' using #{using} with points nocontour",
          point_style(name),
          "title '#{find_display_name(name)}'"
        ].join(' ')
      end
    end

    def point_label_splots
      compressor_names.map do |name|
        using = [
          *column_numbers, 0, *column_numbers([:compressor_level])
        ].join(':')
        "'$#{name}' using #{using} with labels left nocontour notitle"
      end
    end

    def contour_splots
      ['lines palette', 'labels boxed'].map do |style|
        "x + y with #{style} notitle nosurface"
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  #
  # Three-dimensional gnuplot output for grouped compression results: the axes
  # are compression time, compressed size and decompression time.
  #
  class RawPlotter < Plotter
    DEFAULT_VIEW_ROT_X = 30
    DEFAULT_VIEW_ROT_Z = 350

    #
    # @param options the Plotter options, plus optional `:view_rot_x` and
    #   `:view_rot_z` view rotations; a missing or nil rotation falls back to
    #   its default
    #
    def initialize(**options)
      rot_x, rot_z = %i[view_rot_x view_rot_z].map { |key| options.delete(key) }
      @view_rot_x = rot_x || DEFAULT_VIEW_ROT_X
      @view_rot_z = rot_z || DEFAULT_VIEW_ROT_Z
      super(**options)
    end

    attr_reader :view_rot_x, :view_rot_z

    def write_style
      super
      io.puts('set view %d, %d' % [view_rot_x, view_rot_z])
      io.puts 'set grid xtics ytics ztics'
    end

    def write_labels
      io.puts "set xlabel 'Compression Time #{time_unit}' rotate parallel"
      io.puts 'set ylabel "Compressed Size (GiB)" rotate parallel'
      io.puts "set zlabel 'Decompression Time #{time_unit}' rotate parallel"
    end

    # The x, y and z columns: CPU-time columns when `use_cpu_time` is set,
    # elapsed-time columns otherwise.
    def column_names
      if use_cpu_time
        return %i[
          mean_compression_cpu_hours
          mean_compressed_gibytes
          mean_decompression_cpu_hours
        ]
      end

      %i[
        mean_compression_elapsed_hours
        mean_compressed_gibytes
        mean_decompression_elapsed_hours
      ]
    end

    def splots
      points_splots
    end

    # One point series per compressor.
    def points_splots
      compressor_names.map do |name|
        [
          "'$#{name}' using #{column_numbers.join(':')} with points",
          point_style(name),
          "title '#{find_display_name(name)}'"
        ].join(' ')
      end
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  #
  # Two-dimensional gnuplot output for grouped compression results: compressed
  # size against either compression time or decompression time.
  #
  class SizePlotter < Plotter
    DEFAULT_DECOMPRESSION = false # plot compression by default

    #
    # @param options the Plotter options, plus an optional `:decompression`
    #   flag selecting decompression time for the x axis
    #
    def initialize(**options)
      # An explicitly passed value (even nil) wins; the default is used only
      # when the key is absent.
      @decompression = options.delete(:decompression) { DEFAULT_DECOMPRESSION }
      super(**options)
    end

    attr_reader :decompression

    def write_labels
      io.puts 'set ylabel "Compressed Size (GiB)"'
      operation = decompression ? 'Decompression' : 'Compression'
      io.puts "set xlabel '#{operation} Time #{time_unit}'"
    end

    # The result column used for the x axis, chosen by the `decompression` and
    # `use_cpu_time` settings.
    def time_column_name
      if decompression
        return :mean_decompression_cpu_hours if use_cpu_time

        :mean_decompression_elapsed_hours
      else
        return :mean_compression_cpu_hours if use_cpu_time

        :mean_compression_elapsed_hours
      end
    end

    def column_names
      [time_column_name, :mean_compressed_gibytes]
    end

    # Uses gnuplot's 2D `plot` command rather than the `splot` of the base
    # class.
    def write_plots
      io.puts "plot #{plots.join(", \\\n ")}"
    end

    # The point series, followed by the label series when labels are enabled.
    def plots
      series = point_plots
      series += point_label_plots if show_labels
      series
    end

    def point_plots
      compressor_names.map do |name|
        [
          "'$#{name}' using #{column_numbers.join(':')} with points",
          point_style(name),
          "title '#{find_display_name(name)}'"
        ].join(' ')
      end
    end

    def point_label_plots
      compressor_names.map do |name|
        using = column_numbers([*column_names, :compressor_level]).join(':')
        "'$#{name}' using #{using} with labels left offset 0, character 0.5 notitle"
      end
    end
  end
end
|