compare_compressors 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +132 -0
- data/bin/compare_compressors +6 -0
- data/lib/compare_compressors.rb +40 -0
- data/lib/compare_compressors/command_line_interface.rb +223 -0
- data/lib/compare_compressors/comparer.rb +70 -0
- data/lib/compare_compressors/compressor.rb +150 -0
- data/lib/compare_compressors/compressors/brotli_compressor.rb +43 -0
- data/lib/compare_compressors/compressors/bzip2_compressor.rb +37 -0
- data/lib/compare_compressors/compressors/gzip_compressor.rb +34 -0
- data/lib/compare_compressors/compressors/seven_zip_compressor.rb +43 -0
- data/lib/compare_compressors/compressors/xz_compressor.rb +37 -0
- data/lib/compare_compressors/compressors/zstd_compressor.rb +37 -0
- data/lib/compare_compressors/cost_model.rb +55 -0
- data/lib/compare_compressors/costed_group_result.rb +87 -0
- data/lib/compare_compressors/group_result.rb +62 -0
- data/lib/compare_compressors/plotter.rb +164 -0
- data/lib/compare_compressors/plotters/cost_plotter.rb +90 -0
- data/lib/compare_compressors/plotters/raw_plotter.rb +61 -0
- data/lib/compare_compressors/plotters/size_plotter.rb +76 -0
- data/lib/compare_compressors/result.rb +81 -0
- data/lib/compare_compressors/version.rb +8 -0
- data/test/compare_compressors/compare_compressors_test.rb +271 -0
- metadata +101 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  # Constants used by GroupResult. A constant assigned inside the
  # `Struct.new(...) do` block is scoped to the enclosing module rather than
  # to the struct, so they are declared here, where they effectively live.
  DEFAULT_SCALE = 1.0

  HOUR = 3600 # seconds
  GIGABYTE = 1024**3 # bytes (2**30, i.e. one GiB)

  #
  # A single compressor-level result averaged over the targets.
  #
  GroupResult = Struct.new(
    :compressor_name,
    :compressor_level,
    :mean_compression_elapsed_hours,
    :mean_compression_cpu_hours,
    :max_compression_max_rss,
    :mean_compressed_gibytes,
    :mean_compression_delta_gibytes,
    :geomean_compression_ratio,
    :mean_decompression_elapsed_hours,
    :mean_decompression_cpu_hours,
    :max_decompression_max_rss
  ) do
    #
    # Create a GroupResult for a group of Results for the same compressor
    # and level (but possibly multiple targets).
    #
    # Mean times are converted from seconds to hours and mean sizes from
    # bytes to GiB. `scale` multiplies the mean times and sizes only; the
    # maximum RSS values and the geometric-mean ratio are left unscaled.
    #
    # @param [String] compressor_name
    # @param [Object] compressor_level
    # @param [Array.<Result>] results
    # @param [Numeric] scale defaults to DEFAULT_SCALE, matching `group`
    # @return [GroupResult]
    #
    def self.new_from_results(compressor_name, compressor_level, results,
                              scale = DEFAULT_SCALE)
      new(
        compressor_name,
        compressor_level,
        scale * Result.mean(results, :compression_elapsed_time) / HOUR,
        scale * Result.mean(results, :compression_cpu_time) / HOUR,
        results.map(&:compression_max_rss).max,
        scale * Result.mean(results, :size) / GIGABYTE,
        scale * Result.mean(results, :compression_delta) / GIGABYTE,
        Result.geomean(results, :compression_ratio),
        scale * Result.mean(results, :decompression_elapsed_time) / HOUR,
        scale * Result.mean(results, :decompression_cpu_time) / HOUR,
        results.map(&:decompression_max_rss).max
      )
    end

    #
    # Group individual results to average across targets in the sample.
    # Results sharing the same `group_key` are combined into one GroupResult.
    #
    # @param [Array.<Result>] results
    # @param [Numeric] scale forwarded to `new_from_results`
    # @return [Array.<GroupResult>]
    #
    def self.group(results, scale: DEFAULT_SCALE)
      results.group_by(&:group_key).map do |_, group_results|
        GroupResult.new_from_results(
          group_results.first.compressor_name,
          group_results.first.compressor_level,
          group_results,
          scale
        )
      end
    end
  end
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  #
  # Plot compression results to gnuplot.
  #
  # This base class writes the parts of the gnuplot script that every plot
  # type shares: the terminal/output preamble, one inline data block per
  # compressor and the common style settings. Subclasses choose the plotted
  # columns (`column_names`), label the axes (`write_labels`) and provide the
  # plot clauses (`splots`).
  #
  class Plotter
    DEFAULT_TERMINAL = 'png size 640, 480'
    DEFAULT_OUTPUT = 'compare_compressors.png'
    DEFAULT_LOGSCALE_SIZE = false
    DEFAULT_AUTOSCALE_FIX = false
    DEFAULT_SHOW_LABELS = true
    DEFAULT_LMARGIN = nil
    DEFAULT_TITLE = nil
    DEFAULT_USE_CPU_TIME = false

    def initialize(
      terminal:, output:, logscale_size:, autoscale_fix:,
      show_labels:, lmargin:, title:, use_cpu_time:
    )
      @terminal = terminal
      @output = output
      @logscale_size = logscale_size
      @autoscale_fix = autoscale_fix
      @show_labels = show_labels
      @lmargin = lmargin
      @title = title
      @use_cpu_time = use_cpu_time

      # Both are set by #plot.
      @group_results = nil
      @io = nil
    end

    attr_reader :terminal
    attr_reader :output
    attr_reader :logscale_size
    attr_reader :autoscale_fix
    attr_reader :show_labels
    attr_reader :lmargin
    attr_reader :title
    attr_reader :use_cpu_time

    attr_reader :group_results
    attr_reader :io

    #
    # Write a gnuplot script for the given results to `io`.
    #
    # @param [Array.<Struct>] group_results
    # @param [Boolean] pareto_only when truthy, plot only the results that no
    #   other result dominates on the plotted columns
    # @param [IO] io any object responding to `puts`
    #
    def plot(group_results, pareto_only:, io: STDOUT)
      group_results = find_non_dominated(group_results) if pareto_only
      @group_results = group_results
      @io = io
      write
    end

    private

    def write
      write_preamble
      write_data
      write_labels
      write_style
      write_plots
    end

    def write_preamble
      io.puts "set terminal #{terminal}"
      io.puts "set output '#{output}'"
    end

    #
    # Emit one inline data block per compressor, named `$<compressor_name>`,
    # with one space-separated row per result. The row is the struct's
    # `to_a`, so data columns follow the struct's member order.
    #
    def write_data
      group_results.group_by(&:compressor_name).each do |name, name_results|
        io.puts "$#{name} << EOD"
        name_results.each { |name_result| io.puts name_result.to_a.join(' ') }
        io.puts 'EOD'
      end
    end

    def write_style
      io.puts "set title #{escape(title)}" if title
      io.puts 'set key outside'
      io.puts "set lmargin #{lmargin}" if lmargin

      io.puts 'set logscale y' if logscale_size
      io.puts 'set autoscale fix' if autoscale_fix
    end

    def write_labels
      # Subclasses can label the axes.
    end

    #
    # Subclasses declare the struct members they plot, in axis order. The
    # base class plots nothing, so it returns an empty list (like `splots`).
    #
    # @return [Array.<Symbol>]
    #
    def column_names
      []
    end

    def splots
      []
    end

    def write_plots
      io.puts "splot #{splots.join(", \\\n ")}"
    end

    def time_unit
      use_cpu_time ? '(CPU Hours)' : '(Hours)'
    end

    def compressor_names
      group_results.map(&:compressor_name).uniq.sort
    end

    #
    # 1-based position of the compressor in COMPRESSORS; used to give each
    # compressor a stable colour and point type.
    #
    # @raise [ArgumentError] if no entry in COMPRESSORS has that name
    #
    def compressor_number(compressor_name)
      index = COMPRESSORS.index { |c| c.name == compressor_name }
      raise ArgumentError, "unknown compressor: #{compressor_name}" unless index
      index + 1
    end

    def point_style(name)
      number = compressor_number(name)
      "linecolor #{number} pointtype #{number}"
    end

    #
    # Display name from COMPRESSORS, falling back to the raw name when the
    # compressor is unknown or has no display name.
    #
    def find_display_name(compressor_name)
      compressor = COMPRESSORS.find { |c| c.name == compressor_name }
      compressor&.display_name || compressor_name
    end

    #
    # Translate struct member names into 1-based column numbers within the
    # data blocks written by `write_data`.
    #
    # @param [Array.<Symbol>] names
    # @return [Array.<Integer>]
    # @raise [ArgumentError] if a name is not a member of the result struct
    #
    def column_numbers(names = column_names)
      members = group_results.first.class.members
      names.map do |name|
        index = members.index(name)
        raise ArgumentError, "unknown column: #{name}" unless index
        index + 1
      end
    end

    #
    # Find points on the Pareto frontier using the axes shown in the graph.
    #
    # @param [Array.<Struct>] points
    # @return [Array.<Struct>]
    #
    def find_non_dominated(points)
      points.reject do |point0|
        points.any? { |point1| dominates?(point1, point0) }
      end
    end

    #
    # Check whether `point1` dominates `point0`. Here we assume that we are
    # minimizing on all columns: `point1` dominates when it is no worse on
    # every column and strictly better on at least one. Requiring a strict
    # improvement somewhere means identical points never dominate each other.
    #
    def dominates?(point1, point0)
      names = column_names
      names.all? { |name| point1[name] <= point0[name] } &&
        names.any? { |name| point1[name] < point0[name] }
    end

    #
    # Make at least some attempt to escape double quotes: String#dump returns
    # the string wrapped in double quotes with embedded quotes backslashed.
    #
    def escape(str)
      str.dump
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  #
  # Plot compression results to gnuplot in 2D cost space: the x axis is the
  # time cost and the y axis is the size cost, both labelled with the cost
  # model's currency. Optionally overlays contours of `x + y`.
  #
  class CostPlotter < Plotter
    DEFAULT_SHOW_COST_CONTOURS = true

    attr_reader :cost_model, :show_cost_contours

    #
    # @param cost_model object whose `currency` is used in the axis labels
    # @param options Plotter options, plus an optional `:show_cost_contours`
    #
    def initialize(cost_model, **options)
      @cost_model = cost_model
      # An explicit false/nil is kept; the default applies only when the key
      # is absent.
      @show_cost_contours =
        options.delete(:show_cost_contours) { DEFAULT_SHOW_COST_CONTOURS }
      super(**options)
    end

    private

    def column_values(column_name)
      group_results.map(&column_name)
    end

    #
    # Range for the colour palette: from half the sum of the per-column
    # minima up to the sum of the per-column maxima.
    #
    def color_palette_range
      minima = column_names.map { |name| column_values(name).min }
      maxima = column_names.map { |name| column_values(name).max }
      [minima.inject(&:+) / 2.0, maxima.inject(&:+)]
    end

    def write_style
      super

      [
        'set view map',
        'set contour',
        'set palette gray',
        'set cntrlabel font ",10"',
        'set style textbox opaque noborder'
      ].each { |setting| io.puts setting }

      low, high = color_palette_range
      io.puts "set cbrange [#{low}:#{high}]"
      io.puts 'unset colorbox'
    end

    def write_labels
      { 'x' => 'Time', 'y' => 'Size' }.each do |axis, kind|
        io.puts "set #{axis}label '#{kind} Cost (#{cost_model.currency})'"
      end
    end

    def column_names
      %i[hour_cost gibyte_cost]
    end

    # Contours first (when enabled), then the points, then their labels.
    def splots
      clauses = show_cost_contours ? contour_splots : []
      clauses += points_splots
      clauses += point_label_splots if show_labels
      clauses
    end

    #
    # One clause per compressor; the constant 0 is appended as the third
    # (z) column.
    #
    def points_splots
      compressor_names.map do |name|
        using = (column_numbers + [0]).join(':')
        "'$#{name}' using #{using} with points nocontour " \
          "#{point_style(name)} title '#{find_display_name(name)}'"
      end
    end

    # Same coordinates as the points, with the compressor level as the label.
    def point_label_splots
      compressor_names.map do |name|
        using = [
          *column_numbers, 0, *column_numbers([:compressor_level])
        ].join(':')
        "'$#{name}' using #{using} with labels left nocontour notitle"
      end
    end

    def contour_splots
      ['lines palette', 'labels boxed'].map do |style|
        "x + y with #{style} notitle nosurface"
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  #
  # Plot grouped compression results to gnuplot in 3D: compression time on x,
  # compressed size on y and decompression time on z.
  #
  # NOTE(review): unlike CostPlotter, this class has no `private` section, so
  # the overrides below are public on RawPlotter instances. Left as-is to
  # keep the interface unchanged.
  #
  class RawPlotter < Plotter
    DEFAULT_VIEW_ROT_X = 30
    DEFAULT_VIEW_ROT_Z = 350

    attr_reader :view_rot_x, :view_rot_z

    #
    # @param options Plotter options, plus optional `:view_rot_x` and
    #   `:view_rot_z`; a missing, nil or false value selects the default
    #
    def initialize(**options)
      rot_x = options.delete(:view_rot_x)
      rot_z = options.delete(:view_rot_z)
      @view_rot_x = rot_x || DEFAULT_VIEW_ROT_X
      @view_rot_z = rot_z || DEFAULT_VIEW_ROT_Z
      super(**options)
    end

    def write_style
      super
      view = format('set view %d, %d', view_rot_x, view_rot_z)
      io.puts view
      io.puts 'set grid xtics ytics ztics'
    end

    def write_labels
      io.puts "set xlabel 'Compression Time #{time_unit}' rotate parallel"
      io.puts 'set ylabel "Compressed Size (GiB)" rotate parallel'
      io.puts "set zlabel 'Decompression Time #{time_unit}' rotate parallel"
    end

    # CPU-time or elapsed-time columns, depending on `use_cpu_time`.
    def column_names
      clock = use_cpu_time ? 'cpu' : 'elapsed'
      [
        :"mean_compression_#{clock}_hours",
        :mean_compressed_gibytes,
        :"mean_decompression_#{clock}_hours"
      ]
    end

    def splots
      points_splots
    end

    def points_splots
      compressor_names.map do |name|
        using = column_numbers.join(':')
        "'$#{name}' using #{using} with points " \
          "#{point_style(name)} title '#{find_display_name(name)}'"
      end
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CompareCompressors
  #
  # Plot grouped compression results to gnuplot in 2D --- compressed size
  # against either compression time or decompression time.
  #
  # NOTE(review): unlike CostPlotter, this class has no `private` section, so
  # the overrides and helpers below are public on SizePlotter instances. Left
  # as-is to keep the interface unchanged.
  #
  class SizePlotter < Plotter
    DEFAULT_DECOMPRESSION = false # plot compression by default

    attr_reader :decompression

    #
    # @param options Plotter options, plus an optional `:decompression`
    #
    def initialize(**options)
      # An explicit false/nil is kept; the default applies only when the key
      # is absent.
      @decompression =
        options.delete(:decompression) { DEFAULT_DECOMPRESSION }
      super(**options)
    end

    def write_labels
      io.puts 'set ylabel "Compressed Size (GiB)"'
      phase = decompression ? 'Decompression' : 'Compression'
      io.puts "set xlabel '#{phase} Time #{time_unit}'"
    end

    # Time column selected by the `decompression` and `use_cpu_time` flags.
    def time_column_name
      phase = decompression ? 'decompression' : 'compression'
      clock = use_cpu_time ? 'cpu' : 'elapsed'
      :"mean_#{phase}_#{clock}_hours"
    end

    def column_names
      [time_column_name, :mean_compressed_gibytes]
    end

    # This plot is 2D, so it emits `plot` where the base class emits `splot`.
    def write_plots
      io.puts "plot #{plots.join(", \\\n ")}"
    end

    def plots
      show_labels ? point_plots + point_label_plots : point_plots
    end

    def point_plots
      compressor_names.map do |name|
        using = column_numbers.join(':')
        "'$#{name}' using #{using} with points " \
          "#{point_style(name)} title '#{find_display_name(name)}'"
      end
    end

    # Same coordinates as the points, with the compressor level as the label.
    def point_label_plots
      compressor_names.map do |name|
        using = column_numbers(column_names + [:compressor_level]).join(':')
        "'$#{name}' using #{using} with labels left offset 0, character 0.5 notitle"
      end
    end
  end
end
|