compare_compressors 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CompareCompressors
4
#
# A single compressor-level result averaged over the targets.
#
# Times are stored in hours and sizes in GiB. The member order is
# significant: each record is written out via `to_a`, and plotters look up
# gnuplot column numbers from the position of a member in `members`.
#
# NOTE: the constants below are assigned inside the `Struct.new` block. Ruby
# resolves constant assignment lexically, so they are defined on the
# enclosing module (CompareCompressors), not on GroupResult itself.
#
GroupResult = Struct.new(
  :compressor_name,
  :compressor_level,
  :mean_compression_elapsed_hours,
  :mean_compression_cpu_hours,
  :max_compression_max_rss,
  :mean_compressed_gibytes,
  :mean_compression_delta_gibytes,
  :geomean_compression_ratio,
  :mean_decompression_elapsed_hours,
  :mean_decompression_cpu_hours,
  :max_decompression_max_rss
) do
  DEFAULT_SCALE = 1.0

  HOUR = 3600 # seconds
  GIGABYTE = 1024**3 # bytes

  #
  # Create a GroupResult for a group of Results for the same compressor
  # and level (but possibly multiple targets).
  #
  # Mean times and mean sizes are multiplied by `scale` before being
  # converted to hours / GiB; the compression ratio and the maximum RSS
  # values are not scaled.
  #
  # @param compressor_name [String]
  # @param compressor_level [Object] stored as-is
  # @param results [Array<Result>] results sharing one compressor and level;
  #   `Result.mean` / `Result.geomean` are defined elsewhere in the project
  # @param scale [Numeric] multiplier applied to mean times and sizes;
  #   defaults to DEFAULT_SCALE, matching `group`
  # @return [GroupResult]
  #
  def self.new_from_results(
    compressor_name, compressor_level, results, scale = DEFAULT_SCALE
  )
    new(
      compressor_name,
      compressor_level,
      scale * Result.mean(results, :compression_elapsed_time) / HOUR,
      scale * Result.mean(results, :compression_cpu_time) / HOUR,
      results.map(&:compression_max_rss).max,
      scale * Result.mean(results, :size) / GIGABYTE,
      scale * Result.mean(results, :compression_delta) / GIGABYTE,
      Result.geomean(results, :compression_ratio),
      scale * Result.mean(results, :decompression_elapsed_time) / HOUR,
      scale * Result.mean(results, :decompression_cpu_time) / HOUR,
      results.map(&:decompression_max_rss).max
    )
  end

  #
  # Group individual results to average across targets in the sample.
  #
  # Results are bucketed by their `group_key`; the compressor name and level
  # of each bucket are taken from its first result.
  #
  # @param results [Array<Result>]
  # @param scale [Numeric] forwarded to `new_from_results`
  # @return [Array<GroupResult>] one entry per distinct group key, in order
  #   of first appearance
  #
  def self.group(results, scale: DEFAULT_SCALE)
    results.group_by(&:group_key).map do |_, group_results|
      representative = group_results.first
      GroupResult.new_from_results(
        representative.compressor_name,
        representative.compressor_level,
        group_results,
        scale
      )
    end
  end
end
62
+ end
@@ -0,0 +1,164 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CompareCompressors
4
#
# Plot compression results to gnuplot.
#
# This base class emits the parts of a gnuplot script that are shared by all
# plots (terminal/output, inline data blocks, common style settings and the
# final plot command). Subclasses customise the result through the private
# hooks `write_labels`, `write_style`, `column_names`, `splots` and
# `write_plots`.
#
class Plotter
  DEFAULT_TERMINAL = 'png size 640, 480'
  DEFAULT_OUTPUT = 'compare_compressors.png'
  DEFAULT_LOGSCALE_SIZE = false
  DEFAULT_AUTOSCALE_FIX = false
  DEFAULT_SHOW_LABELS = true
  DEFAULT_LMARGIN = nil
  DEFAULT_TITLE = nil
  DEFAULT_USE_CPU_TIME = false

  #
  # All options are required keywords; the DEFAULT_* constants above are
  # available for callers that want the stock values.
  #
  # @param terminal [String] argument for gnuplot's `set terminal`
  # @param output [String] argument for gnuplot's `set output`
  # @param logscale_size [Boolean] emit `set logscale y`
  # @param autoscale_fix [Boolean] emit `set autoscale fix`
  # @param show_labels [Boolean] read by subclasses to decide whether to
  #   label points
  # @param lmargin [Object, nil] argument for `set lmargin`, skipped if nil
  # @param title [String, nil] plot title, skipped if nil
  # @param use_cpu_time [Boolean] selects the '(CPU Hours)' time unit
  #
  def initialize(
    terminal:, output:, logscale_size:, autoscale_fix:,
    show_labels:, lmargin:, title:, use_cpu_time:
  )
    @terminal = terminal
    @output = output
    @logscale_size = logscale_size
    @autoscale_fix = autoscale_fix
    @show_labels = show_labels
    @lmargin = lmargin
    @title = title
    @use_cpu_time = use_cpu_time

    # Both are set by #plot for the duration of a write.
    @group_results = nil
    @io = nil
  end

  attr_reader :terminal
  attr_reader :output
  attr_reader :logscale_size
  attr_reader :autoscale_fix
  attr_reader :show_labels
  attr_reader :lmargin
  attr_reader :title
  attr_reader :use_cpu_time

  attr_reader :group_results
  attr_reader :io

  #
  # Write a complete gnuplot script for the given results.
  #
  # @param group_results [Array<Struct>] records to plot; each must respond
  #   to `compressor_name` and `to_a`
  # @param pareto_only [Boolean] when true, only non-dominated points (with
  #   respect to `column_names`) are plotted
  # @param io [#puts] destination for the script
  #
  def plot(group_results, pareto_only:, io: STDOUT)
    group_results = find_non_dominated(group_results) if pareto_only
    @group_results = group_results
    @io = io
    write
  end

  private

  # Emit the script sections in the order gnuplot needs them: data blocks
  # must be defined before the plot command that references them.
  def write
    write_preamble
    write_data
    write_labels
    write_style
    write_plots
  end

  def write_preamble
    io.puts "set terminal #{terminal}"
    io.puts "set output '#{output}'"
  end

  # Write one inline data block per compressor, named `$<compressor_name>`,
  # with one space-separated row per result (all struct members, in order).
  def write_data
    group_results_by_name = group_results.group_by(&:compressor_name)
    group_results_by_name.each do |name, name_results|
      io.puts "$#{name} << EOD"
      name_results.each do |name_result|
        io.puts name_result.to_a.join(' ')
      end
      io.puts 'EOD'
    end
  end

  def write_style
    io.puts "set title #{escape(title)}" if title
    io.puts 'set key outside'
    io.puts "set lmargin #{lmargin}" if lmargin

    io.puts 'set logscale y' if logscale_size
    io.puts 'set autoscale fix' if autoscale_fix
  end

  def write_labels
    # Subclasses can label the axes.
  end

  # Returns nil in the base class; `column_numbers` and `dominates?` need a
  # subclass to return an array of member names.
  def column_names
    # Subclasses can declare their column names.
  end

  def splots
    []
  end

  def write_plots
    io.puts "splot #{splots.join(", \\\n ")}"
  end

  def time_unit
    if use_cpu_time
      '(CPU Hours)'
    else
      '(Hours)'
    end
  end

  def compressor_names
    group_results.map(&:compressor_name).uniq.sort
  end

  # 1-based position of the compressor in COMPRESSORS (defined elsewhere in
  # the project). A name that is not listed there makes `index` return nil,
  # so this raises NoMethodError.
  def compressor_number(compressor_name)
    COMPRESSORS.index { |c| c.name == compressor_name } + 1
  end

  # Use the compressor's number for both colour and point type so that a
  # given compressor looks the same in every plot.
  def point_style(name)
    number = compressor_number(name)
    "linecolor #{number} pointtype #{number}"
  end

  # Falls back to the raw name when the compressor is not in COMPRESSORS.
  def find_display_name(compressor_name)
    compressor = COMPRESSORS.find { |c| c.name == compressor_name }
    compressor&.display_name || compressor_name
  end

  # Translate struct member names into 1-based gnuplot column numbers,
  # based on the struct class of the first result.
  def column_numbers(names = column_names)
    struct = @group_results.first.class
    names.map { |name| struct.members.index(name) + 1 }
  end

  #
  # Find points on the Pareto frontier using the axes shown in the graph.
  #
  # @param [Array<Struct>] points
  # @return [Array<Struct>] the points not dominated by any other point, in
  #   their original order
  #
  def find_non_dominated(points)
    points.reject do |point0|
      points.any? do |point1|
        dominates?(point1, point0)
      end
    end
  end

  #
  # Check whether `point1` dominates `point0`. Here we assume that we are
  # minimizing on all columns.
  #
  # Standard Pareto dominance: `point1` must be no worse than `point0` on
  # every column and strictly better on at least one. Requiring a strict
  # improvement on every column would leave points on the frontier that tie
  # with a better point on one axis; requiring none would make identical
  # points eliminate each other (and every point eliminate itself).
  #
  def dominates?(point1, point0)
    names = column_names
    names.all? { |name| point1[name] <= point0[name] } &&
      names.any? { |name| point1[name] < point0[name] }
  end

  #
  # Make at least some attempt to escape double quotes.
  #
  def escape(str)
    str.dump
  end
end
164
+ end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CompareCompressors
4
#
# Plot compression results to gnuplot in 2D cost space (time cost and space
# cost).
#
# Points are drawn with `splot` in map view so that contour lines of
# constant `x + y` can be drawn underneath them.
#
class CostPlotter < Plotter
  DEFAULT_SHOW_COST_CONTOURS = true

  attr_reader :cost_model, :show_cost_contours

  #
  # @param cost_model [Object] must respond to `currency` (used for the
  #   axis labels)
  # @param options [Hash] Plotter options, plus an optional
  #   `:show_cost_contours` flag that is consumed here
  #
  def initialize(cost_model, **options)
    @cost_model = cost_model
    # fetch (rather than ||) so that an explicit false or nil is honoured.
    @show_cost_contours =
      options.fetch(:show_cost_contours, DEFAULT_SHOW_COST_CONTOURS)
    options.delete(:show_cost_contours)
    super(**options)
  end

  private

  # All values of one column across the current results.
  def column_values(column_name)
    group_results.map(&column_name)
  end

  # Range for the colour palette: half the sum of the per-column minima up
  # to the sum of the per-column maxima.
  def color_palette_range
    total_of = lambda do |extreme|
      column_names
        .map { |name| column_values(name).public_send(extreme) }
        .inject(:+)
    end
    [total_of.call(:min) / 2.0, total_of.call(:max)]
  end

  def write_style
    super

    [
      'set view map',
      'set contour',
      'set palette gray',
      'set cntrlabel font ",10"',
      'set style textbox opaque noborder'
    ].each { |line| io.puts line }
    io.puts "set cbrange [#{color_palette_range.join(':')}]"
    io.puts 'unset colorbox'
  end

  def write_labels
    currency = cost_model.currency
    io.puts "set xlabel 'Time Cost (#{currency})'"
    io.puts "set ylabel 'Size Cost (#{cost_model.currency})'"
  end

  def column_names
    %i[hour_cost gibyte_cost]
  end

  # Contours first, then the points, then (optionally) their labels.
  def splots
    [
      (contour_splots if show_cost_contours),
      points_splots,
      (point_label_splots if show_labels)
    ].compact.flatten(1)
  end

  # One splot clause per compressor; column 0 supplies the third coordinate
  # that splot requires.
  def points_splots
    compressor_names.map do |name|
      using = (column_numbers + [0]).join(':')
      [
        "'$#{name}' using #{using} with points nocontour",
        point_style(name),
        "title '#{find_display_name(name)}'"
      ].join(' ')
    end
  end

  # Same coordinates as the points, with the compressor level as label text.
  def point_label_splots
    compressor_names.map do |name|
      coordinates = column_numbers + [0]
      using = (coordinates + column_numbers([:compressor_level])).join(':')
      "'$#{name}' using #{using} with labels left nocontour notitle"
    end
  end

  def contour_splots
    ['lines palette', 'labels boxed'].map do |style|
      "x + y with #{style} notitle nosurface"
    end
  end
end
90
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CompareCompressors
4
#
# Plot grouped compression results to gnuplot in 3D (compression time,
# decompression time, and compressed size).
#
# NOTE: the hook methods below are public in this class, whereas Plotter
# declares the hooks they override as private.
#
class RawPlotter < Plotter
  DEFAULT_VIEW_ROT_X = 30
  DEFAULT_VIEW_ROT_Z = 350

  attr_reader :view_rot_x, :view_rot_z

  #
  # @param options [Hash] Plotter options, plus optional `:view_rot_x` and
  #   `:view_rot_z` rotations that are consumed here; a missing, nil or
  #   false value selects the corresponding default
  #
  def initialize(**options)
    rot_x = options.delete(:view_rot_x)
    rot_z = options.delete(:view_rot_z)
    @view_rot_x = rot_x || DEFAULT_VIEW_ROT_X
    @view_rot_z = rot_z || DEFAULT_VIEW_ROT_Z
    super(**options)
  end

  def write_style
    super
    io.puts format(
      'set view %<rot_x>d, %<rot_z>d', rot_x: view_rot_x, rot_z: view_rot_z
    )
    io.puts 'set grid xtics ytics ztics'
  end

  def write_labels
    unit = time_unit
    io.puts "set xlabel 'Compression Time #{unit}' rotate parallel"
    io.puts 'set ylabel "Compressed Size (GiB)" rotate parallel'
    io.puts "set zlabel 'Decompression Time #{unit}' rotate parallel"
  end

  # x = compression time, y = compressed size, z = decompression time; the
  # time columns are CPU or elapsed hours depending on `use_cpu_time`.
  def column_names
    clock = use_cpu_time ? 'cpu' : 'elapsed'
    [
      :"mean_compression_#{clock}_hours",
      :mean_compressed_gibytes,
      :"mean_decompression_#{clock}_hours"
    ]
  end

  def splots
    points_splots
  end

  # One splot clause per compressor.
  def points_splots
    compressor_names.map do |name|
      [
        "'$#{name}' using #{column_numbers.join(':')} with points",
        point_style(name),
        "title '#{find_display_name(name)}'"
      ].join(' ')
    end
  end
end
61
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CompareCompressors
4
#
# Plot grouped compression results to gnuplot in 2D --- just compression time
# or decompression time vs size.
#
# NOTE: the hook methods below are public in this class, whereas Plotter
# declares the hooks they override as private.
#
class SizePlotter < Plotter
  DEFAULT_DECOMPRESSION = false # plot compression by default

  attr_reader :decompression

  #
  # @param options [Hash] Plotter options, plus an optional `:decompression`
  #   flag that is consumed here
  #
  def initialize(**options)
    # fetch (rather than ||) so that an explicitly passed value is kept.
    @decompression = options.fetch(:decompression, DEFAULT_DECOMPRESSION)
    options.delete(:decompression)
    super(**options)
  end

  def write_labels
    io.puts 'set ylabel "Compressed Size (GiB)"'
    phase = decompression ? 'Decompression' : 'Compression'
    io.puts "set xlabel '#{phase} Time #{time_unit}'"
  end

  # Name of the x-axis column, chosen by phase (compression/decompression)
  # and clock (CPU/elapsed).
  def time_column_name
    phase = decompression ? 'decompression' : 'compression'
    clock = use_cpu_time ? 'cpu' : 'elapsed'
    :"mean_#{phase}_#{clock}_hours"
  end

  def column_names
    [time_column_name, :mean_compressed_gibytes]
  end

  # A 2D plot, so use `plot` instead of the base class's `splot`.
  def write_plots
    io.puts "plot #{plots.join(", \\\n ")}"
  end

  def plots
    show_labels ? point_plots + point_label_plots : point_plots
  end

  # One plot clause per compressor.
  def point_plots
    compressor_names.map do |name|
      [
        "'$#{name}' using #{column_numbers.join(':')} with points",
        point_style(name),
        "title '#{find_display_name(name)}'"
      ].join(' ')
    end
  end

  # Same coordinates as the points, with the compressor level as label text.
  def point_label_plots
    compressor_names.map do |name|
      using = column_numbers(column_names + [:compressor_level]).join(':')
      "'$#{name}' using #{using} with labels left offset 0, character 0.5 notitle"
    end
  end
end
76
+ end