compress-bsc 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +2 -0
  4. data/Gemfile +8 -0
  5. data/LICENSE +192 -0
  6. data/README.md +279 -0
  7. data/Rakefile +96 -0
  8. data/bin/rbsc +306 -0
  9. data/certs/djberg96_pub.pem +26 -0
  10. data/compress-bsc.gemspec +45 -0
  11. data/coverage/assets/0.13.2/DataTables-1.10.20/images/sort_asc.png +0 -0
  12. data/coverage/assets/0.13.2/DataTables-1.10.20/images/sort_asc_disabled.png +0 -0
  13. data/coverage/assets/0.13.2/DataTables-1.10.20/images/sort_both.png +0 -0
  14. data/coverage/assets/0.13.2/DataTables-1.10.20/images/sort_desc.png +0 -0
  15. data/coverage/assets/0.13.2/DataTables-1.10.20/images/sort_desc_disabled.png +0 -0
  16. data/coverage/assets/0.13.2/application.css +1 -0
  17. data/coverage/assets/0.13.2/application.js +7 -0
  18. data/coverage/assets/0.13.2/colorbox/border.png +0 -0
  19. data/coverage/assets/0.13.2/colorbox/controls.png +0 -0
  20. data/coverage/assets/0.13.2/colorbox/loading.gif +0 -0
  21. data/coverage/assets/0.13.2/colorbox/loading_background.png +0 -0
  22. data/coverage/assets/0.13.2/favicon_green.png +0 -0
  23. data/coverage/assets/0.13.2/favicon_red.png +0 -0
  24. data/coverage/assets/0.13.2/favicon_yellow.png +0 -0
  25. data/coverage/assets/0.13.2/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
  26. data/coverage/assets/0.13.2/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
  27. data/coverage/assets/0.13.2/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
  28. data/coverage/assets/0.13.2/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
  29. data/coverage/assets/0.13.2/images/ui-bg_glass_75_dadada_1x400.png +0 -0
  30. data/coverage/assets/0.13.2/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
  31. data/coverage/assets/0.13.2/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
  32. data/coverage/assets/0.13.2/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
  33. data/coverage/assets/0.13.2/images/ui-icons_222222_256x240.png +0 -0
  34. data/coverage/assets/0.13.2/images/ui-icons_2e83ff_256x240.png +0 -0
  35. data/coverage/assets/0.13.2/images/ui-icons_454545_256x240.png +0 -0
  36. data/coverage/assets/0.13.2/images/ui-icons_888888_256x240.png +0 -0
  37. data/coverage/assets/0.13.2/images/ui-icons_cd0a0a_256x240.png +0 -0
  38. data/coverage/assets/0.13.2/loading.gif +0 -0
  39. data/coverage/assets/0.13.2/magnify.png +0 -0
  40. data/coverage/index.html +4779 -0
  41. data/examples/usage_example.rb +215 -0
  42. data/lib/compress/bsc/compressor.rb +81 -0
  43. data/lib/compress/bsc/decompressor.rb +159 -0
  44. data/lib/compress/bsc/error.rb +18 -0
  45. data/lib/compress/bsc/library.rb +100 -0
  46. data/lib/compress/bsc/version.rb +5 -0
  47. data/lib/compress/bsc.rb +26 -0
  48. data/lib/compress-bsc.rb +5 -0
  49. data/spec/compressor_spec.rb +124 -0
  50. data/spec/decompressor_spec.rb +135 -0
  51. data/spec/error_spec.rb +63 -0
  52. data/spec/examples.txt +60 -0
  53. data/spec/ffi_bsc_spec.rb +101 -0
  54. data/spec/library_spec.rb +97 -0
  55. data/spec/spec_helper.rb +53 -0
  56. data.tar.gz.sig +0 -0
  57. metadata +232 -0
  58. metadata.gz.sig +0 -0
data/bin/rbsc ADDED
@@ -0,0 +1,306 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/compress/bsc'
4
+ require 'optparse'
5
+
6
# Command-line front end for the Compress::BSC bindings.
#
# One action must be selected with a flag: compress (-c), decompress (-d) or
# info (-i). All actions need an input file (-f); compress and decompress also
# need an output file (-o). Diagnostics are written to stderr and the process
# exits with status 1 on any failure.
class BSCCli
  # Unit suffixes used by #format_bytes, smallest first.
  BYTE_UNITS = %w[B KB MB GB TB].freeze

  # Divisor used to turn bytes/second into MB/s.
  BYTES_PER_MEGABYTE = 1024.0 * 1024

  # Builds the default option set. Library defaults are read from
  # Compress::BSC::Library; LZP is disabled (both values 0) by default.
  def initialize
    @options = {
      action: nil,
      input: nil,
      output: nil,
      block_sorter: Compress::BSC::Library::LIBBSC_DEFAULT_BLOCKSORTER,
      coder: Compress::BSC::Library::LIBBSC_DEFAULT_CODER,
      lzp_hash_size: 0,
      lzp_min_len: 0,
      features: Compress::BSC::Library::LIBBSC_DEFAULT_FEATURES,
      verbose: false
    }
  end

  # Parses +args+, validates them and dispatches to the selected action.
  #
  # @param args [Array<String>] command-line arguments (consumed by OptionParser#parse!)
  # @return [void]
  #
  # Any Compress::BSC::Error or other StandardError is reported on stderr and
  # terminates the process with exit status 1.
  def run(args)
    parse_options(args)
    validate_options

    @bsc = Compress::BSC.new

    case @options[:action]
    when :compress   then compress_file
    when :decompress then decompress_file
    when :info       then show_info
    else
      # Defensive: validate_options already exits when no action is set.
      abort "Error: No action specified"
    end
  rescue Compress::BSC::Error => e
    warn "BSC Error: #{e.error_name} (#{e.code})"
    warn e.message
    exit 1
  rescue StandardError => e
    # SystemExit (raised by exit/abort) is not a StandardError, so deliberate
    # exits pass straight through this handler.
    abort "Error: #{e.message}"
  end

  private

  # Declares every supported flag and parses +args+ in place, storing the
  # results in @options.
  #
  # @param args [Array<String>] arguments to parse; recognised options are removed
  def parse_options(args)
    library = Compress::BSC::Library

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: #{$0} [options]"
      opts.separator ""
      opts.separator "Actions:"

      opts.on("-c", "--compress", "Compress input file") { @options[:action] = :compress }
      opts.on("-d", "--decompress", "Decompress input file") { @options[:action] = :decompress }
      opts.on("-i", "--info", "Show information about compressed file") { @options[:action] = :info }

      opts.separator ""
      opts.separator "Files:"

      opts.on("-f", "--input FILE", "Input file") { |file| @options[:input] = file }
      opts.on("-o", "--output FILE", "Output file") { |file| @options[:output] = file }

      opts.separator ""
      opts.separator "Compression options:"

      opts.on("-b", "--block-sorter SORTER", Integer, "Block sorter (0=BWT, 3-8=ST)") do |sorter|
        @options[:block_sorter] = sorter
      end

      opts.on("--coder CODER", Integer, "Coder (1=Static, 2=Adaptive, 3=Fast)") do |coder|
        @options[:coder] = coder
      end

      opts.on("--lzp-hash SIZE", Integer, "LZP hash size (10-28, 0=disable)") do |size|
        @options[:lzp_hash_size] = size
      end

      opts.on("--lzp-min LEN", Integer, "LZP minimum length (4-255, 0=disable)") do |len|
        @options[:lzp_min_len] = len
      end

      # --fast / --no-fast set or clear the corresponding bit in the feature mask.
      opts.on("--[no-]fast", "Enable fast mode") do |fast|
        if fast
          @options[:features] |= library::LIBBSC_FEATURE_FASTMODE
        else
          @options[:features] &= ~library::LIBBSC_FEATURE_FASTMODE
        end
      end

      opts.on("--[no-]threads", "Enable multi-threading") do |threads|
        if threads
          @options[:features] |= library::LIBBSC_FEATURE_MULTITHREADING
        else
          @options[:features] &= ~library::LIBBSC_FEATURE_MULTITHREADING
        end
      end

      opts.separator ""
      opts.separator "Other options:"

      opts.on("-v", "--verbose", "Verbose output") { @options[:verbose] = true }

      opts.on("-h", "--help", "Show this help") do
        puts opts
        exit
      end

      opts.separator ""
      opts.separator "Examples:"
      opts.separator " #{$0} -c -f input.txt -o output.bsc"
      opts.separator " #{$0} -d -f output.bsc -o restored.txt"
      opts.separator " #{$0} -i -f output.bsc"
      opts.separator " #{$0} -c -f large.txt -o large.bsc --lzp-hash 16 --lzp-min 64"
    end

    parser.parse!(args)
  end

  # Ensures an action and an existing input file were given, and that an
  # output file is present for compress/decompress. Prints the problem to
  # stderr and exits with status 1 on the first failed check.
  def validate_options
    abort "Error: Must specify an action (-c, -d, or -i)" unless @options[:action]
    abort "Error: Must specify input file (-f)" unless @options[:input]

    unless File.exist?(@options[:input])
      abort "Error: Input file '#{@options[:input]}' does not exist"
    end

    return if @options[:action] == :info || @options[:output]

    abort "Error: Must specify output file (-o) for compress/decompress actions"
  end

  # Compresses the input file to the output file and prints a summary.
  # The value returned by Compressor#compress_file is reported as the
  # compressed size in bytes.
  def compress_file
    puts "Compressing #{@options[:input]}..." if @options[:verbose]

    compressor = Compress::BSC::Compressor.new(
      lzp_hash_size: @options[:lzp_hash_size],
      lzp_min_len: @options[:lzp_min_len],
      block_sorter: @options[:block_sorter],
      coder: @options[:coder],
      features: @options[:features]
    )

    input_size = File.size(@options[:input])
    started_at = monotonic_now
    compressed_size = compressor.compress_file(@options[:input], @options[:output])
    duration = monotonic_now - started_at

    ratio = ratio_of(input_size, compressed_size)
    savings = savings_percent(input_size, compressed_size)
    speed = throughput_mb(input_size, duration)

    puts "Compression completed:"
    puts " Input size: #{format_bytes(input_size)}"
    puts " Output size: #{format_bytes(compressed_size)}"
    puts " Ratio: #{ratio.round(2)}:1 (#{savings}% savings)"
    puts " Time: #{duration.round(2)}s"
    puts " Speed: #{speed.round(2)} MB/s"

    return unless @options[:verbose]

    lzp_state =
      if @options[:lzp_hash_size] > 0
        "enabled (#{@options[:lzp_hash_size]}/#{@options[:lzp_min_len]})"
      else
        "disabled"
      end

    puts " Block sorter: #{block_sorter_name(@options[:block_sorter])}"
    puts " Coder: #{coder_name(@options[:coder])}"
    puts " LZP: #{lzp_state}"
    puts " Features: #{feature_names(@options[:features]).join(', ')}"
  end

  # Decompresses the input file to the output file and prints a summary.
  # The value returned by Decompressor#decompress_file is reported as the
  # decompressed size in bytes.
  def decompress_file
    puts "Decompressing #{@options[:input]}..." if @options[:verbose]

    # Block info is fetched before decompressing; it is only printed in
    # verbose mode.
    compressed_data = File.binread(@options[:input])
    info = Compress::BSC::Decompressor.block_info(compressed_data)

    decompressor = Compress::BSC::Decompressor.new(features: @options[:features])

    input_size = File.size(@options[:input])
    started_at = monotonic_now
    decompressed_size = decompressor.decompress_file(@options[:input], @options[:output])
    duration = monotonic_now - started_at

    speed = throughput_mb(decompressed_size, duration)

    puts "Decompression completed:"
    puts " Input size: #{format_bytes(input_size)}"
    puts " Output size: #{format_bytes(decompressed_size)}"
    puts " Time: #{duration.round(2)}s"
    puts " Speed: #{speed.round(2)} MB/s"

    return unless @options[:verbose]

    puts " Block size: #{format_bytes(info[:block_size])}"
    puts " Data size: #{format_bytes(info[:data_size])}"
  end

  # Prints the sizes reported by Decompressor.block_info for the input file,
  # together with the derived ratio and savings.
  def show_info
    puts "Analyzing #{@options[:input]}..."

    compressed_data = File.binread(@options[:input])
    info = Compress::BSC::Decompressor.block_info(compressed_data)

    file_size = File.size(@options[:input])
    ratio = ratio_of(info[:data_size], file_size)
    savings = savings_percent(info[:data_size], file_size)

    puts "File information:"
    puts " File size: #{format_bytes(file_size)}"
    puts " Block size: #{format_bytes(info[:block_size])}"
    puts " Original size: #{format_bytes(info[:data_size])}"
    puts " Compression ratio: #{ratio.round(2)}:1"
    puts " Space savings: #{savings}%"
    # NOTE(review): block_size - data_size is negative whenever the block is
    # smaller than the original data; confirm this is the intended "overhead".
    puts " Header overhead: #{format_bytes(info[:block_size] - info[:data_size])}"
  end

  # Formats a byte count using binary (1024-based) units up to TB.
  #
  # Exact multiples are printed without decimals ("1 KB"); everything else is
  # rounded to one decimal place ("1.5 KB").
  #
  # @param bytes [Numeric, nil] anything responding to #to_f
  # @return [String]
  def format_bytes(bytes)
    size = bytes.to_f
    unit_index = 0
    last_index = BYTE_UNITS.length - 1

    # Scale on the rounded value so that e.g. 1_048_575 bytes is shown as
    # "1.0 MB" instead of the nonsensical "1024.0 KB".
    while size.round(1) >= 1024 && unit_index < last_index
      size /= 1024
      unit_index += 1
    end

    shown = size == size.to_i ? size.to_i : size.round(1)
    "#{shown} #{BYTE_UNITS[unit_index]}"
  end

  # @return [Float] seconds from the monotonic clock, immune to wall-clock jumps
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # @return [Float] +numerator+ / +denominator+, or 0.0 when the denominator is zero
  def ratio_of(numerator, denominator)
    return 0.0 if denominator.to_f.zero?

    numerator.to_f / denominator
  end

  # @return [Float] percentage saved going from +original+ to +reduced+ bytes,
  #   rounded to one decimal; 0.0 when +original+ is zero
  def savings_percent(original, reduced)
    return 0.0 if original.to_f.zero?

    ((1 - reduced.to_f / original) * 100).round(1)
  end

  # @return [Float] throughput in MB/s; 0.0 when no measurable time elapsed
  def throughput_mb(bytes, seconds)
    return 0.0 unless seconds.positive?

    bytes / seconds / BYTES_PER_MEGABYTE
  end

  # Maps a block sorter constant to its display name.
  #
  # @param sorter [Integer]
  # @return [String]
  def block_sorter_name(sorter)
    case sorter
    when Compress::BSC::Library::LIBBSC_BLOCKSORTER_BWT then "BWT"
    when Compress::BSC::Library::LIBBSC_BLOCKSORTER_ST3 then "ST3"
    when Compress::BSC::Library::LIBBSC_BLOCKSORTER_ST4 then "ST4"
    when Compress::BSC::Library::LIBBSC_BLOCKSORTER_ST5 then "ST5"
    when Compress::BSC::Library::LIBBSC_BLOCKSORTER_ST6 then "ST6"
    when Compress::BSC::Library::LIBBSC_BLOCKSORTER_ST7 then "ST7"
    when Compress::BSC::Library::LIBBSC_BLOCKSORTER_ST8 then "ST8"
    else "Unknown (#{sorter})"
    end
  end

  # Maps a coder constant to its display name.
  #
  # @param coder [Integer]
  # @return [String]
  def coder_name(coder)
    case coder
    when Compress::BSC::Library::LIBBSC_CODER_QLFC_STATIC   then "QLFC Static"
    when Compress::BSC::Library::LIBBSC_CODER_QLFC_ADAPTIVE then "QLFC Adaptive"
    when Compress::BSC::Library::LIBBSC_CODER_QLFC_FAST     then "QLFC Fast"
    else "Unknown (#{coder})"
    end
  end

  # Lists the display names of every feature bit set in +features+.
  #
  # @param features [Integer] bit mask of LIBBSC_FEATURE_* flags
  # @return [Array<String>] names in a fixed order, or ["None"] when no known bit is set
  def feature_names(features)
    library = Compress::BSC::Library
    known = [
      [library::LIBBSC_FEATURE_FASTMODE, "Fast Mode"],
      [library::LIBBSC_FEATURE_MULTITHREADING, "Multi-threading"],
      [library::LIBBSC_FEATURE_LARGEPAGES, "Large Pages"],
      [library::LIBBSC_FEATURE_CUDA, "CUDA"]
    ]

    names = known.filter_map { |flag, label| label if (features & flag) != 0 }
    names.empty? ? ["None"] : names
  end
end
302
+
303
# Start the CLI only when this file is executed directly (not when loaded).
BSCCli.new.run(ARGV) if $PROGRAM_NAME == __FILE__
@@ -0,0 +1,26 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIEcDCCAtigAwIBAgIBATANBgkqhkiG9w0BAQsFADA/MREwDwYDVQQDDAhkamJl
3
+ cmc5NjEVMBMGCgmSJomT8ixkARkWBWdtYWlsMRMwEQYKCZImiZPyLGQBGRYDY29t
4
+ MB4XDTE4MDMxODE1MjIwN1oXDTI4MDMxNTE1MjIwN1owPzERMA8GA1UEAwwIZGpi
5
+ ZXJnOTYxFTATBgoJkiaJk/IsZAEZFgVnbWFpbDETMBEGCgmSJomT8ixkARkWA2Nv
6
+ bTCCAaIwDQYJKoZIhvcNAQEBBQADggGPADCCAYoCggGBALgfaroVM6CI06cxr0/h
7
+ A+j+pc8fgpRgBVmHFaFunq28GPC3IvW7Nvc3Y8SnAW7pP1EQIbhlwRIaQzJ93/yj
8
+ u95KpkP7tA9erypnV7dpzBkzNlX14ACaFD/6pHoXoe2ltBxk3CCyyzx70mTqJpph
9
+ 75IB03ni9a8yqn8pmse+s83bFJOAqddSj009sGPcQO+QOWiNxqYv1n5EHcvj2ebO
10
+ 6hN7YTmhx7aSia4qL/quc4DlIaGMWoAhvML7u1fmo53CYxkKskfN8MOecq2vfEmL
11
+ iLu+SsVVEAufMDDFMXMJlvDsviolUSGMSNRTujkyCcJoXKYYxZSNtIiyd9etI0X3
12
+ ctu0uhrFyrMZXCedutvXNjUolD5r9KGBFSWH1R9u2I3n3SAyFF2yzv/7idQHLJJq
13
+ 74BMnx0FIq6fCpu5slAipvxZ3ZkZpEXZFr3cIBtO1gFvQWW7E/Y3ijliWJS1GQFq
14
+ 058qERadHGu1yu1dojmFRo6W2KZvY9al2yIlbkpDrD5MYQIDAQABo3cwdTAJBgNV
15
+ HRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUFZsMapgzJimzsbaBG2Tm8j5e
16
+ AzgwHQYDVR0RBBYwFIESZGpiZXJnOTZAZ21haWwuY29tMB0GA1UdEgQWMBSBEmRq
17
+ YmVyZzk2QGdtYWlsLmNvbTANBgkqhkiG9w0BAQsFAAOCAYEAW2tnYixXQtKxgGXq
18
+ /3iSWG2bLwvxS4go3srO+aRXZHrFUMlJ5W0mCxl03aazxxKTsVVpZD8QZxvK91OQ
19
+ h9zr9JBYqCLcCVbr8SkmYCi/laxIZxsNE5YI8cC8vvlLI7AMgSfPSnn/Epq1GjGY
20
+ 6L1iRcEDtanGCIvjqlCXO9+BmsnCfEVehqZkQHeYczA03tpOWb6pon2wzvMKSsKH
21
+ ks0ApVdstSLz1kzzAqem/uHdG9FyXdbTAwH1G4ZPv69sQAFAOCgAqYmdnzedsQtE
22
+ 1LQfaQrx0twO+CZJPcRLEESjq8ScQxWRRkfuh2VeR7cEU7L7KqT10mtUwrvw7APf
23
+ DYoeCY9KyjIBjQXfbj2ke5u1hZj94Fsq9FfbEQg8ygCgwThnmkTrrKEiMSs3alYR
24
+ ORVCZpRuCPpmC8qmqxUnARDArzucjaclkxjLWvCVHeFa9UP7K3Nl9oTjJNv+7/jM
25
+ WZs4eecIcUc4tKdHxcAJ0MO/Dkqq7hGaiHpwKY76wQ1+8xAh
26
+ -----END CERTIFICATE-----
@@ -0,0 +1,45 @@
1
+ require_relative 'lib/compress/bsc/version'
2
+
3
# Gem specification for compress-bsc (Ruby FFI bindings for libbsc).
Gem::Specification.new do |spec|
  spec.name    = 'compress-bsc'
  spec.version = Compress::BSC::VERSION
  spec.author  = 'Daniel Berger'
  spec.email   = 'djberg96@gmail.com'

  spec.summary  = 'Ruby FFI bindings for libbsc (Block Sorting Compression Library)'
  spec.homepage = 'https://github.com/djberg96/compress-bsc'
  spec.license  = 'Apache-2.0'

  # Package everything in the tree except git-related paths and generated
  # artifacts: the SimpleCov HTML report under coverage/ and RSpec's
  # spec/examples.txt status file are build by-products, not gem content.
  spec.files = Dir['**/*'].reject do |f|
    f.include?('git') ||
      f.split('/').first == 'coverage' ||
      f == 'spec/examples.txt'
  end

  spec.bindir      = 'bin'
  spec.executables = ['rbsc']
  spec.cert_chain  = ['certs/djberg96_pub.pem']

  spec.required_ruby_version = '>= 2.7.0'

  spec.add_dependency 'ffi', '~> 1.15'

  spec.add_development_dependency 'rspec', '~> 3.12'
  spec.add_development_dependency 'rake', '~> 13.0'
  spec.add_development_dependency 'bundler', '~> 2.0'
  spec.add_development_dependency 'simplecov', '~> 0.22'
  spec.add_development_dependency 'rubocop', '~> 1.50'
  spec.add_development_dependency 'yard', '~> 0.9'

  spec.metadata = {
    'homepage_uri'          => 'https://github.com/djberg96/compress-bsc',
    'bug_tracker_uri'       => 'https://github.com/djberg96/compress-bsc/issues',
    # The changelog shipped with the gem is CHANGELOG.md.
    'changelog_uri'         => 'https://github.com/djberg96/compress-bsc/blob/main/CHANGELOG.md',
    'documentation_uri'     => 'https://github.com/djberg96/compress-bsc/wiki',
    'source_code_uri'       => 'https://github.com/djberg96/compress-bsc',
    'wiki_uri'              => 'https://github.com/djberg96/compress-bsc/wiki',
    'rubygems_mfa_required' => 'true',
    'github_repo'           => 'https://github.com/djberg96/compress-bsc',
    'funding_uri'           => 'https://github.com/sponsors/djberg96'
  }

  # Squiggly heredoc so source indentation does not leak into the description.
  spec.description = <<~EOF
    A Ruby interface to the libbsc high-performance block-sorting compression library
    from Ilya Grebnov using FFI.
  EOF
end