omnizip 0.3.2 → 0.3.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +243 -368
  3. data/README.adoc +101 -5
  4. data/docs/guides/archive-formats/index.adoc +31 -1
  5. data/docs/guides/archive-formats/ole-format.adoc +316 -0
  6. data/docs/guides/archive-formats/rpm-format.adoc +249 -0
  7. data/docs/index.adoc +12 -2
  8. data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
  9. data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
  10. data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
  11. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
  12. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
  13. data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
  14. data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
  15. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
  16. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
  17. data/lib/omnizip/algorithms/lzma.rb +20 -5
  18. data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
  19. data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
  20. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
  21. data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
  22. data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
  23. data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
  24. data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
  25. data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
  26. data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
  27. data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
  28. data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
  29. data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
  30. data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
  31. data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
  32. data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
  33. data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
  34. data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
  35. data/lib/omnizip/buffer/memory_extractor.rb +3 -3
  36. data/lib/omnizip/buffer.rb +2 -2
  37. data/lib/omnizip/filters/delta.rb +2 -1
  38. data/lib/omnizip/filters/registry.rb +6 -6
  39. data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
  40. data/lib/omnizip/formats/lzip.rb +2 -1
  41. data/lib/omnizip/formats/lzma_alone.rb +2 -1
  42. data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
  43. data/lib/omnizip/formats/ole/constants.rb +61 -0
  44. data/lib/omnizip/formats/ole/dirent.rb +380 -0
  45. data/lib/omnizip/formats/ole/header.rb +198 -0
  46. data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
  47. data/lib/omnizip/formats/ole/storage.rb +305 -0
  48. data/lib/omnizip/formats/ole/types/variant.rb +328 -0
  49. data/lib/omnizip/formats/ole.rb +145 -0
  50. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
  51. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
  52. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
  53. data/lib/omnizip/formats/rar3/reader.rb +6 -2
  54. data/lib/omnizip/formats/rar5/reader.rb +4 -1
  55. data/lib/omnizip/formats/rpm/constants.rb +58 -0
  56. data/lib/omnizip/formats/rpm/entry.rb +102 -0
  57. data/lib/omnizip/formats/rpm/header.rb +113 -0
  58. data/lib/omnizip/formats/rpm/lead.rb +122 -0
  59. data/lib/omnizip/formats/rpm/tag.rb +230 -0
  60. data/lib/omnizip/formats/rpm.rb +434 -0
  61. data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
  62. data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
  63. data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
  64. data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
  65. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
  66. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
  67. data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
  68. data/lib/omnizip/formats/seven_zip.rb +10 -0
  69. data/lib/omnizip/formats/xar/entry.rb +18 -5
  70. data/lib/omnizip/formats/xar/header.rb +34 -6
  71. data/lib/omnizip/formats/xar/reader.rb +43 -10
  72. data/lib/omnizip/formats/xar/toc.rb +34 -21
  73. data/lib/omnizip/formats/xar/writer.rb +15 -5
  74. data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
  75. data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
  76. data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
  77. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
  78. data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
  79. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
  80. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
  81. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
  82. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
  83. data/lib/omnizip/pipe/stream_compressor.rb +1 -1
  84. data/lib/omnizip/version.rb +1 -1
  85. data/readme-docs/compression-algorithms.adoc +6 -2
  86. metadata +30 -2
@@ -193,7 +193,12 @@ module Omnizip
193
193
  next if [".", ".."].include?(entry)
194
194
 
195
195
  child_path = File.join(path, entry)
196
- child_archive_path = archive_path ? File.join(archive_path, entry) : entry
196
+ child_archive_path = if archive_path
197
+ File.join(archive_path,
198
+ entry)
199
+ else
200
+ entry
201
+ end
197
202
  add_tree(child_path, child_archive_path)
198
203
  end
199
204
  else
@@ -223,7 +228,8 @@ module Omnizip
223
228
 
224
229
  # Calculate and write TOC checksum
225
230
  file.pos
226
- toc_checksum_data = compute_checksum(compressed_toc, @options[:toc_checksum])
231
+ toc_checksum_data = compute_checksum(compressed_toc,
232
+ @options[:toc_checksum])
227
233
  file.write(toc_checksum_data)
228
234
  toc_checksum_size = toc_checksum_data.bytesize
229
235
 
@@ -269,7 +275,8 @@ module Omnizip
269
275
  return if data.nil? || data.empty?
270
276
 
271
277
  # Calculate extracted checksum
272
- entry.extracted_checksum = compute_checksum_hex(data, @options[:file_checksum])
278
+ entry.extracted_checksum = compute_checksum_hex(data,
279
+ @options[:file_checksum])
273
280
  entry.extracted_checksum_style = @options[:file_checksum]
274
281
 
275
282
  # Compress data
@@ -279,11 +286,14 @@ module Omnizip
279
286
  entry.data_size = data.bytesize
280
287
 
281
288
  # Calculate archived checksum
282
- entry.archived_checksum = compute_checksum_hex(compressed, @options[:file_checksum])
289
+ entry.archived_checksum = compute_checksum_hex(compressed,
290
+ @options[:file_checksum])
283
291
  entry.archived_checksum_style = @options[:file_checksum]
284
292
 
285
293
  # Add to heap
286
- entry.data_offset = @heap_data.bytesize
294
+ # Data offset must account for TOC checksum at start of heap
295
+ checksum_size = CHECKSUM_SIZES[@options[:toc_checksum]] || 0
296
+ entry.data_offset = checksum_size + @heap_data.bytesize
287
297
  @heap_data << compressed
288
298
  end
289
299
 
@@ -355,10 +355,10 @@ module Omnizip
355
355
  # XZ Utils: lzma_delta_props_decode sets opt->dist = props[0] + 1
356
356
  # So if props[0] = 0, distance = 1; if props[0] = 255, distance = 256
357
357
  distance = if properties&.bytesize&.positive?
358
- (properties.getbyte(0) || 0) + 1
359
- else
360
- 1
361
- end
358
+ (properties.getbyte(0) || 0) + 1
359
+ else
360
+ 1
361
+ end
362
362
 
363
363
  Omnizip::Filters::Delta.new(distance).decode(data, 0)
364
364
  end
@@ -383,19 +383,26 @@ module Omnizip
383
383
  # Use the appropriate BCJ filter based on architecture
384
384
  case architecture
385
385
  when :x86
386
- Omnizip::Filters::BCJ.new(architecture: :x86).decode(data, start_offset)
386
+ Omnizip::Filters::BCJ.new(architecture: :x86).decode(data,
387
+ start_offset)
387
388
  when :powerpc
388
- Omnizip::Filters::BCJ.new(architecture: :powerpc).decode(data, start_offset)
389
+ Omnizip::Filters::BCJ.new(architecture: :powerpc).decode(data,
390
+ start_offset)
389
391
  when :ia64
390
- Omnizip::Filters::BCJ.new(architecture: :ia64).decode(data, start_offset)
392
+ Omnizip::Filters::BCJ.new(architecture: :ia64).decode(data,
393
+ start_offset)
391
394
  when :arm
392
- Omnizip::Filters::BCJ.new(architecture: :arm).decode(data, start_offset)
395
+ Omnizip::Filters::BCJ.new(architecture: :arm).decode(data,
396
+ start_offset)
393
397
  when :armthumb
394
- Omnizip::Filters::BCJ.new(architecture: :armthumb).decode(data, start_offset)
398
+ Omnizip::Filters::BCJ.new(architecture: :armthumb).decode(data,
399
+ start_offset)
395
400
  when :sparc
396
- Omnizip::Filters::BCJ.new(architecture: :sparc).decode(data, start_offset)
401
+ Omnizip::Filters::BCJ.new(architecture: :sparc).decode(data,
402
+ start_offset)
397
403
  when :arm64
398
- Omnizip::Filters::BCJ.new(architecture: :arm64).decode(data, start_offset)
404
+ Omnizip::Filters::BCJ.new(architecture: :arm64).decode(data,
405
+ start_offset)
399
406
  else
400
407
  raise Omnizip::FormatError,
401
408
  "Unsupported BCJ architecture: #{architecture}"
@@ -429,7 +436,9 @@ module Omnizip
429
436
  if ENV["DEBUG_ARM64_BCJ"]
430
437
  puts "DEBUG ARM64 BCJ: start_offset=0x#{start_offset.to_s(16).upcase}"
431
438
  puts "DEBUG ARM64 BCJ: input (first 32 bytes):"
432
- puts data[0, 32].unpack1("H*").scan(/../).each_slice(16).map { |row| row.join(" ") }.join("\n")
439
+ puts data[0, 32].unpack1("H*").scan(/../).each_slice(16).map { |row|
440
+ row.join(" ")
441
+ }.join("\n")
433
442
  end
434
443
 
435
444
  # XZ Utils ARM64 BCJ filter implementation
@@ -480,7 +489,10 @@ module Omnizip
480
489
  # DEBUG: Show output data
481
490
  if ENV["DEBUG_ARM64_BCJ"]
482
491
  puts "DEBUG ARM64 BCJ: output (first 32 bytes):"
483
- puts result[0, 32].unpack1("H*").scan(/../).each_slice(16).map { |row| row.join(" ") }.join("\n")
492
+ puts result[0,
493
+ 32].unpack1("H*").scan(/../).each_slice(16).map { |row|
494
+ row.join(" ")
495
+ }.join("\n")
484
496
  end
485
497
 
486
498
  result
@@ -546,19 +558,19 @@ module Omnizip
546
558
 
547
559
  properties = lzma2_filter[:properties]
548
560
  dict_size = if properties&.bytesize&.positive?
549
- prop = properties.getbyte(0)
550
- if prop.even?
551
- 1 << ((prop / 2) + 12)
552
- else
553
- 3 * (1 << (((prop - 1) / 2) + 11))
554
- end
555
- else
556
- 8 * 1024 * 1024 # 8MB default
557
- end
561
+ prop = properties.getbyte(0)
562
+ if prop.even?
563
+ 1 << ((prop / 2) + 12)
564
+ else
565
+ 3 * (1 << (((prop - 1) / 2) + 11))
566
+ end
567
+ else
568
+ 8 * 1024 * 1024 # 8MB default
569
+ end
558
570
 
559
571
  # Create LZMA2 decoder with raw_mode for XZ format
560
572
  decoder = Omnizip::Implementations::XZUtils::LZMA2::Decoder.new(input_buffer,
561
- raw_mode: true)
573
+ raw_mode: true)
562
574
 
563
575
  # Set dict_size directly since we skipped property byte reading
564
576
  decoder.instance_variable_set(:@dict_size, dict_size)
@@ -601,19 +613,19 @@ module Omnizip
601
613
  # If prop is even: dict_size = 2^((prop/2) + 12)
602
614
  # If prop is odd: dict_size = 3 * 2^((prop-1)/2 + 11)
603
615
  dict_size = if properties&.bytesize&.positive?
604
- prop = properties.getbyte(0)
605
- if prop.even?
606
- 1 << ((prop / 2) + 12)
607
- else
608
- 3 * (1 << (((prop - 1) / 2) + 11))
609
- end
610
- else
611
- 8 * 1024 * 1024 # 8MB default
612
- end
616
+ prop = properties.getbyte(0)
617
+ if prop.even?
618
+ 1 << ((prop / 2) + 12)
619
+ else
620
+ 3 * (1 << (((prop - 1) / 2) + 11))
621
+ end
622
+ else
623
+ 8 * 1024 * 1024 # 8MB default
624
+ end
613
625
 
614
626
  # Create LZMA2 decoder with raw_mode for XZ format
615
627
  decoder = Omnizip::Implementations::XZUtils::LZMA2::Decoder.new(input_buffer,
616
- raw_mode: true)
628
+ raw_mode: true)
617
629
 
618
630
  # Set dict_size directly since we skipped property byte reading
619
631
  decoder.instance_variable_set(:@dict_size, dict_size)
@@ -16,7 +16,8 @@ module Omnizip
16
16
 
17
17
  attr_reader :uncompressed_size, :compressed_size
18
18
 
19
- def initialize(check_type: CHECK_CRC64, dict_size: 8 * 1024 * 1024, include_block_sizes: false)
19
+ def initialize(check_type: CHECK_CRC64, dict_size: 8 * 1024 * 1024,
20
+ include_block_sizes: false)
20
21
  @check_type = check_type
21
22
  @dict_size = dict_size
22
23
  @uncompressed_size = 0
@@ -139,7 +139,9 @@ module Omnizip
139
139
  # "Index Padding MUST contain only null bytes" (XZ spec Section 4.1)
140
140
  unless padding.bytes.all?(&:zero?)
141
141
  raise FormatError,
142
- "Index padding contains non-null bytes: #{padding.bytes.map { |b| '0x%02x' % b }.join(', ')}"
142
+ "Index padding contains non-null bytes: #{padding.bytes.map do |b|
143
+ '0x%02x' % b
144
+ end.join(', ')}"
143
145
  end
144
146
 
145
147
  # Add padding to index data for CRC calculation
@@ -85,7 +85,8 @@ module Omnizip
85
85
 
86
86
  # Validate check type (only 0, 1, 4, 10 are valid)
87
87
  unless [0, 1, 4, 10].include?(check_type)
88
- raise FormatError, "Unsupported check type: #{check_type} (not supported)"
88
+ raise FormatError,
89
+ "Unsupported check type: #{check_type} (not supported)"
89
90
  end
90
91
 
91
92
  # Verify CRC32 (bytes 8-11)
@@ -117,9 +117,10 @@ module Omnizip
117
117
  eocd_data = buffer[i..]
118
118
  comment_length = eocd_data[20, 2].unpack1("v")
119
119
 
120
- # Verify this is the actual EOCD by checking if comment length matches
121
- if i + 22 + comment_length == buffer.size
122
- return from_binary(eocd_data)
120
+ # Verify this is the actual EOCD by checking if comment length is reasonable
121
+ # Some ZIP tools add trailing data, so we check if comment fits within remaining buffer
122
+ if i + 22 + comment_length <= buffer.size
123
+ return from_binary(eocd_data[0, 22 + comment_length])
123
124
  end
124
125
  end
125
126
  end
@@ -74,7 +74,10 @@ module Omnizip
74
74
  # @return [String, Integer] Decompressed data or bytes written
75
75
  def decode_stream(output = nil, preserve_dict: false)
76
76
  @output_buffer = []
77
- @dictionary = Array.new(@dict_size, 0) unless preserve_dict && @dictionary
77
+ unless preserve_dict && @dictionary
78
+ @dictionary = Array.new(@dict_size,
79
+ 0)
80
+ end
78
81
  @dict_pos = 0
79
82
  @dict_full = false
80
83
 
@@ -309,7 +312,8 @@ module Omnizip
309
312
 
310
313
  if is_rep.zero?
311
314
  # Simple match
312
- len = @length_coder.decode(@range_decoder, pos_state) + MATCH_LEN_MIN
315
+ len = @length_coder.decode(@range_decoder,
316
+ pos_state) + MATCH_LEN_MIN
313
317
  @state.update_match
314
318
 
315
319
  # Decode distance
@@ -358,14 +362,16 @@ module Omnizip
358
362
  return [1, @reps[0]]
359
363
  end
360
364
 
361
- len = @rep_length_coder.decode(@range_decoder, pos_state) + MATCH_LEN_MIN
365
+ len = @rep_length_coder.decode(@range_decoder,
366
+ pos_state) + MATCH_LEN_MIN
362
367
  @state.update_rep
363
368
  return [len, @reps[0]]
364
369
  end
365
370
 
366
371
  if @range_decoder.decode_bit(@is_rep1_models[@state.value]).zero?
367
372
  # Rep1
368
- len = @rep_length_coder.decode(@range_decoder, pos_state) + MATCH_LEN_MIN
373
+ len = @rep_length_coder.decode(@range_decoder,
374
+ pos_state) + MATCH_LEN_MIN
369
375
  distance = @reps[1]
370
376
  @reps[1] = @reps[0]
371
377
  @reps[0] = distance
@@ -375,7 +381,8 @@ module Omnizip
375
381
 
376
382
  if @range_decoder.decode_bit(@is_rep2_models[@state.value]).zero?
377
383
  # Rep2
378
- len = @rep_length_coder.decode(@range_decoder, pos_state) + MATCH_LEN_MIN
384
+ len = @rep_length_coder.decode(@range_decoder,
385
+ pos_state) + MATCH_LEN_MIN
379
386
  distance = @reps[2]
380
387
  @reps[2] = @reps[1]
381
388
  @reps[1] = @reps[0]
@@ -385,7 +392,8 @@ module Omnizip
385
392
  end
386
393
 
387
394
  # Rep3
388
- len = @rep_length_coder.decode(@range_decoder, pos_state) + MATCH_LEN_MIN
395
+ len = @rep_length_coder.decode(@range_decoder,
396
+ pos_state) + MATCH_LEN_MIN
389
397
  distance = @reps[3]
390
398
  @reps[3] = @reps[2]
391
399
  @reps[2] = @reps[1]
@@ -151,7 +151,8 @@ module Omnizip
151
151
  raise ArgumentError, "lc must be 0-8" unless @lc.between?(0, 8)
152
152
  raise ArgumentError, "lp must be 0-4" unless @lp.between?(0, 4)
153
153
  raise ArgumentError, "pb must be 0-4" unless @pb.between?(0, 4)
154
- raise ArgumentError, "level must be 0-9" unless @level.between?(0, 9)
154
+ raise ArgumentError, "level must be 0-9" unless @level.between?(0,
155
+ 9)
155
156
  return if @dict_size.between?(DICT_SIZE_MIN, DICT_SIZE_MAX)
156
157
 
157
158
  raise ArgumentError, "Invalid dictionary size"
@@ -83,7 +83,8 @@ module Omnizip
83
83
 
84
84
  @dictionary = Omnizip::Algorithms::LZMA::Dictionary.new(dict_size)
85
85
  @state = Omnizip::Algorithms::LZMA::LZMAState.new(0)
86
- @models = Omnizip::Algorithms::LZMA::XzProbabilityModels.new(lc, lp, pb)
86
+ @models = Omnizip::Algorithms::LZMA::XzProbabilityModels.new(lc,
87
+ lp, pb)
87
88
  @match_finder = Omnizip::Algorithms::LZMA::MatchFinder.new(@dictionary)
88
89
  @optimal = Omnizip::Algorithms::LZMA::OptimalEncoder.new(mode: :fast)
89
90
 
@@ -209,7 +210,9 @@ module Omnizip
209
210
 
210
211
  # Initialize hash table
211
212
  match_len_max = 2
212
- end_pos = [@dictionary.buffer.bytesize + data.bytesize - match_len_max, 0].max
213
+ end_pos = [
214
+ @dictionary.buffer.bytesize + data.bytesize - match_len_max, 0
215
+ ].max
213
216
  @match_finder.skip(end_pos)
214
217
 
215
218
  # Position in match finder's buffer for encoding
@@ -245,7 +248,8 @@ module Omnizip
245
248
  pos += length
246
249
  else
247
250
  actual_distance = distance - REPS
248
- encode_match(actual_distance, length, encoder, pos, match_pos, data)
251
+ encode_match(actual_distance, length, encoder, pos, match_pos,
252
+ data)
249
253
  pos += length
250
254
  end
251
255
  end
@@ -270,7 +274,8 @@ module Omnizip
270
274
  encoder.encode_symbols(temp_buffer, out_pos, 10000)
271
275
 
272
276
  if out_pos.value.positive?
273
- output.write(StringCompat.byteslice(temp_buffer, 0, out_pos.value))
277
+ output.write(StringCompat.byteslice(temp_buffer, 0,
278
+ out_pos.value))
274
279
  end
275
280
 
276
281
  output.size - size_before
@@ -309,7 +314,8 @@ module Omnizip
309
314
  if match_byte.nil?
310
315
  encode_normal_literal(literal_offset, symbol, encoder)
311
316
  else
312
- encode_matched_literal(literal_offset, match_byte, symbol, encoder)
317
+ encode_matched_literal(literal_offset, match_byte, symbol,
318
+ encoder)
313
319
  end
314
320
  else
315
321
  encode_normal_literal(literal_offset, symbol, encoder)
@@ -319,7 +325,8 @@ module Omnizip
319
325
  end
320
326
 
321
327
  # Encode normal match
322
- def encode_match(distance, length, encoder, pos, match_pos, _input_data)
328
+ def encode_match(distance, length, encoder, pos, match_pos,
329
+ _input_data)
323
330
  pos_state = pos & ((1 << @pb) - 1)
324
331
 
325
332
  prob_is_match = @models.is_match[@state.value][pos_state]
@@ -408,7 +415,8 @@ module Omnizip
408
415
  end
409
416
  end
410
417
 
411
- def encode_matched_literal(literal_offset, match_byte, symbol, encoder)
418
+ def encode_matched_literal(literal_offset, match_byte, symbol,
419
+ encoder)
412
420
  offset = 0x100
413
421
  symbol += 0x100
414
422
 
@@ -418,7 +426,9 @@ module Omnizip
418
426
  subcoder_index = offset + match_bit + (symbol >> 8)
419
427
  bit = (symbol >> 7) & 1
420
428
 
421
- encoder.queue_bit(@models.literal[literal_offset + subcoder_index], bit)
429
+ encoder.queue_bit(
430
+ @models.literal[literal_offset + subcoder_index], bit
431
+ )
422
432
 
423
433
  symbol <<= 1
424
434
  offset &= ~(match_byte ^ symbol)
@@ -430,15 +440,18 @@ module Omnizip
430
440
 
431
441
  if len < 8
432
442
  encoder.queue_bit(@models.match_len_encoder.choice, 0)
433
- encode_bittree(@models.match_len_encoder.low[pos_state], 3, len, encoder)
443
+ encode_bittree(@models.match_len_encoder.low[pos_state], 3, len,
444
+ encoder)
434
445
  elsif len < 16
435
446
  encoder.queue_bit(@models.match_len_encoder.choice, 1)
436
447
  encoder.queue_bit(@models.match_len_encoder.choice2, 0)
437
- encode_bittree(@models.match_len_encoder.mid[pos_state], 3, len - 8, encoder)
448
+ encode_bittree(@models.match_len_encoder.mid[pos_state], 3,
449
+ len - 8, encoder)
438
450
  else
439
451
  encoder.queue_bit(@models.match_len_encoder.choice, 1)
440
452
  encoder.queue_bit(@models.match_len_encoder.choice2, 1)
441
- encode_bittree(@models.match_len_encoder.high, 8, len - 16, encoder)
453
+ encode_bittree(@models.match_len_encoder.high, 8, len - 16,
454
+ encoder)
442
455
  end
443
456
  end
444
457
 
@@ -454,12 +467,14 @@ module Omnizip
454
467
  dist_reduced = distance - base
455
468
 
456
469
  if dist_slot < 14
457
- encode_bittree_reverse(@models.dist_special, dist_reduced, footer_bits, base - dist_slot - 1, encoder)
470
+ encode_bittree_reverse(@models.dist_special, dist_reduced,
471
+ footer_bits, base - dist_slot - 1, encoder)
458
472
  else
459
473
  direct_bits = footer_bits - 4
460
474
  encoder.queue_direct_bits(dist_reduced >> 4, direct_bits)
461
475
  align_mask = (1 << 4) - 1
462
- encode_bittree_reverse(@models.dist_align, dist_reduced & align_mask, 4, 0, encoder)
476
+ encode_bittree_reverse(@models.dist_align,
477
+ dist_reduced & align_mask, 4, 0, encoder)
463
478
  end
464
479
  end
465
480
  end
@@ -97,7 +97,8 @@ module Omnizip
97
97
  # Shared state across all chunks
98
98
  @dictionary = Omnizip::Algorithms::LZMA::Dictionary.new(dict_size)
99
99
  @state = Omnizip::Algorithms::LZMA::LZMAState.new(0)
100
- @models = Omnizip::Algorithms::LZMA::XzProbabilityModels.new(lc, lp, pb)
100
+ @models = Omnizip::Algorithms::LZMA::XzProbabilityModels.new(lc,
101
+ lp, pb)
101
102
  @match_finder = Omnizip::Algorithms::LZMA::MatchFinder.new(@dictionary)
102
103
  @optimal = Omnizip::Algorithms::LZMA::OptimalEncoder.new(mode: :fast)
103
104
 
@@ -234,7 +235,9 @@ module Omnizip
234
235
  # We skip to position (start_pos + data.bytesize - MATCH_LEN_MAX),
235
236
  # but ensure we don't go negative for small inputs
236
237
  match_len_max = 2 # Minimum match length in LZMA2
237
- end_pos = [@dictionary.buffer.bytesize + data.bytesize - match_len_max, 0].max
238
+ end_pos = [
239
+ @dictionary.buffer.bytesize + data.bytesize - match_len_max, 0
240
+ ].max
238
241
  @match_finder.skip(end_pos)
239
242
 
240
243
  # Position in match finder's buffer for encoding
@@ -331,7 +334,8 @@ module Omnizip
331
334
  # Use StringCompat.byteslice for Ruby 3.0-3.1 compatibility
332
335
  # Ruby's [] operator has a bug with null bytes that can return extra bytes
333
336
  # See: https://bugs.ruby-lang.org/issues/15985
334
- output.write(StringCompat.byteslice(temp_buffer, 0, out_pos.value))
337
+ output.write(StringCompat.byteslice(temp_buffer, 0,
338
+ out_pos.value))
335
339
  end
336
340
 
337
341
  # Return the number of bytes written
@@ -358,7 +362,8 @@ module Omnizip
358
362
  # Use StringCompat.byteslice for Ruby 3.0-3.1 compatibility
359
363
  # Ruby's [] operator has a bug with null bytes that can return extra bytes
360
364
  # See: https://bugs.ruby-lang.org/issues/15985
361
- output.write(StringCompat.byteslice(temp_buffer, 0, out_pos.value))
365
+ output.write(StringCompat.byteslice(temp_buffer, 0,
366
+ out_pos.value))
362
367
  end
363
368
 
364
369
  # Return the number of bytes written
@@ -410,7 +415,8 @@ module Omnizip
410
415
  end
411
416
 
412
417
  # Encode normal match
413
- def encode_match(distance, length, encoder, pos, match_pos, _input_data)
418
+ def encode_match(distance, length, encoder, pos, match_pos,
419
+ _input_data)
414
420
  pos_state = pos & ((1 << @pb) - 1)
415
421
 
416
422
  # Encode is_match bit (1 for match) - uses OLD state value
@@ -554,7 +560,8 @@ module Omnizip
554
560
  # @param match_byte [Integer] The match byte to compare against
555
561
  # @param symbol [Integer] The literal byte to encode (0-255)
556
562
  # @param encoder [XZBufferedRangeEncoder] The range encoder
557
- def encode_matched_literal(literal_offset, match_byte, symbol, encoder)
563
+ def encode_matched_literal(literal_offset, match_byte, symbol,
564
+ encoder)
558
565
  offset = 0x100
559
566
  symbol += 0x100 # Start symbol at 256 (XZ Utils algorithm)
560
567
 
@@ -60,7 +60,7 @@ module Omnizip
60
60
  case @format
61
61
  when :zip
62
62
  compress_zip
63
- when :seven_zip, :'7z'
63
+ when :seven_zip, :"7z"
64
64
  compress_7z
65
65
  else
66
66
  raise ArgumentError, "Unsupported format: #{@format}"
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Omnizip
4
- VERSION = "0.3.2"
4
+ VERSION = "0.3.4"
5
5
  end
@@ -290,7 +290,11 @@ deflate64.compress(input, output)
290
290
 
291
291
  Zstandard (zstd) offers fast compression with good ratios, using a modern LZ77-based algorithm. It's designed to provide a good balance between compression ratio and speed.
292
292
 
293
- **Note:** Current implementation uses the zstd-ruby gem. A pure Ruby implementation is planned for full portability.
293
+ **Implementation:** Pure Ruby implementation (RFC 8878 compliant). Supports:
294
+ - Raw blocks (uncompressed)
295
+ - RLE blocks (run-length encoding for repetitive data)
296
+ - Frame/block decoding
297
+ - Huffman/FSE compression infrastructure (encoder implemented, decoder in progress)
294
298
 
295
299
  === Characteristics
296
300
 
@@ -310,7 +314,7 @@ Zstandard (zstd) offers fast compression with good ratios, using a modern LZ77-b
310
314
  **Not Ideal For:**
311
315
 
312
316
  * Maximum compression needs (use LZMA instead)
313
- * Environments requiring pure Ruby (until pure Ruby implementation is complete)
317
+ * General-purpose compression (Huffman/FSE decoder not complete)
314
318
 
315
319
  === Compression Levels
316
320
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omnizip
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-02-19 00:00:00.000000000 Z
11
+ date: 2026-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -145,8 +145,10 @@ files:
145
145
  - docs/guides/advanced-features/streaming.adoc
146
146
  - docs/guides/archive-formats/gzip-format.adoc
147
147
  - docs/guides/archive-formats/index.adoc
148
+ - docs/guides/archive-formats/ole-format.adoc
148
149
  - docs/guides/archive-formats/rar-format.adoc
149
150
  - docs/guides/archive-formats/rar5.adoc
151
+ - docs/guides/archive-formats/rpm-format.adoc
150
152
  - docs/guides/archive-formats/seven-zip-format.adoc
151
153
  - docs/guides/archive-formats/tar-format.adoc
152
154
  - docs/guides/archive-formats/xz-format.adoc
@@ -265,6 +267,16 @@ files:
265
267
  - lib/omnizip/algorithms/zstandard/constants.rb
266
268
  - lib/omnizip/algorithms/zstandard/decoder.rb
267
269
  - lib/omnizip/algorithms/zstandard/encoder.rb
270
+ - lib/omnizip/algorithms/zstandard/frame/block.rb
271
+ - lib/omnizip/algorithms/zstandard/frame/header.rb
272
+ - lib/omnizip/algorithms/zstandard/fse/bitstream.rb
273
+ - lib/omnizip/algorithms/zstandard/fse/encoder.rb
274
+ - lib/omnizip/algorithms/zstandard/fse/table.rb
275
+ - lib/omnizip/algorithms/zstandard/huffman.rb
276
+ - lib/omnizip/algorithms/zstandard/huffman_encoder.rb
277
+ - lib/omnizip/algorithms/zstandard/literals.rb
278
+ - lib/omnizip/algorithms/zstandard/literals_encoder.rb
279
+ - lib/omnizip/algorithms/zstandard/sequences.rb
268
280
  - lib/omnizip/buffer.rb
269
281
  - lib/omnizip/buffer/memory_archive.rb
270
282
  - lib/omnizip/buffer/memory_extractor.rb
@@ -344,6 +356,7 @@ files:
344
356
  - lib/omnizip/formats/.keep
345
357
  - lib/omnizip/formats/bzip2_file.rb
346
358
  - lib/omnizip/formats/cpio.rb
359
+ - lib/omnizip/formats/cpio/bounded_io.rb
347
360
  - lib/omnizip/formats/cpio/constants.rb
348
361
  - lib/omnizip/formats/cpio/entry.rb
349
362
  - lib/omnizip/formats/cpio/reader.rb
@@ -362,6 +375,14 @@ files:
362
375
  - lib/omnizip/formats/iso/writer.rb
363
376
  - lib/omnizip/formats/lzip.rb
364
377
  - lib/omnizip/formats/lzma_alone.rb
378
+ - lib/omnizip/formats/ole.rb
379
+ - lib/omnizip/formats/ole/allocation_table.rb
380
+ - lib/omnizip/formats/ole/constants.rb
381
+ - lib/omnizip/formats/ole/dirent.rb
382
+ - lib/omnizip/formats/ole/header.rb
383
+ - lib/omnizip/formats/ole/ranges_io.rb
384
+ - lib/omnizip/formats/ole/storage.rb
385
+ - lib/omnizip/formats/ole/types/variant.rb
365
386
  - lib/omnizip/formats/rar.rb
366
387
  - lib/omnizip/formats/rar/archive_repairer.rb
367
388
  - lib/omnizip/formats/rar/archive_verifier.rb
@@ -422,7 +443,14 @@ files:
422
443
  - lib/omnizip/formats/rar5/decompressor.rb
423
444
  - lib/omnizip/formats/rar5/reader.rb
424
445
  - lib/omnizip/formats/rar5/writer.rb
446
+ - lib/omnizip/formats/rpm.rb
447
+ - lib/omnizip/formats/rpm/constants.rb
448
+ - lib/omnizip/formats/rpm/entry.rb
449
+ - lib/omnizip/formats/rpm/header.rb
450
+ - lib/omnizip/formats/rpm/lead.rb
451
+ - lib/omnizip/formats/rpm/tag.rb
425
452
  - lib/omnizip/formats/seven_zip.rb
453
+ - lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb
426
454
  - lib/omnizip/formats/seven_zip/coder_chain.rb
427
455
  - lib/omnizip/formats/seven_zip/constants.rb
428
456
  - lib/omnizip/formats/seven_zip/encoded_header.rb