omnizip 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +243 -368
- data/README.adoc +101 -5
- data/docs/guides/archive-formats/index.adoc +31 -1
- data/docs/guides/archive-formats/ole-format.adoc +316 -0
- data/docs/guides/archive-formats/rpm-format.adoc +249 -0
- data/docs/index.adoc +12 -2
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
- data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
- data/lib/omnizip/algorithms/lzma.rb +20 -5
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
- data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
- data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
- data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
- data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
- data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
- data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
- data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
- data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
- data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
- data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
- data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
- data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
- data/lib/omnizip/buffer/memory_extractor.rb +3 -3
- data/lib/omnizip/buffer.rb +2 -2
- data/lib/omnizip/filters/delta.rb +2 -1
- data/lib/omnizip/filters/registry.rb +6 -6
- data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
- data/lib/omnizip/formats/lzip.rb +2 -1
- data/lib/omnizip/formats/lzma_alone.rb +2 -1
- data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
- data/lib/omnizip/formats/ole/constants.rb +61 -0
- data/lib/omnizip/formats/ole/dirent.rb +380 -0
- data/lib/omnizip/formats/ole/header.rb +198 -0
- data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
- data/lib/omnizip/formats/ole/storage.rb +305 -0
- data/lib/omnizip/formats/ole/types/variant.rb +328 -0
- data/lib/omnizip/formats/ole.rb +145 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
- data/lib/omnizip/formats/rar3/reader.rb +6 -2
- data/lib/omnizip/formats/rar5/reader.rb +4 -1
- data/lib/omnizip/formats/rpm/constants.rb +58 -0
- data/lib/omnizip/formats/rpm/entry.rb +102 -0
- data/lib/omnizip/formats/rpm/header.rb +113 -0
- data/lib/omnizip/formats/rpm/lead.rb +122 -0
- data/lib/omnizip/formats/rpm/tag.rb +230 -0
- data/lib/omnizip/formats/rpm.rb +434 -0
- data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
- data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
- data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
- data/lib/omnizip/formats/seven_zip.rb +10 -0
- data/lib/omnizip/formats/xar/entry.rb +18 -5
- data/lib/omnizip/formats/xar/header.rb +34 -6
- data/lib/omnizip/formats/xar/reader.rb +43 -10
- data/lib/omnizip/formats/xar/toc.rb +34 -21
- data/lib/omnizip/formats/xar/writer.rb +15 -5
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
- data/lib/omnizip/pipe/stream_compressor.rb +1 -1
- data/lib/omnizip/version.rb +1 -1
- data/readme-docs/compression-algorithms.adoc +6 -2
- metadata +30 -2
|
@@ -193,7 +193,12 @@ module Omnizip
|
|
|
193
193
|
next if [".", ".."].include?(entry)
|
|
194
194
|
|
|
195
195
|
child_path = File.join(path, entry)
|
|
196
|
-
child_archive_path =
|
|
196
|
+
child_archive_path = if archive_path
|
|
197
|
+
File.join(archive_path,
|
|
198
|
+
entry)
|
|
199
|
+
else
|
|
200
|
+
entry
|
|
201
|
+
end
|
|
197
202
|
add_tree(child_path, child_archive_path)
|
|
198
203
|
end
|
|
199
204
|
else
|
|
@@ -223,7 +228,8 @@ module Omnizip
|
|
|
223
228
|
|
|
224
229
|
# Calculate and write TOC checksum
|
|
225
230
|
file.pos
|
|
226
|
-
toc_checksum_data = compute_checksum(compressed_toc,
|
|
231
|
+
toc_checksum_data = compute_checksum(compressed_toc,
|
|
232
|
+
@options[:toc_checksum])
|
|
227
233
|
file.write(toc_checksum_data)
|
|
228
234
|
toc_checksum_size = toc_checksum_data.bytesize
|
|
229
235
|
|
|
@@ -269,7 +275,8 @@ module Omnizip
|
|
|
269
275
|
return if data.nil? || data.empty?
|
|
270
276
|
|
|
271
277
|
# Calculate extracted checksum
|
|
272
|
-
entry.extracted_checksum = compute_checksum_hex(data,
|
|
278
|
+
entry.extracted_checksum = compute_checksum_hex(data,
|
|
279
|
+
@options[:file_checksum])
|
|
273
280
|
entry.extracted_checksum_style = @options[:file_checksum]
|
|
274
281
|
|
|
275
282
|
# Compress data
|
|
@@ -279,11 +286,14 @@ module Omnizip
|
|
|
279
286
|
entry.data_size = data.bytesize
|
|
280
287
|
|
|
281
288
|
# Calculate archived checksum
|
|
282
|
-
entry.archived_checksum = compute_checksum_hex(compressed,
|
|
289
|
+
entry.archived_checksum = compute_checksum_hex(compressed,
|
|
290
|
+
@options[:file_checksum])
|
|
283
291
|
entry.archived_checksum_style = @options[:file_checksum]
|
|
284
292
|
|
|
285
293
|
# Add to heap
|
|
286
|
-
|
|
294
|
+
# Data offset must account for TOC checksum at start of heap
|
|
295
|
+
checksum_size = CHECKSUM_SIZES[@options[:toc_checksum]] || 0
|
|
296
|
+
entry.data_offset = checksum_size + @heap_data.bytesize
|
|
287
297
|
@heap_data << compressed
|
|
288
298
|
end
|
|
289
299
|
|
|
@@ -355,10 +355,10 @@ module Omnizip
|
|
|
355
355
|
# XZ Utils: lzma_delta_props_decode sets opt->dist = props[0] + 1
|
|
356
356
|
# So if props[0] = 0, distance = 1; if props[0] = 255, distance = 256
|
|
357
357
|
distance = if properties&.bytesize&.positive?
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
358
|
+
(properties.getbyte(0) || 0) + 1
|
|
359
|
+
else
|
|
360
|
+
1
|
|
361
|
+
end
|
|
362
362
|
|
|
363
363
|
Omnizip::Filters::Delta.new(distance).decode(data, 0)
|
|
364
364
|
end
|
|
@@ -383,19 +383,26 @@ module Omnizip
|
|
|
383
383
|
# Use the appropriate BCJ filter based on architecture
|
|
384
384
|
case architecture
|
|
385
385
|
when :x86
|
|
386
|
-
Omnizip::Filters::BCJ.new(architecture: :x86).decode(data,
|
|
386
|
+
Omnizip::Filters::BCJ.new(architecture: :x86).decode(data,
|
|
387
|
+
start_offset)
|
|
387
388
|
when :powerpc
|
|
388
|
-
Omnizip::Filters::BCJ.new(architecture: :powerpc).decode(data,
|
|
389
|
+
Omnizip::Filters::BCJ.new(architecture: :powerpc).decode(data,
|
|
390
|
+
start_offset)
|
|
389
391
|
when :ia64
|
|
390
|
-
Omnizip::Filters::BCJ.new(architecture: :ia64).decode(data,
|
|
392
|
+
Omnizip::Filters::BCJ.new(architecture: :ia64).decode(data,
|
|
393
|
+
start_offset)
|
|
391
394
|
when :arm
|
|
392
|
-
Omnizip::Filters::BCJ.new(architecture: :arm).decode(data,
|
|
395
|
+
Omnizip::Filters::BCJ.new(architecture: :arm).decode(data,
|
|
396
|
+
start_offset)
|
|
393
397
|
when :armthumb
|
|
394
|
-
Omnizip::Filters::BCJ.new(architecture: :armthumb).decode(data,
|
|
398
|
+
Omnizip::Filters::BCJ.new(architecture: :armthumb).decode(data,
|
|
399
|
+
start_offset)
|
|
395
400
|
when :sparc
|
|
396
|
-
Omnizip::Filters::BCJ.new(architecture: :sparc).decode(data,
|
|
401
|
+
Omnizip::Filters::BCJ.new(architecture: :sparc).decode(data,
|
|
402
|
+
start_offset)
|
|
397
403
|
when :arm64
|
|
398
|
-
Omnizip::Filters::BCJ.new(architecture: :arm64).decode(data,
|
|
404
|
+
Omnizip::Filters::BCJ.new(architecture: :arm64).decode(data,
|
|
405
|
+
start_offset)
|
|
399
406
|
else
|
|
400
407
|
raise Omnizip::FormatError,
|
|
401
408
|
"Unsupported BCJ architecture: #{architecture}"
|
|
@@ -429,7 +436,9 @@ module Omnizip
|
|
|
429
436
|
if ENV["DEBUG_ARM64_BCJ"]
|
|
430
437
|
puts "DEBUG ARM64 BCJ: start_offset=0x#{start_offset.to_s(16).upcase}"
|
|
431
438
|
puts "DEBUG ARM64 BCJ: input (first 32 bytes):"
|
|
432
|
-
puts data[0, 32].unpack1("H*").scan(/../).each_slice(16).map { |row|
|
|
439
|
+
puts data[0, 32].unpack1("H*").scan(/../).each_slice(16).map { |row|
|
|
440
|
+
row.join(" ")
|
|
441
|
+
}.join("\n")
|
|
433
442
|
end
|
|
434
443
|
|
|
435
444
|
# XZ Utils ARM64 BCJ filter implementation
|
|
@@ -480,7 +489,10 @@ module Omnizip
|
|
|
480
489
|
# DEBUG: Show output data
|
|
481
490
|
if ENV["DEBUG_ARM64_BCJ"]
|
|
482
491
|
puts "DEBUG ARM64 BCJ: output (first 32 bytes):"
|
|
483
|
-
puts result[0,
|
|
492
|
+
puts result[0,
|
|
493
|
+
32].unpack1("H*").scan(/../).each_slice(16).map { |row|
|
|
494
|
+
row.join(" ")
|
|
495
|
+
}.join("\n")
|
|
484
496
|
end
|
|
485
497
|
|
|
486
498
|
result
|
|
@@ -546,19 +558,19 @@ module Omnizip
|
|
|
546
558
|
|
|
547
559
|
properties = lzma2_filter[:properties]
|
|
548
560
|
dict_size = if properties&.bytesize&.positive?
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
561
|
+
prop = properties.getbyte(0)
|
|
562
|
+
if prop.even?
|
|
563
|
+
1 << ((prop / 2) + 12)
|
|
564
|
+
else
|
|
565
|
+
3 * (1 << (((prop - 1) / 2) + 11))
|
|
566
|
+
end
|
|
567
|
+
else
|
|
568
|
+
8 * 1024 * 1024 # 8MB default
|
|
569
|
+
end
|
|
558
570
|
|
|
559
571
|
# Create LZMA2 decoder with raw_mode for XZ format
|
|
560
572
|
decoder = Omnizip::Implementations::XZUtils::LZMA2::Decoder.new(input_buffer,
|
|
561
|
-
|
|
573
|
+
raw_mode: true)
|
|
562
574
|
|
|
563
575
|
# Set dict_size directly since we skipped property byte reading
|
|
564
576
|
decoder.instance_variable_set(:@dict_size, dict_size)
|
|
@@ -601,19 +613,19 @@ module Omnizip
|
|
|
601
613
|
# If prop is even: dict_size = 2^((prop/2) + 12)
|
|
602
614
|
# If prop is odd: dict_size = 3 * 2^((prop-1)/2 + 11)
|
|
603
615
|
dict_size = if properties&.bytesize&.positive?
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
616
|
+
prop = properties.getbyte(0)
|
|
617
|
+
if prop.even?
|
|
618
|
+
1 << ((prop / 2) + 12)
|
|
619
|
+
else
|
|
620
|
+
3 * (1 << (((prop - 1) / 2) + 11))
|
|
621
|
+
end
|
|
622
|
+
else
|
|
623
|
+
8 * 1024 * 1024 # 8MB default
|
|
624
|
+
end
|
|
613
625
|
|
|
614
626
|
# Create LZMA2 decoder with raw_mode for XZ format
|
|
615
627
|
decoder = Omnizip::Implementations::XZUtils::LZMA2::Decoder.new(input_buffer,
|
|
616
|
-
|
|
628
|
+
raw_mode: true)
|
|
617
629
|
|
|
618
630
|
# Set dict_size directly since we skipped property byte reading
|
|
619
631
|
decoder.instance_variable_set(:@dict_size, dict_size)
|
|
@@ -16,7 +16,8 @@ module Omnizip
|
|
|
16
16
|
|
|
17
17
|
attr_reader :uncompressed_size, :compressed_size
|
|
18
18
|
|
|
19
|
-
def initialize(check_type: CHECK_CRC64, dict_size: 8 * 1024 * 1024,
|
|
19
|
+
def initialize(check_type: CHECK_CRC64, dict_size: 8 * 1024 * 1024,
|
|
20
|
+
include_block_sizes: false)
|
|
20
21
|
@check_type = check_type
|
|
21
22
|
@dict_size = dict_size
|
|
22
23
|
@uncompressed_size = 0
|
|
@@ -139,7 +139,9 @@ module Omnizip
|
|
|
139
139
|
# "Index Padding MUST contain only null bytes" (XZ spec Section 4.1)
|
|
140
140
|
unless padding.bytes.all?(&:zero?)
|
|
141
141
|
raise FormatError,
|
|
142
|
-
"Index padding contains non-null bytes: #{padding.bytes.map
|
|
142
|
+
"Index padding contains non-null bytes: #{padding.bytes.map do |b|
|
|
143
|
+
'0x%02x' % b
|
|
144
|
+
end.join(', ')}"
|
|
143
145
|
end
|
|
144
146
|
|
|
145
147
|
# Add padding to index data for CRC calculation
|
|
@@ -85,7 +85,8 @@ module Omnizip
|
|
|
85
85
|
|
|
86
86
|
# Validate check type (only 0, 1, 4, 10 are valid)
|
|
87
87
|
unless [0, 1, 4, 10].include?(check_type)
|
|
88
|
-
raise FormatError,
|
|
88
|
+
raise FormatError,
|
|
89
|
+
"Unsupported check type: #{check_type} (not supported)"
|
|
89
90
|
end
|
|
90
91
|
|
|
91
92
|
# Verify CRC32 (bytes 8-11)
|
|
@@ -117,9 +117,10 @@ module Omnizip
|
|
|
117
117
|
eocd_data = buffer[i..]
|
|
118
118
|
comment_length = eocd_data[20, 2].unpack1("v")
|
|
119
119
|
|
|
120
|
-
# Verify this is the actual EOCD by checking if comment length
|
|
121
|
-
|
|
122
|
-
|
|
120
|
+
# Verify this is the actual EOCD by checking if comment length is reasonable
|
|
121
|
+
# Some ZIP tools add trailing data, so we check if comment fits within remaining buffer
|
|
122
|
+
if i + 22 + comment_length <= buffer.size
|
|
123
|
+
return from_binary(eocd_data[0, 22 + comment_length])
|
|
123
124
|
end
|
|
124
125
|
end
|
|
125
126
|
end
|
|
@@ -74,7 +74,10 @@ module Omnizip
|
|
|
74
74
|
# @return [String, Integer] Decompressed data or bytes written
|
|
75
75
|
def decode_stream(output = nil, preserve_dict: false)
|
|
76
76
|
@output_buffer = []
|
|
77
|
-
|
|
77
|
+
unless preserve_dict && @dictionary
|
|
78
|
+
@dictionary = Array.new(@dict_size,
|
|
79
|
+
0)
|
|
80
|
+
end
|
|
78
81
|
@dict_pos = 0
|
|
79
82
|
@dict_full = false
|
|
80
83
|
|
|
@@ -309,7 +312,8 @@ module Omnizip
|
|
|
309
312
|
|
|
310
313
|
if is_rep.zero?
|
|
311
314
|
# Simple match
|
|
312
|
-
len = @length_coder.decode(@range_decoder,
|
|
315
|
+
len = @length_coder.decode(@range_decoder,
|
|
316
|
+
pos_state) + MATCH_LEN_MIN
|
|
313
317
|
@state.update_match
|
|
314
318
|
|
|
315
319
|
# Decode distance
|
|
@@ -358,14 +362,16 @@ module Omnizip
|
|
|
358
362
|
return [1, @reps[0]]
|
|
359
363
|
end
|
|
360
364
|
|
|
361
|
-
len = @rep_length_coder.decode(@range_decoder,
|
|
365
|
+
len = @rep_length_coder.decode(@range_decoder,
|
|
366
|
+
pos_state) + MATCH_LEN_MIN
|
|
362
367
|
@state.update_rep
|
|
363
368
|
return [len, @reps[0]]
|
|
364
369
|
end
|
|
365
370
|
|
|
366
371
|
if @range_decoder.decode_bit(@is_rep1_models[@state.value]).zero?
|
|
367
372
|
# Rep1
|
|
368
|
-
len = @rep_length_coder.decode(@range_decoder,
|
|
373
|
+
len = @rep_length_coder.decode(@range_decoder,
|
|
374
|
+
pos_state) + MATCH_LEN_MIN
|
|
369
375
|
distance = @reps[1]
|
|
370
376
|
@reps[1] = @reps[0]
|
|
371
377
|
@reps[0] = distance
|
|
@@ -375,7 +381,8 @@ module Omnizip
|
|
|
375
381
|
|
|
376
382
|
if @range_decoder.decode_bit(@is_rep2_models[@state.value]).zero?
|
|
377
383
|
# Rep2
|
|
378
|
-
len = @rep_length_coder.decode(@range_decoder,
|
|
384
|
+
len = @rep_length_coder.decode(@range_decoder,
|
|
385
|
+
pos_state) + MATCH_LEN_MIN
|
|
379
386
|
distance = @reps[2]
|
|
380
387
|
@reps[2] = @reps[1]
|
|
381
388
|
@reps[1] = @reps[0]
|
|
@@ -385,7 +392,8 @@ module Omnizip
|
|
|
385
392
|
end
|
|
386
393
|
|
|
387
394
|
# Rep3
|
|
388
|
-
len = @rep_length_coder.decode(@range_decoder,
|
|
395
|
+
len = @rep_length_coder.decode(@range_decoder,
|
|
396
|
+
pos_state) + MATCH_LEN_MIN
|
|
389
397
|
distance = @reps[3]
|
|
390
398
|
@reps[3] = @reps[2]
|
|
391
399
|
@reps[2] = @reps[1]
|
|
@@ -151,7 +151,8 @@ module Omnizip
|
|
|
151
151
|
raise ArgumentError, "lc must be 0-8" unless @lc.between?(0, 8)
|
|
152
152
|
raise ArgumentError, "lp must be 0-4" unless @lp.between?(0, 4)
|
|
153
153
|
raise ArgumentError, "pb must be 0-4" unless @pb.between?(0, 4)
|
|
154
|
-
raise ArgumentError, "level must be 0-9" unless @level.between?(0,
|
|
154
|
+
raise ArgumentError, "level must be 0-9" unless @level.between?(0,
|
|
155
|
+
9)
|
|
155
156
|
return if @dict_size.between?(DICT_SIZE_MIN, DICT_SIZE_MAX)
|
|
156
157
|
|
|
157
158
|
raise ArgumentError, "Invalid dictionary size"
|
|
@@ -83,7 +83,8 @@ module Omnizip
|
|
|
83
83
|
|
|
84
84
|
@dictionary = Omnizip::Algorithms::LZMA::Dictionary.new(dict_size)
|
|
85
85
|
@state = Omnizip::Algorithms::LZMA::LZMAState.new(0)
|
|
86
|
-
@models = Omnizip::Algorithms::LZMA::XzProbabilityModels.new(lc,
|
|
86
|
+
@models = Omnizip::Algorithms::LZMA::XzProbabilityModels.new(lc,
|
|
87
|
+
lp, pb)
|
|
87
88
|
@match_finder = Omnizip::Algorithms::LZMA::MatchFinder.new(@dictionary)
|
|
88
89
|
@optimal = Omnizip::Algorithms::LZMA::OptimalEncoder.new(mode: :fast)
|
|
89
90
|
|
|
@@ -209,7 +210,9 @@ module Omnizip
|
|
|
209
210
|
|
|
210
211
|
# Initialize hash table
|
|
211
212
|
match_len_max = 2
|
|
212
|
-
end_pos = [
|
|
213
|
+
end_pos = [
|
|
214
|
+
@dictionary.buffer.bytesize + data.bytesize - match_len_max, 0
|
|
215
|
+
].max
|
|
213
216
|
@match_finder.skip(end_pos)
|
|
214
217
|
|
|
215
218
|
# Position in match finder's buffer for encoding
|
|
@@ -245,7 +248,8 @@ module Omnizip
|
|
|
245
248
|
pos += length
|
|
246
249
|
else
|
|
247
250
|
actual_distance = distance - REPS
|
|
248
|
-
encode_match(actual_distance, length, encoder, pos, match_pos,
|
|
251
|
+
encode_match(actual_distance, length, encoder, pos, match_pos,
|
|
252
|
+
data)
|
|
249
253
|
pos += length
|
|
250
254
|
end
|
|
251
255
|
end
|
|
@@ -270,7 +274,8 @@ module Omnizip
|
|
|
270
274
|
encoder.encode_symbols(temp_buffer, out_pos, 10000)
|
|
271
275
|
|
|
272
276
|
if out_pos.value.positive?
|
|
273
|
-
output.write(StringCompat.byteslice(temp_buffer, 0,
|
|
277
|
+
output.write(StringCompat.byteslice(temp_buffer, 0,
|
|
278
|
+
out_pos.value))
|
|
274
279
|
end
|
|
275
280
|
|
|
276
281
|
output.size - size_before
|
|
@@ -309,7 +314,8 @@ module Omnizip
|
|
|
309
314
|
if match_byte.nil?
|
|
310
315
|
encode_normal_literal(literal_offset, symbol, encoder)
|
|
311
316
|
else
|
|
312
|
-
encode_matched_literal(literal_offset, match_byte, symbol,
|
|
317
|
+
encode_matched_literal(literal_offset, match_byte, symbol,
|
|
318
|
+
encoder)
|
|
313
319
|
end
|
|
314
320
|
else
|
|
315
321
|
encode_normal_literal(literal_offset, symbol, encoder)
|
|
@@ -319,7 +325,8 @@ module Omnizip
|
|
|
319
325
|
end
|
|
320
326
|
|
|
321
327
|
# Encode normal match
|
|
322
|
-
def encode_match(distance, length, encoder, pos, match_pos,
|
|
328
|
+
def encode_match(distance, length, encoder, pos, match_pos,
|
|
329
|
+
_input_data)
|
|
323
330
|
pos_state = pos & ((1 << @pb) - 1)
|
|
324
331
|
|
|
325
332
|
prob_is_match = @models.is_match[@state.value][pos_state]
|
|
@@ -408,7 +415,8 @@ module Omnizip
|
|
|
408
415
|
end
|
|
409
416
|
end
|
|
410
417
|
|
|
411
|
-
def encode_matched_literal(literal_offset, match_byte, symbol,
|
|
418
|
+
def encode_matched_literal(literal_offset, match_byte, symbol,
|
|
419
|
+
encoder)
|
|
412
420
|
offset = 0x100
|
|
413
421
|
symbol += 0x100
|
|
414
422
|
|
|
@@ -418,7 +426,9 @@ module Omnizip
|
|
|
418
426
|
subcoder_index = offset + match_bit + (symbol >> 8)
|
|
419
427
|
bit = (symbol >> 7) & 1
|
|
420
428
|
|
|
421
|
-
encoder.queue_bit(
|
|
429
|
+
encoder.queue_bit(
|
|
430
|
+
@models.literal[literal_offset + subcoder_index], bit
|
|
431
|
+
)
|
|
422
432
|
|
|
423
433
|
symbol <<= 1
|
|
424
434
|
offset &= ~(match_byte ^ symbol)
|
|
@@ -430,15 +440,18 @@ module Omnizip
|
|
|
430
440
|
|
|
431
441
|
if len < 8
|
|
432
442
|
encoder.queue_bit(@models.match_len_encoder.choice, 0)
|
|
433
|
-
encode_bittree(@models.match_len_encoder.low[pos_state], 3, len,
|
|
443
|
+
encode_bittree(@models.match_len_encoder.low[pos_state], 3, len,
|
|
444
|
+
encoder)
|
|
434
445
|
elsif len < 16
|
|
435
446
|
encoder.queue_bit(@models.match_len_encoder.choice, 1)
|
|
436
447
|
encoder.queue_bit(@models.match_len_encoder.choice2, 0)
|
|
437
|
-
encode_bittree(@models.match_len_encoder.mid[pos_state], 3,
|
|
448
|
+
encode_bittree(@models.match_len_encoder.mid[pos_state], 3,
|
|
449
|
+
len - 8, encoder)
|
|
438
450
|
else
|
|
439
451
|
encoder.queue_bit(@models.match_len_encoder.choice, 1)
|
|
440
452
|
encoder.queue_bit(@models.match_len_encoder.choice2, 1)
|
|
441
|
-
encode_bittree(@models.match_len_encoder.high, 8, len - 16,
|
|
453
|
+
encode_bittree(@models.match_len_encoder.high, 8, len - 16,
|
|
454
|
+
encoder)
|
|
442
455
|
end
|
|
443
456
|
end
|
|
444
457
|
|
|
@@ -454,12 +467,14 @@ module Omnizip
|
|
|
454
467
|
dist_reduced = distance - base
|
|
455
468
|
|
|
456
469
|
if dist_slot < 14
|
|
457
|
-
encode_bittree_reverse(@models.dist_special, dist_reduced,
|
|
470
|
+
encode_bittree_reverse(@models.dist_special, dist_reduced,
|
|
471
|
+
footer_bits, base - dist_slot - 1, encoder)
|
|
458
472
|
else
|
|
459
473
|
direct_bits = footer_bits - 4
|
|
460
474
|
encoder.queue_direct_bits(dist_reduced >> 4, direct_bits)
|
|
461
475
|
align_mask = (1 << 4) - 1
|
|
462
|
-
encode_bittree_reverse(@models.dist_align,
|
|
476
|
+
encode_bittree_reverse(@models.dist_align,
|
|
477
|
+
dist_reduced & align_mask, 4, 0, encoder)
|
|
463
478
|
end
|
|
464
479
|
end
|
|
465
480
|
end
|
|
@@ -97,7 +97,8 @@ module Omnizip
|
|
|
97
97
|
# Shared state across all chunks
|
|
98
98
|
@dictionary = Omnizip::Algorithms::LZMA::Dictionary.new(dict_size)
|
|
99
99
|
@state = Omnizip::Algorithms::LZMA::LZMAState.new(0)
|
|
100
|
-
@models = Omnizip::Algorithms::LZMA::XzProbabilityModels.new(lc,
|
|
100
|
+
@models = Omnizip::Algorithms::LZMA::XzProbabilityModels.new(lc,
|
|
101
|
+
lp, pb)
|
|
101
102
|
@match_finder = Omnizip::Algorithms::LZMA::MatchFinder.new(@dictionary)
|
|
102
103
|
@optimal = Omnizip::Algorithms::LZMA::OptimalEncoder.new(mode: :fast)
|
|
103
104
|
|
|
@@ -234,7 +235,9 @@ module Omnizip
|
|
|
234
235
|
# We skip to position (start_pos + data.bytesize - MATCH_LEN_MAX),
|
|
235
236
|
# but ensure we don't go negative for small inputs
|
|
236
237
|
match_len_max = 2 # Minimum match length in LZMA2
|
|
237
|
-
end_pos = [
|
|
238
|
+
end_pos = [
|
|
239
|
+
@dictionary.buffer.bytesize + data.bytesize - match_len_max, 0
|
|
240
|
+
].max
|
|
238
241
|
@match_finder.skip(end_pos)
|
|
239
242
|
|
|
240
243
|
# Position in match finder's buffer for encoding
|
|
@@ -331,7 +334,8 @@ module Omnizip
|
|
|
331
334
|
# Use StringCompat.byteslice for Ruby 3.0-3.1 compatibility
|
|
332
335
|
# Ruby's [] operator has a bug with null bytes that can return extra bytes
|
|
333
336
|
# See: https://bugs.ruby-lang.org/issues/15985
|
|
334
|
-
output.write(StringCompat.byteslice(temp_buffer, 0,
|
|
337
|
+
output.write(StringCompat.byteslice(temp_buffer, 0,
|
|
338
|
+
out_pos.value))
|
|
335
339
|
end
|
|
336
340
|
|
|
337
341
|
# Return the number of bytes written
|
|
@@ -358,7 +362,8 @@ module Omnizip
|
|
|
358
362
|
# Use StringCompat.byteslice for Ruby 3.0-3.1 compatibility
|
|
359
363
|
# Ruby's [] operator has a bug with null bytes that can return extra bytes
|
|
360
364
|
# See: https://bugs.ruby-lang.org/issues/15985
|
|
361
|
-
output.write(StringCompat.byteslice(temp_buffer, 0,
|
|
365
|
+
output.write(StringCompat.byteslice(temp_buffer, 0,
|
|
366
|
+
out_pos.value))
|
|
362
367
|
end
|
|
363
368
|
|
|
364
369
|
# Return the number of bytes written
|
|
@@ -410,7 +415,8 @@ module Omnizip
|
|
|
410
415
|
end
|
|
411
416
|
|
|
412
417
|
# Encode normal match
|
|
413
|
-
def encode_match(distance, length, encoder, pos, match_pos,
|
|
418
|
+
def encode_match(distance, length, encoder, pos, match_pos,
|
|
419
|
+
_input_data)
|
|
414
420
|
pos_state = pos & ((1 << @pb) - 1)
|
|
415
421
|
|
|
416
422
|
# Encode is_match bit (1 for match) - uses OLD state value
|
|
@@ -554,7 +560,8 @@ module Omnizip
|
|
|
554
560
|
# @param match_byte [Integer] The match byte to compare against
|
|
555
561
|
# @param symbol [Integer] The literal byte to encode (0-255)
|
|
556
562
|
# @param encoder [XZBufferedRangeEncoder] The range encoder
|
|
557
|
-
def encode_matched_literal(literal_offset, match_byte, symbol,
|
|
563
|
+
def encode_matched_literal(literal_offset, match_byte, symbol,
|
|
564
|
+
encoder)
|
|
558
565
|
offset = 0x100
|
|
559
566
|
symbol += 0x100 # Start symbol at 256 (XZ Utils algorithm)
|
|
560
567
|
|
data/lib/omnizip/version.rb
CHANGED
data/readme-docs/compression-algorithms.adoc
CHANGED
|
@@ -290,7 +290,11 @@ deflate64.compress(input, output)
|
|
|
290
290
|
|
|
291
291
|
Zstandard (zstd) offers fast compression with good ratios, using a modern LZ77-based algorithm. It's designed to provide a good balance between compression ratio and speed.
|
|
292
292
|
|
|
293
|
-
**
|
|
293
|
+
**Implementation:** Pure Ruby implementation (RFC 8878 compliant). Supports:
|
|
294
|
+
- Raw blocks (uncompressed)
|
|
295
|
+
- RLE blocks (run-length encoding for repetitive data)
|
|
296
|
+
- Frame/block decoding
|
|
297
|
+
- Huffman/FSE compression infrastructure (encoder implemented, decoder in progress)
|
|
294
298
|
|
|
295
299
|
=== Characteristics
|
|
296
300
|
|
|
@@ -310,7 +314,7 @@ Zstandard (zstd) offers fast compression with good ratios, using a modern LZ77-b
|
|
|
310
314
|
**Not Ideal For:**
|
|
311
315
|
|
|
312
316
|
* Maximum compression needs (use LZMA instead)
|
|
313
|
-
*
|
|
317
|
+
* General-purpose compression (Huffman/FSE decoder not complete)
|
|
314
318
|
|
|
315
319
|
=== Compression Levels
|
|
316
320
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: omnizip
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.2
|
|
4
|
+
version: 0.3.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-02-
|
|
11
|
+
date: 2026-02-20 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: base64
|
|
@@ -145,8 +145,10 @@ files:
|
|
|
145
145
|
- docs/guides/advanced-features/streaming.adoc
|
|
146
146
|
- docs/guides/archive-formats/gzip-format.adoc
|
|
147
147
|
- docs/guides/archive-formats/index.adoc
|
|
148
|
+
- docs/guides/archive-formats/ole-format.adoc
|
|
148
149
|
- docs/guides/archive-formats/rar-format.adoc
|
|
149
150
|
- docs/guides/archive-formats/rar5.adoc
|
|
151
|
+
- docs/guides/archive-formats/rpm-format.adoc
|
|
150
152
|
- docs/guides/archive-formats/seven-zip-format.adoc
|
|
151
153
|
- docs/guides/archive-formats/tar-format.adoc
|
|
152
154
|
- docs/guides/archive-formats/xz-format.adoc
|
|
@@ -265,6 +267,16 @@ files:
|
|
|
265
267
|
- lib/omnizip/algorithms/zstandard/constants.rb
|
|
266
268
|
- lib/omnizip/algorithms/zstandard/decoder.rb
|
|
267
269
|
- lib/omnizip/algorithms/zstandard/encoder.rb
|
|
270
|
+
- lib/omnizip/algorithms/zstandard/frame/block.rb
|
|
271
|
+
- lib/omnizip/algorithms/zstandard/frame/header.rb
|
|
272
|
+
- lib/omnizip/algorithms/zstandard/fse/bitstream.rb
|
|
273
|
+
- lib/omnizip/algorithms/zstandard/fse/encoder.rb
|
|
274
|
+
- lib/omnizip/algorithms/zstandard/fse/table.rb
|
|
275
|
+
- lib/omnizip/algorithms/zstandard/huffman.rb
|
|
276
|
+
- lib/omnizip/algorithms/zstandard/huffman_encoder.rb
|
|
277
|
+
- lib/omnizip/algorithms/zstandard/literals.rb
|
|
278
|
+
- lib/omnizip/algorithms/zstandard/literals_encoder.rb
|
|
279
|
+
- lib/omnizip/algorithms/zstandard/sequences.rb
|
|
268
280
|
- lib/omnizip/buffer.rb
|
|
269
281
|
- lib/omnizip/buffer/memory_archive.rb
|
|
270
282
|
- lib/omnizip/buffer/memory_extractor.rb
|
|
@@ -344,6 +356,7 @@ files:
|
|
|
344
356
|
- lib/omnizip/formats/.keep
|
|
345
357
|
- lib/omnizip/formats/bzip2_file.rb
|
|
346
358
|
- lib/omnizip/formats/cpio.rb
|
|
359
|
+
- lib/omnizip/formats/cpio/bounded_io.rb
|
|
347
360
|
- lib/omnizip/formats/cpio/constants.rb
|
|
348
361
|
- lib/omnizip/formats/cpio/entry.rb
|
|
349
362
|
- lib/omnizip/formats/cpio/reader.rb
|
|
@@ -362,6 +375,14 @@ files:
|
|
|
362
375
|
- lib/omnizip/formats/iso/writer.rb
|
|
363
376
|
- lib/omnizip/formats/lzip.rb
|
|
364
377
|
- lib/omnizip/formats/lzma_alone.rb
|
|
378
|
+
- lib/omnizip/formats/ole.rb
|
|
379
|
+
- lib/omnizip/formats/ole/allocation_table.rb
|
|
380
|
+
- lib/omnizip/formats/ole/constants.rb
|
|
381
|
+
- lib/omnizip/formats/ole/dirent.rb
|
|
382
|
+
- lib/omnizip/formats/ole/header.rb
|
|
383
|
+
- lib/omnizip/formats/ole/ranges_io.rb
|
|
384
|
+
- lib/omnizip/formats/ole/storage.rb
|
|
385
|
+
- lib/omnizip/formats/ole/types/variant.rb
|
|
365
386
|
- lib/omnizip/formats/rar.rb
|
|
366
387
|
- lib/omnizip/formats/rar/archive_repairer.rb
|
|
367
388
|
- lib/omnizip/formats/rar/archive_verifier.rb
|
|
@@ -422,7 +443,14 @@ files:
|
|
|
422
443
|
- lib/omnizip/formats/rar5/decompressor.rb
|
|
423
444
|
- lib/omnizip/formats/rar5/reader.rb
|
|
424
445
|
- lib/omnizip/formats/rar5/writer.rb
|
|
446
|
+
- lib/omnizip/formats/rpm.rb
|
|
447
|
+
- lib/omnizip/formats/rpm/constants.rb
|
|
448
|
+
- lib/omnizip/formats/rpm/entry.rb
|
|
449
|
+
- lib/omnizip/formats/rpm/header.rb
|
|
450
|
+
- lib/omnizip/formats/rpm/lead.rb
|
|
451
|
+
- lib/omnizip/formats/rpm/tag.rb
|
|
425
452
|
- lib/omnizip/formats/seven_zip.rb
|
|
453
|
+
- lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb
|
|
426
454
|
- lib/omnizip/formats/seven_zip/coder_chain.rb
|
|
427
455
|
- lib/omnizip/formats/seven_zip/constants.rb
|
|
428
456
|
- lib/omnizip/formats/seven_zip/encoded_header.rb
|