cabriolet 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +700 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +154 -14
- data/lib/cabriolet/binary/bitstream_writer.rb +129 -17
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +35 -43
- data/lib/cabriolet/cab/decompressor.rb +14 -19
- data/lib/cabriolet/cab/extractor.rb +140 -31
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +34 -45
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +3 -2
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +626 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +633 -38
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +67 -17
- metadata +33 -2
|
@@ -40,13 +40,17 @@ module Cabriolet
|
|
|
40
40
|
|
|
41
41
|
# Decompress LZSS data
|
|
42
42
|
#
|
|
43
|
-
# @param bytes [Integer]
|
|
44
|
-
# until EOF)
|
|
43
|
+
# @param bytes [Integer, nil] Maximum number of output bytes to write (nil or 0 = until EOF)
|
|
45
44
|
# @return [Integer] Number of bytes decompressed
|
|
46
|
-
def decompress(
|
|
45
|
+
def decompress(bytes = nil)
|
|
47
46
|
bytes_written = 0
|
|
47
|
+
# Only enforce limit if bytes is a positive integer
|
|
48
|
+
enforce_limit = bytes&.positive?
|
|
48
49
|
|
|
49
50
|
loop do
|
|
51
|
+
# Check if we've reached the output byte limit (only when limit is enforced)
|
|
52
|
+
break if enforce_limit && bytes_written >= bytes
|
|
53
|
+
|
|
50
54
|
# Read control byte
|
|
51
55
|
control_byte = read_input_byte
|
|
52
56
|
break if control_byte.nil?
|
|
@@ -55,6 +59,9 @@ module Cabriolet
|
|
|
55
59
|
|
|
56
60
|
# Process each bit in the control byte
|
|
57
61
|
8.times do |bit_index|
|
|
62
|
+
# Check output limit before each operation (only when limit is enforced)
|
|
63
|
+
break if enforce_limit && bytes_written >= bytes
|
|
64
|
+
|
|
58
65
|
mask = 1 << bit_index
|
|
59
66
|
|
|
60
67
|
if control_byte.anybits?(mask)
|
|
@@ -81,6 +88,9 @@ module Cabriolet
|
|
|
81
88
|
|
|
82
89
|
# Copy from window
|
|
83
90
|
length.times do
|
|
91
|
+
# Check if we've reached the limit mid-match
|
|
92
|
+
break if enforce_limit && bytes_written >= bytes
|
|
93
|
+
|
|
84
94
|
byte = @window[match_pos]
|
|
85
95
|
@window[@window_pos] = byte
|
|
86
96
|
write_output_byte(byte)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "base"
|
|
4
|
+
|
|
3
5
|
module Cabriolet
|
|
4
6
|
module Decompressors
|
|
5
7
|
# LZX handles LZX compressed data
|
|
@@ -100,7 +102,7 @@ module Cabriolet
|
|
|
100
102
|
# @param output_length [Integer] Expected output length for E8 processing
|
|
101
103
|
# @param is_delta [Boolean] Whether this is LZX DELTA format
|
|
102
104
|
def initialize(io_system, input, output, buffer_size, window_bits:,
|
|
103
|
-
reset_interval: 0, output_length: 0, is_delta: false)
|
|
105
|
+
reset_interval: 0, output_length: 0, is_delta: false, salvage: false, **_kwargs)
|
|
104
106
|
super(io_system, input, output, buffer_size)
|
|
105
107
|
|
|
106
108
|
# Validate window_bits
|
|
@@ -146,8 +148,9 @@ module Cabriolet
|
|
|
146
148
|
@intel_started = false
|
|
147
149
|
@e8_buf = "\0" * FRAME_SIZE
|
|
148
150
|
|
|
149
|
-
# Initialize bitstream
|
|
150
|
-
@bitstream = Binary::Bitstream.new(io_system, input, buffer_size
|
|
151
|
+
# Initialize bitstream (LZX uses MSB-first bit ordering per libmspack lzxd.c)
|
|
152
|
+
@bitstream = Binary::Bitstream.new(io_system, input, buffer_size,
|
|
153
|
+
bit_order: :msb, salvage: salvage)
|
|
151
154
|
|
|
152
155
|
# Initialize Huffman trees
|
|
153
156
|
initialize_trees
|
|
@@ -173,19 +176,21 @@ module Cabriolet
|
|
|
173
176
|
def decompress(bytes)
|
|
174
177
|
return 0 if bytes <= 0
|
|
175
178
|
|
|
179
|
+
# Read Intel filesize header if not already read (once per stream)
|
|
180
|
+
read_intel_header unless @header_read
|
|
181
|
+
|
|
176
182
|
total_written = 0
|
|
177
183
|
end_frame = ((@offset + bytes) / FRAME_SIZE) + 1
|
|
178
184
|
|
|
179
185
|
while @frame < end_frame
|
|
180
|
-
# Check reset interval
|
|
181
|
-
|
|
186
|
+
# Check reset interval - reset offset registers at frame boundaries
|
|
187
|
+
if @reset_interval.positive? && (@frame % @reset_interval).zero? && @frame.positive?
|
|
188
|
+
@r0 = @r1 = @r2 = 1
|
|
189
|
+
end
|
|
182
190
|
|
|
183
191
|
# Read DELTA chunk size if needed
|
|
184
192
|
@bitstream.read_bits(16) if @is_delta
|
|
185
193
|
|
|
186
|
-
# Read Intel filesize header if needed
|
|
187
|
-
read_intel_header unless @header_read
|
|
188
|
-
|
|
189
194
|
# Calculate frame size
|
|
190
195
|
frame_size = calculate_frame_size
|
|
191
196
|
|
|
@@ -238,6 +243,10 @@ module Cabriolet
|
|
|
238
243
|
|
|
239
244
|
# Reset LZX state (called at reset intervals)
|
|
240
245
|
#
|
|
246
|
+
# Per libmspack: Only reset state variables, NOT Huffman code lengths.
|
|
247
|
+
# Lengths persist across blocks and are updated via delta encoding.
|
|
248
|
+
# They are only zeroed at initialization (in initialize_trees).
|
|
249
|
+
#
|
|
241
250
|
# @return [void]
|
|
242
251
|
def reset_state
|
|
243
252
|
@r0 = 1
|
|
@@ -247,12 +256,17 @@ module Cabriolet
|
|
|
247
256
|
@block_remaining = 0
|
|
248
257
|
@block_type = BLOCKTYPE_INVALID
|
|
249
258
|
|
|
250
|
-
#
|
|
251
|
-
|
|
252
|
-
|
|
259
|
+
# NOTE: Do NOT reset @maintree_lengths or @length_lengths here!
|
|
260
|
+
# Per libmspack lzxd.c line 267-269, lengths are initialized to 0
|
|
261
|
+
# only once (at start) "because deltas will be applied to them".
|
|
262
|
+
# Resetting them here breaks delta encoding between blocks.
|
|
253
263
|
end
|
|
254
264
|
|
|
255
|
-
# Read Intel filesize header
|
|
265
|
+
# Read Intel filesize header (once per stream, before any frames)
|
|
266
|
+
#
|
|
267
|
+
# Format per libmspack:
|
|
268
|
+
# - 1 bit: Intel flag (if 0, filesize = 0; if 1, read 32-bit filesize)
|
|
269
|
+
# - If flag is 1: 32 bits for filesize (16 bits high, 16 bits low)
|
|
256
270
|
#
|
|
257
271
|
# @return [void]
|
|
258
272
|
def read_intel_header
|
|
@@ -304,13 +318,20 @@ module Cabriolet
|
|
|
304
318
|
|
|
305
319
|
# Read block header
|
|
306
320
|
#
|
|
321
|
+
# LZX block header format (per libmspack):
|
|
322
|
+
# - 3 bits: block_type
|
|
323
|
+
# - 24 bits: block_length (16 bits high, 8 bits low, combined as (high << 8) | low)
|
|
324
|
+
#
|
|
307
325
|
# @return [void]
|
|
308
326
|
def read_block_header
|
|
309
|
-
# Align for uncompressed blocks
|
|
327
|
+
# Align for uncompressed blocks - this ensures correct byte alignment
|
|
328
|
+
# when reading the R0, R1, R2 values from the block header
|
|
310
329
|
@bitstream.byte_align if @block_type == BLOCKTYPE_UNCOMPRESSED && @block_length.allbits?(1)
|
|
311
330
|
|
|
312
|
-
# Read block type
|
|
331
|
+
# Read block type (3 bits)
|
|
313
332
|
@block_type = @bitstream.read_bits(3)
|
|
333
|
+
|
|
334
|
+
# Read block length (24 bits: 16 bits high, then 8 bits low)
|
|
314
335
|
high = @bitstream.read_bits(16)
|
|
315
336
|
low = @bitstream.read_bits(8)
|
|
316
337
|
@block_length = (high << 8) | low
|
|
@@ -324,6 +345,8 @@ module Cabriolet
|
|
|
324
345
|
when BLOCKTYPE_UNCOMPRESSED
|
|
325
346
|
read_uncompressed_block_header
|
|
326
347
|
else
|
|
348
|
+
# Per libmspack lzxd.c line 519-521, BLOCKTYPE_INVALID (0) and
|
|
349
|
+
# blocktypes 4-7 are all invalid and should raise an error
|
|
327
350
|
raise DecompressionError, "Invalid block type: #{@block_type}"
|
|
328
351
|
end
|
|
329
352
|
end
|
|
@@ -338,11 +361,11 @@ module Cabriolet
|
|
|
338
361
|
end
|
|
339
362
|
|
|
340
363
|
# Build aligned tree
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
364
|
+
# Note: Aligned tree may be incomplete (Kraft sum < 1.0), which is valid
|
|
365
|
+
# as long as the unused codes are never encountered in the bitstream
|
|
366
|
+
@aligned_tree = Huffman::Tree.new(@aligned_lengths, ALIGNED_MAXSYMBOLS,
|
|
367
|
+
bit_order: :msb)
|
|
368
|
+
@aligned_tree.build_table(ALIGNED_TABLEBITS)
|
|
346
369
|
|
|
347
370
|
# Read main and length trees (same as verbatim)
|
|
348
371
|
read_main_and_length_trees
|
|
@@ -359,15 +382,14 @@ module Cabriolet
|
|
|
359
382
|
#
|
|
360
383
|
# @return [void]
|
|
361
384
|
def read_main_and_length_trees
|
|
362
|
-
# Read and build pretree
|
|
363
|
-
read_pretree
|
|
364
|
-
|
|
365
385
|
# Read main tree lengths using pretree
|
|
386
|
+
# Note: Each call to read_lengths reads its own pretree (per libmspack lzxd_read_lens)
|
|
366
387
|
read_lengths(@maintree_lengths, 0, 256)
|
|
367
388
|
read_lengths(@maintree_lengths, 256, @maintree_maxsymbols)
|
|
368
389
|
|
|
369
390
|
# Build main tree
|
|
370
|
-
@maintree = Huffman::Tree.new(@maintree_lengths, @maintree_maxsymbols
|
|
391
|
+
@maintree = Huffman::Tree.new(@maintree_lengths, @maintree_maxsymbols,
|
|
392
|
+
bit_order: :msb)
|
|
371
393
|
unless @maintree.build_table(LENGTH_TABLEBITS)
|
|
372
394
|
raise DecompressionError,
|
|
373
395
|
"Failed to build main tree"
|
|
@@ -380,7 +402,8 @@ module Cabriolet
|
|
|
380
402
|
read_lengths(@length_lengths, 0, NUM_SECONDARY_LENGTHS)
|
|
381
403
|
|
|
382
404
|
# Build length tree (may be empty)
|
|
383
|
-
@length_tree = Huffman::Tree.new(@length_lengths, LENGTH_MAXSYMBOLS
|
|
405
|
+
@length_tree = Huffman::Tree.new(@length_lengths, LENGTH_MAXSYMBOLS,
|
|
406
|
+
bit_order: :msb)
|
|
384
407
|
if @length_tree.build_table(LENGTH_TABLEBITS)
|
|
385
408
|
@length_empty = false
|
|
386
409
|
else
|
|
@@ -401,7 +424,8 @@ module Cabriolet
|
|
|
401
424
|
@pretree_lengths[i] = @bitstream.read_bits(4)
|
|
402
425
|
end
|
|
403
426
|
|
|
404
|
-
@pretree = Huffman::Tree.new(@pretree_lengths, PRETREE_MAXSYMBOLS
|
|
427
|
+
@pretree = Huffman::Tree.new(@pretree_lengths, PRETREE_MAXSYMBOLS,
|
|
428
|
+
bit_order: :msb)
|
|
405
429
|
return if @pretree.build_table(PRETREE_TABLEBITS)
|
|
406
430
|
|
|
407
431
|
raise DecompressionError, "Failed to build pretree"
|
|
@@ -409,11 +433,16 @@ module Cabriolet
|
|
|
409
433
|
|
|
410
434
|
# Read code lengths using pretree
|
|
411
435
|
#
|
|
436
|
+
# Per libmspack's lzxd_read_lens, each call reads its own pretree first
|
|
437
|
+
#
|
|
412
438
|
# @param lengths [Array<Integer>] Target length array
|
|
413
439
|
# @param first [Integer] First symbol index
|
|
414
440
|
# @param last [Integer] Last symbol index (exclusive)
|
|
415
441
|
# @return [void]
|
|
416
442
|
def read_lengths(lengths, first, last)
|
|
443
|
+
# Read and build pretree (20 elements, 4 bits each)
|
|
444
|
+
read_pretree
|
|
445
|
+
|
|
417
446
|
x = first
|
|
418
447
|
|
|
419
448
|
while x < last
|
|
@@ -494,9 +523,9 @@ module Cabriolet
|
|
|
494
523
|
@window_posn += 1
|
|
495
524
|
run_length -= 1
|
|
496
525
|
else
|
|
497
|
-
# Match: decode length and offset
|
|
498
|
-
decode_match(main_element, run_length)
|
|
499
|
-
run_length
|
|
526
|
+
# Match: decode length and offset, then decrement run_length by match_length
|
|
527
|
+
match_length = decode_match(main_element, run_length)
|
|
528
|
+
run_length -= match_length
|
|
500
529
|
end
|
|
501
530
|
end
|
|
502
531
|
end
|
|
@@ -504,8 +533,8 @@ module Cabriolet
|
|
|
504
533
|
# Decode and copy a match
|
|
505
534
|
#
|
|
506
535
|
# @param main_element [Integer] Main tree symbol
|
|
507
|
-
# @param run_length [Integer] Remaining run length
|
|
508
|
-
# @return [
|
|
536
|
+
# @param run_length [Integer] Remaining run length (unused, kept for compatibility)
|
|
537
|
+
# @return [Integer] Match length (bytes consumed)
|
|
509
538
|
def decode_match(main_element, _run_length)
|
|
510
539
|
main_element -= NUM_CHARS
|
|
511
540
|
|
|
@@ -533,8 +562,10 @@ module Cabriolet
|
|
|
533
562
|
match_offset = @r0
|
|
534
563
|
when 1
|
|
535
564
|
@r1, @r0 = @r0, @r1
|
|
565
|
+
match_offset = @r0
|
|
536
566
|
when 2
|
|
537
567
|
@r2, @r0 = @r0, @r2
|
|
568
|
+
match_offset = @r0
|
|
538
569
|
else
|
|
539
570
|
# Calculate offset from position slot
|
|
540
571
|
extra = position_slot >= 36 ? 17 : EXTRA_BITS[position_slot]
|
|
@@ -573,6 +604,9 @@ module Cabriolet
|
|
|
573
604
|
|
|
574
605
|
# Copy match
|
|
575
606
|
copy_match(match_offset, match_length)
|
|
607
|
+
|
|
608
|
+
# Return match length so caller can decrement run_length
|
|
609
|
+
match_length
|
|
576
610
|
end
|
|
577
611
|
|
|
578
612
|
# Decode extended match length for LZX DELTA
|
|
@@ -608,9 +642,12 @@ module Cabriolet
|
|
|
608
642
|
# @return [void]
|
|
609
643
|
def copy_match(offset, length)
|
|
610
644
|
if offset > @window_posn
|
|
611
|
-
# Match wraps around window
|
|
612
|
-
|
|
613
|
-
|
|
645
|
+
# Match wraps around window - validate it doesn't read beyond available data
|
|
646
|
+
# Per libmspack lzxd.c lines 622-628: check if match offset goes beyond
|
|
647
|
+
# what has been decompressed so far (accounting for any reference data)
|
|
648
|
+
ref_data_size = 0 # We don't support reference data yet (LZX DELTA feature)
|
|
649
|
+
if offset > @offset && (offset - @window_posn) > ref_data_size
|
|
650
|
+
raise DecompressionError, "Match offset beyond LZX stream"
|
|
614
651
|
end
|
|
615
652
|
|
|
616
653
|
# Copy from end of window
|
|
@@ -14,6 +14,13 @@ module Cabriolet
|
|
|
14
14
|
DISTANCE_MAXSYMBOLS = 32
|
|
15
15
|
DISTANCE_TABLEBITS = 6
|
|
16
16
|
|
|
17
|
+
# MSZIP signature bytes
|
|
18
|
+
SIGNATURE_BYTE_C = 0x43 # ASCII 'C'
|
|
19
|
+
SIGNATURE_BYTE_K = 0x4B # ASCII 'K'
|
|
20
|
+
|
|
21
|
+
# Maximum bytes to search for CK signature (prevents infinite loops)
|
|
22
|
+
MAX_SIGNATURE_SEARCH = 10_000
|
|
23
|
+
|
|
17
24
|
# Match lengths for literal codes 257-285
|
|
18
25
|
LIT_LENGTHS = [
|
|
19
26
|
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
|
|
@@ -50,7 +57,8 @@ module Cabriolet
|
|
|
50
57
|
# @param output [System::FileHandle, System::MemoryHandle] Output handle
|
|
51
58
|
# @param buffer_size [Integer] Buffer size for I/O operations
|
|
52
59
|
# @param fix_mszip [Boolean] Enable repair mode for corrupted data
|
|
53
|
-
def initialize(io_system, input, output, buffer_size, fix_mszip: false
|
|
60
|
+
def initialize(io_system, input, output, buffer_size, fix_mszip: false,
|
|
61
|
+
salvage: false, **_kwargs)
|
|
54
62
|
super(io_system, input, output, buffer_size)
|
|
55
63
|
@fix_mszip = fix_mszip
|
|
56
64
|
|
|
@@ -58,9 +66,11 @@ module Cabriolet
|
|
|
58
66
|
@window = "\0" * FRAME_SIZE
|
|
59
67
|
@window_posn = 0
|
|
60
68
|
@bytes_output = 0
|
|
69
|
+
@window_offset = 0 # Offset into window for unconsumed data (for multi-file CFDATA blocks)
|
|
61
70
|
|
|
62
71
|
# Initialize bitstream
|
|
63
|
-
@bitstream = Binary::Bitstream.new(io_system, input, buffer_size
|
|
72
|
+
@bitstream = Binary::Bitstream.new(io_system, input, buffer_size,
|
|
73
|
+
salvage: salvage)
|
|
64
74
|
|
|
65
75
|
# Initialize Huffman trees
|
|
66
76
|
@literal_lengths = Array.new(LITERAL_MAXSYMBOLS, 0)
|
|
@@ -76,15 +86,50 @@ module Cabriolet
|
|
|
76
86
|
def decompress(bytes)
|
|
77
87
|
total_written = 0
|
|
78
88
|
|
|
89
|
+
if ENV["DEBUG_MSZIP"]
|
|
90
|
+
warn "DEBUG MSZIP.decompress(#{bytes}): ENTRY bytes_output=#{@bytes_output} window_offset=#{@window_offset} window_posn=#{@window_posn}"
|
|
91
|
+
end
|
|
92
|
+
|
|
79
93
|
while bytes.positive?
|
|
80
|
-
#
|
|
81
|
-
|
|
94
|
+
# Check if we have buffered data from previous inflate
|
|
95
|
+
if @bytes_output.positive?
|
|
96
|
+
if ENV["DEBUG_MSZIP"]
|
|
97
|
+
warn "DEBUG MSZIP: Using buffered data: bytes_output=#{@bytes_output} window_offset=#{@window_offset}"
|
|
98
|
+
end
|
|
82
99
|
|
|
83
|
-
|
|
100
|
+
# Write from buffer
|
|
101
|
+
write_amount = [bytes, @bytes_output].min
|
|
102
|
+
io_system.write(output, @window[@window_offset, write_amount])
|
|
103
|
+
total_written += write_amount
|
|
104
|
+
bytes -= write_amount
|
|
105
|
+
@bytes_output -= write_amount
|
|
106
|
+
@window_offset += write_amount
|
|
107
|
+
|
|
108
|
+
if ENV["DEBUG_MSZIP"]
|
|
109
|
+
warn "DEBUG MSZIP: After buffer write: total_written=#{total_written} bytes_remaining=#{bytes} bytes_output=#{@bytes_output}"
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Continue loop to check if we need more data
|
|
113
|
+
next
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# No buffered data - need to inflate a new MSZIP frame
|
|
117
|
+
# Reset window for new frame
|
|
118
|
+
@window_offset = 0
|
|
84
119
|
@window_posn = 0
|
|
85
|
-
@bytes_output = 0
|
|
86
120
|
|
|
87
|
-
#
|
|
121
|
+
# Read 'CK' signature (marks start of MSZIP frame)
|
|
122
|
+
# Every MSZIP frame starts with a CK signature
|
|
123
|
+
if ENV["DEBUG_MSZIP"]
|
|
124
|
+
warn "DEBUG MSZIP: Reading CK signature (new MSZIP frame)"
|
|
125
|
+
end
|
|
126
|
+
read_signature
|
|
127
|
+
|
|
128
|
+
# Inflate the MSZIP frame (processes deflate blocks until last_block or window full)
|
|
129
|
+
if ENV["DEBUG_MSZIP"]
|
|
130
|
+
warn "DEBUG MSZIP: Calling inflate_block"
|
|
131
|
+
end
|
|
132
|
+
|
|
88
133
|
begin
|
|
89
134
|
inflate_block
|
|
90
135
|
rescue DecompressionError
|
|
@@ -97,11 +142,15 @@ module Cabriolet
|
|
|
97
142
|
@bytes_output = FRAME_SIZE
|
|
98
143
|
end
|
|
99
144
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
145
|
+
if ENV["DEBUG_MSZIP"]
|
|
146
|
+
warn "DEBUG MSZIP: After inflate_block: bytes_output=#{@bytes_output} window_posn=#{@window_posn}"
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# Now we have data in the window buffer - loop back to write from it
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
if ENV["DEBUG_MSZIP"]
|
|
153
|
+
warn "DEBUG MSZIP.decompress: EXIT total_written=#{total_written}"
|
|
105
154
|
end
|
|
106
155
|
|
|
107
156
|
total_written
|
|
@@ -111,49 +160,63 @@ module Cabriolet
|
|
|
111
160
|
|
|
112
161
|
# Read and verify 'CK' signature
|
|
113
162
|
def read_signature
|
|
163
|
+
if ENV["DEBUG_MSZIP"]
|
|
164
|
+
warn "DEBUG read_signature: Before byte_align"
|
|
165
|
+
end
|
|
166
|
+
|
|
114
167
|
# Align to byte boundary
|
|
115
168
|
@bitstream.byte_align
|
|
116
169
|
|
|
117
|
-
# Read
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
max_search = 10_000 # Prevent infinite loops
|
|
170
|
+
# Read first 2 bytes
|
|
171
|
+
c = @bitstream.read_bits(8)
|
|
172
|
+
k = @bitstream.read_bits(8)
|
|
121
173
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
174
|
+
if ENV["DEBUG_MSZIP"]
|
|
175
|
+
warn "DEBUG read_signature: Read 0x#{c.to_s(16)} 0x#{k.to_s(16)} (expected 'C'=0x43 'K'=0x4B)"
|
|
176
|
+
end
|
|
125
177
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
178
|
+
# If not CK, search for it (similar to libmspack's tolerant behavior)
|
|
179
|
+
unless c == SIGNATURE_BYTE_C && k == SIGNATURE_BYTE_K
|
|
180
|
+
# Search for CK signature in the stream (up to a reasonable limit)
|
|
181
|
+
max_search = 256
|
|
182
|
+
found = false
|
|
183
|
+
|
|
184
|
+
max_search.times do
|
|
185
|
+
# Shift: c becomes k, read new k
|
|
186
|
+
c = k
|
|
187
|
+
k = @bitstream.read_bits(8)
|
|
188
|
+
|
|
189
|
+
if c == SIGNATURE_BYTE_C && k == SIGNATURE_BYTE_K
|
|
190
|
+
found = true
|
|
191
|
+
if ENV["DEBUG_MSZIP"]
|
|
192
|
+
warn "DEBUG read_signature: Found CK signature after searching"
|
|
193
|
+
end
|
|
194
|
+
break
|
|
195
|
+
end
|
|
130
196
|
end
|
|
131
197
|
|
|
132
|
-
|
|
133
|
-
if bytes_read > max_search
|
|
198
|
+
unless found
|
|
134
199
|
raise DecompressionError,
|
|
135
|
-
"
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
if byte == 0x43 # 'C'
|
|
139
|
-
state = 1
|
|
140
|
-
elsif state == 1 && byte == 0x4B # 'K'
|
|
141
|
-
break
|
|
142
|
-
else
|
|
143
|
-
state = 0
|
|
200
|
+
"Invalid MSZIP signature: could not find CK in stream"
|
|
144
201
|
end
|
|
145
202
|
end
|
|
146
203
|
end
|
|
147
204
|
|
|
148
205
|
# Inflate a single block
|
|
206
|
+
#
|
|
207
|
+
# Processes deflate blocks until the last_block flag is set or window is full.
|
|
208
|
+
# Always decodes complete blocks - does not stop mid-block.
|
|
149
209
|
def inflate_block
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
210
|
+
# Read first block header
|
|
211
|
+
last_block = @bitstream.read_bits(1)
|
|
212
|
+
block_type = @bitstream.read_bits(2)
|
|
153
213
|
|
|
154
|
-
|
|
155
|
-
|
|
214
|
+
if ENV["DEBUG_MSZIP"]
|
|
215
|
+
warn "DEBUG inflate_block: First block: last_block=#{last_block} block_type=#{block_type}"
|
|
216
|
+
end
|
|
156
217
|
|
|
218
|
+
loop do
|
|
219
|
+
# Process current block
|
|
157
220
|
case block_type
|
|
158
221
|
when 0
|
|
159
222
|
inflate_stored_block
|
|
@@ -167,7 +230,16 @@ module Cabriolet
|
|
|
167
230
|
raise DecompressionError, "Invalid block type: #{block_type}"
|
|
168
231
|
end
|
|
169
232
|
|
|
233
|
+
if ENV["DEBUG_MSZIP"]
|
|
234
|
+
warn "DEBUG inflate_block: After block: last_block=#{last_block} window_posn=#{@window_posn}"
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
# Stop if this was the last block
|
|
170
238
|
break if last_block == 1
|
|
239
|
+
|
|
240
|
+
# Read next block header (only if we need to continue)
|
|
241
|
+
last_block = @bitstream.read_bits(1)
|
|
242
|
+
block_type = @bitstream.read_bits(2)
|
|
171
243
|
end
|
|
172
244
|
|
|
173
245
|
# Flush remaining window data
|
|
@@ -306,13 +378,25 @@ module Cabriolet
|
|
|
306
378
|
end
|
|
307
379
|
|
|
308
380
|
# Inflate a Huffman-compressed block
|
|
381
|
+
#
|
|
382
|
+
# Always decodes until code 256 (END OF BLOCK)
|
|
309
383
|
def inflate_huffman_block
|
|
384
|
+
symbol_count = 0
|
|
310
385
|
loop do
|
|
386
|
+
if ENV["DEBUG_MSZIP_SYMBOLS"]
|
|
387
|
+
warn "DEBUG inflate_huffman_block: window_posn=#{@window_posn} bytes_output=#{@bytes_output}"
|
|
388
|
+
end
|
|
389
|
+
|
|
311
390
|
# Decode symbol from literal tree
|
|
312
391
|
code = Huffman::Decoder.decode_symbol(
|
|
313
392
|
@bitstream, @literal_tree.table, LITERAL_TABLEBITS,
|
|
314
393
|
@literal_lengths, LITERAL_MAXSYMBOLS
|
|
315
394
|
)
|
|
395
|
+
symbol_count += 1
|
|
396
|
+
|
|
397
|
+
if ENV["DEBUG_MSZIP_SYMBOLS"] || ENV["DEBUG_MSZIP"]
|
|
398
|
+
warn "DEBUG inflate_huffman_block[#{symbol_count}]: decoded code=#{code} (#{'0x%02x' % code if code < 256})"
|
|
399
|
+
end
|
|
316
400
|
|
|
317
401
|
if code < 256
|
|
318
402
|
# Literal byte
|
|
@@ -321,6 +405,9 @@ module Cabriolet
|
|
|
321
405
|
flush_window if @window_posn == FRAME_SIZE
|
|
322
406
|
elsif code == 256
|
|
323
407
|
# End of block
|
|
408
|
+
if ENV["DEBUG_MSZIP"] || ENV["DEBUG_MSZIP_SYMBOLS"]
|
|
409
|
+
warn "DEBUG inflate_huffman_block: END OF BLOCK (window_posn=#{@window_posn})"
|
|
410
|
+
end
|
|
324
411
|
break
|
|
325
412
|
else
|
|
326
413
|
# Length/distance pair (LZ77 match)
|
|
@@ -12,7 +12,7 @@ module Cabriolet
|
|
|
12
12
|
FRAME_SIZE = 32_768
|
|
13
13
|
|
|
14
14
|
# Match constants
|
|
15
|
-
MAX_MATCH =
|
|
15
|
+
MAX_MATCH = 259
|
|
16
16
|
|
|
17
17
|
# Position slot tables (same as in qtmd.c)
|
|
18
18
|
POSITION_BASE = [
|
|
@@ -68,7 +68,8 @@ module Cabriolet
|
|
|
68
68
|
# @param output [System::FileHandle, System::MemoryHandle] Output handle
|
|
69
69
|
# @param buffer_size [Integer] Buffer size for I/O operations
|
|
70
70
|
# @param window_bits [Integer] Window size parameter (10-21)
|
|
71
|
-
def initialize(io_system, input, output, buffer_size, window_bits: 10
|
|
71
|
+
def initialize(io_system, input, output, buffer_size, window_bits: 10,
|
|
72
|
+
**_kwargs)
|
|
72
73
|
super(io_system, input, output, buffer_size)
|
|
73
74
|
|
|
74
75
|
# Validate window_bits
|