cabriolet 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cabriolet/binary/bitstream.rb +56 -6
- data/lib/cabriolet/cab/decompressor.rb +6 -3
- data/lib/cabriolet/cab/extractor.rb +40 -19
- data/lib/cabriolet/checksum.rb +7 -4
- data/lib/cabriolet/compressors/lzx.rb +17 -9
- data/lib/cabriolet/compressors/mszip.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +22 -14
- data/lib/cabriolet/decompressors/lzx.rb +117 -24
- data/lib/cabriolet/decompressors/mszip.rb +21 -17
- data/lib/cabriolet/decompressors/quantum.rb +3 -4
- data/lib/cabriolet/huffman/decoder.rb +8 -2
- data/lib/cabriolet/plugin_manager.rb +5 -5
- data/lib/cabriolet/system/file_handle.rb +1 -1
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +92 -94
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 59d85958b00fa7eb684912e7ec77bfd9ce261a01035ed7ef42c2d6db5b1405a7
|
|
4
|
+
data.tar.gz: e9fa2123fe7c48778a01018c68f47cdb66f68449fe4fee48074e2b7591d5b9e1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ddc9ef226ce8359cbe65ac4e80853dbd8e57f25c6e934e48837a163f614bcdbdf94f18609c788c6396f4b87f93d4e12c1fa326c8f291bc1156ec0eaf0387d377
|
|
7
|
+
data.tar.gz: 20206a2b5011d4a869d0f4d29085b5b874fe1d34daa00ce14f842d55a71722c19329ea75b1f46adaebf88fc43805a467b089e038aa19fe5736010d1a9ca951ad
|
|
@@ -4,7 +4,7 @@ module Cabriolet
|
|
|
4
4
|
module Binary
|
|
5
5
|
# Bitstream provides bit-level I/O operations for reading compressed data
|
|
6
6
|
class Bitstream
|
|
7
|
-
attr_reader :io_system, :handle, :buffer_size, :bit_order
|
|
7
|
+
attr_reader :io_system, :handle, :buffer_size, :bit_order, :bits_left
|
|
8
8
|
|
|
9
9
|
# Initialize a new bitstream
|
|
10
10
|
#
|
|
@@ -29,6 +29,9 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
29
29
|
# For MSB mode, we need to know the bit width of the buffer
|
|
30
30
|
# Ruby integers are arbitrary precision, so we use 32 bits as standard
|
|
31
31
|
@bitbuf_width = 32
|
|
32
|
+
|
|
33
|
+
# Cache ENV lookups once at initialization
|
|
34
|
+
@debug_bitstream = ENV.fetch("DEBUG_BITSTREAM", nil)
|
|
32
35
|
end
|
|
33
36
|
|
|
34
37
|
# Read specified number of bits from the stream
|
|
@@ -83,7 +86,7 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
83
86
|
byte = 0 if byte.nil?
|
|
84
87
|
|
|
85
88
|
# DEBUG
|
|
86
|
-
if
|
|
89
|
+
if @debug_bitstream
|
|
87
90
|
warn "DEBUG LSB read_byte: buffer_pos=#{@buffer_pos} byte=#{byte} (#{byte.to_s(2).rjust(
|
|
88
91
|
8, '0'
|
|
89
92
|
)}) bits_left=#{@bits_left}"
|
|
@@ -101,7 +104,7 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
101
104
|
@bits_left -= num_bits
|
|
102
105
|
|
|
103
106
|
# DEBUG
|
|
104
|
-
warn "DEBUG LSB read_bits(#{num_bits}): result=#{result} buffer=#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if
|
|
107
|
+
warn "DEBUG LSB read_bits(#{num_bits}): result=#{result} buffer=#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if @debug_bitstream
|
|
105
108
|
|
|
106
109
|
result
|
|
107
110
|
end
|
|
@@ -116,7 +119,7 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
116
119
|
word = read_msb_word
|
|
117
120
|
|
|
118
121
|
# DEBUG
|
|
119
|
-
warn "DEBUG MSB read_bytes: word=0x#{word.to_s(16)} bits_left=#{@bits_left}" if
|
|
122
|
+
warn "DEBUG MSB read_bytes: word=0x#{word.to_s(16)} bits_left=#{@bits_left}" if @debug_bitstream
|
|
120
123
|
|
|
121
124
|
# INJECT_BITS (MSB): inject at the left side
|
|
122
125
|
@bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
|
|
@@ -131,7 +134,7 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
131
134
|
@bits_left -= num_bits
|
|
132
135
|
|
|
133
136
|
# DEBUG
|
|
134
|
-
warn "DEBUG MSB read_bits(#{num_bits}) result=#{result} (0x#{result.to_s(16)}) buffer=0x#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if
|
|
137
|
+
warn "DEBUG MSB read_bits(#{num_bits}) result=#{result} (0x#{result.to_s(16)}) buffer=0x#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if @debug_bitstream
|
|
135
138
|
|
|
136
139
|
result
|
|
137
140
|
end
|
|
@@ -172,15 +175,62 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
172
175
|
byte
|
|
173
176
|
end
|
|
174
177
|
|
|
178
|
+
# Ensure at least num_bits are available in the bit buffer.
|
|
179
|
+
# Reads from input if needed. Used for alignment operations.
|
|
180
|
+
#
|
|
181
|
+
# @param num_bits [Integer] Minimum number of bits required
|
|
182
|
+
# @return [void]
|
|
183
|
+
def ensure_bits(num_bits)
|
|
184
|
+
if @bit_order == :msb
|
|
185
|
+
while @bits_left < num_bits
|
|
186
|
+
word = read_msb_word
|
|
187
|
+
@bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
|
|
188
|
+
@bits_left += 16
|
|
189
|
+
end
|
|
190
|
+
else
|
|
191
|
+
while @bits_left < num_bits
|
|
192
|
+
byte = read_byte
|
|
193
|
+
byte = 0 if byte.nil?
|
|
194
|
+
@bit_buffer |= (byte << @bits_left)
|
|
195
|
+
@bits_left += 8
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
|
|
175
200
|
# Align to the next byte boundary
|
|
176
201
|
#
|
|
177
202
|
# @return [void]
|
|
178
203
|
def byte_align
|
|
179
204
|
discard_bits = @bits_left % 8
|
|
180
|
-
@
|
|
205
|
+
if @bit_order == :msb
|
|
206
|
+
# MSB mode: valid bits are at the left (high) end, shift left to discard
|
|
207
|
+
@bit_buffer = (@bit_buffer << discard_bits) & ((1 << @bitbuf_width) - 1)
|
|
208
|
+
else
|
|
209
|
+
@bit_buffer >>= discard_bits
|
|
210
|
+
end
|
|
181
211
|
@bits_left -= discard_bits
|
|
182
212
|
end
|
|
183
213
|
|
|
214
|
+
# Flush the bit buffer entirely (discard all remaining bits).
|
|
215
|
+
# Per libmspack lzxd.c: used when transitioning to raw byte reading
|
|
216
|
+
# for uncompressed blocks. Sets bits_left=0 and bit_buffer=0.
|
|
217
|
+
#
|
|
218
|
+
# @return [void]
|
|
219
|
+
def flush_bit_buffer
|
|
220
|
+
@bit_buffer = 0
|
|
221
|
+
@bits_left = 0
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
# Read a raw byte directly from the input, bypassing the bit buffer.
|
|
225
|
+
# Per libmspack lzxd.c: uncompressed block headers and data are read
|
|
226
|
+
# directly from the input pointer (i_ptr), not through the bitstream.
|
|
227
|
+
# Call flush_bit_buffer first to discard any residual bits.
|
|
228
|
+
#
|
|
229
|
+
# @return [Integer] Byte value (0 on EOF)
|
|
230
|
+
def read_raw_byte
|
|
231
|
+
read_byte || 0
|
|
232
|
+
end
|
|
233
|
+
|
|
184
234
|
# Peek at bits without consuming them
|
|
185
235
|
#
|
|
186
236
|
# @param num_bits [Integer] Number of bits to peek at
|
|
@@ -157,8 +157,11 @@ module Cabriolet
|
|
|
157
157
|
offset = cab_offset + 4
|
|
158
158
|
end
|
|
159
159
|
else
|
|
160
|
-
# No cabinet found in this chunk, move to next
|
|
161
|
-
|
|
160
|
+
# No cabinet found in this chunk, move to next.
|
|
161
|
+
# Overlap by 20 bytes so MSCF signatures spanning chunk
|
|
162
|
+
# boundaries are not missed (state machine reads 20 bytes).
|
|
163
|
+
overlap = length > 20 ? 20 : 0
|
|
164
|
+
offset += [length - overlap, 1].max
|
|
162
165
|
end
|
|
163
166
|
end
|
|
164
167
|
|
|
@@ -452,7 +455,7 @@ file_length)
|
|
|
452
455
|
cablen_u32, caboff, file_length)
|
|
453
456
|
|
|
454
457
|
# Not valid, restart search after "MSCF"
|
|
455
|
-
|
|
458
|
+
state = 0
|
|
456
459
|
end
|
|
457
460
|
end
|
|
458
461
|
|
|
@@ -22,6 +22,9 @@ module Cabriolet
|
|
|
22
22
|
@current_decomp = nil
|
|
23
23
|
@current_input = nil
|
|
24
24
|
@current_offset = 0
|
|
25
|
+
|
|
26
|
+
# Cache ENV lookups once at initialization
|
|
27
|
+
@debug_block = ENV.fetch("DEBUG_BLOCK", nil)
|
|
25
28
|
end
|
|
26
29
|
|
|
27
30
|
# Extract a single file from the cabinet
|
|
@@ -185,7 +188,7 @@ module Cabriolet
|
|
|
185
188
|
# @param salvage [Boolean] Salvage mode flag
|
|
186
189
|
# @param file_offset [Integer] File offset for reset condition check
|
|
187
190
|
def setup_decompressor_for_folder(folder, salvage, file_offset)
|
|
188
|
-
if
|
|
191
|
+
if @debug_block
|
|
189
192
|
warn "DEBUG extract_file: Checking reset condition"
|
|
190
193
|
warn " @current_folder == folder: #{@current_folder == folder}"
|
|
191
194
|
warn " @current_offset (#{@current_offset}) > file_offset (#{file_offset})"
|
|
@@ -193,7 +196,7 @@ module Cabriolet
|
|
|
193
196
|
end
|
|
194
197
|
|
|
195
198
|
if @current_folder != folder || @current_offset > file_offset || !@current_decomp
|
|
196
|
-
if
|
|
199
|
+
if @debug_block
|
|
197
200
|
warn "DEBUG extract_file: RESETTING state (creating new BlockReader)"
|
|
198
201
|
end
|
|
199
202
|
|
|
@@ -211,7 +214,20 @@ module Cabriolet
|
|
|
211
214
|
# Create decompressor ONCE and reuse it
|
|
212
215
|
@current_decomp = @decompressor.create_decompressor(folder,
|
|
213
216
|
@current_input, nil)
|
|
214
|
-
|
|
217
|
+
|
|
218
|
+
# Per libmspack cabd.c: set output_length from the folder's total
|
|
219
|
+
# uncompressed size (max file.offset + file.length across all files
|
|
220
|
+
# in the folder). This allows the LZX decompressor to reduce the
|
|
221
|
+
# last frame's size so it doesn't read past the end of the stream.
|
|
222
|
+
if @current_decomp.respond_to?(:set_output_length)
|
|
223
|
+
cab = folder.data&.cabinet
|
|
224
|
+
if cab&.files
|
|
225
|
+
folder_files = cab.files.select { |f| f.folder == folder }
|
|
226
|
+
max_end = folder_files.map { |f| f.offset + f.length }.max
|
|
227
|
+
@current_decomp.set_output_length(max_end) if max_end&.positive?
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
elsif @debug_block
|
|
215
231
|
warn "DEBUG extract_file: NOT resetting (reusing existing BlockReader)"
|
|
216
232
|
end
|
|
217
233
|
end
|
|
@@ -228,7 +244,6 @@ module Cabriolet
|
|
|
228
244
|
null_output = System::MemoryHandle.new("", Constants::MODE_WRITE)
|
|
229
245
|
|
|
230
246
|
@current_decomp.instance_variable_set(:@output, null_output)
|
|
231
|
-
@current_decomp.set_output_length(skip_bytes) if @current_decomp.respond_to?(:set_output_length)
|
|
232
247
|
|
|
233
248
|
begin
|
|
234
249
|
@current_decomp.decompress(skip_bytes)
|
|
@@ -248,8 +263,11 @@ module Cabriolet
|
|
|
248
263
|
# @param output_fh [System::FileHandle] Output file handle
|
|
249
264
|
# @param filelen [Integer] Number of bytes to write
|
|
250
265
|
def write_file_data(output_fh, filelen)
|
|
266
|
+
unless @current_decomp
|
|
267
|
+
raise DecompressionError, "Decompressor not available (state was reset)"
|
|
268
|
+
end
|
|
269
|
+
|
|
251
270
|
@current_decomp.instance_variable_set(:@output, output_fh)
|
|
252
|
-
@current_decomp.set_output_length(filelen) if @current_decomp.respond_to?(:set_output_length)
|
|
253
271
|
@current_decomp.decompress(filelen)
|
|
254
272
|
@current_offset += filelen
|
|
255
273
|
end
|
|
@@ -311,6 +329,9 @@ module Cabriolet
|
|
|
311
329
|
@buffer_pos = 0
|
|
312
330
|
@cab_handle = nil
|
|
313
331
|
|
|
332
|
+
# Cache ENV lookups once at initialization
|
|
333
|
+
@debug_block = ENV.fetch("DEBUG_BLOCK", nil)
|
|
334
|
+
|
|
314
335
|
# Open first cabinet and seek to data offset
|
|
315
336
|
open_current_cabinet
|
|
316
337
|
end
|
|
@@ -318,7 +339,7 @@ module Cabriolet
|
|
|
318
339
|
def read(bytes)
|
|
319
340
|
# Early return if we've already exhausted all blocks and buffer
|
|
320
341
|
if @current_block >= @num_blocks && @buffer_pos >= @buffer.bytesize
|
|
321
|
-
if
|
|
342
|
+
if @debug_block
|
|
322
343
|
warn "DEBUG BlockReader.read(#{bytes}): Already exhausted, returning empty"
|
|
323
344
|
end
|
|
324
345
|
return +""
|
|
@@ -326,14 +347,14 @@ module Cabriolet
|
|
|
326
347
|
|
|
327
348
|
result = +""
|
|
328
349
|
|
|
329
|
-
if
|
|
350
|
+
if @debug_block
|
|
330
351
|
warn "DEBUG BlockReader.read(#{bytes}): buffer_size=#{@buffer.bytesize} buffer_pos=#{@buffer_pos} block=#{@current_block}/#{@num_blocks}"
|
|
331
352
|
end
|
|
332
353
|
|
|
333
354
|
while result.bytesize < bytes
|
|
334
355
|
# Read more data if buffer is empty
|
|
335
356
|
if (@buffer_pos >= @buffer.bytesize) && !read_next_block
|
|
336
|
-
if
|
|
357
|
+
if @debug_block
|
|
337
358
|
warn "DEBUG BlockReader.read: EXHAUSTED at result.bytesize=#{result.bytesize} (wanted #{bytes})"
|
|
338
359
|
end
|
|
339
360
|
break
|
|
@@ -347,7 +368,7 @@ module Cabriolet
|
|
|
347
368
|
@buffer_pos += to_copy
|
|
348
369
|
end
|
|
349
370
|
|
|
350
|
-
if
|
|
371
|
+
if @debug_block
|
|
351
372
|
warn "DEBUG BlockReader.read: returning #{result.bytesize} bytes"
|
|
352
373
|
end
|
|
353
374
|
|
|
@@ -371,12 +392,12 @@ module Cabriolet
|
|
|
371
392
|
private
|
|
372
393
|
|
|
373
394
|
def read_next_block
|
|
374
|
-
if
|
|
395
|
+
if @debug_block
|
|
375
396
|
warn "DEBUG read_next_block: current_block=#{@current_block} num_blocks=#{@num_blocks}"
|
|
376
397
|
end
|
|
377
398
|
|
|
378
399
|
if @current_block >= @num_blocks
|
|
379
|
-
if
|
|
400
|
+
if @debug_block
|
|
380
401
|
warn "DEBUG read_next_block: EXHAUSTED (current_block >= num_blocks)"
|
|
381
402
|
end
|
|
382
403
|
return false
|
|
@@ -387,19 +408,19 @@ module Cabriolet
|
|
|
387
408
|
|
|
388
409
|
loop do
|
|
389
410
|
# Read CFDATA header
|
|
390
|
-
if
|
|
411
|
+
if @debug_block
|
|
391
412
|
handle_pos = @cab_handle.tell
|
|
392
413
|
warn "DEBUG read_next_block: About to read CFDATA header at position #{handle_pos}"
|
|
393
414
|
end
|
|
394
415
|
|
|
395
416
|
header_data = @cab_handle.read(Constants::CFDATA_SIZE)
|
|
396
417
|
|
|
397
|
-
if
|
|
418
|
+
if @debug_block
|
|
398
419
|
warn "DEBUG read_next_block: Read #{header_data.bytesize} bytes (expected #{Constants::CFDATA_SIZE})"
|
|
399
420
|
end
|
|
400
421
|
|
|
401
422
|
if header_data.bytesize != Constants::CFDATA_SIZE
|
|
402
|
-
if
|
|
423
|
+
if @debug_block
|
|
403
424
|
warn "DEBUG read_next_block: FAILED - header read returned #{header_data.bytesize} bytes"
|
|
404
425
|
end
|
|
405
426
|
return false
|
|
@@ -427,18 +448,18 @@ module Cabriolet
|
|
|
427
448
|
end
|
|
428
449
|
|
|
429
450
|
# Read compressed data
|
|
430
|
-
if
|
|
451
|
+
if @debug_block
|
|
431
452
|
warn "DEBUG read_next_block: About to read #{cfdata.compressed_size} bytes of compressed data"
|
|
432
453
|
end
|
|
433
454
|
|
|
434
455
|
compressed_data = @cab_handle.read(cfdata.compressed_size)
|
|
435
456
|
|
|
436
|
-
if
|
|
457
|
+
if @debug_block
|
|
437
458
|
warn "DEBUG read_next_block: Read #{compressed_data.bytesize} bytes of compressed data (expected #{cfdata.compressed_size})"
|
|
438
459
|
end
|
|
439
460
|
|
|
440
461
|
if compressed_data.bytesize != cfdata.compressed_size
|
|
441
|
-
if
|
|
462
|
+
if @debug_block
|
|
442
463
|
warn "DEBUG read_next_block: FAILED - compressed data read returned #{compressed_data.bytesize} bytes"
|
|
443
464
|
end
|
|
444
465
|
return false
|
|
@@ -482,7 +503,7 @@ module Cabriolet
|
|
|
482
503
|
end
|
|
483
504
|
|
|
484
505
|
def open_current_cabinet
|
|
485
|
-
if
|
|
506
|
+
if @debug_block
|
|
486
507
|
warn "DEBUG open_current_cabinet: filename=#{@current_data.cabinet.filename} offset=#{@current_data.offset}"
|
|
487
508
|
end
|
|
488
509
|
|
|
@@ -490,7 +511,7 @@ module Cabriolet
|
|
|
490
511
|
@cab_handle = @io_system.open(@current_data.cabinet.filename, Constants::MODE_READ)
|
|
491
512
|
@cab_handle.seek(@current_data.offset, Constants::SEEK_START)
|
|
492
513
|
|
|
493
|
-
if
|
|
514
|
+
if @debug_block
|
|
494
515
|
actual_pos = @cab_handle.tell
|
|
495
516
|
warn "DEBUG open_current_cabinet: seeked to position #{actual_pos} (expected #{@current_data.offset})"
|
|
496
517
|
end
|
data/lib/cabriolet/checksum.rb
CHANGED
|
@@ -28,14 +28,17 @@ module Cabriolet
|
|
|
28
28
|
ul = 0
|
|
29
29
|
offset = bytes.size - remainder
|
|
30
30
|
|
|
31
|
+
# Match libmspack's cabd_checksum remainder handling:
|
|
32
|
+
# The C fall-through switch processes bytes in decreasing shift
|
|
33
|
+
# order (first remaining byte gets the highest shift).
|
|
31
34
|
case remainder
|
|
32
35
|
when 3
|
|
33
|
-
ul |= bytes[offset
|
|
36
|
+
ul |= bytes[offset] << 16
|
|
34
37
|
ul |= bytes[offset + 1] << 8
|
|
35
|
-
ul |= bytes[offset]
|
|
38
|
+
ul |= bytes[offset + 2]
|
|
36
39
|
when 2
|
|
37
|
-
ul |= bytes[offset
|
|
38
|
-
ul |= bytes[offset]
|
|
40
|
+
ul |= bytes[offset] << 8
|
|
41
|
+
ul |= bytes[offset + 1]
|
|
39
42
|
when 1
|
|
40
43
|
ul |= bytes[offset]
|
|
41
44
|
end
|
|
@@ -89,7 +89,7 @@ module Cabriolet
|
|
|
89
89
|
buffer_size, bit_order: :msb)
|
|
90
90
|
|
|
91
91
|
# Initialize sliding window for LZ77
|
|
92
|
-
@window = "\0" * @window_size
|
|
92
|
+
@window = ("\0" * @window_size).b
|
|
93
93
|
@window_pos = 0
|
|
94
94
|
|
|
95
95
|
# Initialize R0, R1, R2 (LRU offset registers)
|
|
@@ -153,6 +153,11 @@ module Cabriolet
|
|
|
153
153
|
|
|
154
154
|
# Compress a single frame (32KB)
|
|
155
155
|
#
|
|
156
|
+
# Per libmspack lzxd.c: uncompressed blocks write R0/R1/R2 and data
|
|
157
|
+
# as raw bytes directly to the stream, NOT through the MSB bitstream.
|
|
158
|
+
# The bitstream is flushed (padded to 16-bit boundary) after the
|
|
159
|
+
# block header, then raw bytes follow.
|
|
160
|
+
#
|
|
156
161
|
# @param data [String] Frame data to compress
|
|
157
162
|
# @return [void]
|
|
158
163
|
def compress_frame(data)
|
|
@@ -163,12 +168,12 @@ module Cabriolet
|
|
|
163
168
|
# Write UNCOMPRESSED block header
|
|
164
169
|
write_block_header(BLOCKTYPE_UNCOMPRESSED, block_length)
|
|
165
170
|
|
|
166
|
-
# Write offset registers (R0, R1, R2)
|
|
171
|
+
# Write offset registers (R0, R1, R2) as raw bytes
|
|
167
172
|
write_offset_registers
|
|
168
173
|
|
|
169
|
-
# Write raw uncompressed data
|
|
174
|
+
# Write raw uncompressed data (bypassing MSB bitstream)
|
|
170
175
|
data.each_byte do |byte|
|
|
171
|
-
@bitstream.
|
|
176
|
+
@bitstream.write_raw_byte(byte)
|
|
172
177
|
end
|
|
173
178
|
end
|
|
174
179
|
|
|
@@ -571,14 +576,17 @@ module Cabriolet
|
|
|
571
576
|
|
|
572
577
|
# Write offset registers (R0, R1, R2) for uncompressed blocks
|
|
573
578
|
#
|
|
579
|
+
# Per libmspack lzxd.c: R0/R1/R2 are written as raw bytes directly
|
|
580
|
+
# to the stream (not through the MSB bitstream) to avoid byte-swapping.
|
|
581
|
+
#
|
|
574
582
|
# @return [void]
|
|
575
583
|
def write_offset_registers
|
|
576
|
-
# Write R0, R1, R2 as 32-bit little-endian values (12 bytes total)
|
|
584
|
+
# Write R0, R1, R2 as 32-bit little-endian values (12 raw bytes total)
|
|
577
585
|
[@r0, @r1, @r2].each do |offset|
|
|
578
|
-
@bitstream.
|
|
579
|
-
@bitstream.
|
|
580
|
-
@bitstream.
|
|
581
|
-
@bitstream.
|
|
586
|
+
@bitstream.write_raw_byte(offset & 0xFF)
|
|
587
|
+
@bitstream.write_raw_byte((offset >> 8) & 0xFF)
|
|
588
|
+
@bitstream.write_raw_byte((offset >> 16) & 0xFF)
|
|
589
|
+
@bitstream.write_raw_byte((offset >> 24) & 0xFF)
|
|
582
590
|
end
|
|
583
591
|
end
|
|
584
592
|
|
|
@@ -31,11 +31,12 @@ module Cabriolet
|
|
|
31
31
|
mode = MODE_EXPAND)
|
|
32
32
|
super(io_system, input, output, buffer_size)
|
|
33
33
|
@mode = mode
|
|
34
|
-
@window =
|
|
34
|
+
@window = (WINDOW_FILL.chr * WINDOW_SIZE).b
|
|
35
35
|
@window_pos = initialize_window_position
|
|
36
36
|
@input_buffer = ""
|
|
37
37
|
@input_pos = 0
|
|
38
38
|
@invert = mode == MODE_MSHELP ? 0xFF : 0x00
|
|
39
|
+
@output_buffer = String.new(encoding: Encoding::BINARY, capacity: 4096)
|
|
39
40
|
end
|
|
40
41
|
|
|
41
42
|
# Decompress LZSS data
|
|
@@ -69,8 +70,8 @@ module Cabriolet
|
|
|
69
70
|
literal = read_input_byte
|
|
70
71
|
break if literal.nil?
|
|
71
72
|
|
|
72
|
-
@window
|
|
73
|
-
|
|
73
|
+
@window.setbyte(@window_pos, literal)
|
|
74
|
+
buffer_output_byte(literal)
|
|
74
75
|
bytes_written += 1
|
|
75
76
|
|
|
76
77
|
@window_pos = (@window_pos + 1) & (WINDOW_SIZE - 1)
|
|
@@ -91,9 +92,9 @@ module Cabriolet
|
|
|
91
92
|
# Check if we've reached the limit mid-match
|
|
92
93
|
break if enforce_limit && bytes_written >= bytes
|
|
93
94
|
|
|
94
|
-
byte = @window
|
|
95
|
-
@window
|
|
96
|
-
|
|
95
|
+
byte = @window.getbyte(match_pos)
|
|
96
|
+
@window.setbyte(@window_pos, byte)
|
|
97
|
+
buffer_output_byte(byte)
|
|
97
98
|
bytes_written += 1
|
|
98
99
|
|
|
99
100
|
@window_pos = (@window_pos + 1) & (WINDOW_SIZE - 1)
|
|
@@ -103,6 +104,7 @@ module Cabriolet
|
|
|
103
104
|
end
|
|
104
105
|
end
|
|
105
106
|
|
|
107
|
+
flush_output_buffer
|
|
106
108
|
bytes_written
|
|
107
109
|
end
|
|
108
110
|
|
|
@@ -131,17 +133,23 @@ module Cabriolet
|
|
|
131
133
|
byte
|
|
132
134
|
end
|
|
133
135
|
|
|
134
|
-
#
|
|
136
|
+
# Buffer an output byte and flush when buffer is full
|
|
135
137
|
#
|
|
136
|
-
# @param byte [Integer] Byte to
|
|
138
|
+
# @param byte [Integer] Byte to buffer
|
|
137
139
|
# @return [void]
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
140
|
+
def buffer_output_byte(byte)
|
|
141
|
+
@output_buffer << byte.chr
|
|
142
|
+
flush_output_buffer if @output_buffer.bytesize >= 4096
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# Flush the output buffer to the output stream
|
|
146
|
+
#
|
|
147
|
+
# @return [void]
|
|
148
|
+
def flush_output_buffer
|
|
149
|
+
return if @output_buffer.empty?
|
|
143
150
|
|
|
144
|
-
|
|
151
|
+
@io_system.write(@output, @output_buffer)
|
|
152
|
+
@output_buffer.clear
|
|
145
153
|
end
|
|
146
154
|
end
|
|
147
155
|
end
|
|
@@ -129,8 +129,8 @@ module Cabriolet
|
|
|
129
129
|
@num_offsets = POSITION_SLOTS[window_bits - 15] << 3
|
|
130
130
|
@maintree_maxsymbols = NUM_CHARS + @num_offsets
|
|
131
131
|
|
|
132
|
-
# Initialize window
|
|
133
|
-
@window = "\0" * @window_size
|
|
132
|
+
# Initialize window (must be binary to avoid UTF-8 character vs byte mismatch)
|
|
133
|
+
@window = ("\0" * @window_size).b
|
|
134
134
|
@window_posn = 0
|
|
135
135
|
@frame_posn = 0
|
|
136
136
|
@frame = 0
|
|
@@ -149,7 +149,7 @@ module Cabriolet
|
|
|
149
149
|
# Intel E8 transformation state
|
|
150
150
|
@intel_filesize = 0
|
|
151
151
|
@intel_started = false
|
|
152
|
-
@e8_buf = "\0" * FRAME_SIZE
|
|
152
|
+
@e8_buf = ("\0" * FRAME_SIZE).b
|
|
153
153
|
|
|
154
154
|
# Initialize bitstream (LZX uses MSB-first bit ordering per libmspack lzxd.c)
|
|
155
155
|
@bitstream = Binary::Bitstream.new(io_system, input, buffer_size,
|
|
@@ -162,6 +162,12 @@ module Cabriolet
|
|
|
162
162
|
@offset = 0
|
|
163
163
|
@output_ptr = 0
|
|
164
164
|
@output_end = 0
|
|
165
|
+
|
|
166
|
+
# Per libmspack: pending frame data for multi-file extraction.
|
|
167
|
+
# When a decompress call ends mid-frame, the unwritten portion
|
|
168
|
+
# of the frame is stored here for the next call to output.
|
|
169
|
+
@pending_frame_data = nil
|
|
170
|
+
@pending_frame_offset = 0
|
|
165
171
|
end
|
|
166
172
|
|
|
167
173
|
# Set output length (for Intel E8 processing)
|
|
@@ -174,6 +180,11 @@ module Cabriolet
|
|
|
174
180
|
|
|
175
181
|
# Decompress LZX data
|
|
176
182
|
#
|
|
183
|
+
# Per libmspack lzxd.c: the decompressor always decodes full frames
|
|
184
|
+
# (32KB) into the window, but may output fewer bytes if the caller
|
|
185
|
+
# requests less. When multiple files share a folder, decompress is
|
|
186
|
+
# called per file, so partial-frame data must carry over between calls.
|
|
187
|
+
#
|
|
177
188
|
# @param bytes [Integer] Number of bytes to decompress
|
|
178
189
|
# @return [Integer] Number of bytes decompressed
|
|
179
190
|
def decompress(bytes)
|
|
@@ -183,7 +194,28 @@ module Cabriolet
|
|
|
183
194
|
read_intel_header unless @header_read
|
|
184
195
|
|
|
185
196
|
total_written = 0
|
|
186
|
-
|
|
197
|
+
|
|
198
|
+
# Output any pending frame data from the previous partial-frame write.
|
|
199
|
+
# This handles multi-file extraction where the previous call ended
|
|
200
|
+
# mid-frame and the next file's data starts in the same frame.
|
|
201
|
+
if @pending_frame_data
|
|
202
|
+
avail = @pending_frame_data.bytesize - @pending_frame_offset
|
|
203
|
+
write_amount = [bytes, avail].min
|
|
204
|
+
io_system.write(output, @pending_frame_data[@pending_frame_offset, write_amount])
|
|
205
|
+
total_written += write_amount
|
|
206
|
+
@offset += write_amount
|
|
207
|
+
@pending_frame_offset += write_amount
|
|
208
|
+
|
|
209
|
+
if @pending_frame_offset >= @pending_frame_data.bytesize
|
|
210
|
+
@pending_frame_data = nil
|
|
211
|
+
@pending_frame_offset = 0
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
remaining = bytes - total_written
|
|
216
|
+
return total_written if remaining <= 0
|
|
217
|
+
|
|
218
|
+
end_frame = ((@offset + remaining) / FRAME_SIZE) + 1
|
|
187
219
|
|
|
188
220
|
while @frame < end_frame
|
|
189
221
|
# Check reset interval - reset offset registers at frame boundaries
|
|
@@ -217,20 +249,47 @@ module Cabriolet
|
|
|
217
249
|
@window[@frame_posn, frame_size]
|
|
218
250
|
end
|
|
219
251
|
|
|
220
|
-
#
|
|
252
|
+
# Defensive guard: frame_data should never be nil if the >= window
|
|
253
|
+
# wrap checks below are correct. If it is, the stream is corrupt
|
|
254
|
+
# or a regression has been introduced.
|
|
255
|
+
if frame_data.nil?
|
|
256
|
+
if @salvage
|
|
257
|
+
warn "Salvage: nil frame data at frame_posn=#{@frame_posn}, frame=#{@frame}"
|
|
258
|
+
break
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
raise DecompressionError,
|
|
262
|
+
"LZX: nil frame data at position #{@frame_posn}, frame_size=#{frame_size}"
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
# Write frame - per libmspack: offset tracks actual output bytes,
|
|
266
|
+
# not full frame bytes. Save unwritten remainder for next call.
|
|
221
267
|
write_amount = [bytes - total_written, frame_size].min
|
|
222
268
|
io_system.write(output, frame_data[0, write_amount])
|
|
223
269
|
total_written += write_amount
|
|
224
|
-
@offset +=
|
|
270
|
+
@offset += write_amount
|
|
271
|
+
|
|
272
|
+
# Store pending data if partial frame write
|
|
273
|
+
if write_amount < frame_size
|
|
274
|
+
@pending_frame_data = frame_data
|
|
275
|
+
@pending_frame_offset = write_amount
|
|
276
|
+
end
|
|
225
277
|
|
|
226
|
-
# Advance frame
|
|
278
|
+
# Advance frame (always by full frame, matching decode position)
|
|
227
279
|
@frame += 1
|
|
228
280
|
@frame_posn += frame_size
|
|
229
|
-
@frame_posn = 0 if @frame_posn
|
|
230
|
-
@window_posn = 0 if @window_posn
|
|
231
|
-
|
|
232
|
-
# Re-align bitstream
|
|
233
|
-
|
|
281
|
+
@frame_posn = 0 if @frame_posn >= @window_size
|
|
282
|
+
@window_posn = 0 if @window_posn >= @window_size
|
|
283
|
+
|
|
284
|
+
# Re-align bitstream to 16-bit word boundary between frames.
|
|
285
|
+
# Per libmspack lzxd.c: LZX frames are padded to 16-bit word
|
|
286
|
+
# boundaries (not 8-bit byte boundaries) because the bitstream
|
|
287
|
+
# reads data in 16-bit little-endian words.
|
|
288
|
+
if @bitstream.bits_left.positive?
|
|
289
|
+
@bitstream.ensure_bits(16)
|
|
290
|
+
end
|
|
291
|
+
remove = @bitstream.bits_left & 15
|
|
292
|
+
@bitstream.skip_bits(remove) if remove.positive?
|
|
234
293
|
end
|
|
235
294
|
|
|
236
295
|
total_written
|
|
@@ -313,14 +372,27 @@ module Cabriolet
|
|
|
313
372
|
# Read new block header if needed
|
|
314
373
|
read_block_header if @block_remaining.zero?
|
|
315
374
|
|
|
316
|
-
# Decode as much as possible
|
|
375
|
+
# Decode as much as possible from the current block
|
|
317
376
|
this_run = [@block_remaining, bytes_todo].min
|
|
318
377
|
bytes_todo -= this_run
|
|
319
378
|
@block_remaining -= this_run
|
|
320
379
|
|
|
321
380
|
case @block_type
|
|
322
381
|
when BLOCKTYPE_VERBATIM, BLOCKTYPE_ALIGNED
|
|
323
|
-
decode_huffman_block(this_run)
|
|
382
|
+
remaining = decode_huffman_block(this_run)
|
|
383
|
+
|
|
384
|
+
# Per libmspack lzxd.c: if a match caused overrun (this_run
|
|
385
|
+
# went negative in the inner loop), adjust block_remaining.
|
|
386
|
+
# This happens when a match crosses a block boundary within
|
|
387
|
+
# a frame (bytes_todo limited this_run, not block_remaining).
|
|
388
|
+
if remaining.negative?
|
|
389
|
+
overrun = -remaining
|
|
390
|
+
if overrun > @block_remaining
|
|
391
|
+
raise DecompressionError,
|
|
392
|
+
"Match overrun (#{overrun}) exceeds block remaining (#{@block_remaining})"
|
|
393
|
+
end
|
|
394
|
+
@block_remaining -= overrun
|
|
395
|
+
end
|
|
324
396
|
when BLOCKTYPE_UNCOMPRESSED
|
|
325
397
|
decode_uncompressed_block(this_run)
|
|
326
398
|
else
|
|
@@ -337,9 +409,11 @@ module Cabriolet
|
|
|
337
409
|
#
|
|
338
410
|
# @return [void]
|
|
339
411
|
def read_block_header
|
|
340
|
-
#
|
|
341
|
-
#
|
|
342
|
-
|
|
412
|
+
# Per libmspack lzxd.c: when transitioning FROM an uncompressed block
|
|
413
|
+
# with ODD length, skip 1 raw padding byte to maintain 16-bit alignment.
|
|
414
|
+
if @block_type == BLOCKTYPE_UNCOMPRESSED && @block_length.odd?
|
|
415
|
+
@bitstream.read_raw_byte
|
|
416
|
+
end
|
|
343
417
|
|
|
344
418
|
# Read block type (3 bits)
|
|
345
419
|
@block_type = @bitstream.read_bits(3)
|
|
@@ -549,15 +623,23 @@ module Cabriolet
|
|
|
549
623
|
|
|
550
624
|
# Read uncompressed block header
|
|
551
625
|
#
|
|
626
|
+
# Per libmspack lzxd.c: for uncompressed blocks, the bitstream is
|
|
627
|
+
# flushed (bit_buffer=0, bits_left=0) and R0/R1/R2 are read directly
|
|
628
|
+
# from the raw input stream (i_ptr), NOT through the MSB bitstream.
|
|
629
|
+
# Reading through the MSB bitstream would byte-swap each 16-bit word.
|
|
630
|
+
#
|
|
552
631
|
# @return [void]
|
|
553
632
|
def read_uncompressed_block_header
|
|
554
633
|
@intel_started = true
|
|
555
634
|
|
|
556
|
-
#
|
|
557
|
-
@bitstream.
|
|
635
|
+
# Per libmspack: if bits_left == 0, ensure we have data available
|
|
636
|
+
@bitstream.ensure_bits(16) if @bitstream.bits_left.zero?
|
|
637
|
+
|
|
638
|
+
# Flush bit buffer - discard any remaining bits (alignment padding)
|
|
639
|
+
@bitstream.flush_bit_buffer
|
|
558
640
|
|
|
559
|
-
# Read R0, R1, R2
|
|
560
|
-
bytes = Array.new(12) { @bitstream.
|
|
641
|
+
# Read R0, R1, R2 directly from raw input (bypassing bitstream)
|
|
642
|
+
bytes = Array.new(12) { @bitstream.read_raw_byte }
|
|
561
643
|
@r0 = bytes[0] | (bytes[1] << 8) | (bytes[2] << 16) | (bytes[3] << 24)
|
|
562
644
|
@r1 = bytes[4] | (bytes[5] << 8) | (bytes[6] << 16) | (bytes[7] << 24)
|
|
563
645
|
@r2 = bytes[8] | (bytes[9] << 8) | (bytes[10] << 16) | (bytes[11] << 24)
|
|
@@ -565,8 +647,13 @@ module Cabriolet
|
|
|
565
647
|
|
|
566
648
|
# Decode Huffman-compressed block
|
|
567
649
|
#
|
|
650
|
+
# Per libmspack lzxd.c: the inner decode loop uses this_run as its
|
|
651
|
+
# counter. A match can cause this_run to go negative (overrun past
|
|
652
|
+
# the planned run length). The caller must adjust block_remaining
|
|
653
|
+
# for any overrun.
|
|
654
|
+
#
|
|
568
655
|
# @param run_length [Integer] Number of bytes to decode
|
|
569
|
-
# @return [
|
|
656
|
+
# @return [Integer] Final run_length (0 or negative if overrun)
|
|
570
657
|
def decode_huffman_block(run_length)
|
|
571
658
|
while run_length.positive?
|
|
572
659
|
# Decode main symbol
|
|
@@ -586,6 +673,8 @@ module Cabriolet
|
|
|
586
673
|
run_length -= match_length
|
|
587
674
|
end
|
|
588
675
|
end
|
|
676
|
+
|
|
677
|
+
run_length
|
|
589
678
|
end
|
|
590
679
|
|
|
591
680
|
# Decode and copy a match
|
|
@@ -599,7 +688,7 @@ module Cabriolet
|
|
|
599
688
|
# Decode match length
|
|
600
689
|
match_length = main_element & NUM_PRIMARY_LENGTHS
|
|
601
690
|
if match_length == NUM_PRIMARY_LENGTHS
|
|
602
|
-
if @length_empty
|
|
691
|
+
if @length_empty || @length_tree.nil?
|
|
603
692
|
raise DecompressionError,
|
|
604
693
|
"Length tree needed but empty"
|
|
605
694
|
end
|
|
@@ -747,11 +836,15 @@ module Cabriolet
|
|
|
747
836
|
|
|
748
837
|
# Decode uncompressed block
|
|
749
838
|
#
|
|
839
|
+
# Per libmspack lzxd.c: uncompressed block data is read directly from
|
|
840
|
+
# the raw input stream (i_ptr), NOT through the MSB bitstream. The bit
|
|
841
|
+
# buffer was already flushed when the uncompressed block header was read.
|
|
842
|
+
#
|
|
750
843
|
# @param run_length [Integer] Number of bytes to decode
|
|
751
844
|
# @return [void]
|
|
752
845
|
def decode_uncompressed_block(run_length)
|
|
753
846
|
run_length.times do
|
|
754
|
-
byte = @bitstream.
|
|
847
|
+
byte = @bitstream.read_raw_byte
|
|
755
848
|
@window.setbyte(@window_posn, byte)
|
|
756
849
|
@window_posn += 1
|
|
757
850
|
end
|
|
@@ -62,8 +62,8 @@ salvage: false, **_kwargs)
|
|
|
62
62
|
super(io_system, input, output, buffer_size)
|
|
63
63
|
@fix_mszip = fix_mszip
|
|
64
64
|
|
|
65
|
-
# Initialize sliding window
|
|
66
|
-
@window = "\0" * FRAME_SIZE
|
|
65
|
+
# Initialize sliding window (must be binary to avoid UTF-8 character vs byte mismatch)
|
|
66
|
+
@window = ("\0" * FRAME_SIZE).b
|
|
67
67
|
@window_posn = 0
|
|
68
68
|
@bytes_output = 0
|
|
69
69
|
@window_offset = 0 # Offset into window for unconsumed data (for multi-file CFDATA blocks)
|
|
@@ -77,6 +77,10 @@ salvage: false, **_kwargs)
|
|
|
77
77
|
@distance_lengths = Array.new(DISTANCE_MAXSYMBOLS, 0)
|
|
78
78
|
@literal_tree = nil
|
|
79
79
|
@distance_tree = nil
|
|
80
|
+
|
|
81
|
+
# Cache ENV lookups once at initialization
|
|
82
|
+
@debug_mszip = ENV.fetch("DEBUG_MSZIP", nil)
|
|
83
|
+
@debug_mszip_symbols = ENV.fetch("DEBUG_MSZIP_SYMBOLS", nil)
|
|
80
84
|
end
|
|
81
85
|
|
|
82
86
|
# Decompress MSZIP data
|
|
@@ -86,14 +90,14 @@ salvage: false, **_kwargs)
|
|
|
86
90
|
def decompress(bytes)
|
|
87
91
|
total_written = 0
|
|
88
92
|
|
|
89
|
-
if
|
|
93
|
+
if @debug_mszip
|
|
90
94
|
warn "DEBUG MSZIP.decompress(#{bytes}): ENTRY bytes_output=#{@bytes_output} window_offset=#{@window_offset} window_posn=#{@window_posn}"
|
|
91
95
|
end
|
|
92
96
|
|
|
93
97
|
while bytes.positive?
|
|
94
98
|
# Check if we have buffered data from previous inflate
|
|
95
99
|
if @bytes_output.positive?
|
|
96
|
-
if
|
|
100
|
+
if @debug_mszip
|
|
97
101
|
warn "DEBUG MSZIP: Using buffered data: bytes_output=#{@bytes_output} window_offset=#{@window_offset}"
|
|
98
102
|
end
|
|
99
103
|
|
|
@@ -105,7 +109,7 @@ salvage: false, **_kwargs)
|
|
|
105
109
|
@bytes_output -= write_amount
|
|
106
110
|
@window_offset += write_amount
|
|
107
111
|
|
|
108
|
-
if
|
|
112
|
+
if @debug_mszip
|
|
109
113
|
warn "DEBUG MSZIP: After buffer write: total_written=#{total_written} bytes_remaining=#{bytes} bytes_output=#{@bytes_output}"
|
|
110
114
|
end
|
|
111
115
|
|
|
@@ -120,13 +124,13 @@ salvage: false, **_kwargs)
|
|
|
120
124
|
|
|
121
125
|
# Read 'CK' signature (marks start of MSZIP frame)
|
|
122
126
|
# Every MSZIP frame starts with a CK signature
|
|
123
|
-
if
|
|
127
|
+
if @debug_mszip
|
|
124
128
|
warn "DEBUG MSZIP: Reading CK signature (new MSZIP frame)"
|
|
125
129
|
end
|
|
126
130
|
read_signature
|
|
127
131
|
|
|
128
132
|
# Inflate the MSZIP frame (processes deflate blocks until last_block or window full)
|
|
129
|
-
if
|
|
133
|
+
if @debug_mszip
|
|
130
134
|
warn "DEBUG MSZIP: Calling inflate_block"
|
|
131
135
|
end
|
|
132
136
|
|
|
@@ -142,14 +146,14 @@ salvage: false, **_kwargs)
|
|
|
142
146
|
@bytes_output = FRAME_SIZE
|
|
143
147
|
end
|
|
144
148
|
|
|
145
|
-
if
|
|
149
|
+
if @debug_mszip
|
|
146
150
|
warn "DEBUG MSZIP: After inflate_block: bytes_output=#{@bytes_output} window_posn=#{@window_posn}"
|
|
147
151
|
end
|
|
148
152
|
|
|
149
153
|
# Now we have data in the window buffer - loop back to write from it
|
|
150
154
|
end
|
|
151
155
|
|
|
152
|
-
if
|
|
156
|
+
if @debug_mszip
|
|
153
157
|
warn "DEBUG MSZIP.decompress: EXIT total_written=#{total_written}"
|
|
154
158
|
end
|
|
155
159
|
|
|
@@ -160,7 +164,7 @@ salvage: false, **_kwargs)
|
|
|
160
164
|
|
|
161
165
|
# Read and verify 'CK' signature
|
|
162
166
|
def read_signature
|
|
163
|
-
if
|
|
167
|
+
if @debug_mszip
|
|
164
168
|
warn "DEBUG read_signature: Before byte_align"
|
|
165
169
|
end
|
|
166
170
|
|
|
@@ -171,7 +175,7 @@ salvage: false, **_kwargs)
|
|
|
171
175
|
c = @bitstream.read_bits(8)
|
|
172
176
|
k = @bitstream.read_bits(8)
|
|
173
177
|
|
|
174
|
-
if
|
|
178
|
+
if @debug_mszip
|
|
175
179
|
warn "DEBUG read_signature: Read 0x#{c.to_s(16)} 0x#{k.to_s(16)} (expected 'C'=0x43 'K'=0x4B)"
|
|
176
180
|
end
|
|
177
181
|
|
|
@@ -188,7 +192,7 @@ salvage: false, **_kwargs)
|
|
|
188
192
|
|
|
189
193
|
if c == SIGNATURE_BYTE_C && k == SIGNATURE_BYTE_K
|
|
190
194
|
found = true
|
|
191
|
-
if
|
|
195
|
+
if @debug_mszip
|
|
192
196
|
warn "DEBUG read_signature: Found CK signature after searching"
|
|
193
197
|
end
|
|
194
198
|
break
|
|
@@ -211,7 +215,7 @@ salvage: false, **_kwargs)
|
|
|
211
215
|
last_block = @bitstream.read_bits(1)
|
|
212
216
|
block_type = @bitstream.read_bits(2)
|
|
213
217
|
|
|
214
|
-
if
|
|
218
|
+
if @debug_mszip
|
|
215
219
|
warn "DEBUG inflate_block: First block: last_block=#{last_block} block_type=#{block_type}"
|
|
216
220
|
end
|
|
217
221
|
|
|
@@ -230,7 +234,7 @@ salvage: false, **_kwargs)
|
|
|
230
234
|
raise DecompressionError, "Invalid block type: #{block_type}"
|
|
231
235
|
end
|
|
232
236
|
|
|
233
|
-
if
|
|
237
|
+
if @debug_mszip
|
|
234
238
|
warn "DEBUG inflate_block: After block: last_block=#{last_block} window_posn=#{@window_posn}"
|
|
235
239
|
end
|
|
236
240
|
|
|
@@ -383,7 +387,7 @@ salvage: false, **_kwargs)
|
|
|
383
387
|
def inflate_huffman_block
|
|
384
388
|
symbol_count = 0
|
|
385
389
|
loop do
|
|
386
|
-
if
|
|
390
|
+
if @debug_mszip_symbols
|
|
387
391
|
warn "DEBUG inflate_huffman_block: window_posn=#{@window_posn} bytes_output=#{@bytes_output}"
|
|
388
392
|
end
|
|
389
393
|
|
|
@@ -394,7 +398,7 @@ salvage: false, **_kwargs)
|
|
|
394
398
|
)
|
|
395
399
|
symbol_count += 1
|
|
396
400
|
|
|
397
|
-
if
|
|
401
|
+
if @debug_mszip || @debug_mszip_symbols
|
|
398
402
|
warn "DEBUG inflate_huffman_block[#{symbol_count}]: decoded code=#{code} (#{'0x%02x' % code if code < 256})"
|
|
399
403
|
end
|
|
400
404
|
|
|
@@ -405,7 +409,7 @@ salvage: false, **_kwargs)
|
|
|
405
409
|
flush_window if @window_posn == FRAME_SIZE
|
|
406
410
|
elsif code == 256
|
|
407
411
|
# End of block
|
|
408
|
-
if
|
|
412
|
+
if @debug_mszip || @debug_mszip_symbols
|
|
409
413
|
warn "DEBUG inflate_huffman_block: END OF BLOCK (window_posn=#{@window_posn})"
|
|
410
414
|
end
|
|
411
415
|
break
|
|
@@ -60,12 +60,11 @@ module Cabriolet
|
|
|
60
60
|
@window_bits = window_bits
|
|
61
61
|
@window_size = 1 << window_bits
|
|
62
62
|
|
|
63
|
-
# Initialize window (
|
|
63
|
+
# Initialize window (must be binary to avoid UTF-8 character vs byte mismatch)
|
|
64
64
|
@window = if String.method_defined?(:bytesplice)
|
|
65
|
-
"\0" * @window_size
|
|
65
|
+
("\0" * @window_size).b
|
|
66
66
|
else
|
|
67
|
-
|
|
68
|
-
String.new("\0" * @window_size)
|
|
67
|
+
String.new("\0" * @window_size, encoding: Encoding::BINARY)
|
|
69
68
|
end
|
|
70
69
|
@window_posn = 0
|
|
71
70
|
@frame_todo = FRAME_SIZE
|
|
@@ -52,8 +52,14 @@ num_symbols = nil)
|
|
|
52
52
|
"Huffman decode error: code too long"
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
# Get the next bit from bit buffer at position idx
|
|
56
|
-
|
|
55
|
+
# Get the next bit from bit buffer at position idx.
|
|
56
|
+
# LSB mode: peek_bits returns bottom n bits; bit idx is at position idx.
|
|
57
|
+
# MSB mode: peek_bits returns top n bits right-justified; bit idx is the LSB.
|
|
58
|
+
bit = if bitstream.bit_order == :msb
|
|
59
|
+
bitstream.peek_bits(idx + 1) & 1
|
|
60
|
+
else
|
|
61
|
+
(bitstream.peek_bits(idx + 1) >> idx) & 1
|
|
62
|
+
end
|
|
57
63
|
|
|
58
64
|
# Follow the tree path: (current_entry << 1) | bit
|
|
59
65
|
next_idx = (sym << 1) | bit
|
|
@@ -259,11 +259,11 @@ module Cabriolet
|
|
|
259
259
|
else
|
|
260
260
|
@plugins.select { |_, entry| entry[:state] == state }
|
|
261
261
|
.transform_values do |entry|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
262
|
+
{
|
|
263
|
+
metadata: entry[:metadata],
|
|
264
|
+
state: entry[:state],
|
|
265
|
+
error: entry[:error],
|
|
266
|
+
}
|
|
267
267
|
end
|
|
268
268
|
end
|
|
269
269
|
end
|
|
@@ -98,7 +98,7 @@ module Cabriolet
|
|
|
98
98
|
raise ArgumentError, "Invalid mode: #{mode}"
|
|
99
99
|
end
|
|
100
100
|
|
|
101
|
-
::File.open(@filename, file_mode)
|
|
101
|
+
::File.open(@filename, file_mode) # rubocop:disable Style/FileOpen -- lifecycle managed by close/closed?
|
|
102
102
|
rescue Errno::ENOENT, Errno::EACCES => e
|
|
103
103
|
raise IOError, "Cannot open file #{@filename}: #{e.message}"
|
|
104
104
|
end
|
data/lib/cabriolet/version.rb
CHANGED
data/lib/cabriolet.rb
CHANGED
|
@@ -65,101 +65,99 @@ module Cabriolet
|
|
|
65
65
|
# Default buffer size of 64KB - better for modern systems
|
|
66
66
|
# Larger buffers reduce I/O syscall overhead significantly
|
|
67
67
|
self.default_buffer_size = 65_536
|
|
68
|
-
end
|
|
69
68
|
|
|
70
|
-
# Models
|
|
71
|
-
require_relative "cabriolet/models/cabinet"
|
|
72
|
-
require_relative "cabriolet/models/folder"
|
|
73
|
-
require_relative "cabriolet/models/folder_data"
|
|
74
|
-
require_relative "cabriolet/models/file"
|
|
75
|
-
require_relative "cabriolet/models/chm_header"
|
|
76
|
-
require_relative "cabriolet/models/chm_section"
|
|
77
|
-
require_relative "cabriolet/models/chm_file"
|
|
78
|
-
require_relative "cabriolet/models/szdd_header"
|
|
79
|
-
require_relative "cabriolet/models/kwaj_header"
|
|
80
|
-
require_relative "cabriolet/models/hlp_header"
|
|
81
|
-
require_relative "cabriolet/models/hlp_file"
|
|
82
|
-
require_relative "cabriolet/models/winhelp_header"
|
|
83
|
-
require_relative "cabriolet/models/lit_header"
|
|
84
|
-
require_relative "cabriolet/models/oab_header"
|
|
85
|
-
|
|
86
|
-
# Load errors first (needed by algorithm_factory)
|
|
87
|
-
|
|
88
|
-
# Load plugin system
|
|
89
|
-
require_relative "cabriolet/plugin"
|
|
90
|
-
require_relative "cabriolet/plugin_validator"
|
|
91
|
-
require_relative "cabriolet/plugin_manager"
|
|
92
|
-
|
|
93
|
-
# Load algorithm factory
|
|
94
|
-
require_relative "cabriolet/algorithm_factory"
|
|
95
|
-
|
|
96
|
-
# Load core components
|
|
97
|
-
|
|
98
|
-
require_relative "cabriolet/quantum_shared"
|
|
99
|
-
|
|
100
|
-
require_relative "cabriolet/huffman/tree"
|
|
101
|
-
require_relative "cabriolet/huffman/decoder"
|
|
102
|
-
require_relative "cabriolet/huffman/encoder"
|
|
103
|
-
|
|
104
|
-
require_relative "cabriolet/decompressors/base"
|
|
105
|
-
require_relative "cabriolet/decompressors/none"
|
|
106
|
-
require_relative "cabriolet/decompressors/lzss"
|
|
107
|
-
require_relative "cabriolet/decompressors/mszip"
|
|
108
|
-
require_relative "cabriolet/decompressors/lzx"
|
|
109
|
-
require_relative "cabriolet/decompressors/quantum"
|
|
110
|
-
|
|
111
|
-
require_relative "cabriolet/compressors/base"
|
|
112
|
-
require_relative "cabriolet/compressors/lzss"
|
|
113
|
-
require_relative "cabriolet/compressors/mszip"
|
|
114
|
-
require_relative "cabriolet/compressors/lzx"
|
|
115
|
-
require_relative "cabriolet/compressors/quantum"
|
|
116
|
-
|
|
117
|
-
require_relative "cabriolet/cab/parser"
|
|
118
|
-
require_relative "cabriolet/cab/decompressor"
|
|
119
|
-
require_relative "cabriolet/cab/extractor"
|
|
120
|
-
require_relative "cabriolet/cab/compressor"
|
|
121
|
-
|
|
122
|
-
require_relative "cabriolet/chm/parser"
|
|
123
|
-
require_relative "cabriolet/chm/decompressor"
|
|
124
|
-
require_relative "cabriolet/chm/compressor"
|
|
125
|
-
|
|
126
|
-
require_relative "cabriolet/szdd/parser"
|
|
127
|
-
require_relative "cabriolet/szdd/decompressor"
|
|
128
|
-
require_relative "cabriolet/szdd/compressor"
|
|
129
|
-
|
|
130
|
-
require_relative "cabriolet/kwaj/parser"
|
|
131
|
-
require_relative "cabriolet/kwaj/decompressor"
|
|
132
|
-
require_relative "cabriolet/kwaj/compressor"
|
|
133
|
-
|
|
134
|
-
require_relative "cabriolet/hlp/parser"
|
|
135
|
-
require_relative "cabriolet/hlp/decompressor"
|
|
136
|
-
require_relative "cabriolet/hlp/compressor"
|
|
137
|
-
|
|
138
|
-
require_relative "cabriolet/hlp/winhelp/parser"
|
|
139
|
-
require_relative "cabriolet/hlp/winhelp/zeck_lz77"
|
|
140
|
-
require_relative "cabriolet/hlp/winhelp/decompressor"
|
|
141
|
-
require_relative "cabriolet/hlp/winhelp/compressor"
|
|
142
|
-
|
|
143
|
-
require_relative "cabriolet/lit/decompressor"
|
|
144
|
-
require_relative "cabriolet/lit/compressor"
|
|
145
|
-
|
|
146
|
-
require_relative "cabriolet/oab/decompressor"
|
|
147
|
-
require_relative "cabriolet/oab/compressor"
|
|
148
|
-
|
|
149
|
-
# Load new advanced features
|
|
150
|
-
require_relative "cabriolet/format_detector"
|
|
151
|
-
require_relative "cabriolet/extraction/base_extractor"
|
|
152
|
-
require_relative "cabriolet/extraction/extractor"
|
|
153
|
-
require_relative "cabriolet/streaming"
|
|
154
|
-
require_relative "cabriolet/validator"
|
|
155
|
-
require_relative "cabriolet/repairer"
|
|
156
|
-
require_relative "cabriolet/modifier"
|
|
157
|
-
|
|
158
|
-
# Load CLI (optional, for command-line usage)
|
|
159
|
-
require_relative "cabriolet/cli"
|
|
160
|
-
|
|
161
|
-
# Convenience methods
|
|
162
|
-
module Cabriolet
|
|
69
|
+
# Models
|
|
70
|
+
require_relative "cabriolet/models/cabinet"
|
|
71
|
+
require_relative "cabriolet/models/folder"
|
|
72
|
+
require_relative "cabriolet/models/folder_data"
|
|
73
|
+
require_relative "cabriolet/models/file"
|
|
74
|
+
require_relative "cabriolet/models/chm_header"
|
|
75
|
+
require_relative "cabriolet/models/chm_section"
|
|
76
|
+
require_relative "cabriolet/models/chm_file"
|
|
77
|
+
require_relative "cabriolet/models/szdd_header"
|
|
78
|
+
require_relative "cabriolet/models/kwaj_header"
|
|
79
|
+
require_relative "cabriolet/models/hlp_header"
|
|
80
|
+
require_relative "cabriolet/models/hlp_file"
|
|
81
|
+
require_relative "cabriolet/models/winhelp_header"
|
|
82
|
+
require_relative "cabriolet/models/lit_header"
|
|
83
|
+
require_relative "cabriolet/models/oab_header"
|
|
84
|
+
|
|
85
|
+
# Load errors first (needed by algorithm_factory)
|
|
86
|
+
|
|
87
|
+
# Load plugin system
|
|
88
|
+
require_relative "cabriolet/plugin"
|
|
89
|
+
require_relative "cabriolet/plugin_validator"
|
|
90
|
+
require_relative "cabriolet/plugin_manager"
|
|
91
|
+
|
|
92
|
+
# Load algorithm factory
|
|
93
|
+
require_relative "cabriolet/algorithm_factory"
|
|
94
|
+
|
|
95
|
+
# Load core components
|
|
96
|
+
|
|
97
|
+
require_relative "cabriolet/quantum_shared"
|
|
98
|
+
|
|
99
|
+
require_relative "cabriolet/huffman/tree"
|
|
100
|
+
require_relative "cabriolet/huffman/decoder"
|
|
101
|
+
require_relative "cabriolet/huffman/encoder"
|
|
102
|
+
|
|
103
|
+
require_relative "cabriolet/decompressors/base"
|
|
104
|
+
require_relative "cabriolet/decompressors/none"
|
|
105
|
+
require_relative "cabriolet/decompressors/lzss"
|
|
106
|
+
require_relative "cabriolet/decompressors/mszip"
|
|
107
|
+
require_relative "cabriolet/decompressors/lzx"
|
|
108
|
+
require_relative "cabriolet/decompressors/quantum"
|
|
109
|
+
|
|
110
|
+
require_relative "cabriolet/compressors/base"
|
|
111
|
+
require_relative "cabriolet/compressors/lzss"
|
|
112
|
+
require_relative "cabriolet/compressors/mszip"
|
|
113
|
+
require_relative "cabriolet/compressors/lzx"
|
|
114
|
+
require_relative "cabriolet/compressors/quantum"
|
|
115
|
+
|
|
116
|
+
require_relative "cabriolet/cab/parser"
|
|
117
|
+
require_relative "cabriolet/cab/decompressor"
|
|
118
|
+
require_relative "cabriolet/cab/extractor"
|
|
119
|
+
require_relative "cabriolet/cab/compressor"
|
|
120
|
+
|
|
121
|
+
require_relative "cabriolet/chm/parser"
|
|
122
|
+
require_relative "cabriolet/chm/decompressor"
|
|
123
|
+
require_relative "cabriolet/chm/compressor"
|
|
124
|
+
|
|
125
|
+
require_relative "cabriolet/szdd/parser"
|
|
126
|
+
require_relative "cabriolet/szdd/decompressor"
|
|
127
|
+
require_relative "cabriolet/szdd/compressor"
|
|
128
|
+
|
|
129
|
+
require_relative "cabriolet/kwaj/parser"
|
|
130
|
+
require_relative "cabriolet/kwaj/decompressor"
|
|
131
|
+
require_relative "cabriolet/kwaj/compressor"
|
|
132
|
+
|
|
133
|
+
require_relative "cabriolet/hlp/parser"
|
|
134
|
+
require_relative "cabriolet/hlp/decompressor"
|
|
135
|
+
require_relative "cabriolet/hlp/compressor"
|
|
136
|
+
|
|
137
|
+
require_relative "cabriolet/hlp/winhelp/parser"
|
|
138
|
+
require_relative "cabriolet/hlp/winhelp/zeck_lz77"
|
|
139
|
+
require_relative "cabriolet/hlp/winhelp/decompressor"
|
|
140
|
+
require_relative "cabriolet/hlp/winhelp/compressor"
|
|
141
|
+
|
|
142
|
+
require_relative "cabriolet/lit/decompressor"
|
|
143
|
+
require_relative "cabriolet/lit/compressor"
|
|
144
|
+
|
|
145
|
+
require_relative "cabriolet/oab/decompressor"
|
|
146
|
+
require_relative "cabriolet/oab/compressor"
|
|
147
|
+
|
|
148
|
+
# Load new advanced features
|
|
149
|
+
require_relative "cabriolet/format_detector"
|
|
150
|
+
require_relative "cabriolet/extraction/base_extractor"
|
|
151
|
+
require_relative "cabriolet/extraction/extractor"
|
|
152
|
+
require_relative "cabriolet/streaming"
|
|
153
|
+
require_relative "cabriolet/validator"
|
|
154
|
+
require_relative "cabriolet/repairer"
|
|
155
|
+
require_relative "cabriolet/modifier"
|
|
156
|
+
|
|
157
|
+
# Load CLI (optional, for command-line usage)
|
|
158
|
+
require_relative "cabriolet/cli"
|
|
159
|
+
|
|
160
|
+
# Convenience methods
|
|
163
161
|
class << self
|
|
164
162
|
# Open and parse an archive with automatic format detection
|
|
165
163
|
#
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cabriolet
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.2
|
|
4
|
+
version: 0.2.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-03-06 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bindata
|