cabriolet 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cabriolet/algorithm_factory.rb +2 -2
- data/lib/cabriolet/base_compressor.rb +6 -6
- data/lib/cabriolet/binary/bitstream.rb +56 -6
- data/lib/cabriolet/cab/decompressor.rb +10 -7
- data/lib/cabriolet/cab/extractor.rb +49 -21
- data/lib/cabriolet/cab/parser.rb +3 -0
- data/lib/cabriolet/checksum.rb +7 -4
- data/lib/cabriolet/cli.rb +4 -4
- data/lib/cabriolet/compressors/lzx.rb +17 -9
- data/lib/cabriolet/compressors/mszip.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +22 -14
- data/lib/cabriolet/decompressors/lzx.rb +136 -24
- data/lib/cabriolet/decompressors/mszip.rb +36 -17
- data/lib/cabriolet/decompressors/quantum.rb +34 -36
- data/lib/cabriolet/file_manager.rb +4 -4
- data/lib/cabriolet/format_base.rb +4 -4
- data/lib/cabriolet/hlp/compressor.rb +2 -2
- data/lib/cabriolet/huffman/decoder.rb +8 -2
- data/lib/cabriolet/plugin.rb +2 -2
- data/lib/cabriolet/plugin_manager.rb +5 -5
- data/lib/cabriolet/streaming.rb +2 -2
- data/lib/cabriolet/system/file_handle.rb +1 -1
- data/lib/cabriolet/validator.rb +2 -2
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +96 -98
- metadata +2 -2
|
@@ -129,8 +129,8 @@ module Cabriolet
|
|
|
129
129
|
@num_offsets = POSITION_SLOTS[window_bits - 15] << 3
|
|
130
130
|
@maintree_maxsymbols = NUM_CHARS + @num_offsets
|
|
131
131
|
|
|
132
|
-
# Initialize window
|
|
133
|
-
@window = "\0" * @window_size
|
|
132
|
+
# Initialize window (must be binary to avoid UTF-8 character vs byte mismatch)
|
|
133
|
+
@window = ("\0" * @window_size).b
|
|
134
134
|
@window_posn = 0
|
|
135
135
|
@frame_posn = 0
|
|
136
136
|
@frame = 0
|
|
@@ -149,7 +149,7 @@ module Cabriolet
|
|
|
149
149
|
# Intel E8 transformation state
|
|
150
150
|
@intel_filesize = 0
|
|
151
151
|
@intel_started = false
|
|
152
|
-
@e8_buf = "\0" * FRAME_SIZE
|
|
152
|
+
@e8_buf = ("\0" * FRAME_SIZE).b
|
|
153
153
|
|
|
154
154
|
# Initialize bitstream (LZX uses MSB-first bit ordering per libmspack lzxd.c)
|
|
155
155
|
@bitstream = Binary::Bitstream.new(io_system, input, buffer_size,
|
|
@@ -162,6 +162,12 @@ module Cabriolet
|
|
|
162
162
|
@offset = 0
|
|
163
163
|
@output_ptr = 0
|
|
164
164
|
@output_end = 0
|
|
165
|
+
|
|
166
|
+
# Per libmspack: pending frame data for multi-file extraction.
|
|
167
|
+
# When a decompress call ends mid-frame, the unwritten portion
|
|
168
|
+
# of the frame is stored here for the next call to output.
|
|
169
|
+
@pending_frame_data = nil
|
|
170
|
+
@pending_frame_offset = 0
|
|
165
171
|
end
|
|
166
172
|
|
|
167
173
|
# Set output length (for Intel E8 processing)
|
|
@@ -172,8 +178,30 @@ module Cabriolet
|
|
|
172
178
|
@output_length = length if length.positive?
|
|
173
179
|
end
|
|
174
180
|
|
|
181
|
+
# Free resources used by the decompressor
|
|
182
|
+
#
|
|
183
|
+
# Releases large memory buffers to prevent memory leaks when
|
|
184
|
+
# the decompressor is no longer needed.
|
|
185
|
+
#
|
|
186
|
+
# @return [void]
|
|
187
|
+
def free
|
|
188
|
+
@window = nil
|
|
189
|
+
@e8_buf = nil
|
|
190
|
+
@pending_frame_data = nil
|
|
191
|
+
@bitstream = nil
|
|
192
|
+
@maintree_lengths = nil
|
|
193
|
+
@length_lengths = nil
|
|
194
|
+
@pretree_lengths = nil
|
|
195
|
+
@aligned_lengths = nil
|
|
196
|
+
end
|
|
197
|
+
|
|
175
198
|
# Decompress LZX data
|
|
176
199
|
#
|
|
200
|
+
# Per libmspack lzxd.c: the decompressor always decodes full frames
|
|
201
|
+
# (32KB) into the window, but may output fewer bytes if the caller
|
|
202
|
+
# requests less. When multiple files share a folder, decompress is
|
|
203
|
+
# called per file, so partial-frame data must carry over between calls.
|
|
204
|
+
#
|
|
177
205
|
# @param bytes [Integer] Number of bytes to decompress
|
|
178
206
|
# @return [Integer] Number of bytes decompressed
|
|
179
207
|
def decompress(bytes)
|
|
@@ -183,7 +211,30 @@ module Cabriolet
|
|
|
183
211
|
read_intel_header unless @header_read
|
|
184
212
|
|
|
185
213
|
total_written = 0
|
|
186
|
-
|
|
214
|
+
|
|
215
|
+
# Output any pending frame data from the previous partial-frame write.
|
|
216
|
+
# This handles multi-file extraction where the previous call ended
|
|
217
|
+
# mid-frame and the next file's data starts in the same frame.
|
|
218
|
+
if @pending_frame_data
|
|
219
|
+
avail = @pending_frame_data.bytesize - @pending_frame_offset
|
|
220
|
+
write_amount = [bytes, avail].min
|
|
221
|
+
io_system.write(output,
|
|
222
|
+
@pending_frame_data[@pending_frame_offset,
|
|
223
|
+
write_amount])
|
|
224
|
+
total_written += write_amount
|
|
225
|
+
@offset += write_amount
|
|
226
|
+
@pending_frame_offset += write_amount
|
|
227
|
+
|
|
228
|
+
if @pending_frame_offset >= @pending_frame_data.bytesize
|
|
229
|
+
@pending_frame_data = nil
|
|
230
|
+
@pending_frame_offset = 0
|
|
231
|
+
end
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
remaining = bytes - total_written
|
|
235
|
+
return total_written if remaining <= 0
|
|
236
|
+
|
|
237
|
+
end_frame = ((@offset + remaining) / FRAME_SIZE) + 1
|
|
187
238
|
|
|
188
239
|
while @frame < end_frame
|
|
189
240
|
# Check reset interval - reset offset registers at frame boundaries
|
|
@@ -217,20 +268,47 @@ module Cabriolet
|
|
|
217
268
|
@window[@frame_posn, frame_size]
|
|
218
269
|
end
|
|
219
270
|
|
|
220
|
-
#
|
|
271
|
+
# Defensive guard: frame_data should never be nil if the >= window
|
|
272
|
+
# wrap checks below are correct. If it is, the stream is corrupt
|
|
273
|
+
# or a regression has been introduced.
|
|
274
|
+
if frame_data.nil?
|
|
275
|
+
if @salvage
|
|
276
|
+
warn "Salvage: nil frame data at frame_posn=#{@frame_posn}, frame=#{@frame}"
|
|
277
|
+
break
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
raise DecompressionError,
|
|
281
|
+
"LZX: nil frame data at position #{@frame_posn}, frame_size=#{frame_size}"
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
# Write frame - per libmspack: offset tracks actual output bytes,
|
|
285
|
+
# not full frame bytes. Save unwritten remainder for next call.
|
|
221
286
|
write_amount = [bytes - total_written, frame_size].min
|
|
222
287
|
io_system.write(output, frame_data[0, write_amount])
|
|
223
288
|
total_written += write_amount
|
|
224
|
-
@offset +=
|
|
289
|
+
@offset += write_amount
|
|
225
290
|
|
|
226
|
-
#
|
|
291
|
+
# Store pending data if partial frame write
|
|
292
|
+
if write_amount < frame_size
|
|
293
|
+
@pending_frame_data = frame_data
|
|
294
|
+
@pending_frame_offset = write_amount
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
# Advance frame (always by full frame, matching decode position)
|
|
227
298
|
@frame += 1
|
|
228
299
|
@frame_posn += frame_size
|
|
229
|
-
@frame_posn = 0 if @frame_posn
|
|
230
|
-
@window_posn = 0 if @window_posn
|
|
231
|
-
|
|
232
|
-
# Re-align bitstream
|
|
233
|
-
|
|
300
|
+
@frame_posn = 0 if @frame_posn >= @window_size
|
|
301
|
+
@window_posn = 0 if @window_posn >= @window_size
|
|
302
|
+
|
|
303
|
+
# Re-align bitstream to 16-bit word boundary between frames.
|
|
304
|
+
# Per libmspack lzxd.c: LZX frames are padded to 16-bit word
|
|
305
|
+
# boundaries (not 8-bit byte boundaries) because the bitstream
|
|
306
|
+
# reads data in 16-bit little-endian words.
|
|
307
|
+
if @bitstream.bits_left.positive?
|
|
308
|
+
@bitstream.ensure_bits(16)
|
|
309
|
+
end
|
|
310
|
+
remove = @bitstream.bits_left & 15
|
|
311
|
+
@bitstream.skip_bits(remove) if remove.positive?
|
|
234
312
|
end
|
|
235
313
|
|
|
236
314
|
total_written
|
|
@@ -313,14 +391,27 @@ module Cabriolet
|
|
|
313
391
|
# Read new block header if needed
|
|
314
392
|
read_block_header if @block_remaining.zero?
|
|
315
393
|
|
|
316
|
-
# Decode as much as possible
|
|
394
|
+
# Decode as much as possible from the current block
|
|
317
395
|
this_run = [@block_remaining, bytes_todo].min
|
|
318
396
|
bytes_todo -= this_run
|
|
319
397
|
@block_remaining -= this_run
|
|
320
398
|
|
|
321
399
|
case @block_type
|
|
322
400
|
when BLOCKTYPE_VERBATIM, BLOCKTYPE_ALIGNED
|
|
323
|
-
decode_huffman_block(this_run)
|
|
401
|
+
remaining = decode_huffman_block(this_run)
|
|
402
|
+
|
|
403
|
+
# Per libmspack lzxd.c: if a match caused overrun (this_run
|
|
404
|
+
# went negative in the inner loop), adjust block_remaining.
|
|
405
|
+
# This happens when a match crosses a block boundary within
|
|
406
|
+
# a frame (bytes_todo limited this_run, not block_remaining).
|
|
407
|
+
if remaining.negative?
|
|
408
|
+
overrun = -remaining
|
|
409
|
+
if overrun > @block_remaining
|
|
410
|
+
raise DecompressionError,
|
|
411
|
+
"Match overrun (#{overrun}) exceeds block remaining (#{@block_remaining})"
|
|
412
|
+
end
|
|
413
|
+
@block_remaining -= overrun
|
|
414
|
+
end
|
|
324
415
|
when BLOCKTYPE_UNCOMPRESSED
|
|
325
416
|
decode_uncompressed_block(this_run)
|
|
326
417
|
else
|
|
@@ -337,9 +428,11 @@ module Cabriolet
|
|
|
337
428
|
#
|
|
338
429
|
# @return [void]
|
|
339
430
|
def read_block_header
|
|
340
|
-
#
|
|
341
|
-
#
|
|
342
|
-
|
|
431
|
+
# Per libmspack lzxd.c: when transitioning FROM an uncompressed block
|
|
432
|
+
# with ODD length, skip 1 raw padding byte to maintain 16-bit alignment.
|
|
433
|
+
if @block_type == BLOCKTYPE_UNCOMPRESSED && @block_length.odd?
|
|
434
|
+
@bitstream.read_raw_byte
|
|
435
|
+
end
|
|
343
436
|
|
|
344
437
|
# Read block type (3 bits)
|
|
345
438
|
@block_type = @bitstream.read_bits(3)
|
|
@@ -549,15 +642,23 @@ module Cabriolet
|
|
|
549
642
|
|
|
550
643
|
# Read uncompressed block header
|
|
551
644
|
#
|
|
645
|
+
# Per libmspack lzxd.c: for uncompressed blocks, the bitstream is
|
|
646
|
+
# flushed (bit_buffer=0, bits_left=0) and R0/R1/R2 are read directly
|
|
647
|
+
# from the raw input stream (i_ptr), NOT through the MSB bitstream.
|
|
648
|
+
# Reading through the MSB bitstream would byte-swap each 16-bit word.
|
|
649
|
+
#
|
|
552
650
|
# @return [void]
|
|
553
651
|
def read_uncompressed_block_header
|
|
554
652
|
@intel_started = true
|
|
555
653
|
|
|
556
|
-
#
|
|
557
|
-
@bitstream.
|
|
654
|
+
# Per libmspack: if bits_left == 0, ensure we have data available
|
|
655
|
+
@bitstream.ensure_bits(16) if @bitstream.bits_left.zero?
|
|
656
|
+
|
|
657
|
+
# Flush bit buffer - discard any remaining bits (alignment padding)
|
|
658
|
+
@bitstream.flush_bit_buffer
|
|
558
659
|
|
|
559
|
-
# Read R0, R1, R2
|
|
560
|
-
bytes = Array.new(12) { @bitstream.
|
|
660
|
+
# Read R0, R1, R2 directly from raw input (bypassing bitstream)
|
|
661
|
+
bytes = Array.new(12) { @bitstream.read_raw_byte }
|
|
561
662
|
@r0 = bytes[0] | (bytes[1] << 8) | (bytes[2] << 16) | (bytes[3] << 24)
|
|
562
663
|
@r1 = bytes[4] | (bytes[5] << 8) | (bytes[6] << 16) | (bytes[7] << 24)
|
|
563
664
|
@r2 = bytes[8] | (bytes[9] << 8) | (bytes[10] << 16) | (bytes[11] << 24)
|
|
@@ -565,8 +666,13 @@ module Cabriolet
|
|
|
565
666
|
|
|
566
667
|
# Decode Huffman-compressed block
|
|
567
668
|
#
|
|
669
|
+
# Per libmspack lzxd.c: the inner decode loop uses this_run as its
|
|
670
|
+
# counter. A match can cause this_run to go negative (overrun past
|
|
671
|
+
# the planned run length). The caller must adjust block_remaining
|
|
672
|
+
# for any overrun.
|
|
673
|
+
#
|
|
568
674
|
# @param run_length [Integer] Number of bytes to decode
|
|
569
|
-
# @return [
|
|
675
|
+
# @return [Integer] Final run_length (0 or negative if overrun)
|
|
570
676
|
def decode_huffman_block(run_length)
|
|
571
677
|
while run_length.positive?
|
|
572
678
|
# Decode main symbol
|
|
@@ -586,6 +692,8 @@ module Cabriolet
|
|
|
586
692
|
run_length -= match_length
|
|
587
693
|
end
|
|
588
694
|
end
|
|
695
|
+
|
|
696
|
+
run_length
|
|
589
697
|
end
|
|
590
698
|
|
|
591
699
|
# Decode and copy a match
|
|
@@ -599,7 +707,7 @@ module Cabriolet
|
|
|
599
707
|
# Decode match length
|
|
600
708
|
match_length = main_element & NUM_PRIMARY_LENGTHS
|
|
601
709
|
if match_length == NUM_PRIMARY_LENGTHS
|
|
602
|
-
if @length_empty
|
|
710
|
+
if @length_empty || @length_tree.nil?
|
|
603
711
|
raise DecompressionError,
|
|
604
712
|
"Length tree needed but empty"
|
|
605
713
|
end
|
|
@@ -747,11 +855,15 @@ module Cabriolet
|
|
|
747
855
|
|
|
748
856
|
# Decode uncompressed block
|
|
749
857
|
#
|
|
858
|
+
# Per libmspack lzxd.c: uncompressed block data is read directly from
|
|
859
|
+
# the raw input stream (i_ptr), NOT through the MSB bitstream. The bit
|
|
860
|
+
# buffer was already flushed when the uncompressed block header was read.
|
|
861
|
+
#
|
|
750
862
|
# @param run_length [Integer] Number of bytes to decode
|
|
751
863
|
# @return [void]
|
|
752
864
|
def decode_uncompressed_block(run_length)
|
|
753
865
|
run_length.times do
|
|
754
|
-
byte = @bitstream.
|
|
866
|
+
byte = @bitstream.read_raw_byte
|
|
755
867
|
@window.setbyte(@window_posn, byte)
|
|
756
868
|
@window_posn += 1
|
|
757
869
|
end
|
|
@@ -62,8 +62,8 @@ salvage: false, **_kwargs)
|
|
|
62
62
|
super(io_system, input, output, buffer_size)
|
|
63
63
|
@fix_mszip = fix_mszip
|
|
64
64
|
|
|
65
|
-
# Initialize sliding window
|
|
66
|
-
@window = "\0" * FRAME_SIZE
|
|
65
|
+
# Initialize sliding window (must be binary to avoid UTF-8 character vs byte mismatch)
|
|
66
|
+
@window = ("\0" * FRAME_SIZE).b
|
|
67
67
|
@window_posn = 0
|
|
68
68
|
@bytes_output = 0
|
|
69
69
|
@window_offset = 0 # Offset into window for unconsumed data (for multi-file CFDATA blocks)
|
|
@@ -77,6 +77,25 @@ salvage: false, **_kwargs)
|
|
|
77
77
|
@distance_lengths = Array.new(DISTANCE_MAXSYMBOLS, 0)
|
|
78
78
|
@literal_tree = nil
|
|
79
79
|
@distance_tree = nil
|
|
80
|
+
|
|
81
|
+
# Cache ENV lookups once at initialization
|
|
82
|
+
@debug_mszip = ENV.fetch("DEBUG_MSZIP", nil)
|
|
83
|
+
@debug_mszip_symbols = ENV.fetch("DEBUG_MSZIP_SYMBOLS", nil)
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# Free resources used by the decompressor
|
|
87
|
+
#
|
|
88
|
+
# Releases memory buffers to prevent memory leaks when
|
|
89
|
+
# the decompressor is no longer needed.
|
|
90
|
+
#
|
|
91
|
+
# @return [void]
|
|
92
|
+
def free
|
|
93
|
+
@window = nil
|
|
94
|
+
@bitstream = nil
|
|
95
|
+
@literal_lengths = nil
|
|
96
|
+
@distance_lengths = nil
|
|
97
|
+
@literal_tree = nil
|
|
98
|
+
@distance_tree = nil
|
|
80
99
|
end
|
|
81
100
|
|
|
82
101
|
# Decompress MSZIP data
|
|
@@ -86,14 +105,14 @@ salvage: false, **_kwargs)
|
|
|
86
105
|
def decompress(bytes)
|
|
87
106
|
total_written = 0
|
|
88
107
|
|
|
89
|
-
if
|
|
108
|
+
if @debug_mszip
|
|
90
109
|
warn "DEBUG MSZIP.decompress(#{bytes}): ENTRY bytes_output=#{@bytes_output} window_offset=#{@window_offset} window_posn=#{@window_posn}"
|
|
91
110
|
end
|
|
92
111
|
|
|
93
112
|
while bytes.positive?
|
|
94
113
|
# Check if we have buffered data from previous inflate
|
|
95
114
|
if @bytes_output.positive?
|
|
96
|
-
if
|
|
115
|
+
if @debug_mszip
|
|
97
116
|
warn "DEBUG MSZIP: Using buffered data: bytes_output=#{@bytes_output} window_offset=#{@window_offset}"
|
|
98
117
|
end
|
|
99
118
|
|
|
@@ -105,7 +124,7 @@ salvage: false, **_kwargs)
|
|
|
105
124
|
@bytes_output -= write_amount
|
|
106
125
|
@window_offset += write_amount
|
|
107
126
|
|
|
108
|
-
if
|
|
127
|
+
if @debug_mszip
|
|
109
128
|
warn "DEBUG MSZIP: After buffer write: total_written=#{total_written} bytes_remaining=#{bytes} bytes_output=#{@bytes_output}"
|
|
110
129
|
end
|
|
111
130
|
|
|
@@ -120,13 +139,13 @@ salvage: false, **_kwargs)
|
|
|
120
139
|
|
|
121
140
|
# Read 'CK' signature (marks start of MSZIP frame)
|
|
122
141
|
# Every MSZIP frame starts with a CK signature
|
|
123
|
-
if
|
|
142
|
+
if @debug_mszip
|
|
124
143
|
warn "DEBUG MSZIP: Reading CK signature (new MSZIP frame)"
|
|
125
144
|
end
|
|
126
145
|
read_signature
|
|
127
146
|
|
|
128
147
|
# Inflate the MSZIP frame (processes deflate blocks until last_block or window full)
|
|
129
|
-
if
|
|
148
|
+
if @debug_mszip
|
|
130
149
|
warn "DEBUG MSZIP: Calling inflate_block"
|
|
131
150
|
end
|
|
132
151
|
|
|
@@ -142,14 +161,14 @@ salvage: false, **_kwargs)
|
|
|
142
161
|
@bytes_output = FRAME_SIZE
|
|
143
162
|
end
|
|
144
163
|
|
|
145
|
-
if
|
|
164
|
+
if @debug_mszip
|
|
146
165
|
warn "DEBUG MSZIP: After inflate_block: bytes_output=#{@bytes_output} window_posn=#{@window_posn}"
|
|
147
166
|
end
|
|
148
167
|
|
|
149
168
|
# Now we have data in the window buffer - loop back to write from it
|
|
150
169
|
end
|
|
151
170
|
|
|
152
|
-
if
|
|
171
|
+
if @debug_mszip
|
|
153
172
|
warn "DEBUG MSZIP.decompress: EXIT total_written=#{total_written}"
|
|
154
173
|
end
|
|
155
174
|
|
|
@@ -160,7 +179,7 @@ salvage: false, **_kwargs)
|
|
|
160
179
|
|
|
161
180
|
# Read and verify 'CK' signature
|
|
162
181
|
def read_signature
|
|
163
|
-
if
|
|
182
|
+
if @debug_mszip
|
|
164
183
|
warn "DEBUG read_signature: Before byte_align"
|
|
165
184
|
end
|
|
166
185
|
|
|
@@ -171,7 +190,7 @@ salvage: false, **_kwargs)
|
|
|
171
190
|
c = @bitstream.read_bits(8)
|
|
172
191
|
k = @bitstream.read_bits(8)
|
|
173
192
|
|
|
174
|
-
if
|
|
193
|
+
if @debug_mszip
|
|
175
194
|
warn "DEBUG read_signature: Read 0x#{c.to_s(16)} 0x#{k.to_s(16)} (expected 'C'=0x43 'K'=0x4B)"
|
|
176
195
|
end
|
|
177
196
|
|
|
@@ -188,7 +207,7 @@ salvage: false, **_kwargs)
|
|
|
188
207
|
|
|
189
208
|
if c == SIGNATURE_BYTE_C && k == SIGNATURE_BYTE_K
|
|
190
209
|
found = true
|
|
191
|
-
if
|
|
210
|
+
if @debug_mszip
|
|
192
211
|
warn "DEBUG read_signature: Found CK signature after searching"
|
|
193
212
|
end
|
|
194
213
|
break
|
|
@@ -211,7 +230,7 @@ salvage: false, **_kwargs)
|
|
|
211
230
|
last_block = @bitstream.read_bits(1)
|
|
212
231
|
block_type = @bitstream.read_bits(2)
|
|
213
232
|
|
|
214
|
-
if
|
|
233
|
+
if @debug_mszip
|
|
215
234
|
warn "DEBUG inflate_block: First block: last_block=#{last_block} block_type=#{block_type}"
|
|
216
235
|
end
|
|
217
236
|
|
|
@@ -230,7 +249,7 @@ salvage: false, **_kwargs)
|
|
|
230
249
|
raise DecompressionError, "Invalid block type: #{block_type}"
|
|
231
250
|
end
|
|
232
251
|
|
|
233
|
-
if
|
|
252
|
+
if @debug_mszip
|
|
234
253
|
warn "DEBUG inflate_block: After block: last_block=#{last_block} window_posn=#{@window_posn}"
|
|
235
254
|
end
|
|
236
255
|
|
|
@@ -383,7 +402,7 @@ salvage: false, **_kwargs)
|
|
|
383
402
|
def inflate_huffman_block
|
|
384
403
|
symbol_count = 0
|
|
385
404
|
loop do
|
|
386
|
-
if
|
|
405
|
+
if @debug_mszip_symbols
|
|
387
406
|
warn "DEBUG inflate_huffman_block: window_posn=#{@window_posn} bytes_output=#{@bytes_output}"
|
|
388
407
|
end
|
|
389
408
|
|
|
@@ -394,7 +413,7 @@ salvage: false, **_kwargs)
|
|
|
394
413
|
)
|
|
395
414
|
symbol_count += 1
|
|
396
415
|
|
|
397
|
-
if
|
|
416
|
+
if @debug_mszip || @debug_mszip_symbols
|
|
398
417
|
warn "DEBUG inflate_huffman_block[#{symbol_count}]: decoded code=#{code} (#{'0x%02x' % code if code < 256})"
|
|
399
418
|
end
|
|
400
419
|
|
|
@@ -405,7 +424,7 @@ salvage: false, **_kwargs)
|
|
|
405
424
|
flush_window if @window_posn == FRAME_SIZE
|
|
406
425
|
elsif code == 256
|
|
407
426
|
# End of block
|
|
408
|
-
if
|
|
427
|
+
if @debug_mszip || @debug_mszip_symbols
|
|
409
428
|
warn "DEBUG inflate_huffman_block: END OF BLOCK (window_posn=#{@window_posn})"
|
|
410
429
|
end
|
|
411
430
|
break
|
|
@@ -2,30 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../quantum_shared"
|
|
4
4
|
|
|
5
|
-
#
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
#
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
# Build new string content
|
|
15
|
-
prefix = byteslice(0, index)
|
|
16
|
-
middle = other_string.byteslice(other_index, other_length)
|
|
17
|
-
suffix = byteslice((index + length)..-1)
|
|
18
|
-
new_content = prefix + middle + suffix
|
|
19
|
-
|
|
20
|
-
# Modify receiver in place
|
|
21
|
-
clear
|
|
22
|
-
self << new_content
|
|
23
|
-
|
|
24
|
-
self
|
|
5
|
+
# Helper for 5-argument bytesplice (added in Ruby 3.3)
|
|
6
|
+
# Ruby 3.2 has bytesplice but only 2-3 argument forms
|
|
7
|
+
unless String.method_defined?(:window_splice)
|
|
8
|
+
class String
|
|
9
|
+
# Copy bytes from source string into self at specified position
|
|
10
|
+
# Works on all Ruby versions including 3.2 which lacks 5-arg bytesplice
|
|
11
|
+
def window_splice(dest_idx, dest_len, src, src_idx, src_len)
|
|
12
|
+
self[dest_idx, dest_len] = src.byteslice(src_idx, src_len)
|
|
25
13
|
end
|
|
26
14
|
end
|
|
27
|
-
|
|
28
|
-
String.prepend(StringBytespliceCompat)
|
|
29
15
|
end
|
|
30
16
|
|
|
31
17
|
module Cabriolet
|
|
@@ -60,13 +46,8 @@ module Cabriolet
|
|
|
60
46
|
@window_bits = window_bits
|
|
61
47
|
@window_size = 1 << window_bits
|
|
62
48
|
|
|
63
|
-
# Initialize window (
|
|
64
|
-
@window =
|
|
65
|
-
"\0" * @window_size
|
|
66
|
-
else
|
|
67
|
-
# In Ruby < 3.2, create mutable window using String.new
|
|
68
|
-
String.new("\0" * @window_size)
|
|
69
|
-
end
|
|
49
|
+
# Initialize window (must be binary to avoid UTF-8 character vs byte mismatch)
|
|
50
|
+
@window = ("\0" * @window_size).b
|
|
70
51
|
@window_posn = 0
|
|
71
52
|
@frame_todo = FRAME_SIZE
|
|
72
53
|
|
|
@@ -83,6 +64,21 @@ module Cabriolet
|
|
|
83
64
|
initialize_models
|
|
84
65
|
end
|
|
85
66
|
|
|
67
|
+
# Free resources used by the decompressor
|
|
68
|
+
#
|
|
69
|
+
# Releases memory buffers to prevent memory leaks when
|
|
70
|
+
# the decompressor is no longer needed.
|
|
71
|
+
#
|
|
72
|
+
# @return [void]
|
|
73
|
+
def free
|
|
74
|
+
@window = nil
|
|
75
|
+
@bitstream = nil
|
|
76
|
+
@m0sym = @m1sym = @m2sym = @m3sym = nil
|
|
77
|
+
@m4sym = @m5sym = @m6sym = @m6lsym = nil
|
|
78
|
+
@model0 = @model1 = @model2 = @model3 = nil
|
|
79
|
+
@model4 = @model5 = @model6 = @model6len = nil
|
|
80
|
+
end
|
|
81
|
+
|
|
86
82
|
# Decompress Quantum data
|
|
87
83
|
#
|
|
88
84
|
# @param bytes [Integer] Number of bytes to decompress
|
|
@@ -403,7 +399,7 @@ module Cabriolet
|
|
|
403
399
|
end
|
|
404
400
|
end
|
|
405
401
|
|
|
406
|
-
# Bulk copy using
|
|
402
|
+
# Bulk copy using window_splice for better performance on longer matches
|
|
407
403
|
def copy_match_bulk(offset, length)
|
|
408
404
|
if offset > @window_posn
|
|
409
405
|
# Match wraps around window
|
|
@@ -418,21 +414,23 @@ module Cabriolet
|
|
|
418
414
|
|
|
419
415
|
if copy_len < length
|
|
420
416
|
# Copy from end, then from beginning
|
|
421
|
-
@window.
|
|
422
|
-
|
|
417
|
+
@window.window_splice(@window_posn, copy_len, @window, src_pos,
|
|
418
|
+
copy_len)
|
|
423
419
|
@window_posn += copy_len
|
|
424
420
|
remaining = length - copy_len
|
|
425
|
-
@window.
|
|
421
|
+
@window.window_splice(@window_posn, remaining, @window, 0,
|
|
422
|
+
remaining)
|
|
426
423
|
@window_posn += remaining
|
|
427
424
|
else
|
|
428
425
|
# Copy entirely from end
|
|
429
|
-
@window.
|
|
426
|
+
@window.window_splice(@window_posn, length, @window, src_pos,
|
|
427
|
+
length)
|
|
430
428
|
@window_posn += length
|
|
431
429
|
end
|
|
432
430
|
else
|
|
433
|
-
# Normal copy - use
|
|
431
|
+
# Normal copy - use window_splice for bulk operation
|
|
434
432
|
src_pos = @window_posn - offset
|
|
435
|
-
@window.
|
|
433
|
+
@window.window_splice(@window_posn, length, @window, src_pos, length)
|
|
436
434
|
@window_posn += length
|
|
437
435
|
end
|
|
438
436
|
end
|
|
@@ -29,13 +29,13 @@ module Cabriolet
|
|
|
29
29
|
# @param options [Hash] Format-specific options
|
|
30
30
|
# @return [FileEntry] Added entry
|
|
31
31
|
# @raise [ArgumentError] if file doesn't exist
|
|
32
|
-
def add_file(source_path, archive_path = nil, **
|
|
32
|
+
def add_file(source_path, archive_path = nil, **)
|
|
33
33
|
archive_path ||= File.basename(source_path)
|
|
34
34
|
|
|
35
35
|
entry = FileEntry.new(
|
|
36
36
|
source: source_path,
|
|
37
37
|
archive_path: archive_path,
|
|
38
|
-
|
|
38
|
+
**,
|
|
39
39
|
)
|
|
40
40
|
|
|
41
41
|
@entries << entry
|
|
@@ -48,11 +48,11 @@ module Cabriolet
|
|
|
48
48
|
# @param archive_path [String] Path in archive
|
|
49
49
|
# @param options [Hash] Format-specific options
|
|
50
50
|
# @return [FileEntry] Added entry
|
|
51
|
-
def add_data(data, archive_path, **
|
|
51
|
+
def add_data(data, archive_path, **)
|
|
52
52
|
entry = FileEntry.new(
|
|
53
53
|
data: data,
|
|
54
54
|
archive_path: archive_path,
|
|
55
|
-
|
|
55
|
+
**,
|
|
56
56
|
)
|
|
57
57
|
|
|
58
58
|
@entries << entry
|
|
@@ -44,7 +44,7 @@ module Cabriolet
|
|
|
44
44
|
# @param size [Integer] Data size
|
|
45
45
|
# @param options [Hash] Additional options for the compressor
|
|
46
46
|
# @return [Object] Compressor instance
|
|
47
|
-
def create_compressor(algorithm, input, output, size, **
|
|
47
|
+
def create_compressor(algorithm, input, output, size, **)
|
|
48
48
|
@algorithm_factory.create(
|
|
49
49
|
algorithm,
|
|
50
50
|
:compressor,
|
|
@@ -52,7 +52,7 @@ module Cabriolet
|
|
|
52
52
|
input,
|
|
53
53
|
output,
|
|
54
54
|
size,
|
|
55
|
-
|
|
55
|
+
**,
|
|
56
56
|
)
|
|
57
57
|
end
|
|
58
58
|
|
|
@@ -64,7 +64,7 @@ module Cabriolet
|
|
|
64
64
|
# @param size [Integer] Data size
|
|
65
65
|
# @param options [Hash] Additional options for the decompressor
|
|
66
66
|
# @return [Object] Decompressor instance
|
|
67
|
-
def create_decompressor(algorithm, input, output, size, **
|
|
67
|
+
def create_decompressor(algorithm, input, output, size, **)
|
|
68
68
|
@algorithm_factory.create(
|
|
69
69
|
algorithm,
|
|
70
70
|
:decompressor,
|
|
@@ -72,7 +72,7 @@ module Cabriolet
|
|
|
72
72
|
input,
|
|
73
73
|
output,
|
|
74
74
|
size,
|
|
75
|
-
|
|
75
|
+
**,
|
|
76
76
|
)
|
|
77
77
|
end
|
|
78
78
|
end
|
|
@@ -45,8 +45,8 @@ module Cabriolet
|
|
|
45
45
|
# @param output_file [String] Output file path
|
|
46
46
|
# @param options [Hash] Format options
|
|
47
47
|
# @return [Integer] Bytes written
|
|
48
|
-
def generate(output_file, **
|
|
49
|
-
@quickhelp.generate(output_file, **
|
|
48
|
+
def generate(output_file, **)
|
|
49
|
+
@quickhelp.generate(output_file, **)
|
|
50
50
|
end
|
|
51
51
|
|
|
52
52
|
# Create a Windows Help format HLP file
|
|
@@ -52,8 +52,14 @@ num_symbols = nil)
|
|
|
52
52
|
"Huffman decode error: code too long"
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
# Get the next bit from bit buffer at position idx
|
|
56
|
-
|
|
55
|
+
# Get the next bit from bit buffer at position idx.
|
|
56
|
+
# LSB mode: peek_bits returns bottom n bits; bit idx is at position idx.
|
|
57
|
+
# MSB mode: peek_bits returns top n bits right-justified; bit idx is the LSB.
|
|
58
|
+
bit = if bitstream.bit_order == :msb
|
|
59
|
+
bitstream.peek_bits(idx + 1) & 1
|
|
60
|
+
else
|
|
61
|
+
(bitstream.peek_bits(idx + 1) >> idx) & 1
|
|
62
|
+
end
|
|
57
63
|
|
|
58
64
|
# Follow the tree path: (current_entry << 1) | bit
|
|
59
65
|
next_idx = (sym << 1) | bit
|
data/lib/cabriolet/plugin.rb
CHANGED
|
@@ -190,10 +190,10 @@ module Cabriolet
|
|
|
190
190
|
# @example Register a format-specific decompressor
|
|
191
191
|
# register_algorithm(:special, SpecialDecompressor,
|
|
192
192
|
# category: :decompressor, format: :cab)
|
|
193
|
-
def register_algorithm(type, klass, **
|
|
193
|
+
def register_algorithm(type, klass, **)
|
|
194
194
|
raise PluginError, "Plugin manager not available" unless @manager
|
|
195
195
|
|
|
196
|
-
Cabriolet.algorithm_factory.register(type, klass, **
|
|
196
|
+
Cabriolet.algorithm_factory.register(type, klass, **)
|
|
197
197
|
end
|
|
198
198
|
|
|
199
199
|
# Register a format handler
|
|
@@ -259,11 +259,11 @@ module Cabriolet
|
|
|
259
259
|
else
|
|
260
260
|
@plugins.select { |_, entry| entry[:state] == state }
|
|
261
261
|
.transform_values do |entry|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
262
|
+
{
|
|
263
|
+
metadata: entry[:metadata],
|
|
264
|
+
state: entry[:state],
|
|
265
|
+
error: entry[:error],
|
|
266
|
+
}
|
|
267
267
|
end
|
|
268
268
|
end
|
|
269
269
|
end
|