cabriolet 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cabriolet/binary/bitstream.rb +56 -6
- data/lib/cabriolet/cab/decompressor.rb +6 -3
- data/lib/cabriolet/cab/extractor.rb +203 -100
- data/lib/cabriolet/checksum.rb +7 -4
- data/lib/cabriolet/compressors/lzx.rb +17 -9
- data/lib/cabriolet/compressors/mszip.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +22 -14
- data/lib/cabriolet/decompressors/lzx.rb +176 -25
- data/lib/cabriolet/decompressors/mszip.rb +21 -17
- data/lib/cabriolet/decompressors/quantum.rb +3 -4
- data/lib/cabriolet/huffman/decoder.rb +8 -2
- data/lib/cabriolet/plugin_manager.rb +5 -5
- data/lib/cabriolet/system/file_handle.rb +1 -1
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +92 -94
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 59d85958b00fa7eb684912e7ec77bfd9ce261a01035ed7ef42c2d6db5b1405a7
|
|
4
|
+
data.tar.gz: e9fa2123fe7c48778a01018c68f47cdb66f68449fe4fee48074e2b7591d5b9e1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ddc9ef226ce8359cbe65ac4e80853dbd8e57f25c6e934e48837a163f614bcdbdf94f18609c788c6396f4b87f93d4e12c1fa326c8f291bc1156ec0eaf0387d377
|
|
7
|
+
data.tar.gz: 20206a2b5011d4a869d0f4d29085b5b874fe1d34daa00ce14f842d55a71722c19329ea75b1f46adaebf88fc43805a467b089e038aa19fe5736010d1a9ca951ad
|
data/lib/cabriolet/binary/bitstream.rb
CHANGED
|
@@ -4,7 +4,7 @@ module Cabriolet
|
|
|
4
4
|
module Binary
|
|
5
5
|
# Bitstream provides bit-level I/O operations for reading compressed data
|
|
6
6
|
class Bitstream
|
|
7
|
-
attr_reader :io_system, :handle, :buffer_size, :bit_order
|
|
7
|
+
attr_reader :io_system, :handle, :buffer_size, :bit_order, :bits_left
|
|
8
8
|
|
|
9
9
|
# Initialize a new bitstream
|
|
10
10
|
#
|
|
@@ -29,6 +29,9 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
29
29
|
# For MSB mode, we need to know the bit width of the buffer
|
|
30
30
|
# Ruby integers are arbitrary precision, so we use 32 bits as standard
|
|
31
31
|
@bitbuf_width = 32
|
|
32
|
+
|
|
33
|
+
# Cache ENV lookups once at initialization
|
|
34
|
+
@debug_bitstream = ENV.fetch("DEBUG_BITSTREAM", nil)
|
|
32
35
|
end
|
|
33
36
|
|
|
34
37
|
# Read specified number of bits from the stream
|
|
@@ -83,7 +86,7 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
83
86
|
byte = 0 if byte.nil?
|
|
84
87
|
|
|
85
88
|
# DEBUG
|
|
86
|
-
if ENV["DEBUG_BITSTREAM"]
|
|
89
|
+
if @debug_bitstream
|
|
87
90
|
warn "DEBUG LSB read_byte: buffer_pos=#{@buffer_pos} byte=#{byte} (#{byte.to_s(2).rjust(
|
|
88
91
|
8, '0'
|
|
89
92
|
)}) bits_left=#{@bits_left}"
|
|
@@ -101,7 +104,7 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
101
104
|
@bits_left -= num_bits
|
|
102
105
|
|
|
103
106
|
# DEBUG
|
|
104
|
-
warn "DEBUG LSB read_bits(#{num_bits}): result=#{result} buffer=#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]
|
|
107
|
+
warn "DEBUG LSB read_bits(#{num_bits}): result=#{result} buffer=#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if @debug_bitstream
|
|
105
108
|
|
|
106
109
|
result
|
|
107
110
|
end
|
|
@@ -116,7 +119,7 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
116
119
|
word = read_msb_word
|
|
117
120
|
|
|
118
121
|
# DEBUG
|
|
119
|
-
warn "DEBUG MSB read_bytes: word=0x#{word.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]
|
|
122
|
+
warn "DEBUG MSB read_bytes: word=0x#{word.to_s(16)} bits_left=#{@bits_left}" if @debug_bitstream
|
|
120
123
|
|
|
121
124
|
# INJECT_BITS (MSB): inject at the left side
|
|
122
125
|
@bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
|
|
@@ -131,7 +134,7 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
131
134
|
@bits_left -= num_bits
|
|
132
135
|
|
|
133
136
|
# DEBUG
|
|
134
|
-
warn "DEBUG MSB read_bits(#{num_bits}) result=#{result} (0x#{result.to_s(16)}) buffer=0x#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]
|
|
137
|
+
warn "DEBUG MSB read_bits(#{num_bits}) result=#{result} (0x#{result.to_s(16)}) buffer=0x#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if @debug_bitstream
|
|
135
138
|
|
|
136
139
|
result
|
|
137
140
|
end
|
|
@@ -172,15 +175,62 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
|
172
175
|
byte
|
|
173
176
|
end
|
|
174
177
|
|
|
178
|
+
# Ensure at least num_bits are available in the bit buffer.
|
|
179
|
+
# Reads from input if needed. Used for alignment operations.
|
|
180
|
+
#
|
|
181
|
+
# @param num_bits [Integer] Minimum number of bits required
|
|
182
|
+
# @return [void]
|
|
183
|
+
def ensure_bits(num_bits)
|
|
184
|
+
if @bit_order == :msb
|
|
185
|
+
while @bits_left < num_bits
|
|
186
|
+
word = read_msb_word
|
|
187
|
+
@bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
|
|
188
|
+
@bits_left += 16
|
|
189
|
+
end
|
|
190
|
+
else
|
|
191
|
+
while @bits_left < num_bits
|
|
192
|
+
byte = read_byte
|
|
193
|
+
byte = 0 if byte.nil?
|
|
194
|
+
@bit_buffer |= (byte << @bits_left)
|
|
195
|
+
@bits_left += 8
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
|
|
175
200
|
# Align to the next byte boundary
|
|
176
201
|
#
|
|
177
202
|
# @return [void]
|
|
178
203
|
def byte_align
|
|
179
204
|
discard_bits = @bits_left % 8
|
|
180
|
-
@bit_buffer >>= discard_bits
|
|
205
|
+
if @bit_order == :msb
|
|
206
|
+
# MSB mode: valid bits are at the left (high) end, shift left to discard
|
|
207
|
+
@bit_buffer = (@bit_buffer << discard_bits) & ((1 << @bitbuf_width) - 1)
|
|
208
|
+
else
|
|
209
|
+
@bit_buffer >>= discard_bits
|
|
210
|
+
end
|
|
181
211
|
@bits_left -= discard_bits
|
|
182
212
|
end
|
|
183
213
|
|
|
214
|
+
# Flush the bit buffer entirely (discard all remaining bits).
|
|
215
|
+
# Per libmspack lzxd.c: used when transitioning to raw byte reading
|
|
216
|
+
# for uncompressed blocks. Sets bits_left=0 and bit_buffer=0.
|
|
217
|
+
#
|
|
218
|
+
# @return [void]
|
|
219
|
+
def flush_bit_buffer
|
|
220
|
+
@bit_buffer = 0
|
|
221
|
+
@bits_left = 0
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
# Read a raw byte directly from the input, bypassing the bit buffer.
|
|
225
|
+
# Per libmspack lzxd.c: uncompressed block headers and data are read
|
|
226
|
+
# directly from the input pointer (i_ptr), not through the bitstream.
|
|
227
|
+
# Call flush_bit_buffer first to discard any residual bits.
|
|
228
|
+
#
|
|
229
|
+
# @return [Integer] Byte value (0 on EOF)
|
|
230
|
+
def read_raw_byte
|
|
231
|
+
read_byte || 0
|
|
232
|
+
end
|
|
233
|
+
|
|
184
234
|
# Peek at bits without consuming them
|
|
185
235
|
#
|
|
186
236
|
# @param num_bits [Integer] Number of bits to peek at
|
|
data/lib/cabriolet/cab/decompressor.rb
CHANGED
|
@@ -157,8 +157,11 @@ module Cabriolet
|
|
|
157
157
|
offset = cab_offset + 4
|
|
158
158
|
end
|
|
159
159
|
else
|
|
160
|
-
# No cabinet found in this chunk, move to next
|
|
161
|
-
|
|
160
|
+
# No cabinet found in this chunk, move to next.
|
|
161
|
+
# Overlap by 20 bytes so MSCF signatures spanning chunk
|
|
162
|
+
# boundaries are not missed (state machine reads 20 bytes).
|
|
163
|
+
overlap = length > 20 ? 20 : 0
|
|
164
|
+
offset += [length - overlap, 1].max
|
|
162
165
|
end
|
|
163
166
|
end
|
|
164
167
|
|
|
@@ -452,7 +455,7 @@ file_length)
|
|
|
452
455
|
cablen_u32, caboff, file_length)
|
|
453
456
|
|
|
454
457
|
# Not valid, restart search after "MSCF"
|
|
455
|
-
|
|
458
|
+
state = 0
|
|
456
459
|
end
|
|
457
460
|
end
|
|
458
461
|
|
|
data/lib/cabriolet/cab/extractor.rb
CHANGED
|
@@ -22,6 +22,9 @@ module Cabriolet
|
|
|
22
22
|
@current_decomp = nil
|
|
23
23
|
@current_input = nil
|
|
24
24
|
@current_offset = 0
|
|
25
|
+
|
|
26
|
+
# Cache ENV lookups once at initialization
|
|
27
|
+
@debug_block = ENV.fetch("DEBUG_BLOCK", nil)
|
|
25
28
|
end
|
|
26
29
|
|
|
27
30
|
# Extract a single file from the cabinet
|
|
@@ -34,25 +37,7 @@ module Cabriolet
|
|
|
34
37
|
def extract_file(file, output_path, **options)
|
|
35
38
|
salvage = options[:salvage] || @decompressor.salvage
|
|
36
39
|
folder = file.folder
|
|
37
|
-
|
|
38
|
-
# Validate file
|
|
39
|
-
raise Cabriolet::ArgumentError, "File has no folder" unless folder
|
|
40
|
-
|
|
41
|
-
if file.offset > Constants::LENGTH_MAX
|
|
42
|
-
raise DecompressionError,
|
|
43
|
-
"File offset beyond 2GB limit"
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
# Check file length
|
|
47
|
-
filelen = file.length
|
|
48
|
-
if filelen > (Constants::LENGTH_MAX - file.offset)
|
|
49
|
-
unless salvage
|
|
50
|
-
raise DecompressionError,
|
|
51
|
-
"File length exceeds 2GB limit"
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
filelen = Constants::LENGTH_MAX - file.offset
|
|
55
|
-
end
|
|
40
|
+
filelen = validate_file_for_extraction(file, folder, salvage)
|
|
56
41
|
|
|
57
42
|
# Check for merge requirements
|
|
58
43
|
if folder.needs_prev_merge?
|
|
@@ -60,81 +45,22 @@ module Cabriolet
|
|
|
60
45
|
"File requires previous cabinet, cabinet set is incomplete"
|
|
61
46
|
end
|
|
62
47
|
|
|
63
|
-
|
|
64
|
-
unless salvage
|
|
65
|
-
max_len = folder.num_blocks * Constants::BLOCK_MAX
|
|
66
|
-
if file.offset > max_len || filelen > (max_len - file.offset)
|
|
67
|
-
raise DecompressionError, "File extends beyond folder data"
|
|
68
|
-
end
|
|
69
|
-
end
|
|
48
|
+
validate_file_in_folder(folder, file.offset, filelen, salvage)
|
|
70
49
|
|
|
71
50
|
# Create output directory if needed
|
|
72
51
|
output_dir = ::File.dirname(output_path)
|
|
73
52
|
FileUtils.mkdir_p(output_dir) unless ::File.directory?(output_dir)
|
|
74
53
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
warn "DEBUG extract_file: Checking reset condition for file #{file.filename} (offset=#{file.offset}, length=#{file.length})"
|
|
78
|
-
warn " @current_folder == folder: #{@current_folder == folder} (current=#{@current_folder.object_id}, new=#{folder.object_id})"
|
|
79
|
-
warn " @current_offset (#{@current_offset}) > file.offset (#{file.offset}): #{@current_offset > file.offset}"
|
|
80
|
-
warn " @current_decomp.nil?: #{@current_decomp.nil?}"
|
|
81
|
-
warn " Reset needed?: #{@current_folder != folder || @current_offset > file.offset || !@current_decomp}"
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
if @current_folder != folder || @current_offset > file.offset || !@current_decomp
|
|
85
|
-
if ENV["DEBUG_BLOCK"]
|
|
86
|
-
warn "DEBUG extract_file: RESETTING state (creating new BlockReader)"
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
# Reset state
|
|
90
|
-
@current_input&.close
|
|
91
|
-
@current_input = nil
|
|
92
|
-
@current_decomp = nil
|
|
93
|
-
|
|
94
|
-
# Create new input (libmspack lines 1092-1095)
|
|
95
|
-
# This BlockReader will be REUSED across all files in this folder
|
|
96
|
-
@current_input = BlockReader.new(@io_system, folder.data,
|
|
97
|
-
folder.num_blocks, salvage)
|
|
98
|
-
@current_folder = folder
|
|
99
|
-
@current_offset = 0
|
|
100
|
-
|
|
101
|
-
# Create decompressor ONCE and reuse it (this is the key fix!)
|
|
102
|
-
# The decompressor maintains bitstream state across files
|
|
103
|
-
@current_decomp = @decompressor.create_decompressor(folder,
|
|
104
|
-
@current_input, nil)
|
|
105
|
-
elsif ENV["DEBUG_BLOCK"]
|
|
106
|
-
warn "DEBUG extract_file: NOT resetting (reusing existing BlockReader and decompressor)"
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
# Skip ahead if needed (libmspack lines 1130-1134)
|
|
110
|
-
if file.offset > @current_offset
|
|
111
|
-
skip_bytes = file.offset - @current_offset
|
|
112
|
-
|
|
113
|
-
# Decompress with NULL output to skip (libmspack line 1130: self->d->outfh = NULL)
|
|
114
|
-
null_output = System::MemoryHandle.new("", Constants::MODE_WRITE)
|
|
115
|
-
|
|
116
|
-
# Reuse existing decompressor, change output to NULL
|
|
117
|
-
@current_decomp.instance_variable_set(:@output, null_output)
|
|
118
|
-
|
|
119
|
-
# Set output length for LZX frame limiting
|
|
120
|
-
@current_decomp.set_output_length(skip_bytes) if @current_decomp.respond_to?(:set_output_length)
|
|
121
|
-
|
|
122
|
-
@current_decomp.decompress(skip_bytes)
|
|
123
|
-
@current_offset += skip_bytes
|
|
124
|
-
end
|
|
54
|
+
setup_decompressor_for_folder(folder, salvage, file.offset)
|
|
55
|
+
skip_to_file_offset(file.offset, salvage, file.filename)
|
|
125
56
|
|
|
126
57
|
# Extract actual file (libmspack lines 1137-1141)
|
|
127
58
|
output_fh = @io_system.open(output_path, Constants::MODE_WRITE)
|
|
128
59
|
|
|
129
60
|
begin
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
# Set output length for LZX frame limiting
|
|
134
|
-
@current_decomp.set_output_length(filelen) if @current_decomp.respond_to?(:set_output_length)
|
|
135
|
-
|
|
136
|
-
@current_decomp.decompress(filelen)
|
|
137
|
-
@current_offset += filelen
|
|
61
|
+
write_file_data(output_fh, filelen)
|
|
62
|
+
rescue DecompressionError
|
|
63
|
+
handle_extraction_error(output_fh, output_path, file.filename, salvage, filelen)
|
|
138
64
|
ensure
|
|
139
65
|
output_fh.close
|
|
140
66
|
end
|
|
@@ -142,6 +68,15 @@ module Cabriolet
|
|
|
142
68
|
filelen
|
|
143
69
|
end
|
|
144
70
|
|
|
71
|
+
# Reset extraction state (used in salvage mode to recover from errors)
|
|
72
|
+
def reset_state
|
|
73
|
+
@current_input&.close
|
|
74
|
+
@current_input = nil
|
|
75
|
+
@current_decomp = nil
|
|
76
|
+
@current_folder = nil
|
|
77
|
+
@current_offset = 0
|
|
78
|
+
end
|
|
79
|
+
|
|
145
80
|
# Extract all files from a cabinet
|
|
146
81
|
#
|
|
147
82
|
# @param cabinet [Models::Cabinet] Cabinet to extract from
|
|
@@ -150,16 +85,19 @@ module Cabriolet
|
|
|
150
85
|
# @option options [Boolean] :preserve_paths Preserve directory structure (default: true)
|
|
151
86
|
# @option options [Boolean] :set_timestamps Set file modification times (default: true)
|
|
152
87
|
# @option options [Proc] :progress Progress callback
|
|
88
|
+
# @option options [Boolean] :salvage Skip files that fail to extract (default: false)
|
|
153
89
|
# @return [Integer] Number of files extracted
|
|
154
90
|
def extract_all(cabinet, output_dir, **options)
|
|
155
91
|
preserve_paths = options.fetch(:preserve_paths, true)
|
|
156
92
|
set_timestamps = options.fetch(:set_timestamps, true)
|
|
157
93
|
progress = options[:progress]
|
|
94
|
+
salvage = options[:salvage] || false
|
|
158
95
|
|
|
159
96
|
# Create output directory
|
|
160
97
|
FileUtils.mkdir_p(output_dir) unless ::File.directory?(output_dir)
|
|
161
98
|
|
|
162
99
|
count = 0
|
|
100
|
+
failed_count = 0
|
|
163
101
|
cabinet.files.each do |file|
|
|
164
102
|
# Determine output path
|
|
165
103
|
output_path = if preserve_paths
|
|
@@ -169,8 +107,18 @@ module Cabriolet
|
|
|
169
107
|
::File.basename(file.filename))
|
|
170
108
|
end
|
|
171
109
|
|
|
172
|
-
# Extract file
|
|
173
|
-
|
|
110
|
+
# Extract file (skip if salvage mode and extraction fails)
|
|
111
|
+
begin
|
|
112
|
+
extract_file(file, output_path, **options)
|
|
113
|
+
rescue DecompressionError => e
|
|
114
|
+
if salvage
|
|
115
|
+
warn "Salvage: skipping #{file.filename}: #{e.message}"
|
|
116
|
+
failed_count += 1
|
|
117
|
+
next
|
|
118
|
+
else
|
|
119
|
+
raise
|
|
120
|
+
end
|
|
121
|
+
end
|
|
174
122
|
|
|
175
123
|
# Set timestamp if requested
|
|
176
124
|
if set_timestamps && file.modification_time
|
|
@@ -185,11 +133,163 @@ module Cabriolet
|
|
|
185
133
|
progress&.call(file, count, cabinet.files.size)
|
|
186
134
|
end
|
|
187
135
|
|
|
136
|
+
warn "Salvage: #{failed_count} file(s) skipped due to extraction errors" if failed_count.positive?
|
|
137
|
+
|
|
188
138
|
count
|
|
189
139
|
end
|
|
190
140
|
|
|
191
141
|
private
|
|
192
142
|
|
|
143
|
+
# Validate file for extraction
|
|
144
|
+
#
|
|
145
|
+
# @param file [Models::File] File to validate
|
|
146
|
+
# @param folder [Models::Folder] Folder containing the file
|
|
147
|
+
# @param salvage [Boolean] Salvage mode flag
|
|
148
|
+
# @return [Integer] Validated file length
|
|
149
|
+
def validate_file_for_extraction(file, folder, salvage)
|
|
150
|
+
raise Cabriolet::ArgumentError, "File has no folder" unless folder
|
|
151
|
+
|
|
152
|
+
if file.offset > Constants::LENGTH_MAX
|
|
153
|
+
raise DecompressionError,
|
|
154
|
+
"File offset beyond 2GB limit"
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
filelen = file.length
|
|
158
|
+
if filelen > (Constants::LENGTH_MAX - file.offset)
|
|
159
|
+
unless salvage
|
|
160
|
+
raise DecompressionError,
|
|
161
|
+
"File length exceeds 2GB limit"
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
filelen = Constants::LENGTH_MAX - file.offset
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
filelen
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
# Validate file fits within folder
|
|
171
|
+
#
|
|
172
|
+
# @param folder [Models::Folder] Folder to check
|
|
173
|
+
# @param file_offset [Integer] File offset
|
|
174
|
+
# @param filelen [Integer] File length
|
|
175
|
+
# @param salvage [Boolean] Salvage mode flag
|
|
176
|
+
def validate_file_in_folder(folder, file_offset, filelen, salvage)
|
|
177
|
+
return if salvage
|
|
178
|
+
|
|
179
|
+
max_len = folder.num_blocks * Constants::BLOCK_MAX
|
|
180
|
+
return unless file_offset > max_len || filelen > (max_len - file_offset)
|
|
181
|
+
|
|
182
|
+
raise DecompressionError, "File extends beyond folder data"
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
# Setup decompressor for folder
|
|
186
|
+
#
|
|
187
|
+
# @param folder [Models::Folder] Folder to setup for
|
|
188
|
+
# @param salvage [Boolean] Salvage mode flag
|
|
189
|
+
# @param file_offset [Integer] File offset for reset condition check
|
|
190
|
+
def setup_decompressor_for_folder(folder, salvage, file_offset)
|
|
191
|
+
if @debug_block
|
|
192
|
+
warn "DEBUG extract_file: Checking reset condition"
|
|
193
|
+
warn " @current_folder == folder: #{@current_folder == folder}"
|
|
194
|
+
warn " @current_offset (#{@current_offset}) > file_offset (#{file_offset})"
|
|
195
|
+
warn " @current_decomp.nil?: #{@current_decomp.nil?}"
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
if @current_folder != folder || @current_offset > file_offset || !@current_decomp
|
|
199
|
+
if @debug_block
|
|
200
|
+
warn "DEBUG extract_file: RESETTING state (creating new BlockReader)"
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
# Reset state
|
|
204
|
+
@current_input&.close
|
|
205
|
+
@current_input = nil
|
|
206
|
+
@current_decomp = nil
|
|
207
|
+
|
|
208
|
+
# Create new input (libmspack lines 1092-1095)
|
|
209
|
+
@current_input = BlockReader.new(@io_system, folder.data,
|
|
210
|
+
folder.num_blocks, salvage)
|
|
211
|
+
@current_folder = folder
|
|
212
|
+
@current_offset = 0
|
|
213
|
+
|
|
214
|
+
# Create decompressor ONCE and reuse it
|
|
215
|
+
@current_decomp = @decompressor.create_decompressor(folder,
|
|
216
|
+
@current_input, nil)
|
|
217
|
+
|
|
218
|
+
# Per libmspack cabd.c: set output_length from the folder's total
|
|
219
|
+
# uncompressed size (max file.offset + file.length across all files
|
|
220
|
+
# in the folder). This allows the LZX decompressor to reduce the
|
|
221
|
+
# last frame's size so it doesn't read past the end of the stream.
|
|
222
|
+
if @current_decomp.respond_to?(:set_output_length)
|
|
223
|
+
cab = folder.data&.cabinet
|
|
224
|
+
if cab&.files
|
|
225
|
+
folder_files = cab.files.select { |f| f.folder == folder }
|
|
226
|
+
max_end = folder_files.map { |f| f.offset + f.length }.max
|
|
227
|
+
@current_decomp.set_output_length(max_end) if max_end&.positive?
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
elsif @debug_block
|
|
231
|
+
warn "DEBUG extract_file: NOT resetting (reusing existing BlockReader)"
|
|
232
|
+
end
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
# Skip to file offset
|
|
236
|
+
#
|
|
237
|
+
# @param file_offset [Integer] Target offset
|
|
238
|
+
# @param salvage [Boolean] Salvage mode flag
|
|
239
|
+
# @param filename [String] Filename for error messages
|
|
240
|
+
def skip_to_file_offset(file_offset, salvage, filename)
|
|
241
|
+
return unless file_offset > @current_offset
|
|
242
|
+
|
|
243
|
+
skip_bytes = file_offset - @current_offset
|
|
244
|
+
null_output = System::MemoryHandle.new("", Constants::MODE_WRITE)
|
|
245
|
+
|
|
246
|
+
@current_decomp.instance_variable_set(:@output, null_output)
|
|
247
|
+
|
|
248
|
+
begin
|
|
249
|
+
@current_decomp.decompress(skip_bytes)
|
|
250
|
+
rescue DecompressionError
|
|
251
|
+
if salvage
|
|
252
|
+
warn "Salvage: unable to skip to file #{filename}, resetting state"
|
|
253
|
+
reset_state
|
|
254
|
+
else
|
|
255
|
+
raise
|
|
256
|
+
end
|
|
257
|
+
end
|
|
258
|
+
@current_offset += skip_bytes
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
# Write file data using decompressor
|
|
262
|
+
#
|
|
263
|
+
# @param output_fh [System::FileHandle] Output file handle
|
|
264
|
+
# @param filelen [Integer] Number of bytes to write
|
|
265
|
+
def write_file_data(output_fh, filelen)
|
|
266
|
+
unless @current_decomp
|
|
267
|
+
raise DecompressionError, "Decompressor not available (state was reset)"
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
@current_decomp.instance_variable_set(:@output, output_fh)
|
|
271
|
+
@current_decomp.decompress(filelen)
|
|
272
|
+
@current_offset += filelen
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
# Handle extraction error
|
|
276
|
+
#
|
|
277
|
+
# @param output_fh [System::FileHandle] Output file handle
|
|
278
|
+
# @param output_path [String] Output file path
|
|
279
|
+
# @param filename [String] Filename for error messages
|
|
280
|
+
# @param salvage [Boolean] Salvage mode flag
|
|
281
|
+
# @raise [DecompressionError] If not in salvage mode
|
|
282
|
+
def handle_extraction_error(output_fh, output_path, filename, salvage, _filelen)
|
|
283
|
+
output_fh.close
|
|
284
|
+
if salvage
|
|
285
|
+
::File.write(output_path, "", mode: "wb")
|
|
286
|
+
warn "Salvage: created empty file for #{filename} due to decompression error"
|
|
287
|
+
reset_state
|
|
288
|
+
else
|
|
289
|
+
raise
|
|
290
|
+
end
|
|
291
|
+
end
|
|
292
|
+
|
|
193
293
|
# Set file attributes based on CAB attributes
|
|
194
294
|
#
|
|
195
295
|
# @param path [String] File path
|
|
@@ -229,6 +329,9 @@ module Cabriolet
|
|
|
229
329
|
@buffer_pos = 0
|
|
230
330
|
@cab_handle = nil
|
|
231
331
|
|
|
332
|
+
# Cache ENV lookups once at initialization
|
|
333
|
+
@debug_block = ENV.fetch("DEBUG_BLOCK", nil)
|
|
334
|
+
|
|
232
335
|
# Open first cabinet and seek to data offset
|
|
233
336
|
open_current_cabinet
|
|
234
337
|
end
|
|
@@ -236,7 +339,7 @@ module Cabriolet
|
|
|
236
339
|
def read(bytes)
|
|
237
340
|
# Early return if we've already exhausted all blocks and buffer
|
|
238
341
|
if @current_block >= @num_blocks && @buffer_pos >= @buffer.bytesize
|
|
239
|
-
if ENV["DEBUG_BLOCK"]
|
|
342
|
+
if @debug_block
|
|
240
343
|
warn "DEBUG BlockReader.read(#{bytes}): Already exhausted, returning empty"
|
|
241
344
|
end
|
|
242
345
|
return +""
|
|
@@ -244,14 +347,14 @@ module Cabriolet
|
|
|
244
347
|
|
|
245
348
|
result = +""
|
|
246
349
|
|
|
247
|
-
if ENV["DEBUG_BLOCK"]
|
|
350
|
+
if @debug_block
|
|
248
351
|
warn "DEBUG BlockReader.read(#{bytes}): buffer_size=#{@buffer.bytesize} buffer_pos=#{@buffer_pos} block=#{@current_block}/#{@num_blocks}"
|
|
249
352
|
end
|
|
250
353
|
|
|
251
354
|
while result.bytesize < bytes
|
|
252
355
|
# Read more data if buffer is empty
|
|
253
356
|
if (@buffer_pos >= @buffer.bytesize) && !read_next_block
|
|
254
|
-
if ENV["DEBUG_BLOCK"]
|
|
357
|
+
if @debug_block
|
|
255
358
|
warn "DEBUG BlockReader.read: EXHAUSTED at result.bytesize=#{result.bytesize} (wanted #{bytes})"
|
|
256
359
|
end
|
|
257
360
|
break
|
|
@@ -265,7 +368,7 @@ module Cabriolet
|
|
|
265
368
|
@buffer_pos += to_copy
|
|
266
369
|
end
|
|
267
370
|
|
|
268
|
-
if ENV["DEBUG_BLOCK"]
|
|
371
|
+
if @debug_block
|
|
269
372
|
warn "DEBUG BlockReader.read: returning #{result.bytesize} bytes"
|
|
270
373
|
end
|
|
271
374
|
|
|
@@ -289,12 +392,12 @@ module Cabriolet
|
|
|
289
392
|
private
|
|
290
393
|
|
|
291
394
|
def read_next_block
|
|
292
|
-
if ENV["DEBUG_BLOCK"]
|
|
395
|
+
if @debug_block
|
|
293
396
|
warn "DEBUG read_next_block: current_block=#{@current_block} num_blocks=#{@num_blocks}"
|
|
294
397
|
end
|
|
295
398
|
|
|
296
399
|
if @current_block >= @num_blocks
|
|
297
|
-
if ENV["DEBUG_BLOCK"]
|
|
400
|
+
if @debug_block
|
|
298
401
|
warn "DEBUG read_next_block: EXHAUSTED (current_block >= num_blocks)"
|
|
299
402
|
end
|
|
300
403
|
return false
|
|
@@ -305,19 +408,19 @@ module Cabriolet
|
|
|
305
408
|
|
|
306
409
|
loop do
|
|
307
410
|
# Read CFDATA header
|
|
308
|
-
if ENV["DEBUG_BLOCK"]
|
|
411
|
+
if @debug_block
|
|
309
412
|
handle_pos = @cab_handle.tell
|
|
310
413
|
warn "DEBUG read_next_block: About to read CFDATA header at position #{handle_pos}"
|
|
311
414
|
end
|
|
312
415
|
|
|
313
416
|
header_data = @cab_handle.read(Constants::CFDATA_SIZE)
|
|
314
417
|
|
|
315
|
-
if ENV["DEBUG_BLOCK"]
|
|
418
|
+
if @debug_block
|
|
316
419
|
warn "DEBUG read_next_block: Read #{header_data.bytesize} bytes (expected #{Constants::CFDATA_SIZE})"
|
|
317
420
|
end
|
|
318
421
|
|
|
319
422
|
if header_data.bytesize != Constants::CFDATA_SIZE
|
|
320
|
-
if ENV["DEBUG_BLOCK"]
|
|
423
|
+
if @debug_block
|
|
321
424
|
warn "DEBUG read_next_block: FAILED - header read returned #{header_data.bytesize} bytes"
|
|
322
425
|
end
|
|
323
426
|
return false
|
|
@@ -345,18 +448,18 @@ module Cabriolet
|
|
|
345
448
|
end
|
|
346
449
|
|
|
347
450
|
# Read compressed data
|
|
348
|
-
if ENV["DEBUG_BLOCK"]
|
|
451
|
+
if @debug_block
|
|
349
452
|
warn "DEBUG read_next_block: About to read #{cfdata.compressed_size} bytes of compressed data"
|
|
350
453
|
end
|
|
351
454
|
|
|
352
455
|
compressed_data = @cab_handle.read(cfdata.compressed_size)
|
|
353
456
|
|
|
354
|
-
if ENV["DEBUG_BLOCK"]
|
|
457
|
+
if @debug_block
|
|
355
458
|
warn "DEBUG read_next_block: Read #{compressed_data.bytesize} bytes of compressed data (expected #{cfdata.compressed_size})"
|
|
356
459
|
end
|
|
357
460
|
|
|
358
461
|
if compressed_data.bytesize != cfdata.compressed_size
|
|
359
|
-
if ENV["DEBUG_BLOCK"]
|
|
462
|
+
if @debug_block
|
|
360
463
|
warn "DEBUG read_next_block: FAILED - compressed data read returned #{compressed_data.bytesize} bytes"
|
|
361
464
|
end
|
|
362
465
|
return false
|
|
@@ -400,7 +503,7 @@ module Cabriolet
|
|
|
400
503
|
end
|
|
401
504
|
|
|
402
505
|
def open_current_cabinet
|
|
403
|
-
if ENV["DEBUG_BLOCK"]
|
|
506
|
+
if @debug_block
|
|
404
507
|
warn "DEBUG open_current_cabinet: filename=#{@current_data.cabinet.filename} offset=#{@current_data.offset}"
|
|
405
508
|
end
|
|
406
509
|
|
|
@@ -408,7 +511,7 @@ module Cabriolet
|
|
|
408
511
|
@cab_handle = @io_system.open(@current_data.cabinet.filename, Constants::MODE_READ)
|
|
409
512
|
@cab_handle.seek(@current_data.offset, Constants::SEEK_START)
|
|
410
513
|
|
|
411
|
-
if ENV["DEBUG_BLOCK"]
|
|
514
|
+
if @debug_block
|
|
412
515
|
actual_pos = @cab_handle.tell
|
|
413
516
|
warn "DEBUG open_current_cabinet: seeked to position #{actual_pos} (expected #{@current_data.offset})"
|
|
414
517
|
end
|
data/lib/cabriolet/checksum.rb
CHANGED
|
@@ -28,14 +28,17 @@ module Cabriolet
|
|
|
28
28
|
ul = 0
|
|
29
29
|
offset = bytes.size - remainder
|
|
30
30
|
|
|
31
|
+
# Match libmspack's cabd_checksum remainder handling:
|
|
32
|
+
# The C fall-through switch processes bytes in decreasing shift
|
|
33
|
+
# order (first remaining byte gets the highest shift).
|
|
31
34
|
case remainder
|
|
32
35
|
when 3
|
|
33
|
-
ul |= bytes[offset + 2] << 16
|
|
36
|
+
ul |= bytes[offset] << 16
|
|
34
37
|
ul |= bytes[offset + 1] << 8
|
|
35
|
-
ul |= bytes[offset]
|
|
38
|
+
ul |= bytes[offset + 2]
|
|
36
39
|
when 2
|
|
37
|
-
ul |= bytes[offset + 1] << 8
|
|
38
|
-
ul |= bytes[offset]
|
|
40
|
+
ul |= bytes[offset] << 8
|
|
41
|
+
ul |= bytes[offset + 1]
|
|
39
42
|
when 1
|
|
40
43
|
ul |= bytes[offset]
|
|
41
44
|
end
|