cabriolet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +799 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +29 -0
- data/README.adoc +1207 -0
- data/exe/cabriolet +6 -0
- data/lib/cabriolet/auto.rb +173 -0
- data/lib/cabriolet/binary/bitstream.rb +148 -0
- data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
- data/lib/cabriolet/binary/chm_structures.rb +213 -0
- data/lib/cabriolet/binary/hlp_structures.rb +66 -0
- data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
- data/lib/cabriolet/binary/lit_structures.rb +107 -0
- data/lib/cabriolet/binary/oab_structures.rb +112 -0
- data/lib/cabriolet/binary/structures.rb +56 -0
- data/lib/cabriolet/binary/szdd_structures.rb +60 -0
- data/lib/cabriolet/cab/compressor.rb +382 -0
- data/lib/cabriolet/cab/decompressor.rb +510 -0
- data/lib/cabriolet/cab/extractor.rb +357 -0
- data/lib/cabriolet/cab/parser.rb +264 -0
- data/lib/cabriolet/chm/compressor.rb +513 -0
- data/lib/cabriolet/chm/decompressor.rb +436 -0
- data/lib/cabriolet/chm/parser.rb +254 -0
- data/lib/cabriolet/cli.rb +776 -0
- data/lib/cabriolet/compressors/base.rb +34 -0
- data/lib/cabriolet/compressors/lzss.rb +250 -0
- data/lib/cabriolet/compressors/lzx.rb +581 -0
- data/lib/cabriolet/compressors/mszip.rb +315 -0
- data/lib/cabriolet/compressors/quantum.rb +446 -0
- data/lib/cabriolet/constants.rb +75 -0
- data/lib/cabriolet/decompressors/base.rb +39 -0
- data/lib/cabriolet/decompressors/lzss.rb +138 -0
- data/lib/cabriolet/decompressors/lzx.rb +726 -0
- data/lib/cabriolet/decompressors/mszip.rb +390 -0
- data/lib/cabriolet/decompressors/none.rb +27 -0
- data/lib/cabriolet/decompressors/quantum.rb +456 -0
- data/lib/cabriolet/errors.rb +39 -0
- data/lib/cabriolet/format_detector.rb +156 -0
- data/lib/cabriolet/hlp/compressor.rb +272 -0
- data/lib/cabriolet/hlp/decompressor.rb +198 -0
- data/lib/cabriolet/hlp/parser.rb +131 -0
- data/lib/cabriolet/huffman/decoder.rb +79 -0
- data/lib/cabriolet/huffman/encoder.rb +108 -0
- data/lib/cabriolet/huffman/tree.rb +138 -0
- data/lib/cabriolet/kwaj/compressor.rb +479 -0
- data/lib/cabriolet/kwaj/decompressor.rb +237 -0
- data/lib/cabriolet/kwaj/parser.rb +183 -0
- data/lib/cabriolet/lit/compressor.rb +255 -0
- data/lib/cabriolet/lit/decompressor.rb +250 -0
- data/lib/cabriolet/models/cabinet.rb +81 -0
- data/lib/cabriolet/models/chm_file.rb +28 -0
- data/lib/cabriolet/models/chm_header.rb +67 -0
- data/lib/cabriolet/models/chm_section.rb +38 -0
- data/lib/cabriolet/models/file.rb +119 -0
- data/lib/cabriolet/models/folder.rb +102 -0
- data/lib/cabriolet/models/folder_data.rb +21 -0
- data/lib/cabriolet/models/hlp_file.rb +45 -0
- data/lib/cabriolet/models/hlp_header.rb +37 -0
- data/lib/cabriolet/models/kwaj_header.rb +98 -0
- data/lib/cabriolet/models/lit_header.rb +55 -0
- data/lib/cabriolet/models/oab_header.rb +95 -0
- data/lib/cabriolet/models/szdd_header.rb +72 -0
- data/lib/cabriolet/modifier.rb +326 -0
- data/lib/cabriolet/oab/compressor.rb +353 -0
- data/lib/cabriolet/oab/decompressor.rb +315 -0
- data/lib/cabriolet/parallel.rb +333 -0
- data/lib/cabriolet/repairer.rb +288 -0
- data/lib/cabriolet/streaming.rb +221 -0
- data/lib/cabriolet/system/file_handle.rb +107 -0
- data/lib/cabriolet/system/io_system.rb +87 -0
- data/lib/cabriolet/system/memory_handle.rb +105 -0
- data/lib/cabriolet/szdd/compressor.rb +217 -0
- data/lib/cabriolet/szdd/decompressor.rb +184 -0
- data/lib/cabriolet/szdd/parser.rb +127 -0
- data/lib/cabriolet/validator.rb +332 -0
- data/lib/cabriolet/version.rb +5 -0
- data/lib/cabriolet.rb +104 -0
- metadata +157 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
|
|
5
|
+
module Cabriolet
|
|
6
|
+
module CAB
|
|
7
|
+
# Extractor handles the extraction of files from cabinets
|
|
8
|
+
class Extractor
|
|
9
|
+
attr_reader :io_system, :decompressor
|
|
10
|
+
|
|
11
|
+
# Build an extractor bound to an I/O system and its owning decompressor.
#
# @param io_system [System::IOSystem] I/O system used to open output files
# @param decompressor [CAB::Decompressor] Parent decompressor; supplies the
#   default salvage flag and builds the per-folder decompressors
def initialize(io_system, decompressor)
  @decompressor = decompressor
  @io_system = io_system
end
|
|
19
|
+
|
|
20
|
+
# Extract one file from its folder and write it to +output_path+.
#
# The file's offset/length are validated against the 2GB limit and against
# the folder's block count before any output is created. The parent
# directory of +output_path+ is created when missing.
#
# @param file [Models::File] File to extract
# @param output_path [String] Where to write the file
# @param options [Hash] Extraction options
# @option options [Boolean] :salvage Enable salvage mode (falls back to the
#   parent decompressor's salvage flag when not truthy)
# @return [Integer] Number of bytes requested from the decompressor
# @raise [Cabriolet::ArgumentError] if the file is not linked to a folder
# @raise [DecompressionError] if the offset/length checks fail or the folder
#   needs data from a previous cabinet
def extract_file(file, output_path, **options)
  salvage = options[:salvage] || @decompressor.salvage
  folder = file.folder
  raise Cabriolet::ArgumentError, "File has no folder" unless folder

  start = file.offset
  raise DecompressionError, "File offset beyond 2GB limit" if start > Constants::LENGTH_MAX

  # Clamp the length to what still fits under the limit (salvage mode only).
  room = Constants::LENGTH_MAX - start
  filelen = file.length
  if filelen > room
    raise DecompressionError, "File length exceeds 2GB limit" unless salvage

    filelen = room
  end

  if folder.needs_prev_merge?
    raise DecompressionError,
          "File requires previous cabinet, cabinet set is incomplete"
  end

  # Outside salvage mode the file must lie inside the folder's data.
  unless salvage
    capacity = folder.num_blocks * Constants::BLOCK_MAX
    if start > capacity || filelen > (capacity - start)
      raise DecompressionError, "File extends beyond folder data"
    end
  end

  parent = ::File.dirname(output_path)
  FileUtils.mkdir_p(parent) unless ::File.directory?(parent)

  # Reader that serves CFDATA payloads, following the folder data chain.
  reader = BlockReader.new(@io_system, folder.data, folder.num_blocks, salvage)
  begin
    sink = @io_system.open(output_path, Constants::MODE_WRITE)
    begin
      writer = @decompressor.create_decompressor(folder, reader, sink)

      if start.positive?
        # Decompress the bytes that precede the file into a throwaway buffer.
        # NOTE(review): this uses a second decompressor over the same reader;
        # whether its decoding state needs to carry over to `writer` is not
        # visible from here - confirm against the decompressor classes.
        scratch = System::MemoryHandle.new("", Constants::MODE_WRITE)
        @decompressor.create_decompressor(folder, reader, scratch).decompress(start)
      end

      writer.decompress(filelen)
      filelen
    ensure
      sink.close
    end
  ensure
    reader.close
  end
end
|
|
102
|
+
|
|
103
|
+
# Extract every file of a cabinet below +output_dir+.
#
# File names come from the archive and are treated as untrusted: both "/"
# and "\" count as separators, and empty, "." and ".." components are
# dropped, so no entry can be written outside +output_dir+.
#
# @param cabinet [Models::Cabinet] Cabinet to extract from
# @param output_dir [String] Directory to extract to (created when missing)
# @param options [Hash] Extraction options (also forwarded to #extract_file)
# @option options [Boolean] :preserve_paths Preserve directory structure (default: true)
# @option options [Boolean] :set_timestamps Set file modification times (default: true)
# @option options [Proc] :progress Progress callback, called with
#   (file, extracted_count, total_count) after each file
# @return [Integer] Number of files extracted
# @raise [DecompressionError] if a file name has no usable path component
def extract_all(cabinet, output_dir, **options)
  preserve_paths = options.fetch(:preserve_paths, true)
  set_timestamps = options.fetch(:set_timestamps, true)
  progress = options[:progress]

  FileUtils.mkdir_p(output_dir) unless ::File.directory?(output_dir)

  count = 0
  cabinet.files.each do |file|
    # Keep only safe path components of the archive-supplied name.
    parts = file.filename.to_s.tr("\\", "/").split("/")
                .reject { |part| part.empty? || part == "." || part == ".." }
    if parts.empty?
      raise DecompressionError,
            "Unsafe file name in cabinet: #{file.filename.inspect}"
    end

    relative = preserve_paths ? ::File.join(*parts) : parts.last
    output_path = ::File.join(output_dir, relative)

    extract_file(file, output_path, **options)

    # Timestamps are applied before permissions are changed.
    if set_timestamps && file.modification_time
      ::File.utime(file.modification_time, file.modification_time, output_path)
    end

    set_file_attributes(output_path, file)

    count += 1
    progress&.call(file, count, cabinet.files.size)
  end

  count
end
|
|
148
|
+
|
|
149
|
+
private
|
|
150
|
+
|
|
151
|
+
# Apply a permission mode derived from the CAB attributes of +file+.
#
# Read-only wins over executable; anything else gets the default mode.
# This is best effort: a missing path is skipped and any StandardError
# raised while changing the mode is swallowed.
#
# @param path [String] File path
# @param file [Models::File] CAB file
# @return [Integer, nil] result of File.chmod, or nil when skipped/failed
def set_file_attributes(path, file)
  return unless ::File.exist?(path)

  mode = if file.readonly?
           0o444
         elsif file.executable?
           0o755
         else
           0o644
         end
  ::File.chmod(mode, path)
rescue StandardError
  # Ignore errors setting attributes
  nil
end
|
|
173
|
+
|
|
174
|
+
# BlockReader wraps cabinet file handles and reads CFDATA blocks
|
|
175
|
+
# Handles multi-part cabinets by following the FolderData chain
|
|
176
|
+
class BlockReader
|
|
177
|
+
attr_reader :io_system, :current_data, :num_blocks, :salvage,
|
|
178
|
+
:current_block
|
|
179
|
+
|
|
180
|
+
# Set up a reader over a folder's CFDATA blocks and open the first cabinet.
#
# @param io_system [Object] I/O system used to open cabinet files
# @param folder_data [Object] first data segment; must respond to
#   #cabinet, #offset and #next_data (as used by the methods below)
# @param num_blocks [Integer] number of blocks the reader may serve
# @param salvage [Boolean] when true, size and checksum validation is skipped
def initialize(io_system, folder_data, num_blocks, salvage)
  @io_system, @current_data = io_system, folder_data
  @num_blocks, @salvage = num_blocks, salvage

  # Nothing buffered and no block consumed yet.
  @current_block = @buffer_pos = 0
  @buffer = ""
  @cab_handle = nil

  # Open first cabinet and seek to data offset
  open_current_cabinet
end
|
|
193
|
+
|
|
194
|
+
# Read up to +bytes+ bytes of block payload, pulling in further blocks as
# the internal buffer runs dry.
#
# All offsets are byte offsets, so the buffer is sliced with #byteslice;
# character-based slicing would return the wrong span for any buffer that
# is not single-byte encoded.
#
# @param bytes [Integer] maximum number of bytes wanted
# @return [String] the data read; shorter than +bytes+ (possibly empty)
#   once no further block can be read
def read(bytes)
  result = +""

  while result.bytesize < bytes
    # Refill when the buffer is exhausted; stop when no block is left.
    break if (@buffer_pos >= @buffer.bytesize) && !read_next_block

    available = @buffer.bytesize - @buffer_pos
    to_copy = [available, bytes - result.bytesize].min

    result << @buffer.byteslice(@buffer_pos, to_copy)
    @buffer_pos += to_copy
  end

  result
end
|
|
211
|
+
|
|
212
|
+
# Positioning is not implemented for the block reader: both arguments are
# ignored and nothing moves.
#
# @return [Integer] always 0
def seek(_offset, _whence)
  0
end
|
|
216
|
+
|
|
217
|
+
# Position reporting is not tracked by the block reader.
#
# @return [Integer] always 0
def tell
  0
end
|
|
220
|
+
|
|
221
|
+
# Close the currently open cabinet handle, if any, and forget it.
# Calling it again is a no-op.
#
# @return [nil]
def close
  @cab_handle.close unless @cab_handle.nil?
  @cab_handle = nil
end
|
|
225
|
+
|
|
226
|
+
private
|
|
227
|
+
|
|
228
|
+
# Load the next CFDATA block into the internal buffer.
#
# A block whose header reports an uncompressed size of 0 is treated as
# continuing in the next cabinet: its payload is accumulated and reading
# goes on with the next data segment until a part with a non-zero
# uncompressed size is read.
#
# A short or missing read (including a handle that returns nil at end of
# file) is reported as false instead of raising NoMethodError.
#
# @return [Boolean] true when a block was buffered; false when all blocks
#   were already consumed or the cabinet data is truncated
# @raise [DecompressionError] if a size exceeds its limit (non-salvage
#   mode) or a continued block has no next cabinet
# @raise [ChecksumError] if a stored checksum does not match (non-salvage
#   mode)
def read_next_block
  return false if @current_block >= @num_blocks

  # Read blocks, potentially spanning multiple cabinets
  accumulated_data = +""

  loop do
    # Read CFDATA header
    header_data = @cab_handle.read(Constants::CFDATA_SIZE)
    return false if header_data.nil? || header_data.bytesize != Constants::CFDATA_SIZE

    cfdata = Binary::CFData.read(header_data)

    # Skip the per-block reserved area of the cabinet currently being read
    if @current_data.cabinet.block_resv.positive?
      @cab_handle.seek(@current_data.cabinet.block_resv, Constants::SEEK_CUR)
    end

    # Validate block sizes
    unless @salvage
      total_size = accumulated_data.bytesize + cfdata.compressed_size
      if total_size > Constants::INPUT_MAX
        raise DecompressionError,
              "Compressed block size exceeds maximum"
      end

      if cfdata.uncompressed_size > Constants::BLOCK_MAX
        raise DecompressionError,
              "Uncompressed block size exceeds maximum"
      end
    end

    # Read compressed data; a nil result is treated like an empty read
    compressed_data = @cab_handle.read(cfdata.compressed_size) || ""
    return false if compressed_data.bytesize != cfdata.compressed_size

    # A stored checksum of 0 is not verified; salvage mode skips the check
    if cfdata.checksum.positive? && !@salvage
      # Checksum of the payload seeds the checksum of header bytes 4..7
      data_cksum = calculate_checksum(compressed_data)
      header_cksum = calculate_checksum(header_data[4, 4], data_cksum)

      if header_cksum != cfdata.checksum
        raise ChecksumError,
              "Block checksum mismatch"
      end
    end

    accumulated_data << compressed_data

    # A non-zero uncompressed size marks the final part of the block
    break unless cfdata.uncompressed_size.zero?

    # Otherwise the block continues in the next cabinet of the chain
    unless advance_to_next_cabinet
      raise DecompressionError,
            "Block continues but no next cabinet available"
    end
  end

  # Store in buffer
  @buffer = accumulated_data
  @buffer_pos = 0
  @current_block += 1

  true
end
|
|
300
|
+
|
|
301
|
+
# Open the cabinet file of the current data segment for reading and
# position it at the segment's offset. A previously opened handle is
# closed first.
#
# @return [Object] whatever the handle's #seek returns
def open_current_cabinet
  @cab_handle.close unless @cab_handle.nil?

  segment = @current_data
  @cab_handle = @io_system.open(segment.cabinet.filename, Constants::MODE_READ)
  @cab_handle.seek(segment.offset, Constants::SEEK_START)
end
|
|
306
|
+
|
|
307
|
+
# Step to the next data segment in the chain and open its cabinet.
#
# @return [Boolean] true when a next segment existed and was opened;
#   false when the chain ended (the current segment is then nil)
def advance_to_next_cabinet
  following = @current_data.next_data
  @current_data = following

  if following
    open_current_cabinet
    true
  else
    false
  end
end
|
|
316
|
+
|
|
317
|
+
# Compute the CAB block checksum of +data+, optionally seeded.
#
# Complete 4-byte groups are XORed in as little-endian 32-bit words. The
# 1-3 leftover bytes are combined with the FIRST leftover byte in the
# highest position (b0 << 16 | b1 << 8 | b2 for three bytes), as in the
# checksum routine of the Microsoft cabinet format specification.
#
# @param data [String] bytes to checksum
# @param initial [Integer] seed value (e.g. the checksum of a previous span)
# @return [Integer] checksum truncated to 32 bits
def calculate_checksum(data, initial = 0)
  # "V*" consumes only complete 4-byte words and ignores the tail.
  cksum = data.unpack("V*").reduce(initial, :^)

  tail_len = data.bytesize % 4
  if tail_len.positive?
    tail = data.byteslice(data.bytesize - tail_len, tail_len)
    cksum ^= tail.each_byte.reduce(0) { |acc, byte| (acc << 8) | byte }
  end

  cksum & 0xFFFFFFFF
end
|
|
354
|
+
end
|
|
355
|
+
end
|
|
356
|
+
end
|
|
357
|
+
end
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module CAB
|
|
5
|
+
# Parser reads and parses CAB file headers
|
|
6
|
+
class Parser
|
|
7
|
+
attr_reader :io_system
|
|
8
|
+
|
|
9
|
+
# Create a parser that performs all of its reads through +io_system+.
#
# @param io_system [System::IOSystem] I/O system for reading
def initialize(io_system)
  @io_system = io_system
end
|
|
15
|
+
|
|
16
|
+
# Parse a CAB file and return a Cabinet model.
#
# The file handle is closed even when parsing fails, so a malformed
# cabinet does not leak an open handle.
#
# @param filename [String] Path to the CAB file
# @return [Models::Cabinet] Parsed cabinet
# @raise [ParseError] if the file is not a valid CAB
def parse(filename)
  handle = @io_system.open(filename, Constants::MODE_READ)
  begin
    parse_handle(handle, filename)
  ensure
    @io_system.close(handle)
  end
end
|
|
27
|
+
|
|
28
|
+
# Parse a cabinet from an already-open handle.
#
# Runs the parsing stages in order: header, header validation, cabinet
# metadata, folder entries, file entries. The handle is left open.
#
# @param handle [System::FileHandle, System::MemoryHandle] Open handle
# @param filename [String] Filename recorded on the cabinet model
# @param offset [Integer] Offset in the file where the cabinet starts
# @param salvage [Boolean] Enable salvage mode for corrupted files
# @param quiet [Boolean] Suppress messages sent to the I/O system
# @return [Models::Cabinet] Parsed cabinet
# @raise [ParseError] if not a valid CAB
def parse_handle(handle, filename, offset = 0, salvage = false,
                 quiet = false)
  @quiet = quiet
  @salvage = salvage

  Models::Cabinet.new(filename).tap do |cab|
    cab.base_offset = offset

    # Everything below reads sequentially from the cabinet start.
    @io_system.seek(handle, offset, Constants::SEEK_START)

    header, reserved_per_folder = read_header(handle, cab)
    validate_header(header)
    populate_cabinet_from_header(handle, cab, header)
    read_folders(handle, cab, header, reserved_per_folder)
    read_files(handle, cab, header, salvage)
  end
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
# Read the fixed CAB header and, when flagged, the reserve-size fields.
#
# When the reserve flag is set, the extension holds a 16-bit header reserve
# size followed by one byte each for the folder and data reserve sizes.
# The data reserve size is stored on the cabinet and the header reserve
# area itself is skipped.
#
# A short or missing read (including a nil result at end of file) raises
# ParseError rather than NoMethodError.
#
# @param handle [Object] open handle positioned at the cabinet start
# @param cabinet [Models::Cabinet] cabinet receiving the block reserve size
# @return [Array(Binary::CFHeader, Integer)] parsed header and the
#   per-folder reserved size (0 when no reserve fields are present)
# @raise [ParseError] if the header or the reserve fields cannot be read
def read_header(handle, cabinet)
  header_data = @io_system.read(handle, Constants::CFHEADER_SIZE)
  if header_data.nil? || header_data.bytesize < Constants::CFHEADER_SIZE
    raise ParseError, "Cannot read CAB header"
  end

  header = Binary::CFHeader.read(header_data)

  folder_resv = 0

  # Read reserved header if present
  if header.flags.anybits?(Constants::FLAG_RESERVE_PRESENT)
    resv_data = @io_system.read(handle, Constants::CFHEADER_EXT_SIZE)
    if resv_data.nil? || resv_data.bytesize < Constants::CFHEADER_EXT_SIZE
      raise ParseError,
            "Cannot read reserved header"
    end

    # uint16 header_reserved, uint8 folder_reserved, uint8 data_reserved
    header_resv, folder_resv, data_resv = resv_data.unpack("vCC")

    # Store reserved data size in cabinet
    cabinet.set_blocks_info(0, data_resv)

    # Skip reserved header data
    @io_system.seek(handle, header_resv, Constants::SEEK_CUR) if header_resv.positive?
  end

  [header, folder_resv]
end
|
|
97
|
+
|
|
98
|
+
# Check the parsed header for basic validity.
#
# A version other than 1.3 only produces a warning message; a wrong
# signature, zero folders or zero files raise. Messages go to the I/O
# system unless quiet mode is on.
#
# @param header [Binary::CFHeader] parsed header
# @return [nil]
# @raise [ParseError] on a bad signature, no folders, or no files
def validate_header(header)
  announce = ->(text) { @io_system.message(nil, text) unless @quiet }

  raise ParseError, "Invalid CAB signature" unless header.signature == "MSCF"

  version_ok = header.major_version == 1 && header.minor_version == 3
  announce.call("WARNING; cabinet version is not 1.3") unless version_ok

  if header.num_folders.zero?
    announce.call("no folders in cabinet.")
    raise ParseError, "No folders in cabinet"
  end

  if header.num_files.zero?
    announce.call("no files in cabinet.")
    raise ParseError, "No files in cabinet"
  end
end
|
|
118
|
+
|
|
119
|
+
# Copy header fields onto the cabinet model and read the optional
# previous/next cabinet strings that follow the header.
#
# For each linked cabinet the name must be non-empty, while the info
# string may be empty.
#
# @param handle [Object] open handle positioned right after the header
# @param cabinet [Models::Cabinet] model to fill in
# @param header [Binary::CFHeader] parsed header
# @return [String, nil] the next-cabinet info string when present, else nil
def populate_cabinet_from_header(handle, cabinet, header)
  flags = header.flags

  cabinet.length = header.cabinet_size
  cabinet.set_id = header.set_id
  cabinet.set_index = header.cabinet_index
  cabinet.flags = flags

  # Previous-cabinet strings come first in the stream.
  if flags.anybits?(Constants::FLAG_PREV_CABINET)
    cabinet.prevname = read_string(handle, false)
    cabinet.previnfo = read_string(handle, true)
  end

  if flags.anybits?(Constants::FLAG_NEXT_CABINET)
    cabinet.nextname = read_string(handle, false)
    cabinet.nextinfo = read_string(handle, true)
  end
end
|
|
137
|
+
|
|
138
|
+
# Read all folder entries and append a Folder model for each to the cabinet.
#
# Each folder's data offset is made absolute by adding the cabinet's base
# offset. A short or missing read (including a nil result at end of file)
# raises ParseError rather than NoMethodError.
#
# @param handle [Object] open handle positioned at the first folder entry
# @param cabinet [Models::Cabinet] cabinet receiving the folders
# @param header [Binary::CFHeader] parsed header (supplies the folder count)
# @param folder_resv [Integer] reserved bytes to skip after each entry
# @raise [ParseError] if a folder entry cannot be read completely
def read_folders(handle, cabinet, header, folder_resv)
  header.num_folders.times do
    # Read folder structure
    folder_data = @io_system.read(handle, Constants::CFFOLDER_SIZE)
    if folder_data.nil? || folder_data.bytesize < Constants::CFFOLDER_SIZE
      raise ParseError,
            "Cannot read folder entry"
    end

    cf_folder = Binary::CFFolder.read(folder_data)

    # Skip folder reserved space if present
    @io_system.seek(handle, folder_resv, Constants::SEEK_CUR) if folder_resv.positive?

    # Create folder model with cabinet and absolute data offset
    data_offset = cabinet.base_offset + cf_folder.data_offset
    folder = Models::Folder.new(cabinet, data_offset)
    folder.comp_type = cf_folder.comp_type
    folder.num_blocks = cf_folder.num_blocks

    cabinet.folders << folder
  end
end
|
|
165
|
+
|
|
166
|
+
# Read all file entries and append a File model for each to the cabinet.
#
# In salvage mode, entries whose name cannot be read, whose folder index
# is invalid, or that end up without a folder are skipped instead of
# aborting the parse. A short or missing read of the fixed-size entry
# (including a nil result at end of file) always raises ParseError.
#
# @param handle [Object] open handle positioned at the first file entry
# @param cabinet [Models::Cabinet] cabinet receiving the files
# @param header [Binary::CFHeader] parsed header (file and folder counts)
# @param salvage [Boolean] skip bad entries instead of raising
# @raise [ParseError] if an entry is unreadable/invalid (non-salvage mode),
#   or if no file at all could be added
def read_files(handle, cabinet, header, salvage = false)
  header.num_files.times do
    # Read file structure
    file_data = @io_system.read(handle, Constants::CFFILE_SIZE)
    if file_data.nil? || file_data.bytesize < Constants::CFFILE_SIZE
      raise ParseError, "Cannot read file entry"
    end

    cf_file = Binary::CFFile.read(file_data)

    # Create file model
    file = Models::File.new
    file.length = cf_file.uncompressed_size
    file.offset = cf_file.folder_offset
    file.folder_index = cf_file.folder_index
    file.attribs = cf_file.attribs
    file.parse_datetime(cf_file.date, cf_file.time)

    # Read the name, then link the file to its folder; both failures are
    # handled the same way.
    begin
      file.filename = read_string(handle, false)
      link_file_to_folder(file, cabinet, cf_file.folder_index,
                          header.num_folders)
    rescue ParseError
      # In salvage mode, skip bad files
      next if salvage

      raise
    end

    # Skip if folder linkage failed in salvage mode
    next if file.folder.nil? && salvage

    cabinet.files << file
  end

  # Ensure we got at least some files
  return unless cabinet.files.empty?

  raise ParseError, "No valid files found in cabinet"
end
|
|
217
|
+
|
|
218
|
+
# Attach +file+ to the folder its folder index refers to.
#
# Indices below the "continued from previous" marker are ordinary
# positions in the cabinet's folder list. The continuation markers map to
# the last folder (continues to next / previous-and-next) or the first
# folder (continued from previous). Any other index leaves the file
# unlinked.
#
# @param file [Models::File] file to link
# @param cabinet [Models::Cabinet] cabinet holding the folders
# @param folder_index [Integer] folder index from the file entry
# @param num_folders [Integer] number of folders declared in the header
# @return [Object, nil] the folder assigned, or nil when none was
# @raise [ParseError] if an ordinary index is not below +num_folders+
def link_file_to_folder(file, cabinet, folder_index, num_folders)
  if folder_index < Constants::FOLDER_CONTINUED_FROM_PREV
    # Normal folder index
    raise ParseError, "Invalid folder index: #{folder_index}" unless folder_index < num_folders

    return file.folder = cabinet.folders[folder_index]
  end

  case folder_index
  when Constants::FOLDER_CONTINUED_TO_NEXT, Constants::FOLDER_CONTINUED_PREV_AND_NEXT
    # File continues to next cabinet - use last folder
    file.folder = cabinet.folders.last
  when Constants::FOLDER_CONTINUED_FROM_PREV
    # File continues from previous cabinet - use first folder
    file.folder = cabinet.folders.first
  end
end
|
|
236
|
+
|
|
237
|
+
# Read a null-terminated string from the current position and leave the
# handle positioned just past the terminator.
#
# Up to 256 bytes are read to look for the terminator. The terminator is
# located and the string is cut by BYTE offset, so the seek target stays
# correct even when the handle returns a multi-byte encoded string.
#
# @param handle [Object] open handle
# @param permit_empty [Boolean] whether an empty string is acceptable
# @return [String] the string without its null terminator
# @raise [ParseError] if nothing can be read, no terminator is found within
#   the bytes read, or the string is empty while +permit_empty+ is false
def read_string(handle, permit_empty)
  # Save current position before reading
  base_pos = @io_system.tell(handle)

  # Read up to 256 bytes to find null terminator
  buffer = @io_system.read(handle, 256)
  raise ParseError, "Cannot read string" if buffer.nil? || buffer.empty?

  # Search a binary view so the index is a byte offset
  null_pos = buffer.b.index("\x00")
  raise ParseError, "String not null-terminated" if null_pos.nil?
  raise ParseError, "Empty string not permitted" if null_pos.zero? && !permit_empty

  # Extract string (without null terminator)
  string = buffer.byteslice(0, null_pos)

  # Seek to position after null terminator (base_pos + null_pos + 1)
  @io_system.seek(handle, base_pos + null_pos + 1, Constants::SEEK_START)

  string
end
|
|
262
|
+
end
|
|
263
|
+
end
|
|
264
|
+
end
|