cabriolet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +799 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +29 -0
- data/README.adoc +1207 -0
- data/exe/cabriolet +6 -0
- data/lib/cabriolet/auto.rb +173 -0
- data/lib/cabriolet/binary/bitstream.rb +148 -0
- data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
- data/lib/cabriolet/binary/chm_structures.rb +213 -0
- data/lib/cabriolet/binary/hlp_structures.rb +66 -0
- data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
- data/lib/cabriolet/binary/lit_structures.rb +107 -0
- data/lib/cabriolet/binary/oab_structures.rb +112 -0
- data/lib/cabriolet/binary/structures.rb +56 -0
- data/lib/cabriolet/binary/szdd_structures.rb +60 -0
- data/lib/cabriolet/cab/compressor.rb +382 -0
- data/lib/cabriolet/cab/decompressor.rb +510 -0
- data/lib/cabriolet/cab/extractor.rb +357 -0
- data/lib/cabriolet/cab/parser.rb +264 -0
- data/lib/cabriolet/chm/compressor.rb +513 -0
- data/lib/cabriolet/chm/decompressor.rb +436 -0
- data/lib/cabriolet/chm/parser.rb +254 -0
- data/lib/cabriolet/cli.rb +776 -0
- data/lib/cabriolet/compressors/base.rb +34 -0
- data/lib/cabriolet/compressors/lzss.rb +250 -0
- data/lib/cabriolet/compressors/lzx.rb +581 -0
- data/lib/cabriolet/compressors/mszip.rb +315 -0
- data/lib/cabriolet/compressors/quantum.rb +446 -0
- data/lib/cabriolet/constants.rb +75 -0
- data/lib/cabriolet/decompressors/base.rb +39 -0
- data/lib/cabriolet/decompressors/lzss.rb +138 -0
- data/lib/cabriolet/decompressors/lzx.rb +726 -0
- data/lib/cabriolet/decompressors/mszip.rb +390 -0
- data/lib/cabriolet/decompressors/none.rb +27 -0
- data/lib/cabriolet/decompressors/quantum.rb +456 -0
- data/lib/cabriolet/errors.rb +39 -0
- data/lib/cabriolet/format_detector.rb +156 -0
- data/lib/cabriolet/hlp/compressor.rb +272 -0
- data/lib/cabriolet/hlp/decompressor.rb +198 -0
- data/lib/cabriolet/hlp/parser.rb +131 -0
- data/lib/cabriolet/huffman/decoder.rb +79 -0
- data/lib/cabriolet/huffman/encoder.rb +108 -0
- data/lib/cabriolet/huffman/tree.rb +138 -0
- data/lib/cabriolet/kwaj/compressor.rb +479 -0
- data/lib/cabriolet/kwaj/decompressor.rb +237 -0
- data/lib/cabriolet/kwaj/parser.rb +183 -0
- data/lib/cabriolet/lit/compressor.rb +255 -0
- data/lib/cabriolet/lit/decompressor.rb +250 -0
- data/lib/cabriolet/models/cabinet.rb +81 -0
- data/lib/cabriolet/models/chm_file.rb +28 -0
- data/lib/cabriolet/models/chm_header.rb +67 -0
- data/lib/cabriolet/models/chm_section.rb +38 -0
- data/lib/cabriolet/models/file.rb +119 -0
- data/lib/cabriolet/models/folder.rb +102 -0
- data/lib/cabriolet/models/folder_data.rb +21 -0
- data/lib/cabriolet/models/hlp_file.rb +45 -0
- data/lib/cabriolet/models/hlp_header.rb +37 -0
- data/lib/cabriolet/models/kwaj_header.rb +98 -0
- data/lib/cabriolet/models/lit_header.rb +55 -0
- data/lib/cabriolet/models/oab_header.rb +95 -0
- data/lib/cabriolet/models/szdd_header.rb +72 -0
- data/lib/cabriolet/modifier.rb +326 -0
- data/lib/cabriolet/oab/compressor.rb +353 -0
- data/lib/cabriolet/oab/decompressor.rb +315 -0
- data/lib/cabriolet/parallel.rb +333 -0
- data/lib/cabriolet/repairer.rb +288 -0
- data/lib/cabriolet/streaming.rb +221 -0
- data/lib/cabriolet/system/file_handle.rb +107 -0
- data/lib/cabriolet/system/io_system.rb +87 -0
- data/lib/cabriolet/system/memory_handle.rb +105 -0
- data/lib/cabriolet/szdd/compressor.rb +217 -0
- data/lib/cabriolet/szdd/decompressor.rb +184 -0
- data/lib/cabriolet/szdd/parser.rb +127 -0
- data/lib/cabriolet/validator.rb +332 -0
- data/lib/cabriolet/version.rb +5 -0
- data/lib/cabriolet.rb +104 -0
- metadata +157 -0
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module Decompressors
|
|
5
|
+
# Quantum handles Quantum-compressed data using arithmetic coding
|
|
6
|
+
# Based on libmspack qtmd.c implementation
|
|
7
|
+
#
|
|
8
|
+
# The Quantum method was created by David Stafford, adapted by Microsoft
|
|
9
|
+
# Corporation.
|
|
10
|
+
class Quantum < Base
|
|
11
|
+
# Frame size (32KB per frame)
FRAME_SIZE = 32 * 1024

# Match constants
MAX_MATCH = 1028

# Position slot tables (same as in qtmd.c).
# decode_match computes a match offset as
# POSITION_BASE[slot] + <EXTRA_BITS[slot] bits from the stream> + 1.
POSITION_BASE = [
  0, 1, 2, 3, 4, 6,
  8, 12, 16, 24, 32, 48,
  64, 96, 128, 192, 256, 384,
  512, 768, 1024, 1536, 2048, 3072,
  4096, 6144, 8192, 12_288, 16_384, 24_576,
  32_768, 49_152, 65_536, 98_304, 131_072, 196_608,
  262_144, 393_216, 524_288, 786_432, 1_048_576, 1_572_864
].freeze

EXTRA_BITS = [
  0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
  6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
  13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19
].freeze

# Match length tables. decode_match computes a variable match length as
# LENGTH_BASE[sym] + <LENGTH_EXTRA[sym] bits from the stream> + 5.
LENGTH_BASE = [
  0, 1, 2, 3, 4, 5, 6, 8, 10,
  12, 14, 18, 22, 26, 30, 38, 46, 54,
  62, 78, 94, 110, 126, 158, 190, 222, 254
].freeze

LENGTH_EXTRA = [
  0, 0, 0, 0, 0, 0, 1, 1, 1,
  1, 2, 2, 2, 2, 3, 3, 3, 3,
  4, 4, 4, 4, 5, 5, 5, 5, 0
].freeze
|
|
40
|
+
|
|
41
|
+
attr_reader :window_bits, :window_size
|
|
42
|
+
|
|
43
|
+
# One entry of an arithmetic coding model: a symbol value together with
# its cumulative frequency. Both fields are readable and writable.
class ModelSymbol
  attr_accessor :sym
  attr_accessor :cumfreq

  # @param sym [Integer] symbol value
  # @param cumfreq [Integer] cumulative frequency stored for this entry
  def initialize(sym, cumfreq)
    @cumfreq = cumfreq
    @sym = sym
  end
end
|
|
52
|
+
|
|
53
|
+
# An arithmetic coding model: the symbol table, the number of entries in
# use, and a countdown (shiftsleft, starting at 4) that update_model
# decrements to choose between a cheap halving and a full rebuild.
class Model
  attr_accessor :shiftsleft
  attr_accessor :entries
  attr_accessor :syms

  # @param syms [Array<ModelSymbol>] symbol table for this model
  # @param entries [Integer] number of symbols in the model
  def initialize(syms, entries)
    @shiftsleft = 4
    @entries = entries
    @syms = syms
  end
end
|
|
63
|
+
|
|
64
|
+
# Initialize Quantum decompressor
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
# @param window_bits [Integer] Window size parameter (10-21)
# @raise [ArgumentError] if window_bits is outside 10..21
def initialize(io_system, input, output, buffer_size, window_bits: 10)
  super(io_system, input, output, buffer_size)

  # Validate window_bits
  unless (10..21).cover?(window_bits)
    raise ArgumentError,
          "Quantum window_bits must be 10-21, got #{window_bits}"
  end

  @window_bits = window_bits
  @window_size = 1 << window_bits

  # The window stores raw decoded bytes, so it must be a binary string.
  # A UTF-8 window makes String#[] count characters instead of bytes as
  # soon as the decoded data happens to contain a valid multi-byte
  # sequence, which would corrupt slices taken from it.
  @window = "\0".b * @window_size
  @window_posn = 0
  @frame_todo = FRAME_SIZE

  # Arithmetic coding state
  @h = 0xFFFF
  @l = 0
  @c = 0
  @header_read = false

  # Initialize bitstream for MSB-first reading
  @bitstream = MSBBitstream.new(io_system, input, buffer_size)

  # Initialize models
  initialize_models
end
|
|
100
|
+
|
|
101
|
+
# Decompress Quantum data
#
# Decodes symbols into the sliding window until the requested number of
# bytes is available, then writes exactly that many bytes to the output
# handle in a single write. Decoder state (window position, frame
# progress, undelivered bytes) persists between calls.
#
# @param bytes [Integer] Number of bytes to decompress
# @return [Integer] Number of bytes decompressed
# @raise [DecompressionError] if a match would cross the end of the
#   window or runs past the end of the current frame
def decompress(bytes)
  return 0 if bytes <= 0

  # Window index of the first decoded byte not yet handed to the caller.
  # Initialised lazily so this method does not depend on the constructor.
  @output_posn ||= 0

  output_data = String.new(capacity: bytes)
  bytes_todo = bytes

  while bytes_todo.positive?
    # Bytes already decoded (e.g. by a match that overshot an earlier
    # request) but not yet delivered.
    pending = @window_posn - @output_posn

    if pending < bytes_todo && @window_posn < @window_size
      # Read header if needed (initializes C register)
      read_frame_header unless @header_read

      # Decode up to the bytes still needed, the frame boundary, or the
      # window boundary, whichever comes first
      frame_end = @window_posn + [bytes_todo - pending, @frame_todo,
                                  @window_size - @window_posn].min

      while @window_posn < frame_end
        selector = decode_symbol(@model7)

        if selector < 4
          # Literal byte from one of 4 models
          model = case selector
                  when 0 then @model0
                  when 1 then @model1
                  when 2 then @model2
                  else @model3
                  end

          @window.setbyte(@window_posn, decode_symbol(model))
          @window_posn += 1
          @frame_todo -= 1
        else
          match_offset, match_length = decode_match(selector)

          # NOTE(review): a match that crosses the end of the window is
          # rejected here rather than wrapped; confirm against streams
          # whose window is smaller than the 32KB frame.
          if @window_posn + match_length > @window_size
            raise DecompressionError,
                  "Match exceeds window boundary"
          end

          @frame_todo -= match_length
          copy_match(match_offset, match_length)
        end
      end

      # A match must not run past the end of its frame. Without this
      # check a negative @frame_todo would stall decoding and stale
      # window contents would be emitted instead.
      if @frame_todo.negative?
        raise DecompressionError, "Match overshoots frame boundary"
      end
    end

    # Hand over the bytes decoded since the last delivery. byteslice is
    # used because the window holds raw bytes, whatever its encoding tag.
    output_amount = [@window_posn - @output_posn, bytes_todo].min
    output_data << @window.byteslice(@output_posn, output_amount)
    @output_posn += output_amount
    bytes_todo -= output_amount

    # Handle frame completion
    if @frame_todo.zero?
      # Re-align to byte boundary
      @bitstream.byte_align

      # Skip trailer bytes until 0xFF
      # NOTE(review): the bitstream yields 0 at end of input, so this
      # loop does not terminate if no 0xFF trailer follows the frame.
      loop do
        break if @bitstream.read_bits(8) == 0xFF
      end

      @header_read = false
      @frame_todo = FRAME_SIZE
    end

    # Handle window wrap, but only once every byte has been delivered
    if @window_posn == @window_size && @output_posn == @window_size
      @window_posn = 0
      @output_posn = 0
    end
  end

  # Write output
  io_system.write(output, output_data)
  bytes
end
|
|
181
|
+
|
|
182
|
+
private
|
|
183
|
+
|
|
184
|
+
# MSB-first bitstream for Quantum (reads 16-bit words MSB first)
#
# Input is pulled from io_system in chunks of buffer_size bytes and fed
# into a bit accumulator 16 bits at a time. Only the bits not yet consumed
# are kept in the accumulator.
class MSBBitstream
  attr_reader :bits_left

  # @param io_system [#read] object responding to read(handle, size)
  # @param handle [Object] input handle passed through to io_system.read
  # @param buffer_size [Integer] number of bytes requested per refill
  def initialize(io_system, handle, buffer_size)
    @io_system = io_system
    @handle = handle
    @buffer_size = buffer_size
    @buffer = ""
    @buffer_pos = 0
    @bit_buffer = 0
    @bits_left = 0
  end

  # Read bits MSB first (matching Quantum's READ_BITS macro)
  #
  # @param num_bits [Integer] number of bits to read
  # @return [Integer] the bits read, as an unsigned integer
  def read_bits(num_bits)
    while @bits_left < num_bits
      # Read 16-bit word MSB first
      b0 = read_byte
      b1 = read_byte
      @bit_buffer = (@bit_buffer << 16) | (b0 << 8) | b1
      @bits_left += 16
    end

    # Extract bits from MSB side
    @bits_left -= num_bits
    value = (@bit_buffer >> @bits_left) & ((1 << num_bits) - 1)

    # Drop the consumed bits; otherwise the accumulator grows by 16 bits
    # per refill for the lifetime of the stream.
    @bit_buffer &= (1 << @bits_left) - 1
    value
  end

  # Return the next input byte, refilling the buffer when it is exhausted.
  # Yields 0 once the input has no more data.
  #
  # @return [Integer] byte value (0-255)
  def read_byte
    if @buffer_pos >= @buffer.bytesize
      # to_s guards against a reader that signals end of input with nil
      @buffer = @io_system.read(@handle, @buffer_size).to_s
      @buffer_pos = 0
      return 0 if @buffer.empty?
    end

    byte = @buffer.getbyte(@buffer_pos)
    @buffer_pos += 1
    byte
  end

  # Discard buffered bits down to the next multiple of 8.
  #
  # @return [Integer] number of bits left in the accumulator
  def byte_align
    @bits_left -= (@bits_left % 8)
    @bit_buffer &= (1 << @bits_left) - 1
    @bits_left
  end
end
|
|
230
|
+
|
|
231
|
+
# Build the arithmetic coding models and their symbol tables: four
# literal models, three match-offset models, the match-length model and
# the selector model.
def initialize_models
  # Offset models depend on window size
  slots = @window_bits * 2

  build = lambda do |start, len|
    table = init_model_syms(start, len)
    [table, Model.new(table, len)]
  end

  # Four literal models (64 symbols each)
  @m0sym, @model0 = build.call(0, 64)
  @m1sym, @model1 = build.call(64, 64)
  @m2sym, @model2 = build.call(128, 64)
  @m3sym, @model3 = build.call(192, 64)

  # Three match models (size depends on window)
  @m4sym, @model4 = build.call(0, [slots, 24].min)
  @m5sym, @model5 = build.call(0, [slots, 36].min)
  @m6sym, @model6 = build.call(0, slots)

  # Match length model
  @m6lsym, @model6len = build.call(0, 27)

  # Selector model (7 symbols: 0-3 literals, 4-6 matches)
  @m7sym, @model7 = build.call(0, 7)
  @model7
end
|
|
267
|
+
|
|
268
|
+
# Build the initial symbol table for a model: len + 1 entries whose
# symbols count up from start and whose cumulative frequencies count
# down from len to 0.
def init_model_syms(start, len)
  (0..len).map do |offset|
    ModelSymbol.new(start + offset, len - offset)
  end
end
|
|
274
|
+
|
|
275
|
+
# Start a new frame: reset the coder's low/high bounds to the full
# 16-bit range and load the C register with the next 16 bits of input.
def read_frame_header
  @l = 0
  @h = 0xFFFF
  @c = @bitstream.read_bits(16)
  @header_read = true
end
|
|
282
|
+
|
|
283
|
+
# Decode a symbol using arithmetic coding
# This implements the GET_SYMBOL macro from qtmd.c
#
# @param model [Model] model to decode with; its frequencies are updated
# @return [Integer] the decoded symbol
def decode_symbol(model)
  syms = model.syms
  total = syms[0].cumfreq

  # Scale the code value into the model's frequency range
  span = ((@h - @l) & 0xFFFF) + 1
  target = ((((@c - @l + 1) * total) - 1) / span) & 0xFFFF

  # Walk forward to the first entry whose cumulative frequency does not
  # exceed the target; the decoded symbol is the entry just before it
  idx = 1
  idx += 1 while idx < model.entries && syms[idx].cumfreq > target

  chosen = syms[idx - 1]
  decoded = chosen.sym

  # Narrow the coding interval to the chosen symbol
  width = (@h - @l) + 1
  @h = @l + ((chosen.cumfreq * width) / total) - 1
  @l += ((syms[idx].cumfreq * width) / total)

  # Bump the cumulative frequency of the chosen entry and all before it
  idx.times { |k| syms[k].cumfreq += 8 }

  # Rescale the model once its total frequency gets too large
  update_model(model) if syms[0].cumfreq > 3800

  normalize_range

  decoded
end
|
|
321
|
+
|
|
322
|
+
# Renormalize the coder state: while the low and high bounds share their
# top bit (or straddle the midpoint in the underflow pattern), shift all
# three 16-bit registers left by one and pull one new bit into C.
def normalize_range
  loop do
    if ((@l ^ @h) & 0x8000) != 0
      # Top bits differ: only continue for the underflow case
      # (second-highest bit set in L and clear in H)
      underflow = (@l & 0x4000) != 0 && (@h & 0x4000).zero?
      break unless underflow

      @c ^= 0x4000
      @l &= 0x3FFF
      @h |= 0x4000
    end

    @l = (@l << 1) & 0xFFFF
    @h = ((@h << 1) | 1) & 0xFFFF
    @c = ((@c << 1) | @bitstream.read_bits(1)) & 0xFFFF
  end
end
|
|
341
|
+
|
|
342
|
+
# Update model statistics (from qtmd_update_model)
#
# Decrements the model's shiftsleft countdown. While it stays positive,
# every cumulative frequency is halved, keeping the table strictly
# decreasing. When it reaches zero the countdown restarts at 50 and the
# table is rebuilt: cumulative values are turned into halved per-symbol
# frequencies, entries are reordered by descending frequency, and the
# cumulative values are recomputed.
def update_model(model)
  model.shiftsleft -= 1
  syms = model.syms
  count = model.entries

  if model.shiftsleft.positive?
    # Simple shift
    (count - 1).downto(0) do |k|
      halved = syms[k].cumfreq >> 1
      floor = syms[k + 1].cumfreq
      syms[k].cumfreq = halved > floor ? halved : floor + 1
    end
  else
    # Full rebuild
    model.shiftsleft = 50

    # Convert cumfreq to frequencies (plus one, then halved)
    count.times do |k|
      syms[k].cumfreq = (syms[k].cumfreq - syms[k + 1].cumfreq + 1) >> 1
    end

    # In-place exchange sort into descending frequency order. The exact
    # comparison order is kept as-is because it determines how ties end up.
    0.upto(count - 2) do |a|
      (a + 1).upto(count - 1) do |b|
        syms[a], syms[b] = syms[b], syms[a] if syms[a].cumfreq < syms[b].cumfreq
      end
    end

    # Convert back to cumulative frequencies
    (count - 1).downto(0) { |k| syms[k].cumfreq += syms[k + 1].cumfreq }
  end
end
|
|
378
|
+
|
|
379
|
+
# Decode match offset and length
#
# @param selector [Integer] 4 (fixed 3-byte match), 5 (fixed 4-byte
#   match) or 6 (variable-length match)
# @return [Array(Integer, Integer)] match offset and match length
# @raise [DecompressionError] for any other selector
def decode_match(selector)
  # Offset = slot base + extra bits (if the slot has any) + 1
  read_offset = lambda do |model|
    slot = decode_symbol(model)
    width = EXTRA_BITS[slot]
    extra = width.positive? ? @bitstream.read_bits(width) : 0
    POSITION_BASE[slot] + extra + 1
  end

  case selector
  when 4
    [read_offset.call(@model4), 3]
  when 5
    [read_offset.call(@model5), 4]
  when 6
    # The length is coded before the offset
    slot = decode_symbol(@model6len)
    width = LENGTH_EXTRA[slot]
    extra = width.positive? ? @bitstream.read_bits(width) : 0
    match_length = LENGTH_BASE[slot] + extra + 5

    [read_offset.call(@model6), match_length]
  else
    raise DecompressionError, "Invalid selector: #{selector}"
  end
end
|
|
409
|
+
|
|
410
|
+
# Copy match from window
#
# Copies length bytes, one at a time, from offset bytes behind the
# current window position to the current position, advancing
# @window_posn. Copying byte by byte lets a match overlap its own output.
# When the offset reaches back past the start of the window, the source
# begins near the end of the window and wraps to index 0.
#
# The caller is responsible for ensuring the destination fits in the
# window.
#
# @param offset [Integer] distance back from the current position
# @param length [Integer] number of bytes to copy
# @return [Integer] length
# @raise [DecompressionError] if the offset reaches back past the start
#   of the window and is larger than the window itself
def copy_match(offset, length)
  if offset > @window_posn
    raise DecompressionError, "Match offset beyond window" if offset > @window_size

    # Source starts in the tail of the window
    src_pos = @window_size - (offset - @window_posn)
  else
    src_pos = @window_posn - offset
  end

  length.times do
    # Source ran off the end of the window: continue from the beginning
    src_pos = 0 if src_pos == @window_size

    @window.setbyte(@window_posn, @window.getbyte(src_pos))
    @window_posn += 1
    src_pos += 1
  end
end
|
|
454
|
+
end
|
|
455
|
+
end
|
|
456
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
  # Root of the Cabriolet error hierarchy
  class Error < StandardError
  end

  # I/O failure; read and seek failures have their own subclasses below
  class IOError < Error
  end

  # A read operation failed
  class ReadError < IOError
  end

  # A seek operation failed
  class SeekError < IOError
  end

  # Parsing a CAB file failed
  class ParseError < Error
  end

  # The file signature does not match the expected format
  class SignatureError < Error
  end

  # The file format is invalid or corrupted
  class FormatError < Error
  end

  # An unsupported format was encountered
  class UnsupportedFormatError < Error
  end

  # A checksum does not match
  class ChecksumError < Error
  end

  # Failure during decompression
  class DecompressionError < Error
  end

  # Failure during compression
  class CompressionError < Error
  end

  # Invalid arguments were provided. Derives from the core ArgumentError,
  # not from Cabriolet::Error.
  class ArgumentError < ::ArgumentError
  end
end
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
# Detects archive format based on magic bytes and file structure
|
|
5
|
+
class FormatDetector
|
|
6
|
+
# Magic byte signatures for supported formats, checked in this order
# against the start of the file
MAGIC_SIGNATURES = {
  "MSCF"             => :cab,
  "ITSF"             => :chm,
  # ?_
  "\x3F\x5F"         => :hlp,
  # LN (alternative HLP signature)
  "\x4C\x4E"         => :hlp,
  "KWAJ"             => :kwaj,
  "SZDD"             => :szdd,
  # Alternative SZDD signature
  "\x88\xF0\x27\x00" => :szdd,
  "ITOLITLS"         => :lit,
  # OAB has null header start
  "\x00\x00\x00\x00" => :oab
}.freeze

# File extension to format mapping (fallback when no signature matches)
EXTENSION_MAP = {
  ".cab"  => :cab,
  ".chm"  => :chm,
  ".hlp"  => :hlp,
  ".kwj"  => :kwaj,
  ".kwaj" => :kwaj,
  ".lit"  => :lit,
  ".oab"  => :oab,
  ".szdd" => :szdd
}.freeze
|
|
30
|
+
|
|
31
|
+
class << self
|
|
32
|
+
# Detect format from file path
#
# Magic bytes are tried first; the file extension is only consulted when
# they give no answer.
#
# @param path [String] Path to the archive file
# @return [Symbol, nil] Detected format, or nil if the path does not
#   exist or the format is unknown
def detect(path)
  return nil unless File.exist?(path)

  detect_by_magic_bytes(path) || detect_by_extension(path)
end
|
|
46
|
+
|
|
47
|
+
# Detect format from IO stream
#
# Reads up to 16 bytes from the current position and then seeks back to
# where the stream was, even if the read raises.
#
# @param io [IO] IO object to read from; must support pos, read and seek
# @return [Symbol, nil] Detected format or nil if unknown
def detect_from_io(io)
  original_pos = io.pos

  begin
    # Read first 16 bytes for magic byte checking
    magic_bytes = io.read(16)
  ensure
    # Leave the stream where the caller had it, whatever happened above
    io.seek(original_pos) if original_pos
  end

  return nil if magic_bytes.nil? || magic_bytes.bytesize < 4

  detect_magic_bytes(magic_bytes)
end
|
|
62
|
+
|
|
63
|
+
# Detect the format of a file and look up the matching parser class
#
# @param path [String] Path to the archive file
# @return [Class, nil] Parser class, or nil if the format is unknown or
#   has no parser
def parser_for(path)
  detected = detect(path)
  return nil unless detected

  format_to_parser(detected)
end
|
|
71
|
+
|
|
72
|
+
# Convert format symbol to parser class
#
# @param format [Symbol] Format symbol
# @return [Class, nil] Parser class; nil for :lit and :oab (parsers to
#   be implemented) and for any unrecognised symbol
def format_to_parser(format)
  case format
  when :cab  then Cabriolet::CAB::Parser
  when :chm  then Cabriolet::CHM::Parser
  when :hlp  then Cabriolet::HLP::Parser
  when :kwaj then Cabriolet::KWAJ::Parser
  when :szdd then Cabriolet::SZDD::Parser
  when :lit, :oab
    # LIT and OAB parsers to be implemented
    nil
  end
end
|
|
96
|
+
|
|
97
|
+
private
|
|
98
|
+
|
|
99
|
+
# Sniff the first 16 bytes of the file at path and match them against the
# known signatures. Best effort: any StandardError raised while opening,
# reading or matching yields nil.
#
# @param path [String] Path to the archive file
# @return [Symbol, nil] Detected format or nil
def detect_by_magic_bytes(path)
  header = File.open(path, "rb") { |file| file.read(16) }
  detect_magic_bytes(header)
rescue StandardError
  nil
end
|
|
107
|
+
|
|
108
|
+
# Match the leading bytes of a file against the known signatures.
#
# The comparison is done on binary copies of both sides. The signature
# literals carry the source encoding while file contents are binary, and
# String#start_with? raises Encoding::CompatibilityError when both
# strings contain non-ASCII bytes in different encodings.
#
# @param bytes [String, nil] leading bytes of the file
# @return [Symbol, nil] format of the first signature that matches and
#   passes validate_format; nil if fewer than 4 bytes were given or
#   nothing matches
def detect_magic_bytes(bytes)
  return nil unless bytes && bytes.bytesize >= 4

  raw = bytes.b

  # Check each known signature, in declaration order
  MAGIC_SIGNATURES.each do |signature, format|
    return format if raw.start_with?(signature.b) && validate_format(raw, format)
  end

  nil
end
|
|
121
|
+
|
|
122
|
+
# Look up the format by file extension, ignoring case.
#
# @param path [String] Path to the archive file
# @return [Symbol, nil] mapped format, or nil for an unknown extension
def detect_by_extension(path)
  EXTENSION_MAP[File.extname(path).downcase]
end
|
|
126
|
+
|
|
127
|
+
# Confirm that the sniffed bytes really start with a signature of the
# given format.
#
# Comparisons are made on a binary copy of the input, against binary
# signatures, so signatures containing bytes >= 0x80 compare correctly
# with file contents. Length requirements never exceed the 16 bytes the
# callers sniff: the CAB check used to demand 36 bytes and therefore
# could never succeed.
#
# @param bytes [String] leading bytes of the file
# @param format [Symbol] candidate format
# @return [Boolean] whether the bytes are acceptable for the format;
#   always true for :oab and for unrecognised formats
def validate_format(bytes, format)
  raw = bytes.b

  case format
  when :cab
    # Verify CAB header signature
    raw.start_with?("MSCF".b)
  when :chm
    # Verify CHM header
    raw.bytesize >= 8 && raw.start_with?("ITSF".b)
  when :hlp
    # HLP files have either ?_ or LN signature
    raw.start_with?("\x3F\x5F".b, "\x4C\x4E".b)
  when :kwaj
    # Verify KWAJ header
    raw.start_with?("KWAJ".b)
  when :szdd
    # SZDD can have multiple signatures
    raw.start_with?("SZDD".b, "\x88\xF0\x27\x00".b)
  when :lit
    # Verify LIT header
    raw.start_with?("ITOLITLS".b)
  else
    # OAB (and anything unrecognised) has no further checks yet
    true
  end
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
end
|