cabriolet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +799 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +29 -0
- data/README.adoc +1207 -0
- data/exe/cabriolet +6 -0
- data/lib/cabriolet/auto.rb +173 -0
- data/lib/cabriolet/binary/bitstream.rb +148 -0
- data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
- data/lib/cabriolet/binary/chm_structures.rb +213 -0
- data/lib/cabriolet/binary/hlp_structures.rb +66 -0
- data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
- data/lib/cabriolet/binary/lit_structures.rb +107 -0
- data/lib/cabriolet/binary/oab_structures.rb +112 -0
- data/lib/cabriolet/binary/structures.rb +56 -0
- data/lib/cabriolet/binary/szdd_structures.rb +60 -0
- data/lib/cabriolet/cab/compressor.rb +382 -0
- data/lib/cabriolet/cab/decompressor.rb +510 -0
- data/lib/cabriolet/cab/extractor.rb +357 -0
- data/lib/cabriolet/cab/parser.rb +264 -0
- data/lib/cabriolet/chm/compressor.rb +513 -0
- data/lib/cabriolet/chm/decompressor.rb +436 -0
- data/lib/cabriolet/chm/parser.rb +254 -0
- data/lib/cabriolet/cli.rb +776 -0
- data/lib/cabriolet/compressors/base.rb +34 -0
- data/lib/cabriolet/compressors/lzss.rb +250 -0
- data/lib/cabriolet/compressors/lzx.rb +581 -0
- data/lib/cabriolet/compressors/mszip.rb +315 -0
- data/lib/cabriolet/compressors/quantum.rb +446 -0
- data/lib/cabriolet/constants.rb +75 -0
- data/lib/cabriolet/decompressors/base.rb +39 -0
- data/lib/cabriolet/decompressors/lzss.rb +138 -0
- data/lib/cabriolet/decompressors/lzx.rb +726 -0
- data/lib/cabriolet/decompressors/mszip.rb +390 -0
- data/lib/cabriolet/decompressors/none.rb +27 -0
- data/lib/cabriolet/decompressors/quantum.rb +456 -0
- data/lib/cabriolet/errors.rb +39 -0
- data/lib/cabriolet/format_detector.rb +156 -0
- data/lib/cabriolet/hlp/compressor.rb +272 -0
- data/lib/cabriolet/hlp/decompressor.rb +198 -0
- data/lib/cabriolet/hlp/parser.rb +131 -0
- data/lib/cabriolet/huffman/decoder.rb +79 -0
- data/lib/cabriolet/huffman/encoder.rb +108 -0
- data/lib/cabriolet/huffman/tree.rb +138 -0
- data/lib/cabriolet/kwaj/compressor.rb +479 -0
- data/lib/cabriolet/kwaj/decompressor.rb +237 -0
- data/lib/cabriolet/kwaj/parser.rb +183 -0
- data/lib/cabriolet/lit/compressor.rb +255 -0
- data/lib/cabriolet/lit/decompressor.rb +250 -0
- data/lib/cabriolet/models/cabinet.rb +81 -0
- data/lib/cabriolet/models/chm_file.rb +28 -0
- data/lib/cabriolet/models/chm_header.rb +67 -0
- data/lib/cabriolet/models/chm_section.rb +38 -0
- data/lib/cabriolet/models/file.rb +119 -0
- data/lib/cabriolet/models/folder.rb +102 -0
- data/lib/cabriolet/models/folder_data.rb +21 -0
- data/lib/cabriolet/models/hlp_file.rb +45 -0
- data/lib/cabriolet/models/hlp_header.rb +37 -0
- data/lib/cabriolet/models/kwaj_header.rb +98 -0
- data/lib/cabriolet/models/lit_header.rb +55 -0
- data/lib/cabriolet/models/oab_header.rb +95 -0
- data/lib/cabriolet/models/szdd_header.rb +72 -0
- data/lib/cabriolet/modifier.rb +326 -0
- data/lib/cabriolet/oab/compressor.rb +353 -0
- data/lib/cabriolet/oab/decompressor.rb +315 -0
- data/lib/cabriolet/parallel.rb +333 -0
- data/lib/cabriolet/repairer.rb +288 -0
- data/lib/cabriolet/streaming.rb +221 -0
- data/lib/cabriolet/system/file_handle.rb +107 -0
- data/lib/cabriolet/system/io_system.rb +87 -0
- data/lib/cabriolet/system/memory_handle.rb +105 -0
- data/lib/cabriolet/szdd/compressor.rb +217 -0
- data/lib/cabriolet/szdd/decompressor.rb +184 -0
- data/lib/cabriolet/szdd/parser.rb +127 -0
- data/lib/cabriolet/validator.rb +332 -0
- data/lib/cabriolet/version.rb +5 -0
- data/lib/cabriolet.rb +104 -0
- metadata +157 -0
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module Compressors
|
|
5
|
+
# Quantum compresses data using arithmetic coding and LZ77-based matching
|
|
6
|
+
# Based on the Quantum decompressor and libmspack qtmd.c implementation
|
|
7
|
+
#
|
|
8
|
+
# STATUS: Functional with known limitations
|
|
9
|
+
# - Literals: WORKING ✓
|
|
10
|
+
# - Short matches (3-13 bytes): WORKING ✓
|
|
11
|
+
# - Longer matches (14+ bytes): Limited support (known issue)
|
|
12
|
+
# - Simple data round-trips successfully
|
|
13
|
+
# - Complex repeated patterns may have issues
|
|
14
|
+
#
|
|
15
|
+
# The Quantum method was created by David Stafford, adapted by Microsoft
|
|
16
|
+
# Corporation.
|
|
17
|
+
# rubocop:disable Metrics/ClassLength
|
|
18
|
+
class Quantum < Base
|
|
19
|
+
# Frame size (32KB per frame)
|
|
20
|
+
FRAME_SIZE = 32_768
|
|
21
|
+
|
|
22
|
+
# Match constants
|
|
23
|
+
MIN_MATCH = 3
|
|
24
|
+
MAX_MATCH = 1028
|
|
25
|
+
|
|
26
|
+
# Position slot tables (same as decompressor)
|
|
27
|
+
POSITION_BASE = [
|
|
28
|
+
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
|
|
29
|
+
512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
|
|
30
|
+
24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
|
|
31
|
+
393_216, 524_288, 786_432, 1_048_576, 1_572_864
|
|
32
|
+
].freeze
|
|
33
|
+
|
|
34
|
+
EXTRA_BITS = [
|
|
35
|
+
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
|
36
|
+
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
|
|
37
|
+
17, 17, 18, 18, 19, 19
|
|
38
|
+
].freeze
|
|
39
|
+
|
|
40
|
+
LENGTH_BASE = [
|
|
41
|
+
0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
|
|
42
|
+
30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
|
|
43
|
+
].freeze
|
|
44
|
+
|
|
45
|
+
LENGTH_EXTRA = [
|
|
46
|
+
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
|
|
47
|
+
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
|
|
48
|
+
].freeze
|
|
49
|
+
|
|
50
|
+
attr_reader :window_bits, :window_size
|
|
51
|
+
|
|
52
|
+
# Represents a symbol in an arithmetic coding model
|
|
53
|
+
class ModelSymbol
  # @return [Integer] symbol value carried by this table entry
  attr_accessor :sym

  # @return [Integer] cumulative frequency stored for this entry
  attr_accessor :cumfreq

  # Build one entry of a model's symbol table.
  #
  # @param sym [Integer] symbol value
  # @param cumfreq [Integer] starting cumulative frequency
  def initialize(sym, cumfreq)
    @cumfreq = cumfreq
    @sym = sym
  end
end
|
|
61
|
+
|
|
62
|
+
# Represents an arithmetic coding model
|
|
63
|
+
class Model
  # @return [Array] symbol table (one more element than +entries+)
  attr_accessor :syms

  # @return [Integer] number of encodable symbols in the table
  attr_accessor :entries

  # @return [Integer] countdown used by update_model to choose between a
  #   cheap halving pass and a full rebuild
  attr_accessor :shiftsleft

  # Build a model over an existing symbol table.
  #
  # @param syms [Array] symbol table
  # @param entries [Integer] number of encodable symbols
  def initialize(syms, entries)
    @shiftsleft = 4
    @entries = entries
    @syms = syms
  end
end
|
|
72
|
+
|
|
73
|
+
# Initialize Quantum compressor
|
|
74
|
+
#
|
|
75
|
+
# @param io_system [System::IOSystem] I/O system for reading/writing
|
|
76
|
+
# @param input [System::FileHandle, System::MemoryHandle] Input handle
|
|
77
|
+
# @param output [System::FileHandle, System::MemoryHandle] Output handle
|
|
78
|
+
# @param buffer_size [Integer] Buffer size for I/O operations
|
|
79
|
+
# @param window_bits [Integer] Window size parameter (10-21)
|
|
80
|
+
def initialize(io_system, input, output, buffer_size, window_bits: 10)
  super(io_system, input, output, buffer_size)

  # Reject window sizes outside the supported 2^10..2^21 range up front.
  raise ArgumentError, "Quantum window_bits must be 10-21, got #{window_bits}" unless (10..21).cover?(window_bits)

  @window_bits = window_bits
  @window_size = 1 << window_bits

  # All compressed output goes through an MSB-first bit writer.
  @bitstream = Binary::BitstreamWriter.new(
    io_system, output, buffer_size, msb_first: true
  )

  # Fresh models and arithmetic-coder state.
  initialize_models
end
|
|
99
|
+
|
|
100
|
+
# Compress the input data
|
|
101
|
+
#
|
|
102
|
+
# @return [Integer] Total bytes compressed
|
|
103
|
+
def compress
  consumed = 0
  final_frame = false

  until final_frame
    chunk = io_system.read(input, FRAME_SIZE)
    break if chunk.empty?

    consumed += chunk.bytesize
    compress_frame(chunk)

    # Pad to a byte boundary, then emit the 0xFF frame trailer.
    @bitstream.flush_msb
    @bitstream.write_byte(0xFF)

    # Every frame starts from fresh models and coder state.
    initialize_models

    # A short read means the input is exhausted.
    final_frame = chunk.bytesize < FRAME_SIZE
  end

  consumed
end
|
|
128
|
+
|
|
129
|
+
private
|
|
130
|
+
|
|
131
|
+
# Initialize all 7 arithmetic coding models (exactly matching decoder)
|
|
132
|
+
def initialize_models
  # Number of position slots grows with the window size.
  slot_count = @window_bits * 2
  len3_slots = [slot_count, 24].min
  len4_slots = [slot_count, 36].min

  # Literal models: one per 64-value quarter of the byte range.
  @m0sym = init_model_syms(0, 64)
  @m1sym = init_model_syms(64, 64)
  @m2sym = init_model_syms(128, 64)
  @m3sym = init_model_syms(192, 64)
  @model0 = Model.new(@m0sym, 64)
  @model1 = Model.new(@m1sym, 64)
  @model2 = Model.new(@m2sym, 64)
  @model3 = Model.new(@m3sym, 64)

  # Position models for 3-byte, 4-byte and longer matches.
  @m4sym = init_model_syms(0, len3_slots)
  @m5sym = init_model_syms(0, len4_slots)
  @m6sym = init_model_syms(0, slot_count)
  @model4 = Model.new(@m4sym, len3_slots)
  @model5 = Model.new(@m5sym, len4_slots)
  @model6 = Model.new(@m6sym, slot_count)

  # Length-slot model used together with model6.
  @m6lsym = init_model_syms(0, 27)
  @model6len = Model.new(@m6lsym, 27)

  # Selector model: 0-3 pick a literal model, 4-6 pick a match kind.
  @m7sym = init_model_syms(0, 7)
  @model7 = Model.new(@m7sym, 7)

  # Arithmetic coder interval [L, H] and pending underflow count.
  @h = 0xFFFF
  @l = 0
  @underflow_bits = 0
end
|
|
172
|
+
|
|
173
|
+
# Initialize model symbol array (exactly matching qtmd_init_model)
|
|
174
|
+
# Build a symbol table of +len+ symbols plus one trailing sentinel entry.
# Entry k carries symbol +start + k+ and cumulative frequency +len - k+,
# so the table starts uniform and the sentinel ends at zero.
#
# @param start [Integer] value of the first symbol
# @param len [Integer] number of encodable symbols
# @return [Array<ModelSymbol>] table with +len + 1+ entries
def init_model_syms(start, len)
  0.upto(len).map do |step|
    ModelSymbol.new(start + step, len - step)
  end
end
|
|
179
|
+
|
|
180
|
+
# Compress a single frame
|
|
181
|
+
# Encode one frame as a sequence of literals and matches, then flush the
# arithmetic coder. No frame header is written.
#
# @param data [String] raw bytes of the frame
def compress_frame(data)
  cursor = 0
  frame_length = data.bytesize

  until cursor >= frame_length
    run_length, distance = find_match(data, cursor)

    if run_length < MIN_MATCH
      # Nothing worth referencing: emit the byte itself.
      encode_literal(data.getbyte(cursor))
      cursor += 1
    else
      encode_match(run_length, distance)
      cursor += run_length
    end
  end

  # Emit the bits that pin down the final coding interval.
  finish_arithmetic_coding
end
|
|
205
|
+
|
|
206
|
+
# Finish arithmetic coding by outputting the final state
|
|
207
|
+
# Terminate the arithmetic-coded stream for the current frame.
#
# Writes the second-highest bit of L followed by the pending underflow
# bits (plus one) in inverted form, then clears the underflow counter.
def finish_arithmetic_coding
  @underflow_bits += 1

  lead = @l[14] # bit 0x4000 of the low bound
  @bitstream.write_bits_msb(lead, 1)

  follow = lead ^ 1
  @underflow_bits.times { @bitstream.write_bits_msb(follow, 1) }

  @underflow_bits = 0
end
|
|
223
|
+
|
|
224
|
+
# Find best match in the sliding window
|
|
225
|
+
# Find the longest earlier occurrence of the bytes starting at +pos+.
#
# Candidates are searched by brute force at every backward distance up to
# the window size (bounded by the data already seen in this frame). Among
# equally long candidates the smallest distance wins. Source and target
# may overlap.
#
# The returned length never exceeds what the length table can express:
# encode_match sends +length - 5+ through LENGTH_BASE/LENGTH_EXTRA, whose
# last slot has no extra bits, so anything longer than
# +LENGTH_BASE.last + 5+ would be written as a shorter match while the
# caller still skips the full length.
#
# @param data [String] frame bytes
# @param pos [Integer] current position within +data+
# @return [Array(Integer, Integer)] +[length, offset]+; +[0, 0]+ when no
#   match is available
def find_match(data, pos)
  return [0, 0] if pos < MIN_MATCH

  # Longest match that can actually be encoded, further limited by the
  # bytes remaining in the frame.
  encodable_max = [MAX_MATCH, LENGTH_BASE.last + 5].min
  limit = [encodable_max, data.bytesize - pos].min

  best_length = 0
  best_offset = 0
  max_offset = [pos, @window_size].min

  (1..max_offset).each do |offset|
    match_pos = pos - offset
    length = 0
    length += 1 while length < limit &&
                      data.getbyte(match_pos + length) == data.getbyte(pos + length)

    next unless length > best_length

    best_length = length
    best_offset = offset

    # Nothing farther back can beat a match that already hit the limit.
    break if best_length == limit
  end

  [best_length, best_offset]
end
|
|
252
|
+
|
|
253
|
+
# Encode a literal byte
|
|
254
|
+
# Emit one literal byte: first the selector naming the literal model
# (the byte's top two bits), then the byte itself through that model.
#
# @param byte [Integer] byte value to encode
def encode_literal(byte)
  bank = byte >> 6
  literal_model = { 0 => @model0, 1 => @model1, 2 => @model2 }.fetch(bank, @model3)

  encode_symbol(@model7, bank)
  encode_symbol(literal_model, byte)
end
|
|
270
|
+
|
|
271
|
+
# Encode a match
|
|
272
|
+
# Emit a match as selector + (optional length) + position.
#
# Selector 4 means a 3-byte match, selector 5 a 4-byte match; both carry
# only a position. Selector 6 covers every other length and sends
# +length - 5+ through the length model before the position.
#
# NOTE(review): each selector uses a position model with a limited number
# of slots; confirm callers never pass an offset whose slot is missing
# from the chosen model, since encode_symbol raises in that case.
#
# @param length [Integer] match length in bytes
# @param offset [Integer] backward distance of the match (1-based)
def encode_match(length, offset)
  case length
  when 3
    encode_symbol(@model7, 4)
    encode_position(@model4, offset)
  when 4
    encode_symbol(@model7, 5)
    encode_position(@model5, offset)
  else
    encode_symbol(@model7, 6)
    encode_length(@model6len, length - 5)
    encode_position(@model6, offset)
  end
end
|
|
288
|
+
|
|
289
|
+
# Encode position using position slots
|
|
290
|
+
# Emit a match distance as a slot symbol plus raw extra bits.
#
# The distance is stored zero-based (+offset - 1+). Its slot goes through
# +model+; the remainder above the slot's base is written directly to the
# bitstream when the slot carries extra bits.
#
# @param model [Model] position model to encode the slot with
# @param offset [Integer] backward distance (1-based)
def encode_position(model, offset)
  distance = offset - 1
  slot = find_position_slot(distance)
  encode_symbol(model, slot)

  bit_count = EXTRA_BITS[slot]
  @bitstream.write_bits_msb(distance - POSITION_BASE[slot], bit_count) if bit_count.positive?
end
|
|
304
|
+
|
|
305
|
+
# Find position slot for an offset
|
|
306
|
+
# Locate the position slot whose range contains +offset+.
#
# A slot covers +POSITION_BASE[i]+ up to (but excluding)
# +POSITION_BASE[i] + 2**EXTRA_BITS[i]+. Values beyond every slot fall
# back to the last one.
#
# @param offset [Integer] zero-based match distance
# @return [Integer] slot index
def find_position_slot(offset)
  hit = POSITION_BASE.each_index.find do |idx|
    offset < POSITION_BASE[idx] + (1 << EXTRA_BITS[idx])
  end

  hit || (POSITION_BASE.length - 1)
end
|
|
312
|
+
|
|
313
|
+
# Encode match length
|
|
314
|
+
# Emit a match length as a slot symbol plus raw extra bits.
#
# The slot goes through +model+; the remainder above the slot's base is
# written directly to the bitstream when the slot carries extra bits.
#
# @param model [Model] length model to encode the slot with
# @param length [Integer] length value (the caller has already subtracted
#   its bias)
def encode_length(model, length)
  slot = find_length_slot(length)
  encode_symbol(model, slot)

  bit_count = LENGTH_EXTRA[slot]
  @bitstream.write_bits_msb(length - LENGTH_BASE[slot], bit_count) if bit_count.positive?
end
|
|
328
|
+
|
|
329
|
+
# Find length slot for a length value
|
|
330
|
+
# Locate the length slot whose range contains +length+.
#
# A slot covers +LENGTH_BASE[i]+ up to (but excluding)
# +LENGTH_BASE[i] + 2**LENGTH_EXTRA[i]+. Values beyond every slot fall
# back to the last one.
#
# @param length [Integer] length value to classify
# @return [Integer] slot index
def find_length_slot(length)
  hit = LENGTH_BASE.each_index.find do |idx|
    length < LENGTH_BASE[idx] + (1 << LENGTH_EXTRA[idx])
  end

  hit || (LENGTH_BASE.length - 1)
end
|
|
336
|
+
|
|
337
|
+
# Encode a symbol using arithmetic coding
|
|
338
|
+
# This is the inverse of GET_SYMBOL macro in qtmd.c
|
|
339
|
+
# Arithmetic-encode one symbol with the given adaptive model.
#
# Narrows the coder interval [L, H] to the sub-range owned by the symbol,
# bumps the cumulative frequencies of the symbol and everything ranked
# above it, rescales the model when its total passes 3800, and finally
# shifts out any bits that have become fixed.
#
# @param model [Model] model that contains the symbol
# @param sym [Integer] symbol value to encode
# @raise [ArgumentError] if the symbol is not among the model's entries
def encode_symbol(model, sym)
  # Only the first +entries+ slots are encodable; the last is a sentinel.
  idx = (0...model.entries).find { |k| model.syms[k].sym == sym }
  raise ArgumentError, "Symbol #{sym} not found in model" if idx.nil?

  span = (@h - @l) + 1
  total = model.syms[0].cumfreq
  upper = model.syms[idx].cumfreq
  lower = model.syms[idx + 1].cumfreq

  # Both bounds are derived from the old low bound.
  low = @l
  @h = low + ((upper * span) / total) - 1
  @l = low + ((lower * span) / total)

  # Adapt: the symbol and all entries before it gain weight.
  model.syms[0..idx].each { |entry| entry.cumfreq += 8 }

  update_model(model) if model.syms[0].cumfreq > 3800

  normalize_range
end
|
|
373
|
+
|
|
374
|
+
# Normalize arithmetic coding range and output bits
|
|
375
|
+
# This implements the encoder equivalent of the decoder's normalization (lines 109-121)
|
|
376
|
+
# Renormalize the coder interval after a symbol has been encoded.
#
# While the top bits of L and H agree, that bit is final: write it,
# followed by any deferred underflow bits (inverted), and shift both
# bounds left. When the top bits differ but L is 01.. and H is 10.., the
# interval straddles the midpoint: remember one more underflow bit, drop
# the second-highest bit from both bounds and shift. Otherwise stop.
def normalize_range
  loop do
    if (@l ^ @h).nobits?(0x8000)
      settled = @l[15]
      @bitstream.write_bits_msb(settled, 1)

      opposite = settled ^ 1
      @underflow_bits.times { @bitstream.write_bits_msb(opposite, 1) }
      @underflow_bits = 0
    elsif @l.anybits?(0x4000) && @h.nobits?(0x4000)
      @underflow_bits += 1
      @l &= 0x3FFF
      @h |= 0x4000
    else
      break
    end

    # Shift in a 0 at the bottom of L and a 1 at the bottom of H.
    @l = (@l << 1) & 0xFFFF
    @h = ((@h << 1) | 1) & 0xFFFF
  end
end
|
|
406
|
+
|
|
407
|
+
# Update model statistics (matching qtmd_update_model exactly)
|
|
408
|
+
# Rescale a model whose cumulative total has grown too large.
#
# Most calls just halve every cumulative frequency while keeping the
# table strictly decreasing. Once the model's countdown reaches zero the
# table is rebuilt instead: cumulative values are turned into halved
# per-symbol frequencies, the entries are reordered by decreasing
# frequency, and cumulative values are recomputed. The countdown is then
# reset to 50.
#
# @param model [Model] model to rescale in place
def update_model(model)
  model.shiftsleft -= 1
  syms = model.syms
  count = model.entries

  if model.shiftsleft.positive?
    # Walk from the back so each entry is compared with its already
    # halved successor; the sentinel at index +count+ is left untouched.
    (count - 1).downto(0) do |i|
      halved = syms[i].cumfreq >> 1
      successor = syms[i + 1].cumfreq
      syms[i].cumfreq = halved <= successor ? successor + 1 : halved
    end
  else
    model.shiftsleft = 50

    # Cumulative -> individual frequency, rounded up before halving so no
    # symbol drops to zero.
    count.times do |i|
      syms[i].cumfreq = (syms[i].cumfreq - syms[i + 1].cumfreq + 1) >> 1
    end

    # Exchange sort into decreasing frequency. The exact swap sequence is
    # kept as-is because it determines the resulting symbol order.
    0.upto(count - 2) do |i|
      (i + 1).upto(count - 1) do |j|
        syms[i], syms[j] = syms[j], syms[i] if syms[i].cumfreq < syms[j].cumfreq
      end
    end

    # Individual -> cumulative, accumulating from the sentinel upwards.
    (count - 1).downto(0) do |i|
      syms[i].cumfreq += syms[i + 1].cumfreq
    end
  end
end
|
|
443
|
+
# rubocop:enable Metrics/ClassLength
|
|
444
|
+
end
|
|
445
|
+
end
|
|
446
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
  # Numeric constants: CAB container values together with I/O mode, seek
  # mode and KWAJ values.
  module Constants
    # "MSCF" read as a little-endian 32-bit integer
    CAB_SIGNATURE = 0x4643_534D

    # Compression method identifiers
    COMP_TYPE_NONE = 0
    COMP_TYPE_MSZIP = 1
    COMP_TYPE_QUANTUM = 2
    COMP_TYPE_LZX = 3

    # Mask selecting the compression method bits
    COMP_TYPE_MASK = 0x0F

    # Cabinet header flag bits
    FLAG_PREV_CABINET = 1 << 0
    FLAG_NEXT_CABINET = 1 << 1
    FLAG_RESERVE_PRESENT = 1 << 2

    # File attribute bits
    ATTRIB_READONLY = 1 << 0
    ATTRIB_HIDDEN = 1 << 1
    ATTRIB_SYSTEM = 1 << 2
    ATTRIB_ARCH = 1 << 5
    ATTRIB_EXEC = 1 << 6
    ATTRIB_UTF_NAME = 1 << 7

    # Reserved folder index values
    FOLDER_CONTINUED_FROM_PREV = 0xFFFD
    FOLDER_CONTINUED_TO_NEXT = 0xFFFE
    FOLDER_CONTINUED_PREV_AND_NEXT = 0xFFFF

    # Size limits
    BLOCK_MAX = 1 << 15 # largest uncompressed block (32 KiB)
    INPUT_MAX = BLOCK_MAX + 6144 # largest compressed block (LZX worst case)
    FOLDER_MAX = 0xFFFF # most data blocks a folder can hold
    LENGTH_MAX = BLOCK_MAX * FOLDER_MAX # largest file size

    # On-disk structure sizes in bytes
    CFHEADER_SIZE = 36
    CFHEADER_EXT_SIZE = 4
    CFFOLDER_SIZE = 8
    CFFILE_SIZE = 16
    CFDATA_SIZE = 8

    # I/O open modes
    MODE_READ = 0
    MODE_WRITE = 1
    MODE_UPDATE = 2
    MODE_APPEND = 3

    # Seek origins
    SEEK_START = 0
    SEEK_CUR = 1
    SEEK_END = 2

    # KWAJ compression method identifiers
    KWAJ_COMP_NONE = 0
    KWAJ_COMP_XOR = 1
    KWAJ_COMP_SZDD = 2
    KWAJ_COMP_LZH = 3
    KWAJ_COMP_MSZIP = 4

    # KWAJ header flag bits
    KWAJ_HDR_HASLENGTH = 1 << 0
    KWAJ_HDR_HASUNKNOWN1 = 1 << 1
    KWAJ_HDR_HASUNKNOWN2 = 1 << 2
    KWAJ_HDR_HASFILENAME = 1 << 3
    KWAJ_HDR_HASFILEEXT = 1 << 4
    KWAJ_HDR_HASEXTRATEXT = 1 << 5
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module Decompressors
|
|
5
|
+
# Base class for all decompression algorithms
|
|
6
|
+
class Base
  attr_reader :io_system, :input, :output, :buffer_size

  # Store the collaborators every decompressor works with.
  #
  # @param io_system [System::IOSystem] I/O system for reading/writing
  # @param input [System::FileHandle, System::MemoryHandle] Input handle
  # @param output [System::FileHandle, System::MemoryHandle] Output handle
  # @param buffer_size [Integer] Buffer size for I/O operations
  def initialize(io_system, input, output, buffer_size)
    @buffer_size = buffer_size
    @output = output
    @input = input
    @io_system = io_system
  end

  # Abstract entry point; concrete decompressors override it.
  #
  # @param bytes [Integer] Number of bytes to decompress
  # @return [Integer] Number of bytes decompressed
  # @raise [NotImplementedError] always, in this base class
  def decompress(bytes)
    raise NotImplementedError, "#{self.class} must implement #decompress"
  end

  # Release resources held by the decompressor. A no-op here; subclasses
  # override it when they have something to clean up.
  #
  # @return [void]
  def free; end
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module Decompressors
|
|
5
|
+
# LZSS decompressor for LZSS-compressed CAB data
|
|
6
|
+
#
|
|
7
|
+
# LZSS (Lempel-Ziv-Storer-Szymanski) is a derivative of LZ77 compression.
|
|
8
|
+
# It uses a 4096-byte sliding window with a control byte mechanism to
|
|
9
|
+
# indicate whether the next operation is a literal byte copy or a match
|
|
10
|
+
# from the window history.
|
|
11
|
+
class LZSS < Base
|
|
12
|
+
# LZSS algorithm constants
|
|
13
|
+
WINDOW_SIZE = 4096
|
|
14
|
+
WINDOW_FILL = 0x20
|
|
15
|
+
|
|
16
|
+
# LZSS modes
|
|
17
|
+
MODE_EXPAND = 0
|
|
18
|
+
MODE_MSHELP = 1
|
|
19
|
+
MODE_QBASIC = 2
|
|
20
|
+
|
|
21
|
+
attr_reader :mode, :window, :window_pos
|
|
22
|
+
|
|
23
|
+
# Initialize LZSS decompressor
|
|
24
|
+
#
|
|
25
|
+
# @param io_system [System::IOSystem] I/O system for reading/writing
|
|
26
|
+
# @param input [System::FileHandle, System::MemoryHandle] Input handle
|
|
27
|
+
# @param output [System::FileHandle, System::MemoryHandle] Output handle
|
|
28
|
+
# @param buffer_size [Integer] Buffer size for I/O operations
|
|
29
|
+
# @param mode [Integer] LZSS mode (default: MODE_EXPAND)
|
|
30
|
+
def initialize(io_system, input, output, buffer_size,
               mode = MODE_EXPAND)
  super(io_system, input, output, buffer_size)

  @mode = mode

  # MS Help streams store their control bytes inverted.
  @invert = if mode == MODE_MSHELP
              0xFF
            else
              0x00
            end

  # Input is pulled in chunks and consumed byte by byte.
  @input_buffer = ""
  @input_pos = 0

  # History window pre-filled with the fill byte; the start position
  # depends on the mode.
  @window = Array.new(WINDOW_SIZE, WINDOW_FILL)
  @window_pos = initialize_window_position
end
|
|
40
|
+
|
|
41
|
+
# Decompress LZSS data
|
|
42
|
+
#
|
|
43
|
+
# @param bytes [Integer] Number of bytes to decompress (unused, reads
|
|
44
|
+
# until EOF)
|
|
45
|
+
# @return [Integer] Number of bytes decompressed
|
|
46
|
+
# Decode the whole LZSS stream from the input to the output.
#
# Each control byte (XOR-ed with the mode's invert mask) governs up to
# eight items, lowest bit first: a set bit means one literal byte, a
# clear bit means a two-byte match reference into the history window
# (12-bit position, 4-bit length stored minus 3). Decoding stops when the
# input runs dry, even in the middle of a group.
#
# @param _bytes [Integer] ignored; input is consumed until EOF
# @return [Integer] Number of bytes decompressed
def decompress(_bytes)
  produced = 0
  wrap = WINDOW_SIZE - 1

  loop do
    flags = read_input_byte
    break if flags.nil?

    flags ^= @invert

    (0...8).each do |bit|
      if flags[bit] == 1
        literal = read_input_byte
        break if literal.nil?

        @window[@window_pos] = literal
        write_output_byte(literal)
        produced += 1
        @window_pos = (@window_pos + 1) & wrap
      else
        low = read_input_byte
        break if low.nil?

        packed = read_input_byte
        break if packed.nil?

        # High nibble of the second byte supplies position bits 8-11.
        source = low | ((packed & 0xF0) << 4)
        run = (packed & 0x0F) + 3

        # Copy one byte at a time so a match may overlap its own output.
        run.times do
          copied = @window[source]
          @window[@window_pos] = copied
          write_output_byte(copied)
          produced += 1

          @window_pos = (@window_pos + 1) & wrap
          source = (source + 1) & wrap
        end
      end
    end
  end

  produced
end
|
|
98
|
+
|
|
99
|
+
private
|
|
100
|
+
|
|
101
|
+
# Initialize the window position based on mode
|
|
102
|
+
#
|
|
103
|
+
# @return [Integer] Initial window position
|
|
104
|
+
# Starting write position in the history window: 18 bytes before the end
# in QBasic mode, 16 bytes before the end otherwise.
#
# @return [Integer] Initial window position
def initialize_window_position
  WINDOW_SIZE - (@mode == MODE_QBASIC ? 18 : 16)
end
|
|
108
|
+
|
|
109
|
+
# Read a single byte from the input buffer
|
|
110
|
+
#
|
|
111
|
+
# @return [Integer, nil] Byte value or nil at EOF
|
|
112
|
+
# Fetch the next input byte, refilling the buffer from the input handle
# whenever it has been fully consumed.
#
# @return [Integer, nil] Byte value or nil at EOF
def read_input_byte
  exhausted = @input_pos >= @input_buffer.bytesize

  if exhausted
    @input_buffer = @io_system.read(@input, @buffer_size)
    @input_pos = 0
    return nil if @input_buffer.empty?
  end

  current = @input_pos
  @input_pos = current + 1
  @input_buffer.getbyte(current)
end
|
|
123
|
+
|
|
124
|
+
# Write a single byte to the output
|
|
125
|
+
#
|
|
126
|
+
# @param byte [Integer] Byte to write
|
|
127
|
+
# @return [void]
|
|
128
|
+
# @raise [Errors::DecompressionError] if write fails
|
|
129
|
+
# Send a single byte to the output handle.
#
# @param byte [Integer] Byte to write
# @return [void]
# @raise [Errors::DecompressionError] if the I/O system does not report
#   exactly one byte written
def write_output_byte(byte)
  count = @io_system.write(@output, [byte].pack("C"))
  raise Errors::DecompressionError, "Failed to write output byte" unless count == 1
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|