cabriolet 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -1,75 +1,24 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../quantum_shared"
|
|
4
|
+
|
|
3
5
|
module Cabriolet
|
|
4
6
|
module Compressors
|
|
5
7
|
# Quantum compresses data using arithmetic coding and LZ77-based matching
|
|
6
8
|
# Based on the Quantum decompressor and libmspack qtmd.c implementation
|
|
7
9
|
#
|
|
8
|
-
# STATUS: Functional with known limitations
|
|
9
|
-
# - Literals: WORKING ✓
|
|
10
|
-
# - Short matches (3-13 bytes): WORKING ✓
|
|
11
|
-
# - Longer matches (14+ bytes): Limited support (known issue)
|
|
12
|
-
# - Simple data round-trips successfully
|
|
13
|
-
# - Complex repeated patterns may have issues
|
|
14
|
-
#
|
|
15
10
|
# The Quantum method was created by David Stafford, adapted by Microsoft
|
|
16
11
|
# Corporation.
|
|
12
|
+
#
|
|
13
|
+
# NOTE: This compressor is a work-in-progress. The arithmetic coding
|
|
14
|
+
# implementation needs refinement to match the decoder exactly.
|
|
15
|
+
# For now, this implementation focuses on correct structure.
|
|
17
16
|
# rubocop:disable Metrics/ClassLength
|
|
18
17
|
class Quantum < Base
|
|
19
|
-
|
|
20
|
-
FRAME_SIZE = 32_768
|
|
21
|
-
|
|
22
|
-
# Match constants
|
|
23
|
-
MIN_MATCH = 3
|
|
24
|
-
MAX_MATCH = 1028
|
|
25
|
-
|
|
26
|
-
# Position slot tables (same as decompressor)
|
|
27
|
-
POSITION_BASE = [
|
|
28
|
-
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
|
|
29
|
-
512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
|
|
30
|
-
24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
|
|
31
|
-
393_216, 524_288, 786_432, 1_048_576, 1_572_864
|
|
32
|
-
].freeze
|
|
33
|
-
|
|
34
|
-
EXTRA_BITS = [
|
|
35
|
-
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
|
36
|
-
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
|
|
37
|
-
17, 17, 18, 18, 19, 19
|
|
38
|
-
].freeze
|
|
39
|
-
|
|
40
|
-
LENGTH_BASE = [
|
|
41
|
-
0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
|
|
42
|
-
30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
|
|
43
|
-
].freeze
|
|
44
|
-
|
|
45
|
-
LENGTH_EXTRA = [
|
|
46
|
-
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
|
|
47
|
-
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
|
|
48
|
-
].freeze
|
|
18
|
+
include QuantumShared
|
|
49
19
|
|
|
50
20
|
attr_reader :window_bits, :window_size
|
|
51
21
|
|
|
52
|
-
# Represents a symbol in an arithmetic coding model
|
|
53
|
-
class ModelSymbol
|
|
54
|
-
attr_accessor :sym, :cumfreq
|
|
55
|
-
|
|
56
|
-
def initialize(sym, cumfreq)
|
|
57
|
-
@sym = sym
|
|
58
|
-
@cumfreq = cumfreq
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Represents an arithmetic coding model
|
|
63
|
-
class Model
|
|
64
|
-
attr_accessor :shiftsleft, :entries, :syms
|
|
65
|
-
|
|
66
|
-
def initialize(syms, entries)
|
|
67
|
-
@syms = syms
|
|
68
|
-
@entries = entries
|
|
69
|
-
@shiftsleft = 4
|
|
70
|
-
end
|
|
71
|
-
end
|
|
72
|
-
|
|
73
22
|
# Initialize Quantum compressor
|
|
74
23
|
#
|
|
75
24
|
# @param io_system [System::IOSystem] I/O system for reading/writing
|
|
@@ -77,7 +26,8 @@ module Cabriolet
|
|
|
77
26
|
# @param output [System::FileHandle, System::MemoryHandle] Output handle
|
|
78
27
|
# @param buffer_size [Integer] Buffer size for I/O operations
|
|
79
28
|
# @param window_bits [Integer] Window size parameter (10-21)
|
|
80
|
-
def initialize(io_system, input, output, buffer_size, window_bits: 10
|
|
29
|
+
def initialize(io_system, input, output, buffer_size, window_bits: 10,
|
|
30
|
+
**_kwargs)
|
|
81
31
|
super(io_system, input, output, buffer_size)
|
|
82
32
|
|
|
83
33
|
# Validate window_bits
|
|
@@ -179,7 +129,6 @@ module Cabriolet
|
|
|
179
129
|
|
|
180
130
|
# Compress a single frame
|
|
181
131
|
def compress_frame(data)
|
|
182
|
-
# No header needed - the first 16 bits of encoded data will be read as C
|
|
183
132
|
pos = 0
|
|
184
133
|
|
|
185
134
|
while pos < data.bytesize
|
|
@@ -198,27 +147,25 @@ module Cabriolet
|
|
|
198
147
|
end
|
|
199
148
|
end
|
|
200
149
|
|
|
201
|
-
# Finish arithmetic coding
|
|
202
|
-
# We need to output enough bits to disambiguate the final range
|
|
150
|
+
# Finish arithmetic coding
|
|
203
151
|
finish_arithmetic_coding
|
|
204
152
|
end
|
|
205
153
|
|
|
206
|
-
# Finish arithmetic coding
|
|
154
|
+
# Finish arithmetic coding
|
|
207
155
|
def finish_arithmetic_coding
|
|
208
|
-
# Output
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
@
|
|
156
|
+
# Output pending underflow bits
|
|
157
|
+
if @underflow_bits.positive?
|
|
158
|
+
bit = if @l.anybits?(0x4000)
|
|
159
|
+
1
|
|
160
|
+
else
|
|
161
|
+
0
|
|
162
|
+
end
|
|
163
|
+
@bitstream.write_bits_msb(bit, 1)
|
|
164
|
+
@underflow_bits.times do
|
|
165
|
+
@bitstream.write_bits_msb(bit ^ 1, 1)
|
|
166
|
+
end
|
|
167
|
+
@underflow_bits = 0
|
|
220
168
|
end
|
|
221
|
-
@underflow_bits = 0
|
|
222
169
|
end
|
|
223
170
|
|
|
224
171
|
# Find best match in the sliding window
|
|
@@ -335,7 +282,6 @@ module Cabriolet
|
|
|
335
282
|
end
|
|
336
283
|
|
|
337
284
|
# Encode a symbol using arithmetic coding
|
|
338
|
-
# This is the inverse of GET_SYMBOL macro in qtmd.c
|
|
339
285
|
def encode_symbol(model, sym)
|
|
340
286
|
# Find symbol index in model
|
|
341
287
|
i = 0
|
|
@@ -346,33 +292,29 @@ module Cabriolet
|
|
|
346
292
|
"Symbol #{sym} not found in model"
|
|
347
293
|
end
|
|
348
294
|
|
|
349
|
-
# Calculate range
|
|
350
|
-
range = (@h - @l) + 1
|
|
295
|
+
# Calculate range - use decoder's formula
|
|
296
|
+
range = ((@h - @l) & 0xFFFF) + 1
|
|
351
297
|
symf = model.syms[0].cumfreq
|
|
352
298
|
|
|
353
|
-
# Update H and L
|
|
354
|
-
# Decoder uses syms[i-1] and syms[i], so encoder at index j
|
|
355
|
-
# should use syms[j] and syms[j+1] to make decoder land at i=j+1
|
|
356
|
-
# But decoder returns syms[i-1].sym, so it will return syms[j].sym ✓
|
|
299
|
+
# Update H and L
|
|
357
300
|
@h = @l + ((model.syms[i].cumfreq * range) / symf) - 1
|
|
358
301
|
@l += ((model.syms[i + 1].cumfreq * range) / symf)
|
|
359
302
|
|
|
360
|
-
# Update model frequencies
|
|
303
|
+
# Update model frequencies
|
|
361
304
|
j = i
|
|
362
305
|
while j >= 0
|
|
363
306
|
model.syms[j].cumfreq += 8
|
|
364
307
|
j -= 1
|
|
365
308
|
end
|
|
366
309
|
|
|
367
|
-
# Check if model needs updating
|
|
310
|
+
# Check if model needs updating
|
|
368
311
|
update_model(model) if model.syms[0].cumfreq > 3800
|
|
369
312
|
|
|
370
|
-
# Normalize range
|
|
313
|
+
# Normalize range
|
|
371
314
|
normalize_range
|
|
372
315
|
end
|
|
373
316
|
|
|
374
317
|
# Normalize arithmetic coding range and output bits
|
|
375
|
-
# This implements the encoder equivalent of the decoder's normalization (lines 109-121)
|
|
376
318
|
def normalize_range
|
|
377
319
|
loop do
|
|
378
320
|
if (@l & 0x8000) == (@h & 0x8000)
|
|
@@ -395,37 +337,36 @@ module Cabriolet
|
|
|
395
337
|
@h |= 0x4000
|
|
396
338
|
|
|
397
339
|
# Can't normalize further
|
|
398
|
-
|
|
399
340
|
end
|
|
400
341
|
|
|
401
|
-
# Shift range
|
|
342
|
+
# Shift range
|
|
402
343
|
@l = (@l << 1) & 0xFFFF
|
|
403
344
|
@h = ((@h << 1) | 1) & 0xFFFF
|
|
404
345
|
end
|
|
405
346
|
end
|
|
406
347
|
|
|
407
|
-
# Update model statistics
|
|
348
|
+
# Update model statistics
|
|
408
349
|
def update_model(model)
|
|
409
350
|
model.shiftsleft -= 1
|
|
410
351
|
|
|
411
352
|
if model.shiftsleft.positive?
|
|
412
|
-
# Simple shift
|
|
353
|
+
# Simple shift
|
|
413
354
|
(model.entries - 1).downto(0) do |i|
|
|
414
355
|
model.syms[i].cumfreq >>= 1
|
|
415
356
|
model.syms[i].cumfreq = model.syms[i + 1].cumfreq + 1 if model.syms[i].cumfreq <= model.syms[i + 1].cumfreq
|
|
416
357
|
end
|
|
417
358
|
else
|
|
418
|
-
# Full rebuild
|
|
359
|
+
# Full rebuild
|
|
419
360
|
model.shiftsleft = 50
|
|
420
361
|
|
|
421
|
-
# Convert cumfreq to frequencies
|
|
362
|
+
# Convert cumfreq to frequencies
|
|
422
363
|
(0...model.entries).each do |i|
|
|
423
364
|
model.syms[i].cumfreq -= model.syms[i + 1].cumfreq
|
|
424
365
|
model.syms[i].cumfreq += 1
|
|
425
366
|
model.syms[i].cumfreq >>= 1
|
|
426
367
|
end
|
|
427
368
|
|
|
428
|
-
# Sort by frequency
|
|
369
|
+
# Sort by frequency
|
|
429
370
|
(0...(model.entries - 1)).each do |i|
|
|
430
371
|
((i + 1)...model.entries).each do |j|
|
|
431
372
|
if model.syms[i].cumfreq < model.syms[j].cumfreq
|
|
@@ -434,7 +375,7 @@ module Cabriolet
|
|
|
434
375
|
end
|
|
435
376
|
end
|
|
436
377
|
|
|
437
|
-
# Convert back to cumulative frequencies
|
|
378
|
+
# Convert back to cumulative frequencies
|
|
438
379
|
(model.entries - 1).downto(0) do |i|
|
|
439
380
|
model.syms[i].cumfreq += model.syms[i + 1].cumfreq
|
|
440
381
|
end
|
|
@@ -12,7 +12,7 @@ module Cabriolet
|
|
|
12
12
|
# @param input [System::FileHandle, System::MemoryHandle] Input handle
|
|
13
13
|
# @param output [System::FileHandle, System::MemoryHandle] Output handle
|
|
14
14
|
# @param buffer_size [Integer] Buffer size for I/O operations
|
|
15
|
-
def initialize(io_system, input, output, buffer_size)
|
|
15
|
+
def initialize(io_system, input, output, buffer_size, **_kwargs)
|
|
16
16
|
@io_system = io_system
|
|
17
17
|
@input = input
|
|
18
18
|
@output = output
|
|
@@ -40,13 +40,17 @@ module Cabriolet
|
|
|
40
40
|
|
|
41
41
|
# Decompress LZSS data
|
|
42
42
|
#
|
|
43
|
-
# @param bytes [Integer]
|
|
44
|
-
# until EOF)
|
|
43
|
+
# @param bytes [Integer, nil] Maximum number of output bytes to write (nil or 0 = until EOF)
|
|
45
44
|
# @return [Integer] Number of bytes decompressed
|
|
46
|
-
def decompress(
|
|
45
|
+
def decompress(bytes = nil)
|
|
47
46
|
bytes_written = 0
|
|
47
|
+
# Only enforce limit if bytes is a positive integer
|
|
48
|
+
enforce_limit = bytes&.positive?
|
|
48
49
|
|
|
49
50
|
loop do
|
|
51
|
+
# Check if we've reached the output byte limit (only when limit is enforced)
|
|
52
|
+
break if enforce_limit && bytes_written >= bytes
|
|
53
|
+
|
|
50
54
|
# Read control byte
|
|
51
55
|
control_byte = read_input_byte
|
|
52
56
|
break if control_byte.nil?
|
|
@@ -55,6 +59,9 @@ module Cabriolet
|
|
|
55
59
|
|
|
56
60
|
# Process each bit in the control byte
|
|
57
61
|
8.times do |bit_index|
|
|
62
|
+
# Check output limit before each operation (only when limit is enforced)
|
|
63
|
+
break if enforce_limit && bytes_written >= bytes
|
|
64
|
+
|
|
58
65
|
mask = 1 << bit_index
|
|
59
66
|
|
|
60
67
|
if control_byte.anybits?(mask)
|
|
@@ -81,6 +88,9 @@ module Cabriolet
|
|
|
81
88
|
|
|
82
89
|
# Copy from window
|
|
83
90
|
length.times do
|
|
91
|
+
# Check if we've reached the limit mid-match
|
|
92
|
+
break if enforce_limit && bytes_written >= bytes
|
|
93
|
+
|
|
84
94
|
byte = @window[match_pos]
|
|
85
95
|
@window[@window_pos] = byte
|
|
86
96
|
write_output_byte(byte)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "base"
|
|
4
|
+
|
|
3
5
|
module Cabriolet
|
|
4
6
|
module Decompressors
|
|
5
7
|
# LZX handles LZX compressed data
|
|
@@ -100,7 +102,7 @@ module Cabriolet
|
|
|
100
102
|
# @param output_length [Integer] Expected output length for E8 processing
|
|
101
103
|
# @param is_delta [Boolean] Whether this is LZX DELTA format
|
|
102
104
|
def initialize(io_system, input, output, buffer_size, window_bits:,
|
|
103
|
-
reset_interval: 0, output_length: 0, is_delta: false)
|
|
105
|
+
reset_interval: 0, output_length: 0, is_delta: false, salvage: false, **_kwargs)
|
|
104
106
|
super(io_system, input, output, buffer_size)
|
|
105
107
|
|
|
106
108
|
# Validate window_bits
|
|
@@ -146,8 +148,9 @@ module Cabriolet
|
|
|
146
148
|
@intel_started = false
|
|
147
149
|
@e8_buf = "\0" * FRAME_SIZE
|
|
148
150
|
|
|
149
|
-
# Initialize bitstream
|
|
150
|
-
@bitstream = Binary::Bitstream.new(io_system, input, buffer_size
|
|
151
|
+
# Initialize bitstream (LZX uses MSB-first bit ordering per libmspack lzxd.c)
|
|
152
|
+
@bitstream = Binary::Bitstream.new(io_system, input, buffer_size,
|
|
153
|
+
bit_order: :msb, salvage: salvage)
|
|
151
154
|
|
|
152
155
|
# Initialize Huffman trees
|
|
153
156
|
initialize_trees
|
|
@@ -173,19 +176,21 @@ module Cabriolet
|
|
|
173
176
|
def decompress(bytes)
|
|
174
177
|
return 0 if bytes <= 0
|
|
175
178
|
|
|
179
|
+
# Read Intel filesize header if not already read (once per stream)
|
|
180
|
+
read_intel_header unless @header_read
|
|
181
|
+
|
|
176
182
|
total_written = 0
|
|
177
183
|
end_frame = ((@offset + bytes) / FRAME_SIZE) + 1
|
|
178
184
|
|
|
179
185
|
while @frame < end_frame
|
|
180
|
-
# Check reset interval
|
|
181
|
-
|
|
186
|
+
# Check reset interval - reset offset registers at frame boundaries
|
|
187
|
+
if @reset_interval.positive? && (@frame % @reset_interval).zero? && @frame.positive?
|
|
188
|
+
@r0 = @r1 = @r2 = 1
|
|
189
|
+
end
|
|
182
190
|
|
|
183
191
|
# Read DELTA chunk size if needed
|
|
184
192
|
@bitstream.read_bits(16) if @is_delta
|
|
185
193
|
|
|
186
|
-
# Read Intel filesize header if needed
|
|
187
|
-
read_intel_header unless @header_read
|
|
188
|
-
|
|
189
194
|
# Calculate frame size
|
|
190
195
|
frame_size = calculate_frame_size
|
|
191
196
|
|
|
@@ -238,6 +243,10 @@ module Cabriolet
|
|
|
238
243
|
|
|
239
244
|
# Reset LZX state (called at reset intervals)
|
|
240
245
|
#
|
|
246
|
+
# Per libmspack: Only reset state variables, NOT Huffman code lengths.
|
|
247
|
+
# Lengths persist across blocks and are updated via delta encoding.
|
|
248
|
+
# They are only zeroed at initialization (in initialize_trees).
|
|
249
|
+
#
|
|
241
250
|
# @return [void]
|
|
242
251
|
def reset_state
|
|
243
252
|
@r0 = 1
|
|
@@ -247,12 +256,17 @@ module Cabriolet
|
|
|
247
256
|
@block_remaining = 0
|
|
248
257
|
@block_type = BLOCKTYPE_INVALID
|
|
249
258
|
|
|
250
|
-
#
|
|
251
|
-
|
|
252
|
-
|
|
259
|
+
# NOTE: Do NOT reset @maintree_lengths or @length_lengths here!
|
|
260
|
+
# Per libmspack lzxd.c lines 267-269, lengths are initialized to 0
|
|
261
|
+
# only once (at start) "because deltas will be applied to them".
|
|
262
|
+
# Resetting them here breaks delta encoding between blocks.
|
|
253
263
|
end
|
|
254
264
|
|
|
255
|
-
# Read Intel filesize header
|
|
265
|
+
# Read Intel filesize header (once per stream, before any frames)
|
|
266
|
+
#
|
|
267
|
+
# Format per libmspack:
|
|
268
|
+
# - 1 bit: Intel flag (if 0, filesize = 0; if 1, read 32-bit filesize)
|
|
269
|
+
# - If flag is 1: 32 bits for filesize (16 bits high, 16 bits low)
|
|
256
270
|
#
|
|
257
271
|
# @return [void]
|
|
258
272
|
def read_intel_header
|
|
@@ -304,13 +318,20 @@ module Cabriolet
|
|
|
304
318
|
|
|
305
319
|
# Read block header
|
|
306
320
|
#
|
|
321
|
+
# LZX block header format (per libmspack):
|
|
322
|
+
# - 3 bits: block_type
|
|
323
|
+
# - 24 bits: block_length (16 bits high, 8 bits low, combined as (high << 8) | low)
|
|
324
|
+
#
|
|
307
325
|
# @return [void]
|
|
308
326
|
def read_block_header
|
|
309
|
-
# Align for uncompressed blocks
|
|
327
|
+
# Align for uncompressed blocks - this ensures correct byte alignment
|
|
328
|
+
# when reading the R0, R1, R2 values from the block header
|
|
310
329
|
@bitstream.byte_align if @block_type == BLOCKTYPE_UNCOMPRESSED && @block_length.allbits?(1)
|
|
311
330
|
|
|
312
|
-
# Read block type
|
|
331
|
+
# Read block type (3 bits)
|
|
313
332
|
@block_type = @bitstream.read_bits(3)
|
|
333
|
+
|
|
334
|
+
# Read block length (24 bits: 16 bits high, then 8 bits low)
|
|
314
335
|
high = @bitstream.read_bits(16)
|
|
315
336
|
low = @bitstream.read_bits(8)
|
|
316
337
|
@block_length = (high << 8) | low
|
|
@@ -324,6 +345,8 @@ module Cabriolet
|
|
|
324
345
|
when BLOCKTYPE_UNCOMPRESSED
|
|
325
346
|
read_uncompressed_block_header
|
|
326
347
|
else
|
|
348
|
+
# Per libmspack lzxd.c lines 519-521, BLOCKTYPE_INVALID (0) and
|
|
349
|
+
# blocktypes 4-7 are all invalid and should raise an error
|
|
327
350
|
raise DecompressionError, "Invalid block type: #{@block_type}"
|
|
328
351
|
end
|
|
329
352
|
end
|
|
@@ -338,11 +361,11 @@ module Cabriolet
|
|
|
338
361
|
end
|
|
339
362
|
|
|
340
363
|
# Build aligned tree
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
364
|
+
# Note: Aligned tree may be incomplete (Kraft sum < 1.0), which is valid
|
|
365
|
+
# as long as the unused codes are never encountered in the bitstream
|
|
366
|
+
@aligned_tree = Huffman::Tree.new(@aligned_lengths, ALIGNED_MAXSYMBOLS,
|
|
367
|
+
bit_order: :msb)
|
|
368
|
+
@aligned_tree.build_table(ALIGNED_TABLEBITS)
|
|
346
369
|
|
|
347
370
|
# Read main and length trees (same as verbatim)
|
|
348
371
|
read_main_and_length_trees
|
|
@@ -359,15 +382,14 @@ module Cabriolet
|
|
|
359
382
|
#
|
|
360
383
|
# @return [void]
|
|
361
384
|
def read_main_and_length_trees
|
|
362
|
-
# Read and build pretree
|
|
363
|
-
read_pretree
|
|
364
|
-
|
|
365
385
|
# Read main tree lengths using pretree
|
|
386
|
+
# Note: Each call to read_lengths reads its own pretree (per libmspack lzxd_read_lens)
|
|
366
387
|
read_lengths(@maintree_lengths, 0, 256)
|
|
367
388
|
read_lengths(@maintree_lengths, 256, @maintree_maxsymbols)
|
|
368
389
|
|
|
369
390
|
# Build main tree
|
|
370
|
-
@maintree = Huffman::Tree.new(@maintree_lengths, @maintree_maxsymbols
|
|
391
|
+
@maintree = Huffman::Tree.new(@maintree_lengths, @maintree_maxsymbols,
|
|
392
|
+
bit_order: :msb)
|
|
371
393
|
unless @maintree.build_table(LENGTH_TABLEBITS)
|
|
372
394
|
raise DecompressionError,
|
|
373
395
|
"Failed to build main tree"
|
|
@@ -380,7 +402,8 @@ module Cabriolet
|
|
|
380
402
|
read_lengths(@length_lengths, 0, NUM_SECONDARY_LENGTHS)
|
|
381
403
|
|
|
382
404
|
# Build length tree (may be empty)
|
|
383
|
-
@length_tree = Huffman::Tree.new(@length_lengths, LENGTH_MAXSYMBOLS
|
|
405
|
+
@length_tree = Huffman::Tree.new(@length_lengths, LENGTH_MAXSYMBOLS,
|
|
406
|
+
bit_order: :msb)
|
|
384
407
|
if @length_tree.build_table(LENGTH_TABLEBITS)
|
|
385
408
|
@length_empty = false
|
|
386
409
|
else
|
|
@@ -401,7 +424,8 @@ module Cabriolet
|
|
|
401
424
|
@pretree_lengths[i] = @bitstream.read_bits(4)
|
|
402
425
|
end
|
|
403
426
|
|
|
404
|
-
@pretree = Huffman::Tree.new(@pretree_lengths, PRETREE_MAXSYMBOLS
|
|
427
|
+
@pretree = Huffman::Tree.new(@pretree_lengths, PRETREE_MAXSYMBOLS,
|
|
428
|
+
bit_order: :msb)
|
|
405
429
|
return if @pretree.build_table(PRETREE_TABLEBITS)
|
|
406
430
|
|
|
407
431
|
raise DecompressionError, "Failed to build pretree"
|
|
@@ -409,11 +433,16 @@ module Cabriolet
|
|
|
409
433
|
|
|
410
434
|
# Read code lengths using pretree
|
|
411
435
|
#
|
|
436
|
+
# Per libmspack's lzxd_read_lens, each call reads its own pretree first
|
|
437
|
+
#
|
|
412
438
|
# @param lengths [Array<Integer>] Target length array
|
|
413
439
|
# @param first [Integer] First symbol index
|
|
414
440
|
# @param last [Integer] Last symbol index (exclusive)
|
|
415
441
|
# @return [void]
|
|
416
442
|
def read_lengths(lengths, first, last)
|
|
443
|
+
# Read and build pretree (20 elements, 4 bits each)
|
|
444
|
+
read_pretree
|
|
445
|
+
|
|
417
446
|
x = first
|
|
418
447
|
|
|
419
448
|
while x < last
|
|
@@ -494,9 +523,9 @@ module Cabriolet
|
|
|
494
523
|
@window_posn += 1
|
|
495
524
|
run_length -= 1
|
|
496
525
|
else
|
|
497
|
-
# Match: decode length and offset
|
|
498
|
-
decode_match(main_element, run_length)
|
|
499
|
-
run_length
|
|
526
|
+
# Match: decode length and offset, then decrement run_length by match_length
|
|
527
|
+
match_length = decode_match(main_element, run_length)
|
|
528
|
+
run_length -= match_length
|
|
500
529
|
end
|
|
501
530
|
end
|
|
502
531
|
end
|
|
@@ -504,8 +533,8 @@ module Cabriolet
|
|
|
504
533
|
# Decode and copy a match
|
|
505
534
|
#
|
|
506
535
|
# @param main_element [Integer] Main tree symbol
|
|
507
|
-
# @param run_length [Integer] Remaining run length
|
|
508
|
-
# @return [
|
|
536
|
+
# @param _run_length [Integer] Remaining run length (unused, kept for compatibility)
|
|
537
|
+
# @return [Integer] Match length (bytes consumed)
|
|
509
538
|
def decode_match(main_element, _run_length)
|
|
510
539
|
main_element -= NUM_CHARS
|
|
511
540
|
|
|
@@ -533,8 +562,10 @@ module Cabriolet
|
|
|
533
562
|
match_offset = @r0
|
|
534
563
|
when 1
|
|
535
564
|
@r1, @r0 = @r0, @r1
|
|
565
|
+
match_offset = @r0
|
|
536
566
|
when 2
|
|
537
567
|
@r2, @r0 = @r0, @r2
|
|
568
|
+
match_offset = @r0
|
|
538
569
|
else
|
|
539
570
|
# Calculate offset from position slot
|
|
540
571
|
extra = position_slot >= 36 ? 17 : EXTRA_BITS[position_slot]
|
|
@@ -573,6 +604,9 @@ module Cabriolet
|
|
|
573
604
|
|
|
574
605
|
# Copy match
|
|
575
606
|
copy_match(match_offset, match_length)
|
|
607
|
+
|
|
608
|
+
# Return match length so caller can decrement run_length
|
|
609
|
+
match_length
|
|
576
610
|
end
|
|
577
611
|
|
|
578
612
|
# Decode extended match length for LZX DELTA
|
|
@@ -608,9 +642,12 @@ module Cabriolet
|
|
|
608
642
|
# @return [void]
|
|
609
643
|
def copy_match(offset, length)
|
|
610
644
|
if offset > @window_posn
|
|
611
|
-
# Match wraps around window
|
|
612
|
-
|
|
613
|
-
|
|
645
|
+
# Match wraps around window - validate it doesn't read beyond available data
|
|
646
|
+
# Per libmspack lzxd.c lines 622-628: check if match offset goes beyond
|
|
647
|
+
# what has been decompressed so far (accounting for any reference data)
|
|
648
|
+
ref_data_size = 0 # We don't support reference data yet (LZX DELTA feature)
|
|
649
|
+
if offset > @offset && (offset - @window_posn) > ref_data_size
|
|
650
|
+
raise DecompressionError, "Match offset beyond LZX stream"
|
|
614
651
|
end
|
|
615
652
|
|
|
616
653
|
# Copy from end of window
|