cabriolet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +799 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +29 -0
- data/README.adoc +1207 -0
- data/exe/cabriolet +6 -0
- data/lib/cabriolet/auto.rb +173 -0
- data/lib/cabriolet/binary/bitstream.rb +148 -0
- data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
- data/lib/cabriolet/binary/chm_structures.rb +213 -0
- data/lib/cabriolet/binary/hlp_structures.rb +66 -0
- data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
- data/lib/cabriolet/binary/lit_structures.rb +107 -0
- data/lib/cabriolet/binary/oab_structures.rb +112 -0
- data/lib/cabriolet/binary/structures.rb +56 -0
- data/lib/cabriolet/binary/szdd_structures.rb +60 -0
- data/lib/cabriolet/cab/compressor.rb +382 -0
- data/lib/cabriolet/cab/decompressor.rb +510 -0
- data/lib/cabriolet/cab/extractor.rb +357 -0
- data/lib/cabriolet/cab/parser.rb +264 -0
- data/lib/cabriolet/chm/compressor.rb +513 -0
- data/lib/cabriolet/chm/decompressor.rb +436 -0
- data/lib/cabriolet/chm/parser.rb +254 -0
- data/lib/cabriolet/cli.rb +776 -0
- data/lib/cabriolet/compressors/base.rb +34 -0
- data/lib/cabriolet/compressors/lzss.rb +250 -0
- data/lib/cabriolet/compressors/lzx.rb +581 -0
- data/lib/cabriolet/compressors/mszip.rb +315 -0
- data/lib/cabriolet/compressors/quantum.rb +446 -0
- data/lib/cabriolet/constants.rb +75 -0
- data/lib/cabriolet/decompressors/base.rb +39 -0
- data/lib/cabriolet/decompressors/lzss.rb +138 -0
- data/lib/cabriolet/decompressors/lzx.rb +726 -0
- data/lib/cabriolet/decompressors/mszip.rb +390 -0
- data/lib/cabriolet/decompressors/none.rb +27 -0
- data/lib/cabriolet/decompressors/quantum.rb +456 -0
- data/lib/cabriolet/errors.rb +39 -0
- data/lib/cabriolet/format_detector.rb +156 -0
- data/lib/cabriolet/hlp/compressor.rb +272 -0
- data/lib/cabriolet/hlp/decompressor.rb +198 -0
- data/lib/cabriolet/hlp/parser.rb +131 -0
- data/lib/cabriolet/huffman/decoder.rb +79 -0
- data/lib/cabriolet/huffman/encoder.rb +108 -0
- data/lib/cabriolet/huffman/tree.rb +138 -0
- data/lib/cabriolet/kwaj/compressor.rb +479 -0
- data/lib/cabriolet/kwaj/decompressor.rb +237 -0
- data/lib/cabriolet/kwaj/parser.rb +183 -0
- data/lib/cabriolet/lit/compressor.rb +255 -0
- data/lib/cabriolet/lit/decompressor.rb +250 -0
- data/lib/cabriolet/models/cabinet.rb +81 -0
- data/lib/cabriolet/models/chm_file.rb +28 -0
- data/lib/cabriolet/models/chm_header.rb +67 -0
- data/lib/cabriolet/models/chm_section.rb +38 -0
- data/lib/cabriolet/models/file.rb +119 -0
- data/lib/cabriolet/models/folder.rb +102 -0
- data/lib/cabriolet/models/folder_data.rb +21 -0
- data/lib/cabriolet/models/hlp_file.rb +45 -0
- data/lib/cabriolet/models/hlp_header.rb +37 -0
- data/lib/cabriolet/models/kwaj_header.rb +98 -0
- data/lib/cabriolet/models/lit_header.rb +55 -0
- data/lib/cabriolet/models/oab_header.rb +95 -0
- data/lib/cabriolet/models/szdd_header.rb +72 -0
- data/lib/cabriolet/modifier.rb +326 -0
- data/lib/cabriolet/oab/compressor.rb +353 -0
- data/lib/cabriolet/oab/decompressor.rb +315 -0
- data/lib/cabriolet/parallel.rb +333 -0
- data/lib/cabriolet/repairer.rb +288 -0
- data/lib/cabriolet/streaming.rb +221 -0
- data/lib/cabriolet/system/file_handle.rb +107 -0
- data/lib/cabriolet/system/io_system.rb +87 -0
- data/lib/cabriolet/system/memory_handle.rb +105 -0
- data/lib/cabriolet/szdd/compressor.rb +217 -0
- data/lib/cabriolet/szdd/decompressor.rb +184 -0
- data/lib/cabriolet/szdd/parser.rb +127 -0
- data/lib/cabriolet/validator.rb +332 -0
- data/lib/cabriolet/version.rb +5 -0
- data/lib/cabriolet.rb +104 -0
- metadata +157 -0
|
@@ -0,0 +1,726 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module Decompressors
|
|
5
|
+
# LZX handles LZX compressed data
|
|
6
|
+
# Based on libmspack lzxd.c implementation
|
|
7
|
+
#
|
|
8
|
+
# The LZX method was created by Jonathan Forbes and Tomi Poutanen,
|
|
9
|
+
# adapted by Microsoft Corporation.
|
|
10
|
+
class LZX < Base
|
|
11
|
+
# Frame size (32KB per frame)
|
|
12
|
+
FRAME_SIZE = 32_768
|
|
13
|
+
|
|
14
|
+
# Block types
|
|
15
|
+
BLOCKTYPE_INVALID = 0
|
|
16
|
+
BLOCKTYPE_VERBATIM = 1
|
|
17
|
+
BLOCKTYPE_ALIGNED = 2
|
|
18
|
+
BLOCKTYPE_UNCOMPRESSED = 3
|
|
19
|
+
|
|
20
|
+
# Match constants
|
|
21
|
+
MIN_MATCH = 2
|
|
22
|
+
MAX_MATCH = 257
|
|
23
|
+
NUM_CHARS = 256
|
|
24
|
+
|
|
25
|
+
# Tree constants
|
|
26
|
+
PRETREE_NUM_ELEMENTS = 20
|
|
27
|
+
PRETREE_MAXSYMBOLS = 20
|
|
28
|
+
PRETREE_TABLEBITS = 6
|
|
29
|
+
|
|
30
|
+
ALIGNED_NUM_ELEMENTS = 8
|
|
31
|
+
ALIGNED_MAXSYMBOLS = 8
|
|
32
|
+
ALIGNED_TABLEBITS = 7
|
|
33
|
+
|
|
34
|
+
NUM_PRIMARY_LENGTHS = 7
|
|
35
|
+
NUM_SECONDARY_LENGTHS = 249
|
|
36
|
+
LENGTH_MAXSYMBOLS = 250
|
|
37
|
+
LENGTH_TABLEBITS = 12
|
|
38
|
+
|
|
39
|
+
# Position slots for different window sizes
|
|
40
|
+
POSITION_SLOTS = [30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290].freeze
|
|
41
|
+
|
|
42
|
+
# Extra bits for position slots
|
|
43
|
+
EXTRA_BITS = [
|
|
44
|
+
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
|
45
|
+
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16
|
|
46
|
+
].freeze
|
|
47
|
+
|
|
48
|
+
# Position base offsets
|
|
49
|
+
POSITION_BASE = [
|
|
50
|
+
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512,
|
|
51
|
+
768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384, 24_576, 32_768,
|
|
52
|
+
49_152, 65_536, 98_304, 131_072, 196_608, 262_144, 393_216, 524_288, 655_360,
|
|
53
|
+
786_432, 917_504, 1_048_576, 1_179_648, 1_310_720, 1_441_792, 1_572_864, 1_703_936,
|
|
54
|
+
1_835_008, 1_966_080, 2_097_152, 2_228_224, 2_359_296, 2_490_368, 2_621_440, 2_752_512,
|
|
55
|
+
2_883_584, 3_014_656, 3_145_728, 3_276_800, 3_407_872, 3_538_944, 3_670_016, 3_801_088,
|
|
56
|
+
3_932_160, 4_063_232, 4_194_304, 4_325_376, 4_456_448, 4_587_520, 4_718_592, 4_849_664,
|
|
57
|
+
4_980_736, 5_111_808, 5_242_880, 5_373_952, 5_505_024, 5_636_096, 5_767_168, 5_898_240,
|
|
58
|
+
6_029_312, 6_160_384, 6_291_456, 6_422_528, 6_553_600, 6_684_672, 6_815_744, 6_946_816,
|
|
59
|
+
7_077_888, 7_208_960, 7_340_032, 7_471_104, 7_602_176, 7_733_248, 7_864_320, 7_995_392,
|
|
60
|
+
8_126_464, 8_257_536, 8_388_608, 8_519_680, 8_650_752, 8_781_824, 8_912_896, 9_043_968,
|
|
61
|
+
9_175_040, 9_306_112, 9_437_184, 9_568_256, 9_699_328, 9_830_400, 9_961_472, 10_092_544,
|
|
62
|
+
10_223_616, 10_354_688, 10_485_760, 10_616_832, 10_747_904, 10_878_976, 11_010_048,
|
|
63
|
+
11_141_120, 11_272_192, 11_403_264, 11_534_336, 11_665_408, 11_796_480, 11_927_552,
|
|
64
|
+
12_058_624, 12_189_696, 12_320_768, 12_451_840, 12_582_912, 12_713_984, 12_845_056,
|
|
65
|
+
12_976_128, 13_107_200, 13_238_272, 13_369_344, 13_500_416, 13_631_488, 13_762_560,
|
|
66
|
+
13_893_632, 14_024_704, 14_155_776, 14_286_848, 14_417_920, 14_548_992, 14_680_064,
|
|
67
|
+
14_811_136, 14_942_208, 15_073_280, 15_204_352, 15_335_424, 15_466_496, 15_597_568,
|
|
68
|
+
15_728_640, 15_859_712, 15_990_784, 16_121_856, 16_252_928, 16_384_000, 16_515_072,
|
|
69
|
+
16_646_144, 16_777_216, 16_908_288, 17_039_360, 17_170_432, 17_301_504, 17_432_576,
|
|
70
|
+
17_563_648, 17_694_720, 17_825_792, 17_956_864, 18_087_936, 18_219_008, 18_350_080,
|
|
71
|
+
18_481_152, 18_612_224, 18_743_296, 18_874_368, 19_005_440, 19_136_512, 19_267_584,
|
|
72
|
+
19_398_656, 19_529_728, 19_660_800, 19_791_872, 19_922_944, 20_054_016, 20_185_088,
|
|
73
|
+
20_316_160, 20_447_232, 20_578_304, 20_709_376, 20_840_448, 20_971_520, 21_102_592,
|
|
74
|
+
21_233_664, 21_364_736, 21_495_808, 21_626_880, 21_757_952, 21_889_024, 22_020_096,
|
|
75
|
+
22_151_168, 22_282_240, 22_413_312, 22_544_384, 22_675_456, 22_806_528, 22_937_600,
|
|
76
|
+
23_068_672, 23_199_744, 23_330_816, 23_461_888, 23_592_960, 23_724_032, 23_855_104,
|
|
77
|
+
23_986_176, 24_117_248, 24_248_320, 24_379_392, 24_510_464, 24_641_536, 24_772_608,
|
|
78
|
+
24_903_680, 25_034_752, 25_165_824, 25_296_896, 25_427_968, 25_559_040, 25_690_112,
|
|
79
|
+
25_821_184, 25_952_256, 26_083_328, 26_214_400, 26_345_472, 26_476_544, 26_607_616,
|
|
80
|
+
26_738_688, 26_869_760, 27_000_832, 27_131_904, 27_262_976, 27_394_048, 27_525_120,
|
|
81
|
+
27_656_192, 27_787_264, 27_918_336, 28_049_408, 28_180_480, 28_311_552, 28_442_624,
|
|
82
|
+
28_573_696, 28_704_768, 28_835_840, 28_966_912, 29_097_984, 29_229_056, 29_360_128,
|
|
83
|
+
29_491_200, 29_622_272, 29_753_344, 29_884_416, 30_015_488, 30_146_560, 30_277_632,
|
|
84
|
+
30_408_704, 30_539_776, 30_670_848, 30_801_920, 30_932_992, 31_064_064, 31_195_136,
|
|
85
|
+
31_326_208, 31_457_280, 31_588_352, 31_719_424, 31_850_496, 31_981_568, 32_112_640,
|
|
86
|
+
32_243_712, 32_374_784, 32_505_856, 32_636_928, 32_768_000, 32_899_072, 33_030_144,
|
|
87
|
+
33_161_216, 33_292_288, 33_423_360
|
|
88
|
+
].freeze
|
|
89
|
+
|
|
90
|
+
attr_reader :window_bits, :reset_interval, :output_length, :is_delta
|
|
91
|
+
|
|
92
|
+
# Initialize LZX decompressor
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
# @param window_bits [Integer] Window size (15-21 for regular, 17-25 for DELTA)
# @param reset_interval [Integer] Frame count between resets (0 = never)
# @param output_length [Integer] Expected output length for E8 processing
# @param is_delta [Boolean] Whether this is LZX DELTA format
# @raise [ArgumentError] if window_bits is outside the range allowed for
#   the selected format
def initialize(io_system, input, output, buffer_size, window_bits:,
               reset_interval: 0, output_length: 0, is_delta: false)
  super(io_system, input, output, buffer_size)

  # Validate window_bits
  if is_delta
    unless (17..25).cover?(window_bits)
      raise ArgumentError,
            "LZX DELTA window_bits must be 17-25, got #{window_bits}"
    end
  elsif !(15..21).cover?(window_bits)
    raise ArgumentError,
          "LZX window_bits must be 15-21, got #{window_bits}"
  end

  @window_bits = window_bits
  @window_size = 1 << window_bits
  @reset_interval = reset_interval
  @output_length = output_length
  @is_delta = is_delta

  # Main tree = 256 literals + 8 length headers per position slot
  @num_offsets = POSITION_SLOTS[window_bits - 15] << 3
  @maintree_maxsymbols = NUM_CHARS + @num_offsets

  # The window holds arbitrary bytes, so it must be a BINARY string:
  # on a UTF-8 string, String#[] counts characters and mis-slices as
  # soon as the decoded data contains bytes >= 0x80.
  @window = "\0".b * @window_size
  @window_posn = 0
  @frame_posn = 0
  @frame = 0

  # Initialize R0, R1, R2 (LRU offset registers)
  @r0 = 1
  @r1 = 1
  @r2 = 1

  # Initialize block state
  @block_type = BLOCKTYPE_INVALID
  @block_length = 0
  @block_remaining = 0
  @header_read = false

  # Intel E8 transformation state (scratch buffer is binary as well)
  @intel_filesize = 0
  @intel_started = false
  @e8_buf = "\0".b * FRAME_SIZE

  # Initialize bitstream
  @bitstream = Binary::Bitstream.new(io_system, input, buffer_size)

  # Initialize Huffman trees
  initialize_trees

  # Output tracking
  @offset = 0
  @output_ptr = 0
  @output_end = 0
end
|
|
160
|
+
|
|
161
|
+
# Override the expected output length (used to size the final frame
# and for Intel E8 processing). Non-positive values are ignored.
#
# @param length [Integer] Expected output length
# @return [void]
def set_output_length(length)
  return unless length.positive?

  @output_length = length
end
|
|
168
|
+
|
|
169
|
+
# Decompress LZX data
#
# Decodes whole frames until the frame containing byte
# (@offset + bytes) has been processed, writing at most +bytes+ bytes
# to the output handle.
#
# NOTE(review): when fewer bytes are requested than a frame holds, the
# rest of that frame is not written and @offset still advances by the
# full frame size, so callers appear to be expected to request
# frame-aligned amounts (or everything in one call) - confirm.
#
# @param bytes [Integer] Number of bytes to decompress
# @return [Integer] Number of bytes decompressed
def decompress(bytes)
  return 0 if bytes <= 0

  total_written = 0
  end_frame = ((@offset + bytes) / FRAME_SIZE) + 1

  while @frame < end_frame
    # Check reset interval
    reset_state if @reset_interval.positive? && (@frame % @reset_interval).zero?

    # Read (and discard) the DELTA chunk size if needed
    @bitstream.read_bits(16) if @is_delta

    # Read Intel filesize header if needed
    read_intel_header unless @header_read

    frame_size = calculate_frame_size

    # Decode blocks until frame is complete
    decode_frame(frame_size)

    # Slice by BYTES: String#[] is character-indexed and returns the
    # wrong span when the window is not a binary-encoded string.
    frame_data = if should_apply_e8_transform?(frame_size)
                   apply_e8_transform(frame_size)
                 else
                   @window.byteslice(@frame_posn, frame_size)
                 end

    # Write frame
    write_amount = [bytes - total_written, frame_size].min
    io_system.write(output, frame_data.byteslice(0, write_amount))
    total_written += write_amount
    @offset += frame_size

    # Advance frame, wrapping both positions at the end of the window
    @frame += 1
    @frame_posn += frame_size
    @frame_posn = 0 if @frame_posn == @window_size
    @window_posn = 0 if @window_posn == @window_size

    # Re-align bitstream (byte_align is safe to call even if already aligned)
    @bitstream.byte_align
  end

  total_written
end
|
|
220
|
+
|
|
221
|
+
private
|
|
222
|
+
|
|
223
|
+
# Allocate zeroed code-length arrays for the four Huffman trees and
# clear any previously built decoding tables.
#
# @return [void]
def initialize_trees
  @pretree_lengths, @maintree_lengths, @length_lengths, @aligned_lengths =
    [PRETREE_MAXSYMBOLS, @maintree_maxsymbols, LENGTH_MAXSYMBOLS, ALIGNED_MAXSYMBOLS]
    .map { |symbol_count| Array.new(symbol_count, 0) }

  # Tables are built lazily when a block header is read
  @pretree = @maintree = @length_tree = @aligned_tree = nil
  @length_empty = false
end
|
|
238
|
+
|
|
239
|
+
# Reset LZX state (called at reset intervals): offset registers back
# to 1, no block in progress, Intel header to be re-read, and main /
# length code lengths zeroed.
#
# @return [void]
def reset_state
  @r0 = @r1 = @r2 = 1

  @header_read = false
  @block_remaining = 0
  @block_type = BLOCKTYPE_INVALID

  # Code lengths are delta-coded against the previous block, so they
  # must restart from all zeros.
  @maintree_lengths.fill(0)
  @length_lengths.fill(0)
end
|
|
254
|
+
|
|
255
|
+
# Read the Intel filesize header: a 1-bit flag, followed (when set) by
# a 32-bit size sent as two 16-bit halves, high half first. A clear
# flag means a filesize of 0.
#
# @return [void]
def read_intel_header
  filesize = 0
  if @bitstream.read_bits(1) == 1
    upper = @bitstream.read_bits(16)
    lower = @bitstream.read_bits(16)
    filesize = (upper << 16) | lower
  end

  @intel_filesize = filesize
  @header_read = true
end
|
|
268
|
+
|
|
269
|
+
# Size of the next frame: FRAME_SIZE, shortened to the bytes left when
# an expected output length is known and fewer than FRAME_SIZE remain.
#
# @return [Integer] Frame size in bytes
def calculate_frame_size
  return FRAME_SIZE unless @output_length.positive?

  bytes_left = @output_length - @offset
  bytes_left < FRAME_SIZE ? bytes_left : FRAME_SIZE
end
|
|
277
|
+
|
|
278
|
+
# Decode blocks until frame is complete
#
# Consumes as many blocks (or parts of blocks) as needed to fill the
# window from @window_posn up to @frame_posn + frame_size, then checks
# that the frame was filled exactly.
#
# @param frame_size [Integer] Target frame size
# @return [void]
# @raise [DecompressionError] on an invalid block type, or when the
#   decoded data does not end exactly on the frame boundary
def decode_frame(frame_size)
  bytes_todo = @frame_posn + frame_size - @window_posn

  while bytes_todo.positive?
    # Read new block header if needed
    read_block_header if @block_remaining.zero?

    # Decode as much as the current block and the frame both allow
    this_run = [@block_remaining, bytes_todo].min
    bytes_todo -= this_run
    @block_remaining -= this_run

    case @block_type
    when BLOCKTYPE_VERBATIM, BLOCKTYPE_ALIGNED
      decode_huffman_block(this_run)
    when BLOCKTYPE_UNCOMPRESSED
      decode_uncompressed_block(this_run)
    else
      raise DecompressionError, "Invalid block type: #{@block_type}"
    end
  end

  # A short or overlong decode would otherwise go unnoticed and stale
  # window contents would be emitted as output.
  decoded = @window_posn - @frame_posn
  return if decoded == frame_size

  raise DecompressionError,
        "Decode beyond output frame limits: #{decoded} != #{frame_size}"
end
|
|
304
|
+
|
|
305
|
+
# Read the next block header: 3-bit block type, 24-bit block length
# (read as 16 + 8 bits), then the type-specific header.
#
# NOTE(review): re-alignment after an odd-sized uncompressed block
# relies on Bitstream#byte_align, which is not visible here - confirm
# it skips the padding as intended.
#
# @return [void]
# @raise [DecompressionError] if the block type is not 1, 2 or 3
def read_block_header
  odd_uncompressed = @block_type == BLOCKTYPE_UNCOMPRESSED && @block_length.allbits?(1)
  @bitstream.byte_align if odd_uncompressed

  @block_type = @bitstream.read_bits(3)
  upper = @bitstream.read_bits(16)
  lower = @bitstream.read_bits(8)
  @block_remaining = @block_length = (upper << 8) | lower

  header_reader = {
    BLOCKTYPE_ALIGNED => :read_aligned_block_header,
    BLOCKTYPE_VERBATIM => :read_verbatim_block_header,
    BLOCKTYPE_UNCOMPRESSED => :read_uncompressed_block_header
  }[@block_type]
  raise DecompressionError, "Invalid block type: #{@block_type}" unless header_reader

  send(header_reader)
end
|
|
330
|
+
|
|
331
|
+
# Read an aligned-offset block header: eight 3-bit aligned-tree code
# lengths, then the main and length trees (same as verbatim).
#
# @return [void]
# @raise [DecompressionError] if the aligned tree cannot be built
def read_aligned_block_header
  ALIGNED_NUM_ELEMENTS.times do |symbol|
    @aligned_lengths[symbol] = @bitstream.read_bits(3)
  end

  @aligned_tree = Huffman::Tree.new(@aligned_lengths, ALIGNED_MAXSYMBOLS)
  built = @aligned_tree.build_table(ALIGNED_TABLEBITS)
  raise DecompressionError, "Failed to build aligned tree" unless built

  read_main_and_length_trees
end
|
|
350
|
+
|
|
351
|
+
# Read a verbatim block header, which consists only of the main and
# length trees.
#
# @return [void]
def read_verbatim_block_header
  read_main_and_length_trees
end
|
|
357
|
+
|
|
358
|
+
# Read main and length trees
#
# The code lengths arrive in three groups - main tree literals
# (0...NUM_CHARS), main tree match symbols, and the length tree - and
# the LZX bitstream stores a fresh 20-entry pretree in front of EACH
# group, so the pretree is re-read before every read_lengths call.
#
# @return [void]
# @raise [DecompressionError] if the main tree, or a non-empty length
#   tree, cannot be built
def read_main_and_length_trees
  # Main tree: literals, then match symbols (each with its own pretree)
  read_pretree
  read_lengths(@maintree_lengths, 0, NUM_CHARS)
  read_pretree
  read_lengths(@maintree_lengths, NUM_CHARS, @maintree_maxsymbols)

  # LENGTH_TABLEBITS is reused for the main tree lookup table here
  @maintree = Huffman::Tree.new(@maintree_lengths, @maintree_maxsymbols)
  unless @maintree.build_table(LENGTH_TABLEBITS)
    raise DecompressionError,
          "Failed to build main tree"
  end

  # A usable 0xE8 literal means E8 translation may apply to the output
  @intel_started = true if @maintree_lengths[0xE8] != 0

  # Length tree (own pretree again)
  read_pretree
  read_lengths(@length_lengths, 0, NUM_SECONDARY_LENGTHS)

  # An all-zero length tree is legal; it only becomes an error if a
  # match later needs it.
  @length_tree = Huffman::Tree.new(@length_lengths, LENGTH_MAXSYMBOLS)
  if @length_tree.build_table(LENGTH_TABLEBITS)
    @length_empty = false
  else
    @length_empty = @length_lengths[0...LENGTH_MAXSYMBOLS].all?(&:zero?)
    unless @length_empty
      raise DecompressionError,
            "Failed to build length tree"
    end
  end
end
|
|
395
|
+
|
|
396
|
+
# Read the pretree (20 code lengths, 4 bits each) and build its
# decoding table.
#
# @return [void]
# @raise [DecompressionError] if the pretree cannot be built
def read_pretree
  PRETREE_NUM_ELEMENTS.times do |symbol|
    @pretree_lengths[symbol] = @bitstream.read_bits(4)
  end

  @pretree = Huffman::Tree.new(@pretree_lengths, PRETREE_MAXSYMBOLS)
  raise DecompressionError, "Failed to build pretree" unless @pretree.build_table(PRETREE_TABLEBITS)
end
|
|
409
|
+
|
|
410
|
+
# Read code lengths for symbols first...last using the current pretree.
#
# Pretree symbols 0-16 are deltas (mod 17) against the length already
# stored for that symbol; 17 and 18 are runs of zeros; 19 is a short
# run of one delta-coded value.
#
# @param lengths [Array<Integer>] Target length array (updated in place)
# @param first [Integer] First symbol index
# @param last [Integer] Last symbol index (exclusive)
# @return [void]
def read_lengths(lengths, first, last)
  next_code = lambda do
    Huffman::Decoder.decode_symbol(
      @bitstream, @pretree.table, PRETREE_TABLEBITS,
      @pretree_lengths, PRETREE_MAXSYMBOLS
    )
  end
  # New length = (stored length - code) mod 17
  apply_delta = lambda do |index, code|
    value = lengths[index] - code
    value.negative? ? value + 17 : value
  end

  index = first
  while index < last
    code = next_code.call

    if [17, 18].include?(code)
      # 17: 4..19 zeros (4-bit count); 18: 20..51 zeros (5-bit count)
      count = code == 17 ? @bitstream.read_bits(4) + 4 : @bitstream.read_bits(5) + 20
      index.upto(index + count - 1) { |slot| lengths[slot] = 0 }
      index += count
    elsif code == 19
      # 4..5 copies of a single delta-coded value
      count = @bitstream.read_bits(1) + 4
      value = apply_delta.call(index, next_code.call)
      index.upto(index + count - 1) { |slot| lengths[slot] = value }
      index += count
    else
      lengths[index] = apply_delta.call(index, code)
      index += 1
    end
  end
end
|
|
462
|
+
|
|
463
|
+
# Read an uncompressed block header: align the bitstream, then load
# R0, R1 and R2 from twelve bytes (three little-endian 32-bit values).
#
# @return [void]
def read_uncompressed_block_header
  # An uncompressed block may contain E8 opcodes
  @intel_started = true

  @bitstream.byte_align

  raw = Array.new(12) { @bitstream.read_bits(8) }
  words = raw.each_slice(4).map do |quad|
    quad.each_with_index.reduce(0) { |acc, (byte, idx)| acc | (byte << (8 * idx)) }
  end

  @r0 = words[0]
  @r1 = words[1]
  @r2 = words[2]
end
|
|
478
|
+
|
|
479
|
+
# Decode Huffman-compressed block data (verbatim or aligned)
#
# Decodes main-tree symbols until +run_length+ output bytes have been
# produced. Literals produce one byte; a match produces as many bytes
# as decode_match copies, measured by how far @window_posn advanced.
#
# The final match may legitimately produce more bytes than were left
# in this run; the excess is taken out of @block_remaining so the
# block accounting stays exact.
#
# @param run_length [Integer] Number of bytes to decode
# @return [void]
# @raise [DecompressionError] if the final match extends past the end
#   of the current block
def decode_huffman_block(run_length)
  while run_length.positive?
    main_element = Huffman::Decoder.decode_symbol(
      @bitstream, @maintree.table, LENGTH_TABLEBITS,
      @maintree_lengths, @maintree_maxsymbols
    )

    if main_element < NUM_CHARS
      # Literal byte
      @window.setbyte(@window_posn, main_element)
      @window_posn += 1
      run_length -= 1
    else
      # Match: subtract only what was actually copied and keep going -
      # zeroing the counter here would abandon the rest of the run.
      start_posn = @window_posn
      decode_match(main_element, run_length)
      run_length -= @window_posn - start_posn
    end
  end

  return unless run_length.negative?

  overrun = -run_length
  if overrun > @block_remaining
    raise DecompressionError,
          "Match overruns end of block by #{overrun - @block_remaining} bytes"
  end

  @block_remaining -= overrun
end
|
|
503
|
+
|
|
504
|
+
# Decode and copy a match
#
# The main symbol (minus NUM_CHARS) packs a 3-bit length header in its
# low bits and a position slot above them. Slots 0-2 reuse the recent
# offsets R0/R1/R2; higher slots encode a new offset from
# POSITION_BASE plus extra bits, which then becomes R0.
#
# @param main_element [Integer] Main tree symbol
# @param _run_length [Integer] Remaining run length (unused)
# @return [void]
# @raise [DecompressionError] if the length tree is needed but empty,
#   or the match would run past the end of the window
def decode_match(main_element, _run_length)
  main_element -= NUM_CHARS

  # Length header; the value 7 (NUM_PRIMARY_LENGTHS, which doubles as
  # the 3-bit mask) means a footer from the length tree follows.
  match_length = main_element & NUM_PRIMARY_LENGTHS
  if match_length == NUM_PRIMARY_LENGTHS
    raise DecompressionError, "Length tree needed but empty" if @length_empty

    match_length += Huffman::Decoder.decode_symbol(
      @bitstream, @length_tree.table, LENGTH_TABLEBITS,
      @length_lengths, LENGTH_MAXSYMBOLS
    )
  end
  match_length += MIN_MATCH

  position_slot = main_element >> 3
  match_offset =
    case position_slot
    when 0
      @r0
    when 1
      # Swap R0 and R1; the match uses the promoted offset. Without
      # taking the value here the offset would be nil.
      @r0, @r1 = @r1, @r0
      @r0
    when 2
      @r0, @r2 = @r2, @r0
      @r0
    else
      # Slots 36 and above all carry 17 extra bits
      extra = position_slot >= 36 ? 17 : EXTRA_BITS[position_slot]
      new_offset = POSITION_BASE[position_slot] - 2

      if extra >= 3 && @block_type == BLOCKTYPE_ALIGNED
        # Aligned blocks: high bits verbatim, low 3 bits from aligned tree
        new_offset += @bitstream.read_bits(extra - 3) << 3 if extra > 3
        new_offset += Huffman::Decoder.decode_symbol(
          @bitstream, @aligned_tree.table, ALIGNED_TABLEBITS,
          @aligned_lengths, ALIGNED_MAXSYMBOLS
        )
      elsif extra.positive?
        new_offset += @bitstream.read_bits(extra)
      end

      # Push the new offset onto the LRU queue
      @r2 = @r1
      @r1 = @r0
      @r0 = new_offset
    end

  # LZX DELTA: the maximum length signals that an extension follows
  match_length += decode_extended_length if match_length == MAX_MATCH && @is_delta

  if @window_posn + match_length > @window_size
    raise DecompressionError,
          "Match runs over window boundary"
  end

  copy_match(match_offset, match_length)
end
|
|
577
|
+
|
|
578
|
+
# Decode extended match length for LZX DELTA
#
# Three bits are peeked and the prefix code selects how many are
# consumed and how wide the following field is:
#   '0'   ->  8 bits
#   '10'  -> 10 bits + 0x100
#   '110' -> 12 bits + 0x500
#   '111' -> 15 bits
#
# NOTE(review): the first code bit is tested as bit 0 of the peeked
# value; whether that matches the bit order Bitstream#peek_bits
# returns cannot be confirmed from this file.
#
# @return [Integer] Additional length
def decode_extended_length
  prefix = @bitstream.peek_bits(3)

  skip, width, bias =
    if prefix.nobits?(1)
      [1, 8, nil]
    elsif prefix.nobits?(2)
      [2, 10, 0x100]
    elsif prefix.nobits?(4)
      [3, 12, 0x500]
    else
      [3, 15, nil]
    end

  @bitstream.skip_bits(skip)
  extra = @bitstream.read_bits(width)
  bias ? extra + bias : extra
end
|
|
603
|
+
|
|
604
|
+
# Copy match from window
#
# Copies +length+ bytes starting +offset+ bytes behind @window_posn to
# @window_posn, byte by byte so overlapping matches repeat correctly.
# When the source lies before the start of the window it is taken from
# the end of the window and wraps back to index 0.
#
# @param offset [Integer] Match offset
# @param length [Integer] Match length
# @return [void]
# @raise [DecompressionError] if the offset reaches back before the
#   start of the stream, or further than one window
def copy_match(offset, length)
  if offset > @window_posn
    # Source starts this many bytes before the end of the window
    tail = offset - @window_posn

    raise DecompressionError, "Match offset beyond stream" if offset > @offset

    # Without this check the source index goes negative and getbyte
    # reads from the wrong end of the window (or returns nil).
    raise DecompressionError, "Match offset beyond window boundaries" if tail > @window_size

    src_pos = @window_size - tail
  else
    src_pos = @window_posn - offset
  end

  length.times do
    src_pos = 0 if src_pos == @window_size # wrap to the window start
    @window.setbyte(@window_posn, @window.getbyte(src_pos))
    @window_posn += 1
    src_pos += 1
  end
end
|
|
652
|
+
|
|
653
|
+
# Copy +run_length+ raw bytes from the bitstream into the window at
# @window_posn.
#
# @param run_length [Integer] Number of bytes to decode
# @return [void]
def decode_uncompressed_block(run_length)
  run_length.times do
    @window.setbyte(@window_posn, @bitstream.read_bits(8))
    @window_posn += 1
  end
end
|
|
664
|
+
|
|
665
|
+
# Decide whether the Intel E8 transformation applies to the current
# frame: E8 data must have been seen, a non-zero Intel filesize must
# have been read, the frame index must be below 32768 and the frame
# must be longer than 10 bytes.
#
# @param frame_size [Integer] Frame size
# @return [Boolean] true if transformation should be applied
def should_apply_e8_transform?(frame_size)
  return @intel_started unless @intel_started
  return false unless @intel_filesize.positive?
  return false unless @frame < 32_768

  frame_size > 10
end
|
|
675
|
+
|
|
676
|
+
# Apply Intel E8 transformation
#
# Works on a binary copy of the current frame (the window itself is
# left untouched). Every 0xE8 byte in the first frame_size - 10 bytes
# is treated as an x86 CALL whose 32-bit little-endian operand was
# stored as an absolute address; operands in range are converted back
# to relative form.
#
# @param frame_size [Integer] Frame size
# @return [String] Transformed frame data (binary, frame_size bytes)
def apply_e8_transform(frame_size)
  # byteslice + BINARY: character-indexed slicing miscounts as soon as
  # the frame contains bytes that form multi-byte UTF-8 sequences.
  buf = @window.byteslice(@frame_posn, frame_size).force_encoding(Encoding::BINARY)

  data_pos = 0
  data_end = frame_size - 10
  cur_pos = @offset # stream position of buf[0]

  while data_pos < data_end
    unless buf.getbyte(data_pos) == 0xE8
      data_pos += 1
      cur_pos += 1
      next
    end

    # Read absolute offset (little-endian) and convert to signed 32-bit
    abs_off = buf.getbyte(data_pos + 1) |
              (buf.getbyte(data_pos + 2) << 8) |
              (buf.getbyte(data_pos + 3) << 16) |
              (buf.getbyte(data_pos + 4) << 24)
    abs_off -= 0x100000000 if abs_off >= 0x80000000

    if abs_off >= -cur_pos && abs_off < @intel_filesize
      rel_off = abs_off >= 0 ? abs_off - cur_pos : abs_off + @intel_filesize

      # Write relative offset (little-endian)
      buf.setbyte(data_pos + 1, rel_off & 0xFF)
      buf.setbyte(data_pos + 2, (rel_off >> 8) & 0xFF)
      buf.setbyte(data_pos + 3, (rel_off >> 16) & 0xFF)
      buf.setbyte(data_pos + 4, (rel_off >> 24) & 0xFF)
    end

    # Skip the opcode and its 4-byte operand
    data_pos += 5
    cur_pos += 5
  end

  # Keep the scratch buffer holding the latest frame; hand the caller
  # its own copy.
  @e8_buf = buf
  buf.dup
end
|
|
724
|
+
end
|
|
725
|
+
end
|
|
726
|
+
end
|