cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,726 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Decompressors
5
+ # LZX handles LZX compressed data
6
+ # Based on libmspack lzxd.c implementation
7
+ #
8
+ # The LZX method was created by Jonathan Forbes and Tomi Poutanen,
9
+ # adapted by Microsoft Corporation.
10
+ class LZX < Base
11
# Bytes of decompressed output per LZX frame (32 KiB)
FRAME_SIZE = 1 << 15

# Block type codes as stored in the 3-bit block header field
BLOCKTYPE_INVALID = 0
BLOCKTYPE_VERBATIM = 1
BLOCKTYPE_ALIGNED = 2
BLOCKTYPE_UNCOMPRESSED = 3

# Match length bounds and number of literal symbols
MIN_MATCH = 2
MAX_MATCH = 257
NUM_CHARS = 256

# Pretree: 20 symbols, decoded through a 6-bit lookup table
PRETREE_NUM_ELEMENTS = 20
PRETREE_MAXSYMBOLS = PRETREE_NUM_ELEMENTS
PRETREE_TABLEBITS = 6

# Aligned-offset tree: 8 symbols, decoded through a 7-bit lookup table
ALIGNED_NUM_ELEMENTS = 8
ALIGNED_MAXSYMBOLS = ALIGNED_NUM_ELEMENTS
ALIGNED_TABLEBITS = 7

# Length tree: 249 secondary lengths plus one spare entry
NUM_PRIMARY_LENGTHS = 7
NUM_SECONDARY_LENGTHS = 249
LENGTH_MAXSYMBOLS = NUM_SECONDARY_LENGTHS + 1
LENGTH_TABLEBITS = 12

# Number of position slots, indexed by (window_bits - 15)
POSITION_SLOTS = [30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290].freeze

# Extra (footer) bits per position slot for slots 0..35:
# four zeros, then each value 1..16 twice.
EXTRA_BITS = Array.new(36) { |slot| [(slot - 2) / 2, 0].max }.freeze

# Base offset of each of the 290 position slots. Each base is the previous
# base plus 2**footer_bits of the previous slot, where footer bits follow the
# EXTRA_BITS pattern and are capped at 17 from slot 36 onwards.
POSITION_BASE = (0...289).each_with_object([0]) do |slot, bases|
  footer_bits = ((slot - 2) / 2).clamp(0, 17)
  bases << (bases.last + (1 << footer_bits))
end.freeze
89
+
90
+ attr_reader :window_bits, :reset_interval, :output_length, :is_delta
91
+
92
# Initialize LZX decompressor
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
# @param window_bits [Integer] Window size exponent; the window holds
#   1 << window_bits bytes (15-21 for regular LZX, 17-25 for DELTA)
# @param reset_interval [Integer] Frame count between resets (0 = never)
# @param output_length [Integer] Expected output length for E8 processing
# @param is_delta [Boolean] Whether this is LZX DELTA format
# @raise [ArgumentError] if window_bits is outside the range allowed for
#   the selected format
def initialize(io_system, input, output, buffer_size, window_bits:,
               reset_interval: 0, output_length: 0, is_delta: false)
  super(io_system, input, output, buffer_size)

  # Validate window_bits
  if is_delta
    unless (17..25).cover?(window_bits)
      raise ArgumentError,
            "LZX DELTA window_bits must be 17-25, got #{window_bits}"
    end
  elsif !(15..21).cover?(window_bits)
    raise ArgumentError,
          "LZX window_bits must be 15-21, got #{window_bits}"
  end

  @window_bits = window_bits
  @window_size = 1 << window_bits
  @reset_interval = reset_interval
  @output_length = output_length
  @is_delta = is_delta

  # Calculate number of position slots (8 main-tree symbols per slot)
  @num_offsets = POSITION_SLOTS[window_bits - 15] << 3
  @maintree_maxsymbols = NUM_CHARS + @num_offsets

  # Sliding window. The buffer must be binary (ASCII-8BIT): a plain "\0"
  # literal is UTF-8, and String#[] on a UTF-8 string counts characters,
  # so decoded bytes that happen to form multi-byte sequences would shift
  # every later slice.
  @window = "\0".b * @window_size
  @window_posn = 0
  @frame_posn = 0
  @frame = 0

  # Initialize R0, R1, R2 (LRU offset registers)
  @r0 = 1
  @r1 = 1
  @r2 = 1

  # Initialize block state
  @block_type = BLOCKTYPE_INVALID
  @block_length = 0
  @block_remaining = 0
  @header_read = false

  # Intel E8 transformation state (scratch buffer is binary as well)
  @intel_filesize = 0
  @intel_started = false
  @e8_buf = "\0".b * FRAME_SIZE

  # Initialize bitstream
  @bitstream = Binary::Bitstream.new(io_system, input, buffer_size)

  # Initialize Huffman trees
  initialize_trees

  # Output tracking
  @offset = 0
  @output_ptr = 0
  @output_end = 0
end
160
+
161
# Record the expected total output length (used when sizing the final
# frame and for Intel E8 processing). Non-positive values are ignored.
#
# @param length [Integer] Expected output length
# @return [void]
def set_output_length(length)
  return unless length.positive?

  @output_length = length
end
168
+
169
# Decompress LZX data
#
# Decodes whole frames and writes up to +bytes+ bytes to the output handle.
# When a call needs only part of a frame, the unwritten tail of that frame
# is kept in @pending_output and emitted first by the next call, so
# successive calls produce a contiguous stream. Decoding stops early once
# the declared output length has been fully decoded.
#
# @param bytes [Integer] Number of bytes to decompress
# @return [Integer] Number of bytes written to the output
def decompress(bytes)
  return 0 if bytes <= 0

  total_written = 0

  # Emit bytes decoded by an earlier call but not yet written
  pending = @pending_output
  if pending && !pending.empty?
    chunk = pending.byteslice(0, bytes)
    io_system.write(output, chunk)
    total_written = chunk.bytesize
    @pending_output = pending.byteslice(total_written, pending.bytesize - total_written)
  end

  while total_written < bytes
    # Calculate frame size first: zero (or less) means the declared output
    # length is exhausted, and no further header bits may be consumed.
    frame_size = calculate_frame_size
    break unless frame_size.positive?

    # Check reset interval
    reset_state if @reset_interval.positive? && (@frame % @reset_interval).zero?

    # Read DELTA chunk size if needed
    @bitstream.read_bits(16) if @is_delta

    # Read Intel filesize header if needed
    read_intel_header unless @header_read

    # Decode blocks until frame is complete
    decode_frame(frame_size)

    # Apply Intel E8 transformation if needed. byteslice keeps the slice
    # byte-indexed regardless of the window string's encoding.
    frame_data = if should_apply_e8_transform?(frame_size)
                   apply_e8_transform(frame_size)
                 else
                   @window.byteslice(@frame_posn, frame_size)
                 end

    # Write as much of the frame as the caller asked for; keep the rest
    write_amount = [bytes - total_written, frame_size].min
    io_system.write(output, frame_data.byteslice(0, write_amount))
    total_written += write_amount
    leftover = frame_size - write_amount
    @pending_output = leftover.positive? ? frame_data.byteslice(write_amount, leftover) : nil

    # @offset counts decoded bytes (frame-aligned); it is the E8 position
    # and the basis for the final-frame size.
    @offset += frame_size

    # Advance frame and wrap window / frame positions
    @frame += 1
    @frame_posn += frame_size
    @frame_posn = 0 if @frame_posn == @window_size
    @window_posn = 0 if @window_posn == @window_size

    # Re-align bitstream (byte_align is safe to call even if already aligned)
    @bitstream.byte_align
  end

  total_written
end
220
+
221
+ private
222
+
223
# Allocate zeroed code-length arrays for all four Huffman trees and clear
# any previously built decoding trees.
#
# @return [void]
def initialize_trees
  @pretree = @maintree = @length_tree = @aligned_tree = nil

  @pretree_lengths = Array.new(PRETREE_MAXSYMBOLS) { 0 }
  @maintree_lengths = Array.new(@maintree_maxsymbols) { 0 }
  @length_lengths = Array.new(LENGTH_MAXSYMBOLS) { 0 }
  @aligned_lengths = Array.new(ALIGNED_MAXSYMBOLS) { 0 }

  @length_empty = false
end
238
+
239
# Return the decoder to its start-of-stream state (called at reset
# intervals): offset registers back to 1, no current block, Intel header
# to be re-read, and main/length code lengths zeroed because new lengths
# are stored as deltas against the previous ones.
#
# @return [void]
def reset_state
  @block_type = BLOCKTYPE_INVALID
  @block_remaining = 0
  @header_read = false
  @r0 = @r1 = @r2 = 1

  @maintree_lengths.fill(0)
  @length_lengths.fill(0)
end
254
+
255
# Read the Intel E8 header: one flag bit, followed (only when the flag is
# set) by a 32-bit file size sent as two 16-bit halves, high half first.
# A clear flag leaves the file size at 0.
#
# @return [void]
def read_intel_header
  has_filesize = @bitstream.read_bits(1) == 1

  @intel_filesize =
    if has_filesize
      upper_half = @bitstream.read_bits(16)
      lower_half = @bitstream.read_bits(16)
      (upper_half << 16) | lower_half
    else
      0
    end

  @header_read = true
end
268
+
269
# Size of the next frame: a full FRAME_SIZE, or the bytes remaining up to
# the declared output length when that is known and smaller.
#
# @return [Integer] Frame size in bytes
def calculate_frame_size
  return FRAME_SIZE unless @output_length.positive?

  remaining = @output_length - @offset
  remaining < FRAME_SIZE ? remaining : FRAME_SIZE
end
277
+
278
# Decode blocks until the window holds frame_size bytes past the frame
# start. A new block header is read whenever the current block is
# exhausted; each pass decodes at most the remainder of the current block.
#
# @param frame_size [Integer] Target frame size
# @return [void]
# @raise [DecompressionError] if the current block type is not decodable
def decode_frame(frame_size)
  bytes_todo = @frame_posn + frame_size - @window_posn

  until bytes_todo <= 0
    read_block_header if @block_remaining.zero?

    # Consume whichever is smaller: the rest of the block or of the frame
    this_run = @block_remaining < bytes_todo ? @block_remaining : bytes_todo
    bytes_todo -= this_run
    @block_remaining -= this_run

    if [BLOCKTYPE_VERBATIM, BLOCKTYPE_ALIGNED].include?(@block_type)
      decode_huffman_block(this_run)
    elsif @block_type == BLOCKTYPE_UNCOMPRESSED
      decode_uncompressed_block(this_run)
    else
      raise DecompressionError, "Invalid block type: #{@block_type}"
    end
  end
end
304
+
305
# Read the next block header: a 3-bit block type, a 24-bit block length
# (sent as 16 high bits then 8 low bits), then the type-specific header.
#
# @return [void]
# @raise [DecompressionError] if the block type is not verbatim, aligned
#   or uncompressed
def read_block_header
  # After an uncompressed block of odd length, re-align before continuing
  previous_was_odd_raw = @block_type == BLOCKTYPE_UNCOMPRESSED && @block_length.odd?
  @bitstream.byte_align if previous_was_odd_raw

  @block_type = @bitstream.read_bits(3)
  upper = @bitstream.read_bits(16)
  lower = @bitstream.read_bits(8)
  @block_length = (upper << 8) | lower
  @block_remaining = @block_length

  header_reader = {
    BLOCKTYPE_ALIGNED => :read_aligned_block_header,
    BLOCKTYPE_VERBATIM => :read_verbatim_block_header,
    BLOCKTYPE_UNCOMPRESSED => :read_uncompressed_block_header
  }[@block_type]
  raise DecompressionError, "Invalid block type: #{@block_type}" unless header_reader

  send(header_reader)
end
330
+
331
# Read an aligned-offset block header: eight 3-bit aligned-tree code
# lengths, from which the aligned tree is built, followed by the same
# main/length tree data a verbatim block carries.
#
# @return [void]
# @raise [DecompressionError] if the aligned tree cannot be built
def read_aligned_block_header
  (0...8).each do |symbol|
    @aligned_lengths[symbol] = @bitstream.read_bits(3)
  end

  @aligned_tree = Huffman::Tree.new(@aligned_lengths, ALIGNED_MAXSYMBOLS)
  built = @aligned_tree.build_table(ALIGNED_TABLEBITS)
  raise DecompressionError, "Failed to build aligned tree" unless built

  # Remainder of the header is identical to a verbatim block
  read_main_and_length_trees
end
350
+
351
# Read a verbatim block header, which consists solely of the main and
# length tree data.
#
# @return [void]
def read_verbatim_block_header
  read_main_and_length_trees
end
357
+
358
# Read main and length trees
#
# The LZX block header stores three independently pretree-coded sections:
# main-tree lengths for the 256 literals, main-tree lengths for the match
# symbols, and the length-tree lengths. Each section is preceded by its
# own 20-entry pretree, so a pretree is read before every section.
#
# @return [void]
# @raise [DecompressionError] if the main tree cannot be built, or the
#   length tree cannot be built and is not completely empty
def read_main_and_length_trees
  # Main tree, literal symbols
  read_pretree
  read_lengths(@maintree_lengths, 0, 256)

  # Main tree, match symbols
  read_pretree
  read_lengths(@maintree_lengths, 256, @maintree_maxsymbols)

  # Build main tree
  @maintree = Huffman::Tree.new(@maintree_lengths, @maintree_maxsymbols)
  unless @maintree.build_table(LENGTH_TABLEBITS)
    raise DecompressionError,
          "Failed to build main tree"
  end

  # A coded 0xE8 literal means Intel E8 preprocessing may be in effect
  @intel_started = true if @maintree_lengths[0xE8] != 0

  # Length tree
  read_pretree
  read_lengths(@length_lengths, 0, NUM_SECONDARY_LENGTHS)

  # Build length tree (may legitimately be empty)
  @length_tree = Huffman::Tree.new(@length_lengths, LENGTH_MAXSYMBOLS)
  if @length_tree.build_table(LENGTH_TABLEBITS)
    @length_empty = false
  else
    # An all-zero tree is allowed as long as no symbol is decoded with it
    @length_empty = @length_lengths[0...LENGTH_MAXSYMBOLS].all?(&:zero?)
    unless @length_empty
      raise DecompressionError,
            "Failed to build length tree"
    end
  end
end
395
+
396
# Read the 20 pretree code lengths (4 bits each) and build the pretree
# used to decode the code lengths that follow.
#
# @return [void]
# @raise [DecompressionError] if the pretree cannot be built
def read_pretree
  (0...20).each do |symbol|
    @pretree_lengths[symbol] = @bitstream.read_bits(4)
  end

  @pretree = Huffman::Tree.new(@pretree_lengths, PRETREE_MAXSYMBOLS)
  raise DecompressionError, "Failed to build pretree" unless @pretree.build_table(PRETREE_TABLEBITS)
end
409
+
410
# Decode code lengths for symbols first...last using the current pretree.
#
# Pretree symbols 0-16 encode a single length as a delta against the
# length already stored for that symbol. Symbols 17 and 18 emit runs of
# zeros; symbol 19 emits a short run of one delta-coded length.
#
# @param lengths [Array<Integer>] Target length array (updated in place)
# @param first [Integer] First symbol index
# @param last [Integer] Last symbol index (exclusive)
# @return [void]
def read_lengths(lengths, first, last)
  next_symbol = lambda do
    Huffman::Decoder.decode_symbol(
      @bitstream, @pretree.table, PRETREE_TABLEBITS,
      @pretree_lengths, PRETREE_MAXSYMBOLS
    )
  end
  # New length = previous length minus the decoded value; a negative
  # result is brought back into range by adding 17.
  apply_delta = lambda do |previous, decoded|
    updated = previous - decoded
    updated.negative? ? updated + 17 : updated
  end

  index = first
  until index >= last
    code = next_symbol.call

    count, value =
      case code
      when 17
        # Run of (4 + 4-bit value) zeros
        [@bitstream.read_bits(4) + 4, 0]
      when 18
        # Run of (20 + 5-bit value) zeros
        [@bitstream.read_bits(5) + 20, 0]
      when 19
        # Run of (4 + 1-bit value) copies of one delta-coded length
        repeat = @bitstream.read_bits(1) + 4
        [repeat, apply_delta.call(lengths[index], next_symbol.call)]
      else
        [1, apply_delta.call(lengths[index], code)]
      end

    count.times do
      lengths[index] = value
      index += 1
    end
  end
end
462
+
463
# Read an uncompressed block header: align to a byte boundary, then load
# R0, R1 and R2 from twelve bytes, each register stored little-endian.
# Reaching an uncompressed block also marks Intel E8 processing as started.
#
# NOTE(review): the bytes are pulled through read_bits(8); whether that
# yields stream byte order depends on the Bitstream implementation, which
# is not visible here - confirm against real uncompressed blocks.
#
# @return [void]
def read_uncompressed_block_header
  @intel_started = true

  @bitstream.byte_align

  raw = Array.new(12) { @bitstream.read_bits(8) }
  @r0, @r1, @r2 = raw.each_slice(4).map do |b0, b1, b2, b3|
    b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
  end
end
478
+
479
# Decode Huffman-compressed block data (verbatim or aligned)
#
# Decodes main-tree symbols until run_length bytes have been produced.
# Literals produce one byte; matches produce as many bytes as the window
# position advances while the match is copied. A match may run past the
# requested run; the excess belongs to the same block and is deducted from
# @block_remaining.
#
# @param run_length [Integer] Number of bytes to decode
# @return [void]
# @raise [DecompressionError] if a match runs past the end of the block
def decode_huffman_block(run_length)
  while run_length.positive?
    # Decode main symbol
    main_element = Huffman::Decoder.decode_symbol(
      @bitstream, @maintree.table, LENGTH_TABLEBITS,
      @maintree_lengths, @maintree_maxsymbols
    )

    if main_element < NUM_CHARS
      # Literal byte
      @window.setbyte(@window_posn, main_element)
      @window_posn += 1
      run_length -= 1
    else
      # Match: the bytes produced are measured from the window position so
      # the rest of the run keeps being decoded after the match.
      match_start = @window_posn
      decode_match(main_element, run_length)
      run_length -= @window_posn - match_start
    end
  end

  return unless run_length.negative?

  # The last match produced more bytes than this run asked for
  overrun = -run_length
  raise DecompressionError, "Match overruns block" if overrun > @block_remaining

  @block_remaining -= overrun
end
503
+
504
# Decode and copy a match
#
# The low 3 bits of the match symbol give the primary length (7 means a
# length-tree symbol follows); the remaining bits give the position slot.
# Slots 0-2 reuse the recent offsets R0-R2, moving the chosen one to R0.
# Higher slots compute a fresh offset and push it onto the R0/R1/R2 queue.
#
# @param main_element [Integer] Main tree symbol (>= NUM_CHARS)
# @param _run_length [Integer] Remaining run length (unused)
# @return [void]
# @raise [DecompressionError] if the length tree is needed but empty, or
#   the match would run past the end of the window
def decode_match(main_element, _run_length)
  main_element -= NUM_CHARS

  # Decode match length
  match_length = main_element & NUM_PRIMARY_LENGTHS
  if match_length == NUM_PRIMARY_LENGTHS
    if @length_empty
      raise DecompressionError,
            "Length tree needed but empty"
    end

    length_footer = Huffman::Decoder.decode_symbol(
      @bitstream, @length_tree.table, LENGTH_TABLEBITS,
      @length_lengths, LENGTH_MAXSYMBOLS
    )
    match_length += length_footer
  end
  match_length += MIN_MATCH

  # Decode match offset
  position_slot = main_element >> 3

  case position_slot
  when 0
    match_offset = @r0
  when 1
    # Reuse R1 and swap it with R0
    match_offset = @r1
    @r1 = @r0
    @r0 = match_offset
  when 2
    # Reuse R2 and swap it with R0
    match_offset = @r2
    @r2 = @r0
    @r0 = match_offset
  else
    # Calculate offset from position slot
    extra = position_slot >= 36 ? 17 : EXTRA_BITS[position_slot]
    match_offset = POSITION_BASE[position_slot] - 2

    if extra >= 3 && @block_type == BLOCKTYPE_ALIGNED
      # Use aligned offset tree for last 3 bits
      if extra > 3
        verbatim_bits = @bitstream.read_bits(extra - 3)
        match_offset += verbatim_bits << 3
      end
      aligned_bits = Huffman::Decoder.decode_symbol(
        @bitstream, @aligned_tree.table, ALIGNED_TABLEBITS,
        @aligned_lengths, ALIGNED_MAXSYMBOLS
      )
      match_offset += aligned_bits
    elsif extra.positive?
      verbatim_bits = @bitstream.read_bits(extra)
      match_offset += verbatim_bits
    end

    # Update LRU queue
    @r2 = @r1
    @r1 = @r0
    @r0 = match_offset
  end

  # LZX DELTA extended match length
  match_length += decode_extended_length if match_length == MAX_MATCH && @is_delta

  # Validate match
  if @window_posn + match_length > @window_size
    raise DecompressionError,
          "Match runs over window boundary"
  end

  # Copy match
  copy_match(match_offset, match_length)
end
577
+
578
# Decode the extra match length LZX DELTA appends to a maximum-length
# match. A 1-3 bit prefix selects how many value bits follow and which
# constant is added to them.
#
# NOTE(review): the prefix is tested starting from the lowest bit of
# peek_bits(3). That matches the '0' / '10' / '110' / '111' codes only if
# the next stream bit lands in bit 0 of the peeked value; confirm against
# Binary::Bitstream#peek_bits.
#
# @return [Integer] Additional length
def decode_extended_length
  prefix = @bitstream.peek_bits(3)

  if (prefix & 1).zero?
    # '0' -> 8 extra bits
    @bitstream.skip_bits(1)
    return @bitstream.read_bits(8)
  end

  if (prefix & 2).zero?
    # '10' -> 10 extra bits + 0x100
    @bitstream.skip_bits(2)
    return @bitstream.read_bits(10) + 0x100
  end

  # Both remaining codes are three bits long
  @bitstream.skip_bits(3)
  if (prefix & 4).zero?
    # '110' -> 12 extra bits + 0x500
    @bitstream.read_bits(12) + 0x500
  else
    # '111' -> 15 extra bits
    @bitstream.read_bits(15)
  end
end
603
+
604
# Copy match from window
#
# Copies length bytes to the current window position from offset bytes
# behind it, one byte at a time so that overlapping matches repeat the
# bytes just written. When the source lies before the start of the window
# it is taken from the end of the window and continues at position 0.
#
# @param offset [Integer] Match offset (distance back from window position)
# @param length [Integer] Match length
# @return [void]
# @raise [DecompressionError] if the offset reaches before the start of
#   the decoded stream, or further back than one full window
def copy_match(offset, length)
  if offset > @window_posn
    # Source wraps around to the end of the window
    raise DecompressionError, "Match offset beyond stream" if offset > @offset

    wrapped = offset - @window_posn
    # Without this check the source index would go negative, which
    # String#getbyte would silently resolve from the end of the string.
    raise DecompressionError, "Match offset beyond window boundaries" if wrapped > @window_size

    src_pos = @window_size - wrapped
  else
    # Normal copy
    src_pos = @window_posn - offset
  end

  length.times do
    @window.setbyte(@window_posn, @window.getbyte(src_pos))
    @window_posn += 1
    src_pos += 1
    # A wrapped source continues from the beginning of the window
    src_pos = 0 if src_pos == @window_size
  end
end
652
+
653
# Copy run_length raw bytes from the bitstream into the window, advancing
# the window position by one per byte.
#
# NOTE(review): bytes are read with read_bits(8); whether that preserves
# stream byte order depends on the Bitstream implementation - confirm.
#
# @param run_length [Integer] Number of bytes to decode
# @return [void]
def decode_uncompressed_block(run_length)
  run_length.times do
    @window.setbyte(@window_posn, @bitstream.read_bits(8))
    @window_posn += 1
  end
end
664
+
665
# Decide whether the frame needs the Intel E8 call translation: E8
# processing must have started, a non-zero Intel file size must have been
# read, the frame index must be below 32768, and the frame must be longer
# than 10 bytes.
#
# @param frame_size [Integer] Frame size
# @return [Boolean] true if transformation should be applied
def should_apply_e8_transform?(frame_size)
  return @intel_started unless @intel_started
  return false unless @intel_filesize.positive?

  @frame < 32_768 && frame_size > 10
end
675
+
676
# Apply Intel E8 transformation
#
# Works on a byte-exact copy of the current frame (the window itself is
# left untouched) and rewrites the 32-bit operand following each 0xE8
# byte from an absolute to a relative offset when it falls within
# [-position, intel_filesize). The last 10 bytes of the frame are never
# scanned.
#
# @param frame_size [Integer] Frame size
# @return [String] Transformed frame data (binary, frame_size bytes)
def apply_e8_transform(frame_size)
  # byteslice + binary encoding keep every index below byte-based even if
  # the window string is not binary-encoded.
  frame = @window.byteslice(@frame_posn, frame_size).force_encoding(Encoding::BINARY)

  data_pos = 0
  data_end = frame_size - 10
  cur_pos = @offset

  while data_pos < data_end
    # Look for E8 opcode
    unless frame.getbyte(data_pos) == 0xE8
      data_pos += 1
      cur_pos += 1
      next
    end

    # Read absolute offset (little-endian)
    abs_off = frame.getbyte(data_pos + 1) |
              (frame.getbyte(data_pos + 2) << 8) |
              (frame.getbyte(data_pos + 3) << 16) |
              (frame.getbyte(data_pos + 4) << 24)

    # Convert to signed 32-bit
    abs_off -= 0x100000000 if abs_off >= 0x80000000

    # Check if should transform
    if abs_off >= -cur_pos && abs_off < @intel_filesize
      # Calculate relative offset
      rel_off = abs_off >= 0 ? abs_off - cur_pos : abs_off + @intel_filesize

      # Write relative offset (little-endian)
      frame.setbyte(data_pos + 1, rel_off & 0xFF)
      frame.setbyte(data_pos + 2, (rel_off >> 8) & 0xFF)
      frame.setbyte(data_pos + 3, (rel_off >> 16) & 0xFF)
      frame.setbyte(data_pos + 4, (rel_off >> 24) & 0xFF)
    end

    # Skip the opcode and its 4-byte operand
    data_pos += 5
    cur_pos += 5
  end

  frame
end
724
+ end
725
+ end
726
+ end