cabriolet 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +4 -4
  2. data/README.adoc +700 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +154 -14
  6. data/lib/cabriolet/binary/bitstream_writer.rb +129 -17
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +35 -43
  13. data/lib/cabriolet/cab/decompressor.rb +14 -19
  14. data/lib/cabriolet/cab/extractor.rb +140 -31
  15. data/lib/cabriolet/chm/command_handler.rb +227 -0
  16. data/lib/cabriolet/chm/compressor.rb +7 -3
  17. data/lib/cabriolet/chm/decompressor.rb +39 -21
  18. data/lib/cabriolet/chm/parser.rb +5 -2
  19. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  20. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  21. data/lib/cabriolet/cli/command_registry.rb +83 -0
  22. data/lib/cabriolet/cli.rb +356 -607
  23. data/lib/cabriolet/compressors/base.rb +1 -1
  24. data/lib/cabriolet/compressors/lzx.rb +241 -54
  25. data/lib/cabriolet/compressors/mszip.rb +35 -3
  26. data/lib/cabriolet/compressors/quantum.rb +34 -45
  27. data/lib/cabriolet/decompressors/base.rb +1 -1
  28. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  29. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  30. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  31. data/lib/cabriolet/decompressors/quantum.rb +3 -2
  32. data/lib/cabriolet/errors.rb +3 -0
  33. data/lib/cabriolet/file_entry.rb +156 -0
  34. data/lib/cabriolet/file_manager.rb +144 -0
  35. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  36. data/lib/cabriolet/hlp/compressor.rb +28 -238
  37. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  38. data/lib/cabriolet/hlp/parser.rb +52 -101
  39. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  40. data/lib/cabriolet/hlp/quickhelp/compressor.rb +626 -0
  41. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  42. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  43. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  44. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  45. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  46. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  47. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  48. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  49. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  50. data/lib/cabriolet/huffman/tree.rb +85 -1
  51. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  52. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  53. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  54. data/lib/cabriolet/lit/command_handler.rb +221 -0
  55. data/lib/cabriolet/lit/compressor.rb +633 -38
  56. data/lib/cabriolet/lit/decompressor.rb +518 -152
  57. data/lib/cabriolet/lit/parser.rb +670 -0
  58. data/lib/cabriolet/models/hlp_file.rb +130 -29
  59. data/lib/cabriolet/models/hlp_header.rb +105 -17
  60. data/lib/cabriolet/models/lit_header.rb +212 -25
  61. data/lib/cabriolet/models/szdd_header.rb +10 -2
  62. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  63. data/lib/cabriolet/oab/command_handler.rb +257 -0
  64. data/lib/cabriolet/oab/compressor.rb +17 -8
  65. data/lib/cabriolet/oab/decompressor.rb +41 -10
  66. data/lib/cabriolet/offset_calculator.rb +81 -0
  67. data/lib/cabriolet/plugin.rb +233 -0
  68. data/lib/cabriolet/plugin_manager.rb +453 -0
  69. data/lib/cabriolet/plugin_validator.rb +422 -0
  70. data/lib/cabriolet/system/io_system.rb +3 -0
  71. data/lib/cabriolet/system/memory_handle.rb +17 -4
  72. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  73. data/lib/cabriolet/szdd/compressor.rb +15 -11
  74. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  75. data/lib/cabriolet/version.rb +1 -1
  76. data/lib/cabriolet.rb +67 -17
  77. metadata +33 -2
@@ -40,13 +40,17 @@ module Cabriolet
40
40
 
41
41
  # Decompress LZSS data
42
42
  #
43
- # @param bytes [Integer] Number of bytes to decompress (unused, reads
44
- # until EOF)
43
+ # @param bytes [Integer, nil] Maximum number of output bytes to write (nil or 0 = until EOF)
45
44
  # @return [Integer] Number of bytes decompressed
46
- def decompress(_bytes)
45
+ def decompress(bytes = nil)
47
46
  bytes_written = 0
47
+ # Only enforce limit if bytes is a positive integer
48
+ enforce_limit = bytes&.positive?
48
49
 
49
50
  loop do
51
+ # Check if we've reached the output byte limit (only when limit is enforced)
52
+ break if enforce_limit && bytes_written >= bytes
53
+
50
54
  # Read control byte
51
55
  control_byte = read_input_byte
52
56
  break if control_byte.nil?
@@ -55,6 +59,9 @@ module Cabriolet
55
59
 
56
60
  # Process each bit in the control byte
57
61
  8.times do |bit_index|
62
+ # Check output limit before each operation (only when limit is enforced)
63
+ break if enforce_limit && bytes_written >= bytes
64
+
58
65
  mask = 1 << bit_index
59
66
 
60
67
  if control_byte.anybits?(mask)
@@ -81,6 +88,9 @@ module Cabriolet
81
88
 
82
89
  # Copy from window
83
90
  length.times do
91
+ # Check if we've reached the limit mid-match
92
+ break if enforce_limit && bytes_written >= bytes
93
+
84
94
  byte = @window[match_pos]
85
95
  @window[@window_pos] = byte
86
96
  write_output_byte(byte)
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "base"
4
+
3
5
  module Cabriolet
4
6
  module Decompressors
5
7
  # LZX handles LZX compressed data
@@ -100,7 +102,7 @@ module Cabriolet
100
102
  # @param output_length [Integer] Expected output length for E8 processing
101
103
  # @param is_delta [Boolean] Whether this is LZX DELTA format
102
104
  def initialize(io_system, input, output, buffer_size, window_bits:,
103
- reset_interval: 0, output_length: 0, is_delta: false)
105
+ reset_interval: 0, output_length: 0, is_delta: false, salvage: false, **_kwargs)
104
106
  super(io_system, input, output, buffer_size)
105
107
 
106
108
  # Validate window_bits
@@ -146,8 +148,9 @@ module Cabriolet
146
148
  @intel_started = false
147
149
  @e8_buf = "\0" * FRAME_SIZE
148
150
 
149
- # Initialize bitstream
150
- @bitstream = Binary::Bitstream.new(io_system, input, buffer_size)
151
+ # Initialize bitstream (LZX uses MSB-first bit ordering per libmspack lzxd.c)
152
+ @bitstream = Binary::Bitstream.new(io_system, input, buffer_size,
153
+ bit_order: :msb, salvage: salvage)
151
154
 
152
155
  # Initialize Huffman trees
153
156
  initialize_trees
@@ -173,19 +176,21 @@ module Cabriolet
173
176
  def decompress(bytes)
174
177
  return 0 if bytes <= 0
175
178
 
179
+ # Read Intel filesize header if not already read (once per stream)
180
+ read_intel_header unless @header_read
181
+
176
182
  total_written = 0
177
183
  end_frame = ((@offset + bytes) / FRAME_SIZE) + 1
178
184
 
179
185
  while @frame < end_frame
180
- # Check reset interval
181
- reset_state if @reset_interval.positive? && (@frame % @reset_interval).zero?
186
+ # Check reset interval - reset offset registers at frame boundaries
187
+ if @reset_interval.positive? && (@frame % @reset_interval).zero? && @frame.positive?
188
+ @r0 = @r1 = @r2 = 1
189
+ end
182
190
 
183
191
  # Read DELTA chunk size if needed
184
192
  @bitstream.read_bits(16) if @is_delta
185
193
 
186
- # Read Intel filesize header if needed
187
- read_intel_header unless @header_read
188
-
189
194
  # Calculate frame size
190
195
  frame_size = calculate_frame_size
191
196
 
@@ -238,6 +243,10 @@ module Cabriolet
238
243
 
239
244
  # Reset LZX state (called at reset intervals)
240
245
  #
246
+ # Per libmspack: Only reset state variables, NOT Huffman code lengths.
247
+ # Lengths persist across blocks and are updated via delta encoding.
248
+ # They are only zeroed at initialization (in initialize_trees).
249
+ #
241
250
  # @return [void]
242
251
  def reset_state
243
252
  @r0 = 1
@@ -247,12 +256,17 @@ module Cabriolet
247
256
  @block_remaining = 0
248
257
  @block_type = BLOCKTYPE_INVALID
249
258
 
250
- # Reset tree lengths to 0
251
- @maintree_lengths.fill(0)
252
- @length_lengths.fill(0)
259
+ # NOTE: Do NOT reset @maintree_lengths or @length_lengths here!
260
+ # Per libmspack lzxd.c line 267-269, lengths are initialized to 0
261
+ # only once (at start) "because deltas will be applied to them".
262
+ # Resetting them here breaks delta encoding between blocks.
253
263
  end
254
264
 
255
- # Read Intel filesize header
265
+ # Read Intel filesize header (once per stream, before any frames)
266
+ #
267
+ # Format per libmspack:
268
+ # - 1 bit: Intel flag (if 0, filesize = 0; if 1, read 32-bit filesize)
269
+ # - If flag is 1: 32 bits for filesize (16 bits high, 16 bits low)
256
270
  #
257
271
  # @return [void]
258
272
  def read_intel_header
@@ -304,13 +318,20 @@ module Cabriolet
304
318
 
305
319
  # Read block header
306
320
  #
321
+ # LZX block header format (per libmspack):
322
+ # - 3 bits: block_type
323
+ # - 24 bits: block_length (16 bits high, 8 bits low, combined as (high << 8) | low)
324
+ #
307
325
  # @return [void]
308
326
  def read_block_header
309
- # Align for uncompressed blocks
327
+ # Align for uncompressed blocks - this ensures correct byte alignment
328
+ # when reading the R0, R1, R2 values from the block header
310
329
  @bitstream.byte_align if @block_type == BLOCKTYPE_UNCOMPRESSED && @block_length.allbits?(1)
311
330
 
312
- # Read block type and length
331
+ # Read block type (3 bits)
313
332
  @block_type = @bitstream.read_bits(3)
333
+
334
+ # Read block length (24 bits: 16 bits high, then 8 bits low)
314
335
  high = @bitstream.read_bits(16)
315
336
  low = @bitstream.read_bits(8)
316
337
  @block_length = (high << 8) | low
@@ -324,6 +345,8 @@ module Cabriolet
324
345
  when BLOCKTYPE_UNCOMPRESSED
325
346
  read_uncompressed_block_header
326
347
  else
348
+ # Per libmspack lzxd.c line 519-521, BLOCKTYPE_INVALID (0) and
349
+ # blocktypes 4-7 are all invalid and should raise an error
327
350
  raise DecompressionError, "Invalid block type: #{@block_type}"
328
351
  end
329
352
  end
@@ -338,11 +361,11 @@ module Cabriolet
338
361
  end
339
362
 
340
363
  # Build aligned tree
341
- @aligned_tree = Huffman::Tree.new(@aligned_lengths, ALIGNED_MAXSYMBOLS)
342
- unless @aligned_tree.build_table(ALIGNED_TABLEBITS)
343
- raise DecompressionError,
344
- "Failed to build aligned tree"
345
- end
364
+ # Note: Aligned tree may be incomplete (Kraft sum < 1.0), which is valid
365
+ # as long as the unused codes are never encountered in the bitstream
366
+ @aligned_tree = Huffman::Tree.new(@aligned_lengths, ALIGNED_MAXSYMBOLS,
367
+ bit_order: :msb)
368
+ @aligned_tree.build_table(ALIGNED_TABLEBITS)
346
369
 
347
370
  # Read main and length trees (same as verbatim)
348
371
  read_main_and_length_trees
@@ -359,15 +382,14 @@ module Cabriolet
359
382
  #
360
383
  # @return [void]
361
384
  def read_main_and_length_trees
362
- # Read and build pretree
363
- read_pretree
364
-
365
385
  # Read main tree lengths using pretree
386
+ # Note: Each call to read_lengths reads its own pretree (per libmspack lzxd_read_lens)
366
387
  read_lengths(@maintree_lengths, 0, 256)
367
388
  read_lengths(@maintree_lengths, 256, @maintree_maxsymbols)
368
389
 
369
390
  # Build main tree
370
- @maintree = Huffman::Tree.new(@maintree_lengths, @maintree_maxsymbols)
391
+ @maintree = Huffman::Tree.new(@maintree_lengths, @maintree_maxsymbols,
392
+ bit_order: :msb)
371
393
  unless @maintree.build_table(LENGTH_TABLEBITS)
372
394
  raise DecompressionError,
373
395
  "Failed to build main tree"
@@ -380,7 +402,8 @@ module Cabriolet
380
402
  read_lengths(@length_lengths, 0, NUM_SECONDARY_LENGTHS)
381
403
 
382
404
  # Build length tree (may be empty)
383
- @length_tree = Huffman::Tree.new(@length_lengths, LENGTH_MAXSYMBOLS)
405
+ @length_tree = Huffman::Tree.new(@length_lengths, LENGTH_MAXSYMBOLS,
406
+ bit_order: :msb)
384
407
  if @length_tree.build_table(LENGTH_TABLEBITS)
385
408
  @length_empty = false
386
409
  else
@@ -401,7 +424,8 @@ module Cabriolet
401
424
  @pretree_lengths[i] = @bitstream.read_bits(4)
402
425
  end
403
426
 
404
- @pretree = Huffman::Tree.new(@pretree_lengths, PRETREE_MAXSYMBOLS)
427
+ @pretree = Huffman::Tree.new(@pretree_lengths, PRETREE_MAXSYMBOLS,
428
+ bit_order: :msb)
405
429
  return if @pretree.build_table(PRETREE_TABLEBITS)
406
430
 
407
431
  raise DecompressionError, "Failed to build pretree"
@@ -409,11 +433,16 @@ module Cabriolet
409
433
 
410
434
  # Read code lengths using pretree
411
435
  #
436
+ # Per libmspack's lzxd_read_lens, each call reads its own pretree first
437
+ #
412
438
  # @param lengths [Array<Integer>] Target length array
413
439
  # @param first [Integer] First symbol index
414
440
  # @param last [Integer] Last symbol index (exclusive)
415
441
  # @return [void]
416
442
  def read_lengths(lengths, first, last)
443
+ # Read and build pretree (20 elements, 4 bits each)
444
+ read_pretree
445
+
417
446
  x = first
418
447
 
419
448
  while x < last
@@ -494,9 +523,9 @@ module Cabriolet
494
523
  @window_posn += 1
495
524
  run_length -= 1
496
525
  else
497
- # Match: decode length and offset
498
- decode_match(main_element, run_length)
499
- run_length = 0 # Match decoding handles run_length internally
526
+ # Match: decode length and offset, then decrement run_length by match_length
527
+ match_length = decode_match(main_element, run_length)
528
+ run_length -= match_length
500
529
  end
501
530
  end
502
531
  end
@@ -504,8 +533,8 @@ module Cabriolet
504
533
  # Decode and copy a match
505
534
  #
506
535
  # @param main_element [Integer] Main tree symbol
507
- # @param run_length [Integer] Remaining run length
508
- # @return [void]
536
+ # @param run_length [Integer] Remaining run length (unused, kept for compatibility)
537
+ # @return [Integer] Match length (bytes consumed)
509
538
  def decode_match(main_element, _run_length)
510
539
  main_element -= NUM_CHARS
511
540
 
@@ -533,8 +562,10 @@ module Cabriolet
533
562
  match_offset = @r0
534
563
  when 1
535
564
  @r1, @r0 = @r0, @r1
565
+ match_offset = @r0
536
566
  when 2
537
567
  @r2, @r0 = @r0, @r2
568
+ match_offset = @r0
538
569
  else
539
570
  # Calculate offset from position slot
540
571
  extra = position_slot >= 36 ? 17 : EXTRA_BITS[position_slot]
@@ -573,6 +604,9 @@ module Cabriolet
573
604
 
574
605
  # Copy match
575
606
  copy_match(match_offset, match_length)
607
+
608
+ # Return match length so caller can decrement run_length
609
+ match_length
576
610
  end
577
611
 
578
612
  # Decode extended match length for LZX DELTA
@@ -608,9 +642,12 @@ module Cabriolet
608
642
  # @return [void]
609
643
  def copy_match(offset, length)
610
644
  if offset > @window_posn
611
- # Match wraps around window
612
- if offset > @offset && (offset - @window_posn).positive?
613
- raise DecompressionError, "Match offset beyond stream"
645
+ # Match wraps around window - validate it doesn't read beyond available data
646
+ # Per libmspack lzxd.c lines 622-628: check if match offset goes beyond
647
+ # what has been decompressed so far (accounting for any reference data)
648
+ ref_data_size = 0 # We don't support reference data yet (LZX DELTA feature)
649
+ if offset > @offset && (offset - @window_posn) > ref_data_size
650
+ raise DecompressionError, "Match offset beyond LZX stream"
614
651
  end
615
652
 
616
653
  # Copy from end of window
@@ -14,6 +14,13 @@ module Cabriolet
14
14
  DISTANCE_MAXSYMBOLS = 32
15
15
  DISTANCE_TABLEBITS = 6
16
16
 
17
+ # MSZIP signature bytes
18
+ SIGNATURE_BYTE_C = 0x43 # ASCII 'C'
19
+ SIGNATURE_BYTE_K = 0x4B # ASCII 'K'
20
+
21
+ # Maximum bytes to search for CK signature (prevents infinite loops)
22
+ MAX_SIGNATURE_SEARCH = 10_000
23
+
17
24
  # Match lengths for literal codes 257-285
18
25
  LIT_LENGTHS = [
19
26
  3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
@@ -50,7 +57,8 @@ module Cabriolet
50
57
  # @param output [System::FileHandle, System::MemoryHandle] Output handle
51
58
  # @param buffer_size [Integer] Buffer size for I/O operations
52
59
  # @param fix_mszip [Boolean] Enable repair mode for corrupted data
53
- def initialize(io_system, input, output, buffer_size, fix_mszip: false)
60
+ def initialize(io_system, input, output, buffer_size, fix_mszip: false,
61
+ salvage: false, **_kwargs)
54
62
  super(io_system, input, output, buffer_size)
55
63
  @fix_mszip = fix_mszip
56
64
 
@@ -58,9 +66,11 @@ module Cabriolet
58
66
  @window = "\0" * FRAME_SIZE
59
67
  @window_posn = 0
60
68
  @bytes_output = 0
69
+ @window_offset = 0 # Offset into window for unconsumed data (for multi-file CFDATA blocks)
61
70
 
62
71
  # Initialize bitstream
63
- @bitstream = Binary::Bitstream.new(io_system, input, buffer_size)
72
+ @bitstream = Binary::Bitstream.new(io_system, input, buffer_size,
73
+ salvage: salvage)
64
74
 
65
75
  # Initialize Huffman trees
66
76
  @literal_lengths = Array.new(LITERAL_MAXSYMBOLS, 0)
@@ -76,15 +86,50 @@ module Cabriolet
76
86
  def decompress(bytes)
77
87
  total_written = 0
78
88
 
89
+ if ENV["DEBUG_MSZIP"]
90
+ warn "DEBUG MSZIP.decompress(#{bytes}): ENTRY bytes_output=#{@bytes_output} window_offset=#{@window_offset} window_posn=#{@window_posn}"
91
+ end
92
+
79
93
  while bytes.positive?
80
- # Read 'CK' signature
81
- read_signature
94
+ # Check if we have buffered data from previous inflate
95
+ if @bytes_output.positive?
96
+ if ENV["DEBUG_MSZIP"]
97
+ warn "DEBUG MSZIP: Using buffered data: bytes_output=#{@bytes_output} window_offset=#{@window_offset}"
98
+ end
82
99
 
83
- # Reset window state for new block
100
+ # Write from buffer
101
+ write_amount = [bytes, @bytes_output].min
102
+ io_system.write(output, @window[@window_offset, write_amount])
103
+ total_written += write_amount
104
+ bytes -= write_amount
105
+ @bytes_output -= write_amount
106
+ @window_offset += write_amount
107
+
108
+ if ENV["DEBUG_MSZIP"]
109
+ warn "DEBUG MSZIP: After buffer write: total_written=#{total_written} bytes_remaining=#{bytes} bytes_output=#{@bytes_output}"
110
+ end
111
+
112
+ # Continue loop to check if we need more data
113
+ next
114
+ end
115
+
116
+ # No buffered data - need to inflate a new MSZIP frame
117
+ # Reset window for new frame
118
+ @window_offset = 0
84
119
  @window_posn = 0
85
- @bytes_output = 0
86
120
 
87
- # Inflate the block
121
+ # Read 'CK' signature (marks start of MSZIP frame)
122
+ # Every MSZIP frame starts with a CK signature
123
+ if ENV["DEBUG_MSZIP"]
124
+ warn "DEBUG MSZIP: Reading CK signature (new MSZIP frame)"
125
+ end
126
+ read_signature
127
+
128
+ # Inflate the MSZIP frame (processes deflate blocks until last_block or window full)
129
+ if ENV["DEBUG_MSZIP"]
130
+ warn "DEBUG MSZIP: Calling inflate_block"
131
+ end
132
+
88
133
  begin
89
134
  inflate_block
90
135
  rescue DecompressionError
@@ -97,11 +142,15 @@ module Cabriolet
97
142
  @bytes_output = FRAME_SIZE
98
143
  end
99
144
 
100
- # Write output
101
- write_amount = [bytes, @bytes_output].min
102
- io_system.write(output, @window[0, write_amount])
103
- total_written += write_amount
104
- bytes -= write_amount
145
+ if ENV["DEBUG_MSZIP"]
146
+ warn "DEBUG MSZIP: After inflate_block: bytes_output=#{@bytes_output} window_posn=#{@window_posn}"
147
+ end
148
+
149
+ # Now we have data in the window buffer - loop back to write from it
150
+ end
151
+
152
+ if ENV["DEBUG_MSZIP"]
153
+ warn "DEBUG MSZIP.decompress: EXIT total_written=#{total_written}"
105
154
  end
106
155
 
107
156
  total_written
@@ -111,49 +160,63 @@ module Cabriolet
111
160
 
112
161
  # Read and verify 'CK' signature
113
162
  def read_signature
163
+ if ENV["DEBUG_MSZIP"]
164
+ warn "DEBUG read_signature: Before byte_align"
165
+ end
166
+
114
167
  # Align to byte boundary
115
168
  @bitstream.byte_align
116
169
 
117
- # Read bytes until we find 'CK'
118
- state = 0
119
- bytes_read = 0
120
- max_search = 10_000 # Prevent infinite loops
170
+ # Read first 2 bytes
171
+ c = @bitstream.read_bits(8)
172
+ k = @bitstream.read_bits(8)
121
173
 
122
- loop do
123
- byte = @bitstream.read_bits(8)
124
- bytes_read += 1
174
+ if ENV["DEBUG_MSZIP"]
175
+ warn "DEBUG read_signature: Read 0x#{c.to_s(16)} 0x#{k.to_s(16)} (expected 'C'=0x43 'K'=0x4B)"
176
+ end
125
177
 
126
- # Check for EOF (bitstream returns 0)
127
- if bytes_read > 2 && byte.zero?
128
- raise DecompressionError,
129
- "Unexpected EOF while searching for CK signature"
178
+ # If not CK, search for it (similar to libmspack's tolerant behavior)
179
+ unless c == SIGNATURE_BYTE_C && k == SIGNATURE_BYTE_K
180
+ # Search for CK signature in the stream (up to a reasonable limit)
181
+ max_search = 256
182
+ found = false
183
+
184
+ max_search.times do
185
+ # Shift: c becomes k, read new k
186
+ c = k
187
+ k = @bitstream.read_bits(8)
188
+
189
+ if c == SIGNATURE_BYTE_C && k == SIGNATURE_BYTE_K
190
+ found = true
191
+ if ENV["DEBUG_MSZIP"]
192
+ warn "DEBUG read_signature: Found CK signature after searching"
193
+ end
194
+ break
195
+ end
130
196
  end
131
197
 
132
- # Prevent infinite loops
133
- if bytes_read > max_search
198
+ unless found
134
199
  raise DecompressionError,
135
- "CK signature not found in stream"
136
- end
137
-
138
- if byte == 0x43 # 'C'
139
- state = 1
140
- elsif state == 1 && byte == 0x4B # 'K'
141
- break
142
- else
143
- state = 0
200
+ "Invalid MSZIP signature: could not find CK in stream"
144
201
  end
145
202
  end
146
203
  end
147
204
 
148
205
  # Inflate a single block
206
+ #
207
+ # Processes deflate blocks until the last_block flag is set or window is full.
208
+ # Always decodes complete blocks - does not stop mid-block.
149
209
  def inflate_block
150
- loop do
151
- # Read last block flag
152
- last_block = @bitstream.read_bits(1)
210
+ # Read first block header
211
+ last_block = @bitstream.read_bits(1)
212
+ block_type = @bitstream.read_bits(2)
153
213
 
154
- # Read block type
155
- block_type = @bitstream.read_bits(2)
214
+ if ENV["DEBUG_MSZIP"]
215
+ warn "DEBUG inflate_block: First block: last_block=#{last_block} block_type=#{block_type}"
216
+ end
156
217
 
218
+ loop do
219
+ # Process current block
157
220
  case block_type
158
221
  when 0
159
222
  inflate_stored_block
@@ -167,7 +230,16 @@ module Cabriolet
167
230
  raise DecompressionError, "Invalid block type: #{block_type}"
168
231
  end
169
232
 
233
+ if ENV["DEBUG_MSZIP"]
234
+ warn "DEBUG inflate_block: After block: last_block=#{last_block} window_posn=#{@window_posn}"
235
+ end
236
+
237
+ # Stop if this was the last block
170
238
  break if last_block == 1
239
+
240
+ # Read next block header (only if we need to continue)
241
+ last_block = @bitstream.read_bits(1)
242
+ block_type = @bitstream.read_bits(2)
171
243
  end
172
244
 
173
245
  # Flush remaining window data
@@ -306,13 +378,25 @@ module Cabriolet
306
378
  end
307
379
 
308
380
  # Inflate a Huffman-compressed block
381
+ #
382
+ # Always decodes until code 256 (END OF BLOCK)
309
383
  def inflate_huffman_block
384
+ symbol_count = 0
310
385
  loop do
386
+ if ENV["DEBUG_MSZIP_SYMBOLS"]
387
+ warn "DEBUG inflate_huffman_block: window_posn=#{@window_posn} bytes_output=#{@bytes_output}"
388
+ end
389
+
311
390
  # Decode symbol from literal tree
312
391
  code = Huffman::Decoder.decode_symbol(
313
392
  @bitstream, @literal_tree.table, LITERAL_TABLEBITS,
314
393
  @literal_lengths, LITERAL_MAXSYMBOLS
315
394
  )
395
+ symbol_count += 1
396
+
397
+ if ENV["DEBUG_MSZIP_SYMBOLS"] || ENV["DEBUG_MSZIP"]
398
+ warn "DEBUG inflate_huffman_block[#{symbol_count}]: decoded code=#{code} (#{'0x%02x' % code if code < 256})"
399
+ end
316
400
 
317
401
  if code < 256
318
402
  # Literal byte
@@ -321,6 +405,9 @@ module Cabriolet
321
405
  flush_window if @window_posn == FRAME_SIZE
322
406
  elsif code == 256
323
407
  # End of block
408
+ if ENV["DEBUG_MSZIP"] || ENV["DEBUG_MSZIP_SYMBOLS"]
409
+ warn "DEBUG inflate_huffman_block: END OF BLOCK (window_posn=#{@window_posn})"
410
+ end
324
411
  break
325
412
  else
326
413
  # Length/distance pair (LZ77 match)
@@ -12,7 +12,7 @@ module Cabriolet
12
12
  FRAME_SIZE = 32_768
13
13
 
14
14
  # Match constants
15
- MAX_MATCH = 1028
15
+ MAX_MATCH = 259
16
16
 
17
17
  # Position slot tables (same as in qtmd.c)
18
18
  POSITION_BASE = [
@@ -68,7 +68,8 @@ module Cabriolet
68
68
  # @param output [System::FileHandle, System::MemoryHandle] Output handle
69
69
  # @param buffer_size [Integer] Buffer size for I/O operations
70
70
  # @param window_bits [Integer] Window size parameter (10-21)
71
- def initialize(io_system, input, output, buffer_size, window_bits: 10)
71
+ def initialize(io_system, input, output, buffer_size, window_bits: 10,
72
+ **_kwargs)
72
73
  super(io_system, input, output, buffer_size)
73
74
 
74
75
  # Validate window_bits
@@ -36,4 +36,7 @@ module Cabriolet
36
36
 
37
37
  # Raised when seek operation fails
38
38
  class SeekError < IOError; end
39
+
40
+ # Raised when plugin operations fail
41
+ class PluginError < Error; end
39
42
  end