RubyGems - cabriolet - Versions diffs - 0.1.2 → 0.2.0 - Mend

cabriolet 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)

checksums.yaml +4 -4
data/README.adoc +700 -38
data/lib/cabriolet/algorithm_factory.rb +250 -0
data/lib/cabriolet/base_compressor.rb +206 -0
data/lib/cabriolet/binary/bitstream.rb +154 -14
data/lib/cabriolet/binary/bitstream_writer.rb +129 -17
data/lib/cabriolet/binary/chm_structures.rb +2 -2
data/lib/cabriolet/binary/hlp_structures.rb +258 -37
data/lib/cabriolet/binary/lit_structures.rb +231 -65
data/lib/cabriolet/binary/oab_structures.rb +17 -1
data/lib/cabriolet/cab/command_handler.rb +226 -0
data/lib/cabriolet/cab/compressor.rb +35 -43
data/lib/cabriolet/cab/decompressor.rb +14 -19
data/lib/cabriolet/cab/extractor.rb +140 -31
data/lib/cabriolet/chm/command_handler.rb +227 -0
data/lib/cabriolet/chm/compressor.rb +7 -3
data/lib/cabriolet/chm/decompressor.rb +39 -21
data/lib/cabriolet/chm/parser.rb +5 -2
data/lib/cabriolet/cli/base_command_handler.rb +127 -0
data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
data/lib/cabriolet/cli/command_registry.rb +83 -0
data/lib/cabriolet/cli.rb +356 -607
data/lib/cabriolet/compressors/base.rb +1 -1
data/lib/cabriolet/compressors/lzx.rb +241 -54
data/lib/cabriolet/compressors/mszip.rb +35 -3
data/lib/cabriolet/compressors/quantum.rb +34 -45
data/lib/cabriolet/decompressors/base.rb +1 -1
data/lib/cabriolet/decompressors/lzss.rb +13 -3
data/lib/cabriolet/decompressors/lzx.rb +70 -33
data/lib/cabriolet/decompressors/mszip.rb +126 -39
data/lib/cabriolet/decompressors/quantum.rb +3 -2
data/lib/cabriolet/errors.rb +3 -0
data/lib/cabriolet/file_entry.rb +156 -0
data/lib/cabriolet/file_manager.rb +144 -0
data/lib/cabriolet/hlp/command_handler.rb +282 -0
data/lib/cabriolet/hlp/compressor.rb +28 -238
data/lib/cabriolet/hlp/decompressor.rb +107 -147
data/lib/cabriolet/hlp/parser.rb +52 -101
data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
data/lib/cabriolet/hlp/quickhelp/compressor.rb +626 -0
data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
data/lib/cabriolet/huffman/tree.rb +85 -1
data/lib/cabriolet/kwaj/command_handler.rb +213 -0
data/lib/cabriolet/kwaj/compressor.rb +7 -3
data/lib/cabriolet/kwaj/decompressor.rb +18 -12
data/lib/cabriolet/lit/command_handler.rb +221 -0
data/lib/cabriolet/lit/compressor.rb +633 -38
data/lib/cabriolet/lit/decompressor.rb +518 -152
data/lib/cabriolet/lit/parser.rb +670 -0
data/lib/cabriolet/models/hlp_file.rb +130 -29
data/lib/cabriolet/models/hlp_header.rb +105 -17
data/lib/cabriolet/models/lit_header.rb +212 -25
data/lib/cabriolet/models/szdd_header.rb +10 -2
data/lib/cabriolet/models/winhelp_header.rb +127 -0
data/lib/cabriolet/oab/command_handler.rb +257 -0
data/lib/cabriolet/oab/compressor.rb +17 -8
data/lib/cabriolet/oab/decompressor.rb +41 -10
data/lib/cabriolet/offset_calculator.rb +81 -0
data/lib/cabriolet/plugin.rb +233 -0
data/lib/cabriolet/plugin_manager.rb +453 -0
data/lib/cabriolet/plugin_validator.rb +422 -0
data/lib/cabriolet/system/io_system.rb +3 -0
data/lib/cabriolet/system/memory_handle.rb +17 -4
data/lib/cabriolet/szdd/command_handler.rb +217 -0
data/lib/cabriolet/szdd/compressor.rb +15 -11
data/lib/cabriolet/szdd/decompressor.rb +18 -9
data/lib/cabriolet/version.rb +1 -1
data/lib/cabriolet.rb +67 -17
metadata +33 -2

data/lib/cabriolet/compressors/base.rb CHANGED

@@ -15,7 +15,7 @@ module Cabriolet
       # @param input [System::FileHandle, System::MemoryHandle] Input handle
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
-      def initialize(io_system, input, output, buffer_size)
+      def initialize(io_system, input, output, buffer_size, **_kwargs)
         @io_system = io_system
         @input = input
         @output = output

data/lib/cabriolet/compressors/lzx.rb CHANGED

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require_relative "base"
 require_relative "../binary/bitstream_writer"
 require_relative "../huffman/encoder"
@@ -66,7 +67,8 @@ module Cabriolet
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
       # @param window_bits [Integer] Window size (15-21 for regular LZX)
-      def initialize(io_system, input, output, buffer_size, window_bits: 15)
+      def initialize(io_system, input, output, buffer_size, window_bits: 15,
+**_kwargs)
         super(io_system, input, output, buffer_size)
         # Validate window_bits
@@ -82,8 +84,9 @@ module Cabriolet
         @num_offsets = POSITION_SLOTS[window_bits - 15] << 3
         @maintree_maxsymbols = NUM_CHARS + @num_offsets
-        # Initialize bitstream writer
-        @bitstream = Binary::BitstreamWriter.new(io_system, output, buffer_size)
+        # Initialize bitstream writer (LZX uses MSB-first bit ordering per libmspack lzxd.c)
+        @bitstream = Binary::BitstreamWriter.new(io_system, output,
+                                                 buffer_size, bit_order: :msb)
         # Initialize sliding window for LZ77
         @window = "\0" * @window_size
@@ -119,6 +122,7 @@ module Cabriolet
           frame_data = input_data[pos, frame_size]
           # Compress this frame
+          # TODO: Use compress_frame_verbatim once tree encoding is fixed
           compress_frame(frame_data)
           pos += frame_size
@@ -152,19 +156,46 @@ module Cabriolet
       # @param data [String] Frame data to compress
       # @return [void]
       def compress_frame(data)
-        # Use UNCOMPRESSED blocks for now (simplest approach)
-        write_block_header(BLOCKTYPE_UNCOMPRESSED, data.bytesize)
+        # For uncompressed blocks, block length is just the frame data size
+        # (offset registers are NOT included in the block length field)
+        block_length = data.bytesize
-        # Write R0, R1, R2 (required for uncompressed blocks)
+        # Write UNCOMPRESSED block header
+        write_block_header(BLOCKTYPE_UNCOMPRESSED, block_length)
+        # Write offset registers (R0, R1, R2)
         write_offset_registers
-        # Write raw data
+        # Write raw uncompressed data
         data.each_byte do |byte|
           @bitstream.write_bits(byte, 8)
         end
+      end
+      # Compress a single frame (32KB) - VERBATIM version (currently disabled)
+      #
+      # @param data [String] Frame data to compress
+      # @return [void]
+      def compress_frame_verbatim(data)
+        # Reset frequency statistics for each frame
+        @literal_freq.fill(0)
+        @match_freq.fill(0)
+        @length_freq.fill(0)
+        # Analyze frame to generate LZ77 tokens
+        tokens = analyze_frame(data)
-        # Ensure byte alignment at end of frame for multi-frame support
-        @bitstream.byte_align
+        # Build Huffman trees from statistics
+        build_trees
+        # Write VERBATIM block header
+        write_block_header(BLOCKTYPE_VERBATIM, data.bytesize)
+        # Write Huffman tree definitions
+        write_trees
+        # Encode all tokens using the Huffman codes
+        encode_tokens(tokens)
       end
       # Analyze frame and generate LZ77 tokens
@@ -301,68 +332,224 @@ module Cabriolet
         slot
       end
+      # Build Huffman code lengths from frequencies
+      #
+      # Uses a simplified approach: assign equal lengths to all symbols.
+      # This guarantees valid Huffman trees that satisfy Kraft inequality.
+      #
+      # @param freqs [Array<Integer>] Symbol frequencies
+      # @param num_symbols [Integer] Number of symbols
+      # @return [Array<Integer>] Code lengths
+      def build_tree_lengths(freqs, num_symbols)
+        lengths = Array.new(num_symbols, 0)
+        # Get symbols with non-zero frequencies
+        non_zero_symbols = freqs.each_with_index.select do |freq, _|
+          freq.positive?
+        end.map { |_, sym| sym }
+        # Handle edge cases
+        if non_zero_symbols.empty?
+          # Empty tree: create minimal valid tree with 2 symbols
+          lengths[0] = 1
+          lengths[1] = 1
+          return lengths
+        elsif non_zero_symbols.size == 1
+          # Single symbol: need at least 2 symbols for valid Huffman tree
+          symbol = non_zero_symbols[0]
+          lengths[symbol] = 1
+          dummy = symbol.zero? ? 1 : 0
+          lengths[dummy] = 1
+          return lengths
+        end
+        # Calculate required length: ceil(log2(count))
+        count = non_zero_symbols.size
+        bit_length = 1
+        while (1 << bit_length) < count
+          bit_length += 1
+        end
+        # Assign same length to all non-zero symbols
+        non_zero_symbols.each do |symbol|
+          lengths[symbol] = bit_length
+        end
+        # Pad with dummy symbols to make tree complete (2^bit_length total symbols)
+        # This ensures Kraft inequality sum equals exactly 1.0
+        total_needed = 1 << bit_length
+        dummy_count = total_needed - count
+        if dummy_count.positive?
+          dummy_index = 0
+          while dummy_count.positive? && dummy_index < num_symbols
+            if lengths[dummy_index].zero?
+              lengths[dummy_index] = bit_length
+              dummy_count -= 1
+            end
+            dummy_index += 1
+          end
+        end
+        lengths
+      end
       # Build Huffman trees from frequency statistics
       #
+      # This creates three trees for LZX compression:
+      # 1. Main tree: literals (0-255) + match position/length combinations
+      # 2. Length tree: additional length symbols for long matches
+      # 3. Pretree: encodes the code lengths of main/length trees
+      #
       # @return [void]
       def build_trees
-        # Build main tree (literals + matches)
-        maintree_freqs = @literal_freq + @match_freq
-        @maintree_lengths = build_tree_lengths(maintree_freqs,
+        # Step 1: Combine literal and match frequencies for main tree
+        maintree_freq = @literal_freq + @match_freq
+        # Step 2: Build main tree code lengths
+        @maintree_lengths = build_tree_lengths(maintree_freq,
                                                @maintree_maxsymbols)
-        @maintree_codes = Huffman::Encoder.build_codes(@maintree_lengths,
-                                                       @maintree_maxsymbols)
-        # Build length tree
+        # Step 3: Build length tree code lengths
         @length_lengths = build_tree_lengths(@length_freq, LENGTH_MAXSYMBOLS)
+        # Step 4: Calculate pretree frequencies by simulating tree encoding
+        pretree_freq = calculate_pretree_frequencies
+        # Step 5: Build pretree code lengths
+        @pretree_lengths = build_tree_lengths(pretree_freq, PRETREE_MAXSYMBOLS)
+        # Step 6: Generate code tables from lengths
+        @maintree_codes = Huffman::Encoder.build_codes(@maintree_lengths,
+                                                       @maintree_maxsymbols)
         @length_codes = Huffman::Encoder.build_codes(@length_lengths,
                                                      LENGTH_MAXSYMBOLS)
-        # Build pretree (used to encode the other trees)
-        # Create a valid Huffman tree that satisfies Kraft inequality
-        # For 20 symbols, use: 2@3bits + 6@4bits + 12@5bits = 1.0
-        @pretree_lengths = Array.new(PRETREE_MAXSYMBOLS, 0)
-        # Most common symbols (0-1): 3 bits
-        (0..1).each { |i| @pretree_lengths[i] = 3 }
-        # Common symbols (2-7): 4 bits
-        (2..7).each { |i| @pretree_lengths[i] = 4 }
-        # Less common symbols (8-19): 5 bits
-        (8..19).each { |i| @pretree_lengths[i] = 5 }
         @pretree_codes = Huffman::Encoder.build_codes(@pretree_lengths,
                                                       PRETREE_MAXSYMBOLS)
       end
-      # Build Huffman code lengths from frequencies
+      # Calculate pretree symbol frequencies
       #
-      # @param freqs [Array<Integer>] Symbol frequencies
-      # @param num_symbols [Integer] Number of symbols
-      # @return [Array<Integer>] Code lengths
-      def build_tree_lengths(freqs, num_symbols)
-        # Simple implementation: assign lengths based on frequency
-        # Higher frequency = shorter code
-        lengths = Array.new(num_symbols, 0)
+      # The pretree encodes the code lengths of the main and length trees.
+      # This method simulates the tree encoding process to determine which
+      # pretree symbols will be needed.
+      #
+      # @return [Array<Integer>] Frequency array for pretree symbols (0-19)
+      def calculate_pretree_frequencies
+        pretree_freq = Array.new(PRETREE_MAXSYMBOLS, 0)
-        # Get non-zero frequencies
-        non_zero = freqs.each_with_index.select { |freq, _| freq.positive? }
-        return lengths if non_zero.empty?
-        # Sort by frequency (descending)
-        sorted = non_zero.sort_by { |freq, _| -freq }
-        # Assign lengths using simple strategy
-        sorted.each_with_index do |(_, symbol), index|
-          # Assign shorter codes to more frequent symbols
-          lengths[symbol] = if index < num_symbols / 8
-                              4
-                            elsif index < num_symbols / 4
-                              6
-                            elsif index < num_symbols / 2
-                              8
-                            else
-                              10
-                            end
+        # Count symbols needed to encode main tree (two parts)
+        count_pretree_symbols(@maintree_lengths, 0, NUM_CHARS, pretree_freq)
+        count_pretree_symbols(@maintree_lengths, NUM_CHARS,
+                              @maintree_maxsymbols, pretree_freq)
+        # Count symbols needed to encode length tree
+        count_pretree_symbols(@length_lengths, 0, NUM_SECONDARY_LENGTHS,
+                              pretree_freq)
+        pretree_freq
+      end
+      # Count pretree symbols needed to encode a tree
+      #
+      # This simulates the write_tree_with_pretree encoding process to count
+      # which pretree symbols will be used, allowing us to build an optimal
+      # pretree.
+      #
+      # @param lengths [Array<Integer>] Tree lengths to encode
+      # @param start [Integer] Start index
+      # @param end_idx [Integer] End index (exclusive)
+      # @param freq [Array<Integer>] Frequency array to update
+      # @return [void]
+      def count_pretree_symbols(lengths, start, end_idx, freq)
+        i = start
+        prev_length = 0
+        while i < end_idx
+          length = lengths[i]
+          if length.zero?
+            # Count run of zeros
+            zero_count = 0
+            while i < end_idx && lengths[i].zero? && zero_count < 138
+              zero_count += 1
+              i += 1
+            end
+            # Encode long runs with symbol 18
+            if zero_count >= 20
+              while zero_count >= 20
+                run = [zero_count, 51].min
+                freq[18] += 1
+                zero_count -= run
+              end
+            end
+            # Encode medium runs with symbol 17
+            if zero_count >= 4
+              run = [zero_count, 19].min
+              freq[17] += 1
+              zero_count -= run
+            end
+            # Encode remaining short runs as deltas
+            if zero_count.positive?
+              zero_count.times do
+                delta = (17 - prev_length) % 17
+                freq[delta] += 1
+                prev_length = 0
+              end
+            end
+          else
+            # Encode as delta from previous length
+            delta = (length - prev_length) % 17
+            freq[delta] += 1
+            prev_length = length
+            i += 1
+          end
         end
+      end
-        lengths
+      # Calculate code lengths by traversing Huffman tree
+      #
+      # @param node [Array] Tree node [freq, symbol, left, right, depth]
+      # @param depth [Integer] Current depth
+      # @param lengths [Array<Integer>] Output array for lengths
+      # @return [void]
+      def calculate_depths(node, depth, lengths)
+        return unless node
+        _, symbol, left, right, = node
+        if symbol.nil?
+          # Internal node: recurse to children
+          calculate_depths(left, depth + 1, lengths)
+          calculate_depths(right, depth + 1, lengths)
+        else
+          # Leaf node: record length
+          lengths[symbol] = depth
+        end
+      end
+      # Calculate code lengths by traversing Huffman tree
+      #
+      # @param node [Array] Tree node [freq, symbol, left, right]
+      # @param depth [Integer] Current depth
+      # @param lengths [Array<Integer>] Output array for lengths
+      # @return [void]
+      def calculate_code_lengths(node, depth, lengths)
+        return unless node
+        _, symbol, left, right = node
+        if symbol.nil?
+          # Internal node: recurse to children
+          calculate_code_lengths(left, depth + 1, lengths)
+          calculate_code_lengths(right, depth + 1, lengths)
+        else
+          # Leaf node: record length
+          lengths[symbol] = depth
+        end
       end
       # Write block header

data/lib/cabriolet/compressors/mszip.rb CHANGED

@@ -56,7 +56,7 @@ module Cabriolet
       # @param input [System::FileHandle, System::MemoryHandle] Input handle
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
-      def initialize(io_system, input, output, buffer_size)
+      def initialize(io_system, input, output, buffer_size, **_kwargs)
         super
         # Initialize bitstream writer
@@ -88,10 +88,15 @@ module Cabriolet
         # Process data in FRAME_SIZE chunks
         # Each frame is independent and contains blocks ending with last_block=1
+        frame_num = 0
         while pos < input_data.bytesize
           chunk_size = [FRAME_SIZE, input_data.bytesize - pos].min
           chunk = input_data[pos, chunk_size]
+          if ENV["DEBUG_MSZIP_COMPRESS"]
+            warn "DEBUG compress: Frame #{frame_num}: pos=#{pos}, chunk_size=#{chunk_size}"
+          end
           # Write CK signature
           write_signature
@@ -99,11 +104,19 @@ module Cabriolet
           # Each frame's block is always marked as last within that frame
           compress_block(chunk, true)
+          # Flush bitstream after each frame to ensure data is written
+          @bitstream.flush
+          if ENV["DEBUG_MSZIP_COMPRESS"]
+            warn "DEBUG compress: Frame #{frame_num} complete, flushed"
+          end
           pos += chunk_size
           total_written += chunk_size
+          frame_num += 1
         end
-        # Flush any remaining bits
+        # Final flush (may not be needed now but keep for safety)
         @bitstream.flush
         total_written
@@ -129,8 +142,19 @@ module Cabriolet
       #
       # @return [void]
       def write_signature
+        if ENV["DEBUG_MSZIP_COMPRESS"]
+          warn "DEBUG write_signature: ENTRY"
+        end
         @bitstream.byte_align
-        SIGNATURE.each { |byte| @bitstream.write_raw_byte(byte) }
+        SIGNATURE.each do |byte|
+          if ENV["DEBUG_MSZIP_COMPRESS"]
+            warn "DEBUG write_signature: Writing byte 0x#{byte.to_s(16)}"
+          end
+          @bitstream.write_raw_byte(byte)
+        end
+        if ENV["DEBUG_MSZIP_COMPRESS"]
+          warn "DEBUG write_signature: EXIT"
+        end
       end
       # Compress a single block using fixed Huffman encoding
@@ -139,6 +163,10 @@ module Cabriolet
       # @param is_last [Boolean] Whether this is the last block
       # @return [void]
       def compress_block(data, is_last)
+        if ENV["DEBUG_MSZIP_COMPRESS"]
+          warn "DEBUG compress_block: ENTRY data_size=#{data.bytesize} is_last=#{is_last}"
+        end
         # Write block header
         @bitstream.write_bits(is_last ? 1 : 0, 1) # Last block flag
         @bitstream.write_bits(FIXED_HUFFMAN_BLOCK, 2) # Block type
@@ -151,6 +179,10 @@ module Cabriolet
         # Write end-of-block symbol (256)
         encode_literal(256)
+        if ENV["DEBUG_MSZIP_COMPRESS"]
+          warn "DEBUG compress_block: EXIT"
+        end
       end
       # Encode data using LZ77 matching and Huffman encoding

data/lib/cabriolet/compressors/quantum.rb CHANGED

@@ -5,15 +5,12 @@ module Cabriolet
     # Quantum compresses data using arithmetic coding and LZ77-based matching
     # Based on the Quantum decompressor and libmspack qtmd.c implementation
     #
-    # STATUS: Functional with known limitations
-    # - Literals: WORKING ✓
-    # - Short matches (3-13 bytes): WORKING ✓
-    # - Longer matches (14+ bytes): Limited support (known issue)
-    # - Simple data round-trips successfully
-    # - Complex repeated patterns may have issues
-    #
     # The Quantum method was created by David Stafford, adapted by Microsoft
     # Corporation.
+    #
+    # NOTE: This compressor is a work-in-progress. The arithmetic coding
+    # implementation needs refinement to match the decoder exactly.
+    # For now, this implementation focuses on correct structure.
     # rubocop:disable Metrics/ClassLength
     class Quantum < Base
       # Frame size (32KB per frame)
@@ -21,7 +18,7 @@ module Cabriolet
       # Match constants
       MIN_MATCH = 3
-      MAX_MATCH = 1028
+      MAX_MATCH = 259
       # Position slot tables (same as decompressor)
       POSITION_BASE = [
@@ -77,7 +74,8 @@ module Cabriolet
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
       # @param window_bits [Integer] Window size parameter (10-21)
-      def initialize(io_system, input, output, buffer_size, window_bits: 10)
+      def initialize(io_system, input, output, buffer_size, window_bits: 10,
+**_kwargs)
         super(io_system, input, output, buffer_size)
         # Validate window_bits
@@ -179,7 +177,6 @@ module Cabriolet
       # Compress a single frame
       def compress_frame(data)
-        # No header needed - the first 16 bits of encoded data will be read as C
         pos = 0
         while pos < data.bytesize
@@ -198,27 +195,25 @@ module Cabriolet
           end
         end
-        # Finish arithmetic coding - output final range
-        # We need to output enough bits to disambiguate the final range
+        # Finish arithmetic coding
         finish_arithmetic_coding
       end
-      # Finish arithmetic coding by outputting the final state
+      # Finish arithmetic coding
       def finish_arithmetic_coding
-        # Output enough bits to ensure decoder can decode correctly
-        # We need to output a value that falls within [L, H)
-        # A common approach is to output L plus half the range
-        @underflow_bits += 1
-        bit = if @l.anybits?(0x4000)
-                1
-              else
-                0
-              end
-        @bitstream.write_bits_msb(bit, 1)
-        @underflow_bits.times do
-          @bitstream.write_bits_msb(bit ^ 1, 1)
+        # Output pending underflow bits
+        if @underflow_bits.positive?
+          bit = if @l.anybits?(0x4000)
+                  1
+                else
+                  0
+                end
+          @bitstream.write_bits_msb(bit, 1)
+          @underflow_bits.times do
+            @bitstream.write_bits_msb(bit ^ 1, 1)
+          end
+          @underflow_bits = 0
         end
-        @underflow_bits = 0
       end
       # Find best match in the sliding window
@@ -335,7 +330,6 @@ module Cabriolet
       end
       # Encode a symbol using arithmetic coding
-      # This is the inverse of GET_SYMBOL macro in qtmd.c
       def encode_symbol(model, sym)
         # Find symbol index in model
         i = 0
@@ -346,33 +340,29 @@ module Cabriolet
                 "Symbol #{sym} not found in model"
         end
-        # Calculate range (matching decoder line 93, 101-102)
-        range = (@h - @l) + 1
+        # Calculate range - use decoder's formula
+        range = ((@h - @l) & 0xFFFF) + 1
         symf = model.syms[0].cumfreq
-        # Update H and L (matching decoder lines 103-104)
-        # Decoder uses syms[i-1] and syms[i], so encoder at index j
-        # should use syms[j] and syms[j+1] to make decoder land at i=j+1
-        # But decoder returns syms[i-1].sym, so it will return syms[j].sym ✓
+        # Update H and L
         @h = @l + ((model.syms[i].cumfreq * range) / symf) - 1
         @l += ((model.syms[i + 1].cumfreq * range) / symf)
-        # Update model frequencies (matching decoder line 106)
+        # Update model frequencies
         j = i
         while j >= 0
           model.syms[j].cumfreq += 8
           j -= 1
         end
-        # Check if model needs updating (matching decoder line 107)
+        # Check if model needs updating
         update_model(model) if model.syms[0].cumfreq > 3800
-        # Normalize range (matching decoder lines 109-121)
+        # Normalize range
         normalize_range
       end
       # Normalize arithmetic coding range and output bits
-      # This implements the encoder equivalent of the decoder's normalization (lines 109-121)
       def normalize_range
         loop do
           if (@l & 0x8000) == (@h & 0x8000)
@@ -395,37 +385,36 @@ module Cabriolet
             @h |= 0x4000
             # Can't normalize further
           end
-          # Shift range (both for underflow and MSB match cases)
+          # Shift range
           @l = (@l << 1) & 0xFFFF
           @h = ((@h << 1) | 1) & 0xFFFF
         end
       end
-      # Update model statistics (matching qtmd_update_model exactly)
+      # Update model statistics
       def update_model(model)
         model.shiftsleft -= 1
         if model.shiftsleft.positive?
-          # Simple shift (matching decoder lines 129-135)
+          # Simple shift
           (model.entries - 1).downto(0) do |i|
             model.syms[i].cumfreq >>= 1
             model.syms[i].cumfreq = model.syms[i + 1].cumfreq + 1 if model.syms[i].cumfreq <= model.syms[i + 1].cumfreq
           end
         else
-          # Full rebuild (matching decoder lines 137-163)
+          # Full rebuild
           model.shiftsleft = 50
-          # Convert cumfreq to frequencies (lines 139-145)
+          # Convert cumfreq to frequencies
           (0...model.entries).each do |i|
             model.syms[i].cumfreq -= model.syms[i + 1].cumfreq
             model.syms[i].cumfreq += 1
             model.syms[i].cumfreq >>= 1
           end
-          # Sort by frequency (selection sort for stability, lines 150-158)
+          # Sort by frequency
           (0...(model.entries - 1)).each do |i|
             ((i + 1)...model.entries).each do |j|
               if model.syms[i].cumfreq < model.syms[j].cumfreq
@@ -434,7 +423,7 @@ module Cabriolet
             end
           end
-          # Convert back to cumulative frequencies (lines 161-163)
+          # Convert back to cumulative frequencies
           (model.entries - 1).downto(0) do |i|
             model.syms[i].cumfreq += model.syms[i + 1].cumfreq
           end

data/lib/cabriolet/decompressors/base.rb CHANGED

@@ -12,7 +12,7 @@ module Cabriolet
       # @param input [System::FileHandle, System::MemoryHandle] Input handle
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
-      def initialize(io_system, input, output, buffer_size)
+      def initialize(io_system, input, output, buffer_size, **_kwargs)
         @io_system = io_system
         @input = input
         @output = output