cabriolet 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
@@ -0,0 +1,175 @@
+# frozen_string_literal: true
+
+module Cabriolet
+  module Collections
+    # FileCollection manages a collection of files for compression
+    # Provides unified interface for adding files and preparing them for compression
+    class FileCollection
+      include Enumerable
+
+      # Initialize a new file collection
+      #
+      # @param format_options [Hash] Options specific to the archive format
+      def initialize(format_options = {})
+        @files = []
+        @format_options = format_options
+      end
+
+      # Add a file to the collection
+      #
+      # @param source_path [String] Path to the source file
+      # @param archive_path [String, nil] Path within the archive (defaults to basename)
+      # @param options [Hash] Additional options for this file
+      # @return [self] Returns self for chaining
+      #
+      # @example
+      #   collection.add("README.md", "docs/README.md")
+      #   collection.add("data.txt") # Uses basename
+      def add(source_path, archive_path = nil, **options)
+        validate_source(source_path)
+
+        @files << {
+          source: source_path,
+          archive: archive_path || ::File.basename(source_path),
+          options: options,
+        }
+
+        self
+      end
+
+      # Add multiple files at once
+      #
+      # @param files [Array<Hash>] Array of file hashes with :source, :archive, :options keys
+      # @return [self] Returns self for chaining
+      def add_all(files)
+        files.each do |file|
+          add(file[:source], file[:archive], **file.fetch(:options, {}))
+        end
+        self
+      end
+
+      # Iterate over files in the collection
+      #
+      # @yield [file_entry] Yields each file entry hash
+      # @return [Enumerator] If no block given
+      def each(&)
+        @files.each(&)
+      end
+
+      # Get the number of files in the collection
+      #
+      # @return [Integer] Number of files
+      def size
+        @files.size
+      end
+
+      # Check if collection is empty
+      #
+      # @return [Boolean] True if no files
+      def empty?
+        @files.empty?
+      end
+
+      # Clear all files from the collection
+      #
+      # @return [self] Returns self for chaining
+      def clear
+        @files.clear
+        self
+      end
+
+      # Prepare files for compression by reading metadata
+      #
+      # @return [Array<Hash>] Array of prepared file info hashes
+      def prepare_for_compression
+        @files.map do |file_entry|
+          prepare_file_info(file_entry)
+        end
+      end
+
+      # Get total uncompressed size of all files
+      #
+      # @return [Integer] Total size in bytes
+      def total_size
+        @files.sum { |f| ::File.size(f[:source]) }
+      end
+
+      # Group files by directory for archive organization
+      #
+      # @return [Hash] Hash with directory paths as keys and file arrays as values
+      def by_directory
+        @files.group_by do |file|
+          ::File.dirname(file[:archive])
+        end
+      end
+
+      # Find files by pattern in archive path
+      #
+      # @param pattern [String, Regexp] Pattern to match
+      # @return [Array<Hash>] Matching file entries
+      def find_by_pattern(pattern)
+        @files.select do |file|
+          if pattern.is_a?(Regexp)
+            file[:archive] =~ pattern
+          else
+            file[:archive].include?(pattern)
+          end
+        end
+      end
+
+      private
+
+      # Validate that source file exists and is accessible
+      #
+      # @param path [String] Path to validate
+      # @raise [ArgumentError] if file doesn't exist or isn't a regular file
+      def validate_source(path)
+        unless ::File.exist?(path)
+          raise ArgumentError, "File does not exist: #{path}"
+        end
+
+        unless ::File.file?(path)
+          raise ArgumentError, "Not a regular file: #{path}"
+        end
+      end
+
+      # Prepare file information for compression
+      #
+      # @param file_entry [Hash] Original file entry
+      # @return [Hash] Prepared file info with metadata
+      def prepare_file_info(file_entry)
+        stat = ::File.stat(file_entry[:source])
+
+        {
+          source_path: file_entry[:source],
+          archive_path: file_entry[:archive],
+          size: stat.size,
+          mtime: stat.mtime,
+          atime: stat.atime,
+          attributes: calculate_attributes(stat),
+          options: file_entry[:options],
+        }
+      end
+
+      # Calculate file attributes for archive format
+      #
+      # @param stat [File::Stat] File stat object
+      # @return [Integer] Attribute flags
+      def calculate_attributes(stat)
+        attribs = Constants::ATTRIB_ARCH
+
+        # Set read-only flag if not writable
+        attribs |= Constants::ATTRIB_READONLY unless stat.writable?
+
+        # Set hidden flag if hidden (Unix dotfiles)
+        basename = ::File.basename(@files.first[:source])
+        attribs |= Constants::ATTRIB_HIDDEN if basename.start_with?(".")
+
+        # Set system flag for system files
+        attribs |= Constants::ATTRIB_SYSTEM if stat.socket? || stat.symlink?
+
+        attribs
+      end
+    end
+  end
+end
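For orientation, a minimal usage sketch of the FileCollection API added above; the top-level require and the :compression format option are assumptions for illustration, not taken from the gem's documentation.

    require "cabriolet"  # assumed entry point; adjust to the gem's actual require

    # Build a collection, then hand the prepared entries to a compressor.
    collection = Cabriolet::Collections::FileCollection.new(compression: :mszip)  # option key assumed
    collection.add("README.md", "docs/README.md")  # explicit archive path
    collection.add("data.txt")                     # archive path defaults to the basename

    collection.size          # => 2
    collection.total_size    # combined size of both source files in bytes
    collection.by_directory  # => { "docs" => [...], "." => [...] }

    # Each prepared entry carries size, mtime, atime and attribute flags.
    collection.prepare_for_compression.each do |info|
      puts "#{info[:archive_path]} (#{info[:size]} bytes)"
    end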
@@ -15,7 +15,7 @@ module Cabriolet
       # @param input [System::FileHandle, System::MemoryHandle] Input handle
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
-      def initialize(io_system, input, output, buffer_size)
+      def initialize(io_system, input, output, buffer_size, **_kwargs)
        @io_system = io_system
        @input = input
        @output = output
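The only substantive change in this hunk is the trailing `**_kwargs`. The same pattern is applied to the other constructors below, presumably so that a single caller (such as the new algorithm factory) can forward one options hash to every algorithm and let each constructor ignore the keywords it does not use. A generic Ruby illustration of that behaviour, not gem code:

    def strict(io_system, input, output, buffer_size); end
    def lenient(io_system, input, output, buffer_size, **_kwargs); end

    opts = { window_bits: 17 }
    lenient(nil, nil, nil, 4096, **opts)  # extra keyword is silently ignored
    strict(nil, nil, nil, 4096, **opts)   # raises ArgumentError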
@@ -1,5 +1,6 @@
 # frozen_string_literal: true
 
+require_relative "base"
 require_relative "../binary/bitstream_writer"
 require_relative "../huffman/encoder"
 
@@ -66,7 +67,8 @@ module Cabriolet
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
       # @param window_bits [Integer] Window size (15-21 for regular LZX)
-      def initialize(io_system, input, output, buffer_size, window_bits: 15)
+      def initialize(io_system, input, output, buffer_size, window_bits: 15,
+                     **_kwargs)
        super(io_system, input, output, buffer_size)
 
        # Validate window_bits
@@ -82,8 +84,9 @@ module Cabriolet
        @num_offsets = POSITION_SLOTS[window_bits - 15] << 3
        @maintree_maxsymbols = NUM_CHARS + @num_offsets
 
-        # Initialize bitstream writer
-        @bitstream = Binary::BitstreamWriter.new(io_system, output,
+        # Initialize bitstream writer (LZX uses MSB-first bit ordering per libmspack lzxd.c)
+        @bitstream = Binary::BitstreamWriter.new(io_system, output,
+                                                 buffer_size, bit_order: :msb)
 
        # Initialize sliding window for LZ77
        @window = "\0" * @window_size
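The notable change here is `bit_order: :msb`: LZX packs bits most-significant-first, whereas DEFLATE-style streams (MSZIP) pack least-significant-first. A standalone illustration of the difference, independent of the gem's BitstreamWriter:

    # Pack (value, width) pairs MSB-first: each value lands in the high bits.
    def pack_msb(pairs)
      acc = 0
      bits = 0
      pairs.each do |value, width|
        acc = (acc << width) | value
        bits += width
      end
      acc << ((8 - bits % 8) % 8)  # right-pad the last partial byte with zeros
    end

    # Pack (value, width) pairs LSB-first: each value lands in the low bits.
    def pack_lsb(pairs)
      acc = 0
      bits = 0
      pairs.each do |value, width|
        acc |= value << bits
        bits += width
      end
      acc
    end

    pairs = [[0b101, 3], [0b01, 2]]
    pack_msb(pairs).to_s(2)  # => "10101000"
    pack_lsb(pairs).to_s(2)  # => "1101"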
@@ -119,6 +122,7 @@ module Cabriolet
          frame_data = input_data[pos, frame_size]
 
          # Compress this frame
+          # TODO: Use compress_frame_verbatim once tree encoding is fixed
          compress_frame(frame_data)
 
          pos += frame_size
@@ -152,19 +156,46 @@ module Cabriolet
       # @param data [String] Frame data to compress
       # @return [void]
       def compress_frame(data)
-        #
-
+        # For uncompressed blocks, block length is just the frame data size
+        # (offset registers are NOT included in the block length field)
+        block_length = data.bytesize
 
-        # Write
+        # Write UNCOMPRESSED block header
+        write_block_header(BLOCKTYPE_UNCOMPRESSED, block_length)
+
+        # Write offset registers (R0, R1, R2)
        write_offset_registers
 
-        # Write raw data
+        # Write raw uncompressed data
        data.each_byte do |byte|
          @bitstream.write_bits(byte, 8)
        end
+      end
+
+      # Compress a single frame (32KB) - VERBATIM version (currently disabled)
+      #
+      # @param data [String] Frame data to compress
+      # @return [void]
+      def compress_frame_verbatim(data)
+        # Reset frequency statistics for each frame
+        @literal_freq.fill(0)
+        @match_freq.fill(0)
+        @length_freq.fill(0)
+
+        # Analyze frame to generate LZ77 tokens
+        tokens = analyze_frame(data)
 
-        #
-
+        # Build Huffman trees from statistics
+        build_trees
+
+        # Write VERBATIM block header
+        write_block_header(BLOCKTYPE_VERBATIM, data.bytesize)
+
+        # Write Huffman tree definitions
+        write_trees
+
+        # Encode all tokens using the Huffman codes
+        encode_tokens(tokens)
      end
 
      # Analyze frame and generate LZ77 tokens
@@ -301,68 +332,224 @@ module Cabriolet
        slot
      end
 
+      # Build Huffman code lengths from frequencies
+      #
+      # Uses a simplified approach: assign equal lengths to all symbols.
+      # This guarantees valid Huffman trees that satisfy Kraft inequality.
+      #
+      # @param freqs [Array<Integer>] Symbol frequencies
+      # @param num_symbols [Integer] Number of symbols
+      # @return [Array<Integer>] Code lengths
+      def build_tree_lengths(freqs, num_symbols)
+        lengths = Array.new(num_symbols, 0)
+
+        # Get symbols with non-zero frequencies
+        non_zero_symbols = freqs.each_with_index.select do |freq, _|
+          freq.positive?
+        end.map { |_, sym| sym }
+
+        # Handle edge cases
+        if non_zero_symbols.empty?
+          # Empty tree: create minimal valid tree with 2 symbols
+          lengths[0] = 1
+          lengths[1] = 1
+          return lengths
+        elsif non_zero_symbols.size == 1
+          # Single symbol: need at least 2 symbols for valid Huffman tree
+          symbol = non_zero_symbols[0]
+          lengths[symbol] = 1
+          dummy = symbol.zero? ? 1 : 0
+          lengths[dummy] = 1
+          return lengths
+        end
+
+        # Calculate required length: ceil(log2(count))
+        count = non_zero_symbols.size
+        bit_length = 1
+        while (1 << bit_length) < count
+          bit_length += 1
+        end
+
+        # Assign same length to all non-zero symbols
+        non_zero_symbols.each do |symbol|
+          lengths[symbol] = bit_length
+        end
+
+        # Pad with dummy symbols to make tree complete (2^bit_length total symbols)
+        # This ensures Kraft inequality sum equals exactly 1.0
+        total_needed = 1 << bit_length
+        dummy_count = total_needed - count
+
+        if dummy_count.positive?
+          dummy_index = 0
+          while dummy_count.positive? && dummy_index < num_symbols
+            if lengths[dummy_index].zero?
+              lengths[dummy_index] = bit_length
+              dummy_count -= 1
+            end
+            dummy_index += 1
+          end
+        end
+
+        lengths
+      end
+
      # Build Huffman trees from frequency statistics
      #
+      # This creates three trees for LZX compression:
+      # 1. Main tree: literals (0-255) + match position/length combinations
+      # 2. Length tree: additional length symbols for long matches
+      # 3. Pretree: encodes the code lengths of main/length trees
+      #
      # @return [void]
      def build_trees
-        #
-
-
+        # Step 1: Combine literal and match frequencies for main tree
+        maintree_freq = @literal_freq + @match_freq
+
+        # Step 2: Build main tree code lengths
+        @maintree_lengths = build_tree_lengths(maintree_freq,
                                               @maintree_maxsymbols)
-        @maintree_codes = Huffman::Encoder.build_codes(@maintree_lengths,
-                                                       @maintree_maxsymbols)
 
-        # Build length tree
+        # Step 3: Build length tree code lengths
        @length_lengths = build_tree_lengths(@length_freq, LENGTH_MAXSYMBOLS)
+
+        # Step 4: Calculate pretree frequencies by simulating tree encoding
+        pretree_freq = calculate_pretree_frequencies
+
+        # Step 5: Build pretree code lengths
+        @pretree_lengths = build_tree_lengths(pretree_freq, PRETREE_MAXSYMBOLS)
+
+        # Step 6: Generate code tables from lengths
+        @maintree_codes = Huffman::Encoder.build_codes(@maintree_lengths,
+                                                       @maintree_maxsymbols)
        @length_codes = Huffman::Encoder.build_codes(@length_lengths,
                                                     LENGTH_MAXSYMBOLS)
-
-        # Build pretree (used to encode the other trees)
-        # Create a valid Huffman tree that satisfies Kraft inequality
-        # For 20 symbols, use: 2@3bits + 6@4bits + 12@5bits = 1.0
-        @pretree_lengths = Array.new(PRETREE_MAXSYMBOLS, 0)
-        # Most common symbols (0-1): 3 bits
-        (0..1).each { |i| @pretree_lengths[i] = 3 }
-        # Common symbols (2-7): 4 bits
-        (2..7).each { |i| @pretree_lengths[i] = 4 }
-        # Less common symbols (8-19): 5 bits
-        (8..19).each { |i| @pretree_lengths[i] = 5 }
        @pretree_codes = Huffman::Encoder.build_codes(@pretree_lengths,
                                                      PRETREE_MAXSYMBOLS)
      end
 
-      #
+      # Calculate pretree symbol frequencies
      #
-      #
-      #
-      #
-
-
-
-
+      # The pretree encodes the code lengths of the main and length trees.
+      # This method simulates the tree encoding process to determine which
+      # pretree symbols will be needed.
+      #
+      # @return [Array<Integer>] Frequency array for pretree symbols (0-19)
+      def calculate_pretree_frequencies
+        pretree_freq = Array.new(PRETREE_MAXSYMBOLS, 0)
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Count symbols needed to encode main tree (two parts)
+        count_pretree_symbols(@maintree_lengths, 0, NUM_CHARS, pretree_freq)
+        count_pretree_symbols(@maintree_lengths, NUM_CHARS,
+                              @maintree_maxsymbols, pretree_freq)
+
+        # Count symbols needed to encode length tree
+        count_pretree_symbols(@length_lengths, 0, NUM_SECONDARY_LENGTHS,
+                              pretree_freq)
+
+        pretree_freq
+      end
+
+      # Count pretree symbols needed to encode a tree
+      #
+      # This simulates the write_tree_with_pretree encoding process to count
+      # which pretree symbols will be used, allowing us to build an optimal
+      # pretree.
+      #
+      # @param lengths [Array<Integer>] Tree lengths to encode
+      # @param start [Integer] Start index
+      # @param end_idx [Integer] End index (exclusive)
+      # @param freq [Array<Integer>] Frequency array to update
+      # @return [void]
+      def count_pretree_symbols(lengths, start, end_idx, freq)
+        i = start
+        prev_length = 0
+
+        while i < end_idx
+          length = lengths[i]
+
+          if length.zero?
+            # Count run of zeros
+            zero_count = 0
+            while i < end_idx && lengths[i].zero? && zero_count < 138
+              zero_count += 1
+              i += 1
+            end
+
+            # Encode long runs with symbol 18
+            if zero_count >= 20
+              while zero_count >= 20
+                run = [zero_count, 51].min
+                freq[18] += 1
+                zero_count -= run
+              end
+            end
+
+            # Encode medium runs with symbol 17
+            if zero_count >= 4
+              run = [zero_count, 19].min
+              freq[17] += 1
+              zero_count -= run
+            end
+
+            # Encode remaining short runs as deltas
+            if zero_count.positive?
+              zero_count.times do
+                delta = (17 - prev_length) % 17
+                freq[delta] += 1
+                prev_length = 0
+              end
+            end
+          else
+            # Encode as delta from previous length
+            delta = (length - prev_length) % 17
+            freq[delta] += 1
+            prev_length = length
+            i += 1
+          end
        end
+      end
 
-
+      # Calculate code lengths by traversing Huffman tree
+      #
+      # @param node [Array] Tree node [freq, symbol, left, right, depth]
+      # @param depth [Integer] Current depth
+      # @param lengths [Array<Integer>] Output array for lengths
+      # @return [void]
+      def calculate_depths(node, depth, lengths)
+        return unless node
+
+        _, symbol, left, right, = node
+
+        if symbol.nil?
+          # Internal node: recurse to children
+          calculate_depths(left, depth + 1, lengths)
+          calculate_depths(right, depth + 1, lengths)
+        else
+          # Leaf node: record length
+          lengths[symbol] = depth
+        end
+      end
+
+      # Calculate code lengths by traversing Huffman tree
+      #
+      # @param node [Array] Tree node [freq, symbol, left, right]
+      # @param depth [Integer] Current depth
+      # @param lengths [Array<Integer>] Output array for lengths
+      # @return [void]
+      def calculate_code_lengths(node, depth, lengths)
+        return unless node
+
+        _, symbol, left, right = node
+
+        if symbol.nil?
+          # Internal node: recurse to children
+          calculate_code_lengths(left, depth + 1, lengths)
+          calculate_code_lengths(right, depth + 1, lengths)
+        else
+          # Leaf node: record length
+          lengths[symbol] = depth
+        end
      end
 
      # Write block header
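The Kraft inequality remark in `build_tree_lengths` above is easy to verify independently: giving every used symbol the length ceil(log2(count)) and padding with dummy symbols up to 2**bit_length makes the Kraft sum exactly 1.0, so the lengths always describe a decodable (if suboptimal) prefix code. A small standalone check, not gem code:

    # Mirror of the equal-length assignment used by build_tree_lengths above.
    def equal_lengths(freqs)
      used = freqs.each_index.select { |i| freqs[i].positive? }
      bit_length = 1
      bit_length += 1 while (1 << bit_length) < used.size
      lengths = Array.new(freqs.size, 0)
      used.each { |sym| lengths[sym] = bit_length }
      need = (1 << bit_length) - used.size   # dummy symbols to complete the tree
      freqs.each_index do |i|
        break if need.zero?
        next unless lengths[i].zero?
        lengths[i] = bit_length
        need -= 1
      end
      lengths
    end

    lengths = equal_lengths([5, 0, 9, 1, 0, 3, 0, 2])  # 5 used symbols -> 3-bit codes
    lengths                                      # => [3, 3, 3, 3, 3, 3, 3, 3]
    lengths.reject(&:zero?).sum { |l| 2.0**-l }  # => 1.0 (Kraft sum)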
@@ -56,7 +56,7 @@ module Cabriolet
       # @param input [System::FileHandle, System::MemoryHandle] Input handle
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
-      def initialize(io_system, input, output, buffer_size)
+      def initialize(io_system, input, output, buffer_size, **_kwargs)
        super
 
        # Initialize bitstream writer
@@ -88,10 +88,15 @@ module Cabriolet
 
        # Process data in FRAME_SIZE chunks
        # Each frame is independent and contains blocks ending with last_block=1
+        frame_num = 0
        while pos < input_data.bytesize
          chunk_size = [FRAME_SIZE, input_data.bytesize - pos].min
          chunk = input_data[pos, chunk_size]
 
+          if ENV["DEBUG_MSZIP_COMPRESS"]
+            warn "DEBUG compress: Frame #{frame_num}: pos=#{pos}, chunk_size=#{chunk_size}"
+          end
+
          # Write CK signature
          write_signature
 
@@ -99,11 +104,19 @@ module Cabriolet
          # Each frame's block is always marked as last within that frame
          compress_block(chunk, true)
 
+          # Flush bitstream after each frame to ensure data is written
+          @bitstream.flush
+
+          if ENV["DEBUG_MSZIP_COMPRESS"]
+            warn "DEBUG compress: Frame #{frame_num} complete, flushed"
+          end
+
          pos += chunk_size
          total_written += chunk_size
+          frame_num += 1
        end
 
-        #
+        # Final flush (may not be needed now but keep for safety)
        @bitstream.flush
 
        total_written
@@ -129,8 +142,19 @@ module Cabriolet
      #
      # @return [void]
      def write_signature
+        if ENV["DEBUG_MSZIP_COMPRESS"]
+          warn "DEBUG write_signature: ENTRY"
+        end
        @bitstream.byte_align
-        SIGNATURE.each
+        SIGNATURE.each do |byte|
+          if ENV["DEBUG_MSZIP_COMPRESS"]
+            warn "DEBUG write_signature: Writing byte 0x#{byte.to_s(16)}"
+          end
+          @bitstream.write_raw_byte(byte)
+        end
+        if ENV["DEBUG_MSZIP_COMPRESS"]
+          warn "DEBUG write_signature: EXIT"
+        end
      end
 
      # Compress a single block using fixed Huffman encoding
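For context on the frame loop and signature writer above: MSZIP frames cover at most 32 KB of input each, and every frame begins with the two-byte "CK" signature followed by a DEFLATE block marked as final. A small illustrative sketch; the concrete values of SIGNATURE and FRAME_SIZE are assumptions consistent with the MSZIP format, not read from the gem:

    SIGNATURE = [0x43, 0x4B]  # "CK", assumed value of the constant used above
    FRAME_SIZE = 32_768       # assumed 32 KB frame size, as described in the comments

    # Each frame holds at most FRAME_SIZE input bytes, so a 100 KB input
    # produces four "CK"-prefixed frames, each ending a fixed-Huffman block.
    def frame_count(input_size)
      (input_size + FRAME_SIZE - 1) / FRAME_SIZE
    end

    frame_count(100_000)  # => 4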
@@ -139,6 +163,10 @@ module Cabriolet
      # @param is_last [Boolean] Whether this is the last block
      # @return [void]
      def compress_block(data, is_last)
+        if ENV["DEBUG_MSZIP_COMPRESS"]
+          warn "DEBUG compress_block: ENTRY data_size=#{data.bytesize} is_last=#{is_last}"
+        end
+
        # Write block header
        @bitstream.write_bits(is_last ? 1 : 0, 1) # Last block flag
        @bitstream.write_bits(FIXED_HUFFMAN_BLOCK, 2) # Block type
@@ -151,6 +179,10 @@ module Cabriolet
 
        # Write end-of-block symbol (256)
        encode_literal(256)
+
+        if ENV["DEBUG_MSZIP_COMPRESS"]
+          warn "DEBUG compress_block: EXIT"
+        end
      end
 
      # Encode data using LZ77 matching and Huffman encoding