RubyGems - cabriolet - Versions diffs - 0.1.2 → 0.2.1 - Mend

cabriolet 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (102) hide show

checksums.yaml +4 -4
data/README.adoc +703 -38
data/lib/cabriolet/algorithm_factory.rb +250 -0
data/lib/cabriolet/base_compressor.rb +206 -0
data/lib/cabriolet/binary/bitstream.rb +167 -16
data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
data/lib/cabriolet/binary/chm_structures.rb +2 -2
data/lib/cabriolet/binary/hlp_structures.rb +258 -37
data/lib/cabriolet/binary/lit_structures.rb +231 -65
data/lib/cabriolet/binary/oab_structures.rb +17 -1
data/lib/cabriolet/cab/command_handler.rb +226 -0
data/lib/cabriolet/cab/compressor.rb +108 -84
data/lib/cabriolet/cab/decompressor.rb +16 -20
data/lib/cabriolet/cab/extractor.rb +142 -66
data/lib/cabriolet/cab/file_compression_work.rb +52 -0
data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
data/lib/cabriolet/checksum.rb +49 -0
data/lib/cabriolet/chm/command_handler.rb +227 -0
data/lib/cabriolet/chm/compressor.rb +7 -3
data/lib/cabriolet/chm/decompressor.rb +39 -21
data/lib/cabriolet/chm/parser.rb +5 -2
data/lib/cabriolet/cli/base_command_handler.rb +127 -0
data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
data/lib/cabriolet/cli/command_registry.rb +83 -0
data/lib/cabriolet/cli.rb +356 -607
data/lib/cabriolet/collections/file_collection.rb +175 -0
data/lib/cabriolet/compressors/base.rb +1 -1
data/lib/cabriolet/compressors/lzx.rb +241 -54
data/lib/cabriolet/compressors/mszip.rb +35 -3
data/lib/cabriolet/compressors/quantum.rb +36 -95
data/lib/cabriolet/decompressors/base.rb +1 -1
data/lib/cabriolet/decompressors/lzss.rb +13 -3
data/lib/cabriolet/decompressors/lzx.rb +70 -33
data/lib/cabriolet/decompressors/mszip.rb +126 -39
data/lib/cabriolet/decompressors/quantum.rb +83 -53
data/lib/cabriolet/errors.rb +3 -0
data/lib/cabriolet/extraction/base_extractor.rb +88 -0
data/lib/cabriolet/extraction/extractor.rb +171 -0
data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
data/lib/cabriolet/file_entry.rb +156 -0
data/lib/cabriolet/file_manager.rb +144 -0
data/lib/cabriolet/format_base.rb +79 -0
data/lib/cabriolet/hlp/command_handler.rb +282 -0
data/lib/cabriolet/hlp/compressor.rb +28 -238
data/lib/cabriolet/hlp/decompressor.rb +107 -147
data/lib/cabriolet/hlp/parser.rb +52 -101
data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
data/lib/cabriolet/huffman/encoder.rb +15 -12
data/lib/cabriolet/huffman/tree.rb +85 -1
data/lib/cabriolet/kwaj/command_handler.rb +213 -0
data/lib/cabriolet/kwaj/compressor.rb +7 -3
data/lib/cabriolet/kwaj/decompressor.rb +18 -12
data/lib/cabriolet/lit/command_handler.rb +221 -0
data/lib/cabriolet/lit/compressor.rb +119 -168
data/lib/cabriolet/lit/content_encoder.rb +76 -0
data/lib/cabriolet/lit/content_type_detector.rb +50 -0
data/lib/cabriolet/lit/decompressor.rb +518 -152
data/lib/cabriolet/lit/directory_builder.rb +153 -0
data/lib/cabriolet/lit/guid_generator.rb +16 -0
data/lib/cabriolet/lit/header_writer.rb +124 -0
data/lib/cabriolet/lit/parser.rb +670 -0
data/lib/cabriolet/lit/piece_builder.rb +74 -0
data/lib/cabriolet/lit/structure_builder.rb +252 -0
data/lib/cabriolet/models/hlp_file.rb +130 -29
data/lib/cabriolet/models/hlp_header.rb +105 -17
data/lib/cabriolet/models/lit_header.rb +212 -25
data/lib/cabriolet/models/szdd_header.rb +10 -2
data/lib/cabriolet/models/winhelp_header.rb +127 -0
data/lib/cabriolet/oab/command_handler.rb +257 -0
data/lib/cabriolet/oab/compressor.rb +17 -8
data/lib/cabriolet/oab/decompressor.rb +41 -10
data/lib/cabriolet/offset_calculator.rb +81 -0
data/lib/cabriolet/plugin.rb +233 -0
data/lib/cabriolet/plugin_manager.rb +453 -0
data/lib/cabriolet/plugin_validator.rb +422 -0
data/lib/cabriolet/quantum_shared.rb +105 -0
data/lib/cabriolet/system/io_system.rb +3 -0
data/lib/cabriolet/system/memory_handle.rb +17 -4
data/lib/cabriolet/szdd/command_handler.rb +217 -0
data/lib/cabriolet/szdd/compressor.rb +15 -11
data/lib/cabriolet/szdd/decompressor.rb +18 -9
data/lib/cabriolet/version.rb +1 -1
data/lib/cabriolet.rb +181 -20
metadata +69 -4
data/lib/cabriolet/auto.rb +0 -173
data/lib/cabriolet/parallel.rb +0 -333

data/lib/cabriolet/decompressors/mszip.rb CHANGED Viewed

@@ -14,6 +14,13 @@ module Cabriolet
       DISTANCE_MAXSYMBOLS = 32
       DISTANCE_TABLEBITS = 6
+      # MSZIP signature bytes
+      SIGNATURE_BYTE_C = 0x43  # ASCII 'C'
+      SIGNATURE_BYTE_K = 0x4B  # ASCII 'K'
+      # Maximum bytes to search for CK signature (prevents infinite loops)
+      MAX_SIGNATURE_SEARCH = 10_000
       # Match lengths for literal codes 257-285
       LIT_LENGTHS = [
         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
@@ -50,7 +57,8 @@ module Cabriolet
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
       # @param fix_mszip [Boolean] Enable repair mode for corrupted data
-      def initialize(io_system, input, output, buffer_size, fix_mszip: false)
+      def initialize(io_system, input, output, buffer_size, fix_mszip: false,
+salvage: false, **_kwargs)
         super(io_system, input, output, buffer_size)
         @fix_mszip = fix_mszip
@@ -58,9 +66,11 @@ module Cabriolet
         @window = "\0" * FRAME_SIZE
         @window_posn = 0
         @bytes_output = 0
+        @window_offset = 0 # Offset into window for unconsumed data (for multi-file CFDATA blocks)
         # Initialize bitstream
-        @bitstream = Binary::Bitstream.new(io_system, input, buffer_size)
+        @bitstream = Binary::Bitstream.new(io_system, input, buffer_size,
+                                           salvage: salvage)
         # Initialize Huffman trees
         @literal_lengths = Array.new(LITERAL_MAXSYMBOLS, 0)
@@ -76,15 +86,50 @@ module Cabriolet
       def decompress(bytes)
         total_written = 0
+        if ENV["DEBUG_MSZIP"]
+          warn "DEBUG MSZIP.decompress(#{bytes}): ENTRY bytes_output=#{@bytes_output} window_offset=#{@window_offset} window_posn=#{@window_posn}"
+        end
         while bytes.positive?
-          # Read 'CK' signature
-          read_signature
+          # Check if we have buffered data from previous inflate
+          if @bytes_output.positive?
+            if ENV["DEBUG_MSZIP"]
+              warn "DEBUG MSZIP: Using buffered data: bytes_output=#{@bytes_output} window_offset=#{@window_offset}"
+            end
-          # Reset window state for new block
+            # Write from buffer
+            write_amount = [bytes, @bytes_output].min
+            io_system.write(output, @window[@window_offset, write_amount])
+            total_written += write_amount
+            bytes -= write_amount
+            @bytes_output -= write_amount
+            @window_offset += write_amount
+            if ENV["DEBUG_MSZIP"]
+              warn "DEBUG MSZIP: After buffer write: total_written=#{total_written} bytes_remaining=#{bytes} bytes_output=#{@bytes_output}"
+            end
+            # Continue loop to check if we need more data
+            next
+          end
+          # No buffered data - need to inflate a new MSZIP frame
+          # Reset window for new frame
+          @window_offset = 0
           @window_posn = 0
-          @bytes_output = 0
-          # Inflate the block
+          # Read 'CK' signature (marks start of MSZIP frame)
+          # Every MSZIP frame starts with a CK signature
+          if ENV["DEBUG_MSZIP"]
+            warn "DEBUG MSZIP: Reading CK signature (new MSZIP frame)"
+          end
+          read_signature
+          # Inflate the MSZIP frame (processes deflate blocks until last_block or window full)
+          if ENV["DEBUG_MSZIP"]
+            warn "DEBUG MSZIP: Calling inflate_block"
+          end
           begin
             inflate_block
           rescue DecompressionError
@@ -97,11 +142,15 @@ module Cabriolet
             @bytes_output = FRAME_SIZE
           end
-          # Write output
-          write_amount = [bytes, @bytes_output].min
-          io_system.write(output, @window[0, write_amount])
-          total_written += write_amount
-          bytes -= write_amount
+          if ENV["DEBUG_MSZIP"]
+            warn "DEBUG MSZIP: After inflate_block: bytes_output=#{@bytes_output} window_posn=#{@window_posn}"
+          end
+          # Now we have data in the window buffer - loop back to write from it
+        end
+        if ENV["DEBUG_MSZIP"]
+          warn "DEBUG MSZIP.decompress: EXIT total_written=#{total_written}"
         end
         total_written
@@ -111,49 +160,63 @@ module Cabriolet
       # Read and verify 'CK' signature
       def read_signature
+        if ENV["DEBUG_MSZIP"]
+          warn "DEBUG read_signature: Before byte_align"
+        end
         # Align to byte boundary
         @bitstream.byte_align
-        # Read bytes until we find 'CK'
-        state = 0
-        bytes_read = 0
-        max_search = 10_000 # Prevent infinite loops
+        # Read first 2 bytes
+        c = @bitstream.read_bits(8)
+        k = @bitstream.read_bits(8)
-        loop do
-          byte = @bitstream.read_bits(8)
-          bytes_read += 1
+        if ENV["DEBUG_MSZIP"]
+          warn "DEBUG read_signature: Read 0x#{c.to_s(16)} 0x#{k.to_s(16)} (expected 'C'=0x43 'K'=0x4B)"
+        end
-          # Check for EOF (bitstream returns 0)
-          if bytes_read > 2 && byte.zero?
-            raise DecompressionError,
-                  "Unexpected EOF while searching for CK signature"
+        # If not CK, search for it (similar to libmspack's tolerant behavior)
+        unless c == SIGNATURE_BYTE_C && k == SIGNATURE_BYTE_K
+          # Search for CK signature in the stream (up to a reasonable limit)
+          max_search = 256
+          found = false
+          max_search.times do
+            # Shift: c becomes k, read new k
+            c = k
+            k = @bitstream.read_bits(8)
+            if c == SIGNATURE_BYTE_C && k == SIGNATURE_BYTE_K
+              found = true
+              if ENV["DEBUG_MSZIP"]
+                warn "DEBUG read_signature: Found CK signature after searching"
+              end
+              break
+            end
           end
-          # Prevent infinite loops
-          if bytes_read > max_search
+          unless found
             raise DecompressionError,
-                  "CK signature not found in stream"
-          end
-          if byte == 0x43 # 'C'
-            state = 1
-          elsif state == 1 && byte == 0x4B # 'K'
-            break
-          else
-            state = 0
+                  "Invalid MSZIP signature: could not find CK in stream"
           end
         end
       end
       # Inflate a single block
+      #
+      # Processes deflate blocks until the last_block flag is set or window is full.
+      # Always decodes complete blocks - does not stop mid-block.
       def inflate_block
-        loop do
-          # Read last block flag
-          last_block = @bitstream.read_bits(1)
+        # Read first block header
+        last_block = @bitstream.read_bits(1)
+        block_type = @bitstream.read_bits(2)
-          # Read block type
-          block_type = @bitstream.read_bits(2)
+        if ENV["DEBUG_MSZIP"]
+          warn "DEBUG inflate_block: First block: last_block=#{last_block} block_type=#{block_type}"
+        end
+        loop do
+          # Process current block
           case block_type
           when 0
             inflate_stored_block
@@ -167,7 +230,16 @@ module Cabriolet
             raise DecompressionError, "Invalid block type: #{block_type}"
           end
+          if ENV["DEBUG_MSZIP"]
+            warn "DEBUG inflate_block: After block: last_block=#{last_block} window_posn=#{@window_posn}"
+          end
+          # Stop if this was the last block
           break if last_block == 1
+          # Read next block header (only if we need to continue)
+          last_block = @bitstream.read_bits(1)
+          block_type = @bitstream.read_bits(2)
         end
         # Flush remaining window data
@@ -306,13 +378,25 @@ module Cabriolet
       end
       # Inflate a Huffman-compressed block
+      #
+      # Always decodes until code 256 (END OF BLOCK)
       def inflate_huffman_block
+        symbol_count = 0
         loop do
+          if ENV["DEBUG_MSZIP_SYMBOLS"]
+            warn "DEBUG inflate_huffman_block: window_posn=#{@window_posn} bytes_output=#{@bytes_output}"
+          end
           # Decode symbol from literal tree
           code = Huffman::Decoder.decode_symbol(
             @bitstream, @literal_tree.table, LITERAL_TABLEBITS,
             @literal_lengths, LITERAL_MAXSYMBOLS
           )
+          symbol_count += 1
+          if ENV["DEBUG_MSZIP_SYMBOLS"] || ENV["DEBUG_MSZIP"]
+            warn "DEBUG inflate_huffman_block[#{symbol_count}]: decoded code=#{code} (#{'0x%02x' % code if code < 256})"
+          end
           if code < 256
             # Literal byte
@@ -321,6 +405,9 @@ module Cabriolet
             flush_window if @window_posn == FRAME_SIZE
           elsif code == 256
             # End of block
+            if ENV["DEBUG_MSZIP"] || ENV["DEBUG_MSZIP_SYMBOLS"]
+              warn "DEBUG inflate_huffman_block: END OF BLOCK (window_posn=#{@window_posn})"
+            end
             break
           else
             # Length/distance pair (LZ77 match)

data/lib/cabriolet/decompressors/quantum.rb CHANGED Viewed

@@ -1,5 +1,33 @@
 # frozen_string_literal: true
+require_relative "../quantum_shared"
+# Compatibility shim for String#bytesplice (added in Ruby 3.2)
+unless String.method_defined?(:bytesplice)
+  # Backport of String#bytesplice for interpreters whose String does not
+  # define it. Prepended to String only when the method is missing.
+  module StringBytespliceCompat
+    # Replace `length` bytes of the receiver, starting at byte `index`, with
+    # bytes taken from `other_string`, and return the receiver.
+    #
+    # The result is assembled as a new string and then written back into the
+    # receiver with clear + append, so the receiver must not be frozen.
+    #
+    # @param index [Integer] Byte offset in the receiver where replacement starts
+    # @param length [Integer] Number of receiver bytes to replace
+    # @param other_string [String] String supplying the replacement bytes
+    # @param other_index [Integer] Byte offset into other_string
+    # @param other_length [Integer, nil] Bytes to take from other_string
+    #   (defaults to other_string.bytesize)
+    # @return [String] The receiver
+    def bytesplice(index, length, other_string, other_index = 0,
+                   other_length = nil)
+      other_length = other_length || other_string.bytesize
+      # Every piece is sliced before the receiver is modified, so passing the
+      # receiver itself as other_string is safe.
+      head = byteslice(0, index)
+      replacement = other_string.byteslice(other_index, other_length)
+      tail = byteslice((index + length)..-1)
+      rebuilt = head + replacement + tail
+      # clear and << both return the receiver
+      clear << rebuilt
+    end
+  end
+  String.prepend(StringBytespliceCompat)
+end
 module Cabriolet
   module Decompressors
     # Quantum handles Quantum-compressed data using arithmetic coding
@@ -8,59 +36,10 @@ module Cabriolet
     # The Quantum method was created by David Stafford, adapted by Microsoft
     # Corporation.
     class Quantum < Base
-      # Frame size (32KB per frame)
-      FRAME_SIZE = 32_768
-      # Match constants
-      MAX_MATCH = 1028
-      # Position slot tables (same as in qtmd.c)
-      POSITION_BASE = [
-        0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
-        512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
-        24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
-        393_216, 524_288, 786_432, 1_048_576, 1_572_864
-      ].freeze
-      EXTRA_BITS = [
-        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
-        9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
-        17, 17, 18, 18, 19, 19
-      ].freeze
-      LENGTH_BASE = [
-        0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
-        30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
-      ].freeze
-      LENGTH_EXTRA = [
-        0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
-        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
-      ].freeze
+      include QuantumShared
       attr_reader :window_bits, :window_size
-      # Represents a symbol in an arithmetic coding model
-      class ModelSymbol
-        attr_accessor :sym, :cumfreq
-        def initialize(sym, cumfreq)
-          @sym = sym
-          @cumfreq = cumfreq
-        end
-      end
-      # Represents an arithmetic coding model
-      class Model
-        attr_accessor :shiftsleft, :entries, :syms
-        def initialize(syms, entries)
-          @syms = syms
-          @entries = entries
-          @shiftsleft = 4
-        end
-      end
       # Initialize Quantum decompressor
       #
       # @param io_system [System::IOSystem] I/O system for reading/writing
@@ -68,7 +47,8 @@ module Cabriolet
       # @param output [System::FileHandle, System::MemoryHandle] Output handle
       # @param buffer_size [Integer] Buffer size for I/O operations
       # @param window_bits [Integer] Window size parameter (10-21)
-      def initialize(io_system, input, output, buffer_size, window_bits: 10)
+      def initialize(io_system, input, output, buffer_size, window_bits: 10,
+**_kwargs)
         super(io_system, input, output, buffer_size)
         # Validate window_bits
@@ -80,8 +60,13 @@ module Cabriolet
         @window_bits = window_bits
         @window_size = 1 << window_bits
-        # Initialize window
-        @window = "\0" * @window_size
+        # Initialize window (mutable for Ruby < 3.2 bytesplice compatibility)
+        @window = if String.method_defined?(:bytesplice)
+                    "\0" * @window_size
+                  else
+                    # In Ruby < 3.2, create mutable window using String.new
+                    String.new("\0" * @window_size)
+                  end
         @window_posn = 0
         @frame_todo = FRAME_SIZE
@@ -408,7 +393,52 @@ module Cabriolet
       end
       # Copy match from window
+      # Optimized to use bulk byte operations for better performance
       def copy_match(offset, length)
+        # Use bulk copy for matches longer than 32 bytes
+        if length > 32
+          copy_match_bulk(offset, length)
+        else
+          copy_match_byte_by_byte(offset, length)
+        end
+      end
+      # Bulk copy using bytesplice for better performance on longer matches
+      def copy_match_bulk(offset, length)
+        # bytesplice copies the source range as it exists before the call.
+        # When the match distance is shorter than the match length, the source
+        # overlaps the bytes being produced (an LZ77 run) and each output byte
+        # may depend on one written earlier in the same match, so those
+        # matches are expanded byte by byte instead of block-copied.
+        return copy_match_byte_by_byte(offset, length) if offset < length
+        if offset > @window_posn
+          # Match wraps around window
+          if offset > @window_size
+            raise DecompressionError,
+                  "Match offset beyond window"
+          end
+          # Bytes between the source position and the end of the window
+          tail_len = offset - @window_posn
+          src_pos = @window_size - tail_len
+          first_len = [tail_len, length].min
+          @window.bytesplice(@window_posn, first_len, @window, src_pos,
+                             first_len)
+          @window_posn += first_len
+          # Anything left over continues from the beginning of the window
+          remaining = length - first_len
+          if remaining.positive?
+            @window.bytesplice(@window_posn, remaining, @window, 0, remaining)
+            @window_posn += remaining
+          end
+        else
+          # Source lies entirely before the write position
+          src_pos = @window_posn - offset
+          @window.bytesplice(@window_posn, length, @window, src_pos, length)
+          @window_posn += length
+        end
+        # Same value the original returned from its final `+=`
+        @window_posn
+      end
+      # Byte-by-byte copy for short matches (fallback)
+      def copy_match_byte_by_byte(offset, length)
         if offset > @window_posn
           # Match wraps around window
           if offset > @window_size

data/lib/cabriolet/errors.rb CHANGED Viewed

@@ -36,4 +36,7 @@ module Cabriolet
   # Raised when seek operation fails
   class SeekError < IOError; end
+  # Raised when plugin operations fail
+  class PluginError < Error; end
 end

data/lib/cabriolet/extraction/base_extractor.rb ADDED Viewed

@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+require "fileutils"
+module Cabriolet
+  module Extraction
+    # BaseExtractor provides common extraction functionality for all extractors
+    # Reduces code duplication between SimpleExtractor and Parallel::Extractor
+    class BaseExtractor
+      # Path segments dropped from archive names before they are joined to
+      # the output directory: empty segments, "." and "..".
+      UNSAFE_SEGMENTS = ["", ".", ".."].freeze
+      private_constant :UNSAFE_SEGMENTS
+      # Initialize the base extractor
+      #
+      # @param output_dir [String] Directory to extract files to
+      # @param preserve_paths [Boolean] Whether to preserve directory structure
+      # @param overwrite [Boolean] Whether to overwrite existing files
+      def initialize(output_dir, preserve_paths: true, overwrite: false)
+        @output_dir = output_dir
+        @preserve_paths = preserve_paths
+        @overwrite = overwrite
+      end
+      protected
+      # Build the output path for a file, handling path preservation and cleaning
+      #
+      # Names come from the archive and are therefore untrusted: "." and ".."
+      # segments are removed so the result always stays below the output
+      # directory (an entry named "..\\..\\x" maps to "<output_dir>/x").
+      #
+      # @param filename [String] Original filename from archive (may have backslashes)
+      # @return [String] Full output path for the file
+      def build_output_path(filename)
+        # Normalize path separators (Windows archives use backslashes)
+        segments = filename.gsub("\\", "/").split("/")
+        segments = segments.reject { |segment| UNSAFE_SEGMENTS.include?(segment) }
+        # Flatten to the output directory (just the final name) when paths
+        # are not preserved
+        segments = segments.last(1) unless @preserve_paths
+        ::File.join(@output_dir, *segments)
+      end
+      # Extract a single file to disk
+      #
+      # Any StandardError raised while extracting is reported with warn and
+      # turned into a nil return value.
+      #
+      # @param file [Object] File object from archive (must respond to :name and :data)
+      # @return [String, nil] Output path if successful, nil if skipped or failed
+      def extract_file(file)
+        output_path = build_output_path(file.name)
+        # Skip files that already exist unless overwriting was requested
+        return nil if ::File.exist?(output_path) && !@overwrite
+        # Create parent directory
+        dir = ::File.dirname(output_path)
+        FileUtils.mkdir_p(dir) unless ::File.directory?(dir)
+        # Get file data
+        data = file.data
+        return nil unless data
+        # Write file data
+        ::File.binwrite(output_path, data)
+        # Preserve file attributes if available
+        preserve_file_attributes(output_path, file)
+        output_path
+      rescue StandardError => e
+        warn "Failed to extract #{file.name}: #{e.message}"
+        nil
+      end
+      # Preserve file attributes (timestamps, etc.) if available on the file object
+      #
+      # Uses file.datetime as the modification time when present; otherwise
+      # falls back to file.mtime (and file.atime when the object provides it).
+      #
+      # @param path [String] Path to extracted file
+      # @param file [Object] File object from archive
+      def preserve_file_attributes(path, file)
+        # Try various timestamp attributes that different formats use
+        if file.respond_to?(:datetime) && file.datetime
+          ::File.utime(::File.atime(path), file.datetime, path)
+        elsif file.respond_to?(:mtime) && file.mtime
+          atime = file.respond_to?(:atime) ? file.atime : ::File.atime(path)
+          ::File.utime(atime, file.mtime, path)
+        end
+      end
+    end
+  end
+end

data/lib/cabriolet/extraction/extractor.rb ADDED Viewed

@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+require "fileutils"
+require "fractor"
+require_relative "file_extraction_work"
+require_relative "file_extraction_worker"
+module Cabriolet
+  module Extraction
+    # Unified extractor using Fractor for parallel file extraction
+    # A single worker (workers: 1) extracts sequentially; N workers extract in parallel
+    class Extractor
+      DEFAULT_WORKERS = 4
+      attr_reader :archive, :output_dir, :workers, :stats
+      # Initialize the extractor
+      #
+      # @param archive [Object] Archive to extract (must respond to :files)
+      # @param output_dir [String] Directory to extract files to
+      # @param workers [Integer] Number of workers (values below 1 become 1)
+      # @param options [Hash] Accepts :preserve_paths (default true) and
+      #   :overwrite (default false)
+      def initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options)
+        @archive = archive
+        @output_dir = output_dir
+        @workers = [workers, 1].max # At least 1 worker
+        @preserve_paths = options.fetch(:preserve_paths, true)
+        @overwrite = options.fetch(:overwrite, false)
+        @stats = { extracted: 0, skipped: 0, failed: 0, bytes: 0 }
+      end
+      # Extract all files from archive
+      #
+      # @return [Hash] Extraction statistics
+      def extract_all
+        FileUtils.mkdir_p(@output_dir)
+        run_parallel(@archive.files)
+        @stats
+      end
+      # Extract files with progress callback
+      #
+      # Without a block this is the same as extract_all. With one worker the
+      # files are processed one at a time and the block is called after each.
+      # With more workers the files are processed in batches and the block is
+      # called for every file of a batch once that batch has finished.
+      #
+      # @yield [current, total, file] Progress callback
+      # @return [Hash] Extraction statistics
+      def extract_with_progress(&block)
+        return extract_all unless block
+        FileUtils.mkdir_p(@output_dir)
+        files = @archive.files
+        total = files.count
+        current = 0
+        # Sequential mode uses simple iteration with progress
+        if @workers == 1
+          files.each do |file|
+            extract_single_file(file)
+            current += 1
+            yield(current, total, file)
+          end
+          return @stats
+        end
+        # Parallel mode: batch files for progress updates
+        batch_size = [total / @workers, 1].max
+        files.each_slice(batch_size) do |batch|
+          # run_parallel also folds the batch results into @stats; without
+          # that the statistics returned from this method would stay at zero
+          run_parallel(batch)
+          batch.each do |file|
+            current += 1
+            yield(current, total, file)
+          end
+        end
+        @stats
+      end
+      private
+      # Build the work item describing the extraction of one file
+      #
+      # @param file [Object] File to extract
+      # @return [FileExtractionWork] Work item for the worker
+      def build_work(file)
+        FileExtractionWork.new(
+          file,
+          output_dir: @output_dir,
+          preserve_paths: @preserve_paths,
+          overwrite: @overwrite,
+        )
+      end
+      # Run a set of files through a supervisor and record the results
+      #
+      # @param files [Array<Object>] Files to extract
+      def run_parallel(files)
+        # Nothing to schedule
+        return if files.empty?
+        supervisor = Fractor::Supervisor.new(
+          worker_pools: [
+            {
+              worker_class: FileExtractionWorker,
+              num_workers: @workers,
+            },
+          ],
+        )
+        supervisor.add_work_items(files.map { |file| build_work(file) })
+        supervisor.run
+        collect_stats(supervisor.results)
+      end
+      # Extract a single file (for sequential mode with progress)
+      #
+      # @param file [Object] File to extract
+      # @return [Object] Result from worker
+      def extract_single_file(file)
+        result = FileExtractionWorker.new.process(build_work(file))
+        update_stats_from_result(result)
+        result
+      end
+      # Collect statistics from supervisor results
+      #
+      # @param results [Object] Results from supervisor (its #results is iterated)
+      def collect_stats(results)
+        results.results.each do |result|
+          update_stats_from_result(result)
+        end
+      end
+      # Update stats from a single work result
+      #
+      # A failed result counts as :failed. A successful result whose payload
+      # is a Hash with status :skipped counts as :skipped. Every other
+      # successful result counts as :extracted and adds the payload's :size,
+      # when present, to :bytes.
+      #
+      # @param result [Object] Result from worker (responds to :success? and :result)
+      def update_stats_from_result(result)
+        unless result.success?
+          @stats[:failed] += 1
+          return
+        end
+        data = result.result
+        details = data.is_a?(Hash) ? data : {}
+        if details[:status] == :skipped
+          @stats[:skipped] += 1
+        else
+          @stats[:extracted] += 1
+          @stats[:bytes] += details[:size] if details[:size]
+        end
+      end
+    end
+  end
+end