cabriolet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +799 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +29 -0
- data/README.adoc +1207 -0
- data/exe/cabriolet +6 -0
- data/lib/cabriolet/auto.rb +173 -0
- data/lib/cabriolet/binary/bitstream.rb +148 -0
- data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
- data/lib/cabriolet/binary/chm_structures.rb +213 -0
- data/lib/cabriolet/binary/hlp_structures.rb +66 -0
- data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
- data/lib/cabriolet/binary/lit_structures.rb +107 -0
- data/lib/cabriolet/binary/oab_structures.rb +112 -0
- data/lib/cabriolet/binary/structures.rb +56 -0
- data/lib/cabriolet/binary/szdd_structures.rb +60 -0
- data/lib/cabriolet/cab/compressor.rb +382 -0
- data/lib/cabriolet/cab/decompressor.rb +510 -0
- data/lib/cabriolet/cab/extractor.rb +357 -0
- data/lib/cabriolet/cab/parser.rb +264 -0
- data/lib/cabriolet/chm/compressor.rb +513 -0
- data/lib/cabriolet/chm/decompressor.rb +436 -0
- data/lib/cabriolet/chm/parser.rb +254 -0
- data/lib/cabriolet/cli.rb +776 -0
- data/lib/cabriolet/compressors/base.rb +34 -0
- data/lib/cabriolet/compressors/lzss.rb +250 -0
- data/lib/cabriolet/compressors/lzx.rb +581 -0
- data/lib/cabriolet/compressors/mszip.rb +315 -0
- data/lib/cabriolet/compressors/quantum.rb +446 -0
- data/lib/cabriolet/constants.rb +75 -0
- data/lib/cabriolet/decompressors/base.rb +39 -0
- data/lib/cabriolet/decompressors/lzss.rb +138 -0
- data/lib/cabriolet/decompressors/lzx.rb +726 -0
- data/lib/cabriolet/decompressors/mszip.rb +390 -0
- data/lib/cabriolet/decompressors/none.rb +27 -0
- data/lib/cabriolet/decompressors/quantum.rb +456 -0
- data/lib/cabriolet/errors.rb +39 -0
- data/lib/cabriolet/format_detector.rb +156 -0
- data/lib/cabriolet/hlp/compressor.rb +272 -0
- data/lib/cabriolet/hlp/decompressor.rb +198 -0
- data/lib/cabriolet/hlp/parser.rb +131 -0
- data/lib/cabriolet/huffman/decoder.rb +79 -0
- data/lib/cabriolet/huffman/encoder.rb +108 -0
- data/lib/cabriolet/huffman/tree.rb +138 -0
- data/lib/cabriolet/kwaj/compressor.rb +479 -0
- data/lib/cabriolet/kwaj/decompressor.rb +237 -0
- data/lib/cabriolet/kwaj/parser.rb +183 -0
- data/lib/cabriolet/lit/compressor.rb +255 -0
- data/lib/cabriolet/lit/decompressor.rb +250 -0
- data/lib/cabriolet/models/cabinet.rb +81 -0
- data/lib/cabriolet/models/chm_file.rb +28 -0
- data/lib/cabriolet/models/chm_header.rb +67 -0
- data/lib/cabriolet/models/chm_section.rb +38 -0
- data/lib/cabriolet/models/file.rb +119 -0
- data/lib/cabriolet/models/folder.rb +102 -0
- data/lib/cabriolet/models/folder_data.rb +21 -0
- data/lib/cabriolet/models/hlp_file.rb +45 -0
- data/lib/cabriolet/models/hlp_header.rb +37 -0
- data/lib/cabriolet/models/kwaj_header.rb +98 -0
- data/lib/cabriolet/models/lit_header.rb +55 -0
- data/lib/cabriolet/models/oab_header.rb +95 -0
- data/lib/cabriolet/models/szdd_header.rb +72 -0
- data/lib/cabriolet/modifier.rb +326 -0
- data/lib/cabriolet/oab/compressor.rb +353 -0
- data/lib/cabriolet/oab/decompressor.rb +315 -0
- data/lib/cabriolet/parallel.rb +333 -0
- data/lib/cabriolet/repairer.rb +288 -0
- data/lib/cabriolet/streaming.rb +221 -0
- data/lib/cabriolet/system/file_handle.rb +107 -0
- data/lib/cabriolet/system/io_system.rb +87 -0
- data/lib/cabriolet/system/memory_handle.rb +105 -0
- data/lib/cabriolet/szdd/compressor.rb +217 -0
- data/lib/cabriolet/szdd/decompressor.rb +184 -0
- data/lib/cabriolet/szdd/parser.rb +127 -0
- data/lib/cabriolet/validator.rb +332 -0
- data/lib/cabriolet/version.rb +5 -0
- data/lib/cabriolet.rb +104 -0
- metadata +157 -0
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
# Parallel extraction for multi-core performance
|
|
5
|
+
module Parallel
|
|
6
|
+
# Parallel extractor for archives
|
|
7
|
+
class Extractor
|
|
8
|
+
DEFAULT_WORKERS = 4
|
|
9
|
+
|
|
10
|
+
def initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options)
|
|
11
|
+
@archive = archive
|
|
12
|
+
@output_dir = output_dir
|
|
13
|
+
@workers = [workers, 1].max # At least 1 worker
|
|
14
|
+
@options = options
|
|
15
|
+
@preserve_paths = options.fetch(:preserve_paths, true)
|
|
16
|
+
@overwrite = options.fetch(:overwrite, false)
|
|
17
|
+
@queue = Queue.new
|
|
18
|
+
@stats = { extracted: 0, skipped: 0, failed: 0, bytes: 0 }
|
|
19
|
+
@stats_mutex = Mutex.new
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# Extract all files using parallel workers
|
|
23
|
+
#
|
|
24
|
+
# @return [Hash] Extraction statistics
|
|
25
|
+
#
|
|
26
|
+
# @example
|
|
27
|
+
# extractor = Cabriolet::Parallel::Extractor.new(cab, 'output/', workers: 8)
|
|
28
|
+
# stats = extractor.extract_all
|
|
29
|
+
def extract_all
|
|
30
|
+
FileUtils.mkdir_p(@output_dir)
|
|
31
|
+
|
|
32
|
+
# Queue all files
|
|
33
|
+
@archive.files.each { |file| @queue << file }
|
|
34
|
+
|
|
35
|
+
# Add termination signals
|
|
36
|
+
@workers.times { @queue << :done }
|
|
37
|
+
|
|
38
|
+
# Start worker threads
|
|
39
|
+
threads = Array.new(@workers) do |worker_id|
|
|
40
|
+
Thread.new { worker_loop(worker_id) }
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Wait for all workers to complete
|
|
44
|
+
threads.each(&:join)
|
|
45
|
+
|
|
46
|
+
@stats
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Extract files with progress callback
|
|
50
|
+
#
|
|
51
|
+
# @yield [current, total, file] Progress callback
|
|
52
|
+
# @return [Hash] Extraction statistics
|
|
53
|
+
#
|
|
54
|
+
# @example
|
|
55
|
+
# extractor.extract_with_progress do |current, total, file|
|
|
56
|
+
# puts "#{current}/#{total}: #{file.name}"
|
|
57
|
+
# end
|
|
58
|
+
def extract_with_progress(&block)
|
|
59
|
+
return extract_all unless block
|
|
60
|
+
|
|
61
|
+
total = @archive.files.count
|
|
62
|
+
current = 0
|
|
63
|
+
current_mutex = Mutex.new
|
|
64
|
+
|
|
65
|
+
FileUtils.mkdir_p(@output_dir)
|
|
66
|
+
|
|
67
|
+
# Queue all files
|
|
68
|
+
@archive.files.each { |file| @queue << file }
|
|
69
|
+
@workers.times { @queue << :done }
|
|
70
|
+
|
|
71
|
+
# Start worker threads with progress
|
|
72
|
+
threads = Array.new(@workers) do |_worker_id|
|
|
73
|
+
Thread.new do
|
|
74
|
+
loop do
|
|
75
|
+
file = @queue.pop
|
|
76
|
+
break if file == :done
|
|
77
|
+
|
|
78
|
+
extract_file(file)
|
|
79
|
+
|
|
80
|
+
current_mutex.synchronize do
|
|
81
|
+
current += 1
|
|
82
|
+
yield(current, total, file)
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
threads.each(&:join)
|
|
89
|
+
@stats
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
private
|
|
93
|
+
|
|
94
|
+
def worker_loop(_worker_id)
|
|
95
|
+
loop do
|
|
96
|
+
file = @queue.pop
|
|
97
|
+
break if file == :done
|
|
98
|
+
|
|
99
|
+
extract_file(file)
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def extract_file(file)
|
|
104
|
+
output_path = build_output_path(file.name)
|
|
105
|
+
|
|
106
|
+
if File.exist?(output_path) && !@overwrite
|
|
107
|
+
update_stats(:skipped)
|
|
108
|
+
return
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
begin
|
|
112
|
+
# Create directory (thread-safe)
|
|
113
|
+
FileUtils.mkdir_p(File.dirname(output_path))
|
|
114
|
+
|
|
115
|
+
# Extract file data
|
|
116
|
+
data = file.data
|
|
117
|
+
|
|
118
|
+
# Write file (one at a time per file)
|
|
119
|
+
File.write(output_path, data, mode: "wb")
|
|
120
|
+
|
|
121
|
+
# Preserve timestamps if available
|
|
122
|
+
if file.respond_to?(:datetime) && file.datetime
|
|
123
|
+
File.utime(File.atime(output_path), file.datetime, output_path)
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
update_stats(:extracted, data.bytesize)
|
|
127
|
+
rescue StandardError => e
|
|
128
|
+
update_stats(:failed)
|
|
129
|
+
warn "Worker error extracting #{file.name}: #{e.message}"
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def build_output_path(filename)
|
|
134
|
+
if @preserve_paths
|
|
135
|
+
clean_name = filename.gsub("\\", "/")
|
|
136
|
+
File.join(@output_dir, clean_name)
|
|
137
|
+
else
|
|
138
|
+
base_name = File.basename(filename.gsub("\\", "/"))
|
|
139
|
+
File.join(@output_dir, base_name)
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def update_stats(stat_type, bytes = 0)
|
|
144
|
+
@stats_mutex.synchronize do
|
|
145
|
+
@stats[stat_type] += 1
|
|
146
|
+
@stats[:bytes] += bytes if bytes.positive?
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# Parallel batch processor
|
|
152
|
+
class BatchProcessor
|
|
153
|
+
def initialize(workers: Extractor::DEFAULT_WORKERS)
|
|
154
|
+
@workers = workers
|
|
155
|
+
@stats = { total: 0, successful: 0, failed: 0 }
|
|
156
|
+
@stats_mutex = Mutex.new
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
# Process multiple archives in parallel
|
|
160
|
+
#
|
|
161
|
+
# @param archive_paths [Array<String>] Paths to archives
|
|
162
|
+
# @param output_base [String] Base output directory
|
|
163
|
+
# @yield [archive_path, stats] Optional callback per archive
|
|
164
|
+
# @return [Hash] Overall statistics
|
|
165
|
+
#
|
|
166
|
+
# @example
|
|
167
|
+
# processor = Cabriolet::Parallel::BatchProcessor.new(workers: 8)
|
|
168
|
+
# stats = processor.process_all(Dir.glob('*.cab'), 'output/')
|
|
169
|
+
def process_all(archive_paths, output_base, &block)
|
|
170
|
+
queue = Queue.new
|
|
171
|
+
archive_paths.each { |path| queue << path }
|
|
172
|
+
@workers.times { queue << :done }
|
|
173
|
+
|
|
174
|
+
threads = Array.new(@workers) do
|
|
175
|
+
Thread.new { process_loop(queue, output_base, &block) }
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
threads.each(&:join)
|
|
179
|
+
@stats
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
private
|
|
183
|
+
|
|
184
|
+
def process_loop(queue, output_base, &block)
|
|
185
|
+
loop do
|
|
186
|
+
archive_path = queue.pop
|
|
187
|
+
break if archive_path == :done
|
|
188
|
+
|
|
189
|
+
process_one(archive_path, output_base, &block)
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
def process_one(archive_path, output_base)
|
|
194
|
+
update_stats(:total)
|
|
195
|
+
|
|
196
|
+
begin
|
|
197
|
+
archive = Cabriolet::Auto.open(archive_path)
|
|
198
|
+
output_dir = File.join(output_base, File.basename(archive_path, ".*"))
|
|
199
|
+
|
|
200
|
+
extractor = Extractor.new(archive, output_dir, workers: 2)
|
|
201
|
+
stats = extractor.extract_all
|
|
202
|
+
|
|
203
|
+
update_stats(:successful)
|
|
204
|
+
|
|
205
|
+
yield(archive_path, stats) if block_given?
|
|
206
|
+
rescue StandardError => e
|
|
207
|
+
update_stats(:failed)
|
|
208
|
+
warn "Failed to process #{archive_path}: #{e.message}"
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def update_stats(stat_type)
|
|
213
|
+
@stats_mutex.synchronize do
|
|
214
|
+
@stats[stat_type] += 1
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
attr_reader :stats
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
# Thread pool for custom parallel operations
|
|
222
|
+
class ThreadPool
|
|
223
|
+
def initialize(size: Extractor::DEFAULT_WORKERS)
|
|
224
|
+
@size = size
|
|
225
|
+
@queue = Queue.new
|
|
226
|
+
@threads = []
|
|
227
|
+
@running = false
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
# Start the thread pool
|
|
231
|
+
def start
|
|
232
|
+
return if @running
|
|
233
|
+
|
|
234
|
+
@running = true
|
|
235
|
+
@threads = Array.new(@size) do
|
|
236
|
+
Thread.new { worker_loop }
|
|
237
|
+
end
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
# Submit a task to the pool
|
|
241
|
+
#
|
|
242
|
+
# @yield Task to execute
|
|
243
|
+
def submit(&block)
|
|
244
|
+
start unless @running
|
|
245
|
+
@queue << block
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
# Shutdown the thread pool
|
|
249
|
+
#
|
|
250
|
+
# @param wait [Boolean] Wait for pending tasks to complete
|
|
251
|
+
def shutdown(wait: true)
|
|
252
|
+
return unless @running
|
|
253
|
+
|
|
254
|
+
if wait
|
|
255
|
+
# Wait for queue to empty
|
|
256
|
+
sleep 0.01 until @queue.empty?
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
# Send termination signals
|
|
260
|
+
@size.times { @queue << :shutdown }
|
|
261
|
+
|
|
262
|
+
# Wait for threads to finish
|
|
263
|
+
@threads.each(&:join)
|
|
264
|
+
@threads.clear
|
|
265
|
+
@running = false
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
# Execute tasks in parallel with automatic cleanup
|
|
269
|
+
#
|
|
270
|
+
# @param items [Array] Items to process
|
|
271
|
+
# @yield [item] Process each item
|
|
272
|
+
# @return [Array] Results from each task
|
|
273
|
+
def map(items)
|
|
274
|
+
start
|
|
275
|
+
results = []
|
|
276
|
+
results_mutex = Mutex.new
|
|
277
|
+
|
|
278
|
+
items.each_with_index do |item, index|
|
|
279
|
+
submit do
|
|
280
|
+
result = yield(item)
|
|
281
|
+
results_mutex.synchronize do
|
|
282
|
+
results[index] = result
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
shutdown(wait: true)
|
|
288
|
+
results
|
|
289
|
+
end
|
|
290
|
+
|
|
291
|
+
private
|
|
292
|
+
|
|
293
|
+
def worker_loop
|
|
294
|
+
loop do
|
|
295
|
+
task = @queue.pop
|
|
296
|
+
break if task == :shutdown
|
|
297
|
+
|
|
298
|
+
begin
|
|
299
|
+
task.call
|
|
300
|
+
rescue StandardError => e
|
|
301
|
+
warn "Thread pool worker error: #{e.message}"
|
|
302
|
+
end
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
class << self
|
|
308
|
+
# Extract archive using parallel workers
|
|
309
|
+
#
|
|
310
|
+
# @param archive [Object] Archive object
|
|
311
|
+
# @param output_dir [String] Output directory
|
|
312
|
+
# @param workers [Integer] Number of parallel workers
|
|
313
|
+
# @return [Hash] Extraction statistics
|
|
314
|
+
def extract(archive, output_dir, workers: Extractor::DEFAULT_WORKERS,
|
|
315
|
+
**options)
|
|
316
|
+
extractor = Extractor.new(archive, output_dir, workers: workers,
|
|
317
|
+
**options)
|
|
318
|
+
extractor.extract_all
|
|
319
|
+
end
|
|
320
|
+
|
|
321
|
+
# Process multiple archives in parallel
|
|
322
|
+
#
|
|
323
|
+
# @param paths [Array<String>] Archive paths
|
|
324
|
+
# @param output_base [String] Base output directory
|
|
325
|
+
# @param workers [Integer] Number of parallel workers
|
|
326
|
+
# @return [Hash] Processing statistics
|
|
327
|
+
def process_batch(paths, output_base, workers: Extractor::DEFAULT_WORKERS)
|
|
328
|
+
processor = BatchProcessor.new(workers: workers)
|
|
329
|
+
processor.process_all(paths, output_base)
|
|
330
|
+
end
|
|
331
|
+
end
|
|
332
|
+
end
|
|
333
|
+
end
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
# Archive repair and recovery functionality
|
|
5
|
+
  class Repairer
    # @param path [String] path to the (possibly corrupted) archive
    # @param options [Hash] stored for future use; not consumed here
    def initialize(path, **options)
      @path = path
      @options = options
      # NOTE(review): FormatDetector.detect presumably returns a format
      # symbol such as :cab — confirm against format_detector.rb.
      @format = FormatDetector.detect(path)
      @recovery_stats = { recovered: 0, failed: 0, partial: 0 }
    end

    # Attempt to repair the archive by re-parsing it in salvage mode,
    # extracting whatever files are recoverable, and rebuilding a fresh
    # archive from them. Never raises: failures are captured in the report.
    #
    # @param output [String] Output path for repaired archive
    # @param options [Hash] Repair options
    # @option options [Boolean] :salvage_mode (true) Enable salvage mode
    # @option options [Boolean] :skip_corrupted (true) Skip corrupted files
    # @option options [Boolean] :rebuild_index (true) Rebuild file index
    # @return [RepairReport] Repair report
    #
    # @example
    #   repairer = Cabriolet::Repairer.new('corrupted.cab')
    #   report = repairer.repair(output: 'repaired.cab')
    def repair(output:, **options)
      salvage_mode = options.fetch(:salvage_mode, true)
      skip_corrupted = options.fetch(:skip_corrupted, true)
      rebuild_index = options.fetch(:rebuild_index, true)

      begin
        # Parse with salvage mode enabled; checksums are skipped because a
        # corrupted archive would otherwise fail fast.
        parser_class = FormatDetector.format_to_parser(@format)
        unless parser_class
          raise UnsupportedFormatError,
                "No parser for format: #{@format}"
        end

        archive = parser_class.new(
          salvage_mode: salvage_mode,
          skip_checksum: true,
          continue_on_error: true,
        ).parse(@path)

        # Extract recoverable files (updates @recovery_stats as it goes).
        recovered_files = extract_recoverable_files(archive, skip_corrupted)

        # Rebuild archive at +output+ unless the caller opted out.
        rebuild_archive(recovered_files, output) if rebuild_index

        RepairReport.new(
          success: true,
          original_file: @path,
          repaired_file: output,
          stats: @recovery_stats,
          recovered_files: recovered_files.map(&:name),
        )
      rescue StandardError => e
        # Any failure (parse, extraction, rebuild) yields a failed report
        # carrying whatever stats were accumulated before the error.
        RepairReport.new(
          success: false,
          original_file: @path,
          repaired_file: output,
          stats: @recovery_stats,
          error: e.message,
        )
      end
    end

    # Salvage files from a corrupted archive straight to disk, without
    # rebuilding an archive. Per-file errors are counted and reported via
    # warn; the loop continues with the next file.
    #
    # @param output_dir [String] Directory to save recovered files
    # @return [SalvageReport] Salvage report with statistics
    #
    # @example
    #   repairer = Cabriolet::Repairer.new('corrupted.cab')
    #   report = repairer.salvage(output_dir: 'recovered/')
    def salvage(output_dir:)
      FileUtils.mkdir_p(output_dir)

      parser_class = FormatDetector.format_to_parser(@format)
      archive = parser_class.new(
        salvage_mode: true,
        skip_checksum: true,
        continue_on_error: true,
      ).parse(@path)

      salvaged_files = []

      archive.files.each do |file|
        data = file.data
        # Entry names are untrusted: sanitize before joining paths.
        output_path = File.join(output_dir, sanitize_filename(file.name))
        FileUtils.mkdir_p(File.dirname(output_path))
        File.write(output_path, data, mode: "wb")

        @recovery_stats[:recovered] += 1
        salvaged_files << file.name
      rescue StandardError => e
        @recovery_stats[:failed] += 1
        warn "Could not salvage #{file.name}: #{e.message}"
      end

      SalvageReport.new(
        output_dir: output_dir,
        stats: @recovery_stats,
        salvaged_files: salvaged_files,
      )
    end

    private

    # Decompress each file and classify it as :complete (size matches),
    # :partial (mismatch, kept only when skip_corrupted is false) or failed.
    # Returns the list of RecoveredFile wrappers; stats are updated in place.
    def extract_recoverable_files(archive, skip_corrupted)
      recovered = []

      archive.files.each do |file|
        # Try to decompress file data
        data = file.data

        # Verify data integrity if possible: a size match is the only
        # integrity signal available here (checksums were skipped).
        if file.respond_to?(:size) && data.bytesize == file.size
          recovered << RecoveredFile.new(file, data, :complete)
          @recovery_stats[:recovered] += 1
        elsif skip_corrupted
          @recovery_stats[:failed] += 1
        else
          recovered << RecoveredFile.new(file, data, :partial)
          @recovery_stats[:partial] += 1
        end
      rescue StandardError => e
        @recovery_stats[:failed] += 1
        warn "Failed to recover #{file.name}: #{e.message}" unless skip_corrupted
      end

      recovered
    end

    # Rebuild a new archive from recovered files. Only CAB rebuilds are
    # implemented; every other format raises.
    def rebuild_archive(files, output_path)
      # Rebuild based on format
      case @format
      when :cab
        rebuild_cab(files, output_path)
      else
        # For other formats, just extract the files
        # Full rebuild may not be supported
        raise UnsupportedOperationError, "Rebuild not supported for #{@format}"
      end
    end

    # Write recovered files into a fresh CAB at output_path using MSZIP
    # compression.
    def rebuild_cab(files, output_path)
      # Lazy require: the compressor is only needed on this code path.
      require_relative "cab/compressor"

      compressor = CAB::Compressor.new(
        output: output_path,
        compression: :mszip, # Use safe compression
      )

      files.each do |recovered_file|
        compressor.add_file_data(
          recovered_file.name,
          recovered_file.data,
          attributes: recovered_file.attributes,
          date: recovered_file.date,
          time: recovered_file.time,
        )
      end

      compressor.compress
    end

    # Remove path traversal attempts and dangerous characters from an
    # untrusted entry name: backslashes become slashes, ".." sequences are
    # neutralized, and a leading "/" is stripped.
    def sanitize_filename(filename)
      filename.gsub("\\", "/").gsub("..", "_").gsub(%r{^/}, "")
    end

    # Recovered file wrapper: snapshots the name, decompressed data and
    # any optional metadata (attributes/date/time) the source entry exposes.
    class RecoveredFile
      attr_reader :name, :data, :status, :attributes, :date, :time

      def initialize(original_file, data, status)
        @name = original_file.name
        @data = data
        @status = status # :complete or :partial
        # Metadata is optional on source entries, so probe before reading.
        @attributes = original_file.attributes if original_file.respond_to?(:attributes)
        @date = original_file.date if original_file.respond_to?(:date)
        @time = original_file.time if original_file.respond_to?(:time)
      end

      # True when the recovered data length matched the declared size.
      def complete?
        @status == :complete
      end

      # True when the data was kept despite a size mismatch.
      def partial?
        @status == :partial
      end
    end
  end
|
|
195
|
+
|
|
196
|
+
# Repair report
|
|
197
|
+
class RepairReport
|
|
198
|
+
attr_reader :success, :original_file, :repaired_file, :stats,
|
|
199
|
+
:recovered_files, :error
|
|
200
|
+
|
|
201
|
+
def initialize(success:, original_file:, repaired_file:, stats:,
|
|
202
|
+
recovered_files: [], error: nil)
|
|
203
|
+
@success = success
|
|
204
|
+
@original_file = original_file
|
|
205
|
+
@repaired_file = repaired_file
|
|
206
|
+
@stats = stats
|
|
207
|
+
@recovered_files = recovered_files
|
|
208
|
+
@error = error
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def success?
|
|
212
|
+
@success
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def summary
|
|
216
|
+
if success?
|
|
217
|
+
"Repair successful: #{@stats[:recovered]} files recovered, #{@stats[:failed]} failed"
|
|
218
|
+
else
|
|
219
|
+
"Repair failed: #{@error}"
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
def detailed_report
|
|
224
|
+
report = ["=" * 60]
|
|
225
|
+
report << "Archive Repair Report"
|
|
226
|
+
report << ("=" * 60)
|
|
227
|
+
report << "Original: #{@original_file}"
|
|
228
|
+
report << "Repaired: #{@repaired_file}"
|
|
229
|
+
report << "Status: #{success? ? 'SUCCESS' : 'FAILED'}"
|
|
230
|
+
report << ""
|
|
231
|
+
report << "Statistics:"
|
|
232
|
+
report << " Recovered: #{@stats[:recovered]}"
|
|
233
|
+
report << " Partial: #{@stats[:partial]}"
|
|
234
|
+
report << " Failed: #{@stats[:failed]}"
|
|
235
|
+
report << ""
|
|
236
|
+
|
|
237
|
+
if @error
|
|
238
|
+
report << "Error: #{@error}"
|
|
239
|
+
report << ""
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
if @recovered_files.any?
|
|
243
|
+
report << "Recovered Files:"
|
|
244
|
+
@recovered_files.each { |f| report << " - #{f}" }
|
|
245
|
+
report << ""
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
report << ("=" * 60)
|
|
249
|
+
report.join("\n")
|
|
250
|
+
end
|
|
251
|
+
end
|
|
252
|
+
|
|
253
|
+
# Salvage report
|
|
254
|
+
class SalvageReport
|
|
255
|
+
attr_reader :output_dir, :stats, :salvaged_files
|
|
256
|
+
|
|
257
|
+
def initialize(output_dir:, stats:, salvaged_files:)
|
|
258
|
+
@output_dir = output_dir
|
|
259
|
+
@stats = stats
|
|
260
|
+
@salvaged_files = salvaged_files
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
def summary
|
|
264
|
+
"Salvaged #{@stats[:recovered]} files to #{@output_dir}, #{@stats[:failed]} failed"
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
def detailed_report
|
|
268
|
+
report = ["=" * 60]
|
|
269
|
+
report << "Salvage Operation Report"
|
|
270
|
+
report << ("=" * 60)
|
|
271
|
+
report << "Output Directory: #{@output_dir}"
|
|
272
|
+
report << ""
|
|
273
|
+
report << "Statistics:"
|
|
274
|
+
report << " Salvaged: #{@stats[:recovered]}"
|
|
275
|
+
report << " Failed: #{@stats[:failed]}"
|
|
276
|
+
report << ""
|
|
277
|
+
|
|
278
|
+
if @salvaged_files.any?
|
|
279
|
+
report << "Salvaged Files:"
|
|
280
|
+
@salvaged_files.each { |f| report << " - #{f}" }
|
|
281
|
+
report << ""
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
report << ("=" * 60)
|
|
285
|
+
report.join("\n")
|
|
286
|
+
end
|
|
287
|
+
end
|
|
288
|
+
end
|