cabriolet 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/README.adoc +3 -0
  3. data/lib/cabriolet/binary/bitstream.rb +32 -21
  4. data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
  5. data/lib/cabriolet/cab/compressor.rb +85 -53
  6. data/lib/cabriolet/cab/decompressor.rb +2 -1
  7. data/lib/cabriolet/cab/extractor.rb +2 -35
  8. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  9. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  10. data/lib/cabriolet/checksum.rb +49 -0
  11. data/lib/cabriolet/collections/file_collection.rb +175 -0
  12. data/lib/cabriolet/compressors/quantum.rb +3 -51
  13. data/lib/cabriolet/decompressors/quantum.rb +81 -52
  14. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  15. data/lib/cabriolet/extraction/extractor.rb +171 -0
  16. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  17. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  18. data/lib/cabriolet/format_base.rb +79 -0
  19. data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
  20. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  21. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  22. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  23. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  24. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  25. data/lib/cabriolet/huffman/encoder.rb +15 -12
  26. data/lib/cabriolet/lit/compressor.rb +45 -689
  27. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  28. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  29. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  30. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  31. data/lib/cabriolet/lit/header_writer.rb +124 -0
  32. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  33. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  34. data/lib/cabriolet/quantum_shared.rb +105 -0
  35. data/lib/cabriolet/version.rb +1 -1
  36. data/lib/cabriolet.rb +114 -3
  37. metadata +38 -4
  38. data/lib/cabriolet/auto.rb +0 -173
  39. data/lib/cabriolet/parallel.rb +0 -333
@@ -1,173 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "format_detector"
4
-
5
- module Cabriolet
6
- # Auto-detection and extraction module
7
- module Auto
8
- class << self
9
- # Open and parse an archive with automatic format detection
10
- #
11
- # @param path [String] Path to the archive file
12
- # @param options [Hash] Options to pass to the parser
13
- # @return [Object] Parsed archive object
14
- # @raise [UnsupportedFormatError] if format cannot be detected or is unsupported
15
- #
16
- # @example
17
- # archive = Cabriolet::Auto.open('unknown.archive')
18
- # archive.files.each { |f| puts f.name }
19
- def open(path, **options)
20
- format = FormatDetector.detect(path)
21
- unless format
22
- raise UnsupportedFormatError,
23
- "Unable to detect format for: #{path}"
24
- end
25
-
26
- parser_class = FormatDetector.format_to_parser(format)
27
- unless parser_class
28
- raise UnsupportedFormatError,
29
- "No parser available for format: #{format}"
30
- end
31
-
32
- parser_class.new(**options).parse(path)
33
- end
34
-
35
- # Detect format and extract all files automatically
36
- #
37
- # @param archive_path [String] Path to the archive
38
- # @param output_dir [String] Directory to extract to
39
- # @param options [Hash] Extraction options
40
- # @option options [Boolean] :preserve_paths (true) Preserve directory structure
41
- # @option options [Boolean] :overwrite (false) Overwrite existing files
42
- # @option options [Boolean] :parallel (false) Use parallel extraction
43
- # @option options [Integer] :workers (4) Number of parallel workers
44
- # @return [Hash] Extraction statistics
45
- #
46
- # @example
47
- # Cabriolet::Auto.extract('archive.cab', 'output/')
48
- # Cabriolet::Auto.extract('file.chm', 'docs/', parallel: true, workers: 8)
49
- def extract(archive_path, output_dir, **options)
50
- archive = open(archive_path)
51
-
52
- extractor = if options[:parallel]
53
- ParallelExtractor.new(archive, output_dir, **options)
54
- else
55
- SimpleExtractor.new(archive, output_dir, **options)
56
- end
57
-
58
- extractor.extract_all
59
- end
60
-
61
- # Detect format only without parsing
62
- #
63
- # @param path [String] Path to the file
64
- # @return [Symbol, nil] Detected format symbol or nil
65
- #
66
- # @example
67
- # format = Cabriolet::Auto.detect_format('file.cab')
68
- # # => :cab
69
- def detect_format(path)
70
- FormatDetector.detect(path)
71
- end
72
-
73
- # Get information about an archive without full extraction
74
- #
75
- # @param path [String] Path to the archive
76
- # @return [Hash] Archive information
77
- #
78
- # @example
79
- # info = Cabriolet::Auto.info('archive.cab')
80
- # # => { format: :cab, file_count: 145, total_size: 52428800, ... }
81
- def info(path)
82
- archive = open(path)
83
- format = detect_format(path)
84
-
85
- {
86
- format: format,
87
- path: path,
88
- file_count: archive.files.count,
89
- total_size: archive.files.sum { |f| f.size || 0 },
90
- compressed_size: File.size(path),
91
- compression_ratio: calculate_compression_ratio(archive, path),
92
- files: archive.files.map { |f| file_info(f) },
93
- }
94
- end
95
-
96
- private
97
-
98
- def calculate_compression_ratio(archive, path)
99
- total_uncompressed = archive.files.sum { |f| f.size || 0 }
100
- compressed = File.size(path)
101
-
102
- return 0 if total_uncompressed.zero?
103
-
104
- ((compressed.to_f / total_uncompressed) * 100).round(2)
105
- end
106
-
107
- def file_info(file)
108
- {
109
- name: file.name,
110
- size: file.size,
111
- compressed_size: file.respond_to?(:compressed_size) ? file.compressed_size : nil,
112
- attributes: file.respond_to?(:attributes) ? file.attributes : nil,
113
- date: file.respond_to?(:date) ? file.date : nil,
114
- time: file.respond_to?(:time) ? file.time : nil,
115
- }
116
- end
117
- end
118
-
119
- # Simple sequential extractor
120
- class SimpleExtractor
121
- def initialize(archive, output_dir, **options)
122
- @archive = archive
123
- @output_dir = output_dir
124
- @options = options
125
- @preserve_paths = options.fetch(:preserve_paths, true)
126
- @overwrite = options.fetch(:overwrite, false)
127
- @stats = { extracted: 0, skipped: 0, failed: 0, bytes: 0 }
128
- end
129
-
130
- def extract_all
131
- FileUtils.mkdir_p(@output_dir)
132
-
133
- @archive.files.each do |file|
134
- extract_file(file)
135
- end
136
-
137
- @stats
138
- end
139
-
140
- private
141
-
142
- def extract_file(file)
143
- output_path = build_output_path(file.name)
144
-
145
- if File.exist?(output_path) && !@overwrite
146
- @stats[:skipped] += 1
147
- return
148
- end
149
-
150
- FileUtils.mkdir_p(File.dirname(output_path))
151
- File.write(output_path, file.data, mode: "wb")
152
-
153
- @stats[:extracted] += 1
154
- @stats[:bytes] += file.data.bytesize
155
- rescue StandardError => e
156
- @stats[:failed] += 1
157
- warn "Failed to extract #{file.name}: #{e.message}"
158
- end
159
-
160
- def build_output_path(filename)
161
- if @preserve_paths
162
- # Keep directory structure
163
- clean_name = filename.gsub("\\", "/")
164
- File.join(@output_dir, clean_name)
165
- else
166
- # Flatten to output directory
167
- base_name = File.basename(filename.gsub("\\", "/"))
168
- File.join(@output_dir, base_name)
169
- end
170
- end
171
- end
172
- end
173
- end
@@ -1,333 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Cabriolet
4
- # Parallel extraction for multi-core performance
5
- module Parallel
6
- # Parallel extractor for archives
7
- class Extractor
8
- DEFAULT_WORKERS = 4
9
-
10
- def initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options)
11
- @archive = archive
12
- @output_dir = output_dir
13
- @workers = [workers, 1].max # At least 1 worker
14
- @options = options
15
- @preserve_paths = options.fetch(:preserve_paths, true)
16
- @overwrite = options.fetch(:overwrite, false)
17
- @queue = Queue.new
18
- @stats = { extracted: 0, skipped: 0, failed: 0, bytes: 0 }
19
- @stats_mutex = Mutex.new
20
- end
21
-
22
- # Extract all files using parallel workers
23
- #
24
- # @return [Hash] Extraction statistics
25
- #
26
- # @example
27
- # extractor = Cabriolet::Parallel::Extractor.new(cab, 'output/', workers: 8)
28
- # stats = extractor.extract_all
29
- def extract_all
30
- FileUtils.mkdir_p(@output_dir)
31
-
32
- # Queue all files
33
- @archive.files.each { |file| @queue << file }
34
-
35
- # Add termination signals
36
- @workers.times { @queue << :done }
37
-
38
- # Start worker threads
39
- threads = Array.new(@workers) do |worker_id|
40
- Thread.new { worker_loop(worker_id) }
41
- end
42
-
43
- # Wait for all workers to complete
44
- threads.each(&:join)
45
-
46
- @stats
47
- end
48
-
49
- # Extract files with progress callback
50
- #
51
- # @yield [current, total, file] Progress callback
52
- # @return [Hash] Extraction statistics
53
- #
54
- # @example
55
- # extractor.extract_with_progress do |current, total, file|
56
- # puts "#{current}/#{total}: #{file.name}"
57
- # end
58
- def extract_with_progress(&block)
59
- return extract_all unless block
60
-
61
- total = @archive.files.count
62
- current = 0
63
- current_mutex = Mutex.new
64
-
65
- FileUtils.mkdir_p(@output_dir)
66
-
67
- # Queue all files
68
- @archive.files.each { |file| @queue << file }
69
- @workers.times { @queue << :done }
70
-
71
- # Start worker threads with progress
72
- threads = Array.new(@workers) do |_worker_id|
73
- Thread.new do
74
- loop do
75
- file = @queue.pop
76
- break if file == :done
77
-
78
- extract_file(file)
79
-
80
- current_mutex.synchronize do
81
- current += 1
82
- yield(current, total, file)
83
- end
84
- end
85
- end
86
- end
87
-
88
- threads.each(&:join)
89
- @stats
90
- end
91
-
92
- private
93
-
94
- def worker_loop(_worker_id)
95
- loop do
96
- file = @queue.pop
97
- break if file == :done
98
-
99
- extract_file(file)
100
- end
101
- end
102
-
103
- def extract_file(file)
104
- output_path = build_output_path(file.name)
105
-
106
- if File.exist?(output_path) && !@overwrite
107
- update_stats(:skipped)
108
- return
109
- end
110
-
111
- begin
112
- # Create directory (thread-safe)
113
- FileUtils.mkdir_p(File.dirname(output_path))
114
-
115
- # Extract file data
116
- data = file.data
117
-
118
- # Write file (one at a time per file)
119
- File.write(output_path, data, mode: "wb")
120
-
121
- # Preserve timestamps if available
122
- if file.respond_to?(:datetime) && file.datetime
123
- File.utime(File.atime(output_path), file.datetime, output_path)
124
- end
125
-
126
- update_stats(:extracted, data.bytesize)
127
- rescue StandardError => e
128
- update_stats(:failed)
129
- warn "Worker error extracting #{file.name}: #{e.message}"
130
- end
131
- end
132
-
133
- def build_output_path(filename)
134
- if @preserve_paths
135
- clean_name = filename.gsub("\\", "/")
136
- File.join(@output_dir, clean_name)
137
- else
138
- base_name = File.basename(filename.gsub("\\", "/"))
139
- File.join(@output_dir, base_name)
140
- end
141
- end
142
-
143
- def update_stats(stat_type, bytes = 0)
144
- @stats_mutex.synchronize do
145
- @stats[stat_type] += 1
146
- @stats[:bytes] += bytes if bytes.positive?
147
- end
148
- end
149
- end
150
-
151
- # Parallel batch processor
152
- class BatchProcessor
153
- def initialize(workers: Extractor::DEFAULT_WORKERS)
154
- @workers = workers
155
- @stats = { total: 0, successful: 0, failed: 0 }
156
- @stats_mutex = Mutex.new
157
- end
158
-
159
- # Process multiple archives in parallel
160
- #
161
- # @param archive_paths [Array<String>] Paths to archives
162
- # @param output_base [String] Base output directory
163
- # @yield [archive_path, stats] Optional callback per archive
164
- # @return [Hash] Overall statistics
165
- #
166
- # @example
167
- # processor = Cabriolet::Parallel::BatchProcessor.new(workers: 8)
168
- # stats = processor.process_all(Dir.glob('*.cab'), 'output/')
169
- def process_all(archive_paths, output_base, &block)
170
- queue = Queue.new
171
- archive_paths.each { |path| queue << path }
172
- @workers.times { queue << :done }
173
-
174
- threads = Array.new(@workers) do
175
- Thread.new { process_loop(queue, output_base, &block) }
176
- end
177
-
178
- threads.each(&:join)
179
- @stats
180
- end
181
-
182
- private
183
-
184
- def process_loop(queue, output_base, &block)
185
- loop do
186
- archive_path = queue.pop
187
- break if archive_path == :done
188
-
189
- process_one(archive_path, output_base, &block)
190
- end
191
- end
192
-
193
- def process_one(archive_path, output_base)
194
- update_stats(:total)
195
-
196
- begin
197
- archive = Cabriolet::Auto.open(archive_path)
198
- output_dir = File.join(output_base, File.basename(archive_path, ".*"))
199
-
200
- extractor = Extractor.new(archive, output_dir, workers: 2)
201
- stats = extractor.extract_all
202
-
203
- update_stats(:successful)
204
-
205
- yield(archive_path, stats) if block_given?
206
- rescue StandardError => e
207
- update_stats(:failed)
208
- warn "Failed to process #{archive_path}: #{e.message}"
209
- end
210
- end
211
-
212
- def update_stats(stat_type)
213
- @stats_mutex.synchronize do
214
- @stats[stat_type] += 1
215
- end
216
- end
217
-
218
- attr_reader :stats
219
- end
220
-
221
- # Thread pool for custom parallel operations
222
- class ThreadPool
223
- def initialize(size: Extractor::DEFAULT_WORKERS)
224
- @size = size
225
- @queue = Queue.new
226
- @threads = []
227
- @running = false
228
- end
229
-
230
- # Start the thread pool
231
- def start
232
- return if @running
233
-
234
- @running = true
235
- @threads = Array.new(@size) do
236
- Thread.new { worker_loop }
237
- end
238
- end
239
-
240
- # Submit a task to the pool
241
- #
242
- # @yield Task to execute
243
- def submit(&block)
244
- start unless @running
245
- @queue << block
246
- end
247
-
248
- # Shutdown the thread pool
249
- #
250
- # @param wait [Boolean] Wait for pending tasks to complete
251
- def shutdown(wait: true)
252
- return unless @running
253
-
254
- if wait
255
- # Wait for queue to empty
256
- sleep 0.01 until @queue.empty?
257
- end
258
-
259
- # Send termination signals
260
- @size.times { @queue << :shutdown }
261
-
262
- # Wait for threads to finish
263
- @threads.each(&:join)
264
- @threads.clear
265
- @running = false
266
- end
267
-
268
- # Execute tasks in parallel with automatic cleanup
269
- #
270
- # @param items [Array] Items to process
271
- # @yield [item] Process each item
272
- # @return [Array] Results from each task
273
- def map(items)
274
- start
275
- results = []
276
- results_mutex = Mutex.new
277
-
278
- items.each_with_index do |item, index|
279
- submit do
280
- result = yield(item)
281
- results_mutex.synchronize do
282
- results[index] = result
283
- end
284
- end
285
- end
286
-
287
- shutdown(wait: true)
288
- results
289
- end
290
-
291
- private
292
-
293
- def worker_loop
294
- loop do
295
- task = @queue.pop
296
- break if task == :shutdown
297
-
298
- begin
299
- task.call
300
- rescue StandardError => e
301
- warn "Thread pool worker error: #{e.message}"
302
- end
303
- end
304
- end
305
- end
306
-
307
- class << self
308
- # Extract archive using parallel workers
309
- #
310
- # @param archive [Object] Archive object
311
- # @param output_dir [String] Output directory
312
- # @param workers [Integer] Number of parallel workers
313
- # @return [Hash] Extraction statistics
314
- def extract(archive, output_dir, workers: Extractor::DEFAULT_WORKERS,
315
- **options)
316
- extractor = Extractor.new(archive, output_dir, workers: workers,
317
- **options)
318
- extractor.extract_all
319
- end
320
-
321
- # Process multiple archives in parallel
322
- #
323
- # @param paths [Array<String>] Archive paths
324
- # @param output_base [String] Base output directory
325
- # @param workers [Integer] Number of parallel workers
326
- # @return [Hash] Processing statistics
327
- def process_batch(paths, output_base, workers: Extractor::DEFAULT_WORKERS)
328
- processor = BatchProcessor.new(workers: workers)
329
- processor.process_all(paths, output_base)
330
- end
331
- end
332
- end
333
- end