cabriolet 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +3 -0
  3. data/lib/cabriolet/binary/bitstream.rb +32 -21
  4. data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
  5. data/lib/cabriolet/cab/compressor.rb +85 -53
  6. data/lib/cabriolet/cab/decompressor.rb +2 -1
  7. data/lib/cabriolet/cab/extractor.rb +2 -35
  8. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  9. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  10. data/lib/cabriolet/checksum.rb +49 -0
  11. data/lib/cabriolet/collections/file_collection.rb +175 -0
  12. data/lib/cabriolet/compressors/quantum.rb +3 -51
  13. data/lib/cabriolet/decompressors/quantum.rb +81 -52
  14. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  15. data/lib/cabriolet/extraction/extractor.rb +171 -0
  16. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  17. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  18. data/lib/cabriolet/format_base.rb +79 -0
  19. data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
  20. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  21. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  22. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  23. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  24. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  25. data/lib/cabriolet/huffman/encoder.rb +15 -12
  26. data/lib/cabriolet/lit/compressor.rb +45 -689
  27. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  28. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  29. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  30. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  31. data/lib/cabriolet/lit/header_writer.rb +124 -0
  32. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  33. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  34. data/lib/cabriolet/quantum_shared.rb +105 -0
  35. data/lib/cabriolet/version.rb +1 -1
  36. data/lib/cabriolet.rb +114 -3
  37. metadata +38 -4
  38. data/lib/cabriolet/auto.rb +0 -173
  39. data/lib/cabriolet/parallel.rb +0 -333
@@ -0,0 +1,175 @@
1
+ # frozen_string_literal: true
2
+
3
module Cabriolet
  module Collections
    # FileCollection holds an ordered list of files queued for compression.
    #
    # Each entry is a Hash with the keys :source (path on disk), :archive
    # (path inside the archive) and :options (per-file options). The class
    # mixes in Enumerable, so entries can be mapped, selected, counted, etc.
    class FileCollection
      include Enumerable

      # Initialize a new, empty file collection.
      #
      # @param format_options [Hash] Options specific to the archive format
      #   (stored as given; not interpreted by this class)
      def initialize(format_options = {})
        @files = []
        @format_options = format_options
      end

      # Add a file to the collection.
      #
      # @param source_path [String] Path to the source file
      # @param archive_path [String, nil] Path within the archive
      #   (defaults to the basename of +source_path+)
      # @param options [Hash] Additional options for this file
      # @return [self] Returns self for chaining
      # @raise [ArgumentError] if the source is missing or not a regular file
      #
      # @example
      #   collection.add("README.md", "docs/README.md")
      #   collection.add("data.txt") # Uses basename
      def add(source_path, archive_path = nil, **options)
        validate_source(source_path)

        @files << {
          source: source_path,
          archive: archive_path || ::File.basename(source_path),
          options: options,
        }

        self
      end

      # Add multiple files at once.
      #
      # @param files [Array<Hash>] File hashes with :source and optional
      #   :archive and :options keys
      # @return [self] Returns self for chaining
      # @raise [ArgumentError] if any source is missing or not a regular file
      def add_all(files)
        files.each do |file|
          # `|| {}` also covers an explicit `options: nil`, which cannot be
          # double-splatted on older Rubies.
          add(file[:source], file[:archive], **(file[:options] || {}))
        end
        self
      end

      # Iterate over the file entries in insertion order.
      #
      # @yield [file_entry] Yields each file entry hash
      # @return [Enumerator] If no block given
      def each(&)
        @files.each(&)
      end

      # @return [Integer] Number of files in the collection
      def size
        @files.size
      end

      # @return [Boolean] True if the collection holds no files
      def empty?
        @files.empty?
      end

      # Remove every file from the collection.
      #
      # @return [self] Returns self for chaining
      def clear
        @files.clear
        self
      end

      # Prepare files for compression by reading their metadata from disk.
      #
      # @return [Array<Hash>] One info hash per file with the keys
      #   :source_path, :archive_path, :size, :mtime, :atime, :attributes
      #   and :options
      def prepare_for_compression
        @files.map { |file_entry| prepare_file_info(file_entry) }
      end

      # Total uncompressed size of all files, read from disk at call time.
      #
      # @return [Integer] Total size in bytes
      def total_size
        @files.sum { |file| ::File.size(file[:source]) }
      end

      # Group file entries by the directory part of their archive path.
      #
      # @return [Hash{String => Array<Hash>}] Directory path => entries
      #   (entries without a directory are grouped under ".")
      def by_directory
        @files.group_by { |file| ::File.dirname(file[:archive]) }
      end

      # Find entries whose archive path matches a pattern.
      #
      # @param pattern [String, Regexp] Regexp to match, or substring to find
      # @return [Array<Hash>] Matching file entries
      def find_by_pattern(pattern)
        @files.select do |file|
          if pattern.is_a?(Regexp)
            pattern.match?(file[:archive])
          else
            file[:archive].include?(pattern)
          end
        end
      end

      private

      # Validate that the source exists and is a regular file.
      #
      # @param path [String] Path to validate
      # @raise [ArgumentError] if file doesn't exist or isn't a regular file
      def validate_source(path)
        raise ArgumentError, "File does not exist: #{path}" unless ::File.exist?(path)
        raise ArgumentError, "Not a regular file: #{path}" unless ::File.file?(path)
      end

      # Build the metadata hash for one entry.
      #
      # @param file_entry [Hash] Original file entry
      # @return [Hash] Prepared file info with metadata
      def prepare_file_info(file_entry)
        source = file_entry[:source]
        stat = ::File.stat(source)

        {
          source_path: source,
          archive_path: file_entry[:archive],
          size: stat.size,
          mtime: stat.mtime,
          atime: stat.atime,
          attributes: calculate_attributes(stat, source),
          options: file_entry[:options],
        }
      end

      # Calculate the archive attribute flags for one file.
      #
      # @param stat [File::Stat] File stat object
      # @param source_path [String, nil] Path the stat was taken from; needed
      #   for the dotfile check. When nil the hidden flag is never set.
      # @return [Integer] Attribute flags
      def calculate_attributes(stat, source_path = nil)
        attribs = Constants::ATTRIB_ARCH

        # Set read-only flag if not writable
        attribs |= Constants::ATTRIB_READONLY unless stat.writable?

        # Set hidden flag for Unix dotfiles. This must look at the file being
        # prepared, not at the first entry of the collection.
        if source_path && ::File.basename(source_path).start_with?(".")
          attribs |= Constants::ATTRIB_HIDDEN
        end

        # Set system flag for system files.
        # NOTE(review): File.stat follows symlinks, so symlink? is always
        # false for the stat passed in by prepare_file_info.
        attribs |= Constants::ATTRIB_SYSTEM if stat.socket? || stat.symlink?

        attribs
      end
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../quantum_shared"
4
+
3
5
  module Cabriolet
4
6
  module Compressors
5
7
  # Quantum compresses data using arithmetic coding and LZ77-based matching
@@ -13,60 +15,10 @@ module Cabriolet
13
15
  # For now, this implementation focuses on correct structure.
14
16
  # rubocop:disable Metrics/ClassLength
15
17
  class Quantum < Base
16
- # Frame size (32KB per frame)
17
- FRAME_SIZE = 32_768
18
-
19
- # Match constants
20
- MIN_MATCH = 3
21
- MAX_MATCH = 259
22
-
23
- # Position slot tables (same as decompressor)
24
- POSITION_BASE = [
25
- 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
26
- 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
27
- 24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
28
- 393_216, 524_288, 786_432, 1_048_576, 1_572_864
29
- ].freeze
30
-
31
- EXTRA_BITS = [
32
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
33
- 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
34
- 17, 17, 18, 18, 19, 19
35
- ].freeze
36
-
37
- LENGTH_BASE = [
38
- 0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
39
- 30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
40
- ].freeze
41
-
42
- LENGTH_EXTRA = [
43
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
44
- 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
45
- ].freeze
18
+ include QuantumShared
46
19
 
47
20
  attr_reader :window_bits, :window_size
48
21
 
49
- # Represents a symbol in an arithmetic coding model
50
- class ModelSymbol
51
- attr_accessor :sym, :cumfreq
52
-
53
- def initialize(sym, cumfreq)
54
- @sym = sym
55
- @cumfreq = cumfreq
56
- end
57
- end
58
-
59
- # Represents an arithmetic coding model
60
- class Model
61
- attr_accessor :shiftsleft, :entries, :syms
62
-
63
- def initialize(syms, entries)
64
- @syms = syms
65
- @entries = entries
66
- @shiftsleft = 4
67
- end
68
- end
69
-
70
22
  # Initialize Quantum compressor
71
23
  #
72
24
  # @param io_system [System::IOSystem] I/O system for reading/writing
@@ -1,5 +1,33 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../quantum_shared"
4
+
5
+ # Compatibility shim for String#bytesplice (added in Ruby 3.2)
6
+ unless String.method_defined?(:bytesplice)
7
+ module StringBytespliceCompat
8
+ # Compatibility implementation of bytesplice for Ruby < 3.2
9
+ # Uses clear/append which is slower but works with mutable strings
10
+ def bytesplice(index, length, other_string, other_index = 0,
11
+ other_length = nil)
12
+ other_length ||= other_string.bytesize
13
+
14
+ # Build new string content
15
+ prefix = byteslice(0, index)
16
+ middle = other_string.byteslice(other_index, other_length)
17
+ suffix = byteslice((index + length)..-1)
18
+ new_content = prefix + middle + suffix
19
+
20
+ # Modify receiver in place
21
+ clear
22
+ self << new_content
23
+
24
+ self
25
+ end
26
+ end
27
+
28
+ String.prepend(StringBytespliceCompat)
29
+ end
30
+
3
31
  module Cabriolet
4
32
  module Decompressors
5
33
  # Quantum handles Quantum-compressed data using arithmetic coding
@@ -8,59 +36,10 @@ module Cabriolet
8
36
  # The Quantum method was created by David Stafford, adapted by Microsoft
9
37
  # Corporation.
10
38
  class Quantum < Base
11
- # Frame size (32KB per frame)
12
- FRAME_SIZE = 32_768
13
-
14
- # Match constants
15
- MAX_MATCH = 259
16
-
17
- # Position slot tables (same as in qtmd.c)
18
- POSITION_BASE = [
19
- 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
20
- 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
21
- 24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
22
- 393_216, 524_288, 786_432, 1_048_576, 1_572_864
23
- ].freeze
24
-
25
- EXTRA_BITS = [
26
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
27
- 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
28
- 17, 17, 18, 18, 19, 19
29
- ].freeze
30
-
31
- LENGTH_BASE = [
32
- 0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
33
- 30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
34
- ].freeze
35
-
36
- LENGTH_EXTRA = [
37
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
38
- 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
39
- ].freeze
39
+ include QuantumShared
40
40
 
41
41
  attr_reader :window_bits, :window_size
42
42
 
43
- # Represents a symbol in an arithmetic coding model
44
- class ModelSymbol
45
- attr_accessor :sym, :cumfreq
46
-
47
- def initialize(sym, cumfreq)
48
- @sym = sym
49
- @cumfreq = cumfreq
50
- end
51
- end
52
-
53
- # Represents an arithmetic coding model
54
- class Model
55
- attr_accessor :shiftsleft, :entries, :syms
56
-
57
- def initialize(syms, entries)
58
- @syms = syms
59
- @entries = entries
60
- @shiftsleft = 4
61
- end
62
- end
63
-
64
43
  # Initialize Quantum decompressor
65
44
  #
66
45
  # @param io_system [System::IOSystem] I/O system for reading/writing
@@ -81,8 +60,13 @@ module Cabriolet
81
60
  @window_bits = window_bits
82
61
  @window_size = 1 << window_bits
83
62
 
84
- # Initialize window
85
- @window = "\0" * @window_size
63
+ # Initialize window (mutable for Ruby < 3.2 bytesplice compatibility)
64
+ @window = if String.method_defined?(:bytesplice)
65
+ "\0" * @window_size
66
+ else
67
+ # In Ruby < 3.2, create mutable window using String.new
68
+ String.new("\0" * @window_size)
69
+ end
86
70
  @window_posn = 0
87
71
  @frame_todo = FRAME_SIZE
88
72
 
@@ -409,7 +393,52 @@ module Cabriolet
409
393
  end
410
394
 
411
395
# Copy a match of +length+ bytes from +offset+ bytes back in the window to
# the current window position.
#
# Long matches use the bulk path, but only when that is safe:
#
# * offset >= length - the source must not overlap the bytes being written.
#   A bulk copy works on a snapshot of the source, whereas an overlapping
#   LZ77 match has to re-read bytes produced earlier in the same copy.
# * the destination must fit before the end of the window - replacing a
#   range that runs past the end would make bytesplice grow the window.
#
# Everything else goes through the byte-by-byte path.
#
# @param offset [Integer] Distance back from the current window position
# @param length [Integer] Number of bytes to copy
def copy_match(offset, length)
  bulk_safe = length > 32 &&
    offset >= length &&
    @window_posn + length <= @window_size

  if bulk_safe
    copy_match_bulk(offset, length)
  else
    copy_match_byte_by_byte(offset, length)
  end
end
405
+
406
# Copy +length+ bytes of an earlier match into the window at the current
# position with bytesplice, advancing @window_posn past the copied bytes.
#
# When the match starts before the beginning of the window (offset greater
# than the current position) the source is taken from the end of the window
# and, if that is not enough, continued from the start of the window.
#
# @param offset [Integer] Distance back from the current window position
# @param length [Integer] Number of bytes to copy
# @return [Integer] The new window position
# @raise [DecompressionError] if offset is larger than the window
def copy_match_bulk(offset, length)
  # Source lies entirely behind the write position: one straight copy.
  if offset <= @window_posn
    @window.bytesplice(@window_posn, length, @window,
                       @window_posn - offset, length)
    return @window_posn += length
  end

  raise DecompressionError, "Match offset beyond window" if offset > @window_size

  # Source starts in the tail of the window.
  tail_len = offset - @window_posn
  tail_start = @window_size - tail_len

  # The tail alone covers the whole match.
  if tail_len >= length
    @window.bytesplice(@window_posn, length, @window, tail_start, length)
    return @window_posn += length
  end

  # Tail first, then the rest from the start of the window.
  @window.bytesplice(@window_posn, tail_len, @window, tail_start, tail_len)
  @window_posn += tail_len

  head_len = length - tail_len
  @window.bytesplice(@window_posn, head_len, @window, 0, head_len)
  @window_posn += head_len
end
439
+
440
+ # Byte-by-byte copy for short matches (fallback)
441
+ def copy_match_byte_by_byte(offset, length)
413
442
  if offset > @window_posn
414
443
  # Match wraps around window
415
444
  if offset > @window_size
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+
5
module Cabriolet
  module Extraction
    # BaseExtractor provides common extraction functionality for extractors:
    # mapping archive member names to output paths, writing file data to disk
    # and restoring timestamps.
    class BaseExtractor
      # Initialize the base extractor
      #
      # @param output_dir [String] Directory to extract files to
      # @param preserve_paths [Boolean] Whether to preserve directory structure
      # @param overwrite [Boolean] Whether to overwrite existing files
      def initialize(output_dir, preserve_paths: true, overwrite: false)
        @output_dir = output_dir
        @preserve_paths = preserve_paths
        @overwrite = overwrite
      end

      protected

      # Build the output path for an archive member.
      #
      # Archive member names are untrusted input. Backslashes are normalised
      # to forward slashes and empty, "." and ".." segments are dropped, so
      # names such as "..\\..\\evil" or "/etc/passwd" can never resolve to a
      # location outside the output directory.
      #
      # @param filename [String] Original filename from archive (may have backslashes)
      # @return [String] Full output path for the file
      # @raise [ArgumentError] if the name contains no usable path segment
      def build_output_path(filename)
        segments = filename.tr("\\", "/").split("/").reject do |segment|
          segment.empty? || segment == "." || segment == ".."
        end

        if segments.empty?
          raise ArgumentError, "Invalid archive filename: #{filename.inspect}"
        end

        # Keep the directory structure, or flatten to just the file name
        relative = @preserve_paths ? ::File.join(*segments) : segments.last
        ::File.join(@output_dir, relative)
      end

      # Extract a single file to disk.
      #
      # Any StandardError raised along the way is reported with +warn+ and
      # turned into a nil return (deliberate best effort: one bad member does
      # not abort the caller).
      #
      # @param file [Object] File object from archive (must respond to :name and :data)
      # @return [String, nil] Output path if written; nil if the target exists
      #   and overwriting is disabled, the file has no data, or extraction failed
      def extract_file(file)
        output_path = build_output_path(file.name)

        # Skip existing files unless overwriting was requested
        return nil if ::File.exist?(output_path) && !@overwrite

        # Create parent directory (mkdir_p is a no-op when it already exists)
        FileUtils.mkdir_p(::File.dirname(output_path))

        data = file.data
        return nil unless data

        ::File.binwrite(output_path, data)

        # Preserve file attributes if available
        preserve_file_attributes(output_path, file)

        output_path
      rescue StandardError => e
        warn "Failed to extract #{file.name}: #{e.message}"
        nil
      end

      # Restore timestamps on an extracted file when the archive entry
      # provides them: +datetime+ is tried first, then +mtime+ (with +atime+
      # if the entry has one). Entries with neither are left untouched.
      #
      # @param path [String] Path to extracted file
      # @param file [Object] File object from archive
      def preserve_file_attributes(path, file)
        if file.respond_to?(:datetime) && file.datetime
          ::File.utime(::File.atime(path), file.datetime, path)
        elsif file.respond_to?(:mtime) && file.mtime
          atime = file.respond_to?(:atime) ? file.atime : ::File.atime(path)
          ::File.utime(atime, file.mtime, path)
        end
      end
    end
  end
end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
require "fileutils"
require "fractor"
require_relative "file_extraction_work"
require_relative "file_extraction_worker"
6
+
7
module Cabriolet
  module Extraction
    # Unified extractor that hands file extraction to Fractor workers.
    # With workers: 1 and a progress block, files are processed one by one in
    # the calling thread; otherwise work is submitted to a Fractor supervisor.
    class Extractor
      DEFAULT_WORKERS = 4

      attr_reader :archive, :output_dir, :workers, :stats

      # @param archive [Object] Archive object responding to +files+
      # @param output_dir [String] Directory to extract files to
      # @param workers [Integer] Number of workers (values below 1 become 1)
      # @param options [Hash] Accepts :preserve_paths (default true) and
      #   :overwrite (default false)
      def initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options)
        @archive = archive
        @output_dir = output_dir
        @workers = [workers, 1].max # At least 1 worker
        @preserve_paths = options.fetch(:preserve_paths, true)
        @overwrite = options.fetch(:overwrite, false)
        @stats = { extracted: 0, skipped: 0, failed: 0, bytes: 0 }
      end

      # Extract all files from archive
      #
      # @return [Hash] Extraction statistics (:extracted, :skipped, :failed,
      #   :bytes); counters accumulate across calls on the same instance
      def extract_all
        FileUtils.mkdir_p(@output_dir)
        run_batch(@archive.files)
        @stats
      end

      # Extract files with progress callback
      #
      # Without a block this is the same as #extract_all. With more than one
      # worker the files are processed in batches and the callback is invoked
      # for every file of a batch once that batch has finished.
      #
      # @yield [current, total, file] Progress callback
      # @return [Hash] Extraction statistics
      def extract_with_progress(&block)
        return extract_all unless block

        FileUtils.mkdir_p(@output_dir)

        files = @archive.files
        total = files.count
        current = 0

        # Sequential mode uses simple iteration with per-file progress
        if @workers == 1
          files.each do |file|
            extract_single_file(file)
            current += 1
            yield(current, total, file)
          end
          return @stats
        end

        # Parallel mode: Fractor has no per-item callback here, so progress
        # is reported batch by batch.
        batch_size = [total / @workers, 1].max
        files.each_slice(batch_size) do |batch|
          # run_batch also folds the batch results into @stats; without that
          # the statistics of a parallel run would stay at zero.
          run_batch(batch)

          batch.each do |file|
            current += 1
            yield(current, total, file)
          end
        end

        @stats
      end

      private

      # Wrap an archive file in a work item carrying the extraction settings.
      #
      # @param file [Object] File to extract
      # @return [FileExtractionWork]
      def build_work_item(file)
        FileExtractionWork.new(
          file,
          output_dir: @output_dir,
          preserve_paths: @preserve_paths,
          overwrite: @overwrite,
        )
      end

      # Run one supervisor over the given files and record the results.
      #
      # @param files [Enumerable] Files to extract
      def run_batch(files)
        work_items = files.map { |file| build_work_item(file) }
        # Nothing to do; avoid starting workers for an empty queue.
        return if work_items.empty?

        supervisor = Fractor::Supervisor.new(
          worker_pools: [
            {
              worker_class: FileExtractionWorker,
              num_workers: @workers,
            },
          ],
        )

        supervisor.add_work_items(work_items)
        supervisor.run

        collect_stats(supervisor.results)
      end

      # Extract a single file (for sequential mode with progress)
      #
      # @param file [Object] File to extract
      # @return [Object] Result from worker
      def extract_single_file(file)
        result = FileExtractionWorker.new.process(build_work_item(file))
        update_stats_from_result(result)
        result
      end

      # Collect statistics from Fractor results
      #
      # @param results [Fractor::Results] Results from supervisor
      def collect_stats(results)
        results.results.each do |result|
          update_stats_from_result(result)
        end

        # NOTE(review): Fractor's aggregator appears to keep failed work in a
        # separate `errors` list rather than in `results` - confirm against
        # the Fractor version in use.
        return unless results.respond_to?(:errors)

        @stats[:failed] += Array(results.errors).size
      end

      # Update stats from a single work result
      #
      # @param result [Fractor::WorkResult] Result from worker
      def update_stats_from_result(result)
        unless result.success?
          @stats[:failed] += 1
          return
        end

        data = result.result
        if data.is_a?(Hash) && data[:status] == :skipped
          @stats[:skipped] += 1
        else
          @stats[:extracted] += 1
          @stats[:bytes] += data[:size] if data.is_a?(Hash) && data[:size]
        end
      end
    end
  end
end