cabriolet 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +3 -0
- data/lib/cabriolet/binary/bitstream.rb +32 -21
- data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
- data/lib/cabriolet/cab/compressor.rb +85 -53
- data/lib/cabriolet/cab/decompressor.rb +2 -1
- data/lib/cabriolet/cab/extractor.rb +2 -35
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/quantum.rb +3 -51
- data/lib/cabriolet/decompressors/quantum.rb +81 -52
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/lit/compressor.rb +45 -689
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +114 -3
- metadata +38 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module Collections
|
|
5
|
+
# FileCollection manages a collection of files for compression
|
|
6
|
+
# Provides unified interface for adding files and preparing them for compression
|
|
7
|
+
class FileCollection
  include Enumerable

  # Initialize a new, empty file collection
  #
  # @param format_options [Hash] Options specific to the archive format
  def initialize(format_options = {})
    @files = []
    @format_options = format_options
  end

  # Add a file to the collection
  #
  # @param source_path [String] Path to the source file
  # @param archive_path [String, nil] Path within the archive (defaults to basename)
  # @param options [Hash] Additional options for this file
  # @return [self] Returns self for chaining
  # @raise [ArgumentError] if the source does not exist or is not a regular file
  #
  # @example
  #   collection.add("README.md", "docs/README.md")
  #   collection.add("data.txt") # Uses basename
  def add(source_path, archive_path = nil, **options)
    validate_source(source_path)

    @files << {
      source: source_path,
      archive: archive_path || ::File.basename(source_path),
      options: options,
    }

    self
  end

  # Add multiple files at once
  #
  # @param files [Array<Hash>] Array of file hashes with :source, :archive, :options keys
  # @return [self] Returns self for chaining
  def add_all(files)
    files.each do |file|
      add(file[:source], file[:archive], **file.fetch(:options, {}))
    end
    self
  end

  # Iterate over files in the collection
  #
  # @yield [file_entry] Yields each file entry hash
  # @return [Enumerator] If no block given
  def each(&block)
    @files.each(&block)
  end

  # Get the number of files in the collection
  #
  # @return [Integer] Number of files
  def size
    @files.size
  end

  # Check if collection is empty
  #
  # @return [Boolean] True if no files
  def empty?
    @files.empty?
  end

  # Clear all files from the collection
  #
  # @return [self] Returns self for chaining
  def clear
    @files.clear
    self
  end

  # Prepare files for compression by reading metadata from disk
  #
  # @return [Array<Hash>] Array of prepared file info hashes
  def prepare_for_compression
    @files.map { |file_entry| prepare_file_info(file_entry) }
  end

  # Get total uncompressed size of all files
  #
  # @return [Integer] Total size in bytes
  def total_size
    @files.sum { |f| ::File.size(f[:source]) }
  end

  # Group files by the directory part of their archive path
  #
  # @return [Hash] Hash with directory paths as keys and file arrays as values
  def by_directory
    @files.group_by { |file| ::File.dirname(file[:archive]) }
  end

  # Find files by pattern in archive path
  #
  # @param pattern [String, Regexp] Regexp to match, or substring to look for
  # @return [Array<Hash>] Matching file entries
  def find_by_pattern(pattern)
    @files.select do |file|
      if pattern.is_a?(Regexp)
        pattern.match?(file[:archive])
      else
        file[:archive].include?(pattern)
      end
    end
  end

  private

  # Validate that source file exists and is a regular file
  #
  # @param path [String] Path to validate
  # @raise [ArgumentError] if file doesn't exist or isn't a regular file
  def validate_source(path)
    raise ArgumentError, "File does not exist: #{path}" unless ::File.exist?(path)
    raise ArgumentError, "Not a regular file: #{path}" unless ::File.file?(path)
  end

  # Prepare file information for compression
  #
  # @param file_entry [Hash] Original file entry
  # @return [Hash] Prepared file info with metadata
  def prepare_file_info(file_entry)
    stat = ::File.stat(file_entry[:source])

    {
      source_path: file_entry[:source],
      archive_path: file_entry[:archive],
      size: stat.size,
      mtime: stat.mtime,
      atime: stat.atime,
      # Pass this entry's own path so the hidden flag reflects this file
      attributes: calculate_attributes(stat, file_entry[:source]),
      options: file_entry[:options],
    }
  end

  # Calculate file attributes for archive format
  #
  # @param stat [File::Stat] File stat object
  # @param source_path [String, nil] Path the stat belongs to; used for the
  #   hidden (dotfile) check. When nil, the hidden flag is never set.
  # @return [Integer] Attribute flags
  def calculate_attributes(stat, source_path = nil)
    attribs = Constants::ATTRIB_ARCH

    # Set read-only flag if not writable
    attribs |= Constants::ATTRIB_READONLY unless stat.writable?

    # Set hidden flag for Unix dotfiles, judged on the file itself rather
    # than on whichever entry happens to be first in the collection
    if source_path && ::File.basename(source_path).start_with?(".")
      attribs |= Constants::ATTRIB_HIDDEN
    end

    # Set system flag for system files
    # NOTE(review): the stat passed in comes from File.stat, which follows
    # symbolic links, so symlink? is not expected to be true here - confirm
    # whether File.lstat was intended.
    attribs |= Constants::ATTRIB_SYSTEM if stat.socket? || stat.symlink?

    attribs
  end
end
|
|
174
|
+
end
|
|
175
|
+
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../quantum_shared"
|
|
4
|
+
|
|
3
5
|
module Cabriolet
|
|
4
6
|
module Compressors
|
|
5
7
|
# Quantum compresses data using arithmetic coding and LZ77-based matching
|
|
@@ -13,60 +15,10 @@ module Cabriolet
|
|
|
13
15
|
# For now, this implementation focuses on correct structure.
|
|
14
16
|
# rubocop:disable Metrics/ClassLength
|
|
15
17
|
class Quantum < Base
|
|
16
|
-
|
|
17
|
-
FRAME_SIZE = 32_768
|
|
18
|
-
|
|
19
|
-
# Match constants
|
|
20
|
-
MIN_MATCH = 3
|
|
21
|
-
MAX_MATCH = 259
|
|
22
|
-
|
|
23
|
-
# Position slot tables (same as decompressor)
|
|
24
|
-
POSITION_BASE = [
|
|
25
|
-
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
|
|
26
|
-
512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
|
|
27
|
-
24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
|
|
28
|
-
393_216, 524_288, 786_432, 1_048_576, 1_572_864
|
|
29
|
-
].freeze
|
|
30
|
-
|
|
31
|
-
EXTRA_BITS = [
|
|
32
|
-
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
|
33
|
-
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
|
|
34
|
-
17, 17, 18, 18, 19, 19
|
|
35
|
-
].freeze
|
|
36
|
-
|
|
37
|
-
LENGTH_BASE = [
|
|
38
|
-
0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
|
|
39
|
-
30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
|
|
40
|
-
].freeze
|
|
41
|
-
|
|
42
|
-
LENGTH_EXTRA = [
|
|
43
|
-
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
|
|
44
|
-
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
|
|
45
|
-
].freeze
|
|
18
|
+
include QuantumShared
|
|
46
19
|
|
|
47
20
|
attr_reader :window_bits, :window_size
|
|
48
21
|
|
|
49
|
-
# Represents a symbol in an arithmetic coding model
|
|
50
|
-
class ModelSymbol
|
|
51
|
-
attr_accessor :sym, :cumfreq
|
|
52
|
-
|
|
53
|
-
def initialize(sym, cumfreq)
|
|
54
|
-
@sym = sym
|
|
55
|
-
@cumfreq = cumfreq
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# Represents an arithmetic coding model
|
|
60
|
-
class Model
|
|
61
|
-
attr_accessor :shiftsleft, :entries, :syms
|
|
62
|
-
|
|
63
|
-
def initialize(syms, entries)
|
|
64
|
-
@syms = syms
|
|
65
|
-
@entries = entries
|
|
66
|
-
@shiftsleft = 4
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
|
|
70
22
|
# Initialize Quantum compressor
|
|
71
23
|
#
|
|
72
24
|
# @param io_system [System::IOSystem] I/O system for reading/writing
|
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../quantum_shared"
|
|
4
|
+
|
|
5
|
+
# Compatibility shim for String#bytesplice.
#
# String#bytesplice first appeared in Ruby 3.2, but the five-argument form
# that copies from a sub-range of the source string
# (index, length, str, str_index, str_length) is only available from Ruby 3.3.
# Probing for that form, instead of for the method's mere existence, keeps the
# shim active on Ruby 3.2 as well as on older versions.
bytesplice_subrange_supported =
  begin
    String.new("ab").bytesplice(0, 1, "cd", 1, 1)
    true
  rescue NoMethodError, ArgumentError, TypeError
    false
  end

unless bytesplice_subrange_supported
  module StringBytespliceCompat
    # Compatibility implementation of bytesplice for Ruby < 3.3.
    # Rebuilds the whole string, which is slower than the native method.
    #
    # @param args [Array] index, length, other_string and optionally
    #   other_index (default 0) and other_length (default: other_string's size)
    # @return [String] self, modified in place
    # @raise [ArgumentError] if the argument count is not supported
    # @raise [IndexError] if index or other_index lies beyond the string
    def bytesplice(*args)
      # Forms the running Ruby already implements natively (Ruby 3.2) are
      # passed straight through so other callers keep the native behavior.
      return super if args.size < 4 && defined?(super)

      unless (3..5).cover?(args.size)
        raise ArgumentError,
              "wrong number of arguments (given #{args.size}, expected 3..5)"
      end

      index, length, other_string, other_index, other_length = args
      other_index ||= 0
      other_length ||= other_string.bytesize

      raise IndexError, "index #{index} out of string" if index > bytesize

      # Build new string content
      prefix = byteslice(0, index)
      middle = other_string.byteslice(other_index, other_length)
      raise IndexError, "index #{other_index} out of string" if middle.nil?

      # A length reaching past the end is clamped, so the suffix may be empty
      suffix = byteslice((index + length)..-1) || ""

      # Modify receiver in place
      replace(prefix + middle + suffix)

      self
    end
  end

  String.prepend(StringBytespliceCompat)
end
|
|
30
|
+
|
|
3
31
|
module Cabriolet
|
|
4
32
|
module Decompressors
|
|
5
33
|
# Quantum handles Quantum-compressed data using arithmetic coding
|
|
@@ -8,59 +36,10 @@ module Cabriolet
|
|
|
8
36
|
# The Quantum method was created by David Stafford, adapted by Microsoft
|
|
9
37
|
# Corporation.
|
|
10
38
|
class Quantum < Base
|
|
11
|
-
|
|
12
|
-
FRAME_SIZE = 32_768
|
|
13
|
-
|
|
14
|
-
# Match constants
|
|
15
|
-
MAX_MATCH = 259
|
|
16
|
-
|
|
17
|
-
# Position slot tables (same as in qtmd.c)
|
|
18
|
-
POSITION_BASE = [
|
|
19
|
-
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
|
|
20
|
-
512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
|
|
21
|
-
24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
|
|
22
|
-
393_216, 524_288, 786_432, 1_048_576, 1_572_864
|
|
23
|
-
].freeze
|
|
24
|
-
|
|
25
|
-
EXTRA_BITS = [
|
|
26
|
-
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
|
27
|
-
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
|
|
28
|
-
17, 17, 18, 18, 19, 19
|
|
29
|
-
].freeze
|
|
30
|
-
|
|
31
|
-
LENGTH_BASE = [
|
|
32
|
-
0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
|
|
33
|
-
30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
|
|
34
|
-
].freeze
|
|
35
|
-
|
|
36
|
-
LENGTH_EXTRA = [
|
|
37
|
-
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
|
|
38
|
-
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
|
|
39
|
-
].freeze
|
|
39
|
+
include QuantumShared
|
|
40
40
|
|
|
41
41
|
attr_reader :window_bits, :window_size
|
|
42
42
|
|
|
43
|
-
# Represents a symbol in an arithmetic coding model
|
|
44
|
-
class ModelSymbol
|
|
45
|
-
attr_accessor :sym, :cumfreq
|
|
46
|
-
|
|
47
|
-
def initialize(sym, cumfreq)
|
|
48
|
-
@sym = sym
|
|
49
|
-
@cumfreq = cumfreq
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
# Represents an arithmetic coding model
|
|
54
|
-
class Model
|
|
55
|
-
attr_accessor :shiftsleft, :entries, :syms
|
|
56
|
-
|
|
57
|
-
def initialize(syms, entries)
|
|
58
|
-
@syms = syms
|
|
59
|
-
@entries = entries
|
|
60
|
-
@shiftsleft = 4
|
|
61
|
-
end
|
|
62
|
-
end
|
|
63
|
-
|
|
64
43
|
# Initialize Quantum decompressor
|
|
65
44
|
#
|
|
66
45
|
# @param io_system [System::IOSystem] I/O system for reading/writing
|
|
@@ -81,8 +60,13 @@ module Cabriolet
|
|
|
81
60
|
@window_bits = window_bits
|
|
82
61
|
@window_size = 1 << window_bits
|
|
83
62
|
|
|
84
|
-
# Initialize window
|
|
85
|
-
@window =
|
|
63
|
+
# Initialize window (mutable for Ruby < 3.2 bytesplice compatibility)
|
|
64
|
+
@window = if String.method_defined?(:bytesplice)
|
|
65
|
+
"\0" * @window_size
|
|
66
|
+
else
|
|
67
|
+
# In Ruby < 3.2, create mutable window using String.new
|
|
68
|
+
String.new("\0" * @window_size)
|
|
69
|
+
end
|
|
86
70
|
@window_posn = 0
|
|
87
71
|
@frame_todo = FRAME_SIZE
|
|
88
72
|
|
|
@@ -409,7 +393,52 @@ module Cabriolet
|
|
|
409
393
|
end
|
|
410
394
|
|
|
411
395
|
# Copy match from window
|
|
396
|
+
# Optimized to use bulk byte operations for better performance
|
|
412
397
|
# Dispatches a match copy to the strategy suited to its length: matches of
# 32 bytes or fewer are copied one byte at a time, longer ones use the
# bulk path.
#
# @param offset [Integer] Distance back from the current window position
# @param length [Integer] Number of bytes to copy
def copy_match(offset, length)
  return copy_match_byte_by_byte(offset, length) unless length > 32

  copy_match_bulk(offset, length)
end
|
|
405
|
+
|
|
406
|
+
# Bulk copy using bytesplice for better performance on longer matches.
#
# The block copy is only used when it is equivalent to copying byte by byte;
# every other case is handed to copy_match_byte_by_byte.
#
# @param offset [Integer] Distance back from the current window position
# @param length [Integer] Number of bytes to copy
# @raise [DecompressionError] if offset is larger than the window
def copy_match_bulk(offset, length)
  # A match longer than its offset reads bytes that this same copy has just
  # written (run replication); a block copy would read the stale bytes
  # instead. A copy running past the end of the window would make
  # bytesplice grow the buffer. Both cases use the byte-wise path.
  if length > offset || @window_posn + length > @window_size
    return copy_match_byte_by_byte(offset, length)
  end

  # Native bytesplice rejects offsets that fall inside a multi-byte
  # character, which arbitrary decompressed bytes in a UTF-8 tagged window
  # can produce. Splice as raw bytes and restore the encoding afterwards.
  window_encoding = @window.encoding
  @window.force_encoding(Encoding::BINARY)

  begin
    if offset > @window_posn
      # Match wraps around window
      raise DecompressionError, "Match offset beyond window" if offset > @window_size

      # Bytes that come from the tail of the window
      copy_len = offset - @window_posn
      src_pos = @window_size - copy_len

      if copy_len < length
        # Copy from end, then from beginning
        @window.bytesplice(@window_posn, copy_len, @window, src_pos, copy_len)
        @window_posn += copy_len
        remaining = length - copy_len
        @window.bytesplice(@window_posn, remaining, @window, 0, remaining)
        @window_posn += remaining
      else
        # Copy entirely from end
        @window.bytesplice(@window_posn, length, @window, src_pos, length)
        @window_posn += length
      end
    else
      # Normal copy - source lies entirely before the write position
      src_pos = @window_posn - offset
      @window.bytesplice(@window_posn, length, @window, src_pos, length)
      @window_posn += length
    end
  ensure
    @window.force_encoding(window_encoding)
  end
end
|
|
439
|
+
|
|
440
|
+
# Byte-by-byte copy for short matches (fallback)
|
|
441
|
+
def copy_match_byte_by_byte(offset, length)
|
|
413
442
|
if offset > @window_posn
|
|
414
443
|
# Match wraps around window
|
|
415
444
|
if offset > @window_size
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
|
|
5
|
+
module Cabriolet
|
|
6
|
+
module Extraction
|
|
7
|
+
# BaseExtractor provides common extraction functionality for all extractors
|
|
8
|
+
# Reduces code duplication between SimpleExtractor and Parallel::Extractor
|
|
9
|
+
class BaseExtractor
  # Path segments that must never be carried over from an archive entry name
  UNSAFE_SEGMENTS = ["", ".", ".."].freeze

  # Initialize the base extractor
  #
  # @param output_dir [String] Directory to extract files to
  # @param preserve_paths [Boolean] Whether to preserve directory structure
  # @param overwrite [Boolean] Whether to overwrite existing files
  def initialize(output_dir, preserve_paths: true, overwrite: false)
    @output_dir = output_dir
    @preserve_paths = preserve_paths
    @overwrite = overwrite
  end

  protected

  # Build the output path for a file, handling path preservation and cleaning.
  #
  # Entry names come from the archive and are therefore untrusted: empty,
  # "." and ".." segments are dropped so the result always stays below the
  # output directory.
  #
  # @param filename [String] Original filename from archive (may have backslashes)
  # @return [String] Full output path for the file
  # @raise [ArgumentError] if the name contains no usable path segment
  def build_output_path(filename)
    # Normalize path separators (Windows archives use backslashes)
    clean_name = filename.gsub("\\", "/")
    segments = clean_name.split("/").reject { |part| UNSAFE_SEGMENTS.include?(part) }

    if segments.empty?
      raise ArgumentError, "Invalid archive path: #{filename.inspect}"
    end

    # Keep the directory structure, or flatten to just the final segment
    relative = @preserve_paths ? ::File.join(*segments) : segments.last
    ::File.join(@output_dir, relative)
  end

  # Extract a single file to disk
  #
  # @param file [Object] File object from archive (must respond to :name and :data)
  # @return [String, nil] Output path if successful, nil if skipped or failed
  def extract_file(file)
    output_path = build_output_path(file.name)

    # Skip existing files unless overwriting was requested
    return nil if ::File.exist?(output_path) && !@overwrite

    # Create parent directory
    FileUtils.mkdir_p(::File.dirname(output_path))

    # Get file data
    data = file.data
    return nil unless data

    # Write file data
    ::File.binwrite(output_path, data)

    # Preserve file attributes if available
    preserve_file_attributes(output_path, file)

    output_path
  rescue StandardError => e
    # Best effort: report the failure and let the caller continue
    warn "Failed to extract #{file.name}: #{e.message}"
    nil
  end

  # Preserve file attributes (timestamps, etc.) if available on the file object
  #
  # @param path [String] Path to extracted file
  # @param file [Object] File object from archive
  def preserve_file_attributes(path, file)
    # Try various timestamp attributes that different formats use
    if file.respond_to?(:datetime) && file.datetime
      ::File.utime(::File.atime(path), file.datetime, path)
    elsif file.respond_to?(:mtime) && file.mtime
      # Fall back to the extracted file's own atime when the entry has none
      atime = (file.respond_to?(:atime) && file.atime) || ::File.atime(path)
      ::File.utime(atime, file.mtime, path)
    end
  end
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fractor"
|
|
4
|
+
require_relative "file_extraction_work"
|
|
5
|
+
require_relative "file_extraction_worker"
|
|
6
|
+
|
|
7
|
+
module Cabriolet
|
|
8
|
+
module Extraction
|
|
9
|
+
# Unified extractor using Fractor for parallel file extraction
|
|
10
|
+
# One `workers` setting selects the mode: 1 = sequential, N = parallel
|
|
11
|
+
class Extractor
  DEFAULT_WORKERS = 4

  attr_reader :archive, :output_dir, :workers, :stats

  # Initialize the extractor
  #
  # @param archive [Object] Archive whose #files are extracted
  # @param output_dir [String] Directory to extract files to
  # @param workers [Integer] Number of workers; values below 1 are raised to 1
  # @param options [Hash] :preserve_paths (default true) and :overwrite (default false)
  def initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options)
    @archive = archive
    @output_dir = output_dir
    @workers = [workers, 1].max # At least 1 worker
    @preserve_paths = options.fetch(:preserve_paths, true)
    @overwrite = options.fetch(:overwrite, false)
    @stats = { extracted: 0, skipped: 0, failed: 0, bytes: 0 }
  end

  # Extract all files from archive
  #
  # @return [Hash] Extraction statistics
  def extract_all
    FileUtils.mkdir_p(@output_dir)

    collect_stats(run_supervisor(@archive.files))

    @stats
  end

  # Extract files with progress callback
  #
  # With a single worker the callback fires after each file. With several
  # workers, files are processed in batches and the callback fires for every
  # file of a batch once that batch has finished.
  #
  # @yield [current, total, file] Progress callback
  # @return [Hash] Extraction statistics
  def extract_with_progress(&block)
    return extract_all unless block

    FileUtils.mkdir_p(@output_dir)

    files = @archive.files
    total = files.count
    current = 0

    # Sequential mode uses simple iteration with progress
    if @workers == 1
      files.each do |file|
        extract_single_file(file)
        current += 1
        yield(current, total, file)
      end
      return @stats
    end

    # Parallel mode: batch files for progress updates
    batch_size = [total / @workers, 1].max

    files.each_slice(batch_size) do |batch|
      # Record each batch's results; without this the returned statistics
      # would stay at zero in parallel mode
      collect_stats(run_supervisor(batch))

      batch.each do |file|
        current += 1
        yield(current, total, file)
      end
    end

    @stats
  end

  private

  # Build the work item describing how one file is to be extracted
  #
  # @param file [Object] File to extract
  # @return [FileExtractionWork] Work item for the worker
  def build_work_item(file)
    FileExtractionWork.new(
      file,
      output_dir: @output_dir,
      preserve_paths: @preserve_paths,
      overwrite: @overwrite,
    )
  end

  # Run a Fractor supervisor over the given files and return its results
  #
  # @param files [Enumerable] Files to extract
  # @return [Object] The supervisor's results object
  def run_supervisor(files)
    supervisor = Fractor::Supervisor.new(
      worker_pools: [
        {
          worker_class: FileExtractionWorker,
          num_workers: @workers,
        },
      ],
    )

    supervisor.add_work_items(files.map { |file| build_work_item(file) })
    supervisor.run
    supervisor.results
  end

  # Extract a single file (for sequential mode with progress)
  #
  # @param file [Object] File to extract
  # @return [Object] Result from worker
  def extract_single_file(file)
    result = FileExtractionWorker.new.process(build_work_item(file))

    update_stats_from_result(result)

    result
  end

  # Collect statistics from Fractor results
  #
  # @param results [Object] Results object from the supervisor
  def collect_stats(results)
    results.results.each { |result| update_stats_from_result(result) }

    # NOTE(review): Fractor's result aggregator is understood to keep failed
    # work in a separate `errors` list; it is counted here when present.
    # Confirm against the Fractor version in use.
    return unless results.respond_to?(:errors)

    Array(results.errors).each { |result| update_stats_from_result(result) }
  end

  # Update stats from a single work result
  #
  # @param result [Object] Result from worker (responds to #success? and #result)
  def update_stats_from_result(result)
    unless result.success?
      @stats[:failed] += 1
      return
    end

    data = result.result
    if data.is_a?(Hash) && data[:status] == :skipped
      @stats[:skipped] += 1
    else
      @stats[:extracted] += 1
      @stats[:bytes] += data[:size] if data.is_a?(Hash) && data[:size]
    end
  end
end
|
|
170
|
+
end
|
|
171
|
+
end
|