cabriolet 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +3 -0
- data/lib/cabriolet/binary/bitstream.rb +32 -21
- data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
- data/lib/cabriolet/cab/compressor.rb +85 -53
- data/lib/cabriolet/cab/decompressor.rb +2 -1
- data/lib/cabriolet/cab/extractor.rb +170 -121
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/quantum.rb +3 -51
- data/lib/cabriolet/decompressors/lzx.rb +59 -1
- data/lib/cabriolet/decompressors/quantum.rb +81 -52
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/lit/compressor.rb +45 -689
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +114 -3
- metadata +38 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
  module Collections
    # FileCollection manages an ordered list of files queued for compression.
    #
    # Each entry is a Hash with +:source+ (path on disk), +:archive+ (path
    # inside the archive) and +:options+ (per-file options). The collection is
    # Enumerable over those entry hashes.
    class FileCollection
      include Enumerable

      # Initialize a new, empty file collection
      #
      # @param format_options [Hash] Options specific to the archive format
      def initialize(format_options = {})
        @files = []
        @format_options = format_options
      end

      # Add a file to the collection
      #
      # @param source_path [String] Path to the source file
      # @param archive_path [String, nil] Path within the archive (defaults to
      #   the basename of +source_path+)
      # @param options [Hash] Additional options for this file
      # @return [self] Returns self for chaining
      # @raise [ArgumentError] if the source is missing or not a regular file
      #
      # @example
      #   collection.add("README.md", "docs/README.md")
      #   collection.add("data.txt") # Uses basename
      def add(source_path, archive_path = nil, **options)
        validate_source(source_path)

        @files << {
          source: source_path,
          archive: archive_path || ::File.basename(source_path),
          options: options,
        }

        self
      end

      # Add multiple files at once
      #
      # @param files [Array<Hash>] Array of file hashes with :source, :archive,
      #   :options keys (:archive and :options may be absent or nil)
      # @return [self] Returns self for chaining
      def add_all(files)
        files.each do |file|
          # A present-but-nil :options must not be splatted; treat it as empty.
          add(file[:source], file[:archive], **(file[:options] || {}))
        end
        self
      end

      # Iterate over files in the collection
      #
      # @yield [file_entry] Yields each file entry hash
      # @return [Enumerator] If no block given
      def each(&block)
        @files.each(&block)
      end

      # Get the number of files in the collection
      #
      # @return [Integer] Number of files
      def size
        @files.size
      end

      # Check if collection is empty
      #
      # @return [Boolean] True if no files
      def empty?
        @files.empty?
      end

      # Clear all files from the collection
      #
      # @return [self] Returns self for chaining
      def clear
        @files.clear
        self
      end

      # Prepare files for compression by reading metadata from disk
      #
      # @return [Array<Hash>] One info hash per entry, in insertion order
      def prepare_for_compression
        @files.map { |file_entry| prepare_file_info(file_entry) }
      end

      # Get total uncompressed size of all files (read from disk at call time)
      #
      # @return [Integer] Total size in bytes
      def total_size
        @files.sum { |entry| ::File.size(entry[:source]) }
      end

      # Group files by the directory part of their archive path
      #
      # @return [Hash] Hash with directory paths as keys and file arrays as values
      def by_directory
        @files.group_by { |entry| ::File.dirname(entry[:archive]) }
      end

      # Find files by pattern in archive path
      #
      # @param pattern [String, Regexp] Regexp to match, or substring to look for
      # @return [Array<Hash>] Matching file entries
      def find_by_pattern(pattern)
        @files.select do |entry|
          if pattern.is_a?(Regexp)
            entry[:archive].match?(pattern)
          else
            entry[:archive].include?(pattern)
          end
        end
      end

      private

      # Validate that source file exists and is a regular file
      #
      # @param path [String] Path to validate
      # @raise [ArgumentError] if file doesn't exist or isn't a regular file
      def validate_source(path)
        raise ArgumentError, "File does not exist: #{path}" unless ::File.exist?(path)
        raise ArgumentError, "Not a regular file: #{path}" unless ::File.file?(path)
      end

      # Prepare file information for compression
      #
      # @param file_entry [Hash] Original file entry
      # @return [Hash] Prepared file info with :source_path, :archive_path,
      #   :size, :mtime, :atime, :attributes and :options
      def prepare_file_info(file_entry)
        stat = ::File.stat(file_entry[:source])

        {
          source_path: file_entry[:source],
          archive_path: file_entry[:archive],
          size: stat.size,
          mtime: stat.mtime,
          atime: stat.atime,
          attributes: calculate_attributes(stat, file_entry[:source]),
          options: file_entry[:options],
        }
      end

      # Calculate file attribute flags for the archive format
      #
      # @param stat [File::Stat] File stat object
      # @param source_path [String, nil] Path of the file the stat belongs to;
      #   used for dotfile detection. When nil the hidden flag is never set.
      # @return [Integer] Attribute flags
      def calculate_attributes(stat, source_path = nil)
        attribs = Constants::ATTRIB_ARCH

        # Set read-only flag if not writable
        attribs |= Constants::ATTRIB_READONLY unless stat.writable?

        # Set hidden flag for Unix dotfiles. This must look at the file being
        # prepared, not at the first entry of the collection.
        if source_path && ::File.basename(source_path).start_with?(".")
          attribs |= Constants::ATTRIB_HIDDEN
        end

        # Set system flag for sockets / symlinks.
        # NOTE(review): File.stat follows symlinks, so symlink? is expected to
        # be false here; confirm whether lstat was intended.
        attribs |= Constants::ATTRIB_SYSTEM if stat.socket? || stat.symlink?

        attribs
      end
    end
  end
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../quantum_shared"
|
|
4
|
+
|
|
3
5
|
module Cabriolet
|
|
4
6
|
module Compressors
|
|
5
7
|
# Quantum compresses data using arithmetic coding and LZ77-based matching
|
|
@@ -13,60 +15,10 @@ module Cabriolet
|
|
|
13
15
|
# For now, this implementation focuses on correct structure.
|
|
14
16
|
# rubocop:disable Metrics/ClassLength
|
|
15
17
|
class Quantum < Base
|
|
16
|
-
|
|
17
|
-
FRAME_SIZE = 32_768
|
|
18
|
-
|
|
19
|
-
# Match constants
|
|
20
|
-
MIN_MATCH = 3
|
|
21
|
-
MAX_MATCH = 259
|
|
22
|
-
|
|
23
|
-
# Position slot tables (same as decompressor)
|
|
24
|
-
POSITION_BASE = [
|
|
25
|
-
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
|
|
26
|
-
512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
|
|
27
|
-
24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
|
|
28
|
-
393_216, 524_288, 786_432, 1_048_576, 1_572_864
|
|
29
|
-
].freeze
|
|
30
|
-
|
|
31
|
-
EXTRA_BITS = [
|
|
32
|
-
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
|
33
|
-
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
|
|
34
|
-
17, 17, 18, 18, 19, 19
|
|
35
|
-
].freeze
|
|
36
|
-
|
|
37
|
-
LENGTH_BASE = [
|
|
38
|
-
0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
|
|
39
|
-
30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
|
|
40
|
-
].freeze
|
|
41
|
-
|
|
42
|
-
LENGTH_EXTRA = [
|
|
43
|
-
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
|
|
44
|
-
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
|
|
45
|
-
].freeze
|
|
18
|
+
include QuantumShared
|
|
46
19
|
|
|
47
20
|
attr_reader :window_bits, :window_size
|
|
48
21
|
|
|
49
|
-
# Represents a symbol in an arithmetic coding model
|
|
50
|
-
class ModelSymbol
|
|
51
|
-
attr_accessor :sym, :cumfreq
|
|
52
|
-
|
|
53
|
-
def initialize(sym, cumfreq)
|
|
54
|
-
@sym = sym
|
|
55
|
-
@cumfreq = cumfreq
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# Represents an arithmetic coding model
|
|
60
|
-
class Model
|
|
61
|
-
attr_accessor :shiftsleft, :entries, :syms
|
|
62
|
-
|
|
63
|
-
def initialize(syms, entries)
|
|
64
|
-
@syms = syms
|
|
65
|
-
@entries = entries
|
|
66
|
-
@shiftsleft = 4
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
|
|
70
22
|
# Initialize Quantum compressor
|
|
71
23
|
#
|
|
72
24
|
# @param io_system [System::IOSystem] I/O system for reading/writing
|
|
@@ -105,6 +105,9 @@ module Cabriolet
|
|
|
105
105
|
reset_interval: 0, output_length: 0, is_delta: false, salvage: false, **_kwargs)
|
|
106
106
|
super(io_system, input, output, buffer_size)
|
|
107
107
|
|
|
108
|
+
# Store salvage flag for error handling
|
|
109
|
+
@salvage = salvage
|
|
110
|
+
|
|
108
111
|
# Validate window_bits
|
|
109
112
|
if is_delta
|
|
110
113
|
unless (17..25).cover?(window_bits)
|
|
@@ -195,7 +198,17 @@ module Cabriolet
|
|
|
195
198
|
frame_size = calculate_frame_size
|
|
196
199
|
|
|
197
200
|
# Decode blocks until frame is complete
|
|
198
|
-
|
|
201
|
+
begin
|
|
202
|
+
decode_frame(frame_size)
|
|
203
|
+
rescue DecompressionError => e
|
|
204
|
+
# In salvage mode, if decompression fails, return what we have so far
|
|
205
|
+
if @salvage
|
|
206
|
+
warn "Salvage: LZX decompression failed at frame #{@frame}: #{e.message}"
|
|
207
|
+
return total_written
|
|
208
|
+
else
|
|
209
|
+
raise
|
|
210
|
+
end
|
|
211
|
+
end
|
|
199
212
|
|
|
200
213
|
# Apply Intel E8 transformation if needed
|
|
201
214
|
frame_data = if should_apply_e8_transform?(frame_size)
|
|
@@ -391,6 +404,35 @@ module Cabriolet
|
|
|
391
404
|
@maintree = Huffman::Tree.new(@maintree_lengths, @maintree_maxsymbols,
|
|
392
405
|
bit_order: :msb)
|
|
393
406
|
unless @maintree.build_table(LENGTH_TABLEBITS)
|
|
407
|
+
# In salvage mode, try to build with a default distribution
|
|
408
|
+
if @salvage
|
|
409
|
+
# For a valid Huffman tree with @maintree_maxsymbols symbols and LENGTH_TABLEBITS=12,
|
|
410
|
+
# we need sum(2^(12-len)) = 4096 (for complete tree) or <= 4096 (for partial).
|
|
411
|
+
# For @maintree_maxsymbols = 784, we need to distribute symbols across lengths 8-10:
|
|
412
|
+
# Using: 128 at len8 (2048 slots) + 384 at len9 (768 slots) + 272 at len10 (256 slots)
|
|
413
|
+
# Total: 2048 + 768 + 256 = 3072 slots, leaving 1024 for longer codes
|
|
414
|
+
# Simpler: use lengths that sum to exactly 4096
|
|
415
|
+
# 784 symbols: distribute as 192 at len9, 592 at len10 = 384 + 592 = 976 (not enough)
|
|
416
|
+
# 784 symbols: distribute as 64 at len8, 576 at len9, 144 at len10 = 128 + 1152 + 144 = 1424
|
|
417
|
+
# Final: 784 symbols across lengths 8-11 to fill 4096 slots
|
|
418
|
+
# Verify: 64*128 + 384*64 + 256*32 + 80*16 = 8192 + 24576 + 8192 + 1280 = 42240 (wrong)
|
|
419
|
+
|
|
420
|
+
# Recalculate: 2^(12-len) slots needed per symbol
|
|
421
|
+
# len8: 16 slots/symbol, len9: 8 slots/symbol, len10: 4 slots/symbol, len11: 2 slots/symbol
|
|
422
|
+
# Total slots = sum(2^(12-len) for each symbol) must <= 4096
|
|
423
|
+
# Simple valid distribution for 784 symbols:
|
|
424
|
+
# 256 at len10 = 256*4 = 1024
|
|
425
|
+
# 528 at len12 = 528*1 = 528
|
|
426
|
+
# Total = 1552 (valid but incomplete tree)
|
|
427
|
+
|
|
428
|
+
default_main_lengths = []
|
|
429
|
+
256.times { default_main_lengths << 10 }
|
|
430
|
+
528.times { default_main_lengths << 12 }
|
|
431
|
+
@maintree_lengths = default_main_lengths
|
|
432
|
+
@maintree = Huffman::Tree.new(default_main_lengths, @maintree_maxsymbols,
|
|
433
|
+
bit_order: :msb)
|
|
434
|
+
return if @maintree.build_table(LENGTH_TABLEBITS)
|
|
435
|
+
end
|
|
394
436
|
raise DecompressionError,
|
|
395
437
|
"Failed to build main tree"
|
|
396
438
|
end
|
|
@@ -428,6 +470,22 @@ module Cabriolet
|
|
|
428
470
|
bit_order: :msb)
|
|
429
471
|
return if @pretree.build_table(PRETREE_TABLEBITS)
|
|
430
472
|
|
|
473
|
+
# In salvage mode, try to continue with a valid default tree
|
|
474
|
+
if @salvage
|
|
475
|
+
# For a valid Huffman tree with table_bits=6, we need exactly 64 slots.
|
|
476
|
+
# With 8 symbols at length 3: 8 * 2^(6-3) = 8 * 8 = 64 slots (complete)
|
|
477
|
+
# For simplicity: 8 at length 3 fills direct table (64 slots)
|
|
478
|
+
default_lengths = [
|
|
479
|
+
3, 3, 3, 3, 3, 3, 3, 3, # 8 at length 3: fills 64 slots
|
|
480
|
+
7, 7, 7, 7, 7, 7, 7, 7, # 8 at length 7: extended table
|
|
481
|
+
7, 7, 7, 7 # 4 at length 7: extended table
|
|
482
|
+
]
|
|
483
|
+
@pretree_lengths = default_lengths
|
|
484
|
+
@pretree = Huffman::Tree.new(default_lengths, PRETREE_MAXSYMBOLS,
|
|
485
|
+
bit_order: :msb)
|
|
486
|
+
return if @pretree.build_table(PRETREE_TABLEBITS)
|
|
487
|
+
end
|
|
488
|
+
|
|
431
489
|
raise DecompressionError, "Failed to build pretree"
|
|
432
490
|
end
|
|
433
491
|
|
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../quantum_shared"
|
|
4
|
+
|
|
5
|
+
# Compatibility shim for String#bytesplice (added in Ruby 3.2)
|
|
6
|
+
# Probe for the five-argument form of String#bytesplice
# (index, length, str, str_index, str_length). Checking only whether the
# method exists is not enough: an interpreter can provide bytesplice without
# the source-offset arguments, in which case the call raises ArgumentError.
bytesplice_needs_compat =
  begin
    String.new("ab").bytesplice(0, 1, "cd", 0, 1)
    false
  rescue ArgumentError, NoMethodError
    true
  end

if bytesplice_needs_compat
  module StringBytespliceCompat
    # Compatibility implementation of bytesplice for interpreters that lack
    # the source-offset form. Rebuilds the receiver (clear + append), which is
    # slower than a native splice but works on any mutable string.
    #
    # Calls with fewer than four arguments are forwarded to a native
    # implementation when one exists, so forms the interpreter already
    # supports keep their native behavior.
    #
    # @param args [Array] index, length, other_string, and optionally
    #   other_index (default 0) and other_length (default: bytesize of
    #   other_string)
    # @return [String] self
    # @raise [ArgumentError] if the argument count is not handled
    def bytesplice(*args)
      return super if args.size < 4 && defined?(super)

      unless (3..5).cover?(args.size)
        raise ArgumentError,
              "wrong number of arguments (given #{args.size}, expected 3..5)"
      end

      index, length, other_string, other_index, other_length = args
      other_index ||= 0
      other_length ||= other_string.bytesize

      # Slice everything before mutating: other_string may be the receiver.
      prefix = byteslice(0, index)
      middle = other_string.byteslice(other_index, other_length)
      # byteslice returns nil when the range starts past the end; the
      # replaced span then simply reaches the end of the string.
      suffix = byteslice((index + length)..-1) || ""
      new_content = prefix + middle + suffix

      # Modify receiver in place
      clear
      self << new_content

      self
    end
  end

  String.prepend(StringBytespliceCompat)
end
|
|
30
|
+
|
|
3
31
|
module Cabriolet
|
|
4
32
|
module Decompressors
|
|
5
33
|
# Quantum handles Quantum-compressed data using arithmetic coding
|
|
@@ -8,59 +36,10 @@ module Cabriolet
|
|
|
8
36
|
# The Quantum method was created by David Stafford, adapted by Microsoft
|
|
9
37
|
# Corporation.
|
|
10
38
|
class Quantum < Base
|
|
11
|
-
|
|
12
|
-
FRAME_SIZE = 32_768
|
|
13
|
-
|
|
14
|
-
# Match constants
|
|
15
|
-
MAX_MATCH = 259
|
|
16
|
-
|
|
17
|
-
# Position slot tables (same as in qtmd.c)
|
|
18
|
-
POSITION_BASE = [
|
|
19
|
-
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
|
|
20
|
-
512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
|
|
21
|
-
24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
|
|
22
|
-
393_216, 524_288, 786_432, 1_048_576, 1_572_864
|
|
23
|
-
].freeze
|
|
24
|
-
|
|
25
|
-
EXTRA_BITS = [
|
|
26
|
-
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
|
27
|
-
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
|
|
28
|
-
17, 17, 18, 18, 19, 19
|
|
29
|
-
].freeze
|
|
30
|
-
|
|
31
|
-
LENGTH_BASE = [
|
|
32
|
-
0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
|
|
33
|
-
30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
|
|
34
|
-
].freeze
|
|
35
|
-
|
|
36
|
-
LENGTH_EXTRA = [
|
|
37
|
-
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
|
|
38
|
-
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
|
|
39
|
-
].freeze
|
|
39
|
+
include QuantumShared
|
|
40
40
|
|
|
41
41
|
attr_reader :window_bits, :window_size
|
|
42
42
|
|
|
43
|
-
# Represents a symbol in an arithmetic coding model
|
|
44
|
-
class ModelSymbol
|
|
45
|
-
attr_accessor :sym, :cumfreq
|
|
46
|
-
|
|
47
|
-
def initialize(sym, cumfreq)
|
|
48
|
-
@sym = sym
|
|
49
|
-
@cumfreq = cumfreq
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
# Represents an arithmetic coding model
|
|
54
|
-
class Model
|
|
55
|
-
attr_accessor :shiftsleft, :entries, :syms
|
|
56
|
-
|
|
57
|
-
def initialize(syms, entries)
|
|
58
|
-
@syms = syms
|
|
59
|
-
@entries = entries
|
|
60
|
-
@shiftsleft = 4
|
|
61
|
-
end
|
|
62
|
-
end
|
|
63
|
-
|
|
64
43
|
# Initialize Quantum decompressor
|
|
65
44
|
#
|
|
66
45
|
# @param io_system [System::IOSystem] I/O system for reading/writing
|
|
@@ -81,8 +60,13 @@ module Cabriolet
|
|
|
81
60
|
@window_bits = window_bits
|
|
82
61
|
@window_size = 1 << window_bits
|
|
83
62
|
|
|
84
|
-
# Initialize window
|
|
85
|
-
@window =
|
|
63
|
+
# Initialize window (mutable for Ruby < 3.2 bytesplice compatibility)
|
|
64
|
+
@window = if String.method_defined?(:bytesplice)
|
|
65
|
+
"\0" * @window_size
|
|
66
|
+
else
|
|
67
|
+
# In Ruby < 3.2, create mutable window using String.new
|
|
68
|
+
String.new("\0" * @window_size)
|
|
69
|
+
end
|
|
86
70
|
@window_posn = 0
|
|
87
71
|
@frame_todo = FRAME_SIZE
|
|
88
72
|
|
|
@@ -409,7 +393,52 @@ module Cabriolet
|
|
|
409
393
|
end
|
|
410
394
|
|
|
411
395
|
# Copy match from window
|
|
396
|
+
# Optimized to use bulk byte operations for better performance
|
|
412
397
|
def copy_match(offset, length)
  # Dispatch on match length: anything of 32 bytes or fewer goes through the
  # per-byte copier, longer matches through the bulk copier.
  return copy_match_byte_by_byte(offset, length) unless length > 32

  copy_match_bulk(offset, length)
end
|
|
405
|
+
|
|
406
|
+
# Bulk copy using bytesplice for better performance on longer matches
|
|
407
|
+
# Copies +length+ bytes of an LZ77 match located +offset+ bytes behind the
# current window position, advancing @window_posn by +length+.
#
# A match may overlap its own output (offset < length, e.g. a run of one
# repeated byte). A single bulk splice would read the source range as it was
# before the copy and miss the bytes written by the match itself, so the copy
# is done in chunks of at most +offset+ bytes: each chunk's source lies
# entirely in data that has already been written.
#
# @param offset [Integer] Distance back from the current window position
# @param length [Integer] Number of bytes to copy
# @raise [DecompressionError] if the match reaches back past the window
def copy_match_bulk(offset, length)
  remaining = length

  if offset > @window_posn
    # Match starts in the tail of the window (wraps around)
    if offset > @window_size
      raise DecompressionError,
            "Match offset beyond window"
    end

    tail_len = offset - @window_posn
    chunk = [tail_len, remaining].min
    # Source is at or ahead of the destination here, so one splice is safe.
    @window.bytesplice(@window_posn, chunk, @window,
                       @window_size - tail_len, chunk)
    @window_posn += chunk
    remaining -= chunk
    # If bytes remain, @window_posn now equals offset and the rest of the
    # match continues from the start of the window, handled below.
  end

  # A zero offset is a self-copy; do it in one step so the loop terminates.
  step = offset.positive? ? offset : remaining

  while remaining.positive?
    chunk = [remaining, step].min
    @window.bytesplice(@window_posn, chunk, @window,
                       @window_posn - offset, chunk)
    @window_posn += chunk
    remaining -= chunk
  end
end
|
|
439
|
+
|
|
440
|
+
# Byte-by-byte copy for short matches (fallback)
|
|
441
|
+
def copy_match_byte_by_byte(offset, length)
|
|
413
442
|
if offset > @window_posn
|
|
414
443
|
# Match wraps around window
|
|
415
444
|
if offset > @window_size
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
|
|
5
|
+
module Cabriolet
  module Extraction
    # BaseExtractor provides common extraction functionality for all extractors
    # Reduces code duplication between SimpleExtractor and Parallel::Extractor
    class BaseExtractor
      # Initialize the base extractor
      #
      # @param output_dir [String] Directory to extract files to
      # @param preserve_paths [Boolean] Whether to preserve directory structure
      # @param overwrite [Boolean] Whether to overwrite existing files
      def initialize(output_dir, preserve_paths: true, overwrite: false)
        @output_dir = output_dir
        @preserve_paths = preserve_paths
        @overwrite = overwrite
      end

      protected

      # Build the output path for a file, handling path preservation and cleaning
      #
      # Archive member names are untrusted input. Empty, "." and ".." path
      # segments are dropped so the result cannot climb out of the output
      # directory.
      #
      # @param filename [String] Original filename from archive (may have backslashes)
      # @return [String] Full output path for the file
      def build_output_path(filename)
        # Normalize path separators (Windows archives use backslashes), then
        # discard segments that carry no name or would traverse upwards.
        segments = filename.gsub("\\", "/").split("/").reject do |segment|
          segment.empty? || segment == "." || segment == ".."
        end

        relative = if @preserve_paths
                     # Keep directory structure
                     ::File.join(*segments)
                   else
                     # Flatten to output directory (just basename)
                     segments.last.to_s
                   end

        ::File.join(@output_dir, relative)
      end

      # Extract a single file to disk
      #
      # Failures are reported with +warn+ and turned into a nil result rather
      # than raised.
      #
      # @param file [Object] File object from archive (must respond to :name and :data)
      # @return [String, nil] Output path if successful, nil if skipped or failed
      def extract_file(file)
        output_path = build_output_path(file.name)

        # Skip existing files unless overwriting
        return nil if !@overwrite && ::File.exist?(output_path)

        # Fetch the data first so no directories are created when there is
        # nothing to write.
        data = file.data
        return nil unless data

        # Create parent directory (no-op when it already exists)
        FileUtils.mkdir_p(::File.dirname(output_path))

        ::File.binwrite(output_path, data)

        # Preserve file attributes if available
        preserve_file_attributes(output_path, file)

        output_path
      rescue StandardError => e
        warn "Failed to extract #{file.name}: #{e.message}"
        nil
      end

      # Preserve file attributes (timestamps, etc.) if available on the file object
      #
      # Uses +file.datetime+ when present, otherwise +file.mtime+; the access
      # time comes from +file.atime+ when available, else from the file on disk.
      #
      # @param path [String] Path to extracted file
      # @param file [Object] File object from archive
      def preserve_file_attributes(path, file)
        # Try various timestamp attributes that different formats use
        if file.respond_to?(:datetime) && file.datetime
          ::File.utime(::File.atime(path), file.datetime, path)
        elsif file.respond_to?(:mtime) && file.mtime
          archived_atime = file.respond_to?(:atime) ? file.atime : nil
          ::File.utime(archived_atime || ::File.atime(path), file.mtime, path)
        end
      end
    end
  end
end
|