cabriolet 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +3 -0
- data/lib/cabriolet/binary/bitstream.rb +32 -21
- data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
- data/lib/cabriolet/cab/compressor.rb +85 -53
- data/lib/cabriolet/cab/decompressor.rb +2 -1
- data/lib/cabriolet/cab/extractor.rb +2 -35
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/quantum.rb +3 -51
- data/lib/cabriolet/decompressors/quantum.rb +81 -52
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/lit/compressor.rb +45 -689
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +114 -3
- metadata +38 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "guid_generator"
|
|
4
|
+
require_relative "content_type_detector"
|
|
5
|
+
require_relative "directory_builder"
|
|
6
|
+
|
|
7
|
+
module Cabriolet
  module LIT
    # Assembles the in-memory description of a LIT file from prepared file
    # data: GUIDs, directory, sections, manifest, secondary header metadata
    # and the piece table.
    class StructureBuilder
      attr_reader :io_system, :version, :language_id, :creator_id

      # Create a structure builder.
      #
      # @param io_system [System::IOSystem] I/O system for file operations
      # @param version [Integer] LIT format version
      # @param language_id [Integer] Language ID
      # @param creator_id [Integer] Creator ID
      def initialize(io_system:, version: 1, language_id: 0x409, creator_id: 0)
        @io_system = io_system
        @version = version
        @language_id = language_id
        @creator_id = creator_id
      end

      # Produce the complete LIT structure hash for the given files.
      #
      # @param file_data [Array<Hash>] File data array from prepare_files
      # @return [Hash] Complete LIT structure
      def build(file_data)
        # Key order mirrors the order in which the parts are computed.
        structure = {
          header_guid: GuidGenerator.generate,
          piece3_guid: Binary::LITStructures::GUIDs::PIECE3,
          piece4_guid: Binary::LITStructures::GUIDs::PIECE4,
          directory: build_directory(file_data),
          sections: build_sections,
          manifest: build_manifest(file_data),
          secondary_header: build_secondary_header_metadata,
        }

        # The piece table needs the directory and the secondary header length;
        # the content offset in turn needs the finished piece table.
        structure[:pieces] = calculate_pieces(structure)
        update_secondary_header_content_offset(structure)

        structure[:version] = @version
        structure[:file_data] = file_data
        structure
      end

      private

      # Lay out every file back to back in section 0, followed by the
      # NameList and manifest entries.
      #
      # @param file_data [Array<Hash>] File data array
      # @return [Hash] Directory structure
      def build_directory(file_data)
        builder = DirectoryBuilder.new

        # Running offset after the last user file.
        cursor = file_data.inject(0) do |position, file_info|
          builder.add_entry(
            name: file_info[:lit_path],
            section: 0,
            offset: position,
            size: file_info[:uncompressed_size],
          )
          position + file_info[:uncompressed_size]
        end

        namelist_size = calculate_namelist_size
        manifest_size = calculate_manifest_size(file_data)

        # Special entries that describe the LIT container itself.
        builder.add_entry(
          name: Binary::LITStructures::Paths::NAMELIST,
          section: 0,
          offset: cursor,
          size: namelist_size,
        )
        builder.add_entry(
          name: Binary::LITStructures::Paths::MANIFEST,
          section: 0,
          offset: cursor + namelist_size,
          size: manifest_size,
        )

        builder.build
      end

      # Describe the sections of the file. The simple implementation emits a
      # single uncompressed, unencrypted section.
      #
      # @return [Array<Hash>] Sections array
      def build_sections
        uncompressed = {
          name: "Uncompressed",
          transforms: [],
          compressed: false,
          encrypted: false,
        }

        [uncompressed]
      end

      # Map every file to a manifest entry; the entry offset is the file's
      # index in file_data.
      #
      # @param file_data [Array<Hash>] File data array
      # @return [Hash] Manifest structure
      def build_manifest(file_data)
        entries = file_data.each_with_index.map do |file_info, index|
          {
            offset: index,
            internal_name: file_info[:lit_path],
            original_name: file_info[:lit_path],
            content_type: ContentTypeDetector.content_type(file_info[:lit_path]),
            group: ContentTypeDetector.file_group(file_info[:lit_path]),
          }
        end

        { mappings: entries }
      end

      # Describe the secondary header. Its length is measured by serializing
      # a default SecondaryHeader record.
      #
      # @return [Hash] Secondary header metadata
      def build_secondary_header_metadata
        serialized = Binary::LITStructures::SecondaryHeader.new.to_binary_s

        {
          length: serialized.bytesize,
          entry_chunklen: 0x2000, # 8KB chunks for entry directory
          count_chunklen: 0x200, # 512B chunks for count directory
          entry_unknown: 0x100000,
          count_unknown: 0x20000,
          entry_depth: 1, # No AOLI index layer
          entry_entries: 0, # Will be set when directory built
          count_entries: 0, # Will be set when directory built
          content_offset: 0, # Will be calculated after pieces
          timestamp: Time.now.to_i,
          language_id: @language_id,
          creator_id: @creator_id,
        }
      end

      # Compute the offset and size of each of the five pieces, which are
      # stored consecutively after the headers.
      #
      # @param structure [Hash] Partial structure (needs secondary_header
      #   and directory)
      # @return [Array<Hash>] Pieces array
      def calculate_pieces(structure)
        # Pieces begin after 40 + 80 bytes of headers plus the secondary header.
        next_offset = 40 + 80 + structure[:secondary_header][:length]

        # Re-feed the directory entries into a fresh builder to size piece 1.
        directory = structure[:directory]
        sizer = DirectoryBuilder.new(chunk_size: directory[:chunk_size])
        directory[:entries].each do |entry|
          sizer.add_entry(
            name: entry[:name],
            section: entry[:section],
            offset: entry[:offset],
            size: entry[:size],
          )
        end

        piece_sizes = [
          16,                   # Piece 0: File size information
          sizer.calculate_size, # Piece 1: Directory (IFCM structure)
          512,                  # Piece 2: Index information
          16,                   # Piece 3: Standard GUID
          16,                   # Piece 4: Standard GUID
        ]

        piece_sizes.map do |piece_size|
          piece = { offset: next_offset, size: piece_size }
          next_offset += piece_size
          piece
        end
      end

      # Record where content starts: immediately after the last piece.
      #
      # @param structure [Hash] Structure to update
      def update_secondary_header_content_offset(structure)
        final_piece = structure[:pieces].last
        content_start = final_piece[:offset] + final_piece[:size]

        structure[:secondary_header][:content_offset] = content_start
      end

      # Estimated NameList size: a fixed ~100 bytes for a minimal NameList.
      #
      # @return [Integer] Estimated size
      def calculate_namelist_size
        100
      end

      # Estimated manifest size: a 10-byte directory header plus, per entry,
      # offset (4) + 3 length bytes + twice the path bytes + 20 + 1.
      #
      # @param file_data [Array<Hash>] File data array
      # @return [Integer] Estimated size
      def calculate_manifest_size(file_data)
        file_data.sum(10) do |file_info|
          4 + 3 + (file_info[:lit_path].bytesize * 2) + 20 + 1
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
  # Shared Quantum compression constants and models
  # Used by both Compressors::Quantum and Decompressors::Quantum
  module QuantumShared
    # Frame size (32KB per frame)
    FRAME_SIZE = 32_768

    # Match constants
    MIN_MATCH = 3
    MAX_MATCH = 259

    # Position slot tables (same as in qtmd.c).
    # POSITION_BASE[slot] is the smallest offset that belongs to the slot.
    POSITION_BASE = [
      0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
      512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
      24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
      393_216, 524_288, 786_432, 1_048_576, 1_572_864
    ].freeze

    EXTRA_BITS = [
      0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
      9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
      17, 17, 18, 18, 19, 19
    ].freeze

    # LENGTH_BASE[slot] is the smallest length that belongs to the slot.
    LENGTH_BASE = [
      0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
      30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
    ].freeze

    LENGTH_EXTRA = [
      0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
      3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
    ].freeze

    # Represents a symbol in an arithmetic coding model
    class ModelSymbol
      attr_accessor :sym, :cumfreq

      # @param sym [Integer] Symbol value
      # @param cumfreq [Integer] Cumulative frequency of the symbol
      def initialize(sym, cumfreq)
        @sym = sym
        @cumfreq = cumfreq
      end
    end

    # Represents an arithmetic coding model
    class Model
      attr_accessor :shiftsleft, :entries, :syms

      # @param syms [Array<ModelSymbol>] Symbols of the model
      # @param entries [Integer] Number of entries in the model
      def initialize(syms, entries)
        @syms = syms
        @entries = entries
        @shiftsleft = 4
      end
    end

    # Find position slot for a given offset.
    #
    # The slot is the largest index whose POSITION_BASE entry does not exceed
    # the offset. Offsets 0..3 have their own slots (0..3); offsets at or
    # beyond the last base map to the last slot. Non-positive offsets map to
    # slot 0.
    #
    # @param offset [Integer] Position offset
    # @return [Integer] Position slot index
    def self.find_position_slot(offset)
      slot_for(POSITION_BASE, offset)
    end

    # Find length slot for a given length.
    #
    # The slot is the largest index whose LENGTH_BASE entry does not exceed
    # the length. Lengths 0..3 have their own slots (0..3); lengths at or
    # beyond the last base map to the last slot. Non-positive lengths map to
    # slot 0.
    #
    # @param length [Integer] Match length
    # @return [Integer] Length slot index
    def self.find_length_slot(length)
      slot_for(LENGTH_BASE, length)
    end

    # Locate the slot of value in an ascending table of slot base values.
    #
    # @param bases [Array<Integer>] Ascending slot base values, starting at 0
    # @param value [Integer] Value to classify
    # @return [Integer] Largest index i with bases[i] <= value (0 if value <= 0)
    def self.slot_for(bases, value)
      return 0 unless value.positive?

      # First slot whose base is already too large; the slot before it is
      # the answer. nil means every base fits, i.e. the last slot.
      first_too_large = bases.bsearch_index { |base| base > value }
      (first_too_large || bases.size) - 1
    end
    private_class_method :slot_for
  end
end
|
data/lib/cabriolet/version.rb
CHANGED
data/lib/cabriolet.rb
CHANGED
|
@@ -26,6 +26,7 @@ require_relative "cabriolet/binary/oab_structures"
|
|
|
26
26
|
require_relative "cabriolet/file_entry"
|
|
27
27
|
require_relative "cabriolet/file_manager"
|
|
28
28
|
require_relative "cabriolet/base_compressor"
|
|
29
|
+
require_relative "cabriolet/checksum"
|
|
29
30
|
|
|
30
31
|
# Cabriolet is a pure Ruby library for extracting Microsoft Cabinet (.CAB) files,
|
|
31
32
|
# CHM (Compiled HTML Help) files, and related compression formats.
|
|
@@ -61,7 +62,9 @@ module Cabriolet
|
|
|
61
62
|
end
|
|
62
63
|
|
|
63
64
|
self.verbose = false
|
|
64
|
-
|
|
65
|
+
# Default buffer size of 64KB - better for modern systems
|
|
66
|
+
# Larger buffers reduce I/O syscall overhead significantly
|
|
67
|
+
self.default_buffer_size = 65_536
|
|
65
68
|
end
|
|
66
69
|
|
|
67
70
|
# Models
|
|
@@ -92,6 +95,8 @@ require_relative "cabriolet/algorithm_factory"
|
|
|
92
95
|
|
|
93
96
|
# Load core components
|
|
94
97
|
|
|
98
|
+
require_relative "cabriolet/quantum_shared"
|
|
99
|
+
|
|
95
100
|
require_relative "cabriolet/huffman/tree"
|
|
96
101
|
require_relative "cabriolet/huffman/decoder"
|
|
97
102
|
require_relative "cabriolet/huffman/encoder"
|
|
@@ -143,12 +148,118 @@ require_relative "cabriolet/oab/compressor"
|
|
|
143
148
|
|
|
144
149
|
# Load new advanced features
|
|
145
150
|
require_relative "cabriolet/format_detector"
|
|
146
|
-
require_relative "cabriolet/auto"
|
|
151
|
+
require_relative "cabriolet/extraction/base_extractor"
|
|
152
|
+
require_relative "cabriolet/extraction/extractor"
|
|
147
153
|
require_relative "cabriolet/streaming"
|
|
148
154
|
require_relative "cabriolet/validator"
|
|
149
155
|
require_relative "cabriolet/repairer"
|
|
150
156
|
require_relative "cabriolet/modifier"
|
|
151
|
-
require_relative "cabriolet/parallel"
|
|
152
157
|
|
|
153
158
|
# Load CLI (optional, for command-line usage)
|
|
154
159
|
require_relative "cabriolet/cli"
|
|
160
|
+
|
|
161
|
+
# Convenience methods at top level
|
|
162
|
+
module Cabriolet
  class << self
    # Parse an archive, choosing the parser by automatic format detection.
    #
    # @param path [String] Path to the archive file
    # @param options [Hash] Options to pass to the parser
    # @return [Object] Parsed archive object
    # @raise [UnsupportedFormatError] if format cannot be detected or is unsupported
    #
    # @example
    #   archive = Cabriolet.open('unknown.archive')
    #   archive.files.each { |f| puts f.name }
    def open(path, **options)
      parser_class = FormatDetector.parser_for(path)
      return parser_class.new(**options).parse(path) if parser_class

      # No parser: report whatever format (if any) was detected.
      detected = detect_format(path)
      raise UnsupportedFormatError,
            "Unable to detect format or no parser available for: #{path} (detected: #{detected || 'unknown'})"
    end

    # Identify the format of an archive file.
    #
    # @param path [String] Path to the file
    # @return [Symbol, nil] Detected format symbol or nil
    #
    # @example
    #   format = Cabriolet.detect_format('file.cab')
    #   # => :cab
    def detect_format(path)
      FormatDetector.detect(path)
    end

    # Extract every file of an archive, detecting its format automatically.
    #
    # @param archive_path [String] Path to the archive
    # @param output_dir [String] Directory to extract to
    # @param options [Hash] Extraction options
    # @option options [Integer] :workers (4) Number of parallel workers (1 = sequential)
    # @option options [Boolean] :preserve_paths (true) Preserve directory structure
    # @option options [Boolean] :overwrite (false) Overwrite existing files
    # @return [Hash] Extraction statistics
    #
    # @example Sequential extraction
    #   Cabriolet.extract('archive.cab', 'output/')
    #
    # @example Parallel extraction with 8 workers
    #   stats = Cabriolet.extract('file.chm', 'docs/', workers: 8)
    #   puts "Extracted #{stats[:extracted]} files"
    def extract(archive_path, output_dir, **options)
      # The archive is opened with default parser options; the options given
      # here go to the extractor only.
      archive = open(archive_path)
      Extraction::Extractor.new(archive, output_dir, **options).extract_all
    end

    # Summarize an archive without extracting it.
    #
    # @param path [String] Path to the archive
    # @return [Hash] Archive information
    #
    # @example
    #   info = Cabriolet.info('archive.cab')
    #   # => { format: :cab, file_count: 145, total_size: 52428800, ... }
    def info(path)
      archive = open(path)
      detected = detect_format(path)

      summary = { format: detected, path: path }
      summary[:file_count] = archive.files.count
      summary[:total_size] = archive.files.sum { |entry| entry.size || 0 }
      summary[:compressed_size] = File.size(path)
      summary[:compression_ratio] = calculate_compression_ratio(archive, path)
      summary[:files] = archive.files.map { |entry| file_info(entry) }
      summary
    end

    private

    # Archive size on disk as a percentage of the total uncompressed size,
    # rounded to two decimals; 0 when the archive reports no content.
    def calculate_compression_ratio(archive, path)
      uncompressed_total = archive.files.sum { |entry| entry.size || 0 }
      archive_bytes = File.size(path)

      if uncompressed_total.zero?
        0
      else
        ((archive_bytes.to_f / uncompressed_total) * 100).round(2)
      end
    end

    # Describe one archive member. Attributes the member does not respond to
    # are reported as nil.
    def file_info(file)
      optional = ->(attribute) { file.respond_to?(attribute) ? file.public_send(attribute) : nil }

      {
        name: file.name,
        size: file.size,
        compressed_size: optional.call(:compressed_size),
        attributes: optional.call(:attributes),
        date: optional.call(:date),
        time: optional.call(:time),
      }
    end
  end
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cabriolet
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-01-
|
|
11
|
+
date: 2026-01-17 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bindata
|
|
@@ -24,6 +24,20 @@ dependencies:
|
|
|
24
24
|
- - "~>"
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '2.5'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: fractor
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0.1'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0.1'
|
|
27
41
|
- !ruby/object:Gem::Dependency
|
|
28
42
|
name: thor
|
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -54,7 +68,6 @@ files:
|
|
|
54
68
|
- exe/cabriolet
|
|
55
69
|
- lib/cabriolet.rb
|
|
56
70
|
- lib/cabriolet/algorithm_factory.rb
|
|
57
|
-
- lib/cabriolet/auto.rb
|
|
58
71
|
- lib/cabriolet/base_compressor.rb
|
|
59
72
|
- lib/cabriolet/binary/bitstream.rb
|
|
60
73
|
- lib/cabriolet/binary/bitstream_writer.rb
|
|
@@ -69,7 +82,10 @@ files:
|
|
|
69
82
|
- lib/cabriolet/cab/compressor.rb
|
|
70
83
|
- lib/cabriolet/cab/decompressor.rb
|
|
71
84
|
- lib/cabriolet/cab/extractor.rb
|
|
85
|
+
- lib/cabriolet/cab/file_compression_work.rb
|
|
86
|
+
- lib/cabriolet/cab/file_compression_worker.rb
|
|
72
87
|
- lib/cabriolet/cab/parser.rb
|
|
88
|
+
- lib/cabriolet/checksum.rb
|
|
73
89
|
- lib/cabriolet/chm/command_handler.rb
|
|
74
90
|
- lib/cabriolet/chm/compressor.rb
|
|
75
91
|
- lib/cabriolet/chm/decompressor.rb
|
|
@@ -78,6 +94,7 @@ files:
|
|
|
78
94
|
- lib/cabriolet/cli/base_command_handler.rb
|
|
79
95
|
- lib/cabriolet/cli/command_dispatcher.rb
|
|
80
96
|
- lib/cabriolet/cli/command_registry.rb
|
|
97
|
+
- lib/cabriolet/collections/file_collection.rb
|
|
81
98
|
- lib/cabriolet/compressors/base.rb
|
|
82
99
|
- lib/cabriolet/compressors/lzss.rb
|
|
83
100
|
- lib/cabriolet/compressors/lzx.rb
|
|
@@ -91,8 +108,13 @@ files:
|
|
|
91
108
|
- lib/cabriolet/decompressors/none.rb
|
|
92
109
|
- lib/cabriolet/decompressors/quantum.rb
|
|
93
110
|
- lib/cabriolet/errors.rb
|
|
111
|
+
- lib/cabriolet/extraction/base_extractor.rb
|
|
112
|
+
- lib/cabriolet/extraction/extractor.rb
|
|
113
|
+
- lib/cabriolet/extraction/file_extraction_work.rb
|
|
114
|
+
- lib/cabriolet/extraction/file_extraction_worker.rb
|
|
94
115
|
- lib/cabriolet/file_entry.rb
|
|
95
116
|
- lib/cabriolet/file_manager.rb
|
|
117
|
+
- lib/cabriolet/format_base.rb
|
|
96
118
|
- lib/cabriolet/format_detector.rb
|
|
97
119
|
- lib/cabriolet/hlp/command_handler.rb
|
|
98
120
|
- lib/cabriolet/hlp/compressor.rb
|
|
@@ -101,9 +123,14 @@ files:
|
|
|
101
123
|
- lib/cabriolet/hlp/quickhelp/compression_stream.rb
|
|
102
124
|
- lib/cabriolet/hlp/quickhelp/compressor.rb
|
|
103
125
|
- lib/cabriolet/hlp/quickhelp/decompressor.rb
|
|
126
|
+
- lib/cabriolet/hlp/quickhelp/file_writer.rb
|
|
104
127
|
- lib/cabriolet/hlp/quickhelp/huffman_stream.rb
|
|
105
128
|
- lib/cabriolet/hlp/quickhelp/huffman_tree.rb
|
|
129
|
+
- lib/cabriolet/hlp/quickhelp/offset_calculator.rb
|
|
106
130
|
- lib/cabriolet/hlp/quickhelp/parser.rb
|
|
131
|
+
- lib/cabriolet/hlp/quickhelp/structure_builder.rb
|
|
132
|
+
- lib/cabriolet/hlp/quickhelp/topic_builder.rb
|
|
133
|
+
- lib/cabriolet/hlp/quickhelp/topic_compressor.rb
|
|
107
134
|
- lib/cabriolet/hlp/winhelp/btree_builder.rb
|
|
108
135
|
- lib/cabriolet/hlp/winhelp/compressor.rb
|
|
109
136
|
- lib/cabriolet/hlp/winhelp/decompressor.rb
|
|
@@ -118,8 +145,15 @@ files:
|
|
|
118
145
|
- lib/cabriolet/kwaj/parser.rb
|
|
119
146
|
- lib/cabriolet/lit/command_handler.rb
|
|
120
147
|
- lib/cabriolet/lit/compressor.rb
|
|
148
|
+
- lib/cabriolet/lit/content_encoder.rb
|
|
149
|
+
- lib/cabriolet/lit/content_type_detector.rb
|
|
121
150
|
- lib/cabriolet/lit/decompressor.rb
|
|
151
|
+
- lib/cabriolet/lit/directory_builder.rb
|
|
152
|
+
- lib/cabriolet/lit/guid_generator.rb
|
|
153
|
+
- lib/cabriolet/lit/header_writer.rb
|
|
122
154
|
- lib/cabriolet/lit/parser.rb
|
|
155
|
+
- lib/cabriolet/lit/piece_builder.rb
|
|
156
|
+
- lib/cabriolet/lit/structure_builder.rb
|
|
123
157
|
- lib/cabriolet/models/cabinet.rb
|
|
124
158
|
- lib/cabriolet/models/chm_file.rb
|
|
125
159
|
- lib/cabriolet/models/chm_header.rb
|
|
@@ -139,11 +173,11 @@ files:
|
|
|
139
173
|
- lib/cabriolet/oab/compressor.rb
|
|
140
174
|
- lib/cabriolet/oab/decompressor.rb
|
|
141
175
|
- lib/cabriolet/offset_calculator.rb
|
|
142
|
-
- lib/cabriolet/parallel.rb
|
|
143
176
|
- lib/cabriolet/platform.rb
|
|
144
177
|
- lib/cabriolet/plugin.rb
|
|
145
178
|
- lib/cabriolet/plugin_manager.rb
|
|
146
179
|
- lib/cabriolet/plugin_validator.rb
|
|
180
|
+
- lib/cabriolet/quantum_shared.rb
|
|
147
181
|
- lib/cabriolet/repairer.rb
|
|
148
182
|
- lib/cabriolet/streaming.rb
|
|
149
183
|
- lib/cabriolet/system/file_handle.rb
|