cabriolet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +799 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +29 -0
- data/README.adoc +1207 -0
- data/exe/cabriolet +6 -0
- data/lib/cabriolet/auto.rb +173 -0
- data/lib/cabriolet/binary/bitstream.rb +148 -0
- data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
- data/lib/cabriolet/binary/chm_structures.rb +213 -0
- data/lib/cabriolet/binary/hlp_structures.rb +66 -0
- data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
- data/lib/cabriolet/binary/lit_structures.rb +107 -0
- data/lib/cabriolet/binary/oab_structures.rb +112 -0
- data/lib/cabriolet/binary/structures.rb +56 -0
- data/lib/cabriolet/binary/szdd_structures.rb +60 -0
- data/lib/cabriolet/cab/compressor.rb +382 -0
- data/lib/cabriolet/cab/decompressor.rb +510 -0
- data/lib/cabriolet/cab/extractor.rb +357 -0
- data/lib/cabriolet/cab/parser.rb +264 -0
- data/lib/cabriolet/chm/compressor.rb +513 -0
- data/lib/cabriolet/chm/decompressor.rb +436 -0
- data/lib/cabriolet/chm/parser.rb +254 -0
- data/lib/cabriolet/cli.rb +776 -0
- data/lib/cabriolet/compressors/base.rb +34 -0
- data/lib/cabriolet/compressors/lzss.rb +250 -0
- data/lib/cabriolet/compressors/lzx.rb +581 -0
- data/lib/cabriolet/compressors/mszip.rb +315 -0
- data/lib/cabriolet/compressors/quantum.rb +446 -0
- data/lib/cabriolet/constants.rb +75 -0
- data/lib/cabriolet/decompressors/base.rb +39 -0
- data/lib/cabriolet/decompressors/lzss.rb +138 -0
- data/lib/cabriolet/decompressors/lzx.rb +726 -0
- data/lib/cabriolet/decompressors/mszip.rb +390 -0
- data/lib/cabriolet/decompressors/none.rb +27 -0
- data/lib/cabriolet/decompressors/quantum.rb +456 -0
- data/lib/cabriolet/errors.rb +39 -0
- data/lib/cabriolet/format_detector.rb +156 -0
- data/lib/cabriolet/hlp/compressor.rb +272 -0
- data/lib/cabriolet/hlp/decompressor.rb +198 -0
- data/lib/cabriolet/hlp/parser.rb +131 -0
- data/lib/cabriolet/huffman/decoder.rb +79 -0
- data/lib/cabriolet/huffman/encoder.rb +108 -0
- data/lib/cabriolet/huffman/tree.rb +138 -0
- data/lib/cabriolet/kwaj/compressor.rb +479 -0
- data/lib/cabriolet/kwaj/decompressor.rb +237 -0
- data/lib/cabriolet/kwaj/parser.rb +183 -0
- data/lib/cabriolet/lit/compressor.rb +255 -0
- data/lib/cabriolet/lit/decompressor.rb +250 -0
- data/lib/cabriolet/models/cabinet.rb +81 -0
- data/lib/cabriolet/models/chm_file.rb +28 -0
- data/lib/cabriolet/models/chm_header.rb +67 -0
- data/lib/cabriolet/models/chm_section.rb +38 -0
- data/lib/cabriolet/models/file.rb +119 -0
- data/lib/cabriolet/models/folder.rb +102 -0
- data/lib/cabriolet/models/folder_data.rb +21 -0
- data/lib/cabriolet/models/hlp_file.rb +45 -0
- data/lib/cabriolet/models/hlp_header.rb +37 -0
- data/lib/cabriolet/models/kwaj_header.rb +98 -0
- data/lib/cabriolet/models/lit_header.rb +55 -0
- data/lib/cabriolet/models/oab_header.rb +95 -0
- data/lib/cabriolet/models/szdd_header.rb +72 -0
- data/lib/cabriolet/modifier.rb +326 -0
- data/lib/cabriolet/oab/compressor.rb +353 -0
- data/lib/cabriolet/oab/decompressor.rb +315 -0
- data/lib/cabriolet/parallel.rb +333 -0
- data/lib/cabriolet/repairer.rb +288 -0
- data/lib/cabriolet/streaming.rb +221 -0
- data/lib/cabriolet/system/file_handle.rb +107 -0
- data/lib/cabriolet/system/io_system.rb +87 -0
- data/lib/cabriolet/system/memory_handle.rb +105 -0
- data/lib/cabriolet/szdd/compressor.rb +217 -0
- data/lib/cabriolet/szdd/decompressor.rb +184 -0
- data/lib/cabriolet/szdd/parser.rb +127 -0
- data/lib/cabriolet/validator.rb +332 -0
- data/lib/cabriolet/version.rb +5 -0
- data/lib/cabriolet.rb +104 -0
- metadata +157 -0
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
  module CAB
    # Compressor creates CAB files from source files.
    #
    # Files are queued with {#add_file} and written out by {#generate}.
    # Every queued file is stored in one single folder, so a cabinet uses
    # exactly one compression type.
    # rubocop:disable Metrics/ClassLength
    class Compressor
      # Compression types accepted by {#generate}
      SUPPORTED_COMPRESSIONS = %i[none mszip lzx quantum].freeze

      # Number of CFFOLDER entries written (single folder for now)
      FOLDER_COUNT = 1

      # Window size, in bits, handed to the LZX compressor
      LZX_WINDOW_BITS = 15

      # Window size, in bits, handed to the Quantum compressor
      QUANTUM_WINDOW_BITS = 10

      # First and last year that fit the 7-bit year field of a CAB date
      # (stored as an offset from 1980)
      DOS_YEAR_MIN = 1980
      DOS_YEAR_MAX = 2107

      attr_reader :io_system, :files, :compression, :set_id, :cabinet_index

      # Initialize a new compressor
      #
      # @param io_system [System::IOSystem] I/O system for writing
      def initialize(io_system = nil)
        @io_system = io_system || System::IOSystem.new
        @files = []
        @compression = :mszip
        @set_id = rand(0xFFFF)
        @cabinet_index = 0
      end

      # Add a file to the cabinet
      #
      # @param source_path [String] Path to source file
      # @param cab_path [String] Path within cabinet (optional, defaults to
      #   the base name of +source_path+)
      # @return [void]
      # @raise [ArgumentError] if the path does not exist or is not a
      #   regular file
      def add_file(source_path, cab_path = nil)
        unless ::File.exist?(source_path)
          raise ArgumentError,
                "File does not exist: #{source_path}"
        end
        unless ::File.file?(source_path)
          raise ArgumentError,
                "Not a file: #{source_path}"
        end

        @files << {
          source: source_path,
          cab_path: cab_path || ::File.basename(source_path),
        }
      end

      # Generate the cabinet file
      #
      # @param output_file [String] Path to output CAB file
      # @param options [Hash] Options
      # @option options [Symbol] :compression Compression type (:none, :mszip, :lzx, :quantum)
      # @option options [Integer] :set_id Cabinet set ID
      # @option options [Integer] :cabinet_index Cabinet index in set
      # @return [Integer] Size of the generated cabinet in bytes
      # @raise [ArgumentError] if no files were added or the compression
      #   type is not supported
      def generate(output_file, **options)
        raise ArgumentError, "No files to compress" if @files.empty?

        # Reject a bad compression type before any state is changed and
        # before the output file is created.
        requested = options[:compression] || @compression
        ensure_supported_compression(requested)

        @compression = requested
        @set_id = options[:set_id] || @set_id
        @cabinet_index = options[:cabinet_index] || @cabinet_index

        file_infos = collect_file_infos
        offsets = calculate_offsets(file_infos)
        compressed_data = compress_files(file_infos)

        write_cabinet(output_file, file_infos, offsets, compressed_data)
      end

      private

      # Raise unless the given compression type is one we can write
      #
      # @param compression [Symbol] Candidate compression type
      # @return [void]
      # @raise [ArgumentError] for anything outside SUPPORTED_COMPRESSIONS
      def ensure_supported_compression(compression)
        return if SUPPORTED_COMPRESSIONS.include?(compression)

        raise ArgumentError, "Unsupported compression type: #{compression}"
      end

      # Collect information about all files to be compressed
      #
      # @return [Array<Hash>] One hash per queued file with :source_path,
      #   :cab_path, :size, :mtime and :attribs
      def collect_file_infos
        @files.map do |file_entry|
          source_path = file_entry[:source]
          stat = ::File.stat(source_path)

          {
            source_path: source_path,
            cab_path: file_entry[:cab_path],
            size: stat.size,
            mtime: stat.mtime,
            attribs: calculate_attributes(stat),
          }
        end
      end

      # Calculate file attributes based on file stats
      #
      # @param stat [File::Stat] Stat of the source file
      # @return [Integer] Bitwise OR of Constants::ATTRIB_* flags
      def calculate_attributes(stat)
        attribs = Constants::ATTRIB_ARCH # Default to archived
        attribs |= Constants::ATTRIB_READONLY unless stat.writable?
        attribs |= Constants::ATTRIB_EXEC if stat.executable? # Unix systems
        attribs
      end

      # Calculate all offsets in the cabinet file
      #
      # Layout: CFHEADER, the CFFOLDER entries, one CFFILE entry per file
      # (fixed part plus null-terminated name), then the data blocks.
      #
      # @param file_infos [Array<Hash>] Entries from {#collect_file_infos}
      # @return [Hash] :folders_offset, :files_offset and :data_offset
      def calculate_offsets(file_infos)
        folders_offset = Constants::CFHEADER_SIZE
        files_offset = folders_offset + (Constants::CFFOLDER_SIZE * FOLDER_COUNT)

        entries_size = file_infos.sum do |info|
          Constants::CFFILE_SIZE + info[:cab_path].bytesize + 1 # null-terminated
        end

        {
          folders_offset: folders_offset,
          files_offset: files_offset,
          data_offset: files_offset + entries_size,
        }
      end

      # Compress all files and return block data
      #
      # Each file is read fully into memory and cut into chunks of at most
      # Constants::BLOCK_MAX bytes; every chunk is compressed on its own.
      #
      # NOTE(review): chunks are cut per file, so a block never spans two
      # files and a short block can appear in the middle of the folder.
      # Confirm that the intended readers accept that.
      #
      # @param file_infos [Array<Hash>] Entries from {#collect_file_infos}
      # @return [Hash] :blocks (Array of hashes with :uncompressed_size,
      #   :compressed_size, :data) and :total_uncompressed (Integer)
      def compress_files(file_infos)
        blocks = []
        total_uncompressed = 0

        file_infos.each do |info|
          file_data = ::File.binread(info[:source_path])
          total_uncompressed += file_data.bytesize

          # An empty file yields an empty range and therefore no blocks.
          (0...file_data.bytesize).step(Constants::BLOCK_MAX) do |offset|
            chunk = file_data.byteslice(offset, Constants::BLOCK_MAX)
            compressed_chunk = compress_chunk(chunk)

            blocks << {
              uncompressed_size: chunk.bytesize,
              compressed_size: compressed_chunk.bytesize,
              data: compressed_chunk,
            }
          end
        end

        {
          blocks: blocks,
          total_uncompressed: total_uncompressed,
        }
      end

      # Compress a single chunk of data with the configured compression
      #
      # @param data [String] Uncompressed chunk
      # @return [String] Compressed chunk (the input itself for :none)
      # @raise [ArgumentError] if the compression type is not supported
      def compress_chunk(data)
        case @compression
        when :none
          data
        when :mszip
          compress_mszip(data)
        when :lzx
          compress_lzx(data)
        when :quantum
          compress_quantum(data)
        else
          raise ArgumentError, "Unsupported compression type: #{@compression}"
        end
      end

      # Run a compressor over an in-memory buffer
      #
      # Wraps +data+ in a read handle, creates an empty write handle, lets
      # the block build the compressor from both, runs it and returns what
      # was written.
      #
      # @param data [String] Uncompressed data
      # @yieldparam input [System::MemoryHandle] Handle over +data+
      # @yieldparam output [System::MemoryHandle] Handle collecting output
      # @yieldreturn [#compress] Compressor bound to the two handles
      # @return [String] Data collected by the output handle
      def compress_in_memory(data)
        input = System::MemoryHandle.new(data, Constants::MODE_READ)
        output = System::MemoryHandle.new("", Constants::MODE_WRITE)

        yield(input, output).compress

        output.data
      end

      # Compress data using MSZIP
      #
      # @param data [String] Uncompressed data
      # @return [String] Compressed data
      def compress_mszip(data)
        compress_in_memory(data) do |input, output|
          Compressors::MSZIP.new(@io_system, input, output,
                                 Cabriolet.default_buffer_size)
        end
      end

      # Compress data using LZX
      #
      # @param data [String] Uncompressed data
      # @return [String] Compressed data
      def compress_lzx(data)
        compress_in_memory(data) do |input, output|
          Compressors::LZX.new(@io_system, input, output,
                               Cabriolet.default_buffer_size,
                               window_bits: LZX_WINDOW_BITS)
        end
      end

      # Compress data using Quantum
      #
      # @param data [String] Uncompressed data
      # @return [String] Compressed data
      def compress_quantum(data)
        compress_in_memory(data) do |input, output|
          Compressors::Quantum.new(@io_system, input, output,
                                   Cabriolet.default_buffer_size,
                                   window_bits: QUANTUM_WINDOW_BITS)
        end
      end

      # Write the complete cabinet file
      #
      # Writes, in order: CFHEADER, the CFFOLDER entry, one CFFILE entry per
      # file and finally every CFDATA block. The handle is always closed,
      # even when a write fails.
      #
      # @param output_file [String] Path to output CAB file
      # @param file_infos [Array<Hash>] Entries from {#collect_file_infos}
      # @param offsets [Hash] Result of {#calculate_offsets}
      # @param compressed_data [Hash] Result of {#compress_files}
      # @return [Integer] Total cabinet size in bytes
      def write_cabinet(output_file, file_infos, offsets, compressed_data)
        blocks = compressed_data[:blocks]
        data_size = blocks.sum do |block|
          Constants::CFDATA_SIZE + block[:compressed_size]
        end
        cabinet_size = offsets[:data_offset] + data_size

        handle = @io_system.open(output_file, Constants::MODE_WRITE)

        begin
          write_header(handle, file_infos.size, blocks.size,
                       offsets[:files_offset], cabinet_size)
          write_folder(handle, blocks.size, offsets[:data_offset])

          # Each file starts where the previous one ended in the
          # uncompressed folder stream.
          folder_offset = 0
          file_infos.each do |info|
            write_file_entry(handle, info, folder_offset)
            folder_offset += info[:size]
          end

          blocks.each { |block| write_data_block(handle, block) }

          cabinet_size
        ensure
          @io_system.close(handle)
        end
      end

      # Write CFHEADER
      #
      # @param handle [Object] Open output handle
      # @param num_files [Integer] Number of CFFILE entries
      # @param _num_blocks [Integer] Unused; kept for call compatibility
      # @param files_offset [Integer] Offset of the first CFFILE entry
      # @param cabinet_size [Integer] Total cabinet size in bytes
      # @return [void]
      def write_header(handle, num_files, _num_blocks, files_offset,
                       cabinet_size)
        header = Binary::CFHeader.new
        header.signature = "MSCF"
        header.reserved1 = 0
        header.cabinet_size = cabinet_size
        header.reserved2 = 0
        header.files_offset = files_offset
        header.reserved3 = 0
        header.minor_version = 3
        header.major_version = 1
        header.num_folders = FOLDER_COUNT
        header.num_files = num_files
        header.flags = 0 # No reserved space, no prev/next cabinet
        header.set_id = @set_id
        header.cabinet_index = @cabinet_index

        @io_system.write(handle, header.to_binary_s)
      end

      # Write CFFOLDER
      #
      # @param handle [Object] Open output handle
      # @param num_blocks [Integer] Number of CFDATA blocks in the folder
      # @param data_offset [Integer] Offset of the first CFDATA block
      # @return [void]
      def write_folder(handle, num_blocks, data_offset)
        folder = Binary::CFFolder.new
        folder.data_offset = data_offset
        folder.num_blocks = num_blocks
        folder.comp_type = compression_type_value

        @io_system.write(handle, folder.to_binary_s)
      end

      # Get compression type value for the CFFOLDER entry
      #
      # NOTE(review): CAB readers take the LZX/Quantum window size from the
      # upper bits of this field. Confirm whether Constants::COMP_TYPE_LZX
      # and Constants::COMP_TYPE_QUANTUM already carry the window bits used
      # by the compressors above; if not, they need to be OR-ed in here.
      #
      # @return [Integer] Constants::COMP_TYPE_* for the current compression
      # @raise [ArgumentError] if the compression type is not supported
      def compression_type_value
        case @compression
        when :none then Constants::COMP_TYPE_NONE
        when :mszip then Constants::COMP_TYPE_MSZIP
        when :lzx then Constants::COMP_TYPE_LZX
        when :quantum then Constants::COMP_TYPE_QUANTUM
        else
          raise ArgumentError, "Unsupported compression type: #{@compression}"
        end
      end

      # Write CFFILE entry followed by the null-terminated file name
      #
      # @param handle [Object] Open output handle
      # @param info [Hash] Entry from {#collect_file_infos}
      # @param folder_offset [Integer] Offset of the file inside the
      #   uncompressed folder data
      # @return [void]
      def write_file_entry(handle, info, folder_offset)
        file_entry = Binary::CFFile.new
        file_entry.uncompressed_size = info[:size]
        file_entry.folder_offset = folder_offset
        file_entry.folder_index = 0 # Single folder
        file_entry.date, file_entry.time = encode_datetime(info[:mtime])
        file_entry.attribs = info[:attribs]

        @io_system.write(handle, file_entry.to_binary_s)
        @io_system.write(handle, info[:cab_path])
        @io_system.write(handle, "\x00") # null terminator
      end

      # Encode Time object to CAB date/time format
      #
      # Date: day in bits 0-4, month in bits 5-8, (year - 1980) in bits 9-15.
      # Time: seconds / 2 in bits 0-4, minutes in bits 5-10, hours in
      # bits 11-15. The fields of +time+ are used as they are (no time zone
      # conversion).
      #
      # Timestamps outside the representable range are clamped instead of
      # being allowed to wrap around: anything before 1980 becomes
      # 1980-01-01 00:00:00 and anything after 2107 becomes
      # 2107-12-31 23:59:58.
      #
      # @param time [Time] Timestamp to encode
      # @return [Array(Integer, Integer)] 16-bit date and time values
      def encode_datetime(time)
        year, month, day, hour, min, sec =
          if time.year < DOS_YEAR_MIN
            [DOS_YEAR_MIN, 1, 1, 0, 0, 0]
          elsif time.year > DOS_YEAR_MAX
            [DOS_YEAR_MAX, 12, 31, 23, 59, 58]
          else
            # A leap second (sec == 60) would overflow into the 5-bit field.
            [time.year, time.month, time.day,
             time.hour, time.min, [time.sec, 59].min]
          end

        date_bits = day | (month << 5) | ((year - DOS_YEAR_MIN) << 9)
        time_bits = (sec / 2) | (min << 5) | (hour << 11)

        [date_bits, time_bits]
      end

      # Write CFDATA block (header followed by the block data)
      #
      # The stored checksum covers the block data and then the size fields
      # of the CFDATA header (the 4 bytes after the checksum field), which
      # is why the header is serialised once before the final checksum is
      # known.
      #
      # @param handle [Object] Open output handle
      # @param block [Hash] Block from {#compress_files}
      # @return [void]
      def write_data_block(handle, block)
        data_checksum = calculate_checksum(block[:data])

        cfdata = Binary::CFData.new
        cfdata.checksum = data_checksum
        cfdata.compressed_size = block[:compressed_size]
        cfdata.uncompressed_size = block[:uncompressed_size]

        size_fields = cfdata.to_binary_s.byteslice(4, 4)
        cfdata.checksum = calculate_checksum(size_fields, data_checksum)

        @io_system.write(handle, cfdata.to_binary_s)
        @io_system.write(handle, block[:data])
      end

      # Calculate the CAB checksum for data
      #
      # The data is XOR-ed into the running checksum as little-endian
      # 32-bit words. The 1-3 bytes left over at the end are combined with
      # the first leftover byte in the highest position (b0 << 16 | b1 << 8
      # | b2 for three bytes) and XOR-ed in as one more word. Any reader
      # verifying these checksums has to fold the tail in the same order.
      #
      # @param data [String] Bytes to checksum
      # @param initial [Integer] Checksum to continue from
      # @return [Integer] 32-bit checksum
      def calculate_checksum(data, initial = 0)
        # "V*" decodes every complete 4-byte group and ignores the tail.
        cksum = data.unpack("V*").reduce(initial, :^)

        tail_size = data.bytesize % 4
        if tail_size.positive?
          tail = data.byteslice(data.bytesize - tail_size, tail_size)
          cksum ^= tail.each_byte.reduce(0) { |acc, byte| (acc << 8) | byte }
        end

        cksum & 0xFFFFFFFF
      end
    end
    # rubocop:enable Metrics/ClassLength
  end
end
|