cabriolet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +799 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +29 -0
- data/README.adoc +1207 -0
- data/exe/cabriolet +6 -0
- data/lib/cabriolet/auto.rb +173 -0
- data/lib/cabriolet/binary/bitstream.rb +148 -0
- data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
- data/lib/cabriolet/binary/chm_structures.rb +213 -0
- data/lib/cabriolet/binary/hlp_structures.rb +66 -0
- data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
- data/lib/cabriolet/binary/lit_structures.rb +107 -0
- data/lib/cabriolet/binary/oab_structures.rb +112 -0
- data/lib/cabriolet/binary/structures.rb +56 -0
- data/lib/cabriolet/binary/szdd_structures.rb +60 -0
- data/lib/cabriolet/cab/compressor.rb +382 -0
- data/lib/cabriolet/cab/decompressor.rb +510 -0
- data/lib/cabriolet/cab/extractor.rb +357 -0
- data/lib/cabriolet/cab/parser.rb +264 -0
- data/lib/cabriolet/chm/compressor.rb +513 -0
- data/lib/cabriolet/chm/decompressor.rb +436 -0
- data/lib/cabriolet/chm/parser.rb +254 -0
- data/lib/cabriolet/cli.rb +776 -0
- data/lib/cabriolet/compressors/base.rb +34 -0
- data/lib/cabriolet/compressors/lzss.rb +250 -0
- data/lib/cabriolet/compressors/lzx.rb +581 -0
- data/lib/cabriolet/compressors/mszip.rb +315 -0
- data/lib/cabriolet/compressors/quantum.rb +446 -0
- data/lib/cabriolet/constants.rb +75 -0
- data/lib/cabriolet/decompressors/base.rb +39 -0
- data/lib/cabriolet/decompressors/lzss.rb +138 -0
- data/lib/cabriolet/decompressors/lzx.rb +726 -0
- data/lib/cabriolet/decompressors/mszip.rb +390 -0
- data/lib/cabriolet/decompressors/none.rb +27 -0
- data/lib/cabriolet/decompressors/quantum.rb +456 -0
- data/lib/cabriolet/errors.rb +39 -0
- data/lib/cabriolet/format_detector.rb +156 -0
- data/lib/cabriolet/hlp/compressor.rb +272 -0
- data/lib/cabriolet/hlp/decompressor.rb +198 -0
- data/lib/cabriolet/hlp/parser.rb +131 -0
- data/lib/cabriolet/huffman/decoder.rb +79 -0
- data/lib/cabriolet/huffman/encoder.rb +108 -0
- data/lib/cabriolet/huffman/tree.rb +138 -0
- data/lib/cabriolet/kwaj/compressor.rb +479 -0
- data/lib/cabriolet/kwaj/decompressor.rb +237 -0
- data/lib/cabriolet/kwaj/parser.rb +183 -0
- data/lib/cabriolet/lit/compressor.rb +255 -0
- data/lib/cabriolet/lit/decompressor.rb +250 -0
- data/lib/cabriolet/models/cabinet.rb +81 -0
- data/lib/cabriolet/models/chm_file.rb +28 -0
- data/lib/cabriolet/models/chm_header.rb +67 -0
- data/lib/cabriolet/models/chm_section.rb +38 -0
- data/lib/cabriolet/models/file.rb +119 -0
- data/lib/cabriolet/models/folder.rb +102 -0
- data/lib/cabriolet/models/folder_data.rb +21 -0
- data/lib/cabriolet/models/hlp_file.rb +45 -0
- data/lib/cabriolet/models/hlp_header.rb +37 -0
- data/lib/cabriolet/models/kwaj_header.rb +98 -0
- data/lib/cabriolet/models/lit_header.rb +55 -0
- data/lib/cabriolet/models/oab_header.rb +95 -0
- data/lib/cabriolet/models/szdd_header.rb +72 -0
- data/lib/cabriolet/modifier.rb +326 -0
- data/lib/cabriolet/oab/compressor.rb +353 -0
- data/lib/cabriolet/oab/decompressor.rb +315 -0
- data/lib/cabriolet/parallel.rb +333 -0
- data/lib/cabriolet/repairer.rb +288 -0
- data/lib/cabriolet/streaming.rb +221 -0
- data/lib/cabriolet/system/file_handle.rb +107 -0
- data/lib/cabriolet/system/io_system.rb +87 -0
- data/lib/cabriolet/system/memory_handle.rb +105 -0
- data/lib/cabriolet/szdd/compressor.rb +217 -0
- data/lib/cabriolet/szdd/decompressor.rb +184 -0
- data/lib/cabriolet/szdd/parser.rb +127 -0
- data/lib/cabriolet/validator.rb +332 -0
- data/lib/cabriolet/version.rb +5 -0
- data/lib/cabriolet.rb +104 -0
- metadata +157 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../binary/bitstream_writer"
|
|
4
|
+
require_relative "../huffman/encoder"
|
|
5
|
+
|
|
6
|
+
module Cabriolet
|
|
7
|
+
module Compressors
|
|
8
|
+
# MSZIP compressor: writes 'CK'-prefixed frames of DEFLATE-compressed data
|
|
9
|
+
# Based on RFC 1951 and libmspack implementation
|
|
10
|
+
class MSZIP < Base
|
|
11
|
+
# Maximum number of uncompressed bytes placed in a single MSZIP frame (32 KiB)
FRAME_SIZE = 32_768

# Two-byte signature written at the start of every frame ('CK')
SIGNATURE = [0x43, 0x4B].freeze

# DEFLATE block type identifiers (written as a 2-bit field in the block header)
STORED_BLOCK = 0
FIXED_HUFFMAN_BLOCK = 1
DYNAMIC_HUFFMAN_BLOCK = 2

# Shortest and longest match lengths the encoder may emit
MIN_MATCH = 3
MAX_MATCH = 258

# Size in bytes of the LZ77 history window
WINDOW_SIZE = 32_768

# Base match length for each length code 257-285 (index = code - 257).
# Rows are grouped by the number of extra bits the codes carry.
LIT_LENGTHS = [
  3, 4, 5, 6, 7, 8, 9, 10,
  11, 13, 15, 17,
  19, 23, 27, 31,
  35, 43, 51, 59,
  67, 83, 99, 115,
  131, 163, 195, 227,
  258
].freeze

# Base match distance for each distance code 0-29.
# Rows hold the two codes that share an extra-bit count (first row: four
# codes with no extra bits).
DIST_OFFSETS = [
  1, 2, 3, 4,
  5, 7,
  9, 13,
  17, 25,
  33, 49,
  65, 97,
  129, 193,
  257, 385,
  513, 769,
  1025, 1537,
  2049, 3073,
  4097, 6145,
  8193, 12_289,
  16_385, 24_577
].freeze

# Number of extra bits following each length code 257-285 (index = code - 257)
LIT_EXTRABITS = [
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1,
  2, 2, 2, 2,
  3, 3, 3, 3,
  4, 4, 4, 4,
  5, 5, 5, 5,
  0
].freeze

# Number of extra bits following each distance code 0-29
DIST_EXTRABITS = [
  0, 0, 0, 0,
  1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
  8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
].freeze
|
|
52
|
+
|
|
53
|
+
# Set up the compressor: history window, fixed Huffman tables and bit writer
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
def initialize(io_system, input, output, buffer_size)
  super(io_system, input, output, buffer_size)

  # LZ77 history buffer, zero-filled, with its write cursor at the start
  @window_pos = 0
  @window = "\0" * WINDOW_SIZE

  # Code tables used for fixed-Huffman blocks
  @fixed_codes = Huffman::Encoder.build_fixed_codes

  # Bit-level writer targeting the output handle
  @bitstream = Binary::BitstreamWriter.new(io_system, output, buffer_size)
end
|
|
72
|
+
|
|
73
|
+
# Compress the whole input as a sequence of MSZIP frames
#
# The input is read fully into memory and cut into frames of at most
# FRAME_SIZE bytes. Every frame is written as the 'CK' signature followed
# by a single fixed-Huffman block flagged as the last block of that frame.
# Empty input still produces one frame holding an empty block.
#
# @return [Integer] Number of uncompressed input bytes consumed
#   (0 for empty input); this is not the size of the compressed output
def compress
  input_data = read_all_input
  total = input_data.bytesize
  offset = 0

  # Runs at least once so that empty input still yields one (empty) frame
  loop do
    # Slice by bytes: String#[] counts characters, which disagrees with
    # bytesize whenever the input string carries a multibyte encoding
    chunk = input_data.byteslice(offset, FRAME_SIZE)

    write_signature
    compress_block(chunk, true)

    offset += chunk.bytesize
    break if offset >= total
  end

  # Flush any bits still pending in the writer
  @bitstream.flush

  total
end
|
|
111
|
+
|
|
112
|
+
private
|
|
113
|
+
|
|
114
|
+
# Read the entire input handle into one in-memory byte string
#
# Reads @buffer_size bytes at a time until the I/O system reports end of
# input, signalled by either an empty string or nil.
#
# @return [String] All input data, binary (ASCII-8BIT) encoded
def read_all_input
  data = String.new(encoding: Encoding::BINARY)

  loop do
    chunk = @io_system.read(@input, @buffer_size)
    break if chunk.nil? || chunk.empty?

    # Append as raw bytes so chunks with differing encodings can neither
    # raise Encoding::CompatibilityError nor change the buffer's encoding
    data << chunk.b
  end

  data
end
|
|
127
|
+
|
|
128
|
+
# Emit the two-byte frame signature ('CK') on a byte boundary
#
# @return [void]
def write_signature
  # Pad any partially written byte first so the signature is byte-aligned
  @bitstream.byte_align

  SIGNATURE.each do |signature_byte|
    @bitstream.write_raw_byte(signature_byte)
  end
end
|
|
135
|
+
|
|
136
|
+
# Write one fixed-Huffman block containing the given data
#
# Emits the block header (1-bit last-block flag, 2-bit block type), the
# encoded payload, and finally the end-of-block symbol.
#
# @param data [String] Data to compress
# @param is_last [Boolean] Whether this is the last block
# @return [void]
def compress_block(data, is_last)
  final_flag = is_last ? 1 : 0
  @bitstream.write_bits(final_flag, 1)
  @bitstream.write_bits(FIXED_HUFFMAN_BLOCK, 2)

  # Each block starts matching from the beginning of the window
  @window_pos = 0

  encode_data(data)

  # Symbol 256 marks the end of the block
  encode_literal(256)
end
|
|
155
|
+
|
|
156
|
+
# Walk the data once, emitting matches where possible and literals otherwise
#
# At each position the longest match reported by find_match is used when
# it reaches MIN_MATCH; otherwise the single byte is emitted as a literal.
# Every consumed byte is also appended to the sliding window.
#
# @param data [String] Data to encode
# @return [void]
def encode_data(data)
  total = data.bytesize
  cursor = 0

  until cursor >= total
    candidate = find_match(data, cursor)
    run = candidate ? candidate[:length] : 0

    if run < MIN_MATCH
      # No usable match: emit the byte itself
      literal = data.getbyte(cursor)
      encode_literal(literal)
      add_to_window(literal)
      cursor += 1
      next
    end

    # Emit the length/distance pair, then record the covered bytes
    encode_match(run, candidate[:distance])
    cursor.upto(cursor + run - 1) { |index| add_to_window(data.getbyte(index)) }
    cursor += run
  end
end
|
|
185
|
+
|
|
186
|
+
# Find the longest match for data[pos..] in the sliding window
#
# Only bytes already written to the window (the last
# min(@window_pos, WINDOW_SIZE) bytes) are valid match sources. Window
# slots at or beyond @window_pos hold zeros or data from an earlier block
# and must never be compared against. When a match runs past the newest
# window byte (length > distance) it overlaps itself, so the reference
# byte is the one the match produced `distance` bytes earlier, which is
# what a decoder would copy.
#
# Candidates are tried from the nearest distance outwards and a candidate
# only replaces the current best when strictly longer, so ties go to the
# smallest distance.
#
# @param data [String] Input data
# @param pos [Integer] Current position in data
# @return [Hash, nil] Match info with :length and :distance, or nil
def find_match(data, pos)
  remaining = data.bytesize - pos

  # Covers both "pos is past the end" and "fewer than MIN_MATCH bytes left"
  return nil if remaining < MIN_MATCH

  max_length = [MAX_MATCH, remaining].min
  history = [@window_pos, WINDOW_SIZE].min
  best_match = nil

  1.upto(history) do |distance|
    length = 0

    while length < max_length
      reference =
        if length < distance
          # Still inside the written part of the (circular) window
          @window.getbyte((@window_pos - distance + length) % WINDOW_SIZE)
        else
          # Overlapping copy: source is a byte this match already covered
          data.getbyte(pos + length - distance)
        end
      break unless reference == data.getbyte(pos + length)

      length += 1
    end

    next if length < MIN_MATCH
    next if best_match && length <= best_match[:length]

    best_match = { length: length, distance: distance }

    # Nothing can beat a match that already covers all usable bytes
    break if length == max_length
  end

  best_match
end
|
|
226
|
+
|
|
227
|
+
# Store one byte in the circular history window and advance the cursor
#
# @param byte [Integer] Byte to add
# @return [void]
def add_to_window(byte)
  # The cursor grows without bound; the slot wraps at WINDOW_SIZE
  slot = @window_pos % WINDOW_SIZE
  @window.setbyte(slot, byte)
  @window_pos += 1
end
|
|
235
|
+
|
|
236
|
+
# Write one literal/length symbol using the fixed literal code table
#
# @param byte [Integer] Byte value (0-255) or end-of-block (256)
# @return [void]
def encode_literal(byte)
  literal_table = @fixed_codes[:literal]
  Huffman::Encoder.encode_symbol(byte, literal_table, @bitstream)
end
|
|
243
|
+
|
|
244
|
+
# Emit one LZ77 match as a length symbol followed by a distance symbol
#
# Each symbol is followed by its extra bits (the offset from the code's
# base value) whenever the code carries any.
#
# @param length [Integer] Match length (3-258)
# @param distance [Integer] Match distance (1-32768)
# @return [void]
def encode_match(length, distance)
  # Length symbols are taken from the literal/length code table
  len_symbol, len_bit_count, len_remainder = encode_length(length)
  Huffman::Encoder.encode_symbol(len_symbol, @fixed_codes[:literal], @bitstream)
  @bitstream.write_bits(len_remainder, len_bit_count) if len_bit_count > 0

  # Distance symbols use their own code table
  dist_symbol, dist_bit_count, dist_remainder = encode_distance(distance)
  Huffman::Encoder.encode_symbol(dist_symbol, @fixed_codes[:distance], @bitstream)
  @bitstream.write_bits(dist_remainder, dist_bit_count) if dist_bit_count > 0
end
|
|
262
|
+
|
|
263
|
+
# Translate a match length into its length code and extra bits
#
# The code is the last LIT_LENGTHS entry whose base does not exceed the
# length; the extra value is the remaining offset from that base. Length
# 258 has its own table entry (code 285, no extra bits), so it needs no
# special handling here.
#
# @param length [Integer] Match length (3-258)
# @return [Array<Integer>] [code, extra_bits, extra_value]
# @raise [Errors::CompressionError] if length is outside the table's range
def encode_length(length)
  unless length.between?(LIT_LENGTHS.first, LIT_LENGTHS.last)
    raise Errors::CompressionError, "Invalid length: #{length}"
  end

  index = LIT_LENGTHS.rindex { |base_length| base_length <= length }

  # Length codes start at symbol 257
  [257 + index, LIT_EXTRABITS[index], length - LIT_LENGTHS[index]]
end
|
|
292
|
+
|
|
293
|
+
# Translate a match distance into its distance code and extra bits
#
# Scans distance codes 0-29 for the one whose range
# [base, base + 2**extra_bits - 1] contains the distance.
#
# @param distance [Integer] Match distance (1-32768)
# @return [Array<Integer>] [code, extra_bits, extra_value]
# @raise [Errors::CompressionError] if no code covers the distance
def encode_distance(distance)
  matching_code = (0...30).find do |candidate|
    lowest = DIST_OFFSETS[candidate]
    highest = lowest + (1 << DIST_EXTRABITS[candidate]) - 1
    distance.between?(lowest, highest)
  end

  # No code covers the distance: it lies outside 1..32768
  raise Errors::CompressionError, "Invalid distance: #{distance}" if matching_code.nil?

  [matching_code, DIST_EXTRABITS[matching_code], distance - DIST_OFFSETS[matching_code]]
end
|
|
313
|
+
end
|
|
314
|
+
end
|
|
315
|
+
end
|