cabriolet 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module HLP
|
|
5
|
+
module WinHelp
|
|
6
|
+
# Zeck LZ77 compressor and decompressor for Windows Help files
|
|
7
|
+
#
|
|
8
|
+
# Implements the Zeck LZ77 compression algorithm used in WinHelp files.
|
|
9
|
+
# This is a variant of LZ77 with specific encoding:
|
|
10
|
+
# - 4KB sliding window (4096 bytes)
|
|
11
|
+
# - Minimum match: 3 bytes
|
|
12
|
+
# - Maximum match: 271 bytes
|
|
13
|
+
# - Flag-based token control (8 tokens per flag byte)
|
|
14
|
+
#
|
|
15
|
+
# Encoding:
|
|
16
|
+
# - Flag byte: 8 bits controlling next 8 tokens
|
|
17
|
+
# - Bit = 0: Literal byte follows
|
|
18
|
+
# - Bit = 1: Match follows (2-3 bytes)
|
|
19
|
+
# - Match format:
|
|
20
|
+
# - Byte 1: OOOO LLLL (O=offset high 4 bits, L=length 0-15)
|
|
21
|
+
# - Byte 2: OOOO OOOO (O=offset low 8 bits)
|
|
22
|
+
# - Byte 3 (if L=15): Extra length byte (0-252, add 19)
|
|
23
|
+
#
|
|
24
|
+
# Match decoding:
|
|
25
|
+
# - Offset: 12 bits (0-4095)
|
|
26
|
+
# - Length 3-18: 4 bits (0-15, add 3)
|
|
27
|
+
# - Length 19-271: Extra byte (0-252, add 19)
|
|
28
|
+
class ZeckLZ77
  # Window size for LZ77 compression
  WINDOW_SIZE = 4096

  # Minimum match length
  MIN_MATCH = 3

  # Length value whose 4-bit encoding (15) marks a long match. The decoder
  # always reads an extra length byte after it, so a match of exactly this
  # length cannot be round-tripped and is never produced by #compress.
  MAX_SHORT_MATCH = 18

  # Maximum match length with extra byte
  MAX_LONG_MATCH = 271

  # Value added to the extra length byte of a long match
  LONG_MATCH_BASE = MAX_SHORT_MATCH + 1

  # Largest distance that fits in the 12-bit offset field
  MAX_OFFSET = WINDOW_SIZE - 1

  # Create a codec with an empty sliding window.
  def initialize
    @window = String.new(capacity: WINDOW_SIZE, encoding: Encoding::BINARY)
  end

  # Compress data using Zeck LZ77
  #
  # Output is a sequence of groups: one flag byte followed by up to eight
  # tokens. Flag bit N set means token N is a match, clear means a literal.
  #
  # @param input [String] Uncompressed data
  # @return [String] Compressed data (binary)
  def compress(input)
    output = String.new(encoding: Encoding::BINARY)
    pos = 0
    @window.clear

    while pos < input.bytesize
      # Collect up to 8 tokens for this flag byte
      tokens = []
      flag = 0

      8.times do |bit|
        break if pos >= input.bytesize

        match = find_best_match(input, pos)

        if match && match[:length] >= MIN_MATCH
          tokens << encode_match(match)
          flag |= (1 << bit)

          # The matched bytes become part of the history
          match[:length].times do
            add_to_window(input.getbyte(pos))
            pos += 1
          end
        else
          byte = input.getbyte(pos)
          tokens << [byte].pack("C")
          add_to_window(byte)
          pos += 1
        end
      end

      # Write flag byte followed by tokens
      output << [flag].pack("C")
      tokens.each { |token| output << token }
    end

    output
  end

  # Find the longest match for input[pos..] in the sliding window
  #
  # Candidates are tried from the most recent byte backwards; among equally
  # long candidates the smallest offset wins. A match never runs past the
  # end of the window (no overlapping matches are produced).
  #
  # Only matches the decoder can reproduce are returned: offsets are capped
  # at MAX_OFFSET (a distance of 4096 would wrap to 0 in the 12-bit field)
  # and a candidate of exactly MAX_SHORT_MATCH bytes is shortened by one.
  #
  # @param input [String] Input data
  # @param pos [Integer] Current position
  # @return [Hash, nil] Match with :offset and :length, or nil
  def find_best_match(input, pos)
    return nil if @window.empty?

    window_size = @window.bytesize
    max_offset = [window_size, MAX_OFFSET].min
    longest_possible = [MAX_LONG_MATCH, input.bytesize - pos].min

    best_match = nil
    best_length = 0

    (1..max_offset).each do |offset|
      window_pos = window_size - offset
      # A match starting `offset` bytes back can cover at most `offset` bytes
      limit = [longest_possible, offset].min
      match_length = 0

      while match_length < limit &&
            @window.getbyte(window_pos + match_length) == input.getbyte(pos + match_length)
        match_length += 1
      end

      match_length = encodable_length(match_length)
      next unless match_length >= MIN_MATCH && match_length > best_length

      best_length = match_length
      best_match = { offset: offset, length: match_length }

      # Nothing later can be longer, so stop scanning
      break if best_length >= longest_possible
    end

    best_match
  end

  # Encode a match into bytes
  #
  # Layout (unchanged from the original implementation):
  # - Byte 1: high nibble = offset bits 8-11, low nibble = length - 3
  #   (15 marks a long match)
  # - Byte 2: offset bits 0-7
  # - Byte 3 (long matches only): length - 19
  #
  # NOTE(review): this layout differs from what I recall of the WinHelp
  # Zeck format in helpdeco (little-endian word, no extra length byte) -
  # confirm against real .HLP data before relying on interoperability.
  #
  # @param match [Hash] Match with :offset and :length
  # @return [String] Encoded match (2-3 bytes)
  def encode_match(match)
    offset = match[:offset]
    length = match[:length]

    high_offset = (offset >> 4) & 0xF0
    low_offset = offset & 0xFF

    if length <= MAX_SHORT_MATCH
      [high_offset | ((length - MIN_MATCH) & 0x0F), low_offset].pack("CC")
    else
      [high_offset | 0x0F, low_offset, length - LONG_MATCH_BASE].pack("CCC")
    end
  end

  # Decompress Zeck LZ77 compressed data
  #
  # Decoding stops once output_size bytes have been produced or the input
  # is exhausted; truncated input yields the bytes decoded so far. A final
  # match is copied in full, so the result can exceed output_size by part
  # of one match.
  #
  # @param input [String] Compressed data
  # @param output_size [Integer] Expected decompressed size
  # @return [String] Decompressed data (binary)
  # @raise [Cabriolet::DecompressionError] if a match offset is zero or
  #   points before the start of the output
  def decompress(input, output_size)
    output = String.new(capacity: output_size, encoding: Encoding::BINARY)
    input_pos = 0

    while output.bytesize < output_size && input_pos < input.bytesize
      flags = input.getbyte(input_pos)
      input_pos += 1

      # Process 8 tokens controlled by flag byte
      8.times do |bit|
        break if output.bytesize >= output_size

        if flags.nobits?(1 << bit)
          # Bit = 0: Literal byte
          byte = input.getbyte(input_pos)
          return output if byte.nil? # End of input

          input_pos += 1
          output << byte
        else
          # Bit = 1: Match (2-3 bytes); a truncated match ends decoding
          return output if input_pos + 1 >= input.bytesize

          byte1 = input.getbyte(input_pos)
          byte2 = input.getbyte(input_pos + 1)
          input_pos += 2

          offset = ((byte1 & 0xF0) << 4) | byte2
          length = (byte1 & 0x0F) + MIN_MATCH

          # Length nibble 15 means an extra length byte follows
          if length == MAX_SHORT_MATCH && input_pos < input.bytesize
            length = input.getbyte(input_pos) + LONG_MATCH_BASE
            input_pos += 1
          end

          copy_match(output, offset, length)
        end
      end
    end

    output
  end

  private

  # Map a raw match length to one the decoder can reproduce
  #
  # @param length [Integer] Number of matching bytes found
  # @return [Integer] length, or length - 1 when it equals MAX_SHORT_MATCH
  def encodable_length(length)
    length == MAX_SHORT_MATCH ? length - 1 : length
  end

  # Add byte to sliding window, keeping only the last WINDOW_SIZE bytes
  #
  # @param byte [Integer] Byte to add
  def add_to_window(byte)
    @window << byte
    @window = @window[-WINDOW_SIZE..] if @window.bytesize > WINDOW_SIZE
  end

  # Append a match to the output
  #
  # Bytes are read `offset` bytes back from the current end of the output,
  # one at a time, so the copy stays correct however long the output is and
  # when the source overlaps the bytes being written.
  #
  # @param output [String] Output buffer
  # @param offset [Integer] Distance back from the end of the output (1-4095)
  # @param length [Integer] Match length (3-271)
  # @raise [Cabriolet::DecompressionError] if offset is 0 or exceeds the
  #   number of bytes decoded so far
  def copy_match(output, offset, length)
    if offset.zero? || offset > output.bytesize
      raise Cabriolet::DecompressionError,
            "Invalid offset: #{offset}"
    end

    length.times do
      output << output.getbyte(output.bytesize - offset)
    end
  end
end
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
end
|
|
@@ -53,20 +53,23 @@ module Cabriolet
|
|
|
53
53
|
#
|
|
54
54
|
# @return [Hash] Hash with :literal and :distance code tables
|
|
55
55
|
def self.build_fixed_codes
  # The fixed tables never change, so they are built once and cached.
  @build_fixed_codes ||= begin
    # Literal/length symbols: 0-143 use 8 bits, 144-255 use 9 bits,
    # 256-279 use 7 bits and 280-287 use 8 bits.
    literal_lengths = Array.new(288) do |symbol|
      case symbol
      when 0...144 then 8
      when 144...256 then 9
      when 256...280 then 7
      else 8
      end
    end

    # Every distance symbol uses a 5-bit code.
    distance_lengths = Array.new(32, 5)

    {
      literal: build_codes(literal_lengths, 288),
      distance: build_codes(distance_lengths, 32),
    }
  end
end
|
|
71
74
|
|
|
72
75
|
# Encode a symbol using Huffman codes and write to bitstream
|
|
@@ -13,9 +13,11 @@ module Cabriolet
|
|
|
13
13
|
#
|
|
14
14
|
# @param lengths [Array<Integer>] Code lengths for each symbol
|
|
15
15
|
# @param num_symbols [Integer] Number of symbols
|
|
16
|
-
|
|
16
|
+
# @param bit_order [Symbol] Bit ordering (:lsb or :msb), defaults to :lsb
|
|
17
|
+
def initialize(lengths, num_symbols, bit_order: :lsb)
  # The lookup table starts out unset (nil).
  @lengths, @num_symbols, @bit_order, @table = lengths, num_symbols, bit_order, nil
end
|
|
21
23
|
|
|
@@ -39,6 +41,88 @@ module Cabriolet
|
|
|
39
41
|
table_mask = 1 << table_bits
|
|
40
42
|
bit_mask = table_mask >> 1
|
|
41
43
|
|
|
44
|
+
if @bit_order == :msb
|
|
45
|
+
build_table_msb(table_bits, pos, table_mask, bit_mask)
|
|
46
|
+
else
|
|
47
|
+
build_table_lsb(table_bits, pos, table_mask, bit_mask)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
# Build table for MSB-first bit ordering
|
|
54
|
+
# Fills @table so that codes can be decoded most-significant-bit first.
#
# Codes of up to +table_bits+ bits get direct-lookup entries at the start of
# @table. Longer codes (up to MAX_BITS) are stored as a tree of entry pairs
# allocated from index +next_symbol << 1+ onwards; 0xFFFF marks an entry
# that has not been assigned yet.
#
# @param table_bits [Integer] Number of bits resolved by direct lookup
# @param pos [Integer] Next free slot in the direct-lookup area
#   (presumably 0 on entry - confirm against the caller)
# @param table_mask [Integer] Size of the direct-lookup area; the caller
#   passes 1 << table_bits
# @param bit_mask [Integer] Number of slots one code of the current length
#   occupies; the caller passes table_mask >> 1
# @return [Boolean] true when the code lengths fill the code space exactly,
#   false when they overflow it or leave it incomplete
def build_table_msb(table_bits, pos, table_mask, bit_mask)
  # Fill entries for codes short enough for direct mapping
  (1..table_bits).each do |bit_num|
    (0...num_symbols).each do |sym|
      next unless lengths[sym] == bit_num

      leaf = pos
      # Reserve bit_mask slots; running past the table means too many codes
      return false if (pos += bit_mask) > table_mask

      # Fill all possible lookups of this symbol
      fill = bit_mask
      while fill.positive?
        @table[leaf] = sym
        leaf += 1
        fill -= 1
      end
    end
    # Codes one bit longer occupy half as many slots
    bit_mask >>= 1
  end

  # Exit with success if table is complete
  return true if pos == table_mask

  # Mark remaining entries as unused
  (pos...(table_mask)).each do |sym|
    @table[sym] = 0xFFFF
  end

  # next_symbol = base of allocation for long codes
  next_symbol = [(table_mask >> 1), num_symbols].max

  # Process longer codes (table_bits + 1 to MAX_BITS)
  # pos and table_mask are shifted up by 16 bits so that pos >> 16 still
  # gives the direct-lookup index while the bits below it track the
  # position within that slot.
  pos <<= 16
  table_mask <<= 16
  bit_mask = 1 << 15

  ((table_bits + 1)..MAX_BITS).each do |bit_num|
    (0...num_symbols).each do |sym|
      next unless lengths[sym] == bit_num

      # No code space left for this symbol
      return false if pos >= table_mask

      # leaf = the first table_bits of the code
      leaf = pos >> 16

      # Build the tree path for this long code
      (0...(bit_num - table_bits)).each do |fill_idx|
        # If this path hasn't been taken yet, allocate two entries
        if @table[leaf] == 0xFFFF
          @table[next_symbol << 1] = 0xFFFF
          @table[(next_symbol << 1) + 1] = 0xFFFF
          @table[leaf] = next_symbol
          next_symbol += 1
        end

        # Follow the path and select either left or right for next bit
        leaf = @table[leaf] << 1
        leaf += 1 if (pos >> (15 - fill_idx)).anybits?(1)
      end

      @table[leaf] = sym
      pos += bit_mask
    end
    bit_mask >>= 1
  end

  # Full table?
  pos == table_mask
end
|
|
123
|
+
|
|
124
|
+
# Build table for LSB-first bit ordering (original implementation)
|
|
125
|
+
def build_table_lsb(table_bits, pos, table_mask, bit_mask)
|
|
42
126
|
# Fill entries for codes short enough for direct mapping (LSB ordering)
|
|
43
127
|
(1..table_bits).each do |bit_num|
|
|
44
128
|
(0...num_symbols).each do |sym|
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../cli/base_command_handler"
|
|
4
|
+
require_relative "decompressor"
|
|
5
|
+
require_relative "compressor"
|
|
6
|
+
|
|
7
|
+
module Cabriolet
|
|
8
|
+
module KWAJ
|
|
9
|
+
# Command handler for KWAJ compressed format
|
|
10
|
+
#
|
|
11
|
+
# This handler implements the unified command interface for KWAJ files,
|
|
12
|
+
# wrapping the existing KWAJ::Decompressor and KWAJ::Compressor classes.
|
|
13
|
+
#
|
|
14
|
+
class CommandHandler < Commands::BaseCommandHandler
  # Compression methods accepted by #create
  VALID_COMPRESSIONS = %i[none xor szdd mszip].freeze

  # Compression method used when none is requested
  DEFAULT_COMPRESSION = :szdd

  # List KWAJ file information
  #
  # For KWAJ files, list displays detailed file information
  # rather than a file listing (single file archive).
  #
  # @param file [String] Path to the KWAJ file
  # @param options [Hash] Additional options (unused)
  # @return [void]
  def list(file, _options = {})
    validate_file_exists(file)

    with_open_header(file) do |_decompressor, header|
      display_kwaj_info(header, file)
    end
  end

  # Extract KWAJ compressed file
  #
  # Decompresses the KWAJ file. The output path is taken from
  # options[:output]; otherwise it is derived from the header when no
  # output_dir is given, or placed inside output_dir under the input's
  # base name (extension removed).
  #
  # @param file [String] Path to the KWAJ file
  # @param output_dir [String] Output directory (not typically used for KWAJ)
  # @param options [Hash] Additional options
  # @option options [String] :output Output file path
  # @return [void]
  def extract(file, output_dir = nil, options = {})
    validate_file_exists(file)

    output = options[:output]

    # Auto-detect output name if not provided
    output = auto_output_filename(file) if output.nil? && output_dir.nil?

    # If output_dir is specified, ensure it exists and construct output path
    if output.nil? && output_dir
      output_dir = ensure_output_dir(output_dir)
      output = File.join(output_dir, File.basename(file, ".*"))
    end

    bytes = with_open_header(file) do |decompressor, header|
      puts "Extracting #{file} -> #{output}" if verbose?
      decompressor.extract(header, file, output)
    end

    puts "Extracted #{file} to #{output} (#{bytes} bytes)"
  end

  # Create KWAJ compressed file
  #
  # Compresses a single file using KWAJ compression. When output is nil the
  # result is written next to the input with a ".kwj" suffix appended.
  #
  # @param output [String] Output KWAJ file path
  # @param files [Array<String>] Input file (single file for KWAJ)
  # @param options [Hash] Additional options
  # @option options [String] :compression Compression method (:none, :xor, :szdd, :mszip)
  # @option options [Boolean] :include_length Include uncompressed length
  # @option options [String] :filename Original filename to embed
  # @option options [String] :extra_data Extra data to include
  # @return [void]
  # @raise [ArgumentError] if no file or multiple files are given, the file
  #   does not exist, or the compression method is invalid
  def create(output, files = [], options = {})
    raise ArgumentError, "No file specified" if files.empty?

    if files.size > 1
      raise ArgumentError,
            "KWAJ format supports only one file at a time"
    end

    file = files.first
    unless File.exist?(file)
      raise ArgumentError,
            "File does not exist: #{file}"
    end

    compression = parse_compression_option(options[:compression])
    compress_options = { compression: compression }

    compress_options[:include_length] = true if options[:include_length]
    compress_options[:filename] = options[:filename] if options[:filename]
    compress_options[:extra_data] = options[:extra_data] if options[:extra_data]

    # Auto-generate output name if not provided
    output = "#{file}.kwj" if output.nil?

    compressor = Compressor.new

    puts "Compressing #{file} -> #{output} (#{compression} compression)" if verbose?
    bytes = compressor.compress(file, output, **compress_options)

    puts "Compressed #{file} to #{output} (#{bytes} bytes, #{compression} compression)"
  end

  # Display detailed KWAJ file information
  #
  # @param file [String] Path to the KWAJ file
  # @param options [Hash] Additional options (unused)
  # @return [void]
  def info(file, _options = {})
    validate_file_exists(file)

    with_open_header(file) do |_decompressor, header|
      display_kwaj_info(header, file)
    end
  end

  # Test KWAJ file integrity
  #
  # Only checks that the header can be opened; the compressed payload is
  # not decoded.
  #
  # @param file [String] Path to the KWAJ file
  # @param options [Hash] Additional options (unused)
  # @return [void]
  def test(file, _options = {})
    validate_file_exists(file)

    with_open_header(file) do |_decompressor, header|
      puts "Testing #{file}..."
      # TODO: Implement full integrity testing
      puts "OK: KWAJ file structure is valid"
      puts "Compression: #{header.compression_name}"
      puts "Data offset: #{header.data_offset} bytes"
      puts "Uncompressed size: #{header.length || 'unknown'} bytes"
    end
  end

  private

  # Open a KWAJ file, yield its decompressor and header, and always close
  # the header again, even when the block raises.
  #
  # @param file [String] Path to the KWAJ file
  # @yieldparam decompressor [Decompressor] Decompressor that opened the file
  # @yieldparam header [Object] Header returned by Decompressor#open
  # @return [Object] The block's return value
  def with_open_header(file)
    decompressor = Decompressor.new
    header = decompressor.open(file)

    begin
      yield decompressor, header
    ensure
      decompressor.close(header)
    end
  end

  # Display KWAJ file information
  #
  # @param header [Header] The KWAJ header object
  # @param file [String] Original file path
  # @return [void]
  def display_kwaj_info(header, file)
    puts "KWAJ File Information"
    puts "=" * 50
    puts "Filename: #{file}"
    puts "Compression: #{header.compression_name}"
    puts "Data offset: #{header.data_offset} bytes"
    puts "Uncompressed size: #{header.length || 'unknown'} bytes"
    puts "Original filename: #{header.filename}" if header.filename
    return unless header.extra && !header.extra.empty?

    puts "Extra data: #{header.extra_length} bytes"
    puts "  #{header.extra}"
  end

  # Auto-detect output filename from KWAJ header
  #
  # @param file [String] Original file path
  # @return [String] Detected output filename
  def auto_output_filename(file)
    with_open_header(file) do |decompressor, header|
      decompressor.auto_output_filename(file, header)
    end
  end

  # Parse compression option to symbol
  #
  # The value is compared by name against VALID_COMPRESSIONS, so arbitrary
  # input is never converted to a symbol.
  #
  # @param compression_value [String, Symbol, nil] The compression type
  # @return [Symbol] The compression symbol (DEFAULT_COMPRESSION for nil)
  # @raise [ArgumentError] if the value is not a supported method
  def parse_compression_option(compression_value)
    return DEFAULT_COMPRESSION if compression_value.nil?

    requested = compression_value.to_s
    compression = VALID_COMPRESSIONS.find { |name| name.to_s == requested }
    return compression if compression

    raise ArgumentError,
          "Invalid compression: #{compression_value}. " \
          "Valid options: #{VALID_COMPRESSIONS.join(', ')}"
  end
end
|
|
212
|
+
end
|
|
213
|
+
end
|
|
@@ -22,8 +22,10 @@ module Cabriolet
|
|
|
22
22
|
#
|
|
23
23
|
# @param io_system [System::IOSystem, nil] Custom I/O system or nil for
|
|
24
24
|
# default
|
|
25
|
-
|
|
25
|
+
# @param algorithm_factory [AlgorithmFactory, nil] Custom algorithm factory or nil for default
|
|
26
|
+
def initialize(io_system = nil, algorithm_factory = nil)
  # Fall back to the default I/O system when none is supplied
  io_system ||= System::IOSystem.new
  @io_system = io_system

  # Fall back to the factory returned by Cabriolet.algorithm_factory
  algorithm_factory ||= Cabriolet.algorithm_factory
  @algorithm_factory = algorithm_factory
end
|
|
28
30
|
|
|
29
31
|
# Compress a file to KWAJ format
|
|
@@ -372,12 +374,14 @@ module Cabriolet
|
|
|
372
374
|
# @param output_handle [System::FileHandle] Output handle
|
|
373
375
|
# @return [Integer] Number of bytes written
|
|
374
376
|
def compress_szdd(input_handle, output_handle)
  # Positional arguments for the :lzss compressor requested from the factory;
  # 2048 is passed through unchanged from the original call.
  factory_args = [:lzss, :compressor, @io_system, input_handle, output_handle, 2048]

  @algorithm_factory
    .create(*factory_args, mode: Compressors::LZSS::MODE_QBASIC)
    .compress
end
|