cabriolet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +799 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +29 -0
- data/README.adoc +1207 -0
- data/exe/cabriolet +6 -0
- data/lib/cabriolet/auto.rb +173 -0
- data/lib/cabriolet/binary/bitstream.rb +148 -0
- data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
- data/lib/cabriolet/binary/chm_structures.rb +213 -0
- data/lib/cabriolet/binary/hlp_structures.rb +66 -0
- data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
- data/lib/cabriolet/binary/lit_structures.rb +107 -0
- data/lib/cabriolet/binary/oab_structures.rb +112 -0
- data/lib/cabriolet/binary/structures.rb +56 -0
- data/lib/cabriolet/binary/szdd_structures.rb +60 -0
- data/lib/cabriolet/cab/compressor.rb +382 -0
- data/lib/cabriolet/cab/decompressor.rb +510 -0
- data/lib/cabriolet/cab/extractor.rb +357 -0
- data/lib/cabriolet/cab/parser.rb +264 -0
- data/lib/cabriolet/chm/compressor.rb +513 -0
- data/lib/cabriolet/chm/decompressor.rb +436 -0
- data/lib/cabriolet/chm/parser.rb +254 -0
- data/lib/cabriolet/cli.rb +776 -0
- data/lib/cabriolet/compressors/base.rb +34 -0
- data/lib/cabriolet/compressors/lzss.rb +250 -0
- data/lib/cabriolet/compressors/lzx.rb +581 -0
- data/lib/cabriolet/compressors/mszip.rb +315 -0
- data/lib/cabriolet/compressors/quantum.rb +446 -0
- data/lib/cabriolet/constants.rb +75 -0
- data/lib/cabriolet/decompressors/base.rb +39 -0
- data/lib/cabriolet/decompressors/lzss.rb +138 -0
- data/lib/cabriolet/decompressors/lzx.rb +726 -0
- data/lib/cabriolet/decompressors/mszip.rb +390 -0
- data/lib/cabriolet/decompressors/none.rb +27 -0
- data/lib/cabriolet/decompressors/quantum.rb +456 -0
- data/lib/cabriolet/errors.rb +39 -0
- data/lib/cabriolet/format_detector.rb +156 -0
- data/lib/cabriolet/hlp/compressor.rb +272 -0
- data/lib/cabriolet/hlp/decompressor.rb +198 -0
- data/lib/cabriolet/hlp/parser.rb +131 -0
- data/lib/cabriolet/huffman/decoder.rb +79 -0
- data/lib/cabriolet/huffman/encoder.rb +108 -0
- data/lib/cabriolet/huffman/tree.rb +138 -0
- data/lib/cabriolet/kwaj/compressor.rb +479 -0
- data/lib/cabriolet/kwaj/decompressor.rb +237 -0
- data/lib/cabriolet/kwaj/parser.rb +183 -0
- data/lib/cabriolet/lit/compressor.rb +255 -0
- data/lib/cabriolet/lit/decompressor.rb +250 -0
- data/lib/cabriolet/models/cabinet.rb +81 -0
- data/lib/cabriolet/models/chm_file.rb +28 -0
- data/lib/cabriolet/models/chm_header.rb +67 -0
- data/lib/cabriolet/models/chm_section.rb +38 -0
- data/lib/cabriolet/models/file.rb +119 -0
- data/lib/cabriolet/models/folder.rb +102 -0
- data/lib/cabriolet/models/folder_data.rb +21 -0
- data/lib/cabriolet/models/hlp_file.rb +45 -0
- data/lib/cabriolet/models/hlp_header.rb +37 -0
- data/lib/cabriolet/models/kwaj_header.rb +98 -0
- data/lib/cabriolet/models/lit_header.rb +55 -0
- data/lib/cabriolet/models/oab_header.rb +95 -0
- data/lib/cabriolet/models/szdd_header.rb +72 -0
- data/lib/cabriolet/modifier.rb +326 -0
- data/lib/cabriolet/oab/compressor.rb +353 -0
- data/lib/cabriolet/oab/decompressor.rb +315 -0
- data/lib/cabriolet/parallel.rb +333 -0
- data/lib/cabriolet/repairer.rb +288 -0
- data/lib/cabriolet/streaming.rb +221 -0
- data/lib/cabriolet/system/file_handle.rb +107 -0
- data/lib/cabriolet/system/io_system.rb +87 -0
- data/lib/cabriolet/system/memory_handle.rb +105 -0
- data/lib/cabriolet/szdd/compressor.rb +217 -0
- data/lib/cabriolet/szdd/decompressor.rb +184 -0
- data/lib/cabriolet/szdd/parser.rb +127 -0
- data/lib/cabriolet/validator.rb +332 -0
- data/lib/cabriolet/version.rb +5 -0
- data/lib/cabriolet.rb +104 -0
- metadata +157 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module Compressors
|
|
5
|
+
# Abstract parent of every compression algorithm.
#
# Stores the I/O collaborators handed to the constructor and exposes them
# through readers. Concrete compressors subclass this and override
# #compress; the implementation here only raises.
class Base
  # Collaborators supplied at construction time.
  attr_reader :io_system
  attr_reader :input
  attr_reader :output
  attr_reader :buffer_size

  # Create a compressor bound to an input and an output handle.
  #
  # @param io_system [System::IOSystem] I/O system for reading/writing
  # @param input [System::FileHandle, System::MemoryHandle] Input handle
  # @param output [System::FileHandle, System::MemoryHandle] Output handle
  # @param buffer_size [Integer] Buffer size for I/O operations
  def initialize(io_system, input, output, buffer_size)
    @io_system, @input = io_system, input
    @output, @buffer_size = output, buffer_size
  end

  # Run the compression. Subclasses are required to override this.
  #
  # @return [Integer] Number of bytes written
  # @raise [NotImplementedError] Always, unless overridden by a subclass
  def compress
    message = "#{self.class} must implement #compress"
    raise NotImplementedError, message
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module Compressors
|
|
5
|
+
# LZSS compressor for creating LZSS-compressed data
|
|
6
|
+
#
|
|
7
|
+
# LZSS (Lempel-Ziv-Storer-Szymanski) is a derivative of LZ77 compression.
|
|
8
|
+
# It uses a 4096-byte sliding window with a control byte mechanism to
|
|
9
|
+
# indicate whether the next operation is a literal byte copy or a match
|
|
10
|
+
# from the window history.
|
|
11
|
+
#
|
|
12
|
+
# The compression algorithm searches for matching sequences in the sliding
|
|
13
|
+
# window and encodes them as (offset, length) pairs when the match is 3 or
|
|
14
|
+
# more bytes. Shorter sequences are encoded as literal bytes.
|
|
15
|
+
class LZSS < Base
|
|
16
|
+
# LZSS algorithm constants
|
|
17
|
+
WINDOW_SIZE = 4096
|
|
18
|
+
WINDOW_FILL = 0x20
|
|
19
|
+
MIN_MATCH = 3
|
|
20
|
+
MAX_MATCH = 18 # 0x0F + 3
|
|
21
|
+
|
|
22
|
+
# LZSS modes
|
|
23
|
+
MODE_EXPAND = 0
|
|
24
|
+
MODE_MSHELP = 1
|
|
25
|
+
MODE_QBASIC = 2
|
|
26
|
+
|
|
27
|
+
attr_reader :mode, :window, :window_pos
|
|
28
|
+
|
|
29
|
+
# Set up an LZSS compressor.
#
# The sliding window starts out filled with WINDOW_FILL and the write
# cursor starts at a mode-dependent position. In MODE_MSHELP the control
# byte mask is 0xFF (every control byte is XORed with it when built);
# in all other modes the mask is 0x00.
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
# @param mode [Integer] LZSS mode (default: MODE_EXPAND)
def initialize(io_system, input, output, buffer_size,
               mode = MODE_EXPAND)
  super(io_system, input, output, buffer_size)
  @mode = mode
  @invert = if mode == MODE_MSHELP
              0xFF
            else
              0x00
            end
  @window = Array.new(WINDOW_SIZE, WINDOW_FILL)
  # Must run after @mode is assigned: the start position depends on it.
  @window_pos = initialize_window_position
end
|
|
44
|
+
|
|
45
|
+
# Compress the whole input with LZSS and write the result to the output.
#
# The input is read fully into memory, then encoded block by block; each
# block is one control byte followed by the operations it describes.
#
# @return [Integer] Number of bytes written
def compress
  source = read_all_input
  total = 0
  cursor = 0

  until cursor >= source.bytesize
    control_byte, encoded_ops, cursor = process_block(source, cursor)
    total += write_block(control_byte, encoded_ops)
  end

  total
end
|
|
63
|
+
|
|
64
|
+
private
|
|
65
|
+
|
|
66
|
+
# Encode the operations governed by a single control byte.
#
# Consumes input until eight operations have been produced or the input
# is exhausted. A match of at least MIN_MATCH bytes becomes an encoded
# (offset, length) pair flagged with a 0 bit; anything else becomes a
# one-byte literal flagged with a 1 bit. The sliding window is updated
# as input is consumed.
#
# @param input_data [String] Input data being compressed
# @param input_pos [Integer] Current position in input
# @return [Array] control_byte, encoded_ops, new_input_pos
def process_block(input_data, input_pos)
  flags = []
  payload = []
  total = input_data.bytesize

  while flags.size < 8 && input_pos < total
    found = find_match(input_data, input_pos)

    unless found && found[:length] >= MIN_MATCH
      literal = input_data.getbyte(input_pos)
      flags.push(1)
      payload.push([literal].pack("C"))
      input_pos = add_byte_to_window(literal, input_pos)
      next
    end

    flags.push(0)
    payload.push(encode_match(found[:offset], found[:length]))
    input_pos = add_match_to_window(input_data, input_pos, found[:length])
  end

  [build_control_byte(flags), payload, input_pos]
end
|
|
96
|
+
|
|
97
|
+
# Pack control bits into one byte, least significant bit first.
#
# The bit at index i lands in bit position i of the result. The packed
# value is then XORed with @invert before being returned.
#
# @param control_bits [Array<Integer>] Array of bits (0 or 1)
# @return [Integer] Control byte value
def build_control_byte(control_bits)
  packed = control_bits.each_with_index.reduce(0) do |acc, (bit, position)|
    acc | (bit << position)
  end
  packed ^ @invert
end
|
|
108
|
+
|
|
109
|
+
# Emit one encoded block: the control byte first, then every operation
# in order.
#
# @param control_byte [Integer] Control byte value
# @param encoded_ops [Array<String>] Encoded operations
# @return [Integer] Number of bytes written
def write_block(control_byte, encoded_ops)
  encoded_ops.reduce(write_output_byte(control_byte)) do |total, op|
    total + write_output_data(op)
  end
end
|
|
121
|
+
|
|
122
|
+
# Copy the bytes covered by a match from the input into the sliding
# window, advancing the write cursor (with wrap-around at WINDOW_SIZE)
# once per byte.
#
# @param input_data [String] Input data
# @param input_pos [Integer] Current input position
# @param length [Integer] Number of bytes to add
# @return [Integer] New input position
def add_match_to_window(input_data, input_pos, length)
  # A non-positive length copies nothing and leaves the position as is.
  finish = input_pos + [length, 0].max

  (input_pos...finish).each do |index|
    @window[@window_pos] = input_data.getbyte(index)
    @window_pos = (@window_pos + 1) % WINDOW_SIZE
  end

  finish
end
|
|
136
|
+
|
|
137
|
+
# Store one literal byte at the window's write cursor and advance the
# cursor by one, wrapping at WINDOW_SIZE.
#
# @param byte [Integer] Byte value
# @param input_pos [Integer] Current input position
# @return [Integer] New input position
def add_byte_to_window(byte, input_pos)
  slot = @window_pos
  @window[slot] = byte
  @window_pos = slot.succ % WINDOW_SIZE
  input_pos.succ
end
|
|
147
|
+
|
|
148
|
+
# Compute where the window's write cursor starts for the current mode:
# 18 bytes before the end of the window in MODE_QBASIC, 16 bytes before
# the end in every other mode.
#
# @return [Integer] Initial window position
def initialize_window_position
  return WINDOW_SIZE - 18 if @mode == MODE_QBASIC

  WINDOW_SIZE - 16
end
|
|
155
|
+
|
|
156
|
+
# Read the entire input into memory.
#
# Pulls chunks of up to @buffer_size bytes from the input handle via the
# I/O system and concatenates them until end of input is reported.
#
# @return [String] All input data
def read_all_input
  data = +""
  loop do
    chunk = @io_system.read(@input, @buffer_size)
    # End of input may be reported either as an empty string or, following
    # the IO#read(length) convention, as nil. Accept both so a nil does
    # not surface as a NoMethodError on #empty?.
    break if chunk.nil? || chunk.empty?

    data << chunk
  end
  data
end
|
|
169
|
+
|
|
170
|
+
# Find the longest match for the upcoming input in the sliding window.
#
# Every window offset is tried as a candidate start. A candidate is
# evaluated the way a byte-by-byte LZSS decoder will replay it: the
# decoder copies window[src] to window[cursor] one byte at a time, so when
# the candidate starts `distance` bytes (1 or more) behind the write
# cursor, the byte read at step k >= distance is one this same match
# wrote at step k - distance, not what the window currently holds.
# Comparing against the stale window contents there would emit matches
# that decode to different data.
#
# @param input_data [String] Input data being compressed
# @param input_pos [Integer] Current position in input
# @return [Hash, nil] Hash with :offset and :length, or nil if no match
def find_match(input_data, input_pos)
  max_length = [MAX_MATCH, input_data.bytesize - input_pos].min

  # Don't search if we can't even get a MIN_MATCH
  return nil if max_length < MIN_MATCH

  mask = WINDOW_SIZE - 1
  best_match = nil

  WINDOW_SIZE.times do |offset|
    # How far the candidate start lies behind the write cursor (mod window).
    distance = (@window_pos - offset) & mask

    length = 0
    while length < max_length
      expected =
        if distance.positive? && length >= distance
          # Source position was already overwritten by this match.
          input_data.getbyte(input_pos + length - distance)
        else
          @window[(offset + length) & mask]
        end
      break unless input_data.getbyte(input_pos + length) == expected

      length += 1
    end

    # Keep only strictly longer matches, so the first offset wins ties.
    next unless length >= MIN_MATCH &&
                (best_match.nil? || length > best_match[:length])

    best_match = { offset: offset, length: length }

    # Nothing can beat a match that already covers all usable input.
    break if length == max_length
  end

  best_match
end
|
|
205
|
+
|
|
206
|
+
# Serialize a match into its two-byte on-disk form.
#
# Byte 1 carries the low 8 bits of the offset. Byte 2 carries offset bits
# 8-11 in its high nibble and (length - 3) in its low nibble.
#
# @param offset [Integer] Offset into window (0-4095)
# @param length [Integer] Length of match (3-18)
# @return [String] Two-byte encoded match
def encode_match(offset, length)
  low_byte = offset & 0xFF
  # Shifting by 4 moves offset bits 8-11 straight into the high nibble.
  high_nibble = (offset >> 4) & 0xF0
  length_nibble = (length - 3) & 0x0F

  [low_byte, high_nibble | length_nibble].pack("C2")
end
|
|
221
|
+
|
|
222
|
+
# Send exactly one byte to the output handle.
#
# @param byte [Integer] Byte to write
# @return [Integer] Number of bytes written (1)
# @raise [Errors::CompressionError] if the I/O system does not report
#   exactly one byte written
def write_output_byte(byte)
  count = @io_system.write(@output, [byte].pack("C"))
  unless count == 1
    raise Errors::CompressionError, "Failed to write output byte"
  end

  count
end
|
|
234
|
+
|
|
235
|
+
# Send a string of bytes to the output handle, insisting that all of it
# is accepted.
#
# @param data [String] Data to write
# @return [Integer] Number of bytes written
# @raise [Errors::CompressionError] if the I/O system reports a byte
#   count different from data.bytesize
def write_output_data(data)
  expected = data.bytesize
  written = @io_system.write(@output, data)

  unless written == expected
    raise Errors::CompressionError,
          "Failed to write output data (expected #{expected}, " \
          "wrote #{written})"
  end

  written
end
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
end
|