cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Compressors
5
# Abstract parent of every compression algorithm.
#
# Stores the collaborators a compressor needs (I/O system, input handle,
# output handle, buffer size) and exposes them through readers. Concrete
# algorithms subclass this and override #compress.
class Base
  # @return [System::IOSystem] I/O system used for reading/writing
  attr_reader :io_system

  # @return [System::FileHandle, System::MemoryHandle] Input handle
  attr_reader :input

  # @return [System::FileHandle, System::MemoryHandle] Output handle
  attr_reader :output

  # @return [Integer] Buffer size for I/O operations
  attr_reader :buffer_size

  # Remember the collaborators; nothing is read or written here.
  #
  # @param io_system [System::IOSystem] I/O system for reading/writing
  # @param input [System::FileHandle, System::MemoryHandle] Input handle
  # @param output [System::FileHandle, System::MemoryHandle] Output handle
  # @param buffer_size [Integer] Buffer size for I/O operations
  def initialize(io_system, input, output, buffer_size)
    @buffer_size = buffer_size
    @output = output
    @input = input
    @io_system = io_system
  end

  # Placeholder for the algorithm-specific compression routine.
  #
  # @return [Integer] Number of bytes written (in subclasses)
  # @raise [NotImplementedError] always, unless a subclass overrides it
  def compress
    raise NotImplementedError, "#{self.class} must implement #compress"
  end
end
33
+ end
34
+ end
@@ -0,0 +1,250 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Compressors
5
+ # LZSS compressor for creating LZSS-compressed data
6
+ #
7
+ # LZSS (Lempel-Ziv-Storer-Szymanski) is a derivative of LZ77 compression.
8
+ # It uses a 4096-byte sliding window with a control byte mechanism to
9
+ # indicate whether the next operation is a literal byte copy or a match
10
+ # from the window history.
11
+ #
12
+ # The compression algorithm searches for matching sequences in the sliding
13
+ # window and encodes them as (offset, length) pairs when the match is 3 or
14
+ # more bytes. Shorter sequences are encoded as literal bytes.
15
+ class LZSS < Base
16
# Sliding-window geometry and match-length limits
WINDOW_SIZE = 4096 # number of byte slots in the sliding window
WINDOW_FILL = 0x20 # value every window slot holds before any input is seen
MIN_MATCH = 3 # shortest run encoded as an (offset, length) pair
MAX_MATCH = 0x0F + 3 # 18: largest 4-bit length field plus the implicit 3

# Stream variants accepted by the constructor's mode argument
MODE_EXPAND = 0 # default variant
MODE_MSHELP = 1 # control bytes are XORed with 0xFF before being written
MODE_QBASIC = 2 # window cursor starts 18 (not 16) slots before the end
26
+
27
+ attr_reader :mode, :window, :window_pos
28
+
29
# Set up an LZSS compressor with a pre-filled sliding window.
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
# @param mode [Integer] LZSS mode (default: MODE_EXPAND)
def initialize(io_system, input, output, buffer_size,
               mode = MODE_EXPAND)
  super(io_system, input, output, buffer_size)
  @mode = mode

  # In MODE_MSHELP every control byte is XORed with 0xFF before it is
  # written; in all other modes the mask is a no-op.
  @invert = if mode == MODE_MSHELP
              0xFF
            else
              0x00
            end

  # The window starts out full of WINDOW_FILL; the starting cursor
  # depends on @mode, so it must be computed after @mode is set.
  @window = Array.new(WINDOW_SIZE, WINDOW_FILL)
  @window_pos = initialize_window_position
end
44
+
45
# Run the whole LZSS compression: slurp the input, then emit one
# control-byte block (up to 8 operations) at a time until every input
# byte has been consumed.
#
# @return [Integer] Number of bytes written
def compress
  source = read_all_input
  cursor = 0
  total = 0

  until cursor >= source.bytesize
    control, operations, cursor = process_block(source, cursor)
    total += write_block(control, operations)
  end

  total
end
63
+
64
+ private
65
+
66
# Encode the operations governed by a single control byte.
#
# At most 8 operations are produced (fewer when the input runs out).
# Each operation contributes one flag bit: 1 for a literal byte, 0 for
# an (offset, length) match. The window is updated as bytes are consumed.
#
# @param input_data [String] Input data being compressed
# @param input_pos [Integer] Current position in input
# @return [Array] control_byte, encoded_ops, new_input_pos
def process_block(input_data, input_pos)
  flags = []
  payload = []
  cursor = input_pos

  8.times do
    break unless cursor < input_data.bytesize

    found = find_match(input_data, cursor)

    if !found || found[:length] < MIN_MATCH
      # No usable match: emit the byte itself.
      literal = input_data.getbyte(cursor)
      flags << 1
      payload << [literal].pack("C")
      cursor = add_byte_to_window(literal, cursor)
    else
      # Usable match: emit the two-byte (offset, length) pair.
      flags << 0
      payload << encode_match(found[:offset], found[:length])
      cursor = add_match_to_window(input_data, cursor, found[:length])
    end
  end

  [build_control_byte(flags), payload, cursor]
end
96
+
97
# Pack flag bits into a control byte, least significant bit first.
#
# The first flag lands in bit 0, the second in bit 1, and so on. The
# packed value is then XORed with @invert before being returned.
#
# @param control_bits [Array<Integer>] Array of bits (0 or 1)
# @return [Integer] Control byte value
def build_control_byte(control_bits)
  packed = control_bits.each_with_index.reduce(0) do |acc, (bit, position)|
    acc | (bit << position)
  end
  packed ^ @invert
end
108
+
109
# Emit one block: the control byte first, then each encoded operation
# in order.
#
# @param control_byte [Integer] Control byte value
# @param encoded_ops [Array<String>] Encoded operations
# @return [Integer] Number of bytes written
def write_block(control_byte, encoded_ops)
  encoded_ops.reduce(write_output_byte(control_byte)) do |total, operation|
    total + write_output_data(operation)
  end
end
121
+
122
# Copy the bytes covered by a match from the input into the sliding
# window, advancing (and wrapping) the window cursor for each byte.
#
# @param input_data [String] Input data
# @param input_pos [Integer] Current input position
# @param length [Integer] Number of bytes to add
# @return [Integer] New input position
def add_match_to_window(input_data, input_pos, length)
  wrap_mask = WINDOW_SIZE - 1
  finish = input_pos + length
  cursor = input_pos

  while cursor < finish
    @window[@window_pos] = input_data.getbyte(cursor)
    @window_pos = (@window_pos + 1) & wrap_mask
    cursor += 1
  end

  cursor
end
136
+
137
# Store one literal byte at the window cursor, then advance the cursor
# (wrapping at the end of the window).
#
# @param byte [Integer] Byte value
# @param input_pos [Integer] Current input position
# @return [Integer] New input position
def add_byte_to_window(byte, input_pos)
  slot = @window_pos
  @window[slot] = byte
  @window_pos = slot.succ & (WINDOW_SIZE - 1)
  input_pos.succ
end
147
+
148
# Compute where the window cursor starts for the configured mode.
#
# MODE_QBASIC starts 18 slots before the end of the window; every other
# mode starts 16 slots before the end.
#
# @return [Integer] Initial window position
def initialize_window_position
  return WINDOW_SIZE - 18 if @mode == MODE_QBASIC

  WINDOW_SIZE - 16
end
155
+
156
# Read the entire input into memory as one binary string.
#
# Chunks of up to @buffer_size bytes are requested from the I/O system
# until it signals end of input. Both an empty string and nil are
# treated as end of input, because Ruby's own IO#read(length) returns
# nil at EOF and an I/O system that passes that through must not crash
# the compressor.
#
# The accumulator is binary (ASCII-8BIT) and each chunk is re-tagged as
# binary before appending, so chunks carrying different encodings can
# never raise Encoding::CompatibilityError. Callers only use byte-level
# accessors (bytesize/getbyte), which are unaffected by the encoding tag.
#
# @return [String] All input data (binary encoding)
def read_all_input
  data = String.new(encoding: Encoding::BINARY)

  loop do
    chunk = @io_system.read(@input, @buffer_size)
    break if chunk.nil? || chunk.empty?

    data << chunk.b
  end

  data
end
169
+
170
# Find the longest window match for the input at input_pos.
#
# Every window offset is tried as a match source. The comparison is
# made against what a decoder would see while copying the match, not
# against the window as it is now. A byte-by-byte decoder (the standard
# LZSS expansion loop, e.g. libmspack's lzssd; the decoder itself is not
# visible from this file) writes each copied byte back at the window
# cursor before reading the next source byte. So once the source index
# reaches a slot the match has already written, the decoder reads the
# freshly written input byte rather than the old window content.
# Comparing against the old content would let the compressor emit pairs
# that expand to different data; simulating the overwrite prevents that
# and also permits self-overlapping (run-length style) matches.
#
# The first offset that reaches the greatest length wins.
#
# @param input_data [String] Input data being compressed
# @param input_pos [Integer] Current position in input
# @return [Hash, nil] Hash with :offset and :length, or nil if no match
def find_match(input_data, input_pos)
  max_length = [MAX_MATCH, input_data.bytesize - input_pos].min

  # Don't search if we can't even get a MIN_MATCH
  return nil if max_length < MIN_MATCH

  mask = WINDOW_SIZE - 1
  best_match = nil

  WINDOW_SIZE.times do |offset|
    length = 0

    while length < max_length
      index = (offset + length) & mask
      # Distance of the source slot ahead of the write cursor. If it is
      # smaller than the number of bytes copied so far, that slot has
      # already been overwritten by this very match.
      ahead = (index - @window_pos) & mask
      candidate = if ahead < length
                    input_data.getbyte(input_pos + ahead)
                  else
                    @window[index]
                  end
      break unless candidate == input_data.getbyte(input_pos + length)

      length += 1
    end

    next unless length >= MIN_MATCH &&
                (best_match.nil? || length > best_match[:length])

    best_match = { offset: offset, length: length }

    # Nothing longer is possible; later offsets could only tie.
    break if length == max_length
  end

  best_match
end
205
+
206
# Pack an (offset, length) pair into its two-byte wire form.
#
# Byte 1 holds the low 8 bits of the offset. Byte 2 holds offset bits
# 8-11 in its high nibble and (length - 3) in its low nibble.
#
# @param offset [Integer] Offset into window (0-4095)
# @param length [Integer] Length of match (3-18)
# @return [String] Two-byte encoded match
def encode_match(offset, length)
  first = offset & 0xFF
  # Shifting right by 4 and masking with 0xF0 drops offset bits 8-11
  # straight into the high nibble.
  second = ((offset >> 4) & 0xF0) | ((length - 3) & 0x0F)

  [first, second].pack("C2")
end
221
+
222
# Send one byte to the output handle through the I/O system.
#
# @param byte [Integer] Byte to write
# @return [Integer] Number of bytes written (1)
# @raise [Errors::CompressionError] if write fails
def write_output_byte(byte)
  count = @io_system.write(@output, [byte].pack("C"))
  unless count == 1
    raise Errors::CompressionError, "Failed to write output byte"
  end

  count
end
234
+
235
# Send a string to the output handle and verify it was written in full.
#
# @param data [String] Data to write
# @return [Integer] Number of bytes written
# @raise [Errors::CompressionError] if write fails
def write_output_data(data)
  written = @io_system.write(@output, data)
  expected = data.bytesize

  unless written == expected
    raise Errors::CompressionError,
          "Failed to write output data (expected #{expected}, " \
          "wrote #{written})"
  end

  written
end
248
+ end
249
+ end
250
+ end