cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,446 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Compressors
5
+ # Quantum compresses data using arithmetic coding and LZ77-based matching
6
+ # Based on the Quantum decompressor and libmspack qtmd.c implementation
7
+ #
8
+ # STATUS: Functional with known limitations
9
+ # - Literals: WORKING ✓
10
+ # - Short matches (3-13 bytes): WORKING ✓
11
+ # - Longer matches (14+ bytes): Limited support (known issue)
12
+ # - Simple data round-trips successfully
13
+ # - Complex repeated patterns may have issues
14
+ #
15
+ # The Quantum method was created by David Stafford, adapted by Microsoft
16
+ # Corporation.
17
+ # rubocop:disable Metrics/ClassLength
18
+ class Quantum < Base
19
+ # Frame size (32KB per frame)
20
+ FRAME_SIZE = 32_768
21
+
22
+ # Match constants
23
+ MIN_MATCH = 3
24
+ MAX_MATCH = 1028
25
+
26
+ # Position slot tables (same as decompressor)
27
+ POSITION_BASE = [
28
+ 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384,
29
+ 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12_288, 16_384,
30
+ 24_576, 32_768, 49_152, 65_536, 98_304, 131_072, 196_608, 262_144,
31
+ 393_216, 524_288, 786_432, 1_048_576, 1_572_864
32
+ ].freeze
33
+
34
+ EXTRA_BITS = [
35
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
36
+ 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
37
+ 17, 17, 18, 18, 19, 19
38
+ ].freeze
39
+
40
+ LENGTH_BASE = [
41
+ 0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
42
+ 30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
43
+ ].freeze
44
+
45
+ LENGTH_EXTRA = [
46
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
47
+ 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
48
+ ].freeze
49
+
50
+ attr_reader :window_bits, :window_size
51
+
52
# One slot of an arithmetic coding model: a symbol value together with the
# cumulative frequency stored for that slot.
class ModelSymbol
  attr_accessor :sym, :cumfreq

  # @param sym [Integer] symbol value held by this slot
  # @param cumfreq [Integer] cumulative frequency assigned to this slot
  def initialize(sym, cumfreq)
    @sym, @cumfreq = sym, cumfreq
  end
end
61
+
62
# An arithmetic coding model: the symbol table, the number of live entries
# in it, and a countdown (+shiftsleft+) that update_model decrements to
# decide between a cheap halving pass and a full rebuild.
class Model
  attr_accessor :shiftsleft, :entries, :syms

  # @param syms [Array<ModelSymbol>] symbol table (entries + 1 slots)
  # @param entries [Integer] number of usable symbols in the table
  def initialize(syms, entries)
    @shiftsleft = 4
    @entries = entries
    @syms = syms
  end
end
72
+
73
# Build a Quantum compressor.
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
# @param window_bits [Integer] Window size parameter (10-21)
# @raise [ArgumentError] if window_bits is outside 10..21
def initialize(io_system, input, output, buffer_size, window_bits: 10)
  super(io_system, input, output, buffer_size)

  in_range = (10..21).cover?(window_bits)
  raise ArgumentError, "Quantum window_bits must be 10-21, got #{window_bits}" unless in_range

  @window_bits = window_bits
  @window_size = 1 << window_bits

  # All output goes through an MSB-first bit writer.
  @bitstream = Binary::BitstreamWriter.new(
    io_system, output, buffer_size, msb_first: true
  )

  # Sets up the coding models and the arithmetic coder registers.
  initialize_models
end
99
+
100
# Compress the whole input, one FRAME_SIZE chunk at a time.
#
# After every frame the bit writer is flushed, a 0xFF trailer byte is
# written and the models are re-initialised. A chunk shorter than
# FRAME_SIZE is treated as the last one, so no further read is attempted.
#
# @return [Integer] Total number of input bytes consumed
def compress
  consumed = 0
  chunk = io_system.read(input, FRAME_SIZE)

  until chunk.empty?
    consumed += chunk.bytesize

    compress_frame(chunk)

    # Frame trailer
    @bitstream.flush_msb
    @bitstream.write_byte(0xFF)

    # Fresh models for whatever comes next
    initialize_models

    break if chunk.bytesize < FRAME_SIZE

    chunk = io_system.read(input, FRAME_SIZE)
  end

  consumed
end
128
+
129
+ private
130
+
131
# (Re)create every coding model and reset the arithmetic coder registers.
#
# Literal models cover 64 byte values each; the three match-position
# models are sized from the window (2 * window_bits slots, capped at 24
# and 36 for the two short-match models).
def initialize_models
  position_slots = @window_bits * 2
  short_slots = [position_slots, 24].min
  medium_slots = [position_slots, 36].min

  # Literal models: byte ranges 0-63, 64-127, 128-191, 192-255
  @m0sym = init_model_syms(0, 64)
  @m1sym = init_model_syms(64, 64)
  @m2sym = init_model_syms(128, 64)
  @m3sym = init_model_syms(192, 64)
  @model0 = Model.new(@m0sym, 64)
  @model1 = Model.new(@m1sym, 64)
  @model2 = Model.new(@m2sym, 64)
  @model3 = Model.new(@m3sym, 64)

  # Position models for 3-byte, 4-byte and longer matches
  @m4sym = init_model_syms(0, short_slots)
  @m5sym = init_model_syms(0, medium_slots)
  @m6sym = init_model_syms(0, position_slots)
  @model4 = Model.new(@m4sym, short_slots)
  @model5 = Model.new(@m5sym, medium_slots)
  @model6 = Model.new(@m6sym, position_slots)

  # Length slots for long matches
  @m6lsym = init_model_syms(0, 27)
  @model6len = Model.new(@m6lsym, 27)

  # Selector: 0-3 pick a literal model, 4-6 pick a match kind
  @m7sym = init_model_syms(0, 7)
  @model7 = Model.new(@m7sym, 7)

  # Coder registers: full 16-bit range, nothing pending
  @l = 0
  @h = 0xFFFF
  @underflow_bits = 0
end
172
+
173
# Build the initial symbol table for a model: len + 1 slots holding
# consecutive symbols from +start+ with cumulative frequencies counting
# down from +len+ to 0.
def init_model_syms(start, len)
  (0..len).map do |offset|
    ModelSymbol.new(start + offset, len - offset)
  end
end
179
+
180
# Encode one frame: walk the data, emitting a match whenever find_match
# reports at least MIN_MATCH bytes and a literal otherwise, then flush the
# arithmetic coder. No frame header is written.
def compress_frame(data)
  cursor = 0
  total = data.bytesize

  until cursor >= total
    run, distance = find_match(data, cursor)

    if run < MIN_MATCH
      encode_literal(data.getbyte(cursor))
      cursor += 1
    else
      encode_match(run, distance)
      cursor += run
    end
  end

  # Emit the bits needed to pin down the final coder range
  finish_arithmetic_coding
end
205
+
206
# Flush the arithmetic coder at the end of a frame.
#
# Writes bit 14 of the low register, followed by the pending underflow
# bits (plus one extra) with the opposite value, then clears the pending
# counter.
def finish_arithmetic_coding
  @underflow_bits += 1

  lead = (@l >> 14) & 1
  trail = lead ^ 1

  @bitstream.write_bits_msb(lead, 1)
  @underflow_bits.times { @bitstream.write_bits_msb(trail, 1) }

  @underflow_bits = 0
end
223
+
224
# Find the longest earlier occurrence of the bytes at +pos+ that the
# encoder is actually able to represent.
#
# A brute-force backwards scan over the window (nearest offset first; on
# equal length the nearest offset wins). Two limits are enforced so the
# result is always encodable:
#
# * Length is capped at LENGTH_BASE.last + 5. encode_match stores
#   (length - 5) in a length slot and the last slot carries no extra bits,
#   so anything longer would be written as that maximum while the caller
#   still skipped the full length, silently dropping data.
# * A MIN_MATCH-length match is only accepted when its offset fits the
#   short-match position model (min(2 * window_bits, 24) slots). A farther
#   offset maps to a slot that model does not contain and encode_symbol
#   would raise ArgumentError.
#
# @param data [String] frame being compressed
# @param pos [Integer] position in +data+ to find a match for
# @return [Array(Integer, Integer)] [length, offset]; length below
#   MIN_MATCH means "no usable match" and the caller emits a literal
def find_match(data, pos)
  return [0, 0] if pos < MIN_MATCH

  # Longest run that is both representable and available in the frame
  limit = [MAX_MATCH, LENGTH_BASE.last + 5, data.bytesize - pos].min

  # Farthest offset the 3-byte position model can express
  short_slots = [@window_bits * 2, 24].min
  short_reach = POSITION_BASE[short_slots - 1] + (1 << EXTRA_BITS[short_slots - 1])

  best_length = 0
  best_offset = 0
  max_offset = [pos, @window_size].min

  (1..max_offset).each do |offset|
    match_pos = pos - offset
    length = 0
    length += 1 while length < limit &&
                      data.getbyte(match_pos + length) == data.getbyte(pos + length)

    next unless length > best_length
    next if length == MIN_MATCH && offset > short_reach

    best_length = length
    best_offset = offset

    # Nothing farther away can beat a maximal match
    break if best_length == limit
  end

  [best_length, best_offset]
end
252
+
253
# Encode one literal byte.
#
# The top two bits of the byte choose one of the four literal models; that
# choice is sent through the selector model first, then the byte itself is
# coded in the chosen model.
def encode_literal(byte)
  bank = byte >> 6
  literal_model = { 0 => @model0, 1 => @model1, 2 => @model2 }.fetch(bank, @model3)

  encode_symbol(@model7, bank)
  encode_symbol(literal_model, byte)
end
270
+
271
# Encode a match as selector + (optional length) + position.
#
# Selector 4 means a 3-byte match, selector 5 a 4-byte match; both carry
# only a position. Selector 6 covers every other length, sent as
# (length - 5) before the position.
def encode_match(length, offset)
  case length
  when 3
    encode_symbol(@model7, 4)
    encode_position(@model4, offset)
  when 4
    encode_symbol(@model7, 5)
    encode_position(@model5, offset)
  else
    encode_symbol(@model7, 6)
    encode_length(@model6len, length - 5)
    encode_position(@model6, offset)
  end
end
288
+
289
# Encode a match offset as a position slot plus raw extra bits.
#
# Offsets are stored zero-based (offset - 1). The slot goes through the
# given model; when the slot has extra bits, the remainder above the
# slot's base is written directly to the bitstream.
def encode_position(model, offset)
  distance = offset - 1
  slot = find_position_slot(distance)
  encode_symbol(model, slot)

  width = EXTRA_BITS[slot]
  @bitstream.write_bits_msb(distance - POSITION_BASE[slot], width) if width.positive?
end
304
+
305
# Return the first position slot whose range (base .. base + 2**extra - 1)
# contains the zero-based offset; values beyond every slot fall back to
# the last slot.
def find_position_slot(offset)
  slot = POSITION_BASE.each_index.find do |idx|
    offset < POSITION_BASE[idx] + (1 << EXTRA_BITS[idx])
  end

  slot || (POSITION_BASE.length - 1)
end
312
+
313
# Encode an (already biased) match length as a length slot plus raw extra
# bits. The slot goes through the given model; when the slot has extra
# bits, the remainder above the slot's base is written to the bitstream.
def encode_length(model, length)
  slot = find_length_slot(length)
  encode_symbol(model, slot)

  width = LENGTH_EXTRA[slot]
  @bitstream.write_bits_msb(length - LENGTH_BASE[slot], width) if width.positive?
end
328
+
329
# Return the first length slot whose range (base .. base + 2**extra - 1)
# contains +length+; values beyond every slot fall back to the last slot.
def find_length_slot(length)
  slot = LENGTH_BASE.each_index.find do |idx|
    length < LENGTH_BASE[idx] + (1 << LENGTH_EXTRA[idx])
  end

  slot || (LENGTH_BASE.length - 1)
end
336
+
337
# Arithmetic-code one symbol with the given model.
#
# Steps: locate the symbol's slot, narrow [L, H] to that slot's share of
# the current range, bump the cumulative frequency of the slot and every
# slot before it by 8, rescale the model once its total exceeds 3800, and
# finally shift out any settled bits.
#
# @raise [ArgumentError] if the model has no slot for +sym+
def encode_symbol(model, sym)
  idx = (0...model.entries).find { |k| model.syms[k].sym == sym }
  if idx.nil?
    raise ArgumentError,
          "Symbol #{sym} not found in model"
  end

  span = (@h - @l) + 1
  total = model.syms[0].cumfreq
  upper = model.syms[idx].cumfreq
  lower = model.syms[idx + 1].cumfreq

  # H must be derived from the old L, so it is assigned first.
  @h = @l + ((upper * span) / total) - 1
  @l += (lower * span) / total

  model.syms[0..idx].each { |entry| entry.cumfreq += 8 }

  update_model(model) if model.syms[0].cumfreq > 3800

  normalize_range
end
373
+
374
# Renormalise the 16-bit coder registers, emitting bits as they settle.
#
# While the top bits of L and H agree, that bit is written, followed by
# any pending underflow bits with the opposite value. When the top bits
# differ but L is 01... and H is 10..., the range straddles the midpoint:
# one underflow bit is recorded and the second-highest bit is removed from
# both registers. In either case both registers are shifted left by one
# (H shifts in a 1). The loop ends when neither condition holds.
def normalize_range
  loop do
    if (@l ^ @h).nobits?(0x8000)
      settled = (@l >> 15) & 1
      @bitstream.write_bits_msb(settled, 1)

      @underflow_bits.times { @bitstream.write_bits_msb(settled ^ 1, 1) }
      @underflow_bits = 0
    elsif @l.anybits?(0x4000) && @h.nobits?(0x4000)
      @underflow_bits += 1
      @l &= 0x3FFF
      @h |= 0x4000
    else
      break
    end

    @l = (@l << 1) & 0xFFFF
    @h = ((@h << 1) | 1) & 0xFFFF
  end
end
406
+
407
# Rescale a model whose total frequency has grown too large.
#
# Each call decrements the model's countdown. While it stays positive the
# cumulative frequencies are simply halved (keeping them strictly
# decreasing). When it reaches zero the countdown restarts at 50 and the
# table is rebuilt: cumulative values become halved per-symbol
# frequencies, the symbols are reordered by descending frequency with an
# in-place selection sort, and the cumulative values are recomputed.
def update_model(model)
  model.shiftsleft -= 1
  table = model.syms
  count = model.entries

  if model.shiftsleft.positive?
    (count - 1).downto(0) do |k|
      halved = table[k].cumfreq >> 1
      floor = table[k + 1].cumfreq
      table[k].cumfreq = halved <= floor ? floor + 1 : halved
    end
  else
    model.shiftsleft = 50

    # Cumulative -> individual frequency, rounded up, then halved
    count.times do |k|
      table[k].cumfreq = (table[k].cumfreq - table[k + 1].cumfreq + 1) >> 1
    end

    # The exact swap order matters, so keep the plain selection sort
    0.upto(count - 2) do |a|
      (a + 1).upto(count - 1) do |b|
        table[a], table[b] = table[b], table[a] if table[a].cumfreq < table[b].cumfreq
      end
    end

    # Individual -> cumulative frequency
    (count - 1).downto(0) do |k|
      table[k].cumfreq += table[k + 1].cumfreq
    end
  end
end
443
+ # rubocop:enable Metrics/ClassLength
444
+ end
445
+ end
446
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
# Numeric constants used by the CAB and KWAJ format code
module Constants
  # Cabinet signature, the bytes "MSCF" read as a little-endian integer
  CAB_SIGNATURE = 0x4643_534D

  # Compression method identifiers
  COMP_TYPE_NONE    = 0
  COMP_TYPE_MSZIP   = 1
  COMP_TYPE_QUANTUM = 2
  COMP_TYPE_LZX     = 3

  # Mask selecting the compression method bits
  COMP_TYPE_MASK = 0xF

  # Cabinet header flag bits
  FLAG_PREV_CABINET    = 1 << 0
  FLAG_NEXT_CABINET    = 1 << 1
  FLAG_RESERVE_PRESENT = 1 << 2

  # File attribute bits
  ATTRIB_READONLY = 1 << 0
  ATTRIB_HIDDEN   = 1 << 1
  ATTRIB_SYSTEM   = 1 << 2
  ATTRIB_ARCH     = 1 << 5
  ATTRIB_EXEC     = 1 << 6
  ATTRIB_UTF_NAME = 1 << 7

  # Reserved folder index values
  FOLDER_CONTINUED_FROM_PREV     = 0xFFFD
  FOLDER_CONTINUED_TO_NEXT       = 0xFFFE
  FOLDER_CONTINUED_PREV_AND_NEXT = 0xFFFF

  # Block and folder limits
  BLOCK_MAX  = 1 << 15                # Maximum uncompressed block size
  INPUT_MAX  = BLOCK_MAX + 6144       # Maximum compressed block size (LZX worst case)
  FOLDER_MAX = 0xFFFF                 # Maximum number of data blocks per folder
  LENGTH_MAX = BLOCK_MAX * FOLDER_MAX # Maximum file size

  # On-disk structure sizes in bytes
  CFHEADER_SIZE     = 36
  CFHEADER_EXT_SIZE = 4
  CFFOLDER_SIZE     = 8
  CFFILE_SIZE       = 16
  CFDATA_SIZE       = 8

  # I/O open modes
  MODE_READ   = 0
  MODE_WRITE  = 1
  MODE_UPDATE = 2
  MODE_APPEND = 3

  # Seek origins
  SEEK_START = 0
  SEEK_CUR   = 1
  SEEK_END   = 2

  # KWAJ compression method identifiers
  KWAJ_COMP_NONE  = 0
  KWAJ_COMP_XOR   = 1
  KWAJ_COMP_SZDD  = 2
  KWAJ_COMP_LZH   = 3
  KWAJ_COMP_MSZIP = 4

  # KWAJ header flag bits
  KWAJ_HDR_HASLENGTH    = 1 << 0
  KWAJ_HDR_HASUNKNOWN1  = 1 << 1
  KWAJ_HDR_HASUNKNOWN2  = 1 << 2
  KWAJ_HDR_HASFILENAME  = 1 << 3
  KWAJ_HDR_HASFILEEXT   = 1 << 4
  KWAJ_HDR_HASEXTRATEXT = 1 << 5
end
75
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Decompressors
5
# Common superclass of the decompression algorithms: stores the I/O
# collaborators and defines the interface subclasses fill in.
class Base
  attr_reader :io_system, :input, :output, :buffer_size

  # Store the I/O collaborators for later use by the subclass.
  #
  # @param io_system [System::IOSystem] I/O system for reading/writing
  # @param input [System::FileHandle, System::MemoryHandle] Input handle
  # @param output [System::FileHandle, System::MemoryHandle] Output handle
  # @param buffer_size [Integer] Buffer size for I/O operations
  def initialize(io_system, input, output, buffer_size)
    @io_system, @input = io_system, input
    @output, @buffer_size = output, buffer_size
  end

  # Decompress the requested number of bytes. Abstract here.
  #
  # @param bytes [Integer] Number of bytes to decompress
  # @return [Integer] Number of bytes decompressed
  # @raise [NotImplementedError] always, unless overridden by a subclass
  def decompress(bytes)
    raise NotImplementedError, "#{self.class} must implement #decompress"
  end

  # Release resources held by the decompressor. A no-op by default;
  # subclasses override it when they have something to clean up.
  #
  # @return [void]
  def free; end
end
38
+ end
39
+ end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Decompressors
5
+ # LZSS decompressor for LZSS-compressed CAB data
6
+ #
7
+ # LZSS (Lempel-Ziv-Storer-Szymanski) is a derivative of LZ77 compression.
8
+ # It uses a 4096-byte sliding window with a control byte mechanism to
9
+ # indicate whether the next operation is a literal byte copy or a match
10
+ # from the window history.
11
+ class LZSS < Base
12
+ # LZSS algorithm constants
13
+ WINDOW_SIZE = 4096
14
+ WINDOW_FILL = 0x20
15
+
16
+ # LZSS modes
17
+ MODE_EXPAND = 0
18
+ MODE_MSHELP = 1
19
+ MODE_QBASIC = 2
20
+
21
+ attr_reader :mode, :window, :window_pos
22
+
23
# Set up an LZSS decompressor.
#
# The window starts filled with WINDOW_FILL and the write position depends
# on the mode. In MODE_MSHELP every control byte is inverted (XOR 0xFF)
# before use; other modes leave it untouched.
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
# @param mode [Integer] LZSS mode (default: MODE_EXPAND)
def initialize(io_system, input, output, buffer_size,
               mode = MODE_EXPAND)
  super(io_system, input, output, buffer_size)

  # The mode must be stored before the window position is derived from it.
  @mode = mode
  @invert = if mode == MODE_MSHELP
              0xFF
            else
              0x00
            end

  @input_buffer = ""
  @input_pos = 0

  @window = Array.new(WINDOW_SIZE, WINDOW_FILL)
  @window_pos = initialize_window_position
end
40
+
41
# Decompress the LZSS stream until the input is exhausted.
#
# Each control byte (after the mode's inversion mask) describes up to
# eight items, least significant bit first: a set bit is a literal byte, a
# clear bit is a two-byte match giving a 12-bit window position and a
# length of 3-18. Running out of input part-way through an item abandons
# the rest of that control byte.
#
# @param _bytes [Integer] Ignored; the stream is read to EOF
# @return [Integer] Number of bytes decompressed
def decompress(_bytes)
  produced = 0
  wrap = WINDOW_SIZE - 1

  # Store one output byte in the window, write it out and advance.
  emit = lambda do |value|
    @window[@window_pos] = value
    write_output_byte(value)
    produced += 1
    @window_pos = (@window_pos + 1) & wrap
  end

  while (flags = read_input_byte)
    flags ^= @invert

    (0...8).each do |bit|
      if flags[bit] == 1
        value = read_input_byte
        break if value.nil?

        emit.call(value)
      else
        low = read_input_byte
        break if low.nil?

        high = read_input_byte
        break if high.nil?

        # Upper nibble of the second byte extends the position to 12 bits;
        # the lower nibble is the length minus 3.
        source = low | ((high & 0xF0) << 4)
        run = (high & 0x0F) + 3

        run.times do
          emit.call(@window[source])
          source = (source + 1) & wrap
        end
      end
    end
  end

  produced
end
98
+
99
+ private
100
+
101
# Starting write position in the window: 18 bytes before the end in
# MODE_QBASIC, 16 bytes before the end in every other mode.
#
# @return [Integer] Initial window position
def initialize_window_position
  return WINDOW_SIZE - 18 if @mode == MODE_QBASIC

  WINDOW_SIZE - 16
end
108
+
109
# Return the next input byte, refilling the internal buffer from the I/O
# system (buffer_size bytes at a time) whenever it has been used up.
#
# @return [Integer, nil] Byte value, or nil when a refill yields no data
def read_input_byte
  exhausted = @input_pos >= @input_buffer.bytesize

  if exhausted
    @input_buffer = @io_system.read(@input, @buffer_size)
    @input_pos = 0
    return if @input_buffer.empty?
  end

  @input_buffer.getbyte(@input_pos).tap { @input_pos += 1 }
end
123
+
124
# Send one byte to the output handle through the I/O system.
#
# @param byte [Integer] Byte to write
# @return [void]
# @raise [Errors::DecompressionError] if the I/O system does not report
#   exactly one byte written
def write_output_byte(byte)
  packed = [byte].pack("C")
  succeeded = @io_system.write(@output, packed) == 1

  raise Errors::DecompressionError, "Failed to write output byte" unless succeeded
end
136
+ end
137
+ end
138
+ end