cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,390 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Decompressors
5
+ # MSZIP handles MSZIP (deflate) compressed data
6
+ # Based on RFC 1951 and libmspack implementation
7
+ class MSZIP < Base
8
+ # MSZIP frame size (32KB sliding window)
9
+ FRAME_SIZE = 32_768
10
+
11
+ # Huffman tree constants
12
+ LITERAL_MAXSYMBOLS = 288
13
+ LITERAL_TABLEBITS = 9
14
+ DISTANCE_MAXSYMBOLS = 32
15
+ DISTANCE_TABLEBITS = 6
16
+
17
+ # Match lengths for literal codes 257-285
18
+ LIT_LENGTHS = [
19
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
20
+ 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
21
+ ].freeze
22
+
23
+ # Match offsets for distance codes 0-29
24
+ DIST_OFFSETS = [
25
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385,
26
+ 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12_289, 16_385, 24_577
27
+ ].freeze
28
+
29
+ # Extra bits for literal codes 257-285
30
+ LIT_EXTRABITS = [
31
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2,
32
+ 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
33
+ ].freeze
34
+
35
+ # Extra bits for distance codes 0-29
36
+ DIST_EXTRABITS = [
37
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
38
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
39
+ ].freeze
40
+
41
+ # Order of bit length code lengths
42
+ BITLEN_ORDER = [
43
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
44
+ ].freeze
45
+
46
# Initialize MSZIP decompressor
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
# @param fix_mszip [Boolean] Enable repair mode for corrupted data
def initialize(io_system, input, output, buffer_size, fix_mszip: false)
  super(io_system, input, output, buffer_size)
  @fix_mszip = fix_mszip

  # Sliding window of decompressed bytes. It is forced to binary encoding
  # because it is filled with setbyte/getbyte: a UTF-8 string would be
  # sliced by character rather than by byte when written out.
  @window = ("\0" * FRAME_SIZE).b
  @window_posn = 0
  @bytes_output = 0

  # Bit-level reader over the compressed input
  @bitstream = Binary::Bitstream.new(io_system, input, buffer_size)

  # Code-length tables and decode trees; the trees are built per block
  @literal_lengths = Array.new(LITERAL_MAXSYMBOLS, 0)
  @distance_lengths = Array.new(DISTANCE_MAXSYMBOLS, 0)
  @literal_tree = nil
  @distance_tree = nil
end
71
+
72
# Decompress MSZIP data, one 'CK'-prefixed frame at a time
#
# Each pass locates the next signature, inflates one frame into the
# window and writes at most the remaining requested byte count.
#
# @param bytes [Integer] Number of bytes to decompress
# @return [Integer] Number of bytes decompressed
# @raise [DecompressionError] if a frame is corrupt and repair mode is off
def decompress(bytes)
  total_written = 0

  while bytes.positive?
    # Read 'CK' signature
    read_signature

    # Every frame starts writing at the beginning of the window
    @window_posn = 0
    @bytes_output = 0

    # Inflate the block
    begin
      inflate_block
    rescue DecompressionError
      raise unless @fix_mszip

      # Repair mode: keep whatever was inflated before the error. Those
      # bytes sit in the window but were never counted by a flush.
      if @bytes_output.zero? && @window_posn.positive?
        @bytes_output = @window_posn
        @window_posn = 0
      end

      # Pad the rest of the frame with zeros
      (@bytes_output...FRAME_SIZE).each do |i|
        @window.setbyte(i, 0)
      end
      @bytes_output = FRAME_SIZE
    end

    # Write output. byteslice is used so the amount is measured in bytes
    # whatever the window's string encoding is.
    write_amount = [bytes, @bytes_output].min
    io_system.write(output, @window.byteslice(0, write_amount))
    total_written += write_amount
    bytes -= write_amount
  end

  total_written
end
109
+
110
+ private
111
+
112
# Skip forward in the bitstream until the two-byte 'CK' marker is consumed
#
# @raise [DecompressionError] if a zero byte is read after the first two
#   bytes (treated as end of input) or no marker is found within the
#   search limit
def read_signature
  @bitstream.byte_align

  search_limit = 10_000 # upper bound on bytes scanned
  seen_c = false
  count = 0

  loop do
    octet = @bitstream.read_bits(8)
    count += 1

    # A zero byte past the second position is taken as end of input
    if count > 2 && octet.zero?
      raise DecompressionError,
            "Unexpected EOF while searching for CK signature"
    end

    raise DecompressionError, "CK signature not found in stream" if count > search_limit

    case octet
    when 0x43 # 'C'
      seen_c = true
    when 0x4B # 'K'
      break if seen_c
    else
      seen_c = false
    end
  end
end
147
+
148
# Inflate deflate blocks until one flagged as final has been processed,
# then flush whatever is still pending in the window
#
# @raise [DecompressionError] on an unknown block type
def inflate_block
  final = false

  until final
    # Header: 1-bit final flag followed by a 2-bit block type
    final = @bitstream.read_bits(1) == 1
    kind = @bitstream.read_bits(2)

    case kind
    when 0
      inflate_stored_block
    when 1, 2
      kind == 1 ? build_fixed_trees : build_dynamic_trees
      inflate_huffman_block
    else
      raise DecompressionError, "Invalid block type: #{kind}"
    end
  end

  flush_window if @window_posn.positive?
end
176
+
177
# Copy a stored (uncompressed) block from the bitstream into the window
#
# The block starts on a byte boundary with a 16-bit length followed by
# its 16-bit one's complement.
#
# @raise [DecompressionError] if the length and complement disagree
def inflate_stored_block
  @bitstream.byte_align

  stored_len = @bitstream.read_bits(16)
  check = @bitstream.read_bits(16)

  if stored_len != (~check & 0xFFFF)
    raise DecompressionError,
          "Stored block length complement mismatch"
  end

  stored_len.times do
    @window.setbyte(@window_posn, @bitstream.read_bits(8))
    @window_posn += 1
    flush_window if @window_posn == FRAME_SIZE
  end
end
200
+
201
# Load the fixed code lengths (RFC 1951) into the existing length arrays
# and rebuild both decode tables
def build_fixed_trees
  # Literal/length symbols: each range of symbols shares one code length
  @literal_lengths.fill(0)
  [
    [0...144, 8],
    [144...256, 9],
    [256...280, 7],
    [280...288, 8]
  ].each do |span, bits|
    @literal_lengths.fill(bits, span)
  end

  # All 32 distance symbols use 5-bit codes
  @distance_lengths.fill(5, 0...32)

  build_literal_table
  build_distance_table
end
217
+
218
# Build dynamic Huffman trees from the block header in the stream
#
# Reads the three code counts, the code lengths of the bit-length
# alphabet, then the run-length encoded literal and distance code
# lengths (RFC 1951 section 3.2.7), and finally builds both decode
# tables.
#
# @raise [DecompressionError] if a count is out of range, the bit length
#   tree cannot be built, an unknown bit length code is decoded, or a
#   repeat run extends past the declared number of code lengths
def build_dynamic_trees
  # Read code counts
  lit_codes = @bitstream.read_bits(5) + 257
  dist_codes = @bitstream.read_bits(5) + 1
  bitlen_codes = @bitstream.read_bits(4) + 4

  # Validate counts
  if lit_codes > LITERAL_MAXSYMBOLS
    raise DecompressionError,
          "Too many literal codes: #{lit_codes}"
  end
  if dist_codes > DISTANCE_MAXSYMBOLS
    raise DecompressionError,
          "Too many distance codes: #{dist_codes}"
  end

  # Read bit length code lengths (stored in BITLEN_ORDER permutation)
  bl_lengths = Array.new(19, 0)
  bitlen_codes.times do |i|
    bl_lengths[BITLEN_ORDER[i]] = @bitstream.read_bits(3)
  end

  # Build bit length decode table
  bl_tree = Huffman::Tree.new(bl_lengths, 19)
  unless bl_tree.build_table(7)
    raise DecompressionError,
          "Failed to build bit length tree"
  end

  # Read code lengths using bit length tree
  total_codes = lit_codes + dist_codes
  code_lengths = []

  while code_lengths.size < total_codes
    code = Huffman::Decoder.decode_symbol(
      @bitstream, bl_tree.table, 7, bl_lengths, 19
    )

    if code < 16
      # Literal code length
      code_lengths << code
      next
    end

    value, run =
      case code
      when 16
        # Repeat the previous code length 3-6 times. "Previous" is the
        # last length emitted, so it is 0 right after a zero run (and 0
        # when nothing has been emitted yet).
        [code_lengths.last || 0, @bitstream.read_bits(2) + 3]
      when 17
        # Repeat 0 for 3-10 times
        [0, @bitstream.read_bits(3) + 3]
      when 18
        # Repeat 0 for 11-138 times
        [0, @bitstream.read_bits(7) + 11]
      else
        raise DecompressionError, "Invalid bit length code: #{code}"
      end

    # A run must not spill past the declared number of code lengths
    if code_lengths.size + run > total_codes
      raise DecompressionError,
            "Code length run exceeds declared code count"
    end

    code_lengths.concat(Array.new(run, value))
  end

  # Split into literal and distance lengths, zero-padded to full size
  @literal_lengths = code_lengths[0, lit_codes] +
                     Array.new(LITERAL_MAXSYMBOLS - lit_codes, 0)
  @distance_lengths = code_lengths[lit_codes, dist_codes] +
                      Array.new(DISTANCE_MAXSYMBOLS - dist_codes, 0)

  # Build decode tables
  build_literal_table
  build_distance_table
end
290
+
291
# Create the literal/length Huffman tree from the current code lengths
# and build its decode table
#
# @raise [DecompressionError] if the table cannot be built
def build_literal_table
  @literal_tree = Huffman::Tree.new(@literal_lengths, LITERAL_MAXSYMBOLS)
  built = @literal_tree.build_table(LITERAL_TABLEBITS)

  raise DecompressionError, "Failed to build literal tree" unless built
end
298
+
299
# Create the distance Huffman tree from the current code lengths and
# build its decode table
#
# @raise [DecompressionError] if the table cannot be built
def build_distance_table
  @distance_tree = Huffman::Tree.new(@distance_lengths, DISTANCE_MAXSYMBOLS)
  built = @distance_tree.build_table(DISTANCE_TABLEBITS)

  raise DecompressionError, "Failed to build distance tree" unless built
end
307
+
308
# Decode symbols with the current literal tree until the end-of-block
# symbol (256) is read
#
# Symbols below 256 are stored in the window as literal bytes; symbols
# above 256 are handed to decode_match as length codes.
def inflate_huffman_block
  loop do
    symbol = Huffman::Decoder.decode_symbol(
      @bitstream, @literal_tree.table, LITERAL_TABLEBITS,
      @literal_lengths, LITERAL_MAXSYMBOLS
    )

    # End of block
    break if symbol == 256

    if symbol > 256
      # Length/distance pair (LZ77 match)
      decode_match(symbol)
      next
    end

    # Literal byte
    @window.setbyte(@window_posn, symbol)
    @window_posn += 1
    flush_window if @window_posn == FRAME_SIZE
  end
end
331
+
332
# Decode one LZ77 length/distance pair and copy the referenced bytes
# forward in the window
#
# @param code [Integer] Length code (257-285)
# @raise [DecompressionError] if the length or distance code is out of range
def decode_match(code)
  length_index = code - 257
  raise DecompressionError, "Invalid length code: #{code}" if length_index >= 29

  # Length: base value plus optional extra bits
  length_bits = LIT_EXTRABITS[length_index]
  match_length = LIT_LENGTHS[length_index]
  match_length += @bitstream.read_bits(length_bits) if length_bits.positive?

  # Distance: Huffman symbol, then base value plus optional extra bits
  dist_code = Huffman::Decoder.decode_symbol(
    @bitstream, @distance_tree.table, DISTANCE_TABLEBITS,
    @distance_lengths, DISTANCE_MAXSYMBOLS
  )
  raise DecompressionError, "Invalid distance code: #{dist_code}" if dist_code >= 30

  dist_bits = DIST_EXTRABITS[dist_code]
  distance = DIST_OFFSETS[dist_code]
  distance += @bitstream.read_bits(dist_bits) if dist_bits.positive?

  # Source position; wraps to the end of the window when the distance
  # reaches back past the current write position
  source = @window_posn - distance
  source += FRAME_SIZE if distance > @window_posn

  wrap_mask = FRAME_SIZE - 1

  # Byte-by-byte copy so that overlapping matches repeat fresh output
  match_length.times do
    @window.setbyte(@window_posn, @window.getbyte(source))
    @window_posn += 1
    source = (source + 1) & wrap_mask
    flush_window if @window_posn == FRAME_SIZE
  end
end
377
+
378
# Account for the bytes currently pending in the window and rewind the
# write position to the start
#
# @raise [DecompressionError] if the frame would exceed FRAME_SIZE bytes
def flush_window
  produced = @bytes_output + @window_posn
  @bytes_output = produced

  raise DecompressionError, "Output overflow" if produced > FRAME_SIZE

  @window_posn = 0
end
388
+ end
389
+ end
390
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Decompressors
5
+ # None handles uncompressed data (no compression)
6
+ class None < Base
7
# Decompress (copy) the specified number of bytes
#
# Data is copied from the input handle to the output handle in chunks of
# at most @buffer_size bytes. Copying stops early when the read returns
# no data.
#
# @param bytes [Integer] Number of bytes to copy
# @return [Integer] Number of bytes copied
def decompress(bytes)
  total_copied = 0

  while total_copied < bytes
    chunk_size = [bytes - total_copied, @buffer_size].min
    data = @io_system.read(@input, chunk_size)

    # End of input may be reported as an empty string or as nil
    # (Ruby's IO#read(length) returns nil at EOF); stop on either.
    break if data.nil? || data.empty?

    @io_system.write(@output, data)
    total_copied += data.bytesize
  end

  total_copied
end
25
+ end
26
+ end
27
+ end