cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,510 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module CAB
5
+ # Decompressor is the main interface for CAB file operations
6
+ class Decompressor
7
+ attr_reader :io_system, :parser
8
+ attr_accessor :buffer_size, :fix_mszip, :salvage, :search_buffer_size
9
+
10
# Build a CAB decompressor with default settings.
#
# Buffer size comes from Cabriolet.default_buffer_size; MSZIP repair and
# salvage mode start disabled, and #search reads 32 KiB chunks.
#
# @param io_system [System::IOSystem, nil] I/O backend to use; a new
#   System::IOSystem is created when none is given
def initialize(io_system = nil)
  io_system ||= System::IOSystem.new
  @io_system = io_system
  @parser = Parser.new(io_system)
  @buffer_size = Cabriolet.default_buffer_size
  @search_buffer_size = 32_768
  @fix_mszip = @salvage = false
end
21
+
22
# Parse the CAB file at the given path.
#
# Delegates directly to the parser created in the constructor.
#
# @param filename [String] Path to the CAB file
# @return [Models::Cabinet] Whatever the parser returns for this file
# @raise [ParseError] if the file is not a valid CAB
def open(filename)
  cabinet = @parser.parse(filename)
  cabinet
end
30
+
31
# Extract one file from a cabinet.
#
# A fresh Extractor bound to this decompressor does the work.
#
# @param file [Models::File] File to extract
# @param output_path [String] Where to write the file
# @param options [Hash] Extraction options, forwarded unchanged
# @return [Integer] Number of bytes extracted (as reported by the extractor)
def extract_file(file, output_path, **options)
  Extractor.new(@io_system, self).extract_file(file, output_path, **options)
end
41
+
42
# Extract every file of a cabinet into a directory.
#
# A fresh Extractor bound to this decompressor does the work.
#
# @param cabinet [Models::Cabinet] Cabinet to extract from
# @param output_dir [String] Directory to extract to
# @param options [Hash] Extraction options, forwarded unchanged
# @return [Integer] Number of files extracted (as reported by the extractor)
def extract_all(cabinet, output_dir, **options)
  Extractor.new(@io_system, self).extract_all(cabinet, output_dir, **options)
end
52
+
53
# Build the decompressor matching a folder's compression method.
#
# All decompressors receive the I/O system, both handles and the buffer
# size; MSZIP additionally gets the fix_mszip flag, while LZX and Quantum
# get the folder's compression level as their window size in bits.
#
# @param folder [Models::Folder] Folder to create decompressor for
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @return [Decompressors::Base] Appropriate decompressor instance
# @raise [UnsupportedFormatError] if the compression method is not recognised
def create_decompressor(folder, input, output)
  common = [@io_system, input, output, @buffer_size]

  case folder.compression_method
  when Constants::COMP_TYPE_NONE
    Decompressors::None.new(*common)
  when Constants::COMP_TYPE_MSZIP
    Decompressors::MSZIP.new(*common, fix_mszip: @fix_mszip)
  when Constants::COMP_TYPE_LZX
    Decompressors::LZX.new(*common, window_bits: folder.compression_level)
  when Constants::COMP_TYPE_QUANTUM
    Decompressors::Quantum.new(*common, window_bits: folder.compression_level)
  else
    raise UnsupportedFormatError,
          "Unsupported compression type: #{folder.compression_method}"
  end
end
79
+
80
# Join +next_cabinet+ onto the end of +cabinet+, merging folders and files.
#
# @param cabinet [Models::Cabinet] The left (earlier) cabinet
# @param next_cabinet [Models::Cabinet] The cabinet to append after it
# @return [Boolean] true if successful
# @raise [ArgumentError] if cabinets cannot be merged
def append(cabinet, next_cabinet)
  left = cabinet
  right = next_cabinet
  merge_cabinets(left, right)
end
89
+
90
# Join +prev_cabinet+ onto the front of +cabinet+, merging folders and files.
#
# @param cabinet [Models::Cabinet] The right (later) cabinet
# @param prev_cabinet [Models::Cabinet] The cabinet to prepend before it
# @return [Boolean] true if successful
# @raise [ArgumentError] if cabinets cannot be merged
def prepend(cabinet, prev_cabinet)
  left = prev_cabinet
  right = cabinet
  merge_cabinets(left, right)
end
99
+
100
# Search a file for embedded cabinets.
#
# The file is read in chunks of @search_buffer_size bytes. Each chunk is
# scanned for a plausible cabinet header; every candidate is then parsed
# for real. Successfully parsed cabinets are chained through their +next+
# attribute and the first one is returned.
#
# Warnings (InstallShield header, trailing/truncated data, false cabinets)
# are reported through the I/O system while the handle is still open, and
# the handle is closed exactly once, whether or not an error occurs.
#
# @param filename [String] Path to file to search
# @return [Models::Cabinet, nil] First cabinet found, or nil if none found
def search(filename)
  # The signature scanner needs all 20 header bytes inside one buffer.
  header_span = 20
  # "ISc(" read as a little-endian 32-bit integer.
  installshield_magic = 0x28635349

  first_cabinet = nil
  link_cabinet = nil
  first_len = 0
  false_cabs = 0

  handle = @io_system.open(filename, Constants::MODE_READ)
  begin
    file_length = handle.size

    # Check for InstallShield header at start of file
    if file_length >= 4
      header = @io_system.read(handle, 4)
      @io_system.seek(handle, 0, Constants::SEEK_START)
      if header && header.bytesize == 4 && header.unpack1("V") == installshield_magic
        @io_system.message(handle, "WARNING; found InstallShield header. Use unshield " \
                                   "(https://github.com/twogood/unshield) to unpack this file")
      end
    end

    offset = 0
    while offset < file_length
      length = [file_length - offset, @search_buffer_size].min

      @io_system.seek(handle, offset, Constants::SEEK_START)
      chunk = @io_system.read(handle, length)
      break if chunk.nil? || chunk.empty?

      cab_offset = find_cabinet_in_buffer(chunk.bytes, chunk.bytesize,
                                          offset, handle, filename, file_length)

      if cab_offset.nil?
        # Nothing here. Advance by what was actually read, but keep the last
        # 19 bytes in view so a header straddling two chunks is seen whole
        # on the next pass. The step stays >= 1, so the loop always advances.
        step = chunk.bytesize
        step -= header_span - 1 if step >= header_span && offset + step < file_length
        offset += step
        next
      end

      cabinet = try_parse_cab_at_offset(handle, filename, cab_offset)

      if cabinet
        # Only a cabinet at the very start of the file is used for the
        # truncated/extra-data check below.
        first_len = cabinet.length if cab_offset.zero?

        # Link into list
        if first_cabinet.nil?
          first_cabinet = cabinet
        else
          link_cabinet.next = cabinet
        end
        link_cabinet = cabinet

        # Continue searching after this cabinet
        offset = cab_offset + cabinet.length
      else
        false_cabs += 1
        # Restart search just after the 4-byte signature
        offset = cab_offset + 4
      end
    end

    # Warn about truncated/extra data
    if first_len.positive? && first_len != file_length &&
       (first_cabinet.nil? || first_cabinet.base_offset.zero?)
      if first_len < file_length
        @io_system.message(handle,
                           "WARNING; possible #{file_length - first_len} extra bytes at end of file.")
      else
        @io_system.message(handle,
                           "WARNING; file possibly truncated by #{first_len - file_length} bytes.")
      end
    end

    if false_cabs.positive? && Cabriolet.verbose
      @io_system.message(handle,
                         "#{false_cabs} false cabinets found")
    end
  ensure
    @io_system.close(handle)
  end

  first_cabinet
end
192
+
193
+ private
194
+
195
# Check if two folders can be merged
#
# The folders must share a compression type, their combined block count
# must not exceed Constants::FOLDER_MAX, and both must carry a list of
# files to merge. The merge is then allowed when at least one file of the
# left list also appears (same offset and length) in the right list; every
# left file that has no counterpart is reported with a warning.
#
# Messages are sent with a nil handle, since no file handle is involved
# here; this matches the (handle, text) form used by #search.
#
# @param left_folder [Models::Folder] Last folder of left cabinet
# @param right_folder [Models::Folder] First folder of right cabinet
# @return [Boolean] true if folders can be merged
def can_merge_folders?(left_folder, right_folder)
  # Check compression type matches
  unless left_folder.comp_type == right_folder.comp_type
    @io_system.message(nil, "Folder merge: compression type mismatch")
    return false
  end

  # Check total blocks won't exceed maximum
  total_blocks = left_folder.num_blocks + right_folder.num_blocks
  if total_blocks > Constants::FOLDER_MAX
    @io_system.message(nil,
                       "Folder merge: too many data blocks (#{total_blocks} > #{Constants::FOLDER_MAX})")
    return false
  end

  # Check both folders have merge files
  left_files = left_folder.merge_next
  right_files = right_folder.merge_prev

  unless left_files && right_files
    @io_system.message(nil, "Folder merge: one cabinet has no files to merge")
    return false
  end

  # Verify files match by offset and length. The per-file flag is reset for
  # every left file so that each unmatched file gets its own warning, while
  # any_match remembers whether at least one file was found on both sides.
  any_match = false
  left_file = left_files

  while left_file
    found = false
    right_file = right_files
    while right_file
      if left_file.offset == right_file.offset && left_file.length == right_file.length
        found = true
        break
      end
      right_file = right_file.next_file
    end

    if found
      any_match = true
    else
      @io_system.message(nil, "WARNING; merged file #{left_file.filename} not listed in both cabinets")
    end

    left_file = left_file.next_file
  end

  any_match
end
244
+
245
# Merge two cabinets together
#
# After validation the right cabinet's folders and files are appended to
# the left cabinet's lists. When the last folder of the left cabinet and
# the first folder of the right cabinet are two halves of one spanning
# folder, the right half's data segments are chained onto the left folder
# and the right folder's own file entries are dropped. Finally the two
# cabinets are linked and every cabinet in the chain is pointed at the
# same file and folder arrays.
#
# Warnings are sent with a nil handle, since no file handle is involved
# here; this matches the (handle, text) form used by #search.
#
# @param left_cab [Models::Cabinet] The left cabinet
# @param right_cab [Models::Cabinet] The right cabinet
# @return [Boolean] true if successful
# @raise [ArgumentError] if cabinets cannot be merged
# @raise [DataFormatError] if the boundary folders need merging but do not match
def merge_cabinets(left_cab, right_cab)
  # Basic validation
  raise ArgumentError, "Both cabinets must be provided" unless left_cab && right_cab
  raise ArgumentError, "Cannot merge a cabinet with itself" if left_cab == right_cab
  raise ArgumentError, "Cabinets already joined" if left_cab.next_cabinet || right_cab.prev_cabinet

  # Check for circular references: the right cabinet must not already sit
  # somewhere before the left one, nor the left one after the right one.
  cab = left_cab.prev_cabinet
  while cab
    raise ArgumentError, "Circular cabinet chain detected" if cab == right_cab

    cab = cab.prev_cabinet
  end

  cab = right_cab.next_cabinet
  while cab
    raise ArgumentError, "Circular cabinet chain detected" if cab == left_cab

    cab = cab.next_cabinet
  end

  # Warn about mismatched set IDs or indices
  @io_system.message(nil, "WARNING; merged cabinets with differing Set IDs") if left_cab.set_id != right_cab.set_id

  @io_system.message(nil, "WARNING; merged cabinets with odd order") if left_cab.set_index > right_cab.set_index

  # Find last folder of left cabinet and first folder of right cabinet.
  # Either may be nil when a cabinet has no folders; in that case there is
  # nothing to merge at the boundary and the lists are simply concatenated.
  left_folder = left_cab.folders.last
  right_folder = right_cab.folders.first

  if left_folder&.merge_next && right_folder&.merge_prev
    # Folders need merging - validate they can be merged
    raise DataFormatError, "Folders cannot be merged" unless can_merge_folders?(left_folder, right_folder)

    # Create new FolderData for right folder's data
    new_data = Models::FolderData.new(right_folder.data.cabinet,
                                      right_folder.data.offset)

    # Append to left folder's data chain
    data_tail = left_folder.data
    data_tail = data_tail.next_data while data_tail.next_data
    data_tail.next_data = new_data

    # Copy any additional data segments from right folder
    next_data = right_folder.data.next_data
    while next_data
      new_data.next_data = Models::FolderData.new(next_data.cabinet,
                                                  next_data.offset)
      new_data = new_data.next_data
      next_data = next_data.next_data
    end

    # Update block count (subtract 1 because blocks are shared at boundary)
    left_folder.num_blocks += right_folder.num_blocks - 1

    # Update merge_next pointer
    # Special case: if right folder merges both ways, keep left's merge_next
    if right_folder.merge_next.nil? || right_folder.merge_next.folder != right_folder
      left_folder.merge_next = right_folder.merge_next
    end

    # Link remaining folders from right cabinet (skip the merged first folder)
    left_cab.folders.concat(right_cab.folders[1..]) if right_folder.next_folder

    # Link files from right cabinet
    left_cab.files.concat(right_cab.files)

    # Remove duplicate files that belong to the merged right folder
    left_cab.files.reject! { |file| file.folder == right_folder }
  else
    # No folder merge needed - just link them
    left_cab.folders.concat(right_cab.folders)
    left_cab.files.concat(right_cab.files)
  end

  # Link cabinets
  left_cab.next_cabinet = right_cab
  right_cab.prev_cabinet = left_cab

  # Update all cabinets in the set to share the same file and folder lists
  cab = left_cab.prev_cabinet
  while cab
    cab.files = left_cab.files
    cab.folders = left_cab.folders
    cab = cab.prev_cabinet
  end

  cab = left_cab.next_cabinet
  while cab
    cab.files = left_cab.files
    cab.folders = left_cab.folders
    cab = cab.next_cabinet
  end

  true
end
368
+
369
# Find the first plausible cabinet header in a buffer.
#
# Scans for the 4-byte signature "MSCF". For each hit that has all 20
# header bytes inside the valid part of the buffer, the little-endian
# 32-bit cabinet length (bytes 8-11) and files offset (bytes 16-19) are
# read and checked with #validate_cabinet_signature. A hit that fails the
# check does not end the scan: searching resumes right after its 4
# signature bytes, so a real cabinet later in the same buffer is still
# found. Scanning byte by byte also handles a signature that directly
# follows a stray 'M' (e.g. "MMSCF").
#
# @param buf [Array<Integer>] Search buffer (byte values)
# @param length [Integer] Valid data length in buffer
# @param base_offset [Integer] Offset of buffer start in file
# @param _handle [IO] File handle (unused)
# @param _filename [String] Filename (unused)
# @param file_length [Integer] Total file length
# @return [Integer, nil] Offset of cabinet in file, or nil
def find_cabinet_in_buffer(buf, length, base_offset, _handle, _filename,
                           file_length)
  # Assemble a little-endian unsigned 32-bit value from four buffer bytes.
  u32_at = lambda do |index|
    buf[index] | (buf[index + 1] << 8) | (buf[index + 2] << 16) | (buf[index + 3] << 24)
  end

  # A header needs 20 bytes, so the last usable start is length - 20.
  last_start = length - 20
  pos = 0

  while pos <= last_start
    signature_here = buf[pos] == 0x4D &&     # 'M'
                     buf[pos + 1] == 0x53 && # 'S'
                     buf[pos + 2] == 0x43 && # 'C'
                     buf[pos + 3] == 0x46    # 'F'

    unless signature_here
      pos += 1
      next
    end

    cablen_u32 = u32_at.call(pos + 8)
    foffset_u32 = u32_at.call(pos + 16)
    caboff = base_offset + pos

    # Validate this looks like a real cabinet
    return caboff if validate_cabinet_signature(foffset_u32, cablen_u32,
                                                caboff, file_length)

    # Not valid, restart search after "MSCF"
    pos += 4
  end

  nil
end
465
+
466
# Decide whether header values found after an "MSCF" signature are
# plausible for a real cabinet.
#
# The files offset must lie inside the cabinet, and the start of the file
# entries must fall within the containing file (with 32 bytes of slack).
# Unless salvage mode is on, the end of the cabinet must fall within the
# same limit; in salvage mode a garbage length is tolerated.
#
# @param foffset_u32 [Integer] Files offset from header
# @param cablen_u32 [Integer] Cabinet length from header
# @param caboff [Integer] Offset of cabinet in file
# @param file_length [Integer] Total file length
# @return [Boolean] true if looks valid
def validate_cabinet_signature(foffset_u32, cablen_u32, caboff,
                               file_length)
  slack_limit = file_length + 32

  if foffset_u32 >= cablen_u32 || caboff + foffset_u32 >= slack_limit
    false
  elsif @salvage
    true
  else
    caboff + cablen_u32 < slack_limit
  end
end
487
+
488
# Attempt to parse a cabinet starting at a given offset of an open file.
#
# Cabriolet.verbose is switched off for the duration of the attempt and
# restored afterwards, whatever happens. Any StandardError raised by the
# parser is swallowed and reported as nil, because a failed attempt just
# means the candidate was not a real cabinet.
#
# @param handle [IO] File handle
# @param filename [String] Filename
# @param offset [Integer] Offset in file
# @return [Models::Cabinet, nil] Cabinet if successful, nil otherwise
def try_parse_cab_at_offset(handle, filename, offset)
  saved_verbosity = Cabriolet.verbose
  Cabriolet.verbose = false

  begin
    Parser.new(@io_system).parse_handle(handle, filename, offset, @salvage, true)
  rescue StandardError
    nil
  ensure
    Cabriolet.verbose = saved_verbosity
  end
end
508
+ end
509
+ end
510
+ end