cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bindata"
4
+
5
module Cabriolet
  module Binary
    # BinData record layouts for Windows Help (HLP) files.
    #
    # CAVEAT: these layouts are provisional. HLP data is understood to be
    # LZSS-compressed in MODE_MSHELP, but nothing here has been validated:
    # there are no test fixtures and libmspack's HLP support is incomplete.
    module HLPStructures
      # Magic bytes expected at the start of an HLP file: 0x3F 0x5F 0x03 0x00
      # ("?_\x03\x00"), held as a frozen binary string.
      # Placeholder: real files may use a different signature.
      SIGNATURE = [0x3F, 0x5F, 0x03, 0x00].pack("C*").freeze

      # File header record (14 bytes, little-endian).
      #
      # Placeholder layout modelled on typical compressed-archive headers:
      #   signature        - 4 bytes, magic
      #   version          - uint16
      #   file_count       - uint32
      #   directory_offset - uint32
      #
      # Declaration order below is the on-disk order; do not reorder.
      class Header < BinData::Record
        endian :little

        string :signature, length: 4
        uint16 :version
        uint32 :file_count
        uint32 :directory_offset
      end

      # One directory entry describing a stored file (little-endian).
      #
      # Layout:
      #   filename_length   - uint32, byte count of the name that follows
      #   filename          - filename_length bytes (described as
      #                       null-terminated; no trimming is configured here)
      #   offset            - uint32, position in the archive
      #   uncompressed_size - uint32
      #   compressed_size   - uint32
      #   compression_flag  - uint8, 0 = uncompressed, 1 = LZSS
      class FileEntry < BinData::Record
        endian :little

        uint32 :filename_length
        string :filename, read_length: :filename_length
        uint32 :offset
        uint32 :uncompressed_size
        uint32 :compressed_size
        uint8 :compression_flag
      end

      # Header preceding a compressed help topic (8 bytes, little-endian).
      #
      # Layout:
      #   uncompressed_size - uint32
      #   compressed_size   - uint32
      class TopicHeader < BinData::Record
        endian :little

        uint32 :uncompressed_size
        uint32 :compressed_size
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bindata"
4
+
5
module Cabriolet
  module Binary
    # BinData record layouts for the KWAJ file format.
    #
    # A KWAJ file starts with a fixed-size base header. The flag bits stored
    # in that header decide which of the optional records follow it.
    module KWAJStructures
      # First signature word: the ASCII bytes "KWAJ" read as a little-endian
      # 32-bit integer.
      SIGNATURE1 = 0x4A41574B
      # Second signature word.
      SIGNATURE2 = 0xD127F088

      class << self
        # Tell whether a pair of signature words identifies a KWAJ file.
        #
        # @param sig1 [Integer] first signature word read from the header
        # @param sig2 [Integer] second signature word read from the header
        # @return [Boolean] true only when both words match the constants
        def valid_signature?(sig1, sig2)
          return false unless sig1 == SIGNATURE1

          sig2 == SIGNATURE2
        end
      end

      # Fixed base header (14 bytes, little-endian).
      #
      # Layout:
      #   signature1  - uint32, expected to equal SIGNATURE1 ("KWAJ")
      #   signature2  - uint32, expected to equal SIGNATURE2 (0xD127F088)
      #   comp_method - uint16, compression method
      #   data_offset - uint16, data offset
      #   flags       - uint16, header flags selecting the optional records
      #
      # Declaration order below is the on-disk order; do not reorder.
      class BaseHeader < BinData::Record
        endian :little

        uint32 :signature1
        uint32 :signature2
        uint16 :comp_method
        uint16 :data_offset
        uint16 :flags
      end

      # Optional record carrying the uncompressed length (4 bytes).
      class LengthField < BinData::Record
        endian :little

        uint32 :uncompressed_length
      end

      # Optional record of unknown purpose, first kind (2 bytes).
      class Unknown1Field < BinData::Record
        endian :little

        uint16 :unknown1
      end

      # Optional record of unknown purpose, second kind (variable length):
      # a uint16 byte count followed by that many bytes of payload.
      class Unknown2Field < BinData::Record
        endian :little

        uint16 :data_length
        string :data, read_length: :data_length
      end

      # Optional extra-text record (variable length): a uint16 byte count
      # followed by that many bytes of text.
      class ExtraTextField < BinData::Record
        endian :little

        uint16 :text_length
        string :data, read_length: :text_length
      end
    end
  end
end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bindata"
4
+
5
module Cabriolet
  module Binary
    # BinData record layouts for Microsoft Reader LIT files.
    #
    # CAVEAT: the LIT format has no public specification. Everything below
    # comes from analysis and reverse engineering, so layouts are approximate.
    # DRM-protected (DES-encrypted) LIT files are not supported.
    module LITStructures
      # Expected file signature. Variants may exist depending on LIT version.
      SIGNATURE = "ITOLITLS"

      # Bit flags stored in LITFileEntry#flags.
      module FileFlags
        COMPRESSED = 1 << 0
        ENCRYPTED = 1 << 1
      end

      # Values for SectionHeader#compression_method.
      module CompressionMethod
        NONE = 0
        LZX = 1
      end

      # Values for SectionHeader#encryption_method.
      module EncryptionMethod
        NONE = 0
        DES = 1
      end

      # File header (approximate layout, 24 bytes, little-endian).
      #
      # Layout:
      #   signature   - 8 bytes
      #   version     - uint32
      #   flags       - uint32, includes the encryption flag
      #   file_count  - uint32
      #   header_size - uint32
      #
      # Declaration order below is the on-disk order; do not reorder.
      class LITHeader < BinData::Record
        endian :little

        string :signature, length: 8
        uint32 :version
        uint32 :flags
        uint32 :file_count
        uint32 :header_size
      end

      # Directory entry for one contained file (approximate layout,
      # little-endian).
      #
      # Layout:
      #   filename_length   - uint32, byte count of the name that follows
      #   filename          - filename_length bytes (UTF-8 or UTF-16)
      #   offset            - uint64, file offset
      #   compressed_size   - uint64
      #   uncompressed_size - uint64
      #   flags             - uint32, see FileFlags
      class LITFileEntry < BinData::Record
        endian :little

        uint32 :filename_length
        string :filename, read_length: :filename_length
        uint64 :offset
        uint64 :compressed_size
        uint64 :uncompressed_size
        uint32 :flags
      end

      # Content section header (approximate layout, 16 bytes, little-endian).
      #
      # Layout:
      #   section_type       - uint32
      #   section_size       - uint32
      #   compression_method - uint32, see CompressionMethod
      #   encryption_method  - uint32, see EncryptionMethod
      class SectionHeader < BinData::Record
        endian :little

        uint32 :section_type
        uint32 :section_size
        uint32 :compression_method
        uint32 :encryption_method
      end

      # DES encryption header, present only for encrypted content
      # (approximate layout, 28 bytes, little-endian).
      #
      # DES decryption is not implemented; this record exists for
      # completeness only.
      #
      # Layout:
      #   key_hash - 16 bytes, encryption key hash
      #   iv       - 8 bytes, initialization vector
      #   flags    - uint32, encryption flags
      class EncryptionHeader < BinData::Record
        endian :little

        string :key_hash, length: 16
        string :iv, length: 8
        uint32 :flags
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bindata"
4
+
5
module Cabriolet
  module Binary
    # BinData record layouts for OAB (Outlook Offline Address Book) files.
    #
    # OAB data is LZX-compressed and exists in two flavours:
    #   - full files          (version 3.1)
    #   - incremental patches (version 3.2)
    module OABStructures
      # File header of a full OAB file, version 3.1 (16 bytes, little-endian).
      #
      # Layout:
      #   version_hi  - uint32, expected 3
      #   version_lo  - uint32, expected 1 for full files
      #   block_max   - uint32, maximum block size
      #   target_size - uint32, decompressed output size
      #
      # Declaration order below is the on-disk order; do not reorder.
      class FullHeader < BinData::Record
        endian :little

        uint32 :version_hi
        uint32 :version_lo
        uint32 :block_max
        uint32 :target_size

        # Whether the version fields identify a full (3.1) file.
        #
        # @return [Boolean]
        def valid?
          return false unless version_hi == 3

          version_lo == 1
        end
      end

      # Per-block header inside a full OAB file (16 bytes, little-endian).
      #
      # Layout:
      #   flags             - uint32, 0 = uncompressed, 1 = LZX compressed
      #   compressed_size   - uint32
      #   uncompressed_size - uint32
      #   crc               - uint32, CRC32 of the decompressed data
      class BlockHeader < BinData::Record
        endian :little

        uint32 :flags
        uint32 :compressed_size
        uint32 :uncompressed_size
        uint32 :crc

        # Whether the block payload is LZX-compressed (flags equals 1).
        #
        # @return [Boolean]
        def compressed?
          flags == 1
        end

        # Whether the block payload is stored as-is (flags equals 0).
        #
        # @return [Boolean]
        def uncompressed?
          flags == 0 # rubocop:disable Style/NumericPredicate
        end
      end

      # File header of an OAB patch file, version 3.2 (28 bytes,
      # little-endian).
      #
      # Layout:
      #   version_hi  - uint32, expected 3
      #   version_lo  - uint32, expected 2 for patches
      #   block_max   - uint32, maximum block size
      #   source_size - uint32, base file size
      #   target_size - uint32, output file size
      #   source_crc  - uint32, CRC32 of the base file
      #   target_crc  - uint32, CRC32 of the output file
      class PatchHeader < BinData::Record
        endian :little

        uint32 :version_hi
        uint32 :version_lo
        uint32 :block_max
        uint32 :source_size
        uint32 :target_size
        uint32 :source_crc
        uint32 :target_crc

        # Whether the version fields identify a patch (3.2) file.
        #
        # @return [Boolean]
        def valid?
          return false unless version_hi == 3

          version_lo == 2
        end
      end

      # Per-block header inside an OAB patch file (16 bytes, little-endian).
      #
      # Layout:
      #   patch_size  - uint32, compressed patch data size
      #   target_size - uint32, decompressed output block size
      #   source_size - uint32, base data needed for this block
      #   crc         - uint32, CRC32 of the decompressed output
      class PatchBlockHeader < BinData::Record
        endian :little

        uint32 :patch_size
        uint32 :target_size
        uint32 :source_size
        uint32 :crc
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bindata"
4
+
5
module Cabriolet
  module Binary
    # Cabinet file header (CFHEADER): the fixed 36-byte, little-endian part.
    #
    # Field widths, in on-disk order:
    #   signature     - 4 bytes
    #   reserved1     - uint32
    #   cabinet_size  - uint32
    #   reserved2     - uint32
    #   files_offset  - uint32
    #   reserved3     - uint32
    #   minor_version - uint8 (stored before the major version)
    #   major_version - uint8
    #   num_folders   - uint16
    #   num_files     - uint16
    #   flags         - uint16
    #   set_id        - uint16
    #   cabinet_index - uint16
    #
    # Declaration order below is the on-disk order; do not reorder.
    class CFHeader < BinData::Record
      endian :little

      string :signature, length: 4
      uint32 :reserved1
      uint32 :cabinet_size
      uint32 :reserved2
      uint32 :files_offset
      uint32 :reserved3
      uint8 :minor_version
      uint8 :major_version
      uint16 :num_folders
      uint16 :num_files
      uint16 :flags
      uint16 :set_id
      uint16 :cabinet_index
    end

    # Cabinet folder record (CFFOLDER): 8 bytes, little-endian.
    #
    # Field widths, in on-disk order:
    #   data_offset - uint32
    #   num_blocks  - uint16
    #   comp_type   - uint16
    class CFFolder < BinData::Record
      endian :little

      uint32 :data_offset
      uint16 :num_blocks
      uint16 :comp_type
    end

    # Cabinet file record (CFFILE): the 16 fixed bytes, little-endian.
    # Only the fixed-width fields are modelled; anything that follows them
    # on disk is not read by this record.
    #
    # Field widths, in on-disk order:
    #   uncompressed_size - uint32
    #   folder_offset     - uint32
    #   folder_index      - uint16
    #   date              - uint16
    #   time              - uint16
    #   attribs           - uint16
    class CFFile < BinData::Record
      endian :little

      uint32 :uncompressed_size
      uint32 :folder_offset
      uint16 :folder_index
      uint16 :date
      uint16 :time
      uint16 :attribs
    end

    # Cabinet data block header (CFDATA): 8 bytes, little-endian.
    # The block payload itself is not part of this record.
    #
    # Field widths, in on-disk order:
    #   checksum          - uint32
    #   compressed_size   - uint16
    #   uncompressed_size - uint16
    class CFData < BinData::Record
      endian :little

      uint32 :checksum
      uint16 :compressed_size
      uint16 :uncompressed_size
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bindata"
4
+
5
module Cabriolet
  module Binary
    # BinData record layouts for the SZDD file format.
    #
    # Two variants exist, told apart by their 8-byte signature: NORMAL
    # (as produced for EXPAND.EXE) and QBASIC. Each variant has a record
    # that includes the signature and a companion record for the bytes that
    # come after it.
    module SZDDStructures
      # NORMAL signature: "SZDD" followed by 0x88 0xF0 0x27 0x33 (8 bytes).
      SIGNATURE_NORMAL = [0x53, 0x5A, 0x44, 0x44, 0x88, 0xF0, 0x27, 0x33]
        .pack("C*").freeze
      # QBASIC signature: "SZ " (0x53 0x5A 0x20) followed by
      # 0x88 0xF0 0x27 0x33 0xD1 (8 bytes).
      SIGNATURE_QBASIC = [0x53, 0x5A, 0x20, 0x88, 0xF0, 0x27, 0x33, 0xD1]
        .pack("C*").freeze

      # Complete NORMAL-format header (14 bytes, little-endian).
      #
      # Layout:
      #   signature         - 8 bytes (SZDD\x88\xF0\x27\x33)
      #   compression_mode  - uint8 (0x41)
      #   missing_char      - uint8, missing character
      #   uncompressed_size - uint32
      #
      # Declaration order below is the on-disk order; do not reorder.
      class NormalHeader < BinData::Record
        endian :little

        string :signature, length: 8
        uint8 :compression_mode
        uint8 :missing_char
        uint32 :uncompressed_size
      end

      # Complete QBASIC-format header (12 bytes, little-endian).
      #
      # Layout:
      #   signature         - 8 bytes (SZ \x88\xF0\x27\x33\xD1), matching
      #                       SIGNATURE_QBASIC
      #   uncompressed_size - uint32
      class QBasicHeader < BinData::Record
        endian :little

        string :signature, length: 8
        uint32 :uncompressed_size
      end

      # NORMAL-format header fields that follow the signature (6 bytes,
      # little-endian). Same fields as NormalHeader without the signature.
      class NormalData < BinData::Record
        endian :little

        uint8 :compression_mode
        uint8 :missing_char
        uint32 :uncompressed_size
      end

      # QBASIC-format header field that follows the signature (4 bytes,
      # little-endian). Same field as QBasicHeader without the signature.
      class QBasicData < BinData::Record
        endian :little

        uint32 :uncompressed_size
      end
    end
  end
end