cabriolet 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/README.adoc +703 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +167 -16
  6. data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +108 -84
  13. data/lib/cabriolet/cab/decompressor.rb +16 -20
  14. data/lib/cabriolet/cab/extractor.rb +142 -66
  15. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  16. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  17. data/lib/cabriolet/checksum.rb +49 -0
  18. data/lib/cabriolet/chm/command_handler.rb +227 -0
  19. data/lib/cabriolet/chm/compressor.rb +7 -3
  20. data/lib/cabriolet/chm/decompressor.rb +39 -21
  21. data/lib/cabriolet/chm/parser.rb +5 -2
  22. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  23. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  24. data/lib/cabriolet/cli/command_registry.rb +83 -0
  25. data/lib/cabriolet/cli.rb +356 -607
  26. data/lib/cabriolet/collections/file_collection.rb +175 -0
  27. data/lib/cabriolet/compressors/base.rb +1 -1
  28. data/lib/cabriolet/compressors/lzx.rb +241 -54
  29. data/lib/cabriolet/compressors/mszip.rb +35 -3
  30. data/lib/cabriolet/compressors/quantum.rb +36 -95
  31. data/lib/cabriolet/decompressors/base.rb +1 -1
  32. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  33. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  34. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  35. data/lib/cabriolet/decompressors/quantum.rb +83 -53
  36. data/lib/cabriolet/errors.rb +3 -0
  37. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  38. data/lib/cabriolet/extraction/extractor.rb +171 -0
  39. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  40. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  41. data/lib/cabriolet/file_entry.rb +156 -0
  42. data/lib/cabriolet/file_manager.rb +144 -0
  43. data/lib/cabriolet/format_base.rb +79 -0
  44. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  45. data/lib/cabriolet/hlp/compressor.rb +28 -238
  46. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  47. data/lib/cabriolet/hlp/parser.rb +52 -101
  48. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  49. data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
  50. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  51. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  52. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  53. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  54. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  55. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  56. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  57. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  58. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  59. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  60. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  61. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  62. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  63. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  64. data/lib/cabriolet/huffman/encoder.rb +15 -12
  65. data/lib/cabriolet/huffman/tree.rb +85 -1
  66. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  67. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  68. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  69. data/lib/cabriolet/lit/command_handler.rb +221 -0
  70. data/lib/cabriolet/lit/compressor.rb +119 -168
  71. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  72. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  73. data/lib/cabriolet/lit/decompressor.rb +518 -152
  74. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  75. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  76. data/lib/cabriolet/lit/header_writer.rb +124 -0
  77. data/lib/cabriolet/lit/parser.rb +670 -0
  78. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  79. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  80. data/lib/cabriolet/models/hlp_file.rb +130 -29
  81. data/lib/cabriolet/models/hlp_header.rb +105 -17
  82. data/lib/cabriolet/models/lit_header.rb +212 -25
  83. data/lib/cabriolet/models/szdd_header.rb +10 -2
  84. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  85. data/lib/cabriolet/oab/command_handler.rb +257 -0
  86. data/lib/cabriolet/oab/compressor.rb +17 -8
  87. data/lib/cabriolet/oab/decompressor.rb +41 -10
  88. data/lib/cabriolet/offset_calculator.rb +81 -0
  89. data/lib/cabriolet/plugin.rb +233 -0
  90. data/lib/cabriolet/plugin_manager.rb +453 -0
  91. data/lib/cabriolet/plugin_validator.rb +422 -0
  92. data/lib/cabriolet/quantum_shared.rb +105 -0
  93. data/lib/cabriolet/system/io_system.rb +3 -0
  94. data/lib/cabriolet/system/memory_handle.rb +17 -4
  95. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  96. data/lib/cabriolet/szdd/compressor.rb +15 -11
  97. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  98. data/lib/cabriolet/version.rb +1 -1
  99. data/lib/cabriolet.rb +181 -20
  100. metadata +69 -4
  101. data/lib/cabriolet/auto.rb +0 -173
  102. data/lib/cabriolet/parallel.rb +0 -333
@@ -4,6 +4,10 @@ module Cabriolet
4
4
  module Binary
5
5
  # BitstreamWriter provides bit-level I/O operations for writing compressed data
6
6
  class BitstreamWriter
7
+ # Pre-computed byte constants for fast single-byte writes
8
+ # Avoids repeated array packing for each byte written
9
+ BYTE_CONSTANTS = Array.new(256) { |i| [i].pack("C") }.freeze
10
+
7
11
  attr_reader :io_system, :handle, :buffer_size
8
12
 
9
13
  # Initialize a new bitstream writer
@@ -11,15 +15,22 @@ module Cabriolet
11
15
  # @param io_system [System::IOSystem] I/O system for writing data
12
16
  # @param handle [System::FileHandle, System::MemoryHandle] Handle to write to
13
17
  # @param buffer_size [Integer] Size of the output buffer
14
- # @param msb_first [Boolean] Whether to write bits MSB-first (for Quantum)
18
+ # @param bit_order [Symbol] Bit ordering - :lsb (default) or :msb
19
+ # @param msb_first [Boolean] Deprecated: use bit_order instead
15
20
  def initialize(io_system, handle,
16
- buffer_size = Cabriolet.default_buffer_size, msb_first: false)
21
+ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, msb_first: false)
17
22
  @io_system = io_system
18
23
  @handle = handle
19
24
  @buffer_size = buffer_size
20
- @msb_first = msb_first
25
+
26
+ # Support legacy msb_first parameter or new bit_order parameter
27
+ @bit_order = msb_first ? :msb : bit_order
28
+ @msb_first = (@bit_order == :msb)
29
+
21
30
  @bit_buffer = 0
22
31
  @bits_in_buffer = 0
32
+ @accumulated = 0
33
+ @bits_accumulated = 0
23
34
  end
24
35
 
25
36
  # Write specified number of bits to the stream
@@ -34,16 +45,38 @@ buffer_size = Cabriolet.default_buffer_size, msb_first: false)
34
45
  "Can only write 1-32 bits at a time"
35
46
  end
36
47
 
37
- # Add bits to buffer (LSB first, like DEFLATE)
38
- @bit_buffer |= ((value & ((1 << num_bits) - 1)) << @bits_in_buffer)
39
- @bits_in_buffer += num_bits
48
+ # Delegate to MSB method if in MSB mode
49
+ if @bit_order == :msb
50
+ write_bits_msb_internal(value, num_bits)
51
+ return
52
+ end
40
53
 
41
- # Flush complete bytes
42
- while @bits_in_buffer >= 8
43
- byte = @bit_buffer & 0xFF
44
- write_byte(byte)
45
- @bit_buffer >>= 8
46
- @bits_in_buffer -= 8
54
+ # LSB-first mode (default)
55
+ # Mask value to num_bits
56
+ value &= (1 << num_bits) - 1
57
+
58
+ # Accumulate bits
59
+ @accumulated |= (value << @bits_accumulated)
60
+ @bits_accumulated += num_bits
61
+
62
+ # Transfer accumulated bits to buffer in 8-bit chunks
63
+ while @bits_accumulated >= 8
64
+ # Take the lowest 8 bits from accumulated
65
+ byte = @accumulated & 0xFF
66
+ @accumulated >>= 8
67
+ @bits_accumulated -= 8
68
+
69
+ # Add to buffer
70
+ @bit_buffer |= (byte << @bits_in_buffer)
71
+ @bits_in_buffer += 8
72
+
73
+ # Flush complete bytes from buffer
74
+ while @bits_in_buffer >= 8
75
+ flush_byte = @bit_buffer & 0xFF
76
+ write_byte(flush_byte)
77
+ @bit_buffer >>= 8
78
+ @bits_in_buffer -= 8
79
+ end
47
80
  end
48
81
  end
49
82
 
@@ -51,20 +84,44 @@ buffer_size = Cabriolet.default_buffer_size, msb_first: false)
51
84
  #
52
85
  # @return [void]
53
86
  def byte_align
54
- return if @bits_in_buffer.zero?
87
+ if @bit_order == :msb
88
+ # MSB mode: align to 16-bit boundary (like Bitstream reader)
89
+ return if @bits_in_buffer.zero?
90
+
91
+ padding = (16 - @bits_in_buffer) % 16
92
+ else
93
+ # LSB mode: align to 8-bit boundary
94
+ return if @bits_accumulated.zero?
55
95
 
56
- # Pad with zeros to complete the current byte
57
- padding_bits = 8 - (@bits_in_buffer % 8)
58
- write_bits(0, padding_bits) if padding_bits < 8
96
+ padding = (8 - @bits_accumulated) % 8
97
+ end
98
+ if padding.positive?
99
+ write_bits(0, padding)
100
+ end
59
101
  end
60
102
 
61
103
  # Flush any remaining bits in the buffer
62
104
  #
63
105
  # @return [void]
64
106
  def flush
107
+ # For MSB mode, use the special MSB flush
108
+ if @bit_order == :msb
109
+ flush_msb_internal
110
+ return
111
+ end
112
+
113
+ # LSB mode flush
114
+ # First flush any accumulated bits
115
+ if @bits_accumulated.positive?
116
+ byte = @accumulated & 0xFF
117
+ write_byte(byte)
118
+ @accumulated = 0
119
+ @bits_accumulated = 0
120
+ end
121
+
122
+ # Then flush buffer
65
123
  return if @bits_in_buffer.zero?
66
124
 
67
- # Write any remaining bits (padded with zeros)
68
125
  byte = @bit_buffer & 0xFF
69
126
  write_byte(byte)
70
127
  @bit_buffer = 0
@@ -76,10 +133,70 @@ buffer_size = Cabriolet.default_buffer_size, msb_first: false)
76
133
  # @param byte [Integer] Byte value to write
77
134
  # @return [void]
78
135
  def write_byte(byte)
79
- data = [byte].pack("C")
136
+ # Use pre-encoded byte constant for better performance
137
+ data = BYTE_CONSTANTS[byte]
138
+ # DEBUG
139
+ if ENV["DEBUG_BITSTREAM"]
140
+ warn "DEBUG write_byte: pos=#{@bits_in_buffer} byte=#{byte} (#{byte.to_s(2).rjust(
141
+ 8, '0'
142
+ )})"
143
+ end
80
144
  @io_system.write(@handle, data)
81
145
  end
82
146
 
147
+ # Write bits in MSB-first mode (internal implementation)
148
+ # Matches the behavior of Bitstream's MSB mode for reading
149
+ #
150
+ # @param value [Integer] Value to write
151
+ # @param num_bits [Integer] Number of bits to write
152
+ # @return [void]
153
+ def write_bits_msb_internal(value, num_bits)
154
+ # Mask value to num_bits
155
+ value &= (1 << num_bits) - 1
156
+
157
+ # Add bits to buffer (MSB first - inject at left side)
158
+ @bit_buffer = (@bit_buffer << num_bits) | value
159
+ @bits_in_buffer += num_bits
160
+
161
+ # Flush complete 16-bit words
162
+ # The most significant bits are at the left of the buffer
163
+ # We want to extract the highest 16 bits and keep the rest
164
+ while @bits_in_buffer >= 16
165
+ # Extract the highest 16 bits by shifting right by (bits_in_buffer - 16)
166
+ # This moves the top 16 bits to positions 0-15
167
+ @bits_in_buffer -= 16
168
+ shift = @bits_in_buffer
169
+ word = (@bit_buffer >> shift) & 0xFFFF
170
+ # Write little-endian (LSB byte first, then MSB byte) to match Bitstream reader
171
+ write_byte(word & 0xFF)
172
+ write_byte((word >> 8) & 0xFF)
173
+ end
174
+ end
175
+
176
+ # Flush MSB buffer (internal implementation)
177
+ # Write remaining bits padded to 16-bit boundary
178
+ #
179
+ # @return [void]
180
+ def flush_msb_internal
181
+ return if @bits_in_buffer.zero?
182
+
183
+ # Pad to 16-bit boundary
184
+ padding = (16 - @bits_in_buffer) % 16
185
+ @bit_buffer <<= padding if padding.positive?
186
+ @bits_in_buffer += padding
187
+
188
+ # Write final 16-bit word
189
+ if @bits_in_buffer == 16
190
+ word = @bit_buffer & 0xFFFF
191
+ # Write little-endian (LSB byte first, then MSB byte) to match Bitstream reader
192
+ write_byte(word & 0xFF)
193
+ write_byte((word >> 8) & 0xFF)
194
+ end
195
+
196
+ @bit_buffer = 0
197
+ @bits_in_buffer = 0
198
+ end
199
+
83
200
  # Write a raw byte directly (for signatures, etc.)
84
201
  # This ensures the bit buffer is flushed first
85
202
  #
@@ -105,9 +222,21 @@ buffer_size = Cabriolet.default_buffer_size, msb_first: false)
105
222
  # @param num_bits [Integer] Number of bits to write
106
223
  # @return [void]
107
224
  def write_bits_be(value, num_bits)
108
- num_bits.times do |i|
109
- bit = (value >> (num_bits - 1 - i)) & 1
110
- write_bits(bit, 1)
225
+ # Write full bytes first for better performance
226
+ full_bytes = num_bits / 8
227
+ remaining_bits = num_bits % 8
228
+
229
+ # Write complete bytes MSB first
230
+ full_bytes.times do |i|
231
+ byte_shift = num_bits - 8 - (i * 8)
232
+ byte = (value >> byte_shift) & 0xFF
233
+ write_bits(byte, 8)
234
+ end
235
+
236
+ # Write remaining bits
237
+ if remaining_bits.positive?
238
+ remaining_value = value & ((1 << remaining_bits) - 1)
239
+ write_bits(remaining_value, remaining_bits)
111
240
  end
112
241
  end
113
242
 
@@ -83,8 +83,8 @@ module Cabriolet
83
83
  uint32 :quickref_size
84
84
  end
85
85
 
86
- # LZX Control Data
87
- class LZXControlData < BinData::Record
86
+ # CHM LZX Control Data
87
+ class CHMLZXControlData < BinData::Record
88
88
  endian :little
89
89
 
90
90
  uint32 :len
@@ -4,62 +4,283 @@ require "bindata"
4
4
 
5
5
  module Cabriolet
6
6
  module Binary
7
- # HLP (Windows Help) file format binary structures
7
+ # HLP (Windows Help / QuickHelp) file format binary structures
8
8
  #
9
- # NOTE: This implementation is based on the knowledge that HLP files use
10
- # LZSS compression with MODE_MSHELP, but cannot be fully validated due to
11
- # lack of test fixtures and incomplete libmspack implementation.
9
+ # Based on the QuickHelp binary format specification from DosHelp project.
10
+ # HLP files store help databases with topics, compression, and hyperlinks.
11
+ #
12
+ # Format overview:
13
+ # - Signature (2 bytes): 0x4C 0x4E ("LN")
14
+ # - File Header (68 bytes)
15
+ # - Topic Index (variable)
16
+ # - Context Strings (variable)
17
+ # - Context Map (variable)
18
+ # - Keywords (optional, variable)
19
+ # - Huffman Tree (optional, variable)
20
+ # - Topic Texts (variable, compressed)
12
21
  module HLPStructures
13
- # HLP file signature (common Windows Help magic)
14
- # Note: Actual signature may vary; this is a placeholder
15
- SIGNATURE = "?_\x03\x00".b.freeze
22
+ # QuickHelp file signature: 0x4C, 0x4E ("LN")
23
+ SIGNATURE = "\x4C\x4E".b.freeze unless defined?(SIGNATURE)
24
+
25
+ # File attributes flags
26
+ module Attributes
27
+ CASE_SENSITIVE = 0x01 unless defined?(CASE_SENSITIVE)
28
+ LOCKED = 0x02 unless defined?(LOCKED)
29
+ end
30
+
31
+ # Control bytes for keyword compression
32
+ module ControlBytes
33
+ # Dictionary entry with optional space (0x10-0x17)
34
+ DICT_ENTRY_MIN = 0x10 unless defined?(DICT_ENTRY_MIN)
35
+ DICT_ENTRY_MAX = 0x17 unless defined?(DICT_ENTRY_MAX)
36
+
37
+ # Run of spaces (0x18)
38
+ SPACE_RUN = 0x18 unless defined?(SPACE_RUN)
16
39
 
17
- # HLP file header
40
+ # Run of bytes (0x19)
41
+ BYTE_RUN = 0x19 unless defined?(BYTE_RUN)
42
+
43
+ # Escape byte (0x1A)
44
+ ESCAPE = 0x1A unless defined?(ESCAPE)
45
+ end
46
+
47
+ # Text style flags for topic lines
48
+ module TextStyle
49
+ NONE = 0x00 unless defined?(NONE)
50
+ BOLD = 0x01 unless defined?(BOLD)
51
+ ITALIC = 0x02 unless defined?(ITALIC)
52
+ UNDERLINE = 0x04 unless defined?(UNDERLINE)
53
+ end
54
+
55
+ # QuickHelp file header (70 bytes total: 2 byte signature + 68 byte header)
18
56
  #
19
- # Structure (placeholder based on typical compressed formats):
20
- # - 4 bytes: signature/magic
21
- # - 2 bytes: version
22
- # - 4 bytes: file count
23
- # - 4 bytes: directory offset
24
- class Header < BinData::Record
57
+ # Structure:
58
+ # - 2 bytes: signature (0x4C 0x4E)
59
+ # - 2 bytes: version (always 2)
60
+ # - 2 bytes: attributes (bit flags)
61
+ # - 1 byte: control character (usually ':' or 0xFF)
62
+ # - 1 byte: padding
63
+ # - 2 bytes: topic count
64
+ # - 2 bytes: context count
65
+ # - 1 byte: display width
66
+ # - 1 byte: padding
67
+ # - 2 bytes: predefined context count
68
+ # - 14 bytes: database name (null-terminated, null-padded)
69
+ # - 4 bytes: reserved
70
+ # - 4 bytes: topic index offset
71
+ # - 4 bytes: context strings offset
72
+ # - 4 bytes: context map offset
73
+ # - 4 bytes: keywords offset (0 if not used)
74
+ # - 4 bytes: huffman tree offset (0 if not used)
75
+ # - 4 bytes: topic text offset
76
+ # - 4 bytes: reserved
77
+ # - 4 bytes: reserved
78
+ # - 4 bytes: database size
79
+ class FileHeader < BinData::Record
25
80
  endian :little
26
81
 
27
- string :signature, length: 4
82
+ string :signature, length: 2
28
83
  uint16 :version
29
- uint32 :file_count
30
- uint32 :directory_offset
84
+ uint16 :attributes
85
+ uint8 :control_character
86
+ uint8 :padding1
87
+ uint16 :topic_count
88
+ uint16 :context_count
89
+ uint8 :display_width
90
+ uint8 :padding2
91
+ uint16 :predefined_ctx_count
92
+ string :database_name, length: 14
93
+ uint32 :reserved1
94
+ uint32 :topic_index_offset
95
+ uint32 :context_strings_offset
96
+ uint32 :context_map_offset
97
+ uint32 :keywords_offset
98
+ uint32 :huffman_tree_offset
99
+ uint32 :topic_text_offset
100
+ uint32 :reserved2
101
+ uint32 :reserved3
102
+ uint32 :database_size
103
+ end
104
+
105
+ # Topic index entry (4 bytes per topic)
106
+ #
107
+ # Array of (topic_count + 1) DWORDs that specify offsets of topic texts.
108
+ # The last entry indicates the end of the last topic.
109
+ class TopicOffset < BinData::Record
110
+ endian :little
111
+ uint32 :offset
31
112
  end
32
113
 
33
- # HLP file entry in directory
114
+ # Context map entry (2 bytes per context)
115
+ #
116
+ # Maps context strings to topic indices.
117
+ class ContextMapEntry < BinData::Record
118
+ endian :little
119
+ uint16 :topic_index
120
+ end
121
+
122
+ # Huffman tree node (2 bytes per node)
123
+ #
124
+ # Leaf node: bit 15 set, bits 0-7 contain symbol
125
+ # Internal node: bit 15 clear, node_value/2 is left child index, i+1 is right child
126
+ class HuffmanNode < BinData::Record
127
+ endian :little
128
+ int16 :node_value
129
+
130
+ # Check if this is a leaf node
131
+ def leaf?
132
+ node_value.negative?
133
+ end
134
+
135
+ # Get symbol for leaf node
136
+ def symbol
137
+ return nil unless leaf?
138
+
139
+ node_value & 0xFF
140
+ end
141
+
142
+ # Get left child index for internal node
143
+ def left_child_index
144
+ return nil if leaf?
145
+
146
+ node_value / 2
147
+ end
148
+ end
149
+
150
+ # Topic compressed header (2 bytes)
151
+ #
152
+ # Appears at the start of each compressed topic text.
153
+ class TopicHeader < BinData::Record
154
+ endian :little
155
+ uint16 :decompressed_length
156
+ end
157
+
158
+ # Windows Help (WinHelp) 3.x file header (28 bytes)
34
159
  #
35
160
  # Structure:
36
- # - 4 bytes: filename length
37
- # - N bytes: filename (null-terminated)
38
- # - 4 bytes: offset in archive
39
- # - 4 bytes: uncompressed size
40
- # - 4 bytes: compressed size
41
- # - 1 byte: compression flag (0 = uncompressed, 1 = LZSS)
42
- class FileEntry < BinData::Record
161
+ # - 2 bytes: Magic number (0x35F3)
162
+ # - 2 bytes: Unknown/version
163
+ # - 4 bytes: Directory offset
164
+ # - 4 bytes: Free list offset
165
+ # - 4 bytes: File size
166
+ # - 12 bytes: Reserved/padding
167
+ class WinHelp3Header < BinData::Record
43
168
  endian :little
44
169
 
45
- uint32 :filename_length
46
- string :filename, read_length: :filename_length
47
- uint32 :offset
48
- uint32 :uncompressed_size
49
- uint32 :compressed_size
50
- uint8 :compression_flag
170
+ uint16 :magic # 0x35F3
171
+ uint16 :unknown
172
+ uint32 :directory_offset
173
+ uint32 :free_list_offset
174
+ uint32 :file_size
175
+ string :reserved, length: 12
51
176
  end
52
177
 
53
- # Topic header (for compressed help topics)
178
+ # Windows Help (WinHelp) 4.x file header (32 bytes)
54
179
  #
55
180
  # Structure:
56
- # - 4 bytes: uncompressed size
57
- # - 4 bytes: compressed size
58
- class TopicHeader < BinData::Record
181
+ # - 4 bytes: Magic number (0x3F5F0000 or similar)
182
+ # - 4 bytes: Directory offset
183
+ # - 4 bytes: Free list offset
184
+ # - 4 bytes: File size
185
+ # - 16 bytes: Reserved/unknown
186
+ class WinHelp4Header < BinData::Record
187
+ endian :little
188
+
189
+ uint32 :magic # 0x3F5F0000 or similar
190
+ uint32 :directory_offset
191
+ uint32 :free_list_offset
192
+ uint32 :file_size
193
+ string :reserved, length: 16
194
+ end
195
+
196
+ # WinHelp internal file directory entry
197
+ #
198
+ # Variable size structure:
199
+ # - 4 bytes: File size
200
+ # - 2 bytes: Starting block number
201
+ # - Variable: File name (null-terminated, aligned)
202
+ class WinHelpDirectoryEntry < BinData::Record
203
+ endian :little
204
+
205
+ uint32 :file_size
206
+ uint16 :starting_block
207
+ stringz :filename
208
+ end
209
+
210
+ # WinHelp B+ tree header (from FILEHEADER of directory)
211
+ #
212
+ # Structure from helpdeco:
213
+ # - 2 bytes: Magic (0x293B)
214
+ # - 2 bytes: Flags (bit 0x0002 always 1, bit 0x0400 1 if directory)
215
+ # - 2 bytes: PageSize (0x0400=1k if directory, 0x0800=2k else)
216
+ # - 16 bytes: Structure (string describing structure of data)
217
+ # - 2 bytes: MustBeZero (0)
218
+ # - 2 bytes: PageSplits (number of page splits Btree has suffered)
219
+ # - 2 bytes: RootPage (page number of Btree root page)
220
+ # - 2 bytes: MustBeNegOne (0xFFFF)
221
+ # - 2 bytes: TotalPages (number of Btree pages)
222
+ # - 2 bytes: NLevels (number of levels of Btree)
223
+ # - 4 bytes: TotalBtreeEntries (number of entries in Btree)
224
+ #
225
+ # Total: 38 bytes (not 30!)
226
+ class WinHelpBTreeHeader < BinData::Record
227
+ endian :little
228
+
229
+ uint16 :magic # 0x293B
230
+ uint16 :flags
231
+ uint16 :page_size
232
+ string :structure, length: 16
233
+ int16 :must_be_zero
234
+ int16 :page_splits
235
+ int16 :root_page
236
+ int16 :must_be_neg_one
237
+ int16 :total_pages
238
+ int16 :n_levels
239
+ int32 :total_btree_entries
240
+ # Total: 2 + 2 + 2 + 16 + 2 + 2 + 2 + 2 + 2 + 2 + 4 = 38 bytes
241
+ end
242
+
243
+ # WinHelp B+ tree leaf node header
244
+ #
245
+ # Structure at beginning of every leaf-page:
246
+ # - 2 bytes: Unknown (no ID to identify leaf-page)
247
+ # - 2 bytes: NEntries (number of entries in this leaf-page)
248
+ # - 2 bytes: PreviousPage (page number of preceding leaf-page or -1)
249
+ # - 2 bytes: NextPage (page number of next leaf-page or -1)
250
+ class WinHelpBTreeNodeHeader < BinData::Record
251
+ endian :little
252
+
253
+ uint16 :unknown
254
+ int16 :n_entries
255
+ int16 :previous_page
256
+ int16 :next_page
257
+ end
258
+
259
+ # WinHelp B+ tree index node header (for internal nodes)
260
+ #
261
+ # Structure at beginning of every index-page:
262
+ # - 2 bytes: Unknown (no ID to identify index-page)
263
+ # - 2 bytes: NEntries (number of entries in this index-page)
264
+ # - 2 bytes: PreviousPage (page number of previous page)
265
+ class WinHelpBTreeIndexHeader < BinData::Record
266
+ endian :little
267
+
268
+ uint16 :unknown
269
+ int16 :n_entries
270
+ int16 :previous_page
271
+ end
272
+
273
+ # WinHelp FILEHEADER structure at FileOffset of each internal file
274
+ #
275
+ # - 4 bytes: ReservedSpace (reserved space in help file incl. FILEHEADER)
276
+ # - 4 bytes: UsedSpace (used space in help file excl. FILEHEADER)
277
+ # - 1 byte: FileFlags (normally 4)
278
+ class WinHelpFileHeader < BinData::Record
59
279
  endian :little
60
280
 
61
- uint32 :uncompressed_size
62
- uint32 :compressed_size
281
+ int32 :reserved_space
282
+ int32 :used_space
283
+ uint8 :file_flags
63
284
  end
64
285
  end
65
286
  end