cabriolet 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -4,6 +4,10 @@ module Cabriolet
|
|
|
4
4
|
module Binary
|
|
5
5
|
# BitstreamWriter provides bit-level I/O operations for writing compressed data
|
|
6
6
|
class BitstreamWriter
|
|
7
|
+
# Pre-computed byte constants for fast single-byte writes
|
|
8
|
+
# Avoids repeated array packing for each byte written
|
|
9
|
+
BYTE_CONSTANTS = Array.new(256) { |i| [i].pack("C") }.freeze
|
|
10
|
+
|
|
7
11
|
attr_reader :io_system, :handle, :buffer_size
|
|
8
12
|
|
|
9
13
|
# Initialize a new bitstream writer
|
|
@@ -11,15 +15,22 @@ module Cabriolet
|
|
|
11
15
|
# @param io_system [System::IOSystem] I/O system for writing data
|
|
12
16
|
# @param handle [System::FileHandle, System::MemoryHandle] Handle to write to
|
|
13
17
|
# @param buffer_size [Integer] Size of the output buffer
|
|
14
|
-
# @param
|
|
18
|
+
# @param bit_order [Symbol] Bit ordering - :lsb (default) or :msb
|
|
19
|
+
# @param msb_first [Boolean] Deprecated: use bit_order instead
|
|
15
20
|
def initialize(io_system, handle,
|
|
16
|
-
buffer_size = Cabriolet.default_buffer_size, msb_first: false)
|
|
21
|
+
buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, msb_first: false)
|
|
17
22
|
@io_system = io_system
|
|
18
23
|
@handle = handle
|
|
19
24
|
@buffer_size = buffer_size
|
|
20
|
-
|
|
25
|
+
|
|
26
|
+
# Support legacy msb_first parameter or new bit_order parameter
|
|
27
|
+
@bit_order = msb_first ? :msb : bit_order
|
|
28
|
+
@msb_first = (@bit_order == :msb)
|
|
29
|
+
|
|
21
30
|
@bit_buffer = 0
|
|
22
31
|
@bits_in_buffer = 0
|
|
32
|
+
@accumulated = 0
|
|
33
|
+
@bits_accumulated = 0
|
|
23
34
|
end
|
|
24
35
|
|
|
25
36
|
# Write specified number of bits to the stream
|
|
@@ -34,16 +45,38 @@ buffer_size = Cabriolet.default_buffer_size, msb_first: false)
|
|
|
34
45
|
"Can only write 1-32 bits at a time"
|
|
35
46
|
end
|
|
36
47
|
|
|
37
|
-
#
|
|
38
|
-
@
|
|
39
|
-
|
|
48
|
+
# Delegate to MSB method if in MSB mode
|
|
49
|
+
if @bit_order == :msb
|
|
50
|
+
write_bits_msb_internal(value, num_bits)
|
|
51
|
+
return
|
|
52
|
+
end
|
|
40
53
|
|
|
41
|
-
#
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
54
|
+
# LSB-first mode (default)
|
|
55
|
+
# Mask value to num_bits
|
|
56
|
+
value &= (1 << num_bits) - 1
|
|
57
|
+
|
|
58
|
+
# Accumulate bits
|
|
59
|
+
@accumulated |= (value << @bits_accumulated)
|
|
60
|
+
@bits_accumulated += num_bits
|
|
61
|
+
|
|
62
|
+
# Transfer accumulated bits to buffer in 8-bit chunks
|
|
63
|
+
while @bits_accumulated >= 8
|
|
64
|
+
# Take the lowest 8 bits from accumulated
|
|
65
|
+
byte = @accumulated & 0xFF
|
|
66
|
+
@accumulated >>= 8
|
|
67
|
+
@bits_accumulated -= 8
|
|
68
|
+
|
|
69
|
+
# Add to buffer
|
|
70
|
+
@bit_buffer |= (byte << @bits_in_buffer)
|
|
71
|
+
@bits_in_buffer += 8
|
|
72
|
+
|
|
73
|
+
# Flush complete bytes from buffer
|
|
74
|
+
while @bits_in_buffer >= 8
|
|
75
|
+
flush_byte = @bit_buffer & 0xFF
|
|
76
|
+
write_byte(flush_byte)
|
|
77
|
+
@bit_buffer >>= 8
|
|
78
|
+
@bits_in_buffer -= 8
|
|
79
|
+
end
|
|
47
80
|
end
|
|
48
81
|
end
|
|
49
82
|
|
|
@@ -51,20 +84,44 @@ buffer_size = Cabriolet.default_buffer_size, msb_first: false)
|
|
|
51
84
|
#
|
|
52
85
|
# @return [void]
|
|
53
86
|
def byte_align
|
|
54
|
-
|
|
87
|
+
if @bit_order == :msb
|
|
88
|
+
# MSB mode: align to 16-bit boundary (like Bitstream reader)
|
|
89
|
+
return if @bits_in_buffer.zero?
|
|
90
|
+
|
|
91
|
+
padding = (16 - @bits_in_buffer) % 16
|
|
92
|
+
else
|
|
93
|
+
# LSB mode: align to 8-bit boundary
|
|
94
|
+
return if @bits_accumulated.zero?
|
|
55
95
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
96
|
+
padding = (8 - @bits_accumulated) % 8
|
|
97
|
+
end
|
|
98
|
+
if padding.positive?
|
|
99
|
+
write_bits(0, padding)
|
|
100
|
+
end
|
|
59
101
|
end
|
|
60
102
|
|
|
61
103
|
# Flush any remaining bits in the buffer
|
|
62
104
|
#
|
|
63
105
|
# @return [void]
|
|
64
106
|
def flush
|
|
107
|
+
# For MSB mode, use the special MSB flush
|
|
108
|
+
if @bit_order == :msb
|
|
109
|
+
flush_msb_internal
|
|
110
|
+
return
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# LSB mode flush
|
|
114
|
+
# First flush any accumulated bits
|
|
115
|
+
if @bits_accumulated.positive?
|
|
116
|
+
byte = @accumulated & 0xFF
|
|
117
|
+
write_byte(byte)
|
|
118
|
+
@accumulated = 0
|
|
119
|
+
@bits_accumulated = 0
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Then flush buffer
|
|
65
123
|
return if @bits_in_buffer.zero?
|
|
66
124
|
|
|
67
|
-
# Write any remaining bits (padded with zeros)
|
|
68
125
|
byte = @bit_buffer & 0xFF
|
|
69
126
|
write_byte(byte)
|
|
70
127
|
@bit_buffer = 0
|
|
@@ -76,10 +133,70 @@ buffer_size = Cabriolet.default_buffer_size, msb_first: false)
|
|
|
76
133
|
# @param byte [Integer] Byte value to write
|
|
77
134
|
# @return [void]
|
|
78
135
|
def write_byte(byte)
|
|
79
|
-
|
|
136
|
+
# Use pre-encoded byte constant for better performance
|
|
137
|
+
data = BYTE_CONSTANTS[byte]
|
|
138
|
+
# DEBUG
|
|
139
|
+
if ENV["DEBUG_BITSTREAM"]
|
|
140
|
+
warn "DEBUG write_byte: pos=#{@bits_in_buffer} byte=#{byte} (#{byte.to_s(2).rjust(
|
|
141
|
+
8, '0'
|
|
142
|
+
)})"
|
|
143
|
+
end
|
|
80
144
|
@io_system.write(@handle, data)
|
|
81
145
|
end
|
|
82
146
|
|
|
147
|
+
# Write bits in MSB-first mode (internal implementation)
|
|
148
|
+
# Matches the behavior of Bitstream's MSB mode for reading
|
|
149
|
+
#
|
|
150
|
+
# @param value [Integer] Value to write
|
|
151
|
+
# @param num_bits [Integer] Number of bits to write
|
|
152
|
+
# @return [void]
|
|
153
|
+
def write_bits_msb_internal(value, num_bits)
|
|
154
|
+
# Mask value to num_bits
|
|
155
|
+
value &= (1 << num_bits) - 1
|
|
156
|
+
|
|
157
|
+
# Add bits to buffer (MSB first - shift existing bits left, append new bits at the low end)
|
|
158
|
+
@bit_buffer = (@bit_buffer << num_bits) | value
|
|
159
|
+
@bits_in_buffer += num_bits
|
|
160
|
+
|
|
161
|
+
# Flush complete 16-bit words
|
|
162
|
+
# The most significant bits are at the left of the buffer
|
|
163
|
+
# We want to extract the highest 16 bits and keep the rest
|
|
164
|
+
while @bits_in_buffer >= 16
|
|
165
|
+
# Extract the highest 16 bits by shifting right by (bits_in_buffer - 16)
|
|
166
|
+
# This moves the top 16 bits to positions 0-15
|
|
167
|
+
@bits_in_buffer -= 16
|
|
168
|
+
shift = @bits_in_buffer
|
|
169
|
+
word = (@bit_buffer >> shift) & 0xFFFF
|
|
170
|
+
# Write little-endian (LSB byte first, then MSB byte) to match Bitstream reader
|
|
171
|
+
write_byte(word & 0xFF)
|
|
172
|
+
write_byte((word >> 8) & 0xFF)
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
# Flush MSB buffer (internal implementation)
|
|
177
|
+
# Write remaining bits padded to 16-bit boundary
|
|
178
|
+
#
|
|
179
|
+
# @return [void]
|
|
180
|
+
def flush_msb_internal
|
|
181
|
+
return if @bits_in_buffer.zero?
|
|
182
|
+
|
|
183
|
+
# Pad to 16-bit boundary
|
|
184
|
+
padding = (16 - @bits_in_buffer) % 16
|
|
185
|
+
@bit_buffer <<= padding if padding.positive?
|
|
186
|
+
@bits_in_buffer += padding
|
|
187
|
+
|
|
188
|
+
# Write final 16-bit word
|
|
189
|
+
if @bits_in_buffer == 16
|
|
190
|
+
word = @bit_buffer & 0xFFFF
|
|
191
|
+
# Write little-endian (LSB byte first, then MSB byte) to match Bitstream reader
|
|
192
|
+
write_byte(word & 0xFF)
|
|
193
|
+
write_byte((word >> 8) & 0xFF)
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
@bit_buffer = 0
|
|
197
|
+
@bits_in_buffer = 0
|
|
198
|
+
end
|
|
199
|
+
|
|
83
200
|
# Write a raw byte directly (for signatures, etc.)
|
|
84
201
|
# This ensures the bit buffer is flushed first
|
|
85
202
|
#
|
|
@@ -105,9 +222,21 @@ buffer_size = Cabriolet.default_buffer_size, msb_first: false)
|
|
|
105
222
|
# @param num_bits [Integer] Number of bits to write
|
|
106
223
|
# @return [void]
|
|
107
224
|
def write_bits_be(value, num_bits)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
225
|
+
# Write full bytes first for better performance
|
|
226
|
+
full_bytes = num_bits / 8
|
|
227
|
+
remaining_bits = num_bits % 8
|
|
228
|
+
|
|
229
|
+
# Write complete bytes MSB first
|
|
230
|
+
full_bytes.times do |i|
|
|
231
|
+
byte_shift = num_bits - 8 - (i * 8)
|
|
232
|
+
byte = (value >> byte_shift) & 0xFF
|
|
233
|
+
write_bits(byte, 8)
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
# Write remaining bits
|
|
237
|
+
if remaining_bits.positive?
|
|
238
|
+
remaining_value = value & ((1 << remaining_bits) - 1)
|
|
239
|
+
write_bits(remaining_value, remaining_bits)
|
|
111
240
|
end
|
|
112
241
|
end
|
|
113
242
|
|
|
@@ -4,62 +4,283 @@ require "bindata"
|
|
|
4
4
|
|
|
5
5
|
module Cabriolet
|
|
6
6
|
module Binary
|
|
7
|
-
# HLP (Windows Help) file format binary structures
|
|
7
|
+
# HLP (Windows Help / QuickHelp) file format binary structures
|
|
8
8
|
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
9
|
+
# Based on the QuickHelp binary format specification from DosHelp project.
|
|
10
|
+
# HLP files store help databases with topics, compression, and hyperlinks.
|
|
11
|
+
#
|
|
12
|
+
# Format overview:
|
|
13
|
+
# - Signature (2 bytes): 0x4C 0x4E ("LN")
|
|
14
|
+
# - File Header (68 bytes)
|
|
15
|
+
# - Topic Index (variable)
|
|
16
|
+
# - Context Strings (variable)
|
|
17
|
+
# - Context Map (variable)
|
|
18
|
+
# - Keywords (optional, variable)
|
|
19
|
+
# - Huffman Tree (optional, variable)
|
|
20
|
+
# - Topic Texts (variable, compressed)
|
|
12
21
|
module HLPStructures
|
|
13
|
-
#
|
|
14
|
-
|
|
15
|
-
|
|
22
|
+
# QuickHelp file signature: 0x4C, 0x4E ("LN")
|
|
23
|
+
SIGNATURE = "\x4C\x4E".b.freeze unless defined?(SIGNATURE)
|
|
24
|
+
|
|
25
|
+
# File attributes flags
|
|
26
|
+
module Attributes
|
|
27
|
+
CASE_SENSITIVE = 0x01 unless defined?(CASE_SENSITIVE)
|
|
28
|
+
LOCKED = 0x02 unless defined?(LOCKED)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Control bytes for keyword compression
|
|
32
|
+
module ControlBytes
|
|
33
|
+
# Dictionary entry with optional space (0x10-0x17)
|
|
34
|
+
DICT_ENTRY_MIN = 0x10 unless defined?(DICT_ENTRY_MIN)
|
|
35
|
+
DICT_ENTRY_MAX = 0x17 unless defined?(DICT_ENTRY_MAX)
|
|
36
|
+
|
|
37
|
+
# Run of spaces (0x18)
|
|
38
|
+
SPACE_RUN = 0x18 unless defined?(SPACE_RUN)
|
|
16
39
|
|
|
17
|
-
|
|
40
|
+
# Run of bytes (0x19)
|
|
41
|
+
BYTE_RUN = 0x19 unless defined?(BYTE_RUN)
|
|
42
|
+
|
|
43
|
+
# Escape byte (0x1A)
|
|
44
|
+
ESCAPE = 0x1A unless defined?(ESCAPE)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Text style flags for topic lines
|
|
48
|
+
module TextStyle
|
|
49
|
+
NONE = 0x00 unless defined?(NONE)
|
|
50
|
+
BOLD = 0x01 unless defined?(BOLD)
|
|
51
|
+
ITALIC = 0x02 unless defined?(ITALIC)
|
|
52
|
+
UNDERLINE = 0x04 unless defined?(UNDERLINE)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# QuickHelp file header (70 bytes total: 2 byte signature + 68 byte header)
|
|
18
56
|
#
|
|
19
|
-
# Structure
|
|
20
|
-
# -
|
|
21
|
-
# - 2 bytes: version
|
|
22
|
-
# -
|
|
23
|
-
# -
|
|
24
|
-
|
|
57
|
+
# Structure:
|
|
58
|
+
# - 2 bytes: signature (0x4C 0x4E)
|
|
59
|
+
# - 2 bytes: version (always 2)
|
|
60
|
+
# - 2 bytes: attributes (bit flags)
|
|
61
|
+
# - 1 byte: control character (usually ':' or 0xFF)
|
|
62
|
+
# - 1 byte: padding
|
|
63
|
+
# - 2 bytes: topic count
|
|
64
|
+
# - 2 bytes: context count
|
|
65
|
+
# - 1 byte: display width
|
|
66
|
+
# - 1 byte: padding
|
|
67
|
+
# - 2 bytes: predefined context count
|
|
68
|
+
# - 14 bytes: database name (null-terminated, null-padded)
|
|
69
|
+
# - 4 bytes: reserved
|
|
70
|
+
# - 4 bytes: topic index offset
|
|
71
|
+
# - 4 bytes: context strings offset
|
|
72
|
+
# - 4 bytes: context map offset
|
|
73
|
+
# - 4 bytes: keywords offset (0 if not used)
|
|
74
|
+
# - 4 bytes: huffman tree offset (0 if not used)
|
|
75
|
+
# - 4 bytes: topic text offset
|
|
76
|
+
# - 4 bytes: reserved
|
|
77
|
+
# - 4 bytes: reserved
|
|
78
|
+
# - 4 bytes: database size
|
|
79
|
+
class FileHeader < BinData::Record
|
|
25
80
|
endian :little
|
|
26
81
|
|
|
27
|
-
string :signature, length:
|
|
82
|
+
string :signature, length: 2
|
|
28
83
|
uint16 :version
|
|
29
|
-
|
|
30
|
-
|
|
84
|
+
uint16 :attributes
|
|
85
|
+
uint8 :control_character
|
|
86
|
+
uint8 :padding1
|
|
87
|
+
uint16 :topic_count
|
|
88
|
+
uint16 :context_count
|
|
89
|
+
uint8 :display_width
|
|
90
|
+
uint8 :padding2
|
|
91
|
+
uint16 :predefined_ctx_count
|
|
92
|
+
string :database_name, length: 14
|
|
93
|
+
uint32 :reserved1
|
|
94
|
+
uint32 :topic_index_offset
|
|
95
|
+
uint32 :context_strings_offset
|
|
96
|
+
uint32 :context_map_offset
|
|
97
|
+
uint32 :keywords_offset
|
|
98
|
+
uint32 :huffman_tree_offset
|
|
99
|
+
uint32 :topic_text_offset
|
|
100
|
+
uint32 :reserved2
|
|
101
|
+
uint32 :reserved3
|
|
102
|
+
uint32 :database_size
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Topic index entry (4 bytes per topic)
|
|
106
|
+
#
|
|
107
|
+
# Array of (topic_count + 1) DWORDs that specify offsets of topic texts.
|
|
108
|
+
# The last entry indicates the end of the last topic.
|
|
109
|
+
class TopicOffset < BinData::Record
|
|
110
|
+
endian :little
|
|
111
|
+
uint32 :offset
|
|
31
112
|
end
|
|
32
113
|
|
|
33
|
-
#
|
|
114
|
+
# Context map entry (2 bytes per context)
|
|
115
|
+
#
|
|
116
|
+
# Maps context strings to topic indices.
|
|
117
|
+
class ContextMapEntry < BinData::Record
|
|
118
|
+
endian :little
|
|
119
|
+
uint16 :topic_index
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Huffman tree node (2 bytes per node)
|
|
123
|
+
#
|
|
124
|
+
# Leaf node: bit 15 set, bits 0-7 contain symbol
|
|
125
|
+
# Internal node: bit 15 clear, node_value/2 is left child index, i+1 is right child
|
|
126
|
+
class HuffmanNode < BinData::Record
|
|
127
|
+
endian :little
|
|
128
|
+
int16 :node_value
|
|
129
|
+
|
|
130
|
+
# Check if this is a leaf node
|
|
131
|
+
def leaf?
|
|
132
|
+
node_value.negative?
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Get symbol for leaf node
|
|
136
|
+
def symbol
|
|
137
|
+
return nil unless leaf?
|
|
138
|
+
|
|
139
|
+
node_value & 0xFF
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# Get left child index for internal node
|
|
143
|
+
def left_child_index
|
|
144
|
+
return nil if leaf?
|
|
145
|
+
|
|
146
|
+
node_value / 2
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Topic compressed header (2 bytes)
|
|
151
|
+
#
|
|
152
|
+
# Appears at the start of each compressed topic text.
|
|
153
|
+
class TopicHeader < BinData::Record
|
|
154
|
+
endian :little
|
|
155
|
+
uint16 :decompressed_length
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Windows Help (WinHelp) 3.x file header (28 bytes)
|
|
34
159
|
#
|
|
35
160
|
# Structure:
|
|
36
|
-
# -
|
|
37
|
-
# -
|
|
38
|
-
# - 4 bytes: offset
|
|
39
|
-
# - 4 bytes:
|
|
40
|
-
# - 4 bytes:
|
|
41
|
-
# -
|
|
42
|
-
class
|
|
161
|
+
# - 2 bytes: Magic number (0x35F3)
|
|
162
|
+
# - 2 bytes: Unknown/version
|
|
163
|
+
# - 4 bytes: Directory offset
|
|
164
|
+
# - 4 bytes: Free list offset
|
|
165
|
+
# - 4 bytes: File size
|
|
166
|
+
# - 12 bytes: Reserved/padding
|
|
167
|
+
class WinHelp3Header < BinData::Record
|
|
43
168
|
endian :little
|
|
44
169
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
uint32 :
|
|
48
|
-
uint32 :
|
|
49
|
-
uint32 :
|
|
50
|
-
|
|
170
|
+
uint16 :magic # 0x35F3
|
|
171
|
+
uint16 :unknown
|
|
172
|
+
uint32 :directory_offset
|
|
173
|
+
uint32 :free_list_offset
|
|
174
|
+
uint32 :file_size
|
|
175
|
+
string :reserved, length: 12
|
|
51
176
|
end
|
|
52
177
|
|
|
53
|
-
#
|
|
178
|
+
# Windows Help (WinHelp) 4.x file header (32 bytes)
|
|
54
179
|
#
|
|
55
180
|
# Structure:
|
|
56
|
-
# - 4 bytes:
|
|
57
|
-
# - 4 bytes:
|
|
58
|
-
|
|
181
|
+
# - 4 bytes: Magic number (0x3F5F0000 or similar)
|
|
182
|
+
# - 4 bytes: Directory offset
|
|
183
|
+
# - 4 bytes: Free list offset
|
|
184
|
+
# - 4 bytes: File size
|
|
185
|
+
# - 16 bytes: Reserved/unknown
|
|
186
|
+
class WinHelp4Header < BinData::Record
|
|
187
|
+
endian :little
|
|
188
|
+
|
|
189
|
+
uint32 :magic # 0x3F5F0000 or similar
|
|
190
|
+
uint32 :directory_offset
|
|
191
|
+
uint32 :free_list_offset
|
|
192
|
+
uint32 :file_size
|
|
193
|
+
string :reserved, length: 16
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
# WinHelp internal file directory entry
|
|
197
|
+
#
|
|
198
|
+
# Variable size structure:
|
|
199
|
+
# - 4 bytes: File size
|
|
200
|
+
# - 2 bytes: Starting block number
|
|
201
|
+
# - Variable: File name (null-terminated, aligned)
|
|
202
|
+
class WinHelpDirectoryEntry < BinData::Record
|
|
203
|
+
endian :little
|
|
204
|
+
|
|
205
|
+
uint32 :file_size
|
|
206
|
+
uint16 :starting_block
|
|
207
|
+
stringz :filename
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
# WinHelp B+ tree header (from FILEHEADER of directory)
|
|
211
|
+
#
|
|
212
|
+
# Structure from helpdeco:
|
|
213
|
+
# - 2 bytes: Magic (0x293B)
|
|
214
|
+
# - 2 bytes: Flags (bit 0x0002 always 1, bit 0x0400 1 if directory)
|
|
215
|
+
# - 2 bytes: PageSize (0x0400=1k if directory, 0x0800=2k else)
|
|
216
|
+
# - 16 bytes: Structure (string describing structure of data)
|
|
217
|
+
# - 2 bytes: MustBeZero (0)
|
|
218
|
+
# - 2 bytes: PageSplits (number of page splits Btree has suffered)
|
|
219
|
+
# - 2 bytes: RootPage (page number of Btree root page)
|
|
220
|
+
# - 2 bytes: MustBeNegOne (0xFFFF)
|
|
221
|
+
# - 2 bytes: TotalPages (number of Btree pages)
|
|
222
|
+
# - 2 bytes: NLevels (number of levels of Btree)
|
|
223
|
+
# - 4 bytes: TotalBtreeEntries (number of entries in Btree)
|
|
224
|
+
#
|
|
225
|
+
# Total: 38 bytes (not 30!)
|
|
226
|
+
class WinHelpBTreeHeader < BinData::Record
|
|
227
|
+
endian :little
|
|
228
|
+
|
|
229
|
+
uint16 :magic # 0x293B
|
|
230
|
+
uint16 :flags
|
|
231
|
+
uint16 :page_size
|
|
232
|
+
string :structure, length: 16
|
|
233
|
+
int16 :must_be_zero
|
|
234
|
+
int16 :page_splits
|
|
235
|
+
int16 :root_page
|
|
236
|
+
int16 :must_be_neg_one
|
|
237
|
+
int16 :total_pages
|
|
238
|
+
int16 :n_levels
|
|
239
|
+
int32 :total_btree_entries
|
|
240
|
+
# Total: 2 + 2 + 2 + 16 + 2 + 2 + 2 + 2 + 2 + 2 + 4 = 38 bytes
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
# WinHelp B+ tree leaf node header
|
|
244
|
+
#
|
|
245
|
+
# Structure at beginning of every leaf-page:
|
|
246
|
+
# - 2 bytes: Unknown (no ID to identify leaf-page)
|
|
247
|
+
# - 2 bytes: NEntries (number of entries in this leaf-page)
|
|
248
|
+
# - 2 bytes: PreviousPage (page number of preceding leaf-page or -1)
|
|
249
|
+
# - 2 bytes: NextPage (page number of next leaf-page or -1)
|
|
250
|
+
class WinHelpBTreeNodeHeader < BinData::Record
|
|
251
|
+
endian :little
|
|
252
|
+
|
|
253
|
+
uint16 :unknown
|
|
254
|
+
int16 :n_entries
|
|
255
|
+
int16 :previous_page
|
|
256
|
+
int16 :next_page
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
# WinHelp B+ tree index node header (for internal nodes)
|
|
260
|
+
#
|
|
261
|
+
# Structure at beginning of every index-page:
|
|
262
|
+
# - 2 bytes: Unknown (no ID to identify index-page)
|
|
263
|
+
# - 2 bytes: NEntries (number of entries in this index-page)
|
|
264
|
+
# - 2 bytes: PreviousPage (page number of previous page)
|
|
265
|
+
class WinHelpBTreeIndexHeader < BinData::Record
|
|
266
|
+
endian :little
|
|
267
|
+
|
|
268
|
+
uint16 :unknown
|
|
269
|
+
int16 :n_entries
|
|
270
|
+
int16 :previous_page
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
# WinHelp FILEHEADER structure at FileOffset of each internal file
|
|
274
|
+
#
|
|
275
|
+
# - 4 bytes: ReservedSpace (reserved space in help file incl. FILEHEADER)
|
|
276
|
+
# - 4 bytes: UsedSpace (used space in help file excl. FILEHEADER)
|
|
277
|
+
# - 1 byte: FileFlags (normally 4)
|
|
278
|
+
class WinHelpFileHeader < BinData::Record
|
|
59
279
|
endian :little
|
|
60
280
|
|
|
61
|
-
|
|
62
|
-
|
|
281
|
+
int32 :reserved_space
|
|
282
|
+
int32 :used_space
|
|
283
|
+
uint8 :file_flags
|
|
63
284
|
end
|
|
64
285
|
end
|
|
65
286
|
end
|