cabriolet 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../binary/hlp_structures"
|
|
4
|
+
require_relative "../../models/winhelp_header"
|
|
5
|
+
require_relative "../../errors"
|
|
6
|
+
require_relative "../../system/io_system"
|
|
7
|
+
require_relative "../../constants"
|
|
8
|
+
|
|
9
|
+
module Cabriolet
|
|
10
|
+
module HLP
|
|
11
|
+
module WinHelp
|
|
12
|
+
# Parser for Windows Help (WinHelp) files
|
|
13
|
+
#
|
|
14
|
+
# Parses Windows Help files (3.x and 4.x formats) used in Windows 3.0+
|
|
15
|
+
# through Windows XP.
|
|
16
|
+
#
|
|
17
|
+
# Format structure:
|
|
18
|
+
# - File Header (28 or 32 bytes depending on version)
|
|
19
|
+
# - Internal File Directory
|
|
20
|
+
# - File Data Blocks (|SYSTEM, |TOPIC, etc.)
|
|
21
|
+
#
|
|
22
|
+
# Magic numbers:
|
|
23
|
+
# - WinHelp 3.x (16-bit): 0x35F3
|
|
24
|
+
# - WinHelp 4.x (32-bit): low 16 bits are 0x5F3F or 0x3F5F (upper 16 bits vary)
|
|
25
|
+
class Parser
|
|
26
|
+
attr_reader :io_system
|
|
27
|
+
|
|
28
|
+
# Build a parser bound to an I/O backend.
#
# @param io_system [System::IOSystem, nil] I/O backend to use; when nil
#   (or otherwise falsy) a new System::IOSystem is created instead
def initialize(io_system = nil)
  @io_system = io_system
  @io_system ||= System::IOSystem.new
end
|
|
34
|
+
|
|
35
|
+
# Open a WinHelp file, parse it and hand back the populated header.
#
# The handle obtained from the I/O system is closed in all cases, including
# when parsing raises.
#
# @param filename [String] Path to WinHelp file
# @return [Models::WinHelpHeader] Parsed header with +filename+ assigned
# @raise [Cabriolet::ParseError] if file is not valid WinHelp
def parse(filename)
  handle = @io_system.open(filename, Constants::MODE_READ)

  begin
    parse_file(handle).tap { |parsed| parsed.filename = filename }
  ensure
    @io_system.close(handle)
  end
end
|
|
51
|
+
|
|
52
|
+
private
|
|
53
|
+
|
|
54
|
+
# Detect the file's format version and dispatch to the matching parser.
#
# @param handle [System::FileHandle] Open file handle
# @return [Models::WinHelpHeader] Parsed header
# @raise [Cabriolet::ParseError] if the version is not recognised or
#   parsing fails
def parse_file(handle)
  version = detect_version(handle)

  return parse_winhelp3(handle) if version == :winhelp3
  return parse_winhelp4(handle) if version == :winhelp4

  raise Cabriolet::ParseError, "Unknown WinHelp version"
end
|
|
72
|
+
|
|
73
|
+
# Identify the WinHelp flavour from the first four bytes of the file.
#
# The first two bytes, read as a little-endian word, select 3.x when they
# equal 0x35F3. Otherwise the low 16 bits of the little-endian dword
# select 4.x when they equal 0x5F3F or 0x3F5F.
#
# @param handle [System::FileHandle] Open file handle
# @return [Symbol] :winhelp3 or :winhelp4
# @raise [Cabriolet::ParseError] if fewer than 4 bytes are available or
#   the magic number is not recognised
def detect_version(handle)
  @io_system.seek(handle, 0, Constants::SEEK_START)
  signature = @io_system.read(handle, 4)

  if signature.nil? || signature.bytesize < 4
    raise Cabriolet::ParseError,
          "File too small for WinHelp header"
  end

  # 16-bit magic check (WinHelp 3.x)
  low_word = signature.unpack1("v")
  return :winhelp3 if low_word == 0x35F3

  # 32-bit magic check (WinHelp 4.x) on the low half only
  magic_dword = signature.unpack1("V")
  return :winhelp4 if [0x5F3F, 0x3F5F].include?(magic_dword & 0xFFFF)

  raise Cabriolet::ParseError,
        "Unknown WinHelp magic: 0x#{magic_dword.to_s(16).upcase}"
end
|
|
98
|
+
|
|
99
|
+
# Parse a WinHelp 3.x file: read the 28-byte header, validate its magic,
# build the header model and populate its internal file directory.
#
# @param handle [System::FileHandle] Open file handle
# @return [Models::WinHelpHeader] Parsed header
# @raise [Cabriolet::ParseError] if the header is truncated or the magic
#   is not 0x35F3
def parse_winhelp3(handle)
  @io_system.seek(handle, 0, Constants::SEEK_START)
  header_data = @io_system.read(handle, 28)

  # Guard against nil as well as a short read, as the other readers in this
  # class do, so a truncated file surfaces as ParseError, not NoMethodError
  if header_data.nil? || header_data.bytesize < 28
    raise Cabriolet::ParseError,
          "File too small for WinHelp 3.x header"
  end

  binary_header = Binary::HLPStructures::WinHelp3Header.read(header_data)

  # Validate magic
  unless binary_header.magic == 0x35F3
    raise Cabriolet::ParseError,
          "Invalid WinHelp 3.x magic: 0x#{binary_header.magic.to_i.to_s(16)}"
  end

  # Create header model
  header = Models::WinHelpHeader.new(
    version: :winhelp3,
    magic: binary_header.magic,
    directory_offset: binary_header.directory_offset,
    free_list_offset: binary_header.free_list_offset,
    file_size: binary_header.file_size,
  )

  # Parse directory (WinHelp 3.x format: variable-length entries)
  parse_directory_winhelp3(handle, header)

  header
end
|
|
134
|
+
|
|
135
|
+
# Parse a WinHelp 4.x file: read the 32-byte header, validate its magic,
# build the header model and populate its internal file directory.
#
# @param handle [System::FileHandle] Open file handle
# @return [Models::WinHelpHeader] Parsed header
# @raise [Cabriolet::ParseError] if the header is truncated or the low
#   16 bits of the magic are neither 0x5F3F nor 0x3F5F
def parse_winhelp4(handle)
  @io_system.seek(handle, 0, Constants::SEEK_START)
  header_data = @io_system.read(handle, 32)

  # Guard against nil as well as a short read, as the other readers in this
  # class do, so a truncated file surfaces as ParseError, not NoMethodError
  if header_data.nil? || header_data.bytesize < 32
    raise Cabriolet::ParseError,
          "File too small for WinHelp 4.x header"
  end

  binary_header = Binary::HLPStructures::WinHelp4Header.read(header_data)

  # Validate magic (lower 16 bits should be 0x5F3F or 0x3F5F)
  magic_val = binary_header.magic.respond_to?(:to_i) ? binary_header.magic.to_i : binary_header.magic
  magic_low = magic_val & 0xFFFF
  unless magic_low == 0x5F3F || magic_low == 0x3F5F
    raise Cabriolet::ParseError,
          "Invalid WinHelp 4.x magic: 0x#{magic_val.to_s(16)}"
  end

  # The structure reads 4 bytes for magic, but the format may instead be:
  #   2 bytes magic (0x5F3F), 2 bytes version/flags, 4 bytes directory_offset
  #
  # Heuristic: when the upper half of the magic dword (bytes 2-3) is larger
  # than 255 the file is treated as the 2-byte-magic layout and
  # directory_offset is shifted by +2; otherwise it is used as read.
  version_bytes = (magic_val >> 16) & 0xFFFF
  directory_offset = binary_header.directory_offset
  directory_offset += 2 if version_bytes > 255

  # Create header model
  header = Models::WinHelpHeader.new(
    version: :winhelp4,
    magic: binary_header.magic,
    directory_offset: directory_offset,
    free_list_offset: binary_header.free_list_offset,
    file_size: binary_header.file_size,
  )

  # Parse directory (WinHelp 4.x format: B+ tree)
  parse_directory_winhelp4(handle, header)

  header
end
|
|
183
|
+
|
|
184
|
+
# Parse WinHelp 3.x internal file directory
#
# WinHelp 3.x directory structure:
# - Directory starts at directory_offset
# - Each entry is variable length:
#   - 4 bytes: file size
#   - 2 bytes: starting block number
#   - Null-terminated filename (padded to even length)
# - End of directory marked by zero size
#
# Does nothing when directory_offset is zero. Otherwise resets
# header.internal_files to an empty array, positions the handle at the
# directory and lets parse_directory_variable read the entries, so the
# entry-reading loop exists in one place only.
#
# @param handle [System::FileHandle] Open file handle
# @param header [Models::WinHelpHeader] Header to populate
def parse_directory_winhelp3(handle, header)
  return if header.directory_offset.zero?

  @io_system.seek(handle, header.directory_offset, Constants::SEEK_START)
  header.internal_files = []

  parse_directory_variable(handle, header)
end
|
|
246
|
+
|
|
247
|
+
# Parse WinHelp 4.x internal file directory using B+ tree
#
# Layout read here, starting at directory_offset:
# - FILEHEADER (9 bytes), read only to step over it
# - BTREEHEADER (38 bytes), validated against magic 0x293B
# - B+ tree pages, starting at the position right after BTREEHEADER
#
# Does nothing when directory_offset is zero.
#
# @param handle [System::FileHandle] Open file handle
# @param header [Models::WinHelpHeader] Header to populate
# @raise [ParseError] if either header is truncated or the B+ tree magic
#   is wrong
def parse_directory_winhelp4(handle, header)
  return if header.directory_offset.zero?

  @io_system.seek(handle, header.directory_offset, Constants::SEEK_START)

  # FILEHEADER: contents are not inspected, only its presence is required
  file_header_bytes = @io_system.read(handle, 9)
  if file_header_bytes.nil? || file_header_bytes.bytesize < 9
    raise Cabriolet::ParseError,
          "Failed to read FILEHEADER"
  end

  btree_bytes = @io_system.read(handle, 38)
  if btree_bytes.nil? || btree_bytes.bytesize < 38
    raise Cabriolet::ParseError,
          "Failed to read BTREEHEADER"
  end

  btree_header = Binary::HLPStructures::WinHelpBTreeHeader.read(btree_bytes)
  unless btree_header.magic == 0x293B
    raise Cabriolet::ParseError,
          "Invalid B+ tree magic: 0x#{btree_header.magic.to_i.to_s(16)}"
  end

  # Pages begin immediately after the B+ tree header
  pages_start = @io_system.tell(handle)

  header.internal_files = []
  parse_btree_files(handle, header, btree_header, pages_start)
end
|
|
292
|
+
|
|
293
|
+
# Parse all files from WinHelp B+ tree
#
# Descends from the root page through the index levels (n_levels - 1 of
# them) to a leaf page, then walks the leaf pages through their next_page
# links, appending one hash per named entry to header.internal_files with
# keys :filename, :file_size and :file_offset.
#
# Parsing is best-effort: a truncated page or an unreadable index page
# stops the walk and keeps whatever was collected so far. A leaf page is
# never visited twice, so a corrupt next_page chain cannot loop forever.
#
# @param handle [System::FileHandle] Open file handle
# @param header [Models::WinHelpHeader] Header to populate
# @param btree_header [Binary::HLPStructures::WinHelpBTreeHeader] B+ tree header
# @param first_page_offset [Integer] Offset of first B+ tree page
def parse_btree_files(handle, header, btree_header, first_page_offset)
  return unless btree_header.total_btree_entries.positive?

  page_size = btree_header.page_size.to_i
  current_page = btree_header.root_page.to_i

  # Walk down the index levels; runs zero times for a single-level tree
  (btree_header.n_levels - 1).times do
    @io_system.seek(handle, first_page_offset + (current_page * page_size), Constants::SEEK_START)

    index_header_data = @io_system.read(handle, 6)
    return if index_header_data.nil? || index_header_data.bytesize < 6

    current_page = read_first_page_from_index(handle, index_header_data)
    # No child page could be determined: stop instead of computing an
    # offset from nil
    return if current_page.nil?
  end

  visited_pages = {}

  loop do
    # Cycle guard against a next_page chain that points back at itself
    break if visited_pages.key?(current_page)

    visited_pages[current_page] = true

    @io_system.seek(handle, first_page_offset + (current_page * page_size), Constants::SEEK_START)

    # Read leaf node header
    leaf_header_data = @io_system.read(handle, 8)
    break if leaf_header_data.nil? || leaf_header_data.bytesize < 8

    leaf_header = Binary::HLPStructures::WinHelpBTreeNodeHeader.read(leaf_header_data)

    # Each entry: null-terminated filename followed by a 4-byte LE offset
    leaf_header.n_entries.times do
      filename = read_cstring(handle)
      break if filename.nil?

      offset_data = @io_system.read(handle, 4)
      break if offset_data.nil? || offset_data.bytesize < 4

      file_offset = offset_data.unpack1("V")

      # Skip empty filenames
      next if filename.empty?

      # read_file_size seeks away, so remember where the next entry starts
      resume_at = @io_system.tell(handle)
      file_size = read_file_size(handle, file_offset)
      @io_system.seek(handle, resume_at, Constants::SEEK_START)

      header.internal_files << {
        filename: filename,
        file_size: file_size,
        file_offset: file_offset, # Store actual offset, not block number
      }
    end

    # -1 marks the last leaf; any negative page number cannot be followed
    next_page = leaf_header.next_page.to_i
    break if next_page.negative?

    current_page = next_page
  end
end
|
|
371
|
+
|
|
372
|
+
# Determine the leftmost child page of a B+ tree index page
#
# The 6-byte index page header is taken to be: 2 unused bytes, a 2-byte
# entry count and a 2-byte signed PreviousPage, the page holding keys
# lower than the first entry (layout as described in the helpdeco format
# notes). That page is the leftmost child, so it is returned when present.
#
# When the header is missing, too short, or its PreviousPage is negative,
# this falls back to reading the first (filename, page_number) entry from
# the handle, which must then be positioned right after the index header.
#
# @param handle [System::FileHandle] Open file handle
# @param index_header_data [String] Index header data (6 bytes)
# @return [Integer, nil] First page number or nil on error
def read_first_page_from_index(handle, index_header_data)
  if !index_header_data.nil? && index_header_data.bytesize >= 6
    previous_page = index_header_data.byteslice(4, 2).unpack1("s<")
    return previous_page unless previous_page.negative?
  end

  # Fallback: skip the first key and use the page number stored after it
  filename = read_cstring(handle)
  return nil if filename.nil?

  # Read page number (2-byte LE)
  page_data = @io_system.read(handle, 2)
  return nil if page_data.nil? || page_data.bytesize < 2

  page_data.unpack1("v")
end
|
|
390
|
+
|
|
391
|
+
# Look up an internal file's size from the FILEHEADER stored at an offset
#
# Seeks the handle to file_offset and leaves it after the 9 header bytes;
# callers that care about the position must restore it themselves.
#
# @param handle [System::FileHandle] Open file handle
# @param file_offset [Integer] Offset of FILEHEADER
# @return [Integer] used_space from the FILEHEADER, or 0 when fewer than
#   9 bytes could be read
def read_file_size(handle, file_offset)
  @io_system.seek(handle, file_offset, Constants::SEEK_START)

  raw_header = @io_system.read(handle, 9)
  return 0 if raw_header.nil? || raw_header.bytesize < 9

  Binary::HLPStructures::WinHelpFileHeader.read(raw_header).used_space
end
|
|
409
|
+
|
|
410
|
+
# Parse variable-length directory entries (WinHelp 3.x style)
#
# Reads entries from the handle's current position until a zero file size
# or a short read is met. Each entry is a 4-byte LE size, a 2-byte LE
# starting block and a null-terminated filename followed by padding to an
# even position. Entries with an empty filename are skipped; the others
# are appended to header.internal_files, which must already be an array.
#
# @param handle [System::FileHandle] Open file handle
# @param header [Models::WinHelpHeader] Header to populate
def parse_directory_variable(handle, header)
  loop do
    raw_size = @io_system.read(handle, 4)
    break if raw_size.nil? || raw_size.bytesize < 4

    file_size = raw_size.unpack1("V")
    break if file_size.zero? # end-of-directory marker

    raw_block = @io_system.read(handle, 2)
    break if raw_block.nil? || raw_block.bytesize < 2

    starting_block = raw_block.unpack1("v")

    # Collect filename bytes up to the terminating NUL (or end of data)
    filename = +""
    while (chunk = @io_system.read(handle, 1))
      break if chunk.empty?

      code = chunk.getbyte(0)
      break if code.zero?

      filename << code.chr
    end

    # Step over the pad byte, if any
    align_read(handle)

    next if filename.empty?

    entry = {
      filename: filename,
      file_size: file_size,
      starting_block: starting_block,
    }
    header.internal_files << entry
  end
end
|
|
453
|
+
|
|
454
|
+
# Read a null-terminated string, one byte at a time, from the handle
#
# The terminating NUL is consumed but not included in the result.
#
# @param handle [System::FileHandle] Open file handle
# @return [String, nil] The string, or nil when the data ends before a
#   NUL byte is found
def read_cstring(handle)
  text = +""

  while (chunk = @io_system.read(handle, 1))
    return nil if chunk.empty?

    code = chunk.getbyte(0)
    return text if code.zero?

    text << code.chr
  end

  nil
end
|
|
471
|
+
|
|
472
|
+
# Advance the handle to an even position (skip padding after filename)
#
# Consumes a single byte when the current position is odd and does
# nothing otherwise.
#
# @param handle [System::FileHandle] Open file handle
def align_read(handle)
  return unless @io_system.tell(handle).odd?

  @io_system.read(handle, 1)
end
|
|
481
|
+
end
|
|
482
|
+
end
|
|
483
|
+
end
|
|
484
|
+
end
|