cabriolet 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +700 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +154 -14
  6. data/lib/cabriolet/binary/bitstream_writer.rb +129 -17
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +35 -43
  13. data/lib/cabriolet/cab/decompressor.rb +14 -19
  14. data/lib/cabriolet/cab/extractor.rb +140 -31
  15. data/lib/cabriolet/chm/command_handler.rb +227 -0
  16. data/lib/cabriolet/chm/compressor.rb +7 -3
  17. data/lib/cabriolet/chm/decompressor.rb +39 -21
  18. data/lib/cabriolet/chm/parser.rb +5 -2
  19. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  20. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  21. data/lib/cabriolet/cli/command_registry.rb +83 -0
  22. data/lib/cabriolet/cli.rb +356 -607
  23. data/lib/cabriolet/compressors/base.rb +1 -1
  24. data/lib/cabriolet/compressors/lzx.rb +241 -54
  25. data/lib/cabriolet/compressors/mszip.rb +35 -3
  26. data/lib/cabriolet/compressors/quantum.rb +34 -45
  27. data/lib/cabriolet/decompressors/base.rb +1 -1
  28. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  29. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  30. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  31. data/lib/cabriolet/decompressors/quantum.rb +3 -2
  32. data/lib/cabriolet/errors.rb +3 -0
  33. data/lib/cabriolet/file_entry.rb +156 -0
  34. data/lib/cabriolet/file_manager.rb +144 -0
  35. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  36. data/lib/cabriolet/hlp/compressor.rb +28 -238
  37. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  38. data/lib/cabriolet/hlp/parser.rb +52 -101
  39. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  40. data/lib/cabriolet/hlp/quickhelp/compressor.rb +626 -0
  41. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  42. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  43. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  44. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  45. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  46. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  47. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  48. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  49. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  50. data/lib/cabriolet/huffman/tree.rb +85 -1
  51. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  52. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  53. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  54. data/lib/cabriolet/lit/command_handler.rb +221 -0
  55. data/lib/cabriolet/lit/compressor.rb +633 -38
  56. data/lib/cabriolet/lit/decompressor.rb +518 -152
  57. data/lib/cabriolet/lit/parser.rb +670 -0
  58. data/lib/cabriolet/models/hlp_file.rb +130 -29
  59. data/lib/cabriolet/models/hlp_header.rb +105 -17
  60. data/lib/cabriolet/models/lit_header.rb +212 -25
  61. data/lib/cabriolet/models/szdd_header.rb +10 -2
  62. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  63. data/lib/cabriolet/oab/command_handler.rb +257 -0
  64. data/lib/cabriolet/oab/compressor.rb +17 -8
  65. data/lib/cabriolet/oab/decompressor.rb +41 -10
  66. data/lib/cabriolet/offset_calculator.rb +81 -0
  67. data/lib/cabriolet/plugin.rb +233 -0
  68. data/lib/cabriolet/plugin_manager.rb +453 -0
  69. data/lib/cabriolet/plugin_validator.rb +422 -0
  70. data/lib/cabriolet/system/io_system.rb +3 -0
  71. data/lib/cabriolet/system/memory_handle.rb +17 -4
  72. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  73. data/lib/cabriolet/szdd/compressor.rb +15 -11
  74. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  75. data/lib/cabriolet/version.rb +1 -1
  76. data/lib/cabriolet.rb +67 -17
  77. metadata +33 -2
@@ -0,0 +1,192 @@
1
+ # frozen_string_literal: true
2
+
3
require "fileutils"

require_relative "parser"
require_relative "zeck_lz77"
require_relative "../../system/io_system"
require_relative "../../constants"
7
+
8
+ module Cabriolet
9
+ module HLP
10
+ module WinHelp
11
+ # Decompressor for Windows Help files
12
+ #
13
+ # Extracts and decompresses content from WinHelp files using:
14
+ # - WinHelp::Parser for file structure
15
+ # - ZeckLZ77 for topic decompression
16
+ #
17
+ # Handles both WinHelp 3.x and 4.x formats.
18
+ class Decompressor
19
+ attr_reader :io_system, :header
20
+
21
# Build a decompressor bound to a single WinHelp file.
#
# Nothing is read from disk here; the header stays nil until #parse runs.
#
# @param filename [String] Path to WinHelp file
# @param io_system [System::IOSystem, nil] Custom I/O system; a new
#   System::IOSystem is created when none is given
def initialize(filename, io_system = nil)
  @header = nil
  @filename = filename
  @io_system = io_system || System::IOSystem.new
  @parser = Parser.new(@io_system)
  @zeck = ZeckLZ77.new
end
32
+
33
# Run the parser over the file and remember the resulting header.
#
# @return [Models::WinHelpHeader] Parsed header (also stored in @header)
def parse
  parsed_header = @parser.parse(@filename)
  @header = parsed_header
end
39
+
40
# Read the raw bytes of one internal file.
#
# The read position is the entry's :file_offset when present (B+ tree
# directory); otherwise it is derived from :starting_block using a fixed
# 4096-byte block size (WinHelp 3.x directory).
#
# NOTE(review): the parser in this package describes :file_offset as the
# position of the file's 9-byte FILEHEADER and :file_size as its UsedSpace,
# so for B+ tree entries the returned bytes start at that header - confirm
# this is what callers expect.
#
# @param filename [String] Internal filename (e.g., "|SYSTEM", "|TOPIC")
# @return [String, nil] Raw file data or nil if not found
def extract_internal_file(filename)
  parse unless @header

  entry = @header.find_file(filename)
  return nil unless entry

  # Prefer the explicit offset; fall back to block number * block size.
  offset = entry[:file_offset] || (entry[:starting_block] * 4096)

  handle = @io_system.open(@filename, Constants::MODE_READ)
  begin
    @io_system.seek(handle, offset, Constants::SEEK_START)
    @io_system.read(handle, entry[:file_size])
  ensure
    # Always release the handle, even when seek/read raises.
    @io_system.close(handle)
  end
end
69
+
70
# Convenience wrapper: fetch the internal file named "|SYSTEM".
#
# @return [String, nil] System file data, or nil when the file is absent
def extract_system_file
  system_name = "|SYSTEM"
  extract_internal_file(system_name)
end
76
+
77
# Convenience wrapper: fetch the internal file named "|TOPIC".
#
# @return [String, nil] Topic file data as stored in the file, or nil
#   when the file is absent
def extract_topic_file
  topic_name = "|TOPIC"
  extract_internal_file(topic_name)
end
83
+
84
# Hand compressed topic bytes to the Zeck LZ77 decoder.
#
# @param compressed_data [String] Compressed topic data
# @param output_size [Integer] Expected decompressed size
# @return [String] Whatever the ZeckLZ77 instance returns for this input
def decompress_topic(compressed_data, output_size)
  decoder = @zeck
  decoder.decompress(compressed_data, output_size)
end
92
+
93
# Return the contents of |TOPIC wrapped as a one-element topic list.
#
# Simplified implementation: the |TOPIC bytes are returned untouched as a
# single entry; topic block headers are not parsed and nothing is
# decompressed here.
#
# @return [Array<Hash>] Empty when |TOPIC is missing, otherwise one hash
#   with :index (0), :data (raw bytes) and :compressed (true)
def extract_topics
  parse unless @header

  raw_topic = extract_topic_file
  return [] unless raw_topic

  [{ index: 0, data: raw_topic, compressed: true }]
end
113
+
114
# Extract every internal file into +output_dir+.
#
# Each internal name is passed through #sanitize_filename. Different
# internal names can sanitize to the same string (for example "a:b" and
# "a?b" both become "a_b"); in that case a numeric suffix ("_1", "_2", ...)
# is appended so a later file never overwrites one written earlier in the
# same run, and the returned count matches the files actually on disk.
#
# @param output_dir [String] Output directory path (created if missing)
# @return [Integer] Number of files written
def extract_all(output_dir)
  parse unless @header

  FileUtils.mkdir_p(output_dir)

  written_names = {}
  count = 0
  @header.internal_files.each do |file_entry|
    data = extract_internal_file(file_entry[:filename])
    next unless data

    # Sanitize filename for the file system, then make it unique within
    # this extraction run.
    base_name = sanitize_filename(file_entry[:filename])
    safe_name = base_name
    suffix = 0
    safe_name = "#{base_name}_#{suffix += 1}" while written_names.key?(safe_name)
    written_names[safe_name] = true

    File.binwrite(File.join(output_dir, safe_name), data)
    count += 1
  end

  count
end
138
+
139
# Turn an internal WinHelp filename into a name that is safe to create
# inside an output directory.
#
# Steps, in order:
# 1. transcode to ASCII, replacing anything unrepresentable with "_"
# 2. replace ASCII control characters (0x00-0x1F, 0x7F) with "_"
# 3. replace "|" with "_pipe_"
# 4. replace / \ : < > " ? * with "_"
# 5. collapse runs of "_" and strip them from both ends of the string
# 6. fall back to "_unnamed_file_" when nothing usable is left, including
#    names made only of dots ("." / ".."), which would otherwise resolve
#    to a directory when joined with the output path
#
# @param filename [String] Internal filename
# @return [String] Safe filename
def sanitize_filename(filename)
  sanitized = filename.encode("ASCII", invalid: :replace,
                                       undef: :replace, replace: "_")

  # Control characters (newlines, tabs, ...) are valid ASCII, so the
  # transcoding above leaves them in place; neutralise them explicitly.
  sanitized = sanitized.gsub(/[\x00-\x1F\x7F]/, "_")

  # Keep a readable marker for the leading pipe of names like "|SYSTEM".
  sanitized = sanitized.gsub("|", "_pipe_")

  # Remaining characters that are invalid in file names on common systems.
  sanitized = sanitized.gsub(%r{[/\\:<>"?*]}, "_")

  sanitized = sanitized.squeeze("_")

  # \A / \z anchor the whole string (^ / $ would match at every line).
  sanitized = sanitized.gsub(/\A_+|_+\z/, "")

  sanitized = "_unnamed_file_" if sanitized.empty? || sanitized.match?(/\A\.+\z/)

  sanitized
end
165
+
166
# Names of the internal files, as reported by the parsed header.
# Parses the file first when that has not happened yet.
#
# @return [Array<String>] Internal file names
def internal_filenames
  parse unless @header

  @header.internal_filenames
end
173
+
174
# Ask the parsed header whether a |SYSTEM file is present.
# Parses the file first when that has not happened yet.
#
# @return [Boolean] true if |SYSTEM present
def has_system_file?
  parse unless @header

  @header.has_system_file?
end
181
+
182
# Ask the parsed header whether a |TOPIC file is present.
# Parses the file first when that has not happened yet.
#
# @return [Boolean] true if |TOPIC present
def has_topic_file?
  parse unless @header

  @header.has_topic_file?
end
189
+ end
190
+ end
191
+ end
192
+ end
@@ -0,0 +1,484 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../binary/hlp_structures"
4
+ require_relative "../../models/winhelp_header"
5
+ require_relative "../../errors"
6
+ require_relative "../../system/io_system"
7
+ require_relative "../../constants"
8
+
9
+ module Cabriolet
10
+ module HLP
11
+ module WinHelp
12
+ # Parser for Windows Help (WinHelp) files
13
+ #
14
+ # Parses Windows Help files (3.x and 4.x formats) used in Windows 3.0+
15
+ # through Windows XP.
16
+ #
17
+ # Format structure:
18
+ # - File Header (28 or 32 bytes depending on version)
19
+ # - Internal File Directory
20
+ # - File Data Blocks (|SYSTEM, |TOPIC, etc.)
21
+ #
22
+ # Magic numbers:
23
+ # - WinHelp 3.x (16-bit): 0x35F3
24
+ # - WinHelp 4.x (32-bit): 0x3F5F0000 (varies)
25
+ class Parser
26
+ attr_reader :io_system
27
+
28
# Create a parser that performs all file access through +io_system+.
#
# @param io_system [System::IOSystem, nil] Custom I/O system; a new
#   System::IOSystem is created when none is given
def initialize(io_system = nil)
  default_needed = !io_system
  @io_system = default_needed ? System::IOSystem.new : io_system
end
34
+
35
# Open +filename+, parse its WinHelp structure and return the header.
#
# The handle is closed again whether parsing succeeds or raises.
#
# @param filename [String] Path to WinHelp file
# @return [Models::WinHelpHeader] Parsed header with its filename set
# @raise [Cabriolet::ParseError] if file is not valid WinHelp
def parse(filename)
  handle = @io_system.open(filename, Constants::MODE_READ)

  begin
    parse_file(handle).tap { |parsed| parsed.filename = filename }
  ensure
    @io_system.close(handle)
  end
end
51
+
52
+ private
53
+
54
# Detect the format version and dispatch to the matching header parser.
#
# @param handle [System::FileHandle] Open file handle
# @return [Models::WinHelpHeader] Parsed header
# @raise [Cabriolet::ParseError] if the version is not recognised
def parse_file(handle)
  parsers = { winhelp3: :parse_winhelp3, winhelp4: :parse_winhelp4 }
  parser_name = parsers[detect_version(handle)]

  raise Cabriolet::ParseError, "Unknown WinHelp version" unless parser_name

  send(parser_name, handle)
end
72
+
73
# Identify the WinHelp flavour from the first four bytes of the file.
#
# The first little-endian 16-bit word equal to 0x35F3 selects :winhelp3.
# Otherwise the low 16 bits of the little-endian 32-bit value must be
# 0x5F3F or 0x3F5F to select :winhelp4.
#
# @param handle [System::FileHandle] Open file handle
# @return [Symbol] :winhelp3 or :winhelp4
# @raise [Cabriolet::ParseError] if fewer than 4 bytes can be read or the
#   magic number is not recognised
def detect_version(handle)
  @io_system.seek(handle, 0, Constants::SEEK_START)
  signature = @io_system.read(handle, 4)

  if signature.nil? || signature.bytesize < 4
    raise Cabriolet::ParseError,
          "File too small for WinHelp header"
  end

  # "v" consumes only the first two bytes of the signature.
  return :winhelp3 if signature.unpack1("v") == 0x35F3

  dword = signature.unpack1("V")
  return :winhelp4 if [0x5F3F, 0x3F5F].include?(dword & 0xFFFF)

  raise Cabriolet::ParseError,
        "Unknown WinHelp magic: 0x#{dword.to_s(16).upcase}"
end
98
+
99
# Parse a WinHelp 3.x file: fixed 28-byte header followed by the
# variable-length internal file directory.
#
# @param handle [System::FileHandle] Open file handle
# @return [Models::WinHelpHeader] Parsed header with internal files populated
# @raise [Cabriolet::ParseError] if the header cannot be read in full or
#   its magic is not 0x35F3
def parse_winhelp3(handle)
  @io_system.seek(handle, 0, Constants::SEEK_START)
  header_data = @io_system.read(handle, 28)

  # A read may yield nil (nothing available) as well as a short string;
  # both mean the header is incomplete.
  if header_data.nil? || header_data.bytesize < 28
    raise Cabriolet::ParseError,
          "File too small for WinHelp 3.x header"
  end

  binary_header = Binary::HLPStructures::WinHelp3Header.read(header_data)

  # Validate magic
  unless binary_header.magic == 0x35F3
    raise Cabriolet::ParseError,
          "Invalid WinHelp 3.x magic: 0x#{binary_header.magic.to_i.to_s(16)}"
  end

  # Create header model
  header = Models::WinHelpHeader.new(
    version: :winhelp3,
    magic: binary_header.magic,
    directory_offset: binary_header.directory_offset,
    free_list_offset: binary_header.free_list_offset,
    file_size: binary_header.file_size,
  )

  # Parse directory (WinHelp 3.x format: variable-length entries)
  parse_directory_winhelp3(handle, header)

  header
end
134
+
135
# Parse a WinHelp 4.x file: fixed 32-byte header followed by the B+ tree
# internal file directory.
#
# @param handle [System::FileHandle] Open file handle
# @return [Models::WinHelpHeader] Parsed header with internal files populated
# @raise [Cabriolet::ParseError] if the header cannot be read in full or
#   the low 16 bits of its magic are neither 0x5F3F nor 0x3F5F
def parse_winhelp4(handle)
  @io_system.seek(handle, 0, Constants::SEEK_START)
  header_data = @io_system.read(handle, 32)

  # A read may yield nil (nothing available) as well as a short string;
  # both mean the header is incomplete.
  if header_data.nil? || header_data.bytesize < 32
    raise Cabriolet::ParseError,
          "File too small for WinHelp 4.x header"
  end

  binary_header = Binary::HLPStructures::WinHelp4Header.read(header_data)

  # Validate magic (lower 16 bits should be 0x5F3F or 0x3F5F)
  magic_val = binary_header.magic.respond_to?(:to_i) ? binary_header.magic.to_i : binary_header.magic
  unless (magic_val & 0xFFFF) == 0x5F3F || (magic_val & 0xFFFF) == 0x3F5F
    raise Cabriolet::ParseError,
          "Invalid WinHelp 4.x magic: 0x#{magic_val.to_s(16)}"
  end

  # Determine if directory_offset needs +2 adjustment
  # The BinData structure reads 4 bytes for magic, but the actual format has:
  # - 2 bytes: magic (0x5F3F)
  # - 2 bytes: version/flags
  # - 4 bytes: directory_offset
  #
  # If the version field (bytes 2-3) has a non-zero high byte, it's a 2-byte magic format
  # and directory_offset needs +2 adjustment. If version is small (< 256),
  # it's likely a 4-byte magic format where directory_offset is already correct.
  version_bytes = (magic_val >> 16) & 0xFFFF
  needs_offset_adjustment = version_bytes > 255

  directory_offset = binary_header.directory_offset
  directory_offset += 2 if needs_offset_adjustment

  # Create header model
  header = Models::WinHelpHeader.new(
    version: :winhelp4,
    magic: binary_header.magic,
    directory_offset: directory_offset,
    free_list_offset: binary_header.free_list_offset,
    file_size: binary_header.file_size,
  )

  # Parse directory (WinHelp 4.x format: B+ tree of filename -> offset)
  parse_directory_winhelp4(handle, header)

  header
end
183
+
184
# Read the WinHelp 3.x internal file directory into header.internal_files.
#
# Layout read by this method, starting at header.directory_offset:
# - repeated entries of
#   - 4 bytes: file size (little-endian)
#   - 2 bytes: starting block number (little-endian)
#   - filename bytes up to a NUL terminator
#   - one extra byte skipped when the position is then odd (see #align_read)
# - a zero file size, or running out of data, ends the directory
#
# Does nothing (and leaves internal_files untouched) when the directory
# offset is zero. Entries with an empty filename are skipped.
#
# @param handle [System::FileHandle] Open file handle
# @param header [Models::WinHelpHeader] Header to populate
def parse_directory_winhelp3(handle, header)
  offset = header.directory_offset
  return if offset.zero?

  @io_system.seek(handle, offset, Constants::SEEK_START)
  header.internal_files = []

  loop do
    raw_size = @io_system.read(handle, 4)
    break if raw_size.nil? || raw_size.bytesize < 4

    size = raw_size.unpack1("V")
    break if size.zero? # end-of-directory marker

    raw_block = @io_system.read(handle, 2)
    break if raw_block.nil? || raw_block.bytesize < 2

    block = raw_block.unpack1("v")

    # Collect filename bytes until NUL or end of data.
    name = +""
    loop do
      chunk = @io_system.read(handle, 1)
      break if chunk.nil? || chunk.empty?

      code = chunk.getbyte(0)
      break if code.zero?

      name << code.chr
    end

    align_read(handle)

    next if name.empty?

    header.internal_files << { filename: name, file_size: size, starting_block: block }
  end
end
246
+
247
# Read the WinHelp 4.x internal file directory, which is stored as a B+ tree.
#
# Bytes consumed at header.directory_offset:
# - 9 bytes: FILEHEADER (only its presence is checked here)
# - 38 bytes: BTREEHEADER, whose magic must be 0x293B
# The position right after the BTREEHEADER is taken as the start of the
# B+ tree pages and handed to #parse_btree_files.
#
# Does nothing when the directory offset is zero.
#
# @param handle [System::FileHandle] Open file handle
# @param header [Models::WinHelpHeader] Header to populate
# @raise [ParseError] if either header is truncated or the magic is wrong
def parse_directory_winhelp4(handle, header)
  dir_offset = header.directory_offset
  return if dir_offset.zero?

  @io_system.seek(handle, dir_offset, Constants::SEEK_START)

  raw_file_header = @io_system.read(handle, 9)
  if raw_file_header.nil? || raw_file_header.bytesize < 9
    raise Cabriolet::ParseError,
          "Failed to read FILEHEADER"
  end

  raw_btree_header = @io_system.read(handle, 38)
  if raw_btree_header.nil? || raw_btree_header.bytesize < 38
    raise Cabriolet::ParseError,
          "Failed to read BTREEHEADER"
  end

  btree_header = Binary::HLPStructures::WinHelpBTreeHeader.read(raw_btree_header)

  unless btree_header.magic == 0x293B
    raise Cabriolet::ParseError,
          "Invalid B+ tree magic: 0x#{btree_header.magic.to_i.to_s(16)}"
  end

  # Pages begin immediately after the two headers just consumed.
  pages_start = @io_system.tell(handle)

  header.internal_files = []
  parse_btree_files(handle, header, btree_header, pages_start)
end
292
+
293
# Collect every (filename, offset, size) entry from the WinHelp B+ tree
# into header.internal_files.
#
# The tree is walked from the root down through n_levels - 1 index pages
# to reach a leaf page; from there leaf pages are followed through their
# next_page links until a link of -1.
#
# Parsing is best-effort: truncated pages end the walk quietly. The walk
# also stops when
# - an index page cannot be read or yields no child page (previously this
#   left the page number nil and crashed on the following multiplication)
# - a leaf page number is seen a second time, so a corrupt next_page chain
#   that loops back on itself cannot spin forever
#
# @param handle [System::FileHandle] Open file handle
# @param header [Models::WinHelpHeader] Header to populate
# @param btree_header [Binary::HLPStructures::WinHelpBTreeHeader] B+ tree header
# @param first_page_offset [Integer] Offset of first B+ tree page
def parse_btree_files(handle, header, btree_header, first_page_offset)
  return unless btree_header.total_btree_entries.positive?

  # Start at root page and traverse to first leaf page
  current_page = btree_header.root_page

  if btree_header.n_levels > 1
    (btree_header.n_levels - 1).times do
      page_offset = first_page_offset + (current_page * btree_header.page_size)
      @io_system.seek(handle, page_offset, Constants::SEEK_START)

      index_header_data = @io_system.read(handle, 6)
      return if index_header_data.nil? || index_header_data.bytesize < 6

      current_page = read_first_page_from_index(handle, index_header_data)
      # Without a child page there is no leaf to read.
      return if current_page.nil?
    end
  end

  visited_pages = {}

  # Now read all leaf pages
  loop do
    page_key = current_page.to_i
    break if visited_pages.key?(page_key)

    visited_pages[page_key] = true

    page_offset = first_page_offset + (current_page * btree_header.page_size)
    @io_system.seek(handle, page_offset, Constants::SEEK_START)

    leaf_header_data = @io_system.read(handle, 8)
    break if leaf_header_data.nil? || leaf_header_data.bytesize < 8

    leaf_header = Binary::HLPStructures::WinHelpBTreeNodeHeader.read(leaf_header_data)

    # Read all entries in this leaf page
    leaf_header.n_entries.times do
      filename = read_cstring(handle)
      break if filename.nil?

      # File offset is a 4-byte little-endian value after the name
      offset_data = @io_system.read(handle, 4)
      break if offset_data.nil? || offset_data.bytesize < 4

      file_offset = offset_data.unpack1("V")

      next if filename.empty?

      # read_file_size seeks away to the file's FILEHEADER, so remember
      # where this page's entries continue and come back afterwards.
      current_position = @io_system.tell(handle)
      file_size = read_file_size(handle, file_offset)
      @io_system.seek(handle, current_position, Constants::SEEK_START)

      header.internal_files << {
        filename: filename,
        file_size: file_size,
        file_offset: file_offset, # Store actual offset, not block number
      }
    end

    # Move to next leaf page or exit
    break if leaf_header.next_page == -1

    current_page = leaf_header.next_page
  end
end
371
+
372
# Read the page number stored with the first entry of an index page.
#
# The handle must already be positioned just past the 6-byte index header.
# One NUL-terminated filename is skipped, then a 2-byte little-endian page
# number is read and returned.
#
# NOTE(review): the index header bytes passed in are ignored. Some WinHelp
# format descriptions keep the leftmost child page inside that header -
# confirm that following the first entry's page cannot skip the first leaf.
#
# @param handle [System::FileHandle] Open file handle
# @param _index_header_data [String] Index header data (6 bytes), unused
# @return [Integer, nil] First page number or nil on error
def read_first_page_from_index(handle, _index_header_data)
  first_key = read_cstring(handle)
  return nil if first_key.nil?

  raw_page = @io_system.read(handle, 2)
  too_short = raw_page.nil? || raw_page.bytesize < 2
  too_short ? nil : raw_page.unpack1("v")
end
390
+
391
# Look up an internal file's size from the FILEHEADER stored at its offset.
#
# Moves the handle: callers that need their previous position must save
# and restore it themselves.
#
# @param handle [System::FileHandle] Open file handle
# @param file_offset [Integer] Offset of FILEHEADER
# @return [Integer] used_space from the FILEHEADER, or 0 when fewer than
#   9 bytes could be read there
def read_file_size(handle, file_offset)
  @io_system.seek(handle, file_offset, Constants::SEEK_START)

  raw_header = @io_system.read(handle, 9)
  return 0 if raw_header.nil? || raw_header.bytesize < 9

  Binary::HLPStructures::WinHelpFileHeader.read(raw_header).used_space
end
409
+
410
# Read variable-length directory entries (WinHelp 3.x style) from the
# handle's current position and append them to header.internal_files.
#
# Unlike #parse_directory_winhelp3 this neither seeks nor resets
# internal_files; the caller must have done both. No other method in this
# class calls it.
#
# Each entry is: 4-byte little-endian size, 2-byte little-endian starting
# block, NUL-terminated filename, then one byte skipped when the position
# is odd. A zero size or a short read ends the loop; entries with an empty
# filename are skipped.
#
# @param handle [System::FileHandle] Open file handle
# @param header [Models::WinHelpHeader] Header whose internal_files is appended to
def parse_directory_variable(handle, header)
  loop do
    size_bytes = @io_system.read(handle, 4)
    break if size_bytes.nil? || size_bytes.bytesize < 4

    entry_size = size_bytes.unpack1("V")
    break if entry_size.zero? # end-of-directory marker

    block_bytes = @io_system.read(handle, 2)
    break if block_bytes.nil? || block_bytes.bytesize < 2

    first_block = block_bytes.unpack1("v")

    # Collect filename bytes until NUL or end of data.
    entry_name = +""
    loop do
      piece = @io_system.read(handle, 1)
      break if piece.nil? || piece.empty?

      value = piece.getbyte(0)
      break if value.zero?

      entry_name << value.chr
    end

    align_read(handle)

    next if entry_name.empty?

    header.internal_files << { filename: entry_name, file_size: entry_size, starting_block: first_block }
  end
end
453
+
454
# Read bytes one at a time up to (and consuming) a NUL terminator.
#
# @param handle [System::FileHandle] Open file handle
# @return [String, nil] The bytes before the terminator (possibly empty),
#   or nil when the data ends before a terminator is found
def read_cstring(handle)
  buffer = +""

  loop do
    chunk = @io_system.read(handle, 1)
    return nil if chunk.nil? || chunk.empty?

    code = chunk.getbyte(0)
    return buffer if code.zero?

    buffer << code.chr
  end
end
471
+
472
# Skip one padding byte when the current file position is odd, so the
# next read starts on a 2-byte boundary.
#
# @param handle [System::FileHandle] Open file handle
def align_read(handle)
  return unless @io_system.tell(handle).odd?

  @io_system.read(handle, 1)
end
481
+ end
482
+ end
483
+ end
484
+ end