cabriolet 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +700 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +154 -14
- data/lib/cabriolet/binary/bitstream_writer.rb +129 -17
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +35 -43
- data/lib/cabriolet/cab/decompressor.rb +14 -19
- data/lib/cabriolet/cab/extractor.rb +140 -31
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +34 -45
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +3 -2
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +626 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +633 -38
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +67 -17
- metadata +33 -2
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"

require_relative "parser"
require_relative "zeck_lz77"
require_relative "../../system/io_system"
require_relative "../../constants"
|
|
7
|
+
|
|
8
|
+
module Cabriolet
|
|
9
|
+
module HLP
|
|
10
|
+
module WinHelp
|
|
11
|
+
# Decompressor for Windows Help files
|
|
12
|
+
#
|
|
13
|
+
# Extracts and decompresses content from WinHelp files using:
|
|
14
|
+
# - WinHelp::Parser for file structure
|
|
15
|
+
# - ZeckLZ77 for topic decompression
|
|
16
|
+
#
|
|
17
|
+
# Handles both WinHelp 3.x and 4.x formats.
|
|
18
|
+
class Decompressor
|
|
19
|
+
attr_reader :io_system, :header
|
|
20
|
+
|
|
21
|
+
# Initialize decompressor
|
|
22
|
+
#
|
|
23
|
+
# @param filename [String] Path to WinHelp file
|
|
24
|
+
# @param io_system [System::IOSystem, nil] Custom I/O system
|
|
25
|
+
def initialize(filename, io_system = nil)
  # Use the injected I/O system when given, otherwise build the default one.
  @io_system = io_system || System::IOSystem.new
  @filename = filename
  # Filled in by #parse the first time the file structure is needed.
  @header = nil
  @parser = Parser.new(@io_system)
  @zeck = ZeckLZ77.new
end
|
|
32
|
+
|
|
33
|
+
# Parse the WinHelp file structure
|
|
34
|
+
#
|
|
35
|
+
# @return [Models::WinHelpHeader] Parsed header
|
|
36
|
+
def parse
  # Remember the parsed header so later calls can skip re-parsing.
  parsed = @parser.parse(@filename)
  @header = parsed
end
|
|
39
|
+
|
|
40
|
+
# Extract a specific internal file by name
|
|
41
|
+
#
|
|
42
|
+
# @param filename [String] Internal filename (e.g., "|SYSTEM", "|TOPIC")
|
|
43
|
+
# @return [String, nil] Raw file data or nil if not found
|
|
44
|
+
def extract_internal_file(filename)
  parse unless @header

  entry = @header.find_file(filename)
  return unless entry

  # Entries parsed from the B+ tree carry an absolute :file_offset; entries
  # without one are located by :starting_block, assuming 4096-byte blocks.
  # NOTE(review): the parser reads a 9-byte FILEHEADER at :file_offset to obtain
  # the size, so for B+ tree entries the bytes returned here presumably begin
  # with that header rather than the payload - confirm what callers expect.
  start = entry[:file_offset] || (entry[:starting_block] * 4096)

  handle = @io_system.open(@filename, Constants::MODE_READ)
  begin
    @io_system.seek(handle, start, Constants::SEEK_START)
    @io_system.read(handle, entry[:file_size])
  ensure
    # Always release the handle, even when seek/read raises.
    @io_system.close(handle)
  end
end
|
|
69
|
+
|
|
70
|
+
# Extract |SYSTEM file data
|
|
71
|
+
#
|
|
72
|
+
# @return [String, nil] System file data
|
|
73
|
+
def extract_system_file
  # Thin wrapper: look up the fixed internal name via the generic extractor.
  internal_name = "|SYSTEM"
  extract_internal_file(internal_name)
end
|
|
76
|
+
|
|
77
|
+
# Extract |TOPIC file data
|
|
78
|
+
#
|
|
79
|
+
# @return [String, nil] Topic file data (compressed)
|
|
80
|
+
def extract_topic_file
  # Thin wrapper: look up the fixed internal name via the generic extractor.
  internal_name = "|TOPIC"
  extract_internal_file(internal_name)
end
|
|
83
|
+
|
|
84
|
+
# Decompress topic data using Zeck LZ77
|
|
85
|
+
#
|
|
86
|
+
# @param compressed_data [String] Compressed topic data
|
|
87
|
+
# @param output_size [Integer] Expected decompressed size
|
|
88
|
+
# @return [String] Decompressed topic text
|
|
89
|
+
def decompress_topic(compressed_data, output_size)
  # All decoding is delegated to the ZeckLZ77 instance created in #initialize.
  decoder = @zeck
  decoder.decompress(compressed_data, output_size)
end
|
|
92
|
+
|
|
93
|
+
# Extract all topics from |TOPIC file
|
|
94
|
+
#
|
|
95
|
+
# This is a simplified implementation that returns raw topic data.
|
|
96
|
+
# Full implementation would parse topic headers and extract individual topics.
|
|
97
|
+
#
|
|
98
|
+
# @return [Array<Hash>] Array of topic hashes with :data key
|
|
99
|
+
def extract_topics
  parse unless @header

  raw_topic = extract_topic_file
  return [] unless raw_topic

  # Simplified: the whole |TOPIC payload is returned as one entry, still
  # flagged as compressed; topic block headers are not parsed here.
  topic = { index: 0, data: raw_topic, compressed: true }
  [topic]
end
|
|
113
|
+
|
|
114
|
+
# Extract all files to a directory
|
|
115
|
+
#
|
|
116
|
+
# @param output_dir [String] Output directory path
|
|
117
|
+
# @return [Integer] Number of files extracted
|
|
118
|
+
def extract_all(output_dir)
  parse unless @header

  # FileUtils lives in the stdlib "fileutils" library, which this file must require.
  FileUtils.mkdir_p(output_dir)

  # Array() tolerates a header whose directory was never populated (nil).
  Array(@header.internal_files).count do |file_entry|
    data = extract_internal_file(file_entry[:filename])
    # Entries whose data cannot be read are skipped and not counted.
    next false unless data

    # Internal names such as "|SYSTEM" are not valid on every file system.
    safe_name = sanitize_filename(file_entry[:filename])
    File.binwrite(File.join(output_dir, safe_name), data)
    true
  end
end
|
|
138
|
+
|
|
139
|
+
# Sanitize filename for file system
|
|
140
|
+
#
|
|
141
|
+
# @param filename [String] Internal filename
|
|
142
|
+
# @return [String] Safe filename
|
|
143
|
+
def sanitize_filename(filename)
  # Transcode to ASCII: anything without an ASCII equivalent becomes "_".
  # #scrub covers strings already tagged as ASCII that still hold invalid bytes,
  # which #encode leaves untouched when source and target encodings match.
  sanitized = filename.encode("ASCII", invalid: :replace, undef: :replace, replace: "_")
                      .scrub("_")

  # Spell out "|" so internal names like "|SYSTEM" stay recognisable.
  sanitized = sanitized.gsub("|", "_pipe_")

  # Replace path separators, characters reserved on common file systems, and
  # ASCII control characters (transcoding alone does not remove those).
  sanitized = sanitized.gsub(%r{[/\\:<>"?*\x00-\x1F\x7F]}, "_")

  # Collapse underscore runs, then trim both ends. \A and \z anchor the whole
  # string; ^ and $ would also match around embedded line breaks.
  sanitized = sanitized.squeeze("_").gsub(/\A_+|_+\z/, "")

  # An empty name, "." or ".." cannot be written as a regular file inside the
  # output directory, so fall back to the default name.
  return "_unnamed_file_" if sanitized.empty? || sanitized.match?(/\A\.+\z/)

  sanitized
end
|
|
165
|
+
|
|
166
|
+
# Get list of internal filenames
|
|
167
|
+
#
|
|
168
|
+
# @return [Array<String>] Internal file names
|
|
169
|
+
def internal_filenames
  # Parse lazily on first use, then ask the header model for the names.
  parse if @header.nil?
  header = @header
  header.internal_filenames
end
|
|
173
|
+
|
|
174
|
+
# Check if |SYSTEM file exists
|
|
175
|
+
#
|
|
176
|
+
# @return [Boolean] true if |SYSTEM present
|
|
177
|
+
def has_system_file?
  # Parse lazily on first use; the header model answers the question.
  parse if @header.nil?
  header = @header
  header.has_system_file?
end
|
|
181
|
+
|
|
182
|
+
# Check if |TOPIC file exists
|
|
183
|
+
#
|
|
184
|
+
# @return [Boolean] true if |TOPIC present
|
|
185
|
+
def has_topic_file?
  # Parse lazily on first use; the header model answers the question.
  parse if @header.nil?
  header = @header
  header.has_topic_file?
end
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../binary/hlp_structures"
|
|
4
|
+
require_relative "../../models/winhelp_header"
|
|
5
|
+
require_relative "../../errors"
|
|
6
|
+
require_relative "../../system/io_system"
|
|
7
|
+
require_relative "../../constants"
|
|
8
|
+
|
|
9
|
+
module Cabriolet
|
|
10
|
+
module HLP
|
|
11
|
+
module WinHelp
|
|
12
|
+
# Parser for Windows Help (WinHelp) files
|
|
13
|
+
#
|
|
14
|
+
# Parses Windows Help files (3.x and 4.x formats) used in Windows 3.0+
|
|
15
|
+
# through Windows XP.
|
|
16
|
+
#
|
|
17
|
+
# Format structure:
|
|
18
|
+
# - File Header (28 or 32 bytes depending on version)
|
|
19
|
+
# - Internal File Directory
|
|
20
|
+
# - File Data Blocks (|SYSTEM, |TOPIC, etc.)
|
|
21
|
+
#
|
|
22
|
+
# Magic numbers:
|
|
23
|
+
# - WinHelp 3.x (16-bit): 0x35F3
|
|
24
|
+
# - WinHelp 4.x (32-bit): low 16 bits 0x5F3F or 0x3F5F (upper 16 bits vary)
|
|
25
|
+
class Parser
|
|
26
|
+
attr_reader :io_system
|
|
27
|
+
|
|
28
|
+
# Initialize parser
|
|
29
|
+
#
|
|
30
|
+
# @param io_system [System::IOSystem, nil] Custom I/O system or nil for default
|
|
31
|
+
def initialize(io_system = nil)
  # Build the default I/O system only when the caller did not supply one.
  io_system ||= System::IOSystem.new
  @io_system = io_system
end
|
|
34
|
+
|
|
35
|
+
# Parse a WinHelp file
|
|
36
|
+
#
|
|
37
|
+
# @param filename [String] Path to WinHelp file
|
|
38
|
+
# @return [Models::WinHelpHeader] Parsed header with metadata
|
|
39
|
+
# @raise [Cabriolet::ParseError] if file is not valid WinHelp
|
|
40
|
+
def parse(filename)
  handle = @io_system.open(filename, Constants::MODE_READ)
  begin
    # Record the source path on the parsed header before handing it back.
    parse_file(handle).tap { |header| header.filename = filename }
  ensure
    # The handle is closed whether parsing succeeds or raises.
    @io_system.close(handle)
  end
end
|
|
51
|
+
|
|
52
|
+
private
|
|
53
|
+
|
|
54
|
+
# Parse complete WinHelp file structure
|
|
55
|
+
#
|
|
56
|
+
# @param handle [System::FileHandle] Open file handle
|
|
57
|
+
# @return [Models::WinHelpHeader] Parsed header
|
|
58
|
+
# @raise [Cabriolet::ParseError] if parsing fails
|
|
59
|
+
def parse_file(handle)
  # The magic number decides which header layout to parse.
  version = detect_version(handle)
  return parse_winhelp3(handle) if version == :winhelp3
  return parse_winhelp4(handle) if version == :winhelp4

  raise Cabriolet::ParseError, "Unknown WinHelp version"
end
|
|
72
|
+
|
|
73
|
+
# Detect WinHelp version from magic number
|
|
74
|
+
#
|
|
75
|
+
# @param handle [System::FileHandle] Open file handle
|
|
76
|
+
# @return [Symbol] :winhelp3 or :winhelp4
|
|
77
|
+
# @raise [Cabriolet::ParseError] if magic number is invalid
|
|
78
|
+
def detect_version(handle)
  @io_system.seek(handle, 0, Constants::SEEK_START)
  signature = @io_system.read(handle, 4)

  if signature.nil? || signature.bytesize < 4
    raise Cabriolet::ParseError, "File too small for WinHelp header"
  end

  # WinHelp 3.x: the first two bytes, read little-endian, equal 0x35F3.
  leading_word = signature.unpack1("v")
  return :winhelp3 if leading_word == 0x35F3

  # WinHelp 4.x: the low 16 bits of the little-endian dword are 0x5F3F or 0x3F5F.
  dword = signature.unpack1("V")
  return :winhelp4 if [0x5F3F, 0x3F5F].include?(dword & 0xFFFF)

  raise Cabriolet::ParseError, "Unknown WinHelp magic: 0x#{dword.to_s(16).upcase}"
end
|
|
98
|
+
|
|
99
|
+
# Parse WinHelp 3.x file
|
|
100
|
+
#
|
|
101
|
+
# @param handle [System::FileHandle] Open file handle
|
|
102
|
+
# @return [Models::WinHelpHeader] Parsed header
|
|
103
|
+
def parse_winhelp3(handle)
  @io_system.seek(handle, 0, Constants::SEEK_START)
  header_data = @io_system.read(handle, 28)

  # A read may yield nil instead of a short string (the other readers in this
  # parser guard for both), so nil is treated as "too small" as well.
  if header_data.nil? || header_data.bytesize < 28
    raise Cabriolet::ParseError, "File too small for WinHelp 3.x header"
  end

  binary_header = Binary::HLPStructures::WinHelp3Header.read(header_data)

  # Re-validate the magic on the decoded structure.
  unless binary_header.magic == 0x35F3
    raise Cabriolet::ParseError,
          "Invalid WinHelp 3.x magic: 0x#{binary_header.magic.to_i.to_s(16)}"
  end

  header = Models::WinHelpHeader.new(
    version: :winhelp3,
    magic: binary_header.magic,
    directory_offset: binary_header.directory_offset,
    free_list_offset: binary_header.free_list_offset,
    file_size: binary_header.file_size,
  )

  # Populate header.internal_files from the variable-length directory.
  parse_directory_winhelp3(handle, header)

  header
end
|
|
134
|
+
|
|
135
|
+
# Parse WinHelp 4.x file
|
|
136
|
+
#
|
|
137
|
+
# @param handle [System::FileHandle] Open file handle
|
|
138
|
+
# @return [Models::WinHelpHeader] Parsed header
|
|
139
|
+
def parse_winhelp4(handle)
  @io_system.seek(handle, 0, Constants::SEEK_START)
  header_data = @io_system.read(handle, 32)

  # A read may yield nil instead of a short string (the other readers in this
  # parser guard for both), so nil is treated as "too small" as well.
  if header_data.nil? || header_data.bytesize < 32
    raise Cabriolet::ParseError, "File too small for WinHelp 4.x header"
  end

  binary_header = Binary::HLPStructures::WinHelp4Header.read(header_data)

  # Validate magic: the low 16 bits must be 0x5F3F or 0x3F5F.
  magic_val = binary_header.magic.respond_to?(:to_i) ? binary_header.magic.to_i : binary_header.magic
  unless [0x5F3F, 0x3F5F].include?(magic_val & 0xFFFF)
    raise Cabriolet::ParseError,
          "Invalid WinHelp 4.x magic: 0x#{magic_val.to_s(16)}"
  end

  # The structure decodes magic as 4 bytes, but some files use a 2-byte magic
  # followed by a 2-byte version/flags word. Heuristic: when the upper 16 bits
  # of the decoded magic exceed 255 the file is treated as the 2-byte-magic
  # variant and directory_offset is shifted by +2; otherwise it is used as read.
  version_bytes = (magic_val >> 16) & 0xFFFF
  directory_offset = binary_header.directory_offset
  directory_offset += 2 if version_bytes > 255

  header = Models::WinHelpHeader.new(
    version: :winhelp4,
    magic: binary_header.magic,
    directory_offset: directory_offset,
    free_list_offset: binary_header.free_list_offset,
    file_size: binary_header.file_size,
  )

  # Populate header.internal_files from the B+ tree directory.
  parse_directory_winhelp4(handle, header)

  header
end
|
|
183
|
+
|
|
184
|
+
# Parse WinHelp 3.x internal file directory
|
|
185
|
+
#
|
|
186
|
+
# WinHelp 3.x directory structure:
|
|
187
|
+
# - Directory starts at directory_offset
|
|
188
|
+
# - Each entry is variable length:
|
|
189
|
+
# - 4 bytes: file size
|
|
190
|
+
# - 2 bytes: starting block number
|
|
191
|
+
# - Null-terminated filename (padded to even length)
|
|
192
|
+
# - End of directory marked by zero size
|
|
193
|
+
#
|
|
194
|
+
# @param handle [System::FileHandle] Open file handle
|
|
195
|
+
# @param header [Models::WinHelpHeader] Header to populate
|
|
196
|
+
def parse_directory_winhelp3(handle, header)
  # A zero offset means there is no directory to read.
  return if header.directory_offset.zero?

  @io_system.seek(handle, header.directory_offset, Constants::SEEK_START)
  header.internal_files = []

  # The entry layout (4-byte size, 2-byte starting block, NUL-terminated name
  # padded to an even offset, zero size as end marker) is decoded by
  # parse_directory_variable; reuse it rather than keep a second copy of the loop.
  parse_directory_variable(handle, header)
end
|
|
246
|
+
|
|
247
|
+
# Parse WinHelp 4.x internal file directory using B+ tree
|
|
248
|
+
#
|
|
249
|
+
# WinHelp 4.x directory structure:
|
|
250
|
+
# - FILEHEADER at directory_offset
|
|
251
|
+
# - BTREEHEADER immediately after FILEHEADER
|
|
252
|
+
# - B+ tree pages containing filename -> file_offset mappings
|
|
253
|
+
#
|
|
254
|
+
# @param handle [System::FileHandle] Open file handle
|
|
255
|
+
# @param header [Models::WinHelpHeader] Header to populate
|
|
256
|
+
# @raise [ParseError] if directory is invalid
|
|
257
|
+
def parse_directory_winhelp4(handle, header)
  dir_offset = header.directory_offset
  return if dir_offset.zero?

  too_short = ->(data, wanted) { data.nil? || data.bytesize < wanted }

  @io_system.seek(handle, dir_offset, Constants::SEEK_START)

  # The directory starts with a 9-byte FILEHEADER; only its presence is checked.
  file_header_data = @io_system.read(handle, 9)
  raise Cabriolet::ParseError, "Failed to read FILEHEADER" if too_short.call(file_header_data, 9)

  # The 38-byte BTREEHEADER follows immediately.
  btree_header_data = @io_system.read(handle, 38)
  raise Cabriolet::ParseError, "Failed to read BTREEHEADER" if too_short.call(btree_header_data, 38)

  btree_header = Binary::HLPStructures::WinHelpBTreeHeader.read(btree_header_data)
  if btree_header.magic != 0x293B
    raise Cabriolet::ParseError,
          "Invalid B+ tree magic: 0x#{btree_header.magic.to_i.to_s(16)}"
  end

  # B+ tree pages begin right after the two headers just consumed.
  pages_start = @io_system.tell(handle)

  header.internal_files = []
  parse_btree_files(handle, header, btree_header, pages_start)
end
|
|
292
|
+
|
|
293
|
+
# Parse all files from WinHelp B+ tree
|
|
294
|
+
#
|
|
295
|
+
# @param handle [System::FileHandle] Open file handle
|
|
296
|
+
# @param header [Models::WinHelpHeader] Header to populate
|
|
297
|
+
# @param btree_header [Binary::HLPStructures::WinHelpBTreeHeader] B+ tree header
|
|
298
|
+
# @param first_page_offset [Integer] Offset of first B+ tree page
|
|
299
|
+
def parse_btree_files(handle, header, btree_header, first_page_offset)
  return unless btree_header.total_btree_entries.positive?

  page_size = btree_header.page_size.to_i
  current_page = btree_header.root_page.to_i

  # Descend one index page per level above the leaves. If an index page cannot
  # be read or yields no child page, stop without adding entries instead of
  # carrying a nil page number into the leaf loop.
  (btree_header.n_levels - 1).times do
    @io_system.seek(handle, first_page_offset + (current_page * page_size), Constants::SEEK_START)

    index_header_data = @io_system.read(handle, 6)
    return if index_header_data.nil? || index_header_data.bytesize < 6

    child_page = read_first_page_from_index(handle, index_header_data)
    return if child_page.nil?

    current_page = child_page.to_i
  end

  # Walk the chain of leaf pages. Visited pages are tracked so a corrupt
  # next-page link that points backwards cannot loop forever.
  visited_pages = {}
  loop do
    break if visited_pages.key?(current_page)

    visited_pages[current_page] = true
    @io_system.seek(handle, first_page_offset + (current_page * page_size), Constants::SEEK_START)

    leaf_header_data = @io_system.read(handle, 8)
    break if leaf_header_data.nil? || leaf_header_data.bytesize < 8

    leaf_header = Binary::HLPStructures::WinHelpBTreeNodeHeader.read(leaf_header_data)

    leaf_header.n_entries.times do
      # Each entry: NUL-terminated filename followed by a 4-byte LE file offset.
      filename = read_cstring(handle)
      break if filename.nil?

      offset_data = @io_system.read(handle, 4)
      break if offset_data.nil? || offset_data.bytesize < 4

      file_offset = offset_data.unpack1("V")
      next if filename.empty?

      # read_file_size seeks to the FILEHEADER, so restore the position afterwards.
      resume_at = @io_system.tell(handle)
      file_size = read_file_size(handle, file_offset)
      @io_system.seek(handle, resume_at, Constants::SEEK_START)

      header.internal_files << {
        filename: filename,
        file_size: file_size,
        file_offset: file_offset, # absolute offset, not a block number
      }
    end

    # A negative next-page number (-1) marks the last leaf.
    next_page = leaf_header.next_page.to_i
    break if next_page.negative?

    current_page = next_page
  end
end
|
|
371
|
+
|
|
372
|
+
# Read first page number from index page
|
|
373
|
+
#
|
|
374
|
+
# @param handle [System::FileHandle] Open file handle
|
|
375
|
+
# @param index_header_data [String] Index header data (6 bytes)
|
|
376
|
+
# @return [Integer, nil] First page number or nil on error
|
|
377
|
+
def read_first_page_from_index(_handle, index_header_data)
  return nil if index_header_data.nil? || index_header_data.bytesize < 6

  # Index page header, 6 bytes little-endian: unused (u16), n_entries (s16),
  # previous_page (s16). previous_page is the child holding keys smaller than
  # the first entry, i.e. the leftmost child. The first entry's own page number
  # is the *second* child, so following it would skip the leftmost leaf.
  # NOTE(review): layout taken from helpdeco's BTREEINDEXHEADER - confirm
  # against the pages written by this gem's B+ tree builder.
  previous_page = index_header_data.unpack1("@4s<")

  # A negative page number cannot be followed; report it as an error.
  previous_page.negative? ? nil : previous_page
end
|
|
390
|
+
|
|
391
|
+
# Read file size from FILEHEADER at given offset
|
|
392
|
+
#
|
|
393
|
+
# @param handle [System::FileHandle] Open file handle
|
|
394
|
+
# @param file_offset [Integer] Offset of FILEHEADER
|
|
395
|
+
# @return [Integer] File size (UsedSpace from FILEHEADER)
|
|
396
|
+
def read_file_size(handle, file_offset)
  @io_system.seek(handle, file_offset, Constants::SEEK_START)

  # FILEHEADER is 9 bytes; one that cannot be read in full is reported as size 0.
  raw_header = @io_system.read(handle, 9)
  return 0 if raw_header.nil? || raw_header.bytesize < 9

  # used_space is the size reported for the internal file.
  Binary::HLPStructures::WinHelpFileHeader.read(raw_header).used_space
end
|
|
409
|
+
|
|
410
|
+
# Parse variable-length directory entries (WinHelp 3.x style)
|
|
411
|
+
def parse_directory_variable(handle, header)
  too_short = ->(data, wanted) { data.nil? || data.bytesize < wanted }

  loop do
    # 4-byte little-endian size; a short read or a zero size ends the directory.
    size_bytes = @io_system.read(handle, 4)
    break if too_short.call(size_bytes, 4)

    entry_size = size_bytes.unpack1("V")
    break if entry_size.zero?

    # 2-byte little-endian starting block.
    block_bytes = @io_system.read(handle, 2)
    break if too_short.call(block_bytes, 2)

    first_block = block_bytes.unpack1("v")

    # NUL-terminated name, read byte by byte; hitting end of data keeps whatever
    # was collected so far.
    name = +""
    while (chunk = @io_system.read(handle, 1)) && !chunk.empty?
      code = chunk.getbyte(0)
      break if code.zero?

      name << code.chr
    end

    # Skip the padding byte when the position after the name is odd.
    align_read(handle)

    next if name.empty?

    header.internal_files << {
      filename: name,
      file_size: entry_size,
      starting_block: first_block,
    }
  end
end
|
|
453
|
+
|
|
454
|
+
# Read null-terminated string from handle
|
|
455
|
+
#
|
|
456
|
+
# @param handle [System::FileHandle] Open file handle
|
|
457
|
+
# @return [String, nil] String or nil if read fails
|
|
458
|
+
def read_cstring(handle)
  buffer = +""
  # Consume one byte at a time until the NUL terminator is seen.
  while (chunk = @io_system.read(handle, 1)) && !chunk.empty?
    code = chunk.getbyte(0)
    return buffer if code.zero?

    buffer << code.chr
  end
  # The data ended before a terminator was found.
  nil
end
|
|
471
|
+
|
|
472
|
+
# Align file position (skip padding after filename)
|
|
473
|
+
#
|
|
474
|
+
# @param handle [System::FileHandle] Open file handle
|
|
475
|
+
def align_read(handle)
  # Already on an even offset: nothing to skip.
  offset = @io_system.tell(handle)
  return if offset.even?

  # Consume a single padding byte to land on the next even offset.
  @io_system.read(handle, 1)
end
|
|
481
|
+
end
|
|
482
|
+
end
|
|
483
|
+
end
|
|
484
|
+
end
|