cabriolet 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module HLP
|
|
5
|
+
module QuickHelp
|
|
6
|
+
# Writes the sections of a QuickHelp file through an injected I/O system.
#
# Sections are emitted in this order: file header, topic index, context
# strings, context map, topic texts. Every write_* method returns whatever
# the I/O system's +write+ returns for that section, which this class
# treats as a byte count.
class FileWriter
  # Single NUL byte (binary encoded) that terminates each context string.
  NULL_TERMINATOR = "\x00".b.freeze

  # Initialize file writer
  #
  # @param io_system [System::IOSystem] I/O system for writing
  def initialize(io_system)
    @io_system = io_system
  end

  # Write complete QuickHelp file
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param structure [Hash] QuickHelp structure; reads +:header+ (including
  #   its +:offsets+), +:contexts+, +:context_map+ and +:topics+
  # @return [Integer] Bytes written (sum of the individual section writes)
  def write_quickhelp_file(output_handle, structure)
    header_info = structure[:header]

    bytes_written = 0
    bytes_written += write_file_header(output_handle, header_info)
    bytes_written += write_topic_index(output_handle, header_info[:offsets])
    bytes_written += write_context_strings(output_handle, structure[:contexts])
    bytes_written += write_context_map(output_handle, structure[:context_map])
    bytes_written += write_topic_texts(output_handle, structure[:topics])
    bytes_written
  end

  # Write file header
  #
  # Copies the values of +header_info+ into a binary FileHeader record,
  # zeroes the padding/reserved fields and writes the serialized record.
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param header_info [Hash] Header information (field values plus an
  #   +:offsets+ hash holding the section offsets and +:database_size+)
  # @return [Integer] Bytes written
  def write_file_header(output_handle, header_info)
    offsets = header_info[:offsets]

    header = Binary::HLPStructures::FileHeader.new
    header.signature = Binary::HLPStructures::SIGNATURE
    header.version = header_info[:version]
    header.attributes = header_info[:attributes]
    header.control_character = header_info[:control_character]
    header.padding1 = 0
    header.topic_count = header_info[:topic_count]
    header.context_count = header_info[:context_count]
    header.display_width = header_info[:display_width]
    header.padding2 = 0
    header.predefined_ctx_count = header_info[:predefined_ctx_count]
    header.database_name = header_info[:database_name]
    header.reserved1 = 0
    header.topic_index_offset = offsets[:topic_index]
    header.context_strings_offset = offsets[:context_strings]
    header.context_map_offset = offsets[:context_map]
    header.keywords_offset = offsets[:keywords]
    header.huffman_tree_offset = offsets[:huffman_tree]
    header.topic_text_offset = offsets[:topic_text]
    header.reserved2 = 0
    header.reserved3 = 0
    header.database_size = offsets[:database_size]

    @io_system.write(output_handle, header.to_binary_s)
  end

  # Write topic index
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param offsets [Hash] Offset information; +:topic_offsets+ is packed
  #   as little-endian 32-bit values (the caller includes the end marker)
  # @return [Integer] Bytes written
  def write_topic_index(output_handle, offsets)
    @io_system.write(output_handle, offsets[:topic_offsets].pack("V*"))
  end

  # Write context strings
  #
  # Each context is written as its raw bytes followed by a NUL terminator.
  # The payload is assembled in a binary buffer so that contexts carrying
  # different encodings (e.g. binary data next to non-ASCII UTF-8) cannot
  # raise Encoding::CompatibilityError while being concatenated.
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param contexts [Array<String>] Context strings
  # @return [Integer] Bytes written
  def write_context_strings(output_handle, contexts)
    data = String.new(encoding: Encoding::BINARY)
    contexts.each { |ctx| data << ctx.to_s.b << NULL_TERMINATOR }

    @io_system.write(output_handle, data)
  end

  # Write context map
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param context_map [Array<Integer>] Topic indices, packed as
  #   little-endian 16-bit values
  # @return [Integer] Bytes written
  def write_context_map(output_handle, context_map)
    @io_system.write(output_handle, context_map.pack("v*"))
  end

  # Write topic texts
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param topics [Array<Hash>] Compressed topics; each topic's
  #   +:compressed+ value is written verbatim, in order
  # @return [Integer] Bytes written
  def write_topic_texts(output_handle, topics)
    topics.sum { |topic| @io_system.write(output_handle, topic[:compressed]) }
  end
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../binary/bitstream"
|
|
4
|
+
|
|
5
|
+
module Cabriolet
|
|
6
|
+
module HLP
|
|
7
|
+
module QuickHelp
|
|
8
|
+
# Decodes Huffman-coded QuickHelp topic data one symbol at a time.
#
# The input is wrapped in a Binary::Bitstream created with its second
# argument set to true (MSB-first, per the original comment), and each
# symbol is resolved by walking the supplied Huffman tree bit by bit.
class HuffmanStream
  # Initialize Huffman stream decoder
  #
  # @param input [String, IO] Input data (Huffman-encoded bitstream);
  #   a String is wrapped in a StringIO
  # @param huffman_tree [HuffmanTree] Huffman tree for decoding
  def initialize(input, huffman_tree)
    @huffman_tree = huffman_tree
    @input = if input.is_a?(String)
               StringIO.new(input)
             else
               input
             end
    # QuickHelp uses MSB-first bit order
    @bitstream = Binary::Bitstream.new(@input, true)
  end

  # Read and decode bytes from the Huffman stream
  #
  # Decoding stops early when no further symbol can be decoded, so the
  # result may be shorter than requested.
  #
  # @param length [Integer] Number of decoded bytes to read
  # @return [String] Decoded data (binary encoded)
  def read(length)
    decoded = String.new(encoding: Encoding::BINARY)

    length.times do
      value = read_byte
      return decoded if value.nil?

      decoded << value.chr
    end

    decoded
  end

  # Read and decode a single byte
  #
  # @return [Integer, nil] Decoded byte value, or nil when the tree is
  #   empty or the bitstream yields nil before a symbol is complete
  def read_byte
    tree = @huffman_tree
    return nil if tree.empty?

    # A single-symbol tree consumes no bits: every read yields that symbol.
    return tree.root.symbol if tree.singular?

    walker = tree.create_decoder
    until walker.has_value?
      next_bit = @bitstream.read_bits(1)
      return nil if next_bit.nil? # EOF

      walker.push(next_bit != 0)
    end

    walker.value
  end

  # Check if at end of stream
  #
  # @return [Boolean] true if the underlying input reports EOF
  def eof?
    @input.eof?
  end
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module HLP
|
|
5
|
+
module QuickHelp
|
|
6
|
+
# Huffman tree for QuickHelp topic compression
#
# Represents a Huffman tree that encodes symbols 0-255. The tree is
# rebuilt from the flat array of 16-bit node values stored in the file
# (see .deserialize for the encoding this class understands).
class HuffmanTree
  # Bit 15 of a node value marks a leaf.
  LEAF_FLAG = 0x8000

  attr_reader :root, :symbol_count

  # Node in the Huffman tree
  class Node
    attr_accessor :symbol, :left_child, :right_child

    def initialize
      @symbol = nil
      @left_child = nil
      @right_child = nil
    end

    # @return [Boolean] true when the node has no children
    def leaf?
      @left_child.nil? && @right_child.nil?
    end
  end

  # Initialize tree (empty when called without arguments)
  #
  # @param root [Node, nil] Root node, nil for an empty tree
  # @param symbol_count [Integer] Number of symbols encoded by the tree
  def initialize(root = nil, symbol_count = 0)
    @root = root
    @symbol_count = symbol_count
  end

  # Check if tree is empty
  #
  # @return [Boolean] true if empty
  def empty?
    @root.nil?
  end

  # Check if tree has single node
  #
  # @return [Boolean] true if singular
  def singular?
    !@root.nil? && @root.leaf?
  end

  # Deserialize Huffman tree from node values
  #
  # Node encoding handled here:
  # - leaf: bit 15 set; the symbol is the low byte. Values are accepted
  #   both as signed 16-bit integers (negative) and as unsigned 16-bit
  #   integers (>= 0x8000), so arrays produced with unpack("v") work too.
  # - internal: left child index is value / 2, right child is the next
  #   node (i + 1).
  #
  # @param node_values [Array<Integer>] Array of 16-bit node values
  # @return [HuffmanTree] Deserialized tree
  # @raise [Cabriolet::ParseError] if tree is invalid
  def self.deserialize(node_values)
    return new if node_values.empty?

    n = node_values.length
    if n.even?
      raise Cabriolet::ParseError,
            "Invalid Huffman tree: expected odd number of nodes"
    end

    nodes = Array.new(n) { Node.new }
    symbol_exists = Array.new(256, false)

    node_values.each_with_index do |node_value, i|
      node = nodes[i]

      if node_value.negative? || (node_value & LEAF_FLAG) != 0 # Leaf node
        symbol = node_value & 0xFF
        if symbol_exists[symbol]
          raise Cabriolet::ParseError,
                "Invalid Huffman tree: symbol #{symbol} already encoded"
        end

        node.symbol = symbol
        symbol_exists[symbol] = true
      else # Internal node
        child0 = node_value / 2
        child1 = i + 1

        # Validate child indices are within bounds
        unless child0 < n && child1 < n
          raise Cabriolet::ParseError,
                "Invalid Huffman tree: invalid child node location (child0=#{child0}, child1=#{child1}, n=#{n})"
        end

        # Reject a node that is its own left child, and a left child that
        # was already wired up as an internal node (a back-reference).
        if child0 == i || nodes[child0].left_child
          raise Cabriolet::ParseError,
                "Invalid Huffman tree: cycle detected"
        end

        node.left_child = nodes[child0]
        node.right_child = nodes[child1]
      end
    end

    # A full binary tree with n nodes has (n / 2) + 1 leaves.
    new(nodes.first, (n / 2) + 1)
  end

  # Create a decoder for this tree
  #
  # @return [HuffmanDecoder] New decoder
  def create_decoder
    HuffmanDecoder.new(self)
  end
end
|
|
112
|
+
|
|
113
|
+
# Decoder for Huffman-encoded data
#
# Walks a HuffmanTree from the root, one bit per #push, until a leaf is
# reached; the leaf's symbol is then available through #value.
#
# Usage:
#   decoder = tree.create_decoder
#   while !decoder.has_value?
#     decoder.push(bitstream.read_bit)
#   end
#   symbol = decoder.value
class HuffmanDecoder
  attr_reader :current_node

  # Initialize decoder
  #
  # @param tree [HuffmanTree] Huffman tree to use
  def initialize(tree)
    @tree = tree
    @current_node = tree.root
  end

  # Check if decoder has decoded a complete symbol
  #
  # @return [Boolean] true if value is ready
  def has_value?
    !@current_node.nil? && @current_node.leaf?
  end

  # Get decoded symbol value
  #
  # @return [Integer] Symbol value (0-255)
  # @raise [RuntimeError] if no value is ready
  def value
    raise "Decoder does not have a value" unless has_value?

    @current_node.symbol
  end

  # Push a bit into the decoder
  #
  # @param bit [Boolean, Integer] Bit value (true/1 for right, false/0 for left)
  # @raise [RuntimeError] if tree is empty or at leaf
  def push(bit)
    raise "Cannot walk an empty tree" if @current_node.nil?
    raise "Cannot walk further from a leaf" if @current_node.leaf?

    # Integer 0 is truthy in Ruby, so it must be tested explicitly to
    # honour the documented "0 means left" contract.
    go_right = bit && bit != 0
    @current_node = go_right ? @current_node.right_child : @current_node.left_child
  end

  # Reset decoder to tree root
  def reset
    @current_node = @tree.root
  end
end
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module HLP
|
|
5
|
+
module QuickHelp
|
|
6
|
+
# Computes where each section of a QuickHelp file starts.
class OffsetCalculator
  # Calculate all offsets in the file
  #
  # Sections are laid out back to back after the 70-byte prefix
  # (2 signature bytes + 68 header bytes): topic index, context strings,
  # context map, topic text. Keywords and Huffman tree offsets are always
  # reported as 0 (not implemented yet).
  #
  # @param topics [Array<Hash>] Compressed topics (each with +:compressed_length+)
  # @param contexts [Array<String>] Context strings
  # @param context_map [Array<Integer>] Context topic indices
  # @return [Hash] Calculated offsets; +:topic_offsets+ are relative to
  #   +:topic_text+ and end with an extra end-marker entry
  def self.calculate(topics:, contexts:, context_map:)
    # Start after file header (70 bytes = 2 signature + 68 header)
    topic_index_at = 70

    # Topic index: (topic_count + 1) * 4 bytes
    context_strings_at = topic_index_at + ((topics.size + 1) * 4)

    # Context strings: each string plus its null terminator
    context_map_at = context_strings_at + contexts.sum { |ctx| ctx.bytesize + 1 }

    # Context map: context_count * 2 bytes; topic text follows it
    topic_text_at = context_map_at + (context_map.size * 2)

    # Running position inside the topic text section
    text_cursor = 0
    relative_offsets = topics.map do |topic|
      start = text_cursor
      text_cursor += topic[:compressed_length]
      start
    end
    relative_offsets << text_cursor # end marker

    {
      topic_index: topic_index_at,
      context_strings: context_strings_at,
      context_map: context_map_at,
      keywords: 0,
      huffman_tree: 0,
      topic_text: topic_text_at,
      topic_offsets: relative_offsets,
      database_size: topic_text_at + text_cursor,
    }
  end
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../system/io_system"
|
|
4
|
+
require_relative "../../constants"
|
|
5
|
+
|
|
6
|
+
module Cabriolet
|
|
7
|
+
module HLP
|
|
8
|
+
module QuickHelp
|
|
9
|
+
# Parser for QuickHelp (.HLP) files
#
# Parses the QuickHelp binary format as specified in the DosHelp project.
# Structure:
# - Signature (2 bytes)
# - File Header (68 bytes)
# - Topic Index (variable)
# - Context Strings (variable)
# - Context Map (variable)
# - Keywords (optional)
# - Huffman Tree (optional)
# - Topic Texts (compressed)
#
# All reads go through the injected I/O system. A read that comes back
# nil (as Ruby's IO#read does at end of file) is treated as an empty
# read, so truncated files surface as Cabriolet::ParseError.
class Parser
  # Size of the signature that precedes the header, in bytes.
  SIGNATURE_SIZE = 2
  # Size of the file header that follows the signature, in bytes.
  HEADER_SIZE = 68
  # The only QuickHelp format version this parser accepts.
  SUPPORTED_VERSION = 2

  attr_reader :io_system

  # Initialize parser
  #
  # @param io_system [System::IOSystem, nil] Custom I/O system or nil for default
  def initialize(io_system = nil)
    @io_system = io_system || System::IOSystem.new
  end

  # Parse a QuickHelp file
  #
  # The file handle is always closed, even when parsing fails.
  #
  # @param filename [String] Path to HLP file
  # @return [Models::HLPHeader] Parsed header with metadata
  # @raise [Cabriolet::ParseError] if file is not valid QuickHelp
  def parse(filename)
    handle = @io_system.open(filename, Constants::MODE_READ)

    begin
      header = parse_file(handle)
      header.filename = filename
      header
    ensure
      @io_system.close(handle)
    end
  end

  private

  # Read up to +count+ bytes, normalizing a nil result to an empty string
  #
  # @param handle [System::FileHandle] Open file handle
  # @param count [Integer] Number of bytes requested
  # @return [String] Bytes read (possibly fewer than requested)
  def read_bytes(handle, count)
    @io_system.read(handle, count) || String.new
  end

  # Parse complete QuickHelp file structure
  #
  # @param handle [System::FileHandle] Open file handle
  # @return [Models::HLPHeader] Parsed header
  # @raise [Cabriolet::ParseError] if parsing fails
  def parse_file(handle)
    check_signature(handle)
    header = parse_header(handle)
    topic_offsets = parse_topic_index(handle, header)
    parse_contexts(handle, header)

    # Keywords and Huffman tree are optional; offset 0 means "absent".
    parse_keywords(handle, header) if header.keywords_offset.positive?
    parse_huffman_tree(handle, header) if header.huffman_tree_offset.positive?

    # Calculate topic sizes from offsets
    populate_topics(header, topic_offsets)

    header
  end

  # Check file signature
  #
  # @param handle [System::FileHandle] Open file handle
  # @raise [Cabriolet::ParseError] if signature is invalid
  def check_signature(handle)
    sig_data = read_bytes(handle, SIGNATURE_SIZE)
    return if sig_data == Binary::HLPStructures::SIGNATURE

    actual = sig_data.bytes.map { |b| format("0x%02X", b) }.join(" ")
    raise Cabriolet::ParseError,
          "Invalid QuickHelp signature: expected 'LN' (0x4C 0x4E), " \
          "got #{actual}"
  end

  # Parse file header
  #
  # @param handle [System::FileHandle] Open file handle positioned after signature
  # @return [Models::HLPHeader] Parsed header
  # @raise [Cabriolet::ParseError] if header is invalid
  def parse_header(handle)
    header_data = read_bytes(handle, HEADER_SIZE)
    if header_data.bytesize < HEADER_SIZE
      raise Cabriolet::ParseError,
            "File too small for QuickHelp header"
    end

    # The binary record includes the signature, which was already consumed
    # by check_signature, so it is put back in front of the header bytes.
    binary_header = Binary::HLPStructures::FileHeader.read(
      Binary::HLPStructures::SIGNATURE + header_data,
    )

    # Validate version
    unless binary_header.version == SUPPORTED_VERSION
      raise Cabriolet::ParseError,
            "Unsupported QuickHelp version: #{binary_header.version}"
    end

    # Create header model
    Models::HLPHeader.new(
      magic: binary_header.signature,
      version: binary_header.version,
      attributes: binary_header.attributes,
      control_character: binary_header.control_character,
      topic_count: binary_header.topic_count,
      context_count: binary_header.context_count,
      display_width: binary_header.display_width,
      predefined_ctx_count: binary_header.predefined_ctx_count,
      database_name: binary_header.database_name,
      topic_index_offset: binary_header.topic_index_offset,
      context_strings_offset: binary_header.context_strings_offset,
      context_map_offset: binary_header.context_map_offset,
      keywords_offset: binary_header.keywords_offset,
      huffman_tree_offset: binary_header.huffman_tree_offset,
      topic_text_offset: binary_header.topic_text_offset,
      database_size: binary_header.database_size,
    )
  end

  # Parse topic index section
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @return [Array<Integer>] Topic offsets (including end marker)
  # @raise [Cabriolet::ParseError] if topic index is invalid
  def parse_topic_index(handle, header)
    @io_system.seek(handle, header.topic_index_offset, Constants::SEEK_START)

    # Read (topic_count + 1) DWORDs
    count = header.topic_count + 1
    index_data = read_bytes(handle, count * 4)

    if index_data.bytesize < count * 4
      raise Cabriolet::ParseError, "Cannot read complete topic index"
    end

    # Unpack as array of little-endian 32-bit integers
    index_data.unpack("V#{count}")
  end

  # Parse context strings and context map
  #
  # The context strings section is taken to span from its own offset up
  # to the context map offset.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @raise [Cabriolet::ParseError] if context data is invalid
  def parse_contexts(handle, header)
    return if header.context_count.zero?

    strings_size = header.context_map_offset - header.context_strings_offset
    if strings_size.negative?
      raise Cabriolet::ParseError,
            "Invalid context offsets: context map offset " \
            "(#{header.context_map_offset}) precedes context strings " \
            "offset (#{header.context_strings_offset})"
    end

    # Read context strings
    @io_system.seek(handle, header.context_strings_offset, Constants::SEEK_START)
    strings_data = read_bytes(handle, strings_size)

    # Split by null terminators
    header.contexts = strings_data.force_encoding(Encoding::ASCII).split("\x00")

    # Read context map
    @io_system.seek(handle, header.context_map_offset, Constants::SEEK_START)
    map_data = read_bytes(handle, header.context_count * 2)

    if map_data.bytesize < header.context_count * 2
      raise Cabriolet::ParseError, "Cannot read complete context map"
    end

    # Unpack as array of little-endian 16-bit integers
    header.context_map = map_data.unpack("v#{header.context_count}")
  end

  # Parse keywords dictionary
  #
  # The section ends where the Huffman tree starts, or at the topic text
  # when there is no Huffman tree. Entries are length-prefixed strings;
  # parsing stops at a zero length byte or at an entry that would run
  # past the end of the section.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @raise [Cabriolet::ParseError] if keywords section is invalid
  def parse_keywords(handle, header)
    @io_system.seek(handle, header.keywords_offset, Constants::SEEK_START)

    # Calculate section size
    next_offset = header.huffman_tree_offset.positive? ? header.huffman_tree_offset : header.topic_text_offset
    section_size = next_offset - header.keywords_offset

    return if section_size <= 0

    section_data = read_bytes(handle, section_size)

    # Parse length-prefixed strings
    header.keywords = []
    pos = 0

    while pos < section_data.bytesize
      length = section_data.getbyte(pos)
      break if length.nil? || length.zero?

      pos += 1
      break if pos + length > section_data.bytesize

      header.keywords << section_data[pos, length]
      pos += length
    end
  end

  # Parse Huffman tree
  #
  # Stores the raw node values as unsigned little-endian 16-bit integers;
  # they are decoded into a tree during decompression.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @raise [Cabriolet::ParseError] if Huffman tree is invalid
  def parse_huffman_tree(handle, header)
    @io_system.seek(handle, header.huffman_tree_offset, Constants::SEEK_START)

    # Read nodes until we hit terminating 0x0000 (or run out of data)
    nodes = []
    loop do
      node_data = read_bytes(handle, 2)
      break if node_data.bytesize < 2

      node_value = node_data.unpack1("v")
      break if node_value.zero? # Terminating null

      nodes << node_value
    end

    # Validate node count (must be odd, representing a proper binary tree)
    if nodes.length.even? && !nodes.empty?
      raise Cabriolet::ParseError,
            "Invalid Huffman tree: expected odd number of nodes"
    end

    # Store raw node values (will be decoded during decompression)
    header.huffman_tree = nodes
  end

  # Populate topic metadata from offset array
  #
  # Each topic's size is the distance to the next offset, which is why
  # the index carries one extra end-marker entry.
  #
  # @param header [Models::HLPHeader] Header to populate
  # @param offsets [Array<Integer>] Topic offsets
  def populate_topics(header, offsets)
    header.topics = []

    header.topic_count.times do |i|
      header.topics << Models::HLPTopic.new(
        index: i,
        offset: offsets[i],
        size: offsets[i + 1] - offsets[i],
      )
    end
  end
end
|
|
272
|
+
end
|
|
273
|
+
end
|
|
274
|
+
end
|