cabriolet 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/README.adoc +703 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +167 -16
  6. data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +108 -84
  13. data/lib/cabriolet/cab/decompressor.rb +16 -20
  14. data/lib/cabriolet/cab/extractor.rb +142 -66
  15. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  16. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  17. data/lib/cabriolet/checksum.rb +49 -0
  18. data/lib/cabriolet/chm/command_handler.rb +227 -0
  19. data/lib/cabriolet/chm/compressor.rb +7 -3
  20. data/lib/cabriolet/chm/decompressor.rb +39 -21
  21. data/lib/cabriolet/chm/parser.rb +5 -2
  22. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  23. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  24. data/lib/cabriolet/cli/command_registry.rb +83 -0
  25. data/lib/cabriolet/cli.rb +356 -607
  26. data/lib/cabriolet/collections/file_collection.rb +175 -0
  27. data/lib/cabriolet/compressors/base.rb +1 -1
  28. data/lib/cabriolet/compressors/lzx.rb +241 -54
  29. data/lib/cabriolet/compressors/mszip.rb +35 -3
  30. data/lib/cabriolet/compressors/quantum.rb +36 -95
  31. data/lib/cabriolet/decompressors/base.rb +1 -1
  32. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  33. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  34. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  35. data/lib/cabriolet/decompressors/quantum.rb +83 -53
  36. data/lib/cabriolet/errors.rb +3 -0
  37. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  38. data/lib/cabriolet/extraction/extractor.rb +171 -0
  39. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  40. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  41. data/lib/cabriolet/file_entry.rb +156 -0
  42. data/lib/cabriolet/file_manager.rb +144 -0
  43. data/lib/cabriolet/format_base.rb +79 -0
  44. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  45. data/lib/cabriolet/hlp/compressor.rb +28 -238
  46. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  47. data/lib/cabriolet/hlp/parser.rb +52 -101
  48. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  49. data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
  50. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  51. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  52. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  53. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  54. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  55. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  56. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  57. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  58. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  59. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  60. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  61. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  62. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  63. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  64. data/lib/cabriolet/huffman/encoder.rb +15 -12
  65. data/lib/cabriolet/huffman/tree.rb +85 -1
  66. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  67. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  68. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  69. data/lib/cabriolet/lit/command_handler.rb +221 -0
  70. data/lib/cabriolet/lit/compressor.rb +119 -168
  71. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  72. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  73. data/lib/cabriolet/lit/decompressor.rb +518 -152
  74. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  75. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  76. data/lib/cabriolet/lit/header_writer.rb +124 -0
  77. data/lib/cabriolet/lit/parser.rb +670 -0
  78. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  79. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  80. data/lib/cabriolet/models/hlp_file.rb +130 -29
  81. data/lib/cabriolet/models/hlp_header.rb +105 -17
  82. data/lib/cabriolet/models/lit_header.rb +212 -25
  83. data/lib/cabriolet/models/szdd_header.rb +10 -2
  84. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  85. data/lib/cabriolet/oab/command_handler.rb +257 -0
  86. data/lib/cabriolet/oab/compressor.rb +17 -8
  87. data/lib/cabriolet/oab/decompressor.rb +41 -10
  88. data/lib/cabriolet/offset_calculator.rb +81 -0
  89. data/lib/cabriolet/plugin.rb +233 -0
  90. data/lib/cabriolet/plugin_manager.rb +453 -0
  91. data/lib/cabriolet/plugin_validator.rb +422 -0
  92. data/lib/cabriolet/quantum_shared.rb +105 -0
  93. data/lib/cabriolet/system/io_system.rb +3 -0
  94. data/lib/cabriolet/system/memory_handle.rb +17 -4
  95. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  96. data/lib/cabriolet/szdd/compressor.rb +15 -11
  97. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  98. data/lib/cabriolet/version.rb +1 -1
  99. data/lib/cabriolet.rb +181 -20
  100. metadata +69 -4
  101. data/lib/cabriolet/auto.rb +0 -173
  102. data/lib/cabriolet/parallel.rb +0 -333
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module HLP
5
+ module QuickHelp
6
+ # Writes QuickHelp files to disk
7
class FileWriter
  # Create a writer bound to an I/O system.
  #
  # @param io_system [System::IOSystem] I/O system that performs the writes
  def initialize(io_system)
    @io_system = io_system
  end

  # Emit every section of a QuickHelp file in on-disk order:
  # header, topic index, context strings, context map, topic texts.
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param structure [Hash] QuickHelp structure; reads :header, :contexts,
  #   :context_map and :topics
  # @return [Integer] Total number of bytes written
  def write_quickhelp_file(output_handle, structure)
    total = 0

    total += write_file_header(output_handle, structure[:header])
    total += write_topic_index(output_handle, structure[:header][:offsets])
    total += write_context_strings(output_handle, structure[:contexts])
    total += write_context_map(output_handle, structure[:context_map])
    total += write_topic_texts(output_handle, structure[:topics])

    total
  end

  # Fill a binary file-header record from +header_info+ and write it.
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param header_info [Hash] Header fields plus an :offsets hash
  # @return [Integer] Bytes written
  def write_file_header(output_handle, header_info)
    record = Binary::HLPStructures::FileHeader.new

    # Identification and display settings
    record.signature = Binary::HLPStructures::SIGNATURE
    record.version = header_info[:version]
    record.attributes = header_info[:attributes]
    record.control_character = header_info[:control_character]
    record.padding1 = 0
    record.topic_count = header_info[:topic_count]
    record.context_count = header_info[:context_count]
    record.display_width = header_info[:display_width]
    record.padding2 = 0
    record.predefined_ctx_count = header_info[:predefined_ctx_count]
    record.database_name = header_info[:database_name]
    record.reserved1 = 0

    # Section offsets
    section_offsets = header_info[:offsets]
    record.topic_index_offset = section_offsets[:topic_index]
    record.context_strings_offset = section_offsets[:context_strings]
    record.context_map_offset = section_offsets[:context_map]
    record.keywords_offset = section_offsets[:keywords]
    record.huffman_tree_offset = section_offsets[:huffman_tree]
    record.topic_text_offset = section_offsets[:topic_text]
    record.reserved2 = 0
    record.reserved3 = 0
    record.database_size = section_offsets[:database_size]

    @io_system.write(output_handle, record.to_binary_s)
  end

  # Write the topic index: every entry of offsets[:topic_offsets]
  # (which includes the end marker) as a little-endian 32-bit value.
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param offsets [Hash] Offset information
  # @return [Integer] Bytes written
  def write_topic_index(output_handle, offsets)
    @io_system.write(output_handle, offsets[:topic_offsets].pack("V*"))
  end

  # Write the context strings, each followed by a NUL terminator.
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param contexts [Array<String>] Context strings
  # @return [Integer] Bytes written
  def write_context_strings(output_handle, contexts)
    terminated = contexts.map { |entry| "#{entry}\u0000" }
    @io_system.write(output_handle, terminated.join)
  end

  # Write the context map as little-endian 16-bit topic indices.
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param context_map [Array<Integer>] Topic indices
  # @return [Integer] Bytes written
  def write_context_map(output_handle, context_map)
    @io_system.write(output_handle, context_map.pack("v*"))
  end

  # Write the :compressed payload of every topic, in order.
  #
  # @param output_handle [System::FileHandle] Output file handle
  # @param topics [Array<Hash>] Compressed topics
  # @return [Integer] Bytes written (0 when there are no topics)
  def write_topic_texts(output_handle, topics)
    topics.sum do |entry|
      @io_system.write(output_handle, entry[:compressed])
    end
  end
end
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../binary/bitstream"
4
+
5
+ module Cabriolet
6
+ module HLP
7
+ module QuickHelp
8
+ # Huffman stream decoder for QuickHelp topics
9
+ #
10
+ # Wraps a bitstream and uses a Huffman tree to decode symbols.
11
+ # Reads bits from MSB to LSB within each byte.
12
class HuffmanStream
  # Set up a decoder over Huffman-encoded input.
  #
  # @param input [String, IO] Input data (Huffman-encoded bitstream);
  #   a String is wrapped in a StringIO
  # @param huffman_tree [HuffmanTree] Huffman tree for decoding
  def initialize(input, huffman_tree)
    @input = input.is_a?(String) ? StringIO.new(input) : input
    @huffman_tree = huffman_tree
    # QuickHelp uses MSB-first bit order
    @bitstream = Binary::Bitstream.new(@input, true) # MSB first
  end

  # Decode up to +length+ bytes; stops early when no more symbols
  # can be decoded.
  #
  # @param length [Integer] Number of decoded bytes to read
  # @return [String] Decoded data (binary encoding)
  def read(length)
    decoded = String.new(encoding: Encoding::BINARY)

    length.times do
      symbol = read_byte
      return decoded if symbol.nil?

      decoded << symbol.chr
    end

    decoded
  end

  # Decode one symbol by walking the tree one bit at a time.
  #
  # @return [Integer, nil] Decoded byte value or nil on EOF / empty tree
  def read_byte
    return nil if @huffman_tree.empty?
    # A single-leaf tree consumes no bits: the only symbol is the answer.
    return @huffman_tree.root.symbol if @huffman_tree.singular?

    walker = @huffman_tree.create_decoder

    while !walker.has_value?
      next_bit = @bitstream.read_bits(1)
      return nil if next_bit.nil? # EOF

      walker.push(next_bit != 0)
    end

    walker.value
  end

  # Report whether the underlying input is exhausted.
  #
  # @return [Boolean] true if EOF
  def eof?
    @input.eof?
  end
end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module HLP
5
+ module QuickHelp
6
+ # Huffman tree for QuickHelp topic compression
7
+ #
8
+ # Represents a Huffman tree that encodes symbols 0-255.
9
+ # Based on the QuickHelp binary format specification.
10
class HuffmanTree
  # Bit 15 of a serialized 16-bit node value marks the node as a leaf.
  LEAF_FLAG = 0x8000

  attr_reader :root, :symbol_count

  # Node in the Huffman tree. A node without children is a leaf and
  # carries the decoded symbol.
  class Node
    attr_accessor :symbol, :left_child, :right_child

    def initialize
      @symbol = nil
      @left_child = nil
      @right_child = nil
    end

    # @return [Boolean] true when the node has no children
    def leaf?
      @left_child.nil? && @right_child.nil?
    end
  end

  # Initialize empty tree
  def initialize
    @root = nil
    @symbol_count = 0
  end

  # Check if tree is empty
  #
  # @return [Boolean] true if the tree has no root
  def empty?
    @root.nil?
  end

  # Check if tree consists of a single leaf node
  #
  # @return [Boolean] true if singular
  def singular?
    !@root.nil? && @root.leaf?
  end

  # Deserialize a Huffman tree from its 16-bit node values.
  #
  # A node value with bit 15 set is a leaf whose symbol is the low byte.
  # Any other value is an internal node: its left child lives at index
  # (value / 2) and its right child at the next index (i + 1).
  #
  # Values may be given either as signed 16-bit integers (leaves are
  # negative) or as unsigned ones (leaves are >= 0x8000, e.g. when read
  # with String#unpack("v")); both forms are accepted.
  #
  # @param node_values [Array<Integer>] Array of 16-bit node values
  # @return [HuffmanTree] Deserialized tree (empty tree for empty input)
  # @raise [Cabriolet::ParseError] if tree is invalid
  def self.deserialize(node_values)
    tree = new
    return tree if node_values.empty?

    n = node_values.length
    if n.even?
      raise Cabriolet::ParseError,
            "Invalid Huffman tree: expected odd number of nodes"
    end

    nodes = Array.new(n) { Node.new }
    symbol_exists = Array.new(256, false)

    n.times do |i|
      node = nodes[i]
      node_value = node_values[i]

      # Testing only `negative?` misses leaves in unsigned input, so the
      # flag bit is checked explicitly as well.
      if node_value.negative? || (node_value & LEAF_FLAG) != 0 # Leaf node
        symbol = node_value & 0xFF
        if symbol_exists[symbol]
          raise Cabriolet::ParseError,
                "Invalid Huffman tree: symbol #{symbol} already encoded"
        end

        node.symbol = symbol
        symbol_exists[symbol] = true
      else # Internal node
        child0 = node_value / 2
        child1 = i + 1

        # Validate child indices are within bounds
        unless child0 < n && child1 < n
          raise Cabriolet::ParseError,
                "Invalid Huffman tree: invalid child node location (child0=#{child0}, child1=#{child1}, n=#{n})"
        end

        # Reject a left child that has already been wired up as an
        # internal node (it was processed earlier, i.e. a back-reference).
        if nodes[child0].left_child
          raise Cabriolet::ParseError,
                "Invalid Huffman tree: cycle detected"
        end

        node.left_child = nodes[child0]
        node.right_child = nodes[child1]
      end
    end

    tree.instance_variable_set(:@root, nodes[0])
    # n nodes in total => (n / 2) + 1 leaves when every internal node
    # has exactly two children.
    tree.instance_variable_set(:@symbol_count, (n / 2) + 1)
    tree
  end

  # Create a decoder for this tree
  #
  # @return [HuffmanDecoder] New decoder positioned at the root
  def create_decoder
    HuffmanDecoder.new(self)
  end
end
112
+
113
+ # Decoder for Huffman-encoded data
114
+ #
115
+ # Usage:
116
+ # decoder = tree.create_decoder
117
+ # while !decoder.has_value?
118
+ # decoder.push(bitstream.read_bit)
119
+ # end
120
+ # symbol = decoder.value
121
class HuffmanDecoder
  attr_reader :current_node

  # Initialize decoder positioned at the root of +tree+.
  #
  # @param tree [HuffmanTree] Huffman tree to use
  def initialize(tree)
    @tree = tree
    @current_node = tree.root
  end

  # Check if decoder has decoded a complete symbol
  #
  # @return [Boolean] true if the current node is a leaf
  def has_value?
    !@current_node.nil? && @current_node.leaf?
  end

  # Get decoded symbol value
  #
  # @return [Integer] Symbol stored in the current leaf
  # @raise [RuntimeError] if no value is ready
  def value
    raise "Decoder does not have a value" unless has_value?

    @current_node.symbol
  end

  # Push a bit into the decoder, descending one level in the tree.
  #
  # Integer 0 is truthy in Ruby, so the bit is compared explicitly:
  # false, nil and 0 select the left child, anything else the right.
  #
  # @param bit [Boolean, Integer] Bit value (true/1 for right, false/0 for left)
  # @raise [RuntimeError] if tree is empty or at leaf
  def push(bit)
    raise "Cannot walk an empty tree" if @current_node.nil?
    raise "Cannot walk further from a leaf" if @current_node.leaf?

    go_right = bit && bit != 0
    @current_node = go_right ? @current_node.right_child : @current_node.left_child
  end

  # Reset decoder to tree root
  def reset
    @current_node = @tree.root
  end
end
165
+ end
166
+ end
167
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module HLP
5
+ module QuickHelp
6
+ # Calculates file offsets for QuickHelp structure
7
class OffsetCalculator
  # Lay out the QuickHelp sections back to back and report where each
  # one starts.
  #
  # Layout: 70-byte file header (2 signature + 68 header), topic index,
  # context strings, context map, topic texts. Keywords and Huffman tree
  # are not implemented yet and are reported as offset 0.
  #
  # @param topics [Array<Hash>] Compressed topics (reads :compressed_length)
  # @param contexts [Array<String>] Context strings
  # @param context_map [Array<Integer>] Context topic indices
  # @return [Hash] Calculated offsets; :topic_offsets are relative to
  #   :topic_text and include a trailing end marker
  def self.calculate(topics:, contexts:, context_map:)
    header_bytes = 70
    # One 4-byte entry per topic plus the end marker
    index_bytes = (topics.size + 1) * 4
    # Each string is stored with a null terminator
    strings_bytes = contexts.sum { |text| text.bytesize + 1 }
    # One 2-byte entry per context
    map_bytes = context_map.size * 2

    index_start = header_bytes
    strings_start = index_start + index_bytes
    map_start = strings_start + strings_bytes
    text_start = map_start + map_bytes

    # Running position inside the topic text section
    cursor = 0
    relative_offsets = topics.map do |topic|
      topic_start = cursor
      cursor += topic[:compressed_length]
      topic_start
    end
    relative_offsets << cursor # end marker

    {
      topic_index: index_start,
      context_strings: strings_start,
      context_map: map_start,
      keywords: 0,
      huffman_tree: 0,
      topic_text: text_start,
      topic_offsets: relative_offsets,
      database_size: text_start + cursor,
    }
  end
end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,274 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../system/io_system"
4
+ require_relative "../../constants"
5
+
6
+ module Cabriolet
7
+ module HLP
8
+ module QuickHelp
9
+ # Parser for QuickHelp (.HLP) files
10
+ #
11
+ # Parses the QuickHelp binary format as specified in the DosHelp project.
12
+ # Structure:
13
+ # - Signature (2 bytes)
14
+ # - File Header (68 bytes)
15
+ # - Topic Index (variable)
16
+ # - Context Strings (variable)
17
+ # - Context Map (variable)
18
+ # - Keywords (optional)
19
+ # - Huffman Tree (optional)
20
+ # - Topic Texts (compressed)
21
class Parser
  attr_reader :io_system

  # Initialize parser
  #
  # @param io_system [System::IOSystem, nil] Custom I/O system or nil for default
  def initialize(io_system = nil)
    @io_system = io_system || System::IOSystem.new
  end

  # Parse a QuickHelp file. The file handle is always closed, even when
  # parsing fails.
  #
  # @param filename [String] Path to HLP file
  # @return [Models::HLPHeader] Parsed header with metadata
  # @raise [Cabriolet::ParseError] if file is not valid QuickHelp
  def parse(filename)
    handle = @io_system.open(filename, Constants::MODE_READ)

    begin
      header = parse_file(handle)
      header.filename = filename
      header
    ensure
      @io_system.close(handle)
    end
  end

  private

  # Parse complete QuickHelp file structure: signature, header, topic
  # index, contexts, then the optional keywords and Huffman tree
  # sections (present when their header offset is positive).
  #
  # @param handle [System::FileHandle] Open file handle
  # @return [Models::HLPHeader] Parsed header
  # @raise [Cabriolet::ParseError] if parsing fails
  def parse_file(handle)
    check_signature(handle)

    header = parse_header(handle)
    topic_offsets = parse_topic_index(handle, header)
    parse_contexts(handle, header)

    parse_keywords(handle, header) if header.keywords_offset.positive?
    parse_huffman_tree(handle, header) if header.huffman_tree_offset.positive?

    # Calculate topic sizes from offsets
    populate_topics(header, topic_offsets)

    header
  end

  # Read the first two bytes and compare them with the QuickHelp
  # signature.
  #
  # @param handle [System::FileHandle] Open file handle
  # @raise [Cabriolet::ParseError] if signature is invalid
  def check_signature(handle)
    sig_data = @io_system.read(handle, 2)
    return if sig_data == Binary::HLPStructures::SIGNATURE

    actual = sig_data.bytes.map { |b| format('0x%02X', b) }.join(' ')
    raise Cabriolet::ParseError,
          "Invalid QuickHelp signature: expected 'LN' (0x4C 0x4E), " \
          "got #{actual}"
  end

  # Parse the 68-byte file header that follows the signature.
  #
  # @param handle [System::FileHandle] Open file handle positioned after signature
  # @return [Models::HLPHeader] Parsed header
  # @raise [Cabriolet::ParseError] if header is truncated or the version
  #   is not 2
  def parse_header(handle)
    header_data = @io_system.read(handle, 68)
    if header_data.bytesize < 68
      raise Cabriolet::ParseError,
            "File too small for QuickHelp header"
    end

    # The binary structure includes the signature, which has already
    # been consumed, so it is prepended again before decoding.
    binary_header = Binary::HLPStructures::FileHeader.read(
      Binary::HLPStructures::SIGNATURE + header_data,
    )

    unless binary_header.version == 2
      raise Cabriolet::ParseError,
            "Unsupported QuickHelp version: #{binary_header.version}"
    end

    Models::HLPHeader.new(
      magic: binary_header.signature,
      version: binary_header.version,
      attributes: binary_header.attributes,
      control_character: binary_header.control_character,
      topic_count: binary_header.topic_count,
      context_count: binary_header.context_count,
      display_width: binary_header.display_width,
      predefined_ctx_count: binary_header.predefined_ctx_count,
      database_name: binary_header.database_name,
      topic_index_offset: binary_header.topic_index_offset,
      context_strings_offset: binary_header.context_strings_offset,
      context_map_offset: binary_header.context_map_offset,
      keywords_offset: binary_header.keywords_offset,
      huffman_tree_offset: binary_header.huffman_tree_offset,
      topic_text_offset: binary_header.topic_text_offset,
      database_size: binary_header.database_size,
    )
  end

  # Parse topic index section: (topic_count + 1) little-endian DWORDs.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @return [Array<Integer>] Topic offsets (including end marker)
  # @raise [Cabriolet::ParseError] if topic index is truncated
  def parse_topic_index(handle, header)
    @io_system.seek(handle, header.topic_index_offset, Constants::SEEK_START)

    count = header.topic_count + 1
    index_data = @io_system.read(handle, count * 4)

    if index_data.bytesize < count * 4
      raise Cabriolet::ParseError, "Cannot read complete topic index"
    end

    index_data.unpack("V#{count}")
  end

  # Parse context strings and context map. Does nothing when the
  # header declares no contexts.
  #
  # The strings section spans from context_strings_offset up to
  # context_map_offset. Both values come straight from the file, so
  # they are validated before being used as a read length.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @raise [Cabriolet::ParseError] if context data is invalid
  def parse_contexts(handle, header)
    return if header.context_count.zero?

    strings_size = header.context_map_offset - header.context_strings_offset
    if strings_size.negative?
      raise Cabriolet::ParseError,
            "Invalid context offsets: context map (#{header.context_map_offset}) " \
            "precedes context strings (#{header.context_strings_offset})"
    end

    # Read context strings
    @io_system.seek(handle, header.context_strings_offset, Constants::SEEK_START)
    strings_data = @io_system.read(handle, strings_size)

    if strings_data.bytesize < strings_size
      raise Cabriolet::ParseError, "Cannot read complete context strings"
    end

    # Split by null terminators
    header.contexts = strings_data.force_encoding(Encoding::ASCII).split("\x00")

    # Read context map
    @io_system.seek(handle, header.context_map_offset, Constants::SEEK_START)
    map_data = @io_system.read(handle, header.context_count * 2)

    if map_data.bytesize < header.context_count * 2
      raise Cabriolet::ParseError, "Cannot read complete context map"
    end

    # Unpack as array of little-endian 16-bit integers
    header.context_map = map_data.unpack("v#{header.context_count}")
  end

  # Parse keywords dictionary: a run of length-prefixed strings that
  # ends at a zero length byte, at a string that would overrun the
  # section, or at the end of the section.
  #
  # The section ends where the Huffman tree starts, or where the topic
  # text starts when there is no Huffman tree.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  def parse_keywords(handle, header)
    @io_system.seek(handle, header.keywords_offset, Constants::SEEK_START)

    next_offset = header.huffman_tree_offset.positive? ? header.huffman_tree_offset : header.topic_text_offset
    section_size = next_offset - header.keywords_offset

    return if section_size <= 0

    section_data = @io_system.read(handle, section_size)

    header.keywords = []
    pos = 0

    while pos < section_data.bytesize
      length = section_data.getbyte(pos)
      break if length.nil? || length.zero?

      pos += 1
      break if pos + length > section_data.bytesize

      header.keywords << section_data[pos, length]
      pos += length
    end
  end

  # Parse Huffman tree: little-endian 16-bit node values read until a
  # terminating 0x0000 or the end of the data. The raw (unsigned)
  # values are stored on the header.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @raise [Cabriolet::ParseError] if the node count is even and non-zero
  def parse_huffman_tree(handle, header)
    @io_system.seek(handle, header.huffman_tree_offset, Constants::SEEK_START)

    nodes = []
    loop do
      node_data = @io_system.read(handle, 2)
      break if node_data.bytesize < 2

      node_value = node_data.unpack1("v")
      break if node_value.zero? # Terminating null

      nodes << node_value
    end

    # Validate node count (must be odd, representing a proper binary tree)
    if nodes.length.even? && !nodes.empty?
      raise Cabriolet::ParseError,
            "Invalid Huffman tree: expected odd number of nodes"
    end

    # Store raw node values (will be decoded during decompression)
    header.huffman_tree = nodes
  end

  # Populate topic metadata from offset array: topic i starts at
  # offsets[i] and its size is the distance to offsets[i + 1].
  #
  # @param header [Models::HLPHeader] Header to populate
  # @param offsets [Array<Integer>] Topic offsets (including end marker)
  def populate_topics(header, offsets)
    header.topics = []

    header.topic_count.times do |i|
      header.topics << Models::HLPTopic.new(
        index: i,
        offset: offsets[i],
        size: offsets[i + 1] - offsets[i],
      )
    end
  end
end
272
+ end
273
+ end
274
+ end