cabriolet 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +700 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +154 -14
  6. data/lib/cabriolet/binary/bitstream_writer.rb +129 -17
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +35 -43
  13. data/lib/cabriolet/cab/decompressor.rb +14 -19
  14. data/lib/cabriolet/cab/extractor.rb +140 -31
  15. data/lib/cabriolet/chm/command_handler.rb +227 -0
  16. data/lib/cabriolet/chm/compressor.rb +7 -3
  17. data/lib/cabriolet/chm/decompressor.rb +39 -21
  18. data/lib/cabriolet/chm/parser.rb +5 -2
  19. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  20. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  21. data/lib/cabriolet/cli/command_registry.rb +83 -0
  22. data/lib/cabriolet/cli.rb +356 -607
  23. data/lib/cabriolet/compressors/base.rb +1 -1
  24. data/lib/cabriolet/compressors/lzx.rb +241 -54
  25. data/lib/cabriolet/compressors/mszip.rb +35 -3
  26. data/lib/cabriolet/compressors/quantum.rb +34 -45
  27. data/lib/cabriolet/decompressors/base.rb +1 -1
  28. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  29. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  30. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  31. data/lib/cabriolet/decompressors/quantum.rb +3 -2
  32. data/lib/cabriolet/errors.rb +3 -0
  33. data/lib/cabriolet/file_entry.rb +156 -0
  34. data/lib/cabriolet/file_manager.rb +144 -0
  35. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  36. data/lib/cabriolet/hlp/compressor.rb +28 -238
  37. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  38. data/lib/cabriolet/hlp/parser.rb +52 -101
  39. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  40. data/lib/cabriolet/hlp/quickhelp/compressor.rb +626 -0
  41. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  42. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  43. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  44. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  45. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  46. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  47. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  48. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  49. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  50. data/lib/cabriolet/huffman/tree.rb +85 -1
  51. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  52. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  53. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  54. data/lib/cabriolet/lit/command_handler.rb +221 -0
  55. data/lib/cabriolet/lit/compressor.rb +633 -38
  56. data/lib/cabriolet/lit/decompressor.rb +518 -152
  57. data/lib/cabriolet/lit/parser.rb +670 -0
  58. data/lib/cabriolet/models/hlp_file.rb +130 -29
  59. data/lib/cabriolet/models/hlp_header.rb +105 -17
  60. data/lib/cabriolet/models/lit_header.rb +212 -25
  61. data/lib/cabriolet/models/szdd_header.rb +10 -2
  62. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  63. data/lib/cabriolet/oab/command_handler.rb +257 -0
  64. data/lib/cabriolet/oab/compressor.rb +17 -8
  65. data/lib/cabriolet/oab/decompressor.rb +41 -10
  66. data/lib/cabriolet/offset_calculator.rb +81 -0
  67. data/lib/cabriolet/plugin.rb +233 -0
  68. data/lib/cabriolet/plugin_manager.rb +453 -0
  69. data/lib/cabriolet/plugin_validator.rb +422 -0
  70. data/lib/cabriolet/system/io_system.rb +3 -0
  71. data/lib/cabriolet/system/memory_handle.rb +17 -4
  72. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  73. data/lib/cabriolet/szdd/compressor.rb +15 -11
  74. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  75. data/lib/cabriolet/version.rb +1 -1
  76. data/lib/cabriolet.rb +67 -17
  77. metadata +33 -2
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module HLP
5
+ module QuickHelp
6
# Huffman tree for QuickHelp topic compression
#
# Represents a binary Huffman tree whose leaves carry byte symbols (0-255).
# Trees are normally built from their serialized 16-bit node array with
# {HuffmanTree.deserialize}.
class HuffmanTree
  # Bit 15 of a serialized node value marks the node as a leaf.
  LEAF_FLAG = 0x8000

  attr_reader :root, :symbol_count

  # Node in the Huffman tree
  #
  # A node with no children is a leaf; leaves created by
  # {HuffmanTree.deserialize} carry the decoded symbol.
  class Node
    attr_accessor :symbol, :left_child, :right_child

    def initialize
      @symbol = nil
      @left_child = nil
      @right_child = nil
    end

    # @return [Boolean] true if the node has neither a left nor a right child
    def leaf?
      @left_child.nil? && @right_child.nil?
    end
  end

  # Initialize a tree (empty by default)
  #
  # @param root [Node, nil] Root node, nil for an empty tree
  # @param symbol_count [Integer] Number of symbols encoded by the tree
  def initialize(root = nil, symbol_count = 0)
    @root = root
    @symbol_count = symbol_count
  end

  # Check if tree is empty
  #
  # @return [Boolean] true if empty
  def empty?
    @root.nil?
  end

  # Check if tree has single node
  #
  # @return [Boolean] true if singular
  def singular?
    !@root.nil? && @root.leaf?
  end

  # Deserialize Huffman tree from node values
  #
  # A value with bit 15 set is a leaf whose low byte is the symbol. Both
  # representations of such a value are accepted: a negative Integer
  # (signed 16-bit read) and an Integer >= 0x8000 (unsigned 16-bit read).
  # Any other value describes an internal node whose left child lives at
  # index value / 2 and whose right child is the next node (index + 1).
  #
  # @param node_values [Array<Integer>] Array of 16-bit node values
  # @return [HuffmanTree] Deserialized tree
  # @raise [Cabriolet::ParseError] if tree is invalid
  def self.deserialize(node_values)
    return new if node_values.empty?

    n = node_values.length
    if n.even?
      raise Cabriolet::ParseError,
            "Invalid Huffman tree: expected odd number of nodes"
    end

    nodes = Array.new(n) { Node.new }
    symbol_exists = Array.new(256, false)

    node_values.each_with_index do |node_value, i|
      node = nodes[i]

      if leaf_value?(node_value)
        symbol = node_value & 0xFF
        if symbol_exists[symbol]
          raise Cabriolet::ParseError,
                "Invalid Huffman tree: symbol #{symbol} already encoded"
        end

        node.symbol = symbol
        symbol_exists[symbol] = true
      else
        child0 = node_value / 2
        child1 = i + 1

        # Validate child indices are within bounds
        unless child0 < n && child1 < n
          raise Cabriolet::ParseError,
                "Invalid Huffman tree: invalid child node location (child0=#{child0}, child1=#{child1}, n=#{n})"
        end

        # A left child that is this very node, or a node that has already
        # been linked as an internal node, would make the structure cyclic.
        # The self-reference must be tested explicitly because this node's
        # own left_child is still nil at this point.
        if child0 == i || nodes[child0].left_child
          raise Cabriolet::ParseError,
                "Invalid Huffman tree: cycle detected"
        end

        node.left_child = nodes[child0]
        node.right_child = nodes[child1]
      end
    end

    # n is odd here, so (n / 2) + 1 equals (n + 1) / 2
    new(nodes[0], (n / 2) + 1)
  end

  # Tell whether a serialized node value describes a leaf
  #
  # @param node_value [Integer] Serialized node value
  # @return [Boolean] true if the value is negative or has bit 15 set
  def self.leaf_value?(node_value)
    node_value.negative? || (node_value & LEAF_FLAG) != 0
  end
  private_class_method :leaf_value?

  # Create a decoder for this tree
  #
  # @return [HuffmanDecoder] New decoder
  def create_decoder
    HuffmanDecoder.new(self)
  end
end
112
+
113
# Decoder for Huffman-encoded data
#
# Walks a HuffmanTree one bit at a time, starting at the root, until a
# leaf is reached.
#
# Usage:
#   decoder = tree.create_decoder
#   until decoder.has_value?
#     decoder.push(bitstream.read_bit)
#   end
#   symbol = decoder.value
class HuffmanDecoder
  attr_reader :current_node

  # Initialize decoder
  #
  # @param tree [HuffmanTree] Huffman tree to use
  def initialize(tree)
    @tree = tree
    @current_node = tree.root
  end

  # Check if decoder has decoded a complete symbol
  #
  # @return [Boolean] true if value is ready
  def has_value?
    !@current_node.nil? && @current_node.leaf?
  end

  # Get decoded symbol value
  #
  # @return [Integer] Symbol value (0-255)
  # @raise [RuntimeError] if no value is ready
  def value
    raise "Decoder does not have a value" unless has_value?

    @current_node.symbol
  end

  # Push a bit into the decoder
  #
  # @param bit [Boolean, Integer] Bit value (true/1 for right, false/0 for left)
  # @raise [RuntimeError] if tree is empty or at leaf
  def push(bit)
    raise "Cannot walk an empty tree" if @current_node.nil?
    raise "Cannot walk further from a leaf" if @current_node.leaf?

    # Integer 0 is truthy in Ruby, so the bit must be compared explicitly;
    # a plain `bit ? ... : ...` would send a 0 bit down the right branch.
    take_right = bit && bit != 0
    @current_node = take_right ? @current_node.right_child : @current_node.left_child
  end

  # Reset decoder to tree root
  def reset
    @current_node = @tree.root
  end
end
165
+ end
166
+ end
167
+ end
@@ -0,0 +1,274 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../system/io_system"
4
+ require_relative "../../constants"
5
+
6
+ module Cabriolet
7
+ module HLP
8
+ module QuickHelp
9
# Parser for QuickHelp (.HLP) files
#
# Parses the QuickHelp binary format as specified in the DosHelp project.
# Structure:
# - Signature (2 bytes)
# - File Header (68 bytes)
# - Topic Index (variable)
# - Context Strings (variable)
# - Context Map (variable)
# - Keywords (optional)
# - Huffman Tree (optional)
# - Topic Texts (compressed)
#
# Only metadata is parsed here; topic texts are located (offset and size)
# but not read.
class Parser
  attr_reader :io_system

  # Initialize parser
  #
  # @param io_system [System::IOSystem, nil] Custom I/O system or nil for default
  def initialize(io_system = nil)
    @io_system = io_system || System::IOSystem.new
  end

  # Parse a QuickHelp file
  #
  # The file handle is always closed, even when parsing raises.
  #
  # @param filename [String] Path to HLP file
  # @return [Models::HLPHeader] Parsed header with metadata
  # @raise [Cabriolet::ParseError] if file is not valid QuickHelp
  def parse(filename)
    handle = @io_system.open(filename, Constants::MODE_READ)

    begin
      header = parse_file(handle)
      header.filename = filename
      header
    ensure
      @io_system.close(handle)
    end
  end

  private

  # Parse complete QuickHelp file structure
  #
  # @param handle [System::FileHandle] Open file handle
  # @return [Models::HLPHeader] Parsed header
  # @raise [Cabriolet::ParseError] if parsing fails
  def parse_file(handle)
    check_signature(handle)
    header = parse_header(handle)
    topic_offsets = parse_topic_index(handle, header)
    parse_contexts(handle, header)

    # Keywords and Huffman tree are optional; an offset of zero means absent
    parse_keywords(handle, header) if header.keywords_offset.positive?
    parse_huffman_tree(handle, header) if header.huffman_tree_offset.positive?

    # Topic sizes are derived from consecutive topic offsets
    populate_topics(header, topic_offsets)

    header
  end

  # Check file signature
  #
  # @param handle [System::FileHandle] Open file handle
  # @raise [Cabriolet::ParseError] if signature is invalid
  def check_signature(handle)
    sig_data = @io_system.read(handle, 2)
    return if sig_data == Binary::HLPStructures::SIGNATURE

    found = sig_data.bytes.map { |b| format('0x%02X', b) }.join(' ')
    raise Cabriolet::ParseError,
          "Invalid QuickHelp signature: expected 'LN' (0x4C 0x4E), " \
          "got #{found}"
  end

  # Parse file header
  #
  # @param handle [System::FileHandle] Open file handle positioned after signature
  # @return [Models::HLPHeader] Parsed header
  # @raise [Cabriolet::ParseError] if header is invalid
  def parse_header(handle)
    header_data = @io_system.read(handle, 68)
    if header_data.bytesize < 68
      raise Cabriolet::ParseError,
            "File too small for QuickHelp header"
    end

    # The signature was already consumed by check_signature, so it is
    # prepended again before handing the bytes to the FileHeader structure.
    binary_header = Binary::HLPStructures::FileHeader.read(
      Binary::HLPStructures::SIGNATURE + header_data,
    )

    # Validate version
    unless binary_header.version == 2
      raise Cabriolet::ParseError,
            "Unsupported QuickHelp version: #{binary_header.version}"
    end

    # Create header model
    Models::HLPHeader.new(
      magic: binary_header.signature,
      version: binary_header.version,
      attributes: binary_header.attributes,
      control_character: binary_header.control_character,
      topic_count: binary_header.topic_count,
      context_count: binary_header.context_count,
      display_width: binary_header.display_width,
      predefined_ctx_count: binary_header.predefined_ctx_count,
      database_name: binary_header.database_name,
      topic_index_offset: binary_header.topic_index_offset,
      context_strings_offset: binary_header.context_strings_offset,
      context_map_offset: binary_header.context_map_offset,
      keywords_offset: binary_header.keywords_offset,
      huffman_tree_offset: binary_header.huffman_tree_offset,
      topic_text_offset: binary_header.topic_text_offset,
      database_size: binary_header.database_size,
    )
  end

  # Parse topic index section
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @return [Array<Integer>] Topic offsets (including end marker)
  # @raise [Cabriolet::ParseError] if topic index is invalid
  def parse_topic_index(handle, header)
    @io_system.seek(handle, header.topic_index_offset, Constants::SEEK_START)

    # The index holds (topic_count + 1) DWORDs; the extra entry marks the
    # end of the last topic.
    count = header.topic_count + 1
    index_data = @io_system.read(handle, count * 4)

    if index_data.bytesize < count * 4
      raise Cabriolet::ParseError, "Cannot read complete topic index"
    end

    # Unpack as array of little-endian 32-bit integers
    index_data.unpack("V#{count}")
  end

  # Parse context strings and context map
  #
  # Does nothing when the header declares no contexts. The context
  # strings section is taken to span from context_strings_offset up to
  # context_map_offset.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @raise [Cabriolet::ParseError] if context data is invalid
  def parse_contexts(handle, header)
    return if header.context_count.zero?

    # A map offset that precedes the strings offset would yield a negative
    # read length; report it as a malformed file instead of letting the
    # I/O layer fail with an unrelated error.
    strings_size = header.context_map_offset - header.context_strings_offset
    if strings_size.negative?
      raise Cabriolet::ParseError,
            "Invalid context section: context map offset " \
            "(#{header.context_map_offset}) precedes context strings " \
            "offset (#{header.context_strings_offset})"
    end

    # Read context strings
    @io_system.seek(handle, header.context_strings_offset, Constants::SEEK_START)
    strings_data = @io_system.read(handle, strings_size)

    # Split by null terminators
    header.contexts = strings_data.force_encoding(Encoding::ASCII).split("\x00")

    # Read context map
    @io_system.seek(handle, header.context_map_offset, Constants::SEEK_START)
    map_data = @io_system.read(handle, header.context_count * 2)

    if map_data.bytesize < header.context_count * 2
      raise Cabriolet::ParseError, "Cannot read complete context map"
    end

    # Unpack as array of little-endian 16-bit integers
    header.context_map = map_data.unpack("v#{header.context_count}")
  end

  # Parse keywords dictionary
  #
  # The section runs up to the Huffman tree when one is present, otherwise
  # up to the topic text. It is read as a sequence of length-prefixed
  # strings; reading stops at a zero length byte or at an entry that
  # would run past the end of the section. header.keywords is left
  # untouched when the computed section size is not positive.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @raise [Cabriolet::ParseError] if keywords section is invalid
  def parse_keywords(handle, header)
    @io_system.seek(handle, header.keywords_offset, Constants::SEEK_START)

    # Calculate section size
    next_offset = header.huffman_tree_offset.positive? ? header.huffman_tree_offset : header.topic_text_offset
    section_size = next_offset - header.keywords_offset

    return if section_size <= 0

    section_data = @io_system.read(handle, section_size)

    # Parse length-prefixed strings
    keywords = []
    pos = 0

    while pos < section_data.bytesize
      length = section_data.getbyte(pos)
      break if length.zero?

      pos += 1
      break if pos + length > section_data.bytesize

      keywords << section_data[pos, length]
      pos += length
    end

    header.keywords = keywords
  end

  # Parse Huffman tree
  #
  # Node values are stored as read (unsigned little-endian 16-bit); they
  # are not interpreted here.
  #
  # @param handle [System::FileHandle] Open file handle
  # @param header [Models::HLPHeader] Header with offset information
  # @raise [Cabriolet::ParseError] if Huffman tree is invalid
  def parse_huffman_tree(handle, header)
    @io_system.seek(handle, header.huffman_tree_offset, Constants::SEEK_START)

    # Read nodes until we hit terminating 0x0000 (or run out of data)
    nodes = []
    loop do
      node_data = @io_system.read(handle, 2)
      break if node_data.bytesize < 2

      node_value = node_data.unpack1("v")
      break if node_value.zero? # Terminating null

      nodes << node_value
    end

    # Validate node count (must be odd, representing a proper binary tree)
    if nodes.length.even? && !nodes.empty?
      raise Cabriolet::ParseError,
            "Invalid Huffman tree: expected odd number of nodes"
    end

    # Store raw node values (will be decoded during decompression)
    header.huffman_tree = nodes
  end

  # Populate topic metadata from offset array
  #
  # Each topic's size is the distance from its offset to the next entry,
  # which is why the offset array needs one more entry than there are
  # topics.
  #
  # @param header [Models::HLPHeader] Header to populate
  # @param offsets [Array<Integer>] Topic offsets
  def populate_topics(header, offsets)
    header.topics = Array.new(header.topic_count) do |i|
      Models::HLPTopic.new(
        index: i,
        offset: offsets[i],
        size: offsets[i + 1] - offsets[i],
      )
    end
  end
end
272
+ end
273
+ end
274
+ end