cabriolet 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "topic_builder"
|
|
4
|
+
require_relative "topic_compressor"
|
|
5
|
+
require_relative "structure_builder"
|
|
6
|
+
require_relative "file_writer"
|
|
7
|
+
|
|
8
|
+
module Cabriolet
|
|
9
|
+
module HLP
|
|
10
|
+
module QuickHelp
|
|
11
|
+
# Compressor creates QuickHelp (.HLP) compressed archives
|
|
12
|
+
#
|
|
13
|
+
# QuickHelp files (DOS format) contain topics with Huffman encoding
|
|
14
|
+
# and optional keyword compression using LZSS MODE_MSHELP.
|
|
15
|
+
#
|
|
16
|
+
# NOTE: This implementation is based on the DosHelp project specification
|
|
17
|
+
# for the QuickHelp format used in DOS-era development tools.
|
|
18
|
+
class Compressor
|
|
19
|
+
attr_reader :io_system
|
|
20
|
+
|
|
21
|
+
# Default buffer size for I/O operations
|
|
22
|
+
DEFAULT_BUFFER_SIZE = 2048
|
|
23
|
+
|
|
24
|
+
# Build a QuickHelp compressor with an empty list of pending entries.
#
# @param io_system [System::IOSystem, nil] I/O backend; a fresh
#   System::IOSystem is created when nil
# @param algorithm_factory [AlgorithmFactory, nil] Algorithm factory;
#   Cabriolet.algorithm_factory is used when nil
def initialize(io_system = nil, algorithm_factory = nil)
  # Entries queued by add_file / add_data, consumed by generate.
  @files = []
  @io_system = io_system || System::IOSystem.new
  @algorithm_factory = algorithm_factory || Cabriolet.algorithm_factory
end
|
|
33
|
+
|
|
34
|
+
# Queue a file on disk for inclusion in the QuickHelp archive.
#
# Only the path is recorded here; nothing is read by this method.
#
# @param source_path [String] Path to source file
# @param hlp_path [String] Path within QuickHelp archive
# @param compress [Boolean] Whether to compress the file
# @return [void]
def add_file(source_path, hlp_path, compress: true)
  entry = { source: source_path, hlp_path: hlp_path, compress: compress }
  @files.push(entry)
end
|
|
47
|
+
|
|
48
|
+
# Queue an in-memory string for inclusion in the QuickHelp archive.
#
# @param data [String] Data to add
# @param hlp_path [String] Path within QuickHelp archive
# @param compress [Boolean] Whether to compress the data
# @return [void]
def add_data(data, hlp_path, compress: true)
  entry = { data: data, hlp_path: hlp_path, compress: compress }
  @files.push(entry)
end
|
|
61
|
+
|
|
62
|
+
# Write every queued entry to a QuickHelp (.HLP) file.
#
# Options are validated before any source file is read.
#
# @param output_file [String] Path to output HLP file
# @param options [Hash] Compression options
# @option options [Integer] :version QuickHelp format version (default: 2;
#   any other value is rejected)
# @option options [String] :database_name Database name for external links
#   (max 13 chars, default: "")
# @option options [Integer] :control_character Control character
#   (default: 0x3A ':')
# @option options [Boolean] :case_sensitive Case-sensitive contexts
#   (default: false)
# @return [Integer] Value returned by FileWriter#write_quickhelp_file
#   (bytes written to the output file)
# @raise [ArgumentError] if no entries were added, the version is not 2, or
#   the database name is longer than 13 characters
# @raise [Cabriolet::CompressionError] presumably from the builder/writer
#   helpers; it is not raised directly by this method
def generate(output_file, **options)
  settings = {
    version: options.fetch(:version, 2),
    database_name: options.fetch(:database_name, ""),
    control_char: options.fetch(:control_character, 0x3A), # ':'
    case_sensitive: options.fetch(:case_sensitive, false),
  }

  raise ArgumentError, "No files added to archive" if @files.empty?
  raise ArgumentError, "Version must be 2" unless settings[:version] == 2
  raise ArgumentError, "Database name too long (max 13 chars)" if settings[:database_name].length > 13

  # Load the topic payloads first, then lay out the QuickHelp structure.
  topics = prepare_topics
  qh_structure = StructureBuilder.new(**settings).build(topics)

  handle = @io_system.open(output_file, Constants::MODE_WRITE)
  begin
    FileWriter.new(@io_system).write_quickhelp_file(handle, qh_structure)
  ensure
    # Release the output handle even when writing fails.
    @io_system.close(handle)
  end
end
|
|
109
|
+
|
|
110
|
+
private
|
|
111
|
+
|
|
112
|
+
# Turn the queued entries into topic descriptors, in insertion order.
#
# Entries queued with add_data use their in-memory payload; entries queued
# with add_file are read through read_file_data.
#
# @return [Array<Hash>] One hash per entry with :index, :text, :context and
#   :compress keys
def prepare_topics
  @files.each_with_index.map do |entry, position|
    contents = entry[:data] || read_file_data(entry[:source])

    { index: position, text: contents, context: entry[:hlp_path], compress: entry[:compress] }
  end
end
|
|
128
|
+
|
|
129
|
+
# Read a whole file through the configured I/O system.
#
# The file is read in DEFAULT_BUFFER_SIZE chunks until the I/O system
# reports end of file, either with an empty string or with nil (the value
# IO#read(length) returns at EOF).
#
# @param filename [String] Path to file
# @return [String] File contents
def read_file_data(filename)
  handle = @io_system.open(filename, Constants::MODE_READ)
  begin
    contents = +""
    loop do
      chunk = @io_system.read(handle, DEFAULT_BUFFER_SIZE)
      # Treat nil like an empty read so a nil-returning backend cannot crash.
      break if chunk.nil? || chunk.empty?

      contents << chunk
    end
    contents
  ensure
    # Always release the handle, including when a read raises.
    @io_system.close(handle)
  end
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../system/io_system"
|
|
4
|
+
require_relative "../../constants"
|
|
5
|
+
require_relative "huffman_tree"
|
|
6
|
+
require_relative "huffman_stream"
|
|
7
|
+
require_relative "compression_stream"
|
|
8
|
+
|
|
9
|
+
module Cabriolet
|
|
10
|
+
module HLP
|
|
11
|
+
module QuickHelp
|
|
12
|
+
# Decompressor for QuickHelp (.HLP) files
|
|
13
|
+
#
|
|
14
|
+
# Extracts and decompresses topics from QuickHelp databases.
|
|
15
|
+
# Topics can be extracted by index or context string.
|
|
16
|
+
#
|
|
17
|
+
# Each topic contains formatted text lines with:
|
|
18
|
+
# - Text content
|
|
19
|
+
# - Style attributes (bold, italic, underline)
|
|
20
|
+
# - Hyperlinks to other topics or external contexts
|
|
21
|
+
# - Control commands (title, popup, etc.)
|
|
22
|
+
class Decompressor
|
|
23
|
+
attr_reader :io_system, :parser
|
|
24
|
+
attr_accessor :buffer_size
|
|
25
|
+
|
|
26
|
+
# Input buffer size for decompression
|
|
27
|
+
DEFAULT_BUFFER_SIZE = 2048
|
|
28
|
+
|
|
29
|
+
# Build a QuickHelp decompressor.
#
# @param io_system [System::IOSystem, nil] I/O backend; a fresh
#   System::IOSystem is created when nil
# @param algorithm_factory [AlgorithmFactory, nil] Algorithm factory;
#   Cabriolet.algorithm_factory is used when nil
def initialize(io_system = nil, algorithm_factory = nil)
  @buffer_size = DEFAULT_BUFFER_SIZE
  @io_system = io_system || System::IOSystem.new
  @algorithm_factory = algorithm_factory || Cabriolet.algorithm_factory
  # The parser shares this decompressor's I/O system.
  @parser = Parser.new(@io_system)
end
|
|
40
|
+
|
|
41
|
+
# Parse an HLP file and remember where it came from.
#
# The filename is stored on the returned header; decompress_topic reopens
# the file from header.filename when a topic is extracted.
#
# @param filename [String] Path to the HLP file
# @return [Models::HLPHeader] Parsed header with topics
# @raise [Errors::ParseError] presumably raised by the parser for invalid
#   files; it is not raised directly by this method
def open(filename)
  @parser.parse(filename).tap { |parsed| parsed.filename = filename }
end
|
|
51
|
+
|
|
52
|
+
# Close an HLP file (no-op for compatibility).
#
# @param _header [Models::HLPHeader] Header to close (ignored)
# @return [nil]
def close(_header)
  # Intentionally empty: the header holds no open resources. File handles
  # are opened and closed per extraction, so there is nothing to free here.
end
|
|
61
|
+
|
|
62
|
+
# Extract a file (topic) from an HLP archive to disk.
#
# Topic lookup and decoding are delegated to extract_file_to_memory so both
# entry points resolve topics the same way; this method only adds the
# output-path check and the write.
#
# @param header [Models::HLPHeader] HLP header from open()
# @param hlp_file [Models::HLPFile] File entry to extract
# @param output_path [String] Path to write extracted content
# @return [Integer] Number of bytes written (value returned by File.write)
# @raise [ArgumentError] if any parameter is nil
# @raise [Errors::DecompressionError] if no topic matches the file entry
def extract_file(header, hlp_file, output_path)
  # Validate everything up front so nothing is decoded for a call that
  # cannot be completed.
  raise ArgumentError, "Header must not be nil" unless header
  raise ArgumentError, "HLP file must not be nil" unless hlp_file
  raise ArgumentError, "Output path must not be nil" unless output_path

  content = extract_file_to_memory(header, hlp_file)

  # Written with Ruby's core File directly, not through @io_system.
  ::File.write(output_path, content)
end
|
|
96
|
+
|
|
97
|
+
# Extract a file (topic) to memory.
#
# The topic is located by the entry's index when it has one, otherwise (or
# when the index finds nothing) by matching the entry's offset against the
# header's topics.
#
# @param header [Models::HLPHeader] HLP header from open()
# @param hlp_file [Models::HLPFile] File entry to extract
# @return [String] Extracted content
# @raise [ArgumentError] if header or hlp_file is nil
# @raise [Errors::DecompressionError] if no topic matches the file entry
def extract_file_to_memory(header, hlp_file)
  raise ArgumentError, "Header must not be nil" unless header
  raise ArgumentError, "HLP file must not be nil" unless hlp_file

  # An entry may respond to #index yet carry no value; indexing with nil
  # would raise TypeError, so skip straight to the offset lookup instead.
  index = hlp_file.index if hlp_file.respond_to?(:index)
  topic = header.topics[index] unless index.nil?

  if topic.nil? && hlp_file.respond_to?(:offset)
    topic = header.topics.find { |candidate| candidate.offset == hlp_file.offset }
  end

  # NOTE(review): other methods in this class raise
  # Cabriolet::DecompressionError; confirm Errors:: resolves to the same class.
  raise Errors::DecompressionError, "Topic not found for file" unless topic

  extract_topic_text(header, topic)
end
|
|
126
|
+
|
|
127
|
+
# Extract the plain text of the topic at a given position.
#
# @param header [Models::HLPHeader] HLP header from open()
# @param topic_index [Integer] Zero-based topic index
# @return [String] Plain text content of the topic
# @raise [ArgumentError] if header is nil or the index is negative or not
#   below header.topic_count
# @raise [Errors::DecompressionError] if extraction fails
def extract_topic_by_index(header, topic_index)
  raise ArgumentError, "Header must not be nil" unless header

  within_bounds = !topic_index.negative? && topic_index < header.topic_count
  raise ArgumentError, "Topic index out of range" unless within_bounds

  extract_topic_text(header, header.topics[topic_index])
end
|
|
144
|
+
|
|
145
|
+
# Extract the plain text of the topic registered under a context string.
#
# @param header [Models::HLPHeader] HLP header from open()
# @param context_string [String] Context string to look up
# @return [String, nil] Plain text content of the topic, or nil when the
#   context string is not known to the header
# @raise [ArgumentError] if header or context_string is nil
# @raise [Errors::DecompressionError] if extraction fails
def extract_topic_by_context(header, context_string)
  raise ArgumentError, "Header must not be nil" unless header
  raise ArgumentError, "Context string must not be nil" unless context_string

  # Resolve the context to a topic index; an unknown context is not an error.
  resolved = find_topic_index(header, context_string)
  resolved ? extract_topic_by_index(header, resolved) : nil
end
|
|
165
|
+
|
|
166
|
+
# Decompress a topic, parse its lines and return the resulting plain text.
#
# Parsing populates the given topic object; the return value is read back
# from topic.plain_text afterwards.
#
# @param header [Models::HLPHeader] HLP header from open()
# @param topic [Models::HLPTopic] Topic to extract
# @return [String] Plain text content of the topic
# @raise [ArgumentError] if header or topic is nil
# @raise [Errors::DecompressionError] if extraction fails
def extract_topic_text(header, topic)
  raise ArgumentError, "Header must not be nil" unless header
  raise ArgumentError, "Topic must not be nil" unless topic

  parse_topic_text(topic, decompress_topic(header, topic), header.control_char)
  topic.plain_text
end
|
|
182
|
+
|
|
183
|
+
# Extract every topic to its own text file inside a directory.
#
# Files are named topic_NNNN.txt, where NNNN is the zero-based topic
# position padded to at least four digits. The directory is created when it
# does not exist.
#
# @param header [Models::HLPHeader] HLP header from open()
# @param output_dir [String] Directory to extract topics to
# @return [Integer] Number of topics extracted
# @raise [ArgumentError] if header or output_dir is nil
# @raise [Errors::DecompressionError] if extraction fails
def extract_all(header, output_dir)
  raise ArgumentError, "Header must not be nil" unless header
  raise ArgumentError, "Output directory must not be nil" unless output_dir

  FileUtils.mkdir_p(output_dir)

  written = 0
  header.topics.each.with_index do |topic, position|
    # Decode the topic in place, then dump its plain text.
    parse_topic_text(topic, decompress_topic(header, topic), header.control_char)

    target = ::File.join(output_dir, format("topic_%04d.txt", position))
    File.write(target, topic.plain_text)
    written += 1
  end
  written
end
|
|
215
|
+
|
|
216
|
+
private
|
|
217
|
+
|
|
218
|
+
# Look up the topic index mapped to a context string.
#
# Contexts are compared exactly when the header is case-sensitive and after
# downcasing both sides otherwise. The first matching context wins; its
# position in header.contexts selects the entry of header.context_map.
#
# @param header [Models::HLPHeader] Header with context data
# @param context_string [String] Context string to look up
# @return [Integer, nil] Topic index or nil if not found
def find_topic_index(header, context_string)
  exact = header.case_sensitive?

  position = header.contexts.find_index do |candidate|
    if exact
      candidate == context_string
    else
      candidate.downcase == context_string.downcase
    end
  end

  position.nil? ? nil : header.context_map[position]
end
|
|
239
|
+
|
|
240
|
+
# Read one topic's stored bytes from the HLP file and decode them.
#
# The stored topic starts with a 16-bit little-endian decompressed length.
# The remaining bytes are Huffman-decoded when the header has a Huffman
# tree, then passed through decompress_data.
#
# @param header [Models::HLPHeader] HLP header with compression info
# @param topic [Models::HLPTopic] Topic to decompress
# @return [String] Binary decompressed topic data
# @raise [Cabriolet::DecompressionError] if fewer than two bytes could be
#   read for the topic, or if decoding fails
def decompress_topic(header, topic)
  handle = @io_system.open(header.filename, Constants::MODE_READ)

  begin
    # Topic offsets are relative to the start of the topic text area.
    @io_system.seek(handle, header.topic_text_offset + topic.offset, Constants::SEEK_START)
    compressed_data = @io_system.read(handle, topic.size)

    # A nil read (the EOF value of IO#read(length)) counts as "too short"
    # rather than being allowed to crash on nil.bytesize.
    if compressed_data.nil? || compressed_data.bytesize < 2
      raise Cabriolet::DecompressionError,
            "Topic data too short for decompressed length"
    end

    decompressed_length = compressed_data[0, 2].unpack1("v")
    encoded_data = compressed_data[2..]

    # Step 1: Huffman decoding (only when the header carries a tree)
    compact_data = if header.has_huffman?
                     huffman_decode(encoded_data, header)
                   else
                     encoded_data
                   end

    # Step 2: keyword / escape-sequence expansion
    decompress_data(compact_data, decompressed_length, header)
  ensure
    @io_system.close(handle) if handle
  end
end
|
|
278
|
+
|
|
279
|
+
# Run Huffman-encoded bytes through the header's Huffman tree.
#
# The decoded stream is drained in 1024-byte reads until it returns an
# empty chunk.
#
# @param data [String] Binary Huffman-encoded data
# @param header [Models::HLPHeader] Header with Huffman tree
# @return [String] Binary Huffman-decoded data
def huffman_decode(data, header)
  stream = HuffmanStream.new(data, HuffmanTree.deserialize(header.huffman_tree))

  decoded = String.new(encoding: Encoding::BINARY)
  until (chunk = stream.read(1024)).empty?
    decoded << chunk
  end
  decoded
end
|
|
299
|
+
|
|
300
|
+
# Expand keyword references and escape sequences in compact topic data.
#
# A CompressionStream is used even when the header has no keywords (an
# empty keyword list is passed) so escape sequences are still decoded
# (0x1A followed by a byte makes that byte literal).
#
# @param data [String] Binary compact data
# @param output_length [Integer] Expected decompressed length
# @param header [Models::HLPHeader] Header with keywords
# @return [String] Binary decompressed data
def decompress_data(data, output_length, header)
  keyword_table = header.keywords || []
  CompressionStream.new(data, keyword_table).read(output_length)
end
|
|
313
|
+
|
|
314
|
+
# Split decompressed topic data into lines and store them on the topic.
#
# The topic's line list is reset and the raw data is kept in
# topic.source_data. Each parsed line is first offered to process_command;
# lines it does not consume are appended to the topic.
#
# @param topic [Models::HLPTopic] Topic to populate
# @param data [String] Binary decompressed topic data
# @param control_char [String] Control character for commands
# @return [void]
def parse_topic_text(topic, data, control_char)
  topic.lines = []
  topic.source_data = data

  cursor = 0
  until cursor >= data.bytesize
    line, consumed = parse_line(data, cursor)
    cursor += consumed

    # Command lines only update topic metadata and are not kept as text.
    next if process_command(line, control_char, topic)

    topic.add_line(line)
  end
end
|
|
337
|
+
|
|
338
|
+
# Parse a single line record from topic data.
#
# Layout as consumed by this method:
#   [text_length][text][newline][attr_length][attrs][0xFF]
# - text_length: one byte; the text occupies text_length - 2 bytes, i.e. the
#   count covers the length byte itself, the text and the newline byte
# - text: the line content (without newline)
# - newline: one byte, skipped without checking its value
# - attr_length: one byte; the attribute data occupies attr_length - 1 bytes
# - 0xFF: skipped only when the byte after the attribute data is 0xFF
#
# NOTE(review): the 0xFF skip looks only at the byte value, so a following
# record whose text_length byte is 0xFF would be swallowed here. Confirm
# against the format written by the compressor.
#
# @param data [String] Binary topic data
# @param offset [Integer] Offset to start reading
# @return [Array<Models::HLPLine, Integer>] Parsed line and bytes read
# @raise [Cabriolet::DecompressionError] if the record is truncated or a
#   length byte is too small to describe a valid record
def parse_line(data, offset)
  pos = offset

  # Text length byte
  text_length = data.getbyte(pos)
  if text_length.nil?
    raise Cabriolet::DecompressionError,
          "Unexpected EOF reading text length"
  end

  # Values below 2 would produce a negative slice length (String#[] then
  # returns nil); reject them as corrupt data instead.
  if text_length < 2
    raise Cabriolet::DecompressionError,
          "Invalid text length #{text_length}"
  end

  pos += 1

  # Text: text_length minus the length byte and the newline byte
  text_bytes = text_length - 2
  if pos + text_bytes > data.bytesize
    raise Cabriolet::DecompressionError, "Unexpected EOF reading text"
  end

  text = data[pos, text_bytes].force_encoding(Encoding::ASCII)
  pos += text_bytes

  # Newline byte: must exist, value is ignored
  if data.getbyte(pos).nil?
    raise Cabriolet::DecompressionError,
          "Unexpected EOF reading newline"
  end

  pos += 1

  line = Models::HLPLine.new(text)

  # Attribute length byte
  attr_length = data.getbyte(pos)
  if attr_length.nil?
    raise Cabriolet::DecompressionError,
          "Unexpected EOF reading attribute length"
  end

  # Zero would again mean a negative slice length.
  if attr_length < 1
    raise Cabriolet::DecompressionError,
          "Invalid attribute length #{attr_length}"
  end

  pos += 1

  # Attribute data: attr_length - 1 bytes
  attr_bytes = attr_length - 1
  if pos + attr_bytes > data.bytesize
    raise Cabriolet::DecompressionError,
          "Unexpected EOF reading attributes"
  end

  attr_data = data[pos, attr_bytes]
  pos += attr_bytes

  # Optional terminator byte
  pos += 1 if pos < data.bytesize && data.getbyte(pos) == 0xFF

  # Styles and hyperlinks are decoded onto the line object.
  parse_line_attributes(line, attr_data)

  [line, pos - offset]
end
|
|
417
|
+
|
|
418
|
+
# Parse line attributes and hyperlinks
#
# The attribute data is read in two passes over one cursor (+pos+):
# 1. style chunks, up to a 0xFF marker or the end of the data
# 2. hyperlink records, after the 0xFF marker when one is present
#
# Truncated data ends the affected pass silently; only an invalid link
# range raises.
#
# @param line [Models::HLPLine] Line to populate with attributes
# @param attr_data [String] Binary attribute data
# @return [void]
# @raise [Cabriolet::DecompressionError] if a hyperlink starts at column 0
#   or starts after it ends
def parse_line_attributes(line, attr_data)
  pos = 0
  # Index of the next character of the line that has no style assigned yet.
  char_index = 0

  # Parse style attributes
  while pos < attr_data.bytesize
    # Check for end of attributes marker (0xFF)
    break if attr_data.getbyte(pos) == 0xFF

    # Read style byte (default for first chunk)
    # While char_index is still zero no style byte is consumed and
    # TextStyle::NONE is used; otherwise one byte is consumed as the style.
    style = if char_index.zero?
              Binary::HLPStructures::TextStyle::NONE
            else
              # The value of this first read is discarded; the style is
              # re-read below from the same position (pos - 1 after the bump).
              attr_data.getbyte(pos)
              pos += 1
              break if pos >= attr_data.bytesize # No length byte

              attr_data.getbyte(pos - 1)
            end

    # Read chunk length
    if pos >= attr_data.bytesize
      break
    end

    chunk_length = attr_data.getbyte(pos)
    pos += 1

    # Apply style to characters
    # The chunk is clamped so it never extends past the end of the line.
    chunk_length = [chunk_length, line.length - char_index].min
    line.apply_style(char_index, char_index + chunk_length - 1, style)
    char_index += chunk_length
  end

  # Skip 0xFF marker if present
  pos += 1 if pos < attr_data.bytesize && attr_data.getbyte(pos) == 0xFF

  # Parse hyperlinks
  # Each record: start byte, end byte, NUL-terminated context string and,
  # for an empty context string, an optional 16-bit numeric context.
  while pos < attr_data.bytesize
    # Read link start (1-based)
    link_start = attr_data.getbyte(pos)
    pos += 1
    break if pos >= attr_data.bytesize

    # Read link end (1-based)
    link_end = attr_data.getbyte(pos)
    pos += 1

    # Validate link position
    if link_start.zero? || link_start > link_end
      raise Cabriolet::DecompressionError, "Invalid hyperlink position"
    end

    # Read NULL-terminated context string
    context_end = attr_data.index("\x00", pos)
    if context_end.nil?
      # No more data
      break
    end

    context_string = attr_data[pos, context_end - pos]
    pos = context_end + 1

    # Check for numeric link
    # Only taken when two more bytes are available after the NUL.
    if context_string.empty? && pos + 1 < attr_data.bytesize
      # Read WORD for numeric topic index
      # "v" unpacks a little-endian unsigned 16-bit value; it is rendered as
      # "@L" followed by four uppercase hex digits.
      numeric_context = attr_data[pos, 2].unpack1("v")
      pos += 2
      context_string = "@L#{format('%04X', numeric_context)}"
    end

    # Apply link to line
    line.apply_link(link_start, link_end, context_string)
  end
end
|
|
498
|
+
|
|
499
|
+
# Interpret a line as a QuickHelp control command, if it is one.
#
# A command line starts with the control character followed by a command
# letter; everything after the letter is the command's parameter. Recognised
# commands are recorded in topic.metadata. Anything else is left for the
# caller to treat as ordinary text.
#
# @param line [Models::HLPLine] Line to check
# @param control_char [String] Control character
# @param topic [Models::HLPTopic] Topic being parsed
# @return [Boolean] true if line was a command
def process_command(line, control_char, topic)
  text = line.text
  # Needs the control character plus at least a command letter.
  return false if text.length < 2 || text[0] != control_char

  command_char = text[1]
  parameter = text.length == 2 ? "" : text[2..]

  # Commands whose parameter is stored verbatim.
  text_keys = {
    "n" => :title,     # :n - Topic title
    "c" => :category,  # :c - Category
    ">" => :next,      # :> - Next topic
    "<" => :previous,  # :< - Previous topic
    "y" => :execute,   # :y - Execute command
    "p" => :paste,     # :p - Paste section
    "m" => :mark,      # :m - Mark
  }
  # Commands whose parameter is stored as an integer.
  number_keys = {
    "l" => :window_height, # :l - Window length
    "z" => :freeze_height, # :z - Freeze height
  }
  # Commands that only set a flag.
  flag_keys = {
    "g" => :popup,     # :g - Popup
    "i" => :list,      # :i - List
    "x" => :hidden,    # :x - Hidden/Command
    "u" => :raw,       # :u - Raw
    "e" => :end_paste, # :e - End paste section
  }

  if text_keys.key?(command_char)
    topic.metadata[text_keys[command_char]] = parameter
  elsif number_keys.key?(command_char)
    topic.metadata[number_keys[command_char]] = parameter.to_i
  elsif flag_keys.key?(command_char)
    topic.metadata[flag_keys[command_char]] = true
  elsif command_char == "r" # :r - References (comma-separated list)
    topic.metadata[:references] = parameter.split(",").map(&:strip)
  else
    # Unknown command, treat as text
    return false
  end

  true
end
|
|
555
|
+
end
|
|
556
|
+
end
|
|
557
|
+
end
|
|
558
|
+
end
|