cabriolet 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +700 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +154 -14
  6. data/lib/cabriolet/binary/bitstream_writer.rb +129 -17
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +35 -43
  13. data/lib/cabriolet/cab/decompressor.rb +14 -19
  14. data/lib/cabriolet/cab/extractor.rb +140 -31
  15. data/lib/cabriolet/chm/command_handler.rb +227 -0
  16. data/lib/cabriolet/chm/compressor.rb +7 -3
  17. data/lib/cabriolet/chm/decompressor.rb +39 -21
  18. data/lib/cabriolet/chm/parser.rb +5 -2
  19. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  20. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  21. data/lib/cabriolet/cli/command_registry.rb +83 -0
  22. data/lib/cabriolet/cli.rb +356 -607
  23. data/lib/cabriolet/compressors/base.rb +1 -1
  24. data/lib/cabriolet/compressors/lzx.rb +241 -54
  25. data/lib/cabriolet/compressors/mszip.rb +35 -3
  26. data/lib/cabriolet/compressors/quantum.rb +34 -45
  27. data/lib/cabriolet/decompressors/base.rb +1 -1
  28. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  29. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  30. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  31. data/lib/cabriolet/decompressors/quantum.rb +3 -2
  32. data/lib/cabriolet/errors.rb +3 -0
  33. data/lib/cabriolet/file_entry.rb +156 -0
  34. data/lib/cabriolet/file_manager.rb +144 -0
  35. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  36. data/lib/cabriolet/hlp/compressor.rb +28 -238
  37. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  38. data/lib/cabriolet/hlp/parser.rb +52 -101
  39. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  40. data/lib/cabriolet/hlp/quickhelp/compressor.rb +626 -0
  41. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  42. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  43. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  44. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  45. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  46. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  47. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  48. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  49. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  50. data/lib/cabriolet/huffman/tree.rb +85 -1
  51. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  52. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  53. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  54. data/lib/cabriolet/lit/command_handler.rb +221 -0
  55. data/lib/cabriolet/lit/compressor.rb +633 -38
  56. data/lib/cabriolet/lit/decompressor.rb +518 -152
  57. data/lib/cabriolet/lit/parser.rb +670 -0
  58. data/lib/cabriolet/models/hlp_file.rb +130 -29
  59. data/lib/cabriolet/models/hlp_header.rb +105 -17
  60. data/lib/cabriolet/models/lit_header.rb +212 -25
  61. data/lib/cabriolet/models/szdd_header.rb +10 -2
  62. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  63. data/lib/cabriolet/oab/command_handler.rb +257 -0
  64. data/lib/cabriolet/oab/compressor.rb +17 -8
  65. data/lib/cabriolet/oab/decompressor.rb +41 -10
  66. data/lib/cabriolet/offset_calculator.rb +81 -0
  67. data/lib/cabriolet/plugin.rb +233 -0
  68. data/lib/cabriolet/plugin_manager.rb +453 -0
  69. data/lib/cabriolet/plugin_validator.rb +422 -0
  70. data/lib/cabriolet/system/io_system.rb +3 -0
  71. data/lib/cabriolet/system/memory_handle.rb +17 -4
  72. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  73. data/lib/cabriolet/szdd/compressor.rb +15 -11
  74. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  75. data/lib/cabriolet/version.rb +1 -1
  76. data/lib/cabriolet.rb +67 -17
  77. metadata +33 -2
@@ -1,19 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "quickhelp/parser"
4
+ require_relative "winhelp/parser"
5
+
3
6
  module Cabriolet
4
7
  module HLP
5
- # Parser for HLP (Windows Help) files
8
+ # Main parser for HLP files
6
9
  #
7
- # NOTE: This implementation is based on the knowledge that HLP files use
8
- # LZSS compression with MODE_MSHELP, but cannot be fully validated due to
9
- # lack of test fixtures and incomplete libmspack implementation.
10
+ # Detects the HLP format variant and delegates to the appropriate parser:
11
+ # - QuickHelp (DOS format with "LN" signature)
12
+ # - Windows Help (WinHelp 3.x/4.x format)
10
13
  class Parser
11
14
  attr_reader :io_system
12
15
 
13
16
  # Initialize parser
14
17
  #
15
- # @param io_system [System::IOSystem, nil] Custom I/O system or nil for
16
- # default
18
+ # @param io_system [System::IOSystem, nil] Custom I/O system or nil for default
17
19
  def initialize(io_system = nil)
18
20
  @io_system = io_system || System::IOSystem.new
19
21
  end
@@ -21,111 +23,60 @@ module Cabriolet
21
23
  # Parse an HLP file
22
24
  #
23
25
  # @param filename [String] Path to HLP file
24
- # @return [Models::HLPHeader] Parsed header
25
- # @raise [Errors::ParseError] if file is not valid HLP
26
+ # @return [Models::HLPHeader, Models::WinHelpHeader] Parsed header with metadata
27
+ # @raise [Cabriolet::ParseError] if file is not a valid HLP format
26
28
  def parse(filename)
27
- handle = @io_system.open(filename, Constants::MODE_READ)
28
-
29
- begin
30
- parse_header(handle)
31
- ensure
32
- @io_system.close(handle)
29
+ # Identify the HLP variant; detect_format returns a format symbol for this file
30
+ format = detect_format(filename)
31
+
32
+ # Hand the file to the variant-specific parser, reusing this parser's I/O system
33
+ case format
34
+ when :quickhelp
35
+ QuickHelp::Parser.new(@io_system).parse(filename)
36
+ when :winhelp
37
+ WinHelp::Parser.new(@io_system).parse(filename)
38
+ else
39
+ raise Cabriolet::ParseError,
40
+ "Unknown HLP format in file: #{filename}"
33
41
  end
34
42
  end
35
43
 
36
44
  private
37
45
 
38
- # Parse HLP header from file handle
39
- #
40
- # @param handle [System::FileHandle] Open file handle
41
- # @return [Models::HLPHeader] Parsed header with file list
42
- # @raise [Errors::ParseError] if header is invalid
43
- def parse_header(handle)
44
- # Read header structure
45
- header_data = @io_system.read(handle, 18)
46
- raise Errors::ParseError, "File too small for HLP header" if
47
- header_data.bytesize < 18
48
-
49
- binary_header = Binary::HLPStructures::Header.read(header_data)
50
-
51
- # Validate signature
52
- unless valid_signature?(binary_header.signature)
53
- raise Errors::ParseError,
54
- "Invalid HLP signature: #{binary_header.signature.inspect}"
55
- end
56
-
57
- # Create header model
58
- header = Models::HLPHeader.new(
59
- magic: binary_header.signature,
60
- version: binary_header.version,
61
- length: 0,
62
- )
63
-
64
- # Parse file directory if present
65
- if binary_header.file_count.positive? &&
66
- binary_header.directory_offset.positive?
67
- parse_directory(handle, header, binary_header)
68
- end
69
-
70
- header
71
- end
72
-
73
- # Parse file directory
46
+ # Detect HLP format variant
74
47
  #
75
- # @param handle [System::FileHandle] Open file handle
76
- # @param header [Models::HLPHeader] Header to populate
77
- # @param binary_header [Binary::HLPStructures::Header] Binary header
78
- # @return [void]
79
- def parse_directory(handle, header, binary_header)
80
- # Seek to directory
81
- @io_system.seek(
82
- handle,
83
- binary_header.directory_offset,
84
- Constants::SEEK_START,
85
- )
86
-
87
- # Read each file entry
88
- binary_header.file_count.times do
89
- # Read filename length
90
- length_data = @io_system.read(handle, 4)
91
- break if length_data.bytesize < 4
92
-
93
- filename_length = length_data.unpack1("V")
94
- next if filename_length.zero? || filename_length > 1024
95
-
96
- # Read filename
97
- filename = @io_system.read(handle, filename_length)
98
- next if filename.bytesize != filename_length
99
-
100
- # Read rest of entry (offset, sizes, compression flag)
101
- metadata_data = @io_system.read(handle, 13)
102
- next if metadata_data.bytesize < 13
103
-
104
- offset, uncompressed_size, compressed_size, compression_flag =
105
- metadata_data.unpack("V3C")
106
-
107
- # Create file model
108
- file = Models::HLPFile.new(
109
- filename: filename.force_encoding("ASCII-8BIT"),
110
- offset: offset,
111
- length: uncompressed_size,
112
- compressed_length: compressed_size,
113
- compressed: compression_flag != 0,
114
- )
48
+ # @param filename [String] Path to HLP file
49
+ # @return [Symbol] :quickhelp or :winhelp
50
+ # @raise [Cabriolet::ParseError] if format cannot be determined
51
def detect_format(filename)
  # Classifies the file as :quickhelp or :winhelp from its leading bytes and
  # raises Cabriolet::ParseError when the signature is unknown or the file is
  # too short to carry one. The handle is always closed before returning.
  handle = @io_system.open(filename, Constants::MODE_READ)

  begin
    # Coerce the read result to a binary String so that a truncated or empty
    # file (short read, or nil from the I/O layer) ends in the ParseError
    # below instead of a NoMethodError on nil.
    sig_data = @io_system.read(handle, 4).to_s.b
    prefix = sig_data.byteslice(0, 2)

    # QuickHelp signature ("LN" at offset 0)
    return :quickhelp if prefix == Binary::HLPStructures::SIGNATURE

    # WinHelp magics all live in the first little-endian 16-bit word:
    # 0x35F3 (3.x) and 0x5F3F / 0x3F5F (4.x). Masking a 32-bit little-endian
    # read with 0xFFFF yields this same word, so two bytes are sufficient.
    # unpack1 returns nil when fewer than two bytes are available.
    magic_word = prefix.unpack1("v")
    return :winhelp if [0x35F3, 0x5F3F, 0x3F5F].include?(magic_word)

    # Unknown format: report whatever bytes were actually read
    dump = sig_data.bytes.map { |b| format("0x%02X", b) }.join(" ")
    raise Cabriolet::ParseError, "Unknown HLP signature: #{dump}"
  ensure
    @io_system.close(handle)
  end
end
119
-
120
- # Check if signature is valid HLP
121
- #
122
- # @param signature [String] Signature bytes
123
- # @return [Boolean] true if valid
124
- def valid_signature?(_signature)
125
- # Accept the placeholder signature or other common HLP signatures
126
- # For now, accept any signature since we're testing without real fixtures
127
- true
128
- end
129
80
  end
130
81
  end
131
82
  end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../binary/bitstream"
4
+
5
+ module Cabriolet
6
+ module HLP
7
+ module QuickHelp
8
+ # Compression stream decoder for QuickHelp topics
9
+ #
10
+ # Handles dictionary substitution (keyword compression) and run-length
11
+ # encoding as specified in the QuickHelp format.
12
+ #
13
+ # Control bytes 0x10-0x1A have special meanings:
14
+ # - 0x10-0x17: Dictionary entry (with optional space append)
15
+ # - 0x18: Run of spaces
16
+ # - 0x19: Run of bytes
17
+ # - 0x1A: Escape byte
18
class CompressionStream
  # Control byte: the following byte is a literal
  ESCAPE_BYTE = 0x1A
  # Control byte: followed by REPEAT-BYTE, REPEAT-COUNT
  BYTE_RUN = 0x19
  # Control byte: followed by SPACE-COUNT
  SPACE_RUN = 0x18
  # All control bytes; 0x10-0x17 within it are dictionary references
  CONTROL_BYTES = (0x10..0x1A).freeze
  # Bit 2 of a dictionary control byte: append a space after the keyword
  APPEND_SPACE_FLAG = 0x04
  # Bits 0-1 of a dictionary control byte: high bits of the keyword index
  INDEX_HIGH_MASK = 0x03
  # A refill stops decoding once the buffer has grown past this many bytes
  REFILL_LIMIT = 256

  # Initialize compression stream decoder
  #
  # @param input [String, IO] Compressed input; a String is wrapped in a
  #   StringIO, anything else must respond to #getbyte and #eof?
  # @param keywords [Array<String>, nil] Keyword dictionary (nil means empty)
  def initialize(input, keywords = [])
    @input = input.is_a?(String) ? StringIO.new(input) : input
    @keywords = keywords || []
    @buffer = String.new(encoding: Encoding::BINARY)
    @buffer_pos = 0
  end

  # Read bytes from the decompressed stream
  #
  # @param length [Integer] Maximum number of bytes to return
  # @return [String] Binary string of up to +length+ decompressed bytes;
  #   shorter (possibly empty) once the input is exhausted
  # @raise [Cabriolet::DecompressionError] if the input ends inside a
  #   control sequence or references a keyword that does not exist
  def read(length)
    result = String.new(encoding: Encoding::BINARY)

    while result.bytesize < length
      fill_buffer(length - result.bytesize) if buffer_drained?
      break if buffer_drained? # nothing could be decoded: end of input

      to_copy = [length - result.bytesize, @buffer.bytesize - @buffer_pos].min
      # Positions are byte offsets, so slice by bytes rather than characters
      result << @buffer.byteslice(@buffer_pos, to_copy)
      @buffer_pos += to_copy
    end

    result
  end

  # Check if at end of stream
  #
  # @return [Boolean] true when the decoded buffer is consumed and the
  #   underlying input reports EOF
  def eof?
    buffer_drained? && @input.eof?
  end

  private

  # @return [Boolean] true when every buffered byte has been handed out
  def buffer_drained?
    @buffer_pos >= @buffer.bytesize
  end

  # Replace the internal buffer with freshly decoded data
  #
  # Decodes whole tokens until the buffer exceeds REFILL_LIMIT bytes, holds
  # at least +max_bytes+, or the input ends. A single token (run, keyword)
  # may overshoot both limits; the surplus stays buffered for later reads.
  #
  # @param max_bytes [Integer] Number of bytes the caller still needs
  def fill_buffer(max_bytes)
    @buffer = String.new(encoding: Encoding::BINARY)
    @buffer_pos = 0

    while @buffer.bytesize <= REFILL_LIMIT && @buffer.bytesize < max_bytes
      byte = read_byte
      break if byte.nil? # EOF

      @buffer << decode_token(byte)
    end
  end

  # Expand one token, consuming any operand bytes it needs from the input
  #
  # @param byte [Integer] First byte of the token
  # @return [String] Decoded bytes for this token
  def decode_token(byte)
    # Anything outside 0x10-0x1A is a plain value byte
    return byte.chr unless CONTROL_BYTES.cover?(byte)

    case byte
    when ESCAPE_BYTE
      next_byte("Unexpected EOF after escape byte").chr
    when BYTE_RUN
      repeat_byte = read_byte
      repeat_count = read_byte
      if repeat_byte.nil? || repeat_count.nil?
        raise Cabriolet::DecompressionError, "Unexpected EOF in byte run"
      end

      repeat_byte.chr * repeat_count
    when SPACE_RUN
      " " * next_byte("Unexpected EOF in space run")
    else
      decode_keyword(byte)
    end
  end

  # Expand a dictionary reference (control bytes 0x10-0x17)
  #
  # The index is ten bits wide: bits 0-1 of the control byte are the high
  # bits and the following input byte is the low eight bits.
  #
  # @param byte [Integer] Dictionary control byte
  # @return [String] Keyword bytes, plus a trailing space when flagged
  def decode_keyword(byte)
    index_low = next_byte("Unexpected EOF reading dictionary index")
    dict_index = ((byte & INDEX_HIGH_MASK) << 8) | index_low

    if dict_index >= @keywords.length
      raise Cabriolet::DecompressionError,
            "Dictionary index #{dict_index} out of range (max #{@keywords.length - 1})"
    end

    # Work on a binary copy: appending a non-ASCII keyword tagged with
    # another encoding to the binary buffer would raise
    # Encoding::CompatibilityError or silently retag the buffer.
    keyword = @keywords[dict_index].b
    keyword << " " if byte.anybits?(APPEND_SPACE_FLAG)
    keyword
  end

  # Read an operand byte that must be present
  #
  # @param eof_message [String] Error message used when the input has ended
  # @return [Integer] Byte value
  # @raise [Cabriolet::DecompressionError] on EOF
  def next_byte(eof_message)
    read_byte || raise(Cabriolet::DecompressionError, eof_message)
  end

  # Read a single byte from input
  #
  # @return [Integer, nil] Byte value or nil on EOF
  def read_byte
    @input.getbyte
  end
end
136
+ end
137
+ end
138
+ end