cabriolet 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/README.adoc +3 -0
  3. data/lib/cabriolet/binary/bitstream.rb +32 -21
  4. data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
  5. data/lib/cabriolet/cab/compressor.rb +85 -53
  6. data/lib/cabriolet/cab/decompressor.rb +2 -1
  7. data/lib/cabriolet/cab/extractor.rb +2 -35
  8. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  9. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  10. data/lib/cabriolet/checksum.rb +49 -0
  11. data/lib/cabriolet/collections/file_collection.rb +175 -0
  12. data/lib/cabriolet/compressors/quantum.rb +3 -51
  13. data/lib/cabriolet/decompressors/quantum.rb +81 -52
  14. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  15. data/lib/cabriolet/extraction/extractor.rb +171 -0
  16. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  17. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  18. data/lib/cabriolet/format_base.rb +79 -0
  19. data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
  20. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  21. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  22. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  23. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  24. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  25. data/lib/cabriolet/huffman/encoder.rb +15 -12
  26. data/lib/cabriolet/lit/compressor.rb +45 -689
  27. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  28. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  29. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  30. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  31. data/lib/cabriolet/lit/header_writer.rb +124 -0
  32. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  33. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  34. data/lib/cabriolet/quantum_shared.rb +105 -0
  35. data/lib/cabriolet/version.rb +1 -1
  36. data/lib/cabriolet.rb +114 -3
  37. metadata +38 -4
  38. data/lib/cabriolet/auto.rb +0 -173
  39. data/lib/cabriolet/parallel.rb +0 -333
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module HLP
5
+ module QuickHelp
6
# Works out where each section of a QuickHelp database sits in the file
class OffsetCalculator
  # Compute the offset of every section of the database
  #
  # Sections are placed back to back in this order: topic index,
  # context strings, context map, topic text. Keyword and Huffman tree
  # sections are not implemented yet, so their offsets are reported as 0.
  #
  # @param topics [Array<Hash>] Compressed topics; each entry must carry
  #   :compressed_length
  # @param contexts [Array<String>] Context strings
  # @param context_map [Array<Integer>] Context topic indices
  # @return [Hash] Offsets keyed by :topic_index, :context_strings,
  #   :context_map, :keywords, :huffman_tree, :topic_text, :topic_offsets
  #   (relative to :topic_text, with a trailing end marker) and
  #   :database_size
  def self.calculate(topics:, contexts:, context_map:)
    # The topic index follows the file header (70 bytes = 2 signature + 68 header)
    topic_index_at = 70

    # Topic index holds one 4-byte slot per topic plus one for the end marker
    context_strings_at = topic_index_at + ((topics.size + 1) * 4)

    # Every context string is stored with a trailing null terminator
    context_map_at = context_strings_at + contexts.sum { |ctx| ctx.bytesize + 1 }

    # Context map holds one 2-byte entry per context
    topic_text_at = context_map_at + (context_map.size * 2)

    # Walk the topics, recording where each one starts inside the topic
    # text section; the final entry marks the end of the last topic
    consumed = 0
    topic_offsets = topics.map do |topic|
      start = consumed
      consumed += topic[:compressed_length]
      start
    end
    topic_offsets << consumed

    {
      topic_index: topic_index_at,
      context_strings: context_strings_at,
      context_map: context_map_at,
      keywords: 0,
      huffman_tree: 0,
      topic_text: topic_text_at,
      topic_offsets: topic_offsets,
      database_size: topic_text_at + consumed,
    }
  end
end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "topic_compressor"
4
+ require_relative "offset_calculator"
5
+
6
+ module Cabriolet
7
+ module HLP
8
+ module QuickHelp
9
# Builds complete QuickHelp structure from file data
class StructureBuilder
  attr_reader :version, :database_name, :control_char, :case_sensitive

  # Initialize structure builder
  #
  # @param version [Integer] QuickHelp format version
  # @param database_name [String] Database name for external links
  # @param control_char [Integer] Control character
  # @param case_sensitive [Boolean] Case-sensitive contexts
  def initialize(version: 2, database_name: "", control_char: 0x3A,
                 case_sensitive: false)
    @version = version
    @database_name = database_name
    @control_char = control_char
    @case_sensitive = case_sensitive
  end

  # Build complete QuickHelp structure from topics
  #
  # Each topic gets exactly one context, and the context map points
  # context N at topic N.
  #
  # @param topics [Array<Hash>] Topic data with :text, :context, :compress keys
  # @return [Hash] Structure with :topics, :contexts, :context_map,
  #   :offsets and :header keys
  def build(topics)
    structure = {}

    # Compress topics
    structure[:topics] = compress_topics(topics)

    # Build context data (identity mapping: context i -> topic i)
    structure[:contexts] = topics.map { |t| t[:context] }
    structure[:context_map] = topics.each_index.to_a

    # Calculate offsets
    structure[:offsets] = OffsetCalculator.calculate(
      topics: structure[:topics],
      contexts: structure[:contexts],
      context_map: structure[:context_map],
    )

    # Build header
    structure[:header] = build_header(structure)

    structure
  end

  private

  # Compress all topics
  #
  # Topics whose :compress flag is truthy are keyword-compressed; all
  # others are stored as-is.
  #
  # @param topics [Array<Hash>] Topic data
  # @return [Array<Hash>] Compressed topics
  def compress_topics(topics)
    topics.map do |topic|
      if topic[:compress]
        TopicCompressor.compress_topic(topic[:text])
      else
        TopicCompressor.store_uncompressed(topic[:text])
      end
    end
  end

  # Build file header
  #
  # @param structure [Hash] QuickHelp structure (:topics, :contexts and
  #   :offsets must already be populated)
  # @return [Hash] Header information
  def build_header(structure)
    attributes = 0
    attributes |= Binary::HLPStructures::Attributes::CASE_SENSITIVE if @case_sensitive

    {
      version: @version,
      attributes: attributes,
      control_character: @control_char,
      topic_count: structure[:topics].size,
      context_count: structure[:contexts].size,
      display_width: 80,
      predefined_ctx_count: 0,
      database_name: padded_database_name,
      offsets: structure[:offsets],
    }
  end

  # Database name as a fixed 14-byte, null-padded field
  #
  # Padding and truncation are done on bytes rather than characters so
  # that a name containing multibyte characters can never yield a field
  # longer than 14 bytes.
  #
  # @return [String] Exactly 14 bytes, binary encoded
  def padded_database_name
    @database_name.b.ljust(14, "\x00").byteslice(0, 14)
  end
end
91
+ end
92
+ end
93
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module HLP
5
+ module QuickHelp
6
+ # Builds topic data in QuickHelp internal format
7
+ class TopicBuilder
8
+ # Build topic data in QuickHelp internal format
9
+ #
10
+ # Topic format as expected by decompressor:
11
+ # - Each line: [text_length][text][newline][attr_len][attrs][0xFF]
12
+ # - text_length = text + newline + 1 (for attr_len byte) = text_bytes + 2
13
+ # - Line structure: text_length byte + text + newline + attr_len byte + attr_data
14
+ #
15
+ # The decompressor reads:
16
+ # - text_length = data.getbyte(pos)
17
+ # - text_bytes = text_length - 2 (reads text, skips newline)
18
+ # - attr_length = data.getbyte(pos after text + newline)
19
+ #
20
+ # @param text [String] Raw topic text
21
+ # @return [String] Formatted topic data
22
+ def self.build_topic_data(text)
23
+ result = +""
24
+
25
+ # Split text into lines
26
+ lines = text.split("\n")
27
+
28
+ lines.each do |line|
29
+ text_bytes = line.b
30
+ newline = "\x0D" # Carriage return
31
+
32
+ # Attribute section: just 0xFF terminator (attr_len = 1)
33
+ attr_data = "\xFF"
34
+ attr_len = 1
35
+
36
+ # text_length = text + newline + 1 (for attr_len byte)
37
+ # This ensures text_bytes = text_length - 2 gives correct text length
38
+ text_length = text_bytes.bytesize + 2
39
+
40
+ result << text_length.chr
41
+ result << text_bytes
42
+ result << newline
43
+ result << attr_len.chr
44
+ result << attr_data
45
+ end
46
+
47
+ result
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module HLP
5
+ module QuickHelp
6
# Compresses topic text using QuickHelp keyword compression
class TopicCompressor
  # First byte value that has a special meaning in the compressed stream.
  FIRST_CONTROL_BYTE = 0x10

  # Escape marker: the byte following it is taken literally. It is also
  # the last byte value of the control range.
  ESCAPE_BYTE = 0x1A

  private_constant :FIRST_CONTROL_BYTE, :ESCAPE_BYTE

  # Compress topic text using QuickHelp keyword compression
  #
  # QuickHelp format uses:
  # - 0x00-0x0F: Literal bytes
  # - 0x10-0x17: Dictionary entry references
  # - 0x18: Run of spaces
  # - 0x19: Run of bytes (repeat)
  # - 0x1A: Escape byte (next byte is literal)
  # - 0x1B-0xFF: Literal bytes
  #
  # Without a dictionary, we encode literals directly and escape
  # control characters (0x10-0x1A) with 0x1A prefix.
  #
  # The per-line record layout is produced by
  # TopicBuilder.build_topic_data; this method prefixes the encoded
  # records with their decompressed length as a 16-bit little-endian
  # integer.
  #
  # NOTE(review): pack("v") keeps only the low 16 bits, so topic data
  # larger than 65535 bytes would get a truncated length header -
  # confirm whether callers can produce topics that large.
  #
  # @param text [String] Topic text
  # @return [Hash] Compressed topic with :text, :compressed,
  #   :decompressed_length and :compressed_length
  def self.compress_topic(text)
    topic_data = TopicBuilder.build_topic_data(text)

    # Decompressed length (2 bytes) followed by the encoded records
    payload = [topic_data.bytesize].pack("v") + encode_keyword_compression(topic_data)

    {
      text: text,
      compressed: payload,
      decompressed_length: topic_data.bytesize,
      compressed_length: payload.bytesize,
    }
  end

  # Store topic without compression
  #
  # @param text [String] Topic text
  # @return [Hash] Uncompressed topic with metadata; :compressed holds
  #   the formatted topic data unchanged
  def self.store_uncompressed(text)
    topic_data = TopicBuilder.build_topic_data(text)

    {
      text: text,
      compressed: topic_data,
      decompressed_length: topic_data.bytesize,
      compressed_length: topic_data.bytesize,
    }
  end

  # Encode data using QuickHelp keyword compression format
  #
  # Every byte is copied through; bytes in the control range
  # (0x10-0x1A) are preceded by the 0x1A escape byte so they are read
  # back as literals.
  #
  # @param data [String] Data to encode
  # @return [String] Encoded data (binary encoded)
  def self.encode_keyword_compression(data)
    encoded_bytes = []

    data.each_byte do |byte|
      encoded_bytes << ESCAPE_BYTE if byte.between?(FIRST_CONTROL_BYTE, ESCAPE_BYTE)
      encoded_bytes << byte
    end

    encoded_bytes.pack("C*")
  end
end
81
+ end
82
+ end
83
+ end
@@ -53,20 +53,23 @@ module Cabriolet
53
53
  #
54
54
  # @return [Hash] Hash with :literal and :distance code tables
55
55
  def self.build_fixed_codes
56
- # Fixed literal/length code lengths
57
- literal_lengths = Array.new(288, 0)
58
- (0...144).each { |i| literal_lengths[i] = 8 }
59
- (144...256).each { |i| literal_lengths[i] = 9 }
60
- (256...280).each { |i| literal_lengths[i] = 7 }
61
- (280...288).each { |i| literal_lengths[i] = 8 }
56
+ # Memoize fixed codes since they never change
57
+ @build_fixed_codes ||= begin
58
+ # Fixed literal/length code lengths
59
+ literal_lengths = Array.new(288, 0)
60
+ (0...144).each { |i| literal_lengths[i] = 8 }
61
+ (144...256).each { |i| literal_lengths[i] = 9 }
62
+ (256...280).each { |i| literal_lengths[i] = 7 }
63
+ (280...288).each { |i| literal_lengths[i] = 8 }
62
64
 
63
- # Fixed distance code lengths (all 5 bits)
64
- distance_lengths = Array.new(32, 5)
65
+ # Fixed distance code lengths (all 5 bits)
66
+ distance_lengths = Array.new(32, 5)
65
67
 
66
- {
67
- literal: build_codes(literal_lengths, 288),
68
- distance: build_codes(distance_lengths, 32),
69
- }
68
+ {
69
+ literal: build_codes(literal_lengths, 288),
70
+ distance: build_codes(distance_lengths, 32),
71
+ }
72
+ end
70
73
  end
71
74
 
72
75
  # Encode a symbol using Huffman codes and write to bitstream