cabriolet 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +3 -0
- data/lib/cabriolet/binary/bitstream.rb +32 -21
- data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
- data/lib/cabriolet/cab/compressor.rb +85 -53
- data/lib/cabriolet/cab/decompressor.rb +2 -1
- data/lib/cabriolet/cab/extractor.rb +2 -35
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/quantum.rb +3 -51
- data/lib/cabriolet/decompressors/quantum.rb +81 -52
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/lit/compressor.rb +45 -689
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +114 -3
- metadata +38 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module HLP
|
|
5
|
+
module QuickHelp
|
|
6
|
+
# Calculates the byte offset of every section in a QuickHelp database.
#
# Sections are laid out back to back in this order: file header, topic
# index, context strings, context map, topic text.
class OffsetCalculator
  # Bytes preceding the topic index (2-byte signature + 68-byte header)
  HEADER_SIZE = 70

  # Bytes occupied by one topic index entry
  TOPIC_INDEX_ENTRY_SIZE = 4

  # Bytes occupied by one context map entry
  CONTEXT_MAP_ENTRY_SIZE = 2

  # Calculate all offsets in the file
  #
  # @param topics [Array<Hash>] Compressed topics; each must provide an
  #   Integer under :compressed_length
  # @param contexts [Array<String>] Context strings
  # @param context_map [Array<Integer>] Context topic indices
  # @return [Hash] Absolute offsets under :topic_index, :context_strings,
  #   :context_map and :topic_text; :keywords and :huffman_tree (always 0,
  #   not implemented); :topic_offsets (offsets relative to :topic_text,
  #   one per topic plus a trailing end marker); and :database_size
  def self.calculate(topics:, contexts:, context_map:)
    topic_index = HEADER_SIZE

    # The index holds one entry per topic plus one for the end marker.
    context_strings = topic_index + ((topics.size + 1) * TOPIC_INDEX_ENTRY_SIZE)

    # Every context string is stored with a trailing null terminator.
    context_map_start = context_strings + contexts.sum { |ctx| ctx.bytesize + 1 }

    topic_text = context_map_start + (context_map.size * CONTEXT_MAP_ENTRY_SIZE)

    # Running totals of compressed topic sizes; the final element marks the
    # end of the last topic.
    topic_offsets = topics.each_with_object([0]) do |topic, running|
      running << (running.last + topic[:compressed_length])
    end

    {
      topic_index: topic_index,
      context_strings: context_strings,
      context_map: context_map_start,
      keywords: 0, # not implemented yet
      huffman_tree: 0, # not implemented yet
      topic_text: topic_text,
      topic_offsets: topic_offsets,
      database_size: topic_text + topic_offsets.last,
    }
  end
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "topic_compressor"
|
|
4
|
+
require_relative "offset_calculator"
|
|
5
|
+
|
|
6
|
+
module Cabriolet
|
|
7
|
+
module HLP
|
|
8
|
+
module QuickHelp
|
|
9
|
+
# Builds the complete in-memory QuickHelp structure (compressed topics,
# context tables, section offsets and header fields) from topic data.
class StructureBuilder
  # Width in bytes of the database name header field
  DATABASE_NAME_BYTES = 14

  # Display width recorded in every generated header
  DEFAULT_DISPLAY_WIDTH = 80

  attr_reader :version, :database_name, :control_char, :case_sensitive

  # Initialize structure builder
  #
  # @param version [Integer] QuickHelp format version
  # @param database_name [String] Database name for external links
  # @param control_char [Integer] Control character (default 0x3A, ASCII ":")
  # @param case_sensitive [Boolean] Case-sensitive contexts
  def initialize(version: 2, database_name: "", control_char: 0x3A,
                 case_sensitive: false)
    @version = version
    @database_name = database_name
    @control_char = control_char
    @case_sensitive = case_sensitive
  end

  # Build complete QuickHelp structure from topics
  #
  # @param topics [Array<Hash>] Topic data with :text, :context, :compress keys
  # @return [Hash] Structure with :topics, :contexts, :context_map,
  #   :offsets and :header keys
  def build(topics)
    compressed = compress_topics(topics)
    contexts = topics.map { |topic| topic[:context] }

    # Context N maps to topic N: one context per topic, in input order.
    context_map = topics.each_with_index.map { |_topic, index| index }

    structure = {
      topics: compressed,
      contexts: contexts,
      context_map: context_map,
      offsets: OffsetCalculator.calculate(
        topics: compressed,
        contexts: contexts,
        context_map: context_map,
      ),
    }
    structure[:header] = build_header(structure)
    structure
  end

  private

  # Compress all topics
  #
  # Topics with a truthy :compress value are keyword-compressed; all others
  # are stored uncompressed.
  #
  # @param topics [Array<Hash>] Topic data
  # @return [Array<Hash>] Compressed topics
  def compress_topics(topics)
    topics.map do |topic|
      if topic[:compress]
        TopicCompressor.compress_topic(topic[:text])
      else
        TopicCompressor.store_uncompressed(topic[:text])
      end
    end
  end

  # Build file header
  #
  # @param structure [Hash] QuickHelp structure; reads :topics, :contexts
  #   and :offsets
  # @return [Hash] Header information
  def build_header(structure)
    {
      version: @version,
      attributes: header_attributes,
      control_character: @control_char,
      topic_count: structure[:topics].size,
      context_count: structure[:contexts].size,
      display_width: DEFAULT_DISPLAY_WIDTH,
      predefined_ctx_count: 0,
      database_name: padded_database_name,
      offsets: structure[:offsets],
    }
  end

  # Attribute bit flags for the header
  #
  # @return [Integer] Flags; CASE_SENSITIVE is set when the builder was
  #   created with case_sensitive: true
  def header_attributes
    return 0 unless @case_sensitive

    Binary::HLPStructures::Attributes::CASE_SENSITIVE
  end

  # Database name as a fixed-width field
  #
  # Padding and truncation are done on bytes, not characters, so a name
  # containing multi-byte characters can never exceed the field width.
  #
  # @return [String] Binary string of exactly DATABASE_NAME_BYTES bytes,
  #   null padded on the right
  def padded_database_name
    [@database_name].pack("a#{DATABASE_NAME_BYTES}")
  end
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module HLP
|
|
5
|
+
module QuickHelp
|
|
6
|
+
# Builds topic data in QuickHelp internal format
class TopicBuilder
  # Byte written after each line's text (carriage return, 0x0D)
  LINE_TERMINATOR = "\r".b.freeze

  # Attribute section written after every line: attr_len byte (1) followed
  # by the 0xFF terminator, i.e. no attributes.
  ATTRIBUTE_SECTION = [1, 0xFF].pack("C2").freeze

  # Longest line text, in bytes, whose length prefix (text bytes + 2) still
  # fits in a single byte.
  MAX_LINE_BYTES = 0xFF - 2

  # Build topic data in QuickHelp internal format
  #
  # Topic format as expected by decompressor:
  # - Each line: [text_length][text][newline][attr_len][attrs][0xFF]
  # - text_length = text + newline + 1 (for attr_len byte) = text_bytes + 2
  #
  # The decompressor reads:
  # - text_length = data.getbyte(pos)
  # - text_bytes = text_length - 2 (reads text, skips newline)
  # - attr_length = data.getbyte(pos after text + newline)
  #
  # The result is assembled in a binary buffer. Mixing binary fragments
  # with UTF-8 ones raises Encoding::CompatibilityError as soon as both
  # sides contain bytes >= 0x80 (non-ASCII text, or a length prefix of
  # 128 or more following an earlier 0xFF terminator).
  #
  # @param text [String] Raw topic text; lines are separated by "\n"
  # @return [String] Formatted topic data (binary encoding); empty when
  #   text contains no lines
  # @raise [RangeError] if a line is longer than MAX_LINE_BYTES bytes
  def self.build_topic_data(text)
    buffer = String.new(encoding: Encoding::BINARY)

    text.split("\n").each_with_object(buffer) do |line, result|
      payload = line.b
      if payload.bytesize > MAX_LINE_BYTES
        raise RangeError,
              "topic line is #{payload.bytesize} bytes; at most " \
              "#{MAX_LINE_BYTES} fit in the 1-byte length prefix"
      end

      # An Integer appended to a binary String is written as a single byte.
      result << (payload.bytesize + 2) << payload << LINE_TERMINATOR << ATTRIBUTE_SECTION
    end
  end
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
|
|
4
|
+
module HLP
|
|
5
|
+
module QuickHelp
|
|
6
|
+
# Compresses topic text using QuickHelp keyword compression
class TopicCompressor
  # Prefix marking the following byte as a literal
  ESCAPE_BYTE = 0x1A

  # Bytes with special meaning in the compressed stream (dictionary
  # references, runs and the escape byte itself); they must be escaped to be
  # stored literally.
  CONTROL_BYTES = (0x10..0x1A).freeze

  # Compress topic text using QuickHelp keyword compression
  #
  # QuickHelp format uses:
  # - 0x00-0x0F: Literal bytes
  # - 0x10-0x17: Dictionary entry references
  # - 0x18: Run of spaces
  # - 0x19: Run of bytes (repeat)
  # - 0x1A: Escape byte (next byte is literal)
  # - 0x1B-0xFF: Literal bytes
  #
  # Without a dictionary, we encode literals directly and escape
  # control characters (0x10-0x1A) with 0x1A prefix.
  #
  # The stored payload is a 2-byte little-endian decompressed length
  # followed by the encoded topic data.
  # NOTE(review): pack("v") keeps only the low 16 bits, so topic data over
  # 65,535 bytes would be written with a wrapped length - confirm whether
  # callers bound topic size.
  #
  # @param text [String] Topic text
  # @return [Hash] Compressed topic with :text, :compressed,
  #   :decompressed_length and :compressed_length
  def self.compress_topic(text)
    topic_data = TopicBuilder.build_topic_data(text)
    payload = [topic_data.bytesize].pack("v") + encode_keyword_compression(topic_data)

    {
      text: text,
      compressed: payload,
      decompressed_length: topic_data.bytesize,
      compressed_length: payload.bytesize,
    }
  end

  # Store topic without compression
  #
  # @param text [String] Topic text
  # @return [Hash] Uncompressed topic with the same keys as
  #   {compress_topic}; both lengths equal the formatted topic size
  def self.store_uncompressed(text)
    topic_data = TopicBuilder.build_topic_data(text)

    {
      text: text,
      compressed: topic_data,
      decompressed_length: topic_data.bytesize,
      compressed_length: topic_data.bytesize,
    }
  end

  # Encode data using QuickHelp keyword compression format
  #
  # Every byte is emitted as-is; bytes in the control range are preceded by
  # the escape byte so they are not read as compression commands.
  #
  # @param data [String] Data to encode
  # @return [String] Encoded data (binary encoding)
  def self.encode_keyword_compression(data)
    data.each_byte.with_object(String.new(encoding: Encoding::BINARY)) do |byte, encoded|
      encoded << ESCAPE_BYTE if CONTROL_BYTES.cover?(byte)
      encoded << byte
    end
  end
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
@@ -53,20 +53,23 @@ module Cabriolet
|
|
|
53
53
|
#
|
|
54
54
|
# @return [Hash] Hash with :literal and :distance code tables
|
|
55
55
|
def self.build_fixed_codes
  # The fixed tables are the same on every call, so build them once and
  # reuse the cached result.
  @build_fixed_codes ||= begin
    # Fixed literal/length code lengths, by symbol range:
    #   0-143 => 8 bits, 144-255 => 9 bits, 256-279 => 7 bits, 280-287 => 8 bits
    literal_code_lengths = ([8] * 144) + ([9] * 112) + ([7] * 24) + ([8] * 8)

    # Fixed distance code lengths: all 32 symbols use 5 bits
    distance_code_lengths = [5] * 32

    literal_codes = build_codes(literal_code_lengths, 288)
    distance_codes = build_codes(distance_code_lengths, 32)

    { literal: literal_codes, distance: distance_codes }
  end
end
|
|
71
74
|
|
|
72
75
|
# Encode a symbol using Huffman codes and write to bitstream
|