cabriolet 0.1.2 → 0.2.1
This diff covers the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333

data/lib/cabriolet/hlp/quickhelp/structure_builder.rb (new file)
@@ -0,0 +1,93 @@
+# frozen_string_literal: true
+
+require_relative "topic_compressor"
+require_relative "offset_calculator"
+
+module Cabriolet
+  module HLP
+    module QuickHelp
+      # Builds complete QuickHelp structure from file data
+      class StructureBuilder
+        attr_reader :version, :database_name, :control_char, :case_sensitive
+
+        # Initialize structure builder
+        #
+        # @param version [Integer] QuickHelp format version
+        # @param database_name [String] Database name for external links
+        # @param control_char [Integer] Control character
+        # @param case_sensitive [Boolean] Case-sensitive contexts
+        def initialize(version: 2, database_name: "", control_char: 0x3A,
+                       case_sensitive: false)
+          @version = version
+          @database_name = database_name
+          @control_char = control_char
+          @case_sensitive = case_sensitive
+        end
+
+        # Build complete QuickHelp structure from topics
+        #
+        # @param topics [Array<Hash>] Topic data with :text, :context, :compress keys
+        # @return [Hash] Complete QuickHelp structure
+        def build(topics)
+          structure = {}
+
+          # Compress topics
+          structure[:topics] = compress_topics(topics)
+
+          # Build context data
+          structure[:contexts] = topics.map { |t| t[:context] }
+          structure[:context_map] = topics.map.with_index { |_t, i| i }
+
+          # Calculate offsets
+          structure[:offsets] = OffsetCalculator.calculate(
+            topics: structure[:topics],
+            contexts: structure[:contexts],
+            context_map: structure[:context_map],
+          )
+
+          # Build header
+          structure[:header] = build_header(structure)
+
+          structure
+        end
+
+        private
+
+        # Compress all topics
+        #
+        # @param topics [Array<Hash>] Topic data
+        # @return [Array<Hash>] Compressed topics
+        def compress_topics(topics)
+          topics.map do |topic|
+            if topic[:compress]
+              TopicCompressor.compress_topic(topic[:text])
+            else
+              TopicCompressor.store_uncompressed(topic[:text])
+            end
+          end
+        end
+
+        # Build file header
+        #
+        # @param structure [Hash] QuickHelp structure
+        # @return [Hash] Header information
+        def build_header(structure)
+          attributes = 0
+          attributes |= Binary::HLPStructures::Attributes::CASE_SENSITIVE if @case_sensitive
+
+          {
+            version: @version,
+            attributes: attributes,
+            control_character: @control_char,
+            topic_count: structure[:topics].size,
+            context_count: structure[:contexts].size,
+            display_width: 80,
+            predefined_ctx_count: 0,
+            database_name: @database_name.ljust(14, "\x00")[0, 14],
+            offsets: structure[:offsets],
+          }
+        end
+      end
+    end
+  end
+end
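
For illustration only (not part of the diff): a minimal sketch of driving the StructureBuilder API added above. The topic hashes follow the :text/:context/:compress keys documented on #build; the require line and sample values are assumptions.

  require "cabriolet"

  builder = Cabriolet::HLP::QuickHelp::StructureBuilder.new(
    version: 2,
    database_name: "SAMPLE",
    case_sensitive: false,
  )

  topics = [
    { text: "First topic\nBody line", context: "intro", compress: true },
    { text: "Second topic", context: "usage", compress: false },
  ]

  structure = builder.build(topics)
  structure[:header][:topic_count]  # => 2
  structure[:topics].first.keys     # => [:text, :compressed, :decompressed_length, :compressed_length]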

data/lib/cabriolet/hlp/quickhelp/topic_builder.rb (new file)
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+module Cabriolet
+  module HLP
+    module QuickHelp
+      # Builds topic data in QuickHelp internal format
+      class TopicBuilder
+        # Build topic data in QuickHelp internal format
+        #
+        # Topic format as expected by decompressor:
+        # - Each line: [text_length][text][newline][attr_len][attrs][0xFF]
+        # - text_length = text + newline + 1 (for attr_len byte) = text_bytes + 2
+        # - Line structure: text_length byte + text + newline + attr_len byte + attr_data
+        #
+        # The decompressor reads:
+        # - text_length = data.getbyte(pos)
+        # - text_bytes = text_length - 2 (reads text, skips newline)
+        # - attr_length = data.getbyte(pos after text + newline)
+        #
+        # @param text [String] Raw topic text
+        # @return [String] Formatted topic data
+        def self.build_topic_data(text)
+          result = +""
+
+          # Split text into lines
+          lines = text.split("\n")
+
+          lines.each do |line|
+            text_bytes = line.b
+            newline = "\x0D" # Carriage return
+
+            # Attribute section: just 0xFF terminator (attr_len = 1)
+            attr_data = "\xFF"
+            attr_len = 1
+
+            # text_length = text + newline + 1 (for attr_len byte)
+            # This ensures text_bytes = text_length - 2 gives correct text length
+            text_length = text_bytes.bytesize + 2
+
+            result << text_length.chr
+            result << text_bytes
+            result << newline
+            result << attr_len.chr
+            result << attr_data
+          end
+
+          result
+        end
+      end
+    end
+  end
+end
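
Not from the diff: a worked byte-level example of the per-line layout that TopicBuilder.build_topic_data produces, following the format comments above (the input string is chosen for illustration).

  data = Cabriolet::HLP::QuickHelp::TopicBuilder.build_topic_data("Hi")
  data.bytes.map { |b| format("%02X", b) }
  # => ["04", "48", "69", "0D", "01", "FF"]
  #    len=4  'H'   'i'   CR   attr_len  0xFF terminator
  # text_length (4) = 2 text bytes + 1 newline + 1 attr_len byte, so a reader
  # recovers the text length as text_length - 2 = 2.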

data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb (new file)
@@ -0,0 +1,83 @@
+# frozen_string_literal: true
+
+module Cabriolet
+  module HLP
+    module QuickHelp
+      # Compresses topic text using QuickHelp keyword compression
+      class TopicCompressor
+        # Compress topic text using QuickHelp keyword compression
+        #
+        # QuickHelp format uses:
+        # - 0x00-0x0F: Literal bytes
+        # - 0x10-0x17: Dictionary entry references
+        # - 0x18: Run of spaces
+        # - 0x19: Run of bytes (repeat)
+        # - 0x1A: Escape byte (next byte is literal)
+        # - 0x1B-0xFF: Literal bytes
+        #
+        # Without a dictionary, we encode literals directly and escape
+        # control characters (0x10-0x1A) with 0x1A prefix.
+        #
+        # Topic text format:
+        # - Each line: [len][text][newline][attr_len][attrs][0xFF]
+        # - len includes itself, text, newline, attr_len
+        # - attr_len includes itself and attrs, minimum 1 (just 0xFF terminator)
+        #
+        # @param text [String] Topic text
+        # @return [Hash] Compressed topic with metadata
+        def self.compress_topic(text)
+          topic_data = TopicBuilder.build_topic_data(text)
+          encoded = encode_keyword_compression(topic_data)
+
+          # Prepend decompressed length (2 bytes)
+          length_header = [topic_data.bytesize].pack("v")
+
+          {
+            text: text,
+            compressed: length_header + encoded,
+            decompressed_length: topic_data.bytesize,
+            compressed_length: (length_header + encoded).bytesize,
+          }
+        end
+
+        # Store topic without compression
+        #
+        # @param text [String] Topic text
+        # @return [Hash] Uncompressed topic with metadata
+        def self.store_uncompressed(text)
+          topic_data = TopicBuilder.build_topic_data(text)
+
+          {
+            text: text,
+            compressed: topic_data,
+            decompressed_length: topic_data.bytesize,
+            compressed_length: topic_data.bytesize,
+          }
+        end
+
+        # Encode data using QuickHelp keyword compression format
+        #
+        # @param data [String] Data to encode
+        # @return [String] Encoded data
+        def self.encode_keyword_compression(data)
+          result = +""
+
+          data.bytes.each do |byte|
+            if byte < 0x10 || byte == 0x1B || byte > 0x1A
+              # Literal byte (except control range 0x10-0x1A)
+              result << byte.chr
+            elsif byte.between?(0x10, 0x1A)
+              # Control byte - escape it
+              result << 0x1A.chr << byte.chr
+            else
+              # 0x1B is also literal (above control range)
+              result << byte.chr
+            end
+          end
+
+          result
+        end
+      end
+    end
+  end
+end
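
Not from the diff: a small sketch of what TopicCompressor.compress_topic returns, based on the code above. With no dictionary, the payload is the built topic data with any 0x10-0x1A bytes escaped, prefixed by a two-byte little-endian decompressed length.

  topic = Cabriolet::HLP::QuickHelp::TopicCompressor.compress_topic("Hi")
  topic[:decompressed_length]        # => 6 (the built topic data for "Hi")
  topic[:compressed].bytes.first(2)  # => [6, 0], the pack("v") length header
  topic[:compressed_length]          # => 8, since "Hi" contains no 0x10-0x1A bytes to escape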

data/lib/cabriolet/hlp/winhelp/btree_builder.rb (new file)
@@ -0,0 +1,289 @@
+# frozen_string_literal: true
+
+require_relative "../../binary/hlp_structures"
+require "stringio"
+
+module Cabriolet
+  module HLP
+    module WinHelp
+      # B+ tree builder for WinHelp 4.x directory format
+      #
+      # Builds B+ tree directory structure for WinHelp 4.x files.
+      # The directory maps filenames to file offsets using a B+ tree
+      # with fixed-size pages.
+      class BTreeBuilder
+        # Default page size for WinHelp 4.x directory (1KB for catalog/directory)
+        DEFAULT_PAGE_SIZE = 0x0400 # 1KB
+
+        # Page types
+        PAGE_TYPE_LEAF = 0
+        PAGE_TYPE_INDEX = 1
+
+        # B+ tree magic number
+        BTREE_MAGIC = 0x293B
+
+        # Flags for B+ tree header
+        # Bit 0x0002 is always 1
+        # Bit 0x0400 is 1 for catalog/directory
+        FLAGS_MAGIC_BIT = 0x0002
+        FLAGS_CATALOG_BIT = 0x0400
+
+        attr_reader :page_size, :structure
+
+        # Initialize B+ tree builder
+        #
+        # @param page_size [Integer] Page size in bytes (default: 1KB)
+        # @param structure [String] Structure string describing data format
+        def initialize(page_size: DEFAULT_PAGE_SIZE, structure: "FFz")
+          @page_size = page_size
+          @structure = structure
+          @entries = []
+        end
+
+        # Add a file entry to the B+ tree
+        #
+        # @param filename [String] Internal filename (e.g., "|SYSTEM")
+        # @param offset [Integer] File offset in help file
+        # @param size [Integer] File size in bytes
+        def add_entry(filename, offset, size)
+          @entries << { filename: filename, offset: offset, size: size }
+        end
+
+        # Build B+ tree structure
+        #
+        # @return [Hash] Hash containing :header, :pages
+        def build
+          return build_empty if @entries.empty?
+
+          # Sort entries by filename
+          sorted_entries = @entries.sort_by { |e| e[:filename] }
+
+          # Build leaf pages
+          leaf_pages = build_leaf_pages(sorted_entries)
+
+          # Build index pages if needed
+          if leaf_pages.size > 1
+            index_pages = build_index_pages(leaf_pages)
+            root_page = index_pages.first[:page_num]
+            n_levels = 2
+          else
+            index_pages = []
+            root_page = leaf_pages.first[:page_num]
+            n_levels = 1
+          end
+
+          # Build B+ tree header
+          header = build_header(
+            total_pages: leaf_pages.size + index_pages.size,
+            root_page: root_page,
+            n_levels: n_levels,
+            total_entries: @entries.size,
+          )
+
+          # Combine all pages
+          all_pages = index_pages + leaf_pages
+
+          { header: header, pages: all_pages }
+        end
+
+        private
+
+        # Build empty B+ tree (single empty leaf page)
+        #
+        # @return [Hash] Hash containing :header, :pages
+        def build_empty
+          # Create empty leaf page
+          leaf_page = {
+            page_num: 0,
+            data: build_empty_leaf_page,
+          }
+
+          header = build_header(
+            total_pages: 1,
+            root_page: 0,
+            n_levels: 1,
+            total_entries: 0,
+          )
+
+          { header: header, pages: [leaf_page] }
+        end
+
+        # Build B+ tree header
+        #
+        # @param total_pages [Integer] Total number of pages
+        # @param root_page [Integer] Root page number
+        # @param n_levels [Integer] Number of levels in tree
+        # @param total_entries [Integer] Total number of entries
+        # @return [Binary::HLPStructures::WinHelpBTreeHeader] B+ tree header
+        def build_header(total_pages:, root_page:, n_levels:, total_entries:)
+          Binary::HLPStructures::WinHelpBTreeHeader.new(
+            magic: BTREE_MAGIC,
+            flags: FLAGS_MAGIC_BIT | FLAGS_CATALOG_BIT,
+            page_size: @page_size,
+            structure: @structure.ljust(16, "\x00"),
+            must_be_zero: 0,
+            page_splits: 0,
+            root_page: root_page,
+            must_be_neg_one: 0xFFFF,
+            total_pages: total_pages,
+            n_levels: n_levels,
+            total_btree_entries: total_entries,
+          )
+        end
+
+        # Build leaf pages from entries
+        #
+        # @param entries [Array<Hash>] Sorted file entries
+        # @return [Array<Hash>] Array of page hashes with :page_num and :data
+        def build_leaf_pages(entries)
+          pages = []
+          current_page_data = StringIO.new
+          page_num = 0
+
+          entries.each do |entry|
+            # Check if this entry fits in current page
+            entry_size = entry[:filename].bytesize + 1 + 4 # filename + null + offset
+            header_size = 8 # leaf node header
+
+            # Check if we need a new page
+            if (current_page_data.size + entry_size + header_size) > @page_size
+              # Finish current page and start new one
+              pages << finish_leaf_page(current_page_data, page_num,
+                                        pages.empty?)
+              page_num += 1
+              current_page_data = StringIO.new
+            end
+
+            # Write entry to current page
+            current_page_data.write(entry[:filename])
+            current_page_data.write("\x00") # null terminator
+            current_page_data.write([entry[:offset]].pack("V")) # 4-byte offset
+          end
+
+          # Finish last page
+          if current_page_data.size.positive?
+            pages << finish_leaf_page(current_page_data, page_num, pages.empty?)
+          end
+
+          pages
+        end
+
+        # Finish a leaf page by adding header
+        #
+        # @param page_data [StringIO] Page data without header
+        # @param page_num [Integer] Page number
+        # @param is_first [Boolean] Whether this is the first page
+        # @return [Hash] Page hash with :page_num and :data
+        def finish_leaf_page(page_data, page_num, is_first)
+          data = page_data.string
+          n_entries = count_entries(data)
+
+          # Build leaf node header
+          # - 2 bytes: unused (we use 0)
+          # - 2 bytes: nEntries
+          # - 2 bytes: PreviousPage (0xFFFF for first)
+          # - 2 bytes: NextPage (0xFFFF for last, to be determined)
+          header = [
+            0, # unused
+            n_entries,
+            is_first ? 0xFFFF : page_num - 1, # previous page
+            0xFFFF, # next page (will update if more pages added)
+          ].pack("vvvv")
+
+          { page_num: page_num, data: header + data }
+        end
+
+        # Build empty leaf page
+        #
+        # @return [String] Empty leaf page data
+        def build_empty_leaf_page
+          # Empty leaf has header with nEntries = 0
+          [
+            0, # unused
+            0, # nEntries = 0
+            0xFFFF, # previous page
+            0xFFFF, # next page
+          ].pack("vvvv")
+        end
+
+        # Build index pages from leaf pages
+        #
+        # @param leaf_pages [Array<Hash>] Leaf page hashes
+        # @return [Array<Hash>] Array of index page hashes
+        def build_index_pages(leaf_pages)
+          # For simplicity, create single index page pointing to all leaf pages
+          # In a real implementation, this would recursively build index pages
+          index_data = StringIO.new
+
+          leaf_pages.each do |page|
+            # For index pages, entries are: (filename, page_number)
+            # We use the first filename from each leaf page as key
+            first_filename = extract_first_filename(page[:data])
+            index_data.write(first_filename)
+            index_data.write("\x00") # null terminator
+            index_data.write([page[:page_num]].pack("v")) # 2-byte page number
+          end
+
+          data = index_data.string
+          n_entries = leaf_pages.size
+
+          # Build index node header
+          # - 2 bytes: unused (we use 0)
+          # - 2 bytes: nEntries
+          # - 2 bytes: PreviousPage (0xFFFF - no previous)
+          header = [
+            0, # unused
+            n_entries,
+            0xFFFF, # previous page (none for root)
+          ].pack("vvv")
+
+          [{
+            page_num: leaf_pages.size, # Index pages come after leaf pages
+            data: header + data,
+          }]
+        end
+
+        # Extract first filename from page data
+        #
+        # @param page_data [String] Page data with header
+        # @return [String] First filename in page
+        def extract_first_filename(page_data)
+          # Skip 8-byte header
+          data_start = 8
+          data = page_data[data_start..]
+
+          # Filename is null-terminated
+          null_pos = data.index("\x00")
+          return "" if null_pos.nil?
+
+          data[0...null_pos]
+        end
+
+        # Count entries in page data
+        #
+        # @param data [String] Page data without header
+        # @return [Integer] Number of entries
+        def count_entries(data)
+          count = 0
+          pos = 0
+
+          while pos < data.bytesize
+            # Find null terminator
+            null_pos = data.index("\x00", pos)
+            break if null_pos.nil?
+
+            # Skip filename
+            pos = null_pos + 1
+
+            # Skip 4-byte offset
+            pos += 4
+
+            count += 1
+          end
+
+          count
+        end
+      end
+    end
+  end
+end
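
Not from the diff: a minimal sketch of using the BTreeBuilder added above. The internal filenames, offsets, and sizes are invented; the result hash has the :header and :pages keys returned by #build.

  btree = Cabriolet::HLP::WinHelp::BTreeBuilder.new

  # Map internal WinHelp filenames to their offsets and sizes in the help file
  btree.add_entry("|SYSTEM", 0x001C, 64)
  btree.add_entry("|TOPIC", 0x005C, 4096)
  btree.add_entry("|CONTEXT", 0x105C, 512)

  result = btree.build
  result[:header]              # Binary::HLPStructures::WinHelpBTreeHeader instance
  result[:pages].size          # => 1 (all three entries fit in one 1KB leaf page)
  result[:pages].first[:data]  # 8-byte leaf header followed by name/offset entries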