cabriolet 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ARCHITECTURE.md +799 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +29 -0
- data/README.adoc +1207 -0
- data/exe/cabriolet +6 -0
- data/lib/cabriolet/auto.rb +173 -0
- data/lib/cabriolet/binary/bitstream.rb +148 -0
- data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
- data/lib/cabriolet/binary/chm_structures.rb +213 -0
- data/lib/cabriolet/binary/hlp_structures.rb +66 -0
- data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
- data/lib/cabriolet/binary/lit_structures.rb +107 -0
- data/lib/cabriolet/binary/oab_structures.rb +112 -0
- data/lib/cabriolet/binary/structures.rb +56 -0
- data/lib/cabriolet/binary/szdd_structures.rb +60 -0
- data/lib/cabriolet/cab/compressor.rb +382 -0
- data/lib/cabriolet/cab/decompressor.rb +510 -0
- data/lib/cabriolet/cab/extractor.rb +357 -0
- data/lib/cabriolet/cab/parser.rb +264 -0
- data/lib/cabriolet/chm/compressor.rb +513 -0
- data/lib/cabriolet/chm/decompressor.rb +436 -0
- data/lib/cabriolet/chm/parser.rb +254 -0
- data/lib/cabriolet/cli.rb +776 -0
- data/lib/cabriolet/compressors/base.rb +34 -0
- data/lib/cabriolet/compressors/lzss.rb +250 -0
- data/lib/cabriolet/compressors/lzx.rb +581 -0
- data/lib/cabriolet/compressors/mszip.rb +315 -0
- data/lib/cabriolet/compressors/quantum.rb +446 -0
- data/lib/cabriolet/constants.rb +75 -0
- data/lib/cabriolet/decompressors/base.rb +39 -0
- data/lib/cabriolet/decompressors/lzss.rb +138 -0
- data/lib/cabriolet/decompressors/lzx.rb +726 -0
- data/lib/cabriolet/decompressors/mszip.rb +390 -0
- data/lib/cabriolet/decompressors/none.rb +27 -0
- data/lib/cabriolet/decompressors/quantum.rb +456 -0
- data/lib/cabriolet/errors.rb +39 -0
- data/lib/cabriolet/format_detector.rb +156 -0
- data/lib/cabriolet/hlp/compressor.rb +272 -0
- data/lib/cabriolet/hlp/decompressor.rb +198 -0
- data/lib/cabriolet/hlp/parser.rb +131 -0
- data/lib/cabriolet/huffman/decoder.rb +79 -0
- data/lib/cabriolet/huffman/encoder.rb +108 -0
- data/lib/cabriolet/huffman/tree.rb +138 -0
- data/lib/cabriolet/kwaj/compressor.rb +479 -0
- data/lib/cabriolet/kwaj/decompressor.rb +237 -0
- data/lib/cabriolet/kwaj/parser.rb +183 -0
- data/lib/cabriolet/lit/compressor.rb +255 -0
- data/lib/cabriolet/lit/decompressor.rb +250 -0
- data/lib/cabriolet/models/cabinet.rb +81 -0
- data/lib/cabriolet/models/chm_file.rb +28 -0
- data/lib/cabriolet/models/chm_header.rb +67 -0
- data/lib/cabriolet/models/chm_section.rb +38 -0
- data/lib/cabriolet/models/file.rb +119 -0
- data/lib/cabriolet/models/folder.rb +102 -0
- data/lib/cabriolet/models/folder_data.rb +21 -0
- data/lib/cabriolet/models/hlp_file.rb +45 -0
- data/lib/cabriolet/models/hlp_header.rb +37 -0
- data/lib/cabriolet/models/kwaj_header.rb +98 -0
- data/lib/cabriolet/models/lit_header.rb +55 -0
- data/lib/cabriolet/models/oab_header.rb +95 -0
- data/lib/cabriolet/models/szdd_header.rb +72 -0
- data/lib/cabriolet/modifier.rb +326 -0
- data/lib/cabriolet/oab/compressor.rb +353 -0
- data/lib/cabriolet/oab/decompressor.rb +315 -0
- data/lib/cabriolet/parallel.rb +333 -0
- data/lib/cabriolet/repairer.rb +288 -0
- data/lib/cabriolet/streaming.rb +221 -0
- data/lib/cabriolet/system/file_handle.rb +107 -0
- data/lib/cabriolet/system/io_system.rb +87 -0
- data/lib/cabriolet/system/memory_handle.rb +105 -0
- data/lib/cabriolet/szdd/compressor.rb +217 -0
- data/lib/cabriolet/szdd/decompressor.rb +184 -0
- data/lib/cabriolet/szdd/parser.rb +127 -0
- data/lib/cabriolet/validator.rb +332 -0
- data/lib/cabriolet/version.rb +5 -0
- data/lib/cabriolet.rb +104 -0
- metadata +157 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
  module Huffman
    # Encoder turns per-symbol code lengths into canonical Huffman codes and
    # writes encoded symbols to a bitstream.
    class Encoder
      # Maximum code length supported
      MAX_BITS = 16

      # Build Huffman codes from code lengths (RFC 1951 algorithm)
      #
      # Generates canonical Huffman code values: shorter codes sort before
      # longer ones, and codes of the same length are assigned sequentially
      # in symbol order. Symbols whose length is zero or negative get no code.
      # Entries missing from +lengths+ (when it holds fewer than +num_symbols+
      # values) are treated as unused symbols.
      #
      # @param lengths [Array<Integer>] Code lengths for each symbol
      # @param num_symbols [Integer] Number of symbols
      # @return [Hash] Hash mapping symbol to {code: value, bits: length}
      # @raise [ArgumentError] if a code length exceeds MAX_BITS
      def self.build_codes(lengths, num_symbols)
        # Normalise to exactly num_symbols entries; a missing length means
        # the symbol is unused.
        symbol_lengths = Array.new(num_symbols) { |symbol| lengths[symbol] || 0 }

        # Count the number of codes for each length
        bl_count = Array.new(MAX_BITS + 1, 0)
        symbol_lengths.each_with_index do |len, symbol|
          next unless len.positive?

          if len > MAX_BITS
            raise ArgumentError,
                  "Code length #{len} for symbol #{symbol} exceeds " \
                  "MAX_BITS (#{MAX_BITS})"
          end

          bl_count[len] += 1
        end

        # Find the numerical value of the smallest code for each length
        code = 0
        next_code = Array.new(MAX_BITS + 1, 0)
        (1..MAX_BITS).each do |bits|
          code = (code + bl_count[bits - 1]) << 1
          next_code[bits] = code
        end

        # Assign consecutive codes to symbols of the same length
        codes = {}
        symbol_lengths.each_with_index do |len, symbol|
          next unless len.positive?

          codes[symbol] = {
            code: next_code[len],
            bits: len,
          }
          next_code[len] += 1
        end

        codes
      end

      # Build fixed Huffman codes for DEFLATE (RFC 1951)
      #
      # @return [Hash] Hash with :literal and :distance code tables
      def self.build_fixed_codes
        # Fixed literal/length code lengths:
        # 0-143 => 8 bits, 144-255 => 9 bits, 256-279 => 7 bits, 280-287 => 8 bits
        literal_lengths = ([8] * 144) + ([9] * 112) + ([7] * 24) + ([8] * 8)

        # Fixed distance code lengths (all 5 bits)
        distance_lengths = Array.new(32, 5)

        {
          literal: build_codes(literal_lengths, 288),
          distance: build_codes(distance_lengths, 32),
        }
      end

      # Encode a symbol using Huffman codes and write to bitstream
      #
      # Per RFC 1951 Section 3.1.1, Huffman codes are written LSB-first,
      # so the code bits are reversed before being handed to the bitstream.
      #
      # @param symbol [Integer] Symbol to encode
      # @param codes [Hash] Code table mapping symbols to {code:, bits:}
      # @param bitstream [Binary::BitstreamWriter] Output bitstream
      # @return [void]
      # @raise [Cabriolet::CompressionError] if the table has no code for symbol
      def self.encode_symbol(symbol, codes, bitstream)
        entry = codes[symbol]
        unless entry
          raise Cabriolet::CompressionError,
                "No code for symbol #{symbol}"
        end

        # Reverse bits for LSB-first writing per RFC 1951
        reversed_code = reverse_bits(entry[:code], entry[:bits])
        bitstream.write_bits(reversed_code, entry[:bits])
      end

      # Reverse bits for writing (some formats need reversed bit order)
      #
      # Only the lowest +num_bits+ bits of +value+ take part; higher bits
      # are dropped.
      #
      # @param value [Integer] Value to reverse
      # @param num_bits [Integer] Number of bits
      # @return [Integer] Reversed value
      def self.reverse_bits(value, num_bits)
        result = 0
        num_bits.times do
          result = (result << 1) | (value & 1)
          value >>= 1
        end
        result
      end
    end
  end
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
  module Huffman
    # Tree builds Huffman decoding tables from code lengths
    class Tree
      attr_reader :lengths, :num_symbols, :table

      # Maximum code length supported
      MAX_BITS = 16

      # Marker stored in table slots that no code has claimed yet
      UNUSED = 0xFFFF
      private_constant :UNUSED

      # Initialize a new Huffman tree
      #
      # @param lengths [Array<Integer>] Code lengths for each symbol
      # @param num_symbols [Integer] Number of symbols
      def initialize(lengths, num_symbols)
        @lengths = lengths
        @num_symbols = num_symbols
        @table = nil
      end

      # Build the fast decode table from code lengths
      #
      # This implements a canonical Huffman decoding table based on
      # the algorithm from libmspack (readhuff.h make_decode_table).
      # The table has two levels:
      # 1. Direct lookup for codes <= table_bits length
      # 2. Linked entries for longer codes
      #
      # A fresh table is allocated on every call and exposed through #table,
      # even when the method returns false.
      #
      # @param table_bits [Integer] Number of bits for table lookup (typically 6-12)
      # @return [Boolean] true if the code lengths describe a complete code,
      #   false if they over- or under-subscribe the code space
      def build_table(table_bits)
        # (1 << table_bits) entries for direct lookup, plus space for the
        # nodes of longer codes (num_symbols * 2). Every slot starts UNUSED,
        # so slots never claimed by a short code need no separate reset pass.
        direct_size = 1 << table_bits
        @table = Array.new(direct_size + (num_symbols * 2), UNUSED)

        pos = fill_direct_entries(table_bits, direct_size)
        return false if pos.nil?
        return true if pos == direct_size

        fill_long_entries(table_bits, pos, direct_size)
      end

      private

      # Fill the direct-lookup part of the table for codes of at most
      # table_bits bits (LSB ordering).
      #
      # @param table_bits [Integer] Width of the direct lookup index
      # @param table_mask [Integer] Size of the direct lookup area (1 << table_bits)
      # @return [Integer, nil] code-space position consumed so far, or nil
      #   if the short codes overrun the table
      def fill_direct_entries(table_bits, table_mask)
        pos = 0
        bit_mask = table_mask >> 1

        (1..table_bits).each do |bit_num|
          # Slots sharing the same low bit_num bits are this far apart
          stride = 1 << bit_num

          (0...num_symbols).each do |sym|
            next unless lengths[sym] == bit_num

            # Reverse the significant bits for LSB ordering
            leaf = reverse_bits(pos >> (table_bits - bit_num), bit_num)

            pos += bit_mask
            return nil if pos > table_mask

            # Fill all possible lookups of this symbol
            bit_mask.times do
              @table[leaf] = sym
              leaf += stride
            end
          end
          bit_mask >>= 1
        end

        pos
      end

      # Link codes longer than table_bits into two-entry nodes stored after
      # the direct-lookup area.
      #
      # @param table_bits [Integer] Width of the direct lookup index
      # @param pos [Integer] Code-space position reached by the short codes
      # @param table_mask [Integer] Size of the direct lookup area
      # @return [Boolean] true if the codes exactly fill the code space
      def fill_long_entries(table_bits, pos, table_mask)
        # next_symbol = base of allocation for long codes
        next_symbol = [table_mask >> 1, num_symbols].max

        # Scale positions by 16 bits so the extra bits of long codes can be
        # tracked below the direct-lookup prefix.
        pos <<= 16
        table_mask <<= 16
        bit_mask = 1 << 15

        ((table_bits + 1)..MAX_BITS).each do |bit_num|
          (0...num_symbols).each do |sym|
            next unless lengths[sym] == bit_num

            return false if pos >= table_mask

            # leaf = the first table_bits of the code, reversed (LSB)
            leaf = reverse_bits(pos >> 16, table_bits)

            # Build the tree path for this long code
            (0...(bit_num - table_bits)).each do |depth|
              # If this path hasn't been taken yet, allocate two entries
              if @table[leaf] == UNUSED
                @table[next_symbol << 1] = UNUSED
                @table[(next_symbol << 1) + 1] = UNUSED
                @table[leaf] = next_symbol
                next_symbol += 1
              end

              # Follow the path and select either left or right for next bit
              leaf = @table[leaf] << 1
              leaf += 1 if (pos >> (15 - depth)).anybits?(1)
            end

            @table[leaf] = sym
            pos += bit_mask
          end
          bit_mask >>= 1
        end

        # Full table?
        pos == table_mask
      end

      # Reverse the lowest num_bits bits of value.
      #
      # @param value [Integer] Value whose low bits are reversed
      # @param num_bits [Integer] Number of bits to reverse
      # @return [Integer] Reversed value
      def reverse_bits(value, num_bits)
        result = 0
        num_bits.times do
          result = (result << 1) | (value & 1)
          value >>= 1
        end
        result
      end
    end
  end
end
|