cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Huffman
5
+ # Encoder encodes symbols using Huffman codes for compression
6
class Encoder
  # Maximum code length supported
  MAX_BITS = 16

  # Build canonical Huffman codes from code lengths (RFC 1951 algorithm).
  #
  # Codes of the same length are assigned sequentially in symbol order,
  # and shorter codes always sort numerically before longer ones.
  #
  # Symbols whose length is zero (or otherwise not positive) are treated
  # as unused and get no entry. If +lengths+ holds fewer than
  # +num_symbols+ entries, the missing symbols are treated as unused too.
  #
  # @param lengths [Array<Integer>] Code lengths for each symbol
  # @param num_symbols [Integer] Number of symbols
  # @return [Hash] Hash mapping symbol to {code: value, bits: length}
  # @raise [ArgumentError] if any code length exceeds MAX_BITS
  def self.build_codes(lengths, num_symbols)
    # Normalise to exactly num_symbols entries so that the counting pass
    # and the assignment pass always see the same data.
    symbol_lengths = Array.new(num_symbols) { |symbol| lengths[symbol] || 0 }

    too_long = symbol_lengths.find { |len| len > MAX_BITS }
    if too_long
      raise ArgumentError,
            "Code length #{too_long} exceeds maximum of #{MAX_BITS} bits"
    end

    # Count the number of codes for each length (unused symbols excluded)
    bl_count = Array.new(MAX_BITS + 1, 0)
    symbol_lengths.each { |len| bl_count[len] += 1 if len.positive? }

    # Find the numerical value of the smallest code for each length
    next_code = Array.new(MAX_BITS + 1, 0)
    code = 0
    (1..MAX_BITS).each do |bits|
      code = (code + bl_count[bits - 1]) << 1
      next_code[bits] = code
    end

    # Assign consecutive codes to symbols sharing a length
    codes = {}
    symbol_lengths.each_with_index do |len, symbol|
      next unless len.positive?

      codes[symbol] = { code: next_code[len], bits: len }
      next_code[len] += 1
    end

    codes
  end

  # Build fixed Huffman codes for DEFLATE (RFC 1951)
  #
  # @return [Hash] Hash with :literal and :distance code tables
  def self.build_fixed_codes
    # Fixed literal/length code lengths:
    #   0..143 => 8 bits, 144..255 => 9 bits,
    #   256..279 => 7 bits, 280..287 => 8 bits
    literal_lengths = ([8] * 144) + ([9] * 112) + ([7] * 24) + ([8] * 8)

    # Fixed distance code lengths (all 5 bits)
    distance_lengths = Array.new(32, 5)

    {
      literal: build_codes(literal_lengths, 288),
      distance: build_codes(distance_lengths, 32),
    }
  end

  # Encode a symbol using Huffman codes and write to bitstream
  #
  # Per RFC 1951 Section 3.1.1, Huffman codes are written LSB-first,
  # so the code bits are reversed before being handed to the bitstream.
  #
  # @param symbol [Integer] Symbol to encode
  # @param codes [Hash] Code table mapping symbols to {code:, bits:}
  # @param bitstream [Binary::BitstreamWriter] Output bitstream; only
  #   its write_bits(value, num_bits) method is called here
  # @return [void]
  # @raise [Cabriolet::CompressionError] if the table has no entry for
  #   the symbol
  def self.encode_symbol(symbol, codes, bitstream)
    entry = codes[symbol]
    unless entry
      raise Cabriolet::CompressionError,
            "No code for symbol #{symbol}"
    end

    # Reverse bits for LSB-first writing per RFC 1951
    reversed_code = reverse_bits(entry[:code], entry[:bits])
    bitstream.write_bits(reversed_code, entry[:bits])
  end

  # Reverse the lowest num_bits bits of a value (some formats need
  # reversed bit order). Bits above num_bits are discarded.
  #
  # @param value [Integer] Value to reverse
  # @param num_bits [Integer] Number of bits
  # @return [Integer] Reversed value
  def self.reverse_bits(value, num_bits)
    result = 0
    num_bits.times do
      result = (result << 1) | (value & 1)
      value >>= 1
    end
    result
  end
end
107
+ end
108
+ end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module Huffman
5
+ # Tree builds Huffman decoding trees from code lengths
6
class Tree
  # Maximum code length supported
  MAX_BITS = 16

  attr_reader :lengths, :num_symbols, :table

  # Store the code lengths; the decode table is only created by #build_table.
  #
  # @param lengths [Array<Integer>] Code lengths for each symbol
  # @param num_symbols [Integer] Number of symbols
  def initialize(lengths, num_symbols)
    @lengths = lengths
    @num_symbols = num_symbols
    @table = nil
  end

  # Build the fast decode table from code lengths.
  #
  # Canonical Huffman decode table modelled on libmspack's
  # make_decode_table (readhuff.h), using LSB-first bit ordering.
  # The table has two regions:
  #   1. a direct-lookup region of (1 << table_bits) entries, indexed by
  #      the bit-reversed code, for codes no longer than table_bits;
  #   2. pairs of child entries, allocated on demand, forming binary
  #      paths for codes longer than table_bits.
  # Entries holding 0xFFFF are unused.
  #
  # @param table_bits [Integer] Number of bits for table lookup (typically 6-12)
  # @return [Boolean] true if the lengths describe a complete code,
  #   false on overrun or when the code space is not fully used
  def build_table(table_bits)
    direct_entries = 1 << table_bits
    @table = Array.new(direct_entries + (num_symbols * 2), 0xFFFF)

    pos = 0
    table_mask = direct_entries
    bit_mask = table_mask >> 1

    # Short codes: every direct slot whose low bits match the reversed
    # code maps straight to the symbol.
    1.upto(table_bits) do |bit_num|
      stride = 1 << bit_num

      num_symbols.times do |sym|
        next unless lengths[sym] == bit_num

        slot = mirror_bits(pos >> (table_bits - bit_num), bit_num)

        pos += bit_mask
        return false if pos > table_mask # table overrun

        bit_mask.times do
          @table[slot] = sym
          slot += stride
        end
      end

      bit_mask >>= 1
    end

    # Nothing left to do when the short codes already fill the table
    return true if pos == table_mask

    # Flag every direct slot not claimed by a short code as unused
    (pos...table_mask).each do |unclaimed|
      @table[mirror_bits(unclaimed, table_bits)] = 0xFFFF
    end

    # Index of the next child pair to hand out for long codes
    next_node = [table_mask >> 1, num_symbols].max

    # Rescale the position by 16 bits for the longer codes
    pos <<= 16
    table_mask <<= 16
    bit_mask = 1 << 15

    # Long codes: walk (and extend) a binary path hanging off the direct slot
    (table_bits + 1).upto(MAX_BITS) do |bit_num|
      extra_bits = bit_num - table_bits

      num_symbols.times do |sym|
        next unless lengths[sym] == bit_num

        return false if pos >= table_mask # table overflow

        # Start from the first table_bits of the code, reversed (LSB)
        slot = mirror_bits(pos >> 16, table_bits)

        extra_bits.times do |depth|
          # First visit to this path: allocate two child entries
          if @table[slot] == 0xFFFF
            @table[next_node << 1] = 0xFFFF
            @table[(next_node << 1) + 1] = 0xFFFF
            @table[slot] = next_node
            next_node += 1
          end

          # Descend, taking the odd child when the next code bit is set
          slot = @table[slot] << 1
          slot += 1 if pos[15 - depth] == 1
        end

        @table[slot] = sym
        pos += bit_mask
      end

      bit_mask >>= 1
    end

    # The code is complete only if the whole code space was consumed
    pos == table_mask
  end

  private

  # Return the lowest +count+ bits of +value+ in reversed order.
  def mirror_bits(value, count)
    mirrored = 0
    count.times do
      mirrored = (mirrored << 1) | (value & 1)
      value >>= 1
    end
    mirrored
  end
end
137
+ end
138
+ end