cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,581 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../binary/bitstream_writer"
4
+ require_relative "../huffman/encoder"
5
+
6
+ module Cabriolet
7
+ module Compressors
8
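# LZX handles LZX compression.
# Based on libmspack lzxc.c implementation.
#
# Phase 1 implementation status:
# - Frames are currently emitted as UNCOMPRESSED blocks only; the VERBATIM
#   block machinery (LZ77 matching, tree building, token encoding) is
#   defined in this class but is not yet called from #compress
# - Basic LZ77 matching
# - Simple tree building
# - No E8 preprocessing
# - 32KB frames; the window size is selected with window_bits
#   (default 15, i.e. a 32KB window)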
17
+ class LZX < Base
18
# Number of input bytes carried by a single frame (32KB).
FRAME_SIZE = 32 * 1024

# Block type identifiers (written as the 3-bit block type field).
BLOCKTYPE_VERBATIM = 1
BLOCKTYPE_ALIGNED = 2
BLOCKTYPE_UNCOMPRESSED = 3

# Shortest and longest match lengths, and the number of literal byte values.
MIN_MATCH = 2
MAX_MATCH = 257
NUM_CHARS = 256

# Pretree sizing.
PRETREE_NUM_ELEMENTS = 20
PRETREE_MAXSYMBOLS = 20

# Aligned-offset tree sizing.
ALIGNED_NUM_ELEMENTS = 8
ALIGNED_MAXSYMBOLS = 8

# Length tree sizing: match lengths 0-6 (relative to MIN_MATCH) are carried
# in the main element; longer lengths use one of the secondary symbols.
NUM_PRIMARY_LENGTHS = 7
NUM_SECONDARY_LENGTHS = 249
LENGTH_MAXSYMBOLS = NUM_SECONDARY_LENGTHS + 1

# Number of position slots, indexed by (window_bits - 15).
POSITION_SLOTS = [
  30, 32, 34, 36, 38, 42,
  50, 66, 98, 162, 290
].freeze

# Extra bits per position slot: slots 0-3 carry none, after which every
# pair of slots carries one more bit, up to 16 bits for slots 34 and 35.
EXTRA_BITS = ([0, 0] + (0..16).flat_map { |bits| [bits, bits] }).freeze

# Base offset of each position slot (51 entries). Each base is the previous
# base plus 2**extra_bits of the previous slot; slots past the end of
# EXTRA_BITS use 17 extra bits.
POSITION_BASE = (0...50).each_with_object([0]) do |slot, bases|
  bases << (bases.last + (1 << EXTRA_BITS.fetch(slot, 17)))
end.freeze
59
+
60
attr_reader :window_bits

# Set up an LZX compressor: validates the window size and prepares the
# bitstream writer, sliding window, offset registers and frequency tables.
#
# @param io_system [System::IOSystem] I/O system for reading/writing
# @param input [System::FileHandle, System::MemoryHandle] Input handle
# @param output [System::FileHandle, System::MemoryHandle] Output handle
# @param buffer_size [Integer] Buffer size for I/O operations
# @param window_bits [Integer] Window size as a power of two (15-21)
# @raise [ArgumentError] if window_bits is outside 15..21
def initialize(io_system, input, output, buffer_size, window_bits: 15)
  super(io_system, input, output, buffer_size)

  window_bits_valid = (15..21).cover?(window_bits)
  raise ArgumentError, "LZX window_bits must be 15-21, got #{window_bits}" unless window_bits_valid

  @window_bits = window_bits
  @window_size = 1 << window_bits

  # Eight main-tree symbols per position slot (one per length header).
  @num_offsets = POSITION_SLOTS[window_bits - 15] << 3
  @maintree_maxsymbols = NUM_CHARS + @num_offsets

  @bitstream = Binary::BitstreamWriter.new(io_system, output, buffer_size)

  # Zero-filled sliding window used by the LZ77 matcher.
  @window = "\0" * @window_size
  @window_pos = 0

  # R0, R1, R2 offset registers all start at 1.
  @r0 = @r1 = @r2 = 1

  # Symbol frequency tables consumed by tree building.
  @literal_freq = Array.new(NUM_CHARS, 0)
  @match_freq = Array.new(@num_offsets, 0)
  @length_freq = Array.new(LENGTH_MAXSYMBOLS, 0)
end
102
+
103
# Compress the whole input using the LZX container format.
#
# Reads all input, writes the 1-bit Intel E8 header (0 = no E8 processing),
# then emits the data frame by frame (FRAME_SIZE bytes per frame) and
# flushes the bitstream.
#
# Frames are cut with byte offsets (String#byteslice). String#[] indexes by
# character, which disagrees with the byte-based position bookkeeping
# whenever the input string carries a multibyte encoding.
#
# @return [Integer] Number of input bytes consumed (0 for empty input, in
#   which case nothing is written)
def compress
  input_data = read_all_input
  return 0 if input_data.empty?

  # Intel E8 header: a single 0 bit means "no E8 preprocessing".
  @bitstream.write_bits(0, 1)

  total_bytes = input_data.bytesize
  pos = 0

  while pos < total_bytes
    frame_data = input_data.byteslice(pos, FRAME_SIZE)
    compress_frame(frame_data)
    pos += frame_data.bytesize
  end

  # Flush any remaining bits
  @bitstream.flush

  pos
end
133
+
134
private

# Read all input data into memory.
#
# Keeps reading @buffer_size-sized chunks until the I/O system reports end
# of input. Both an empty chunk and a nil chunk are treated as end of
# input (Ruby's own IO#read(length) returns nil at EOF, so a handle that
# delegates to it may do the same).
#
# @return [String] All input data
def read_all_input
  data = +""
  loop do
    chunk = @io_system.read(@input, @buffer_size)
    break if chunk.nil? || chunk.empty?

    data << chunk
  end
  data
end
149
+
150
# Emit one frame as an UNCOMPRESSED block.
#
# Writes the block header, the three offset registers, the raw frame bytes
# (8 bits at a time) and finally byte-aligns the bitstream so the next
# frame starts on a byte boundary.
#
# @param data [String] Frame data to compress (at most FRAME_SIZE bytes)
# @return [void]
def compress_frame(data)
  write_block_header(BLOCKTYPE_UNCOMPRESSED, data.bytesize)
  write_offset_registers

  data.bytes.each { |octet| @bitstream.write_bits(octet, 8) }

  @bitstream.byte_align
end
169
+
170
# Turn a frame into a list of LZ77 tokens.
#
# Walks the frame from start to end. At each position the longest window
# match is looked up; a usable match (at least MIN_MATCH bytes) becomes a
# :match token, anything else becomes a :literal token. Frequency
# statistics are updated and every consumed byte is appended to the
# sliding window.
#
# @param data [String] Frame data
# @return [Array<Hash>] Tokens: { type: :literal, value: } or
#   { type: :match, length:, offset: }
def analyze_frame(data)
  tokens = []
  cursor = 0

  while cursor < data.bytesize
    found = find_match(data, cursor)

    if !found || found[:length] < MIN_MATCH
      # No usable match: emit the byte itself.
      literal = data.getbyte(cursor)
      tokens << { type: :literal, value: literal }
      @literal_freq[literal] += 1

      add_to_window(literal)
      cursor += 1
    else
      tokens << {
        type: :match,
        length: found[:length],
        offset: found[:offset],
      }
      update_match_statistics(found[:length], found[:offset])

      # Slide the window over every byte covered by the match.
      found[:length].times do
        add_to_window(data.getbyte(cursor))
        cursor += 1
      end
    end
  end

  tokens
end
213
+
214
# Find the longest match for data[pos..] in the sliding window.
#
# The window is a circular buffer (bytes are stored at
# logical_position % @window_size), so every window read wraps its index
# the same way. A candidate may also run past the current window position:
# those bytes are not in the window yet, but they are exactly the bytes
# about to be written, i.e. data[pos..]. Reading them from +data+ gives the
# usual LZ77 overlapping-copy semantics instead of comparing against stale
# or unwritten window contents.
#
# Precondition: the window holds every byte that precedes data[pos], so
# data[pos] is the byte that will be stored at logical position
# @window_pos.
#
# NOTE(review): the oldest candidate has offset == @window_size; confirm
# whether the target format allows offsets that large.
#
# @param data [String] Input data
# @param pos [Integer] Current position in data
# @return [Hash, nil] Match info with :length and :offset (distance back
#   from the current position), or nil when no match of at least MIN_MATCH
#   bytes exists
def find_match(data, pos)
  # Also covers pos >= data.bytesize (max_length <= 0).
  max_length = [MAX_MATCH, data.bytesize - pos].min
  return nil if max_length < MIN_MATCH

  best_match = nil
  search_start = [@window_pos - @window_size, 0].max

  (search_start...@window_pos).each do |win_pos|
    length = 0

    while length < max_length
      source = win_pos + length
      candidate = if source < @window_pos
                    @window.getbyte(source % @window_size)
                  else
                    # Overlap: the match source has caught up with the
                    # bytes currently being encoded.
                    data.getbyte(pos + source - @window_pos)
                  end
      break unless candidate == data.getbyte(pos + length)

      length += 1
    end

    # Only a strictly longer match replaces the current best
    next unless length >= MIN_MATCH && (best_match.nil? || length > best_match[:length])

    best_match = { length: length, offset: @window_pos - win_pos }

    # Nothing longer than max_length can be found
    break if length == max_length
  end

  best_match
end
253
+
254
# Append one byte to the circular sliding window.
#
# The byte is stored at the current logical position wrapped to the window
# size, and the logical position is then advanced by one.
#
# @param byte [Integer] Byte to add
# @return [void]
def add_to_window(byte)
  slot = @window_pos % @window_size
  @window.setbyte(slot, byte)
  @window_pos += 1
end
262
+
263
# Record one match in the frequency tables used for tree building.
#
# The main-tree symbol of a match is (position_slot << 3) | length_slot,
# where length_slot is the match length relative to MIN_MATCH, capped at
# NUM_PRIMARY_LENGTHS. When the cap is hit, the remainder of the length is
# counted in the length-tree table as well.
#
# @param length [Integer] Match length
# @param offset [Integer] Match offset
# @return [void]
def update_match_statistics(length, offset)
  position_slot = get_position_slot(offset)

  # Length slot (0-6 directly, 7 means "see length tree")
  length_slot = [length - MIN_MATCH, NUM_PRIMARY_LENGTHS].min

  @match_freq[(position_slot << 3) | length_slot] += 1

  return unless length_slot == NUM_PRIMARY_LENGTHS

  length_footer = length - MIN_MATCH - NUM_PRIMARY_LENGTHS
  @length_freq[length_footer] += 1 if length_footer < LENGTH_MAXSYMBOLS
end
284
+
285
# Map a match offset to its position slot.
#
# Offsets below 4 always yield slot 0. Otherwise the slot is the index of
# the last POSITION_BASE entry that does not exceed the offset, located by
# binary search (POSITION_BASE is sorted ascending).
#
# NOTE(review): offsets 1-3 collapse to slot 0 although POSITION_BASE has
# distinct bases for them — confirm this is intended.
#
# @param offset [Integer] Match offset
# @return [Integer] Position slot
def get_position_slot(offset)
  return 0 if offset < 4

  first_above = POSITION_BASE.bsearch_index { |base| base > offset }
  (first_above || POSITION_BASE.size) - 1
end
303
+
304
# Build the main, length and pretree code tables from the collected
# frequency statistics.
#
# The main tree covers literals followed by match symbols. The pretree
# (used to encode the other trees' lengths) is not derived from statistics:
# it uses a fixed, complete code for its 20 symbols
# (2 x 3 bits + 6 x 4 bits + 12 x 5 bits, Kraft sum exactly 1).
#
# @return [void]
def build_trees
  # Main tree: literal frequencies first, then match frequencies.
  @maintree_lengths = build_tree_lengths(@literal_freq + @match_freq,
                                         @maintree_maxsymbols)
  @maintree_codes = Huffman::Encoder.build_codes(@maintree_lengths,
                                                 @maintree_maxsymbols)

  # Length tree
  @length_lengths = build_tree_lengths(@length_freq, LENGTH_MAXSYMBOLS)
  @length_codes = Huffman::Encoder.build_codes(@length_lengths,
                                               LENGTH_MAXSYMBOLS)

  # Fixed pretree: symbols 0-1 get 3 bits, 2-7 get 4 bits, 8-19 get 5 bits.
  @pretree_lengths = Array.new(PRETREE_MAXSYMBOLS) do |symbol|
    case symbol
    when 0..1 then 3
    when 2..7 then 4
    when 8..19 then 5
    else 0
    end
  end
  @pretree_codes = Huffman::Encoder.build_codes(@pretree_lengths,
                                                PRETREE_MAXSYMBOLS)
end
333
+
334
# Build Huffman code lengths from frequencies.
#
# Produces genuine Huffman code lengths, so the result always describes a
# valid prefix code: for two or more used symbols the Kraft sum is exactly
# 1. Symbols with zero frequency get length 0 (unused). If the optimal tree
# is deeper than +max_length+, the frequencies are repeatedly halved
# (rounding up, so used symbols stay used) and the tree is rebuilt until it
# fits.
#
# Only the first +num_symbols+ frequencies are considered.
#
# @param freqs [Array<Integer>] Symbol frequencies
# @param num_symbols [Integer] Number of symbols
# @param max_length [Integer] Longest code length allowed (16 by default;
#   the pretree delta encoding in this class works modulo 17)
# @return [Array<Integer>] Code lengths, one per symbol (size num_symbols)
# @raise [ArgumentError] if more symbols are in use than max_length bits
#   can distinguish
def build_tree_lengths(freqs, num_symbols, max_length: 16)
  lengths = Array.new(num_symbols, 0)

  # [frequency, symbol] pairs for the symbols that actually occur
  used = freqs.first(num_symbols).each_with_index.select { |freq, _| freq.positive? }
  return lengths if used.empty?

  # A lone symbol has no sibling to merge with; give it a 1-bit code.
  if used.size == 1
    lengths[used.first.last] = 1
    return lengths
  end

  if used.size > (1 << max_length)
    raise ArgumentError,
          "cannot code #{used.size} symbols within #{max_length} bits"
  end

  loop do
    depths = huffman_code_depths(used.map(&:first))

    if depths.max <= max_length
      used.each_with_index { |(_, symbol), index| lengths[symbol] = depths[index] }
      return lengths
    end

    # Too deep: flatten the distribution and retry. Once every weight has
    # reached 1 the tree is balanced, so this terminates.
    used = used.map { |freq, symbol| [(freq + 1) / 2, symbol] }
  end
end

# Compute the Huffman tree depth of every weight (two-queue construction).
#
# @param weights [Array<Integer>] Positive weights, at least two of them
# @return [Array<Integer>] Leaf depth for each weight, in input order
def huffman_code_depths(weights)
  count = weights.size
  order = (0...count).sort_by { |index| [weights[index], index] }

  # Nodes 0...count are the leaves in ascending weight order; internal
  # nodes are appended after them and are created in non-decreasing weight
  # order, so both groups can be consumed as queues.
  node_weight = order.map { |index| weights[index] }
  parent = Array.new((2 * count) - 1)
  next_leaf = 0
  next_internal = count
  next_free = count

  take_lightest = lambda do
    use_leaf = next_leaf < count &&
               (next_internal >= next_free ||
                node_weight[next_leaf] <= node_weight[next_internal])
    if use_leaf
      next_leaf += 1
      next_leaf - 1
    else
      next_internal += 1
      next_internal - 1
    end
  end

  (count - 1).times do
    first = take_lightest.call
    second = take_lightest.call
    node_weight[next_free] = node_weight[first] + node_weight[second]
    parent[first] = parent[second] = next_free
    next_free += 1
  end

  # Parents always have a higher index than their children, so walking the
  # nodes from the root downwards resolves every depth in one pass.
  node_depth = Array.new((2 * count) - 1, 0)
  ((2 * count) - 3).downto(0) do |node|
    node_depth[node] = node_depth[parent[node]] + 1
  end

  depths = Array.new(count)
  order.each_with_index { |original, rank| depths[original] = node_depth[rank] }
  depths
end
367
+
368
# Write a block header: 3-bit block type followed by the 24-bit block
# length, emitted as its upper 16 bits and then its lower 8 bits.
# UNCOMPRESSED blocks are additionally byte-aligned after the header.
#
# @param block_type [Integer] Block type
# @param block_length [Integer] Block length in bytes
# @return [void]
def write_block_header(block_type, block_length)
  @bitstream.write_bits(block_type, 3)

  upper_sixteen = (block_length >> 8) & 0xFFFF
  @bitstream.write_bits(upper_sixteen, 16)
  lower_eight = block_length & 0xFF
  @bitstream.write_bits(lower_eight, 8)

  return unless block_type == BLOCKTYPE_UNCOMPRESSED

  @bitstream.byte_align
end
384
+
385
# Write the offset registers R0, R1 and R2 (needed by uncompressed blocks).
#
# Each register is emitted as four bytes, least significant byte first
# (12 bytes in total).
#
# @return [void]
def write_offset_registers
  [@r0, @r1, @r2].each do |register|
    [0, 8, 16, 24].each do |shift|
      @bitstream.write_bits((register >> shift) & 0xFF, 8)
    end
  end
end
397
+
398
# Write the tree definitions for a compressed block.
#
# The code lengths are sent in three pretree-encoded segments: main tree
# literals (0...NUM_CHARS), main tree match symbols
# (NUM_CHARS...@maintree_maxsymbols) and the length tree. The LZX decoder
# reads a fresh pretree (20 x 4-bit lengths) in front of every segment, so
# the pretree has to be written before each of the three segments, not
# just once.
#
# @return [void]
def write_trees
  # Main tree, literal part
  write_pretree
  write_tree_with_pretree(@maintree_lengths, 0, NUM_CHARS)

  # Main tree, match part
  write_pretree
  write_tree_with_pretree(@maintree_lengths, NUM_CHARS,
                          @maintree_maxsymbols)

  # Length tree
  write_pretree
  write_tree_with_pretree(@length_lengths, 0, NUM_SECONDARY_LENGTHS)
end
413
+
414
# Write the pretree definition: the code length of each of the
# PRETREE_MAXSYMBOLS pretree symbols, 4 bits per length.
#
# @return [void]
def write_pretree
  PRETREE_MAXSYMBOLS.times { |symbol| @bitstream.write_bits(@pretree_lengths[symbol], 4) }
end
422
+
423
# Write a range of tree code lengths using pretree encoding.
#
# Mirrors what the LZX decoder expects (see libmspack's lzxd_read_lens):
# - pretree symbols 0-16 carry a length as a delta against the length the
#   same tree position had before: symbol = (previous - length) mod 17
# - symbol 17 + 4 bits is a run of 4-19 zero lengths
# - symbol 18 + 5 bits is a run of 20-51 zero lengths
# Zero runs shorter than 4 are written one element at a time. The optional
# "repeat" symbol 19 is not used.
#
# @param lengths [Array<Integer>] Tree lengths to encode (each 0-16)
# @param start [Integer] Start index
# @param end_idx [Integer] End index (exclusive)
# @param previous_lengths [Array<Integer>, nil] Lengths the decoder already
#   holds for this tree (indexed like +lengths+); nil means all zero, i.e.
#   a freshly reset tree
# @return [void]
def write_tree_with_pretree(lengths, start, end_idx, previous_lengths = nil)
  i = start

  while i < end_idx
    length = lengths[i]

    if length.zero?
      # Count the zero run, capped at what a single symbol 18 can express.
      run = 1
      run += 1 while run < 51 && i + run < end_idx && lengths[i + run].zero?

      if run >= 20
        encode_pretree_symbol(18)
        @bitstream.write_bits(run - 20, 5)
        i += run
        next
      elsif run >= 4
        encode_pretree_symbol(17)
        @bitstream.write_bits(run - 4, 4)
        i += run
        next
      end
      # Runs of 1-3 zeros fall through and are sent as ordinary deltas.
    end

    previous = previous_lengths ? previous_lengths[i] : 0
    encode_pretree_symbol((previous - length) % 17)
    i += 1
  end
end
476
+
477
# Write the pretree code for one pretree symbol.
#
# Looks the symbol up in @pretree_codes and writes its :code using :bits
# bits. A symbol without a code entry is skipped without writing anything.
#
# @param symbol [Integer] Symbol to encode
# @return [void]
def encode_pretree_symbol(symbol)
  entry = @pretree_codes[symbol]
  @bitstream.write_bits(entry[:code], entry[:bits]) if entry
end
487
+
488
# Encode a token list: :literal tokens go through #encode_literal, every
# other token is treated as a match and goes through #encode_match.
#
# @param tokens [Array<Hash>] Tokens to encode
# @return [void]
def encode_tokens(tokens)
  tokens.each do |token|
    next encode_literal(token[:value]) if token[:type] == :literal

    encode_match(token[:length], token[:offset])
  end
end
501
+
502
# Write the main-tree code for a literal byte.
#
# A byte without a main-tree code entry is skipped without writing
# anything.
#
# @param byte [Integer] Byte value
# @return [void]
def encode_literal(byte)
  entry = @maintree_codes[byte]
  @bitstream.write_bits(entry[:code], entry[:bits]) if entry
end
512
+
513
# Encode one match.
#
# Emits, in order: the main-tree code of the match symbol
# (NUM_CHARS + position_slot * 8 + length header), the length-tree code of
# the length footer when the header is saturated at NUM_PRIMARY_LENGTHS,
# and the position extra bits. Afterwards the offset registers are updated.
# Symbols without a code entry are skipped without writing anything.
#
# @param length [Integer] Match length
# @param offset [Integer] Match offset
# @return [void]
def encode_match(length, offset)
  position_slot = get_position_slot(offset)

  # Length relative to MIN_MATCH; the header saturates at NUM_PRIMARY_LENGTHS.
  relative_length = length - MIN_MATCH
  length_header = relative_length < NUM_PRIMARY_LENGTHS ? relative_length : NUM_PRIMARY_LENGTHS

  main_entry = @maintree_codes[NUM_CHARS + (position_slot << 3) + length_header]
  @bitstream.write_bits(main_entry[:code], main_entry[:bits]) if main_entry

  # A saturated header means the rest of the length goes to the length tree.
  if length_header == NUM_PRIMARY_LENGTHS
    footer_entry = @length_codes[relative_length - NUM_PRIMARY_LENGTHS]
    @bitstream.write_bits(footer_entry[:code], footer_entry[:bits]) if footer_entry
  end

  encode_position_extra_bits(offset, position_slot)

  # Update R0, R1, R2
  update_offset_cache(offset)
end
549
+
550
# Write the extra bits that pin down an offset inside its position slot.
#
# Slots 0 and 1 never carry extra bits. Other slots use EXTRA_BITS bits
# (17 bits from slot 36 on); when that width is zero nothing is written.
# The value written is the offset's distance from the slot's base.
#
# @param offset [Integer] Match offset
# @param position_slot [Integer] Position slot
# @return [void]
def encode_position_extra_bits(offset, position_slot)
  return if position_slot < 2

  width = position_slot < 36 ? EXTRA_BITS[position_slot] : 17
  return if width.zero?

  @bitstream.write_bits(offset - POSITION_BASE[position_slot], width)
end
566
+
567
# Push a new offset onto the R0/R1/R2 registers.
#
# An offset that already sits in one of the registers leaves them
# untouched. Otherwise R1 moves to R2, R0 moves to R1 and the new offset
# becomes R0.
#
# NOTE(review): a repeated offset is not moved to the front here — confirm
# this matches what the decoder does for repeated-offset matches.
#
# @param offset [Integer] New offset
# @return [void]
def update_offset_cache(offset)
  return if @r0 == offset || @r1 == offset || @r2 == offset

  @r1, @r2 = @r0, @r1
  @r0 = offset
end
579
+ end
580
+ end
581
+ end