brotli 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. checksums.yaml +5 -5
  2. data/.github/workflows/main.yml +34 -0
  3. data/.github/workflows/publish.yml +34 -0
  4. data/Gemfile +6 -2
  5. data/Rakefile +18 -6
  6. data/bin/before_install.sh +9 -0
  7. data/brotli.gemspec +7 -13
  8. data/ext/brotli/brotli.c +209 -11
  9. data/ext/brotli/buffer.c +1 -7
  10. data/ext/brotli/buffer.h +1 -1
  11. data/ext/brotli/extconf.rb +45 -26
  12. data/lib/brotli/version.rb +1 -1
  13. data/smoke.sh +1 -1
  14. data/test/brotli_test.rb +104 -0
  15. data/test/brotli_writer_test.rb +36 -0
  16. data/test/test_helper.rb +8 -0
  17. data/vendor/brotli/c/common/constants.c +15 -0
  18. data/vendor/brotli/c/common/constants.h +149 -6
  19. data/vendor/brotli/c/{dec/context.h → common/context.c} +91 -186
  20. data/vendor/brotli/c/common/context.h +113 -0
  21. data/vendor/brotli/c/common/dictionary.bin +0 -0
  22. data/vendor/brotli/c/common/dictionary.bin.br +0 -0
  23. data/vendor/brotli/c/common/dictionary.c +11 -2
  24. data/vendor/brotli/c/common/dictionary.h +4 -4
  25. data/vendor/brotli/c/common/platform.c +22 -0
  26. data/vendor/brotli/c/common/platform.h +594 -0
  27. data/vendor/brotli/c/common/transform.c +291 -0
  28. data/vendor/brotli/c/common/transform.h +85 -0
  29. data/vendor/brotli/c/common/version.h +8 -1
  30. data/vendor/brotli/c/dec/bit_reader.c +29 -1
  31. data/vendor/brotli/c/dec/bit_reader.h +91 -100
  32. data/vendor/brotli/c/dec/decode.c +665 -437
  33. data/vendor/brotli/c/dec/huffman.c +65 -84
  34. data/vendor/brotli/c/dec/huffman.h +67 -14
  35. data/vendor/brotli/c/dec/prefix.h +1 -20
  36. data/vendor/brotli/c/dec/state.c +32 -45
  37. data/vendor/brotli/c/dec/state.h +173 -55
  38. data/vendor/brotli/c/enc/backward_references.c +27 -16
  39. data/vendor/brotli/c/enc/backward_references.h +7 -7
  40. data/vendor/brotli/c/enc/backward_references_hq.c +155 -116
  41. data/vendor/brotli/c/enc/backward_references_hq.h +22 -23
  42. data/vendor/brotli/c/enc/backward_references_inc.h +32 -22
  43. data/vendor/brotli/c/enc/bit_cost.c +1 -1
  44. data/vendor/brotli/c/enc/bit_cost.h +5 -5
  45. data/vendor/brotli/c/enc/block_encoder_inc.h +7 -6
  46. data/vendor/brotli/c/enc/block_splitter.c +5 -6
  47. data/vendor/brotli/c/enc/block_splitter.h +1 -1
  48. data/vendor/brotli/c/enc/block_splitter_inc.h +26 -17
  49. data/vendor/brotli/c/enc/brotli_bit_stream.c +107 -123
  50. data/vendor/brotli/c/enc/brotli_bit_stream.h +19 -38
  51. data/vendor/brotli/c/enc/cluster.c +1 -1
  52. data/vendor/brotli/c/enc/cluster.h +1 -1
  53. data/vendor/brotli/c/enc/cluster_inc.h +6 -3
  54. data/vendor/brotli/c/enc/command.c +28 -0
  55. data/vendor/brotli/c/enc/command.h +52 -42
  56. data/vendor/brotli/c/enc/compress_fragment.c +21 -22
  57. data/vendor/brotli/c/enc/compress_fragment.h +1 -1
  58. data/vendor/brotli/c/enc/compress_fragment_two_pass.c +102 -69
  59. data/vendor/brotli/c/enc/compress_fragment_two_pass.h +1 -1
  60. data/vendor/brotli/c/enc/dictionary_hash.c +1827 -1101
  61. data/vendor/brotli/c/enc/dictionary_hash.h +2 -1
  62. data/vendor/brotli/c/enc/encode.c +358 -195
  63. data/vendor/brotli/c/enc/encoder_dict.c +33 -0
  64. data/vendor/brotli/c/enc/encoder_dict.h +43 -0
  65. data/vendor/brotli/c/enc/entropy_encode.c +16 -14
  66. data/vendor/brotli/c/enc/entropy_encode.h +7 -7
  67. data/vendor/brotli/c/enc/entropy_encode_static.h +3 -3
  68. data/vendor/brotli/c/enc/fast_log.c +105 -0
  69. data/vendor/brotli/c/enc/fast_log.h +20 -99
  70. data/vendor/brotli/c/enc/find_match_length.h +5 -6
  71. data/vendor/brotli/c/enc/hash.h +145 -103
  72. data/vendor/brotli/c/enc/hash_composite_inc.h +125 -0
  73. data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +93 -53
  74. data/vendor/brotli/c/enc/hash_longest_match64_inc.h +54 -53
  75. data/vendor/brotli/c/enc/hash_longest_match_inc.h +58 -54
  76. data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +95 -63
  77. data/vendor/brotli/c/enc/hash_rolling_inc.h +212 -0
  78. data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +46 -43
  79. data/vendor/brotli/c/enc/histogram.c +9 -6
  80. data/vendor/brotli/c/enc/histogram.h +6 -3
  81. data/vendor/brotli/c/enc/histogram_inc.h +1 -1
  82. data/vendor/brotli/c/enc/literal_cost.c +5 -5
  83. data/vendor/brotli/c/enc/literal_cost.h +2 -2
  84. data/vendor/brotli/c/enc/memory.c +5 -16
  85. data/vendor/brotli/c/enc/memory.h +52 -1
  86. data/vendor/brotli/c/enc/metablock.c +171 -36
  87. data/vendor/brotli/c/enc/metablock.h +13 -8
  88. data/vendor/brotli/c/enc/metablock_inc.h +2 -2
  89. data/vendor/brotli/c/enc/params.h +46 -0
  90. data/vendor/brotli/c/enc/prefix.h +3 -4
  91. data/vendor/brotli/c/enc/quality.h +29 -24
  92. data/vendor/brotli/c/enc/ringbuffer.h +19 -12
  93. data/vendor/brotli/c/enc/static_dict.c +49 -45
  94. data/vendor/brotli/c/enc/static_dict.h +4 -3
  95. data/vendor/brotli/c/enc/static_dict_lut.h +1 -1
  96. data/vendor/brotli/c/enc/utf8_util.c +21 -21
  97. data/vendor/brotli/c/enc/utf8_util.h +1 -1
  98. data/vendor/brotli/c/enc/write_bits.h +35 -38
  99. data/vendor/brotli/c/include/brotli/decode.h +13 -8
  100. data/vendor/brotli/c/include/brotli/encode.h +54 -8
  101. data/vendor/brotli/c/include/brotli/port.h +225 -83
  102. data/vendor/brotli/c/include/brotli/types.h +0 -7
  103. metadata +28 -87
  104. data/.travis.yml +0 -30
  105. data/spec/brotli_spec.rb +0 -88
  106. data/spec/inflate_spec.rb +0 -75
  107. data/spec/spec_helper.rb +0 -4
  108. data/vendor/brotli/c/dec/port.h +0 -168
  109. data/vendor/brotli/c/dec/transform.h +0 -300
  110. data/vendor/brotli/c/enc/context.h +0 -184
  111. data/vendor/brotli/c/enc/port.h +0 -184
@@ -15,7 +15,8 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- extern const uint16_t kStaticDictionaryHash[32768];
18
+ extern const uint16_t kStaticDictionaryHashWords[32768];
19
+ extern const uint8_t kStaticDictionaryHashLengths[32768];
19
20
 
20
21
  #if defined(__cplusplus) || defined(c_plusplus)
21
22
  } /* extern "C" */
@@ -11,6 +11,9 @@
11
11
  #include <stdlib.h> /* free, malloc */
12
12
  #include <string.h> /* memcpy, memset */
13
13
 
14
+ #include "../common/constants.h"
15
+ #include "../common/context.h"
16
+ #include "../common/platform.h"
14
17
  #include "../common/version.h"
15
18
  #include "./backward_references.h"
16
19
  #include "./backward_references_hq.h"
@@ -18,14 +21,13 @@
18
21
  #include "./brotli_bit_stream.h"
19
22
  #include "./compress_fragment.h"
20
23
  #include "./compress_fragment_two_pass.h"
21
- #include "./context.h"
24
+ #include "./encoder_dict.h"
22
25
  #include "./entropy_encode.h"
23
26
  #include "./fast_log.h"
24
27
  #include "./hash.h"
25
28
  #include "./histogram.h"
26
29
  #include "./memory.h"
27
30
  #include "./metablock.h"
28
- #include "./port.h"
29
31
  #include "./prefix.h"
30
32
  #include "./quality.h"
31
33
  #include "./ringbuffer.h"
@@ -52,12 +54,19 @@ typedef enum BrotliEncoderStreamState {
52
54
  BROTLI_STREAM_METADATA_BODY = 4
53
55
  } BrotliEncoderStreamState;
54
56
 
57
+ typedef enum BrotliEncoderFlintState {
58
+ BROTLI_FLINT_NEEDS_2_BYTES = 2,
59
+ BROTLI_FLINT_NEEDS_1_BYTE = 1,
60
+ BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
61
+ BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
62
+ BROTLI_FLINT_DONE = -2
63
+ } BrotliEncoderFlintState;
64
+
55
65
  typedef struct BrotliEncoderStateStruct {
56
66
  BrotliEncoderParams params;
57
67
 
58
68
  MemoryManager memory_manager_;
59
69
 
60
- HasherHandle hasher_;
61
70
  uint64_t input_pos_;
62
71
  RingBuffer ringbuffer_;
63
72
  size_t cmd_alloc_size_;
@@ -69,12 +78,19 @@ typedef struct BrotliEncoderStateStruct {
69
78
  uint64_t last_processed_pos_;
70
79
  int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
71
80
  int saved_dist_cache_[4];
72
- uint8_t last_byte_;
73
- uint8_t last_byte_bits_;
81
+ uint16_t last_bytes_;
82
+ uint8_t last_bytes_bits_;
83
+ /* "Flint" is a tiny uncompressed block emitted before the continuation
84
+ block to unwire literal context from previous data. Despite being int8_t,
85
+ field is actually BrotliEncoderFlintState enum. */
86
+ int8_t flint_;
74
87
  uint8_t prev_byte_;
75
88
  uint8_t prev_byte2_;
76
89
  size_t storage_size_;
77
90
  uint8_t* storage_;
91
+
92
+ Hasher hasher_;
93
+
78
94
  /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
79
95
  int small_table_[1 << 10]; /* 4KiB */
80
96
  int* large_table_; /* Allocated only when needed */
@@ -112,10 +128,7 @@ typedef struct BrotliEncoderStateStruct {
112
128
  BROTLI_BOOL is_initialized_;
113
129
  } BrotliEncoderStateStruct;
114
130
 
115
- static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s);
116
-
117
131
  static size_t InputBlockSize(BrotliEncoderState* s) {
118
- if (!EnsureInitialized(s)) return 0;
119
132
  return (size_t)1 << s->params.lgblock;
120
133
  }
121
134
 
@@ -161,27 +174,24 @@ BROTLI_BOOL BrotliEncoderSetParameter(
161
174
  state->params.size_hint = value;
162
175
  return BROTLI_TRUE;
163
176
 
164
- default: return BROTLI_FALSE;
165
- }
166
- }
177
+ case BROTLI_PARAM_LARGE_WINDOW:
178
+ state->params.large_window = TO_BROTLI_BOOL(!!value);
179
+ return BROTLI_TRUE;
167
180
 
168
- static void RecomputeDistancePrefixes(Command* cmds,
169
- size_t num_commands,
170
- uint32_t num_direct_distance_codes,
171
- uint32_t distance_postfix_bits) {
172
- size_t i;
173
- if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
174
- return;
175
- }
176
- for (i = 0; i < num_commands; ++i) {
177
- Command* cmd = &cmds[i];
178
- if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
179
- PrefixEncodeCopyDistance(CommandRestoreDistanceCode(cmd),
180
- num_direct_distance_codes,
181
- distance_postfix_bits,
182
- &cmd->dist_prefix_,
183
- &cmd->dist_extra_);
184
- }
181
+ case BROTLI_PARAM_NPOSTFIX:
182
+ state->params.dist.distance_postfix_bits = value;
183
+ return BROTLI_TRUE;
184
+
185
+ case BROTLI_PARAM_NDIRECT:
186
+ state->params.dist.num_direct_distance_codes = value;
187
+ return BROTLI_TRUE;
188
+
189
+ case BROTLI_PARAM_STREAM_OFFSET:
190
+ if (value > (1u << 30)) return BROTLI_FALSE;
191
+ state->params.stream_offset = value;
192
+ return BROTLI_TRUE;
193
+
194
+ default: return BROTLI_FALSE;
185
195
  }
186
196
  }
187
197
 
@@ -202,7 +212,7 @@ static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
202
212
  if (s->storage_size_ < size) {
203
213
  BROTLI_FREE(m, s->storage_);
204
214
  s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
205
- if (BROTLI_IS_OOM(m)) return NULL;
215
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->storage_)) return NULL;
206
216
  s->storage_size_ = size;
207
217
  }
208
218
  return s->storage_;
@@ -226,7 +236,7 @@ static int* GetHashTable(BrotliEncoderState* s, int quality,
226
236
  const size_t max_table_size = MaxHashTableSize(quality);
227
237
  size_t htsize = HashTableSize(max_table_size, input_size);
228
238
  int* table;
229
- assert(max_table_size >= 256);
239
+ BROTLI_DCHECK(max_table_size >= 256);
230
240
  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
231
241
  /* Only odd shifts are supported by fast-one-pass. */
232
242
  if ((htsize & 0xAAAAA) == 0) {
@@ -241,7 +251,7 @@ static int* GetHashTable(BrotliEncoderState* s, int quality,
241
251
  s->large_table_size_ = htsize;
242
252
  BROTLI_FREE(m, s->large_table_);
243
253
  s->large_table_ = BROTLI_ALLOC(m, int, htsize);
244
- if (BROTLI_IS_OOM(m)) return 0;
254
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->large_table_)) return 0;
245
255
  }
246
256
  table = s->large_table_;
247
257
  }
@@ -251,20 +261,25 @@ static int* GetHashTable(BrotliEncoderState* s, int quality,
251
261
  return table;
252
262
  }
253
263
 
254
- static void EncodeWindowBits(int lgwin, uint8_t* last_byte,
255
- uint8_t* last_byte_bits) {
256
- if (lgwin == 16) {
257
- *last_byte = 0;
258
- *last_byte_bits = 1;
259
- } else if (lgwin == 17) {
260
- *last_byte = 1;
261
- *last_byte_bits = 7;
262
- } else if (lgwin > 17) {
263
- *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1);
264
- *last_byte_bits = 4;
264
+ static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
265
+ uint16_t* last_bytes, uint8_t* last_bytes_bits) {
266
+ if (large_window) {
267
+ *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
268
+ *last_bytes_bits = 14;
265
269
  } else {
266
- *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1);
267
- *last_byte_bits = 7;
270
+ if (lgwin == 16) {
271
+ *last_bytes = 0;
272
+ *last_bytes_bits = 1;
273
+ } else if (lgwin == 17) {
274
+ *last_bytes = 1;
275
+ *last_bytes_bits = 7;
276
+ } else if (lgwin > 17) {
277
+ *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
278
+ *last_bytes_bits = 4;
279
+ } else {
280
+ *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
281
+ *last_bytes_bits = 7;
282
+ }
268
283
  }
269
284
  }
270
285
 
@@ -357,7 +372,7 @@ static void ChooseContextMap(int quality,
357
372
  }
358
373
 
359
374
  total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
360
- assert(total != 0);
375
+ BROTLI_DCHECK(total != 0);
361
376
  entropy[0] = 1.0 / (double)total;
362
377
  entropy[1] *= entropy[0];
363
378
  entropy[2] *= entropy[0];
@@ -420,6 +435,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
420
435
  double entropy[3];
421
436
  size_t dummy;
422
437
  size_t i;
438
+ ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
423
439
  for (; start_pos + 64 <= end_pos; start_pos += 4096) {
424
440
  const size_t stride_end_pos = start_pos + 64;
425
441
  uint8_t prev2 = input[start_pos & mask];
@@ -430,7 +446,7 @@ static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
430
446
  for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
431
447
  const uint8_t literal = input[pos & mask];
432
448
  const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
433
- Context(prev1, prev2, CONTEXT_UTF8)];
449
+ BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
434
450
  ++total;
435
451
  ++combined_histo[literal >> 3];
436
452
  ++context_histo[context][literal >> 3];
@@ -497,8 +513,10 @@ static void DecideOverLiteralContextModeling(const uint8_t* input,
497
513
  static BROTLI_BOOL ShouldCompress(
498
514
  const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
499
515
  const size_t bytes, const size_t num_literals, const size_t num_commands) {
516
+ /* TODO: find more precise minimal block overhead. */
517
+ if (bytes <= 2) return BROTLI_FALSE;
500
518
  if (num_commands < (bytes >> 8) + 2) {
501
- if (num_literals > 0.99 * (double)bytes) {
519
+ if ((double)num_literals > 0.99 * (double)bytes) {
502
520
  uint32_t literal_histo[256] = { 0 };
503
521
  static const uint32_t kSampleRate = 13;
504
522
  static const double kMinEntropy = 7.92;
@@ -519,12 +537,26 @@ static BROTLI_BOOL ShouldCompress(
519
537
  return BROTLI_TRUE;
520
538
  }
521
539
 
540
+ /* Chooses the literal context mode for a metablock */
541
+ static ContextType ChooseContextMode(const BrotliEncoderParams* params,
542
+ const uint8_t* data, const size_t pos, const size_t mask,
543
+ const size_t length) {
544
+ /* We only do the computation for the option of something else than
545
+ CONTEXT_UTF8 for the highest qualities */
546
+ if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
547
+ !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
548
+ return CONTEXT_SIGNED;
549
+ }
550
+ return CONTEXT_UTF8;
551
+ }
552
+
522
553
  static void WriteMetaBlockInternal(MemoryManager* m,
523
554
  const uint8_t* data,
524
555
  const size_t mask,
525
556
  const uint64_t last_flush_pos,
526
557
  const size_t bytes,
527
558
  const BROTLI_BOOL is_last,
559
+ ContextType literal_context_mode,
528
560
  const BrotliEncoderParams* params,
529
561
  const uint8_t prev_byte,
530
562
  const uint8_t prev_byte2,
@@ -536,10 +568,10 @@ static void WriteMetaBlockInternal(MemoryManager* m,
536
568
  size_t* storage_ix,
537
569
  uint8_t* storage) {
538
570
  const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
539
- uint8_t last_byte;
540
- uint8_t last_byte_bits;
541
- uint32_t num_direct_distance_codes = 0;
542
- uint32_t distance_postfix_bits = 0;
571
+ uint16_t last_bytes;
572
+ uint8_t last_bytes_bits;
573
+ ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
574
+ BrotliEncoderParams block_params = *params;
543
575
 
544
576
  if (bytes == 0) {
545
577
  /* Write the ISLAST and ISEMPTY bits. */
@@ -559,31 +591,22 @@ static void WriteMetaBlockInternal(MemoryManager* m,
559
591
  return;
560
592
  }
561
593
 
562
- last_byte = storage[0];
563
- last_byte_bits = (uint8_t)(*storage_ix & 0xff);
564
- if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES &&
565
- params->mode == BROTLI_MODE_FONT) {
566
- num_direct_distance_codes = 12;
567
- distance_postfix_bits = 1;
568
- RecomputeDistancePrefixes(commands,
569
- num_commands,
570
- num_direct_distance_codes,
571
- distance_postfix_bits);
572
- }
594
+ BROTLI_DCHECK(*storage_ix <= 14);
595
+ last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
596
+ last_bytes_bits = (uint8_t)(*storage_ix);
573
597
  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
574
598
  BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
575
- bytes, mask, is_last,
599
+ bytes, mask, is_last, params,
576
600
  commands, num_commands,
577
601
  storage_ix, storage);
578
602
  if (BROTLI_IS_OOM(m)) return;
579
603
  } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
580
604
  BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
581
- bytes, mask, is_last,
605
+ bytes, mask, is_last, params,
582
606
  commands, num_commands,
583
607
  storage_ix, storage);
584
608
  if (BROTLI_IS_OOM(m)) return;
585
609
  } else {
586
- ContextType literal_context_mode = CONTEXT_UTF8;
587
610
  MetaBlockSplit mb;
588
611
  InitMetaBlockSplit(&mb);
589
612
  if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
@@ -596,15 +619,11 @@ static void WriteMetaBlockInternal(MemoryManager* m,
596
619
  &literal_context_map);
597
620
  }
598
621
  BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
599
- prev_byte, prev_byte2, literal_context_mode, num_literal_contexts,
622
+ prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
600
623
  literal_context_map, commands, num_commands, &mb);
601
624
  if (BROTLI_IS_OOM(m)) return;
602
625
  } else {
603
- if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes,
604
- kMinUTF8Ratio)) {
605
- literal_context_mode = CONTEXT_SIGNED;
606
- }
607
- BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params,
626
+ BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
608
627
  prev_byte, prev_byte2,
609
628
  commands, num_commands,
610
629
  literal_context_mode,
@@ -612,15 +631,15 @@ static void WriteMetaBlockInternal(MemoryManager* m,
612
631
  if (BROTLI_IS_OOM(m)) return;
613
632
  }
614
633
  if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
615
- BrotliOptimizeHistograms(num_direct_distance_codes,
616
- distance_postfix_bits,
617
- &mb);
634
+ /* The number of distance symbols effectively used for distance
635
+ histograms. It might be less than distance alphabet size
636
+ for "Large Window Brotli" (32-bit). */
637
+ BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
618
638
  }
619
639
  BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
620
640
  prev_byte, prev_byte2,
621
641
  is_last,
622
- num_direct_distance_codes,
623
- distance_postfix_bits,
642
+ &block_params,
624
643
  literal_context_mode,
625
644
  commands, num_commands,
626
645
  &mb,
@@ -631,22 +650,63 @@ static void WriteMetaBlockInternal(MemoryManager* m,
631
650
  if (bytes + 4 < (*storage_ix >> 3)) {
632
651
  /* Restore the distance cache and last byte. */
633
652
  memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
634
- storage[0] = last_byte;
635
- *storage_ix = last_byte_bits;
653
+ storage[0] = (uint8_t)last_bytes;
654
+ storage[1] = (uint8_t)(last_bytes >> 8);
655
+ *storage_ix = last_bytes_bits;
636
656
  BrotliStoreUncompressedMetaBlock(is_last, data,
637
657
  wrapped_last_flush_pos, mask,
638
658
  bytes, storage_ix, storage);
639
659
  }
640
660
  }
641
661
 
662
+ static void ChooseDistanceParams(BrotliEncoderParams* params) {
663
+ uint32_t distance_postfix_bits = 0;
664
+ uint32_t num_direct_distance_codes = 0;
665
+
666
+ if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
667
+ uint32_t ndirect_msb;
668
+ if (params->mode == BROTLI_MODE_FONT) {
669
+ distance_postfix_bits = 1;
670
+ num_direct_distance_codes = 12;
671
+ } else {
672
+ distance_postfix_bits = params->dist.distance_postfix_bits;
673
+ num_direct_distance_codes = params->dist.num_direct_distance_codes;
674
+ }
675
+ ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F;
676
+ if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX ||
677
+ num_direct_distance_codes > BROTLI_MAX_NDIRECT ||
678
+ (ndirect_msb << distance_postfix_bits) != num_direct_distance_codes) {
679
+ distance_postfix_bits = 0;
680
+ num_direct_distance_codes = 0;
681
+ }
682
+ }
683
+
684
+ BrotliInitDistanceParams(
685
+ params, distance_postfix_bits, num_direct_distance_codes);
686
+ }
687
+
642
688
  static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
643
689
  if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
644
690
  if (s->is_initialized_) return BROTLI_TRUE;
645
691
 
692
+ s->last_bytes_bits_ = 0;
693
+ s->last_bytes_ = 0;
694
+ s->flint_ = BROTLI_FLINT_DONE;
695
+ s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
696
+
646
697
  SanitizeParams(&s->params);
647
698
  s->params.lgblock = ComputeLgBlock(&s->params);
648
-
649
- s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
699
+ ChooseDistanceParams(&s->params);
700
+
701
+ if (s->params.stream_offset != 0) {
702
+ s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
703
+ /* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
704
+ s->dist_cache_[0] = -16;
705
+ s->dist_cache_[1] = -16;
706
+ s->dist_cache_[2] = -16;
707
+ s->dist_cache_[3] = -16;
708
+ memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
709
+ }
650
710
 
651
711
  RingBufferSetup(&s->params, &s->ringbuffer_);
652
712
 
@@ -657,7 +717,14 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
657
717
  s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
658
718
  lgwin = BROTLI_MAX(int, lgwin, 18);
659
719
  }
660
- EncodeWindowBits(lgwin, &s->last_byte_, &s->last_byte_bits_);
720
+ if (s->params.stream_offset == 0) {
721
+ EncodeWindowBits(lgwin, s->params.large_window,
722
+ &s->last_bytes_, &s->last_bytes_bits_);
723
+ } else {
724
+ /* Bigger values have the same effect, but could cause overflows. */
725
+ s->params.stream_offset = BROTLI_MIN(size_t,
726
+ s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
727
+ }
661
728
  }
662
729
 
663
730
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
@@ -671,11 +738,20 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
671
738
 
672
739
  static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
673
740
  params->mode = BROTLI_DEFAULT_MODE;
741
+ params->large_window = BROTLI_FALSE;
674
742
  params->quality = BROTLI_DEFAULT_QUALITY;
675
743
  params->lgwin = BROTLI_DEFAULT_WINDOW;
676
744
  params->lgblock = 0;
745
+ params->stream_offset = 0;
677
746
  params->size_hint = 0;
678
747
  params->disable_literal_context_modeling = BROTLI_FALSE;
748
+ BrotliInitEncoderDictionary(&params->dictionary);
749
+ params->dist.distance_postfix_bits = 0;
750
+ params->dist.num_direct_distance_codes = 0;
751
+ params->dist.alphabet_size_max =
752
+ BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
753
+ params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
754
+ params->dist.max_distance = BROTLI_MAX_DISTANCE;
679
755
  }
680
756
 
681
757
  static void BrotliEncoderInitState(BrotliEncoderState* s) {
@@ -690,7 +766,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
690
766
  s->prev_byte2_ = 0;
691
767
  s->storage_size_ = 0;
692
768
  s->storage_ = 0;
693
- s->hasher_ = NULL;
769
+ HasherInit(&s->hasher_);
694
770
  s->large_table_ = NULL;
695
771
  s->large_table_size_ = 0;
696
772
  s->cmd_code_numbits_ = 0;
@@ -718,9 +794,8 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
718
794
  memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
719
795
  }
720
796
 
721
- BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,
722
- brotli_free_func free_func,
723
- void* opaque) {
797
+ BrotliEncoderState* BrotliEncoderCreateInstance(
798
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
724
799
  BrotliEncoderState* state = 0;
725
800
  if (!alloc_func && !free_func) {
726
801
  state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState));
@@ -777,7 +852,6 @@ static void CopyInputToRingBuffer(BrotliEncoderState* s,
777
852
  const uint8_t* input_buffer) {
778
853
  RingBuffer* ringbuffer_ = &s->ringbuffer_;
779
854
  MemoryManager* m = &s->memory_manager_;
780
- if (!EnsureInitialized(s)) return;
781
855
  RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
782
856
  if (BROTLI_IS_OOM(m)) return;
783
857
  s->input_pos_ += input_size;
@@ -837,6 +911,40 @@ static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
837
911
  return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
838
912
  }
839
913
 
914
+ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
915
+ uint32_t* wrapped_last_processed_pos) {
916
+ Command* last_command = &s->commands_[s->num_commands_ - 1];
917
+ const uint8_t* data = s->ringbuffer_.buffer_;
918
+ const uint32_t mask = s->ringbuffer_.mask_;
919
+ uint64_t max_backward_distance =
920
+ (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
921
+ uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
922
+ uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
923
+ uint64_t max_distance = last_processed_pos < max_backward_distance ?
924
+ last_processed_pos : max_backward_distance;
925
+ uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
926
+ uint32_t distance_code = CommandRestoreDistanceCode(last_command,
927
+ &s->params.dist);
928
+ if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
929
+ distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
930
+ if (cmd_dist <= max_distance) {
931
+ while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
932
+ data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
933
+ last_command->copy_len_++;
934
+ (*bytes)--;
935
+ (*wrapped_last_processed_pos)++;
936
+ }
937
+ } else {
938
+ }
939
+ /* The copy length is at most the metablock size, and thus expressible. */
940
+ GetLengthCode(last_command->insert_len_,
941
+ (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
942
+ (int)(last_command->copy_len_ >> 25)),
943
+ TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
944
+ &last_command->cmd_prefix_);
945
+ }
946
+ }
947
+
840
948
  /*
841
949
  Processes the accumulated input data and sets |*out_size| to the length of
842
950
  the new output meta-block, or to zero if no new output meta-block has been
@@ -853,15 +961,14 @@ static BROTLI_BOOL EncodeData(
853
961
  BrotliEncoderState* s, const BROTLI_BOOL is_last,
854
962
  const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
855
963
  const uint64_t delta = UnprocessedInputSize(s);
856
- const uint32_t bytes = (uint32_t)delta;
857
- const uint32_t wrapped_last_processed_pos =
858
- WrapPosition(s->last_processed_pos_);
964
+ uint32_t bytes = (uint32_t)delta;
965
+ uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
859
966
  uint8_t* data;
860
967
  uint32_t mask;
861
968
  MemoryManager* m = &s->memory_manager_;
862
- const BrotliDictionary* dictionary = BrotliGetDictionary();
969
+ ContextType literal_context_mode;
970
+ ContextLut literal_context_lut;
863
971
 
864
- if (!EnsureInitialized(s)) return BROTLI_FALSE;
865
972
  data = s->ringbuffer_.buffer_;
866
973
  mask = s->ringbuffer_.mask_;
867
974
 
@@ -878,13 +985,16 @@ static BROTLI_BOOL EncodeData(
878
985
  BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
879
986
  s->literal_buf_ =
880
987
  BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
881
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
988
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
989
+ BROTLI_IS_NULL(s->literal_buf_)) {
990
+ return BROTLI_FALSE;
991
+ }
882
992
  }
883
993
 
884
994
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
885
995
  s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
886
996
  uint8_t* storage;
887
- size_t storage_ix = s->last_byte_bits_;
997
+ size_t storage_ix = s->last_bytes_bits_;
888
998
  size_t table_size;
889
999
  int* table;
890
1000
 
@@ -894,9 +1004,10 @@ static BROTLI_BOOL EncodeData(
894
1004
  *out_size = 0;
895
1005
  return BROTLI_TRUE;
896
1006
  }
897
- storage = GetBrotliStorage(s, 2 * bytes + 502);
1007
+ storage = GetBrotliStorage(s, 2 * bytes + 503);
898
1008
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
899
- storage[0] = s->last_byte_;
1009
+ storage[0] = (uint8_t)s->last_bytes_;
1010
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
900
1011
  table = GetHashTable(s, s->params.quality, bytes, &table_size);
901
1012
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
902
1013
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
@@ -917,8 +1028,8 @@ static BROTLI_BOOL EncodeData(
917
1028
  &storage_ix, storage);
918
1029
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
919
1030
  }
920
- s->last_byte_ = storage[storage_ix >> 3];
921
- s->last_byte_bits_ = storage_ix & 7u;
1031
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1032
+ s->last_bytes_bits_ = storage_ix & 7u;
922
1033
  UpdateLastProcessedPos(s);
923
1034
  *output = &storage[0];
924
1035
  *out_size = storage_ix >> 3;
@@ -935,7 +1046,7 @@ static BROTLI_BOOL EncodeData(
935
1046
  newsize += (bytes / 4) + 16;
936
1047
  s->cmd_alloc_size_ = newsize;
937
1048
  new_commands = BROTLI_ALLOC(m, Command, newsize);
938
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1049
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) return BROTLI_FALSE;
939
1050
  if (s->commands_) {
940
1051
  memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
941
1052
  BROTLI_FREE(m, s->commands_);
@@ -946,27 +1057,40 @@ static BROTLI_BOOL EncodeData(
946
1057
 
947
1058
  InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
948
1059
  wrapped_last_processed_pos, bytes, is_last);
1060
+
1061
+ literal_context_mode = ChooseContextMode(
1062
+ &s->params, data, WrapPosition(s->last_flush_pos_),
1063
+ mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
1064
+ literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
1065
+
949
1066
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
950
1067
 
1068
+ if (s->num_commands_ && s->last_insert_len_ == 0) {
1069
+ ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
1070
+ }
1071
+
951
1072
  if (s->params.quality == ZOPFLIFICATION_QUALITY) {
952
- assert(s->params.hasher.type == 10);
953
- BrotliCreateZopfliBackwardReferences(
954
- m, dictionary, bytes, wrapped_last_processed_pos, data, mask,
955
- &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_,
956
- &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_);
1073
+ BROTLI_DCHECK(s->params.hasher.type == 10);
1074
+ BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1075
+ data, mask, literal_context_lut, &s->params,
1076
+ &s->hasher_, s->dist_cache_,
1077
+ &s->last_insert_len_, &s->commands_[s->num_commands_],
1078
+ &s->num_commands_, &s->num_literals_);
957
1079
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
958
1080
  } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
959
- assert(s->params.hasher.type == 10);
960
- BrotliCreateHqZopfliBackwardReferences(
961
- m, dictionary, bytes, wrapped_last_processed_pos, data, mask,
962
- &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_,
963
- &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_);
1081
+ BROTLI_DCHECK(s->params.hasher.type == 10);
1082
+ BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1083
+ data, mask, literal_context_lut, &s->params,
1084
+ &s->hasher_, s->dist_cache_,
1085
+ &s->last_insert_len_, &s->commands_[s->num_commands_],
1086
+ &s->num_commands_, &s->num_literals_);
964
1087
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
965
1088
  } else {
966
- BrotliCreateBackwardReferences(
967
- dictionary, bytes, wrapped_last_processed_pos, data, mask,
968
- &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_,
969
- &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_);
1089
+ BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
1090
+ data, mask, literal_context_lut, &s->params,
1091
+ &s->hasher_, s->dist_cache_,
1092
+ &s->last_insert_len_, &s->commands_[s->num_commands_],
1093
+ &s->num_commands_, &s->num_literals_);
970
1094
  }
971
1095
 
972
1096
  {
@@ -989,7 +1113,7 @@ static BROTLI_BOOL EncodeData(
989
1113
  s->num_commands_ < max_commands) {
990
1114
  /* Merge with next input block. Everything will happen later. */
991
1115
  if (UpdateLastProcessedPos(s)) {
992
- HasherReset(s->hasher_);
1116
+ HasherReset(&s->hasher_);
993
1117
  }
994
1118
  *out_size = 0;
995
1119
  return BROTLI_TRUE;
@@ -1009,27 +1133,28 @@ static BROTLI_BOOL EncodeData(
1009
1133
  *out_size = 0;
1010
1134
  return BROTLI_TRUE;
1011
1135
  }
1012
- assert(s->input_pos_ >= s->last_flush_pos_);
1013
- assert(s->input_pos_ > s->last_flush_pos_ || is_last);
1014
- assert(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
1136
+ BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
1137
+ BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
1138
+ BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
1015
1139
  {
1016
1140
  const uint32_t metablock_size =
1017
1141
  (uint32_t)(s->input_pos_ - s->last_flush_pos_);
1018
- uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 502);
1019
- size_t storage_ix = s->last_byte_bits_;
1142
+ uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
1143
+ size_t storage_ix = s->last_bytes_bits_;
1020
1144
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1021
- storage[0] = s->last_byte_;
1145
+ storage[0] = (uint8_t)s->last_bytes_;
1146
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1022
1147
  WriteMetaBlockInternal(
1023
1148
  m, data, mask, s->last_flush_pos_, metablock_size, is_last,
1024
- &s->params, s->prev_byte_, s->prev_byte2_,
1149
+ literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
1025
1150
  s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
1026
1151
  s->dist_cache_, &storage_ix, storage);
1027
1152
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1028
- s->last_byte_ = storage[storage_ix >> 3];
1029
- s->last_byte_bits_ = storage_ix & 7u;
1153
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1154
+ s->last_bytes_bits_ = storage_ix & 7u;
1030
1155
  s->last_flush_pos_ = s->input_pos_;
1031
1156
  if (UpdateLastProcessedPos(s)) {
1032
- HasherReset(s->hasher_);
1157
+ HasherReset(&s->hasher_);
1033
1158
  }
1034
1159
  if (s->last_flush_pos_ > 0) {
1035
1160
  s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
@@ -1055,10 +1180,11 @@ static BROTLI_BOOL EncodeData(
1055
1180
  static size_t WriteMetadataHeader(
1056
1181
  BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
1057
1182
  size_t storage_ix;
1058
- storage_ix = s->last_byte_bits_;
1059
- header[0] = s->last_byte_;
1060
- s->last_byte_ = 0;
1061
- s->last_byte_bits_ = 0;
1183
+ storage_ix = s->last_bytes_bits_;
1184
+ header[0] = (uint8_t)s->last_bytes_;
1185
+ header[1] = (uint8_t)(s->last_bytes_ >> 8);
1186
+ s->last_bytes_ = 0;
1187
+ s->last_bytes_bits_ = 0;
1062
1188
 
1063
1189
  BrotliWriteBits(1, 0, &storage_ix, header);
1064
1190
  BrotliWriteBits(2, 3, &storage_ix, header);
@@ -1082,21 +1208,18 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
1082
1208
  MemoryManager* m = &memory_manager;
1083
1209
 
1084
1210
  const size_t mask = BROTLI_SIZE_MAX >> 1;
1085
- const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(lgwin);
1086
1211
  int dist_cache[4] = { 4, 11, 15, 16 };
1087
1212
  int saved_dist_cache[4] = { 4, 11, 15, 16 };
1088
1213
  BROTLI_BOOL ok = BROTLI_TRUE;
1089
1214
  const size_t max_out_size = *encoded_size;
1090
1215
  size_t total_out_size = 0;
1091
- uint8_t last_byte;
1092
- uint8_t last_byte_bits;
1093
- HasherHandle hasher = NULL;
1216
+ uint16_t last_bytes;
1217
+ uint8_t last_bytes_bits;
1094
1218
 
1095
- const size_t hasher_eff_size =
1096
- BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP);
1219
+ const size_t hasher_eff_size = BROTLI_MIN(size_t,
1220
+ input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);
1097
1221
 
1098
1222
  BrotliEncoderParams params;
1099
- const BrotliDictionary* dictionary = BrotliGetDictionary();
1100
1223
 
1101
1224
  const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
1102
1225
  size_t max_block_size;
@@ -1107,17 +1230,24 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
1107
1230
  uint8_t prev_byte = 0;
1108
1231
  uint8_t prev_byte2 = 0;
1109
1232
 
1233
+ Hasher hasher;
1234
+ HasherInit(&hasher);
1235
+
1110
1236
  BrotliEncoderInitParams(&params);
1111
1237
  params.quality = 10;
1112
1238
  params.lgwin = lgwin;
1239
+ if (lgwin > BROTLI_MAX_WINDOW_BITS) {
1240
+ params.large_window = BROTLI_TRUE;
1241
+ }
1113
1242
  SanitizeParams(&params);
1114
1243
  params.lgblock = ComputeLgBlock(&params);
1244
+ ChooseDistanceParams(&params);
1115
1245
  max_block_size = (size_t)1 << params.lgblock;
1116
1246
 
1117
1247
  BrotliInitMemoryManager(m, 0, 0, 0);
1118
1248
 
1119
- assert(input_size <= mask + 1);
1120
- EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
1249
+ BROTLI_DCHECK(input_size <= mask + 1);
1250
+ EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits);
1121
1251
  InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
1122
1252
  0, hasher_eff_size, BROTLI_TRUE);
1123
1253
  if (BROTLI_IS_OOM(m)) goto oom;
@@ -1137,6 +1267,10 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
1137
1267
  uint8_t* storage;
1138
1268
  size_t storage_ix;
1139
1269
 
1270
+ ContextType literal_context_mode = ChooseContextMode(&params,
1271
+ input_buffer, metablock_start, mask, metablock_end - metablock_start);
1272
+ ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
1273
+
1140
1274
  size_t block_start;
1141
1275
  for (block_start = metablock_start; block_start < metablock_end; ) {
1142
1276
  size_t block_size =
@@ -1144,13 +1278,13 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
1144
1278
  ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
1145
1279
  size_t path_size;
1146
1280
  size_t new_cmd_alloc_size;
1147
- if (BROTLI_IS_OOM(m)) goto oom;
1281
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) goto oom;
1148
1282
  BrotliInitZopfliNodes(nodes, block_size + 1);
1149
- StitchToPreviousBlockH10(hasher, block_size, block_start,
1283
+ StitchToPreviousBlockH10(&hasher.privat._H10, block_size, block_start,
1150
1284
  input_buffer, mask);
1151
- path_size = BrotliZopfliComputeShortestPath(
1152
- m, dictionary, block_size, block_start, input_buffer, mask, &params,
1153
- max_backward_limit, dist_cache, hasher, nodes);
1285
+ path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
1286
+ input_buffer, mask, literal_context_lut, &params, dist_cache, &hasher,
1287
+ nodes);
1154
1288
  if (BROTLI_IS_OOM(m)) goto oom;
1155
1289
  /* We allocate a command buffer in the first iteration of this loop that
1156
1290
  will be likely big enough for the whole metablock, so that for most
@@ -1164,7 +1298,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
1164
1298
  num_commands + path_size + 1);
1165
1299
  if (cmd_alloc_size != new_cmd_alloc_size) {
1166
1300
  Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
1167
- if (BROTLI_IS_OOM(m)) goto oom;
1301
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) goto oom;
1168
1302
  cmd_alloc_size = new_cmd_alloc_size;
1169
1303
  if (commands) {
1170
1304
  memcpy(new_commands, commands, sizeof(Command) * num_commands);
@@ -1172,10 +1306,8 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
1172
1306
  }
1173
1307
  commands = new_commands;
1174
1308
  }
1175
- BrotliZopfliCreateCommands(block_size, block_start, max_backward_limit,
1176
- &nodes[0], dist_cache, &last_insert_len,
1177
- &params, &commands[num_commands],
1178
- &num_literals);
1309
+ BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache,
1310
+ &last_insert_len, &params, &commands[num_commands], &num_literals);
1179
1311
  num_commands += path_size;
1180
1312
  block_start += block_size;
1181
1313
  metablock_size += block_size;
@@ -1193,13 +1325,14 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
1193
1325
 
1194
1326
  is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
1195
1327
  storage = NULL;
1196
- storage_ix = last_byte_bits;
1328
+ storage_ix = last_bytes_bits;
1197
1329
 
1198
1330
  if (metablock_size == 0) {
1199
1331
  /* Write the ISLAST and ISEMPTY bits. */
1200
1332
  storage = BROTLI_ALLOC(m, uint8_t, 16);
1201
- if (BROTLI_IS_OOM(m)) goto oom;
1202
- storage[0] = last_byte;
1333
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
1334
+ storage[0] = (uint8_t)last_bytes;
1335
+ storage[1] = (uint8_t)(last_bytes >> 8);
1203
1336
  BrotliWriteBits(2, 3, &storage_ix, storage);
1204
1337
  storage_ix = (storage_ix + 7u) & ~7u;
1205
1338
  } else if (!ShouldCompress(input_buffer, mask, metablock_start,
@@ -1208,38 +1341,37 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
1208
1341
  CreateBackwardReferences is now unused. */
1209
1342
  memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
1210
1343
  storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
1211
- if (BROTLI_IS_OOM(m)) goto oom;
1212
- storage[0] = last_byte;
1344
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
1345
+ storage[0] = (uint8_t)last_bytes;
1346
+ storage[1] = (uint8_t)(last_bytes >> 8);
1213
1347
  BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
1214
1348
  metablock_start, mask, metablock_size,
1215
1349
  &storage_ix, storage);
1216
1350
  } else {
1217
- uint32_t num_direct_distance_codes = 0;
1218
- uint32_t distance_postfix_bits = 0;
1219
- ContextType literal_context_mode = CONTEXT_UTF8;
1220
1351
  MetaBlockSplit mb;
1352
+ BrotliEncoderParams block_params = params;
1221
1353
  InitMetaBlockSplit(&mb);
1222
- if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask,
1223
- metablock_size, kMinUTF8Ratio)) {
1224
- literal_context_mode = CONTEXT_SIGNED;
1225
- }
1226
- BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, &params,
1354
+ BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,
1355
+ &block_params,
1227
1356
  prev_byte, prev_byte2,
1228
1357
  commands, num_commands,
1229
1358
  literal_context_mode,
1230
1359
  &mb);
1231
1360
  if (BROTLI_IS_OOM(m)) goto oom;
1232
- BrotliOptimizeHistograms(num_direct_distance_codes,
1233
- distance_postfix_bits,
1234
- &mb);
1235
- storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 502);
1236
- if (BROTLI_IS_OOM(m)) goto oom;
1237
- storage[0] = last_byte;
1361
+ {
1362
+ /* The number of distance symbols effectively used for distance
1363
+ histograms. It might be less than distance alphabet size
1364
+ for "Large Window Brotli" (32-bit). */
1365
+ BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
1366
+ }
1367
+ storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
1368
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
1369
+ storage[0] = (uint8_t)last_bytes;
1370
+ storage[1] = (uint8_t)(last_bytes >> 8);
1238
1371
  BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
1239
1372
  mask, prev_byte, prev_byte2,
1240
1373
  is_last,
1241
- num_direct_distance_codes,
1242
- distance_postfix_bits,
1374
+ &block_params,
1243
1375
  literal_context_mode,
1244
1376
  commands, num_commands,
1245
1377
  &mb,
@@ -1248,19 +1380,22 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
1248
1380
  if (metablock_size + 4 < (storage_ix >> 3)) {
1249
1381
  /* Restore the distance cache and last byte. */
1250
1382
  memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
1251
- storage[0] = last_byte;
1252
- storage_ix = last_byte_bits;
1383
+ storage[0] = (uint8_t)last_bytes;
1384
+ storage[1] = (uint8_t)(last_bytes >> 8);
1385
+ storage_ix = last_bytes_bits;
1253
1386
  BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
1254
1387
  metablock_start, mask,
1255
1388
  metablock_size, &storage_ix, storage);
1256
1389
  }
1257
1390
  DestroyMetaBlockSplit(m, &mb);
1258
1391
  }
1259
- last_byte = storage[storage_ix >> 3];
1260
- last_byte_bits = storage_ix & 7u;
1392
+ last_bytes = (uint16_t)(storage[storage_ix >> 3]);
1393
+ last_bytes_bits = storage_ix & 7u;
1261
1394
  metablock_start += metablock_size;
1262
- prev_byte = input_buffer[metablock_start - 1];
1263
- prev_byte2 = input_buffer[metablock_start - 2];
1395
+ if (metablock_start < input_size) {
1396
+ prev_byte = input_buffer[metablock_start - 1];
1397
+ prev_byte2 = input_buffer[metablock_start - 2];
1398
+ }
1264
1399
  /* Save the state of the distance cache in case we need to restore it for
1265
1400
  emitting an uncompressed block. */
1266
1401
  memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
@@ -1290,12 +1425,10 @@ oom:
1290
1425
 
1291
1426
  size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
1292
1427
  /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
1293
- size_t num_large_blocks = input_size >> 24;
1294
- size_t tail = input_size - (num_large_blocks << 24);
1295
- size_t tail_overhead = (tail > (1 << 20)) ? 4 : 3;
1296
- size_t overhead = 2 + (4 * num_large_blocks) + tail_overhead + 1;
1428
+ size_t num_large_blocks = input_size >> 14;
1429
+ size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1;
1297
1430
  size_t result = input_size + overhead;
1298
- if (input_size == 0) return 1;
1431
+ if (input_size == 0) return 2;
1299
1432
  return (result < input_size) ? 0 : result;
1300
1433
  }
1301
1434
 
@@ -1356,7 +1489,7 @@ BROTLI_BOOL BrotliEncoderCompress(
1356
1489
  }
1357
1490
  if (quality == 10) {
1358
1491
  /* TODO: Implement this direct path for all quality levels. */
1359
- const int lg_win = BROTLI_MIN(int, BROTLI_MAX_WINDOW_BITS,
1492
+ const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
1360
1493
  BROTLI_MAX(int, 16, lgwin));
1361
1494
  int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
1362
1495
  encoded_size, encoded_buffer);
@@ -1380,6 +1513,9 @@ BROTLI_BOOL BrotliEncoderCompress(
1380
1513
  BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
1381
1514
  BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
1382
1515
  BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
1516
+ if (lgwin > BROTLI_MAX_WINDOW_BITS) {
1517
+ BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
1518
+ }
1383
1519
  result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
1384
1520
  &available_in, &next_in, &available_out, &next_out, &total_out);
1385
1521
  if (!BrotliEncoderIsFinished(s)) result = 0;
@@ -1402,11 +1538,11 @@ fallback:
1402
1538
  }
1403
1539
 
1404
1540
  static void InjectBytePaddingBlock(BrotliEncoderState* s) {
1405
- uint32_t seal = s->last_byte_;
1406
- size_t seal_bits = s->last_byte_bits_;
1541
+ uint32_t seal = s->last_bytes_;
1542
+ size_t seal_bits = s->last_bytes_bits_;
1407
1543
  uint8_t* destination;
1408
- s->last_byte_ = 0;
1409
- s->last_byte_bits_ = 0;
1544
+ s->last_bytes_ = 0;
1545
+ s->last_bytes_bits_ = 0;
1410
1546
  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
1411
1547
  seal |= 0x6u << seal_bits;
1412
1548
  seal_bits += 6;
@@ -1420,6 +1556,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) {
1420
1556
  }
1421
1557
  destination[0] = (uint8_t)seal;
1422
1558
  if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
1559
+ if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);
1423
1560
  s->available_out_ += (seal_bits + 7) >> 3;
1424
1561
  }
1425
1562
 
@@ -1428,7 +1565,7 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) {
1428
1565
  static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
1429
1566
  size_t* available_out, uint8_t** next_out, size_t* total_out) {
1430
1567
  if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1431
- s->last_byte_bits_ != 0) {
1568
+ s->last_bytes_bits_ != 0) {
1432
1569
  InjectBytePaddingBlock(s);
1433
1570
  return BROTLI_TRUE;
1434
1571
  }
@@ -1479,7 +1616,10 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
1479
1616
  BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
1480
1617
  s->literal_buf_ =
1481
1618
  BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
1482
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1619
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(s->command_buf_) ||
1620
+ BROTLI_IS_NULL(s->literal_buf_)) {
1621
+ return BROTLI_FALSE;
1622
+ }
1483
1623
  }
1484
1624
  if (s->command_buf_) {
1485
1625
  command_buf = s->command_buf_;
@@ -1487,7 +1627,10 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
1487
1627
  } else {
1488
1628
  tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
1489
1629
  tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
1490
- if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1630
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp_command_buf) ||
1631
+ BROTLI_IS_NULL(tmp_literal_buf)) {
1632
+ return BROTLI_FALSE;
1633
+ }
1491
1634
  command_buf = tmp_command_buf;
1492
1635
  literal_buf = tmp_literal_buf;
1493
1636
  }
@@ -1509,10 +1652,10 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
1509
1652
  (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
1510
1653
  BROTLI_BOOL force_flush =
1511
1654
  (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
1512
- size_t max_out_size = 2 * block_size + 502;
1655
+ size_t max_out_size = 2 * block_size + 503;
1513
1656
  BROTLI_BOOL inplace = BROTLI_TRUE;
1514
1657
  uint8_t* storage = NULL;
1515
- size_t storage_ix = s->last_byte_bits_;
1658
+ size_t storage_ix = s->last_bytes_bits_;
1516
1659
  size_t table_size;
1517
1660
  int* table;
1518
1661
 
@@ -1527,7 +1670,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
1527
1670
  storage = GetBrotliStorage(s, max_out_size);
1528
1671
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1529
1672
  }
1530
- storage[0] = s->last_byte_;
1673
+ storage[0] = (uint8_t)s->last_bytes_;
1674
+ storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1531
1675
  table = GetHashTable(s, s->params.quality, block_size, &table_size);
1532
1676
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1533
1677
 
@@ -1542,12 +1686,14 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
1542
1686
  &storage_ix, storage);
1543
1687
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1544
1688
  }
1545
- *next_in += block_size;
1546
- *available_in -= block_size;
1689
+ if (block_size != 0) {
1690
+ *next_in += block_size;
1691
+ *available_in -= block_size;
1692
+ }
1547
1693
  if (inplace) {
1548
1694
  size_t out_bytes = storage_ix >> 3;
1549
- assert(out_bytes <= *available_out);
1550
- assert((storage_ix & 7) == 0 || out_bytes < *available_out);
1695
+ BROTLI_DCHECK(out_bytes <= *available_out);
1696
+ BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
1551
1697
  *next_out += out_bytes;
1552
1698
  *available_out -= out_bytes;
1553
1699
  s->total_out_ += out_bytes;
@@ -1557,8 +1703,8 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
1557
1703
  s->next_out_ = storage;
1558
1704
  s->available_out_ = out_bytes;
1559
1705
  }
1560
- s->last_byte_ = storage[storage_ix >> 3];
1561
- s->last_byte_bits_ = storage_ix & 7u;
1706
+ s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1707
+ s->last_bytes_bits_ = storage_ix & 7u;
1562
1708
 
1563
1709
  if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1564
1710
  if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
@@ -1688,6 +1834,10 @@ BROTLI_BOOL BrotliEncoderCompressStream(
1688
1834
  }
1689
1835
  while (BROTLI_TRUE) {
1690
1836
  size_t remaining_block_size = RemainingInputBlockSize(s);
1837
+ /* Shorten input to flint size. */
1838
+ if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
1839
+ remaining_block_size = (size_t)s->flint_;
1840
+ }
1691
1841
 
1692
1842
  if (remaining_block_size != 0 && *available_in != 0) {
1693
1843
  size_t copy_input_size =
@@ -1695,10 +1845,18 @@ BROTLI_BOOL BrotliEncoderCompressStream(
1695
1845
  CopyInputToRingBuffer(s, copy_input_size, *next_in);
1696
1846
  *next_in += copy_input_size;
1697
1847
  *available_in -= copy_input_size;
1848
+ if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
1698
1849
  continue;
1699
1850
  }
1700
1851
 
1701
1852
  if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1853
+ /* Exit the "emit flint" workflow. */
1854
+ if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
1855
+ CheckFlushComplete(s);
1856
+ if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
1857
+ s->flint_ = BROTLI_FLINT_DONE;
1858
+ }
1859
+ }
1702
1860
  continue;
1703
1861
  }
1704
1862
 
@@ -1712,6 +1870,11 @@ BROTLI_BOOL BrotliEncoderCompressStream(
1712
1870
  BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
1713
1871
  (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
1714
1872
  BROTLI_BOOL result;
1873
+ /* Force emitting (uncompressed) piece containing flint. */
1874
+ if (!is_last && s->flint_ == 0) {
1875
+ s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
1876
+ force_flush = BROTLI_TRUE;
1877
+ }
1715
1878
  UpdateSizeHint(s, *available_in);
1716
1879
  result = EncodeData(s, is_last, force_flush,
1717
1880
  &s->available_out_, &s->next_out_);