brotli 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +6 -3
  3. data/.github/workflows/publish.yml +7 -17
  4. data/.gitmodules +1 -1
  5. data/README.md +2 -2
  6. data/ext/brotli/brotli.c +8 -0
  7. data/ext/brotli/extconf.rb +6 -0
  8. data/lib/brotli/version.rb +1 -1
  9. data/test/brotli_test.rb +14 -1
  10. data/test/test_helper.rb +1 -0
  11. data/vendor/brotli/c/common/constants.c +1 -1
  12. data/vendor/brotli/c/common/constants.h +2 -1
  13. data/vendor/brotli/c/common/context.c +1 -1
  14. data/vendor/brotli/c/common/dictionary.c +5 -3
  15. data/vendor/brotli/c/common/platform.c +2 -1
  16. data/vendor/brotli/c/common/platform.h +60 -113
  17. data/vendor/brotli/c/common/shared_dictionary.c +521 -0
  18. data/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
  19. data/vendor/brotli/c/common/transform.c +1 -1
  20. data/vendor/brotli/c/common/version.h +31 -6
  21. data/vendor/brotli/c/dec/bit_reader.c +10 -8
  22. data/vendor/brotli/c/dec/bit_reader.h +172 -100
  23. data/vendor/brotli/c/dec/decode.c +467 -200
  24. data/vendor/brotli/c/dec/huffman.c +7 -4
  25. data/vendor/brotli/c/dec/huffman.h +2 -1
  26. data/vendor/brotli/c/dec/prefix.h +2 -1
  27. data/vendor/brotli/c/dec/state.c +33 -9
  28. data/vendor/brotli/c/dec/state.h +70 -35
  29. data/vendor/brotli/c/enc/backward_references.c +81 -19
  30. data/vendor/brotli/c/enc/backward_references.h +5 -4
  31. data/vendor/brotli/c/enc/backward_references_hq.c +148 -52
  32. data/vendor/brotli/c/enc/backward_references_hq.h +6 -5
  33. data/vendor/brotli/c/enc/backward_references_inc.h +31 -5
  34. data/vendor/brotli/c/enc/bit_cost.c +8 -7
  35. data/vendor/brotli/c/enc/bit_cost.h +5 -4
  36. data/vendor/brotli/c/enc/block_splitter.c +37 -14
  37. data/vendor/brotli/c/enc/block_splitter.h +5 -4
  38. data/vendor/brotli/c/enc/block_splitter_inc.h +86 -45
  39. data/vendor/brotli/c/enc/brotli_bit_stream.c +132 -110
  40. data/vendor/brotli/c/enc/brotli_bit_stream.h +11 -6
  41. data/vendor/brotli/c/enc/cluster.c +10 -9
  42. data/vendor/brotli/c/enc/cluster.h +7 -6
  43. data/vendor/brotli/c/enc/cluster_inc.h +25 -20
  44. data/vendor/brotli/c/enc/command.c +1 -1
  45. data/vendor/brotli/c/enc/command.h +5 -4
  46. data/vendor/brotli/c/enc/compound_dictionary.c +207 -0
  47. data/vendor/brotli/c/enc/compound_dictionary.h +74 -0
  48. data/vendor/brotli/c/enc/compress_fragment.c +93 -83
  49. data/vendor/brotli/c/enc/compress_fragment.h +32 -7
  50. data/vendor/brotli/c/enc/compress_fragment_two_pass.c +99 -87
  51. data/vendor/brotli/c/enc/compress_fragment_two_pass.h +21 -3
  52. data/vendor/brotli/c/enc/dictionary_hash.c +3 -1
  53. data/vendor/brotli/c/enc/encode.c +473 -404
  54. data/vendor/brotli/c/enc/encoder_dict.c +611 -4
  55. data/vendor/brotli/c/enc/encoder_dict.h +117 -3
  56. data/vendor/brotli/c/enc/entropy_encode.c +3 -2
  57. data/vendor/brotli/c/enc/entropy_encode.h +2 -1
  58. data/vendor/brotli/c/enc/entropy_encode_static.h +5 -2
  59. data/vendor/brotli/c/enc/fast_log.c +1 -1
  60. data/vendor/brotli/c/enc/fast_log.h +2 -1
  61. data/vendor/brotli/c/enc/find_match_length.h +15 -22
  62. data/vendor/brotli/c/enc/hash.h +285 -45
  63. data/vendor/brotli/c/enc/hash_composite_inc.h +26 -11
  64. data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +20 -18
  65. data/vendor/brotli/c/enc/hash_longest_match64_inc.h +34 -39
  66. data/vendor/brotli/c/enc/hash_longest_match_inc.h +6 -10
  67. data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -4
  68. data/vendor/brotli/c/enc/hash_rolling_inc.h +4 -4
  69. data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +6 -5
  70. data/vendor/brotli/c/enc/histogram.c +4 -4
  71. data/vendor/brotli/c/enc/histogram.h +7 -6
  72. data/vendor/brotli/c/enc/literal_cost.c +20 -15
  73. data/vendor/brotli/c/enc/literal_cost.h +4 -2
  74. data/vendor/brotli/c/enc/memory.c +29 -5
  75. data/vendor/brotli/c/enc/memory.h +19 -2
  76. data/vendor/brotli/c/enc/metablock.c +72 -58
  77. data/vendor/brotli/c/enc/metablock.h +9 -8
  78. data/vendor/brotli/c/enc/metablock_inc.h +8 -6
  79. data/vendor/brotli/c/enc/params.h +4 -3
  80. data/vendor/brotli/c/enc/prefix.h +3 -2
  81. data/vendor/brotli/c/enc/quality.h +40 -3
  82. data/vendor/brotli/c/enc/ringbuffer.h +4 -3
  83. data/vendor/brotli/c/enc/state.h +104 -0
  84. data/vendor/brotli/c/enc/static_dict.c +60 -4
  85. data/vendor/brotli/c/enc/static_dict.h +3 -2
  86. data/vendor/brotli/c/enc/static_dict_lut.h +2 -0
  87. data/vendor/brotli/c/enc/utf8_util.c +1 -1
  88. data/vendor/brotli/c/enc/utf8_util.h +2 -1
  89. data/vendor/brotli/c/enc/write_bits.h +2 -1
  90. data/vendor/brotli/c/include/brotli/decode.h +67 -2
  91. data/vendor/brotli/c/include/brotli/encode.h +55 -2
  92. data/vendor/brotli/c/include/brotli/port.h +28 -11
  93. data/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
  94. metadata +9 -3
@@ -6,22 +6,24 @@
6
6
 
7
7
  /* Function to find backward reference copies. */
8
8
 
9
- #include "./backward_references_hq.h"
9
+ #include "backward_references_hq.h"
10
10
 
11
11
  #include <string.h> /* memcpy, memset */
12
12
 
13
+ #include <brotli/types.h>
14
+
13
15
  #include "../common/constants.h"
14
- #include "../common/context.h"
15
16
  #include "../common/platform.h"
16
- #include <brotli/types.h>
17
- #include "./command.h"
18
- #include "./fast_log.h"
19
- #include "./find_match_length.h"
20
- #include "./literal_cost.h"
21
- #include "./memory.h"
22
- #include "./params.h"
23
- #include "./prefix.h"
24
- #include "./quality.h"
17
+ #include "command.h"
18
+ #include "compound_dictionary.h"
19
+ #include "encoder_dict.h"
20
+ #include "fast_log.h"
21
+ #include "find_match_length.h"
22
+ #include "literal_cost.h"
23
+ #include "memory.h"
24
+ #include "params.h"
25
+ #include "prefix.h"
26
+ #include "quality.h"
25
27
 
26
28
  #if defined(__cplusplus) || defined(c_plusplus)
27
29
  extern "C" {
@@ -73,6 +75,14 @@ static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
73
75
  return ZopfliNodeCopyLength(self) + (self->dcode_insert_length & 0x7FFFFFF);
74
76
  }
75
77
 
78
+ /* Temporary data for ZopfliCostModelSetFromCommands. */
79
+ typedef struct ZopfliCostModelArena {
80
+ uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
81
+ uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
82
+ uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
83
+ float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
84
+ } ZopfliCostModelArena;
85
+
76
86
  /* Histogram based cost model for zopflification. */
77
87
  typedef struct ZopfliCostModel {
78
88
  /* The insert and copy length symbols. */
@@ -83,6 +93,12 @@ typedef struct ZopfliCostModel {
83
93
  float* literal_costs_;
84
94
  float min_cost_cmd_;
85
95
  size_t num_bytes_;
96
+
97
+ /* Temporary data. */
98
+ union {
99
+ size_t literal_histograms[3 * 256];
100
+ ZopfliCostModelArena arena;
101
+ };
86
102
  } ZopfliCostModel;
87
103
 
88
104
  static void InitZopfliCostModel(
@@ -139,18 +155,15 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
139
155
  const Command* commands,
140
156
  size_t num_commands,
141
157
  size_t last_insert_len) {
142
- uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
143
- uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
144
- uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
145
- float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
158
+ ZopfliCostModelArena* arena = &self->arena;
146
159
  size_t pos = position - last_insert_len;
147
160
  float min_cost_cmd = kInfinity;
148
161
  size_t i;
149
162
  float* cost_cmd = self->cost_cmd_;
150
163
 
151
- memset(histogram_literal, 0, sizeof(histogram_literal));
152
- memset(histogram_cmd, 0, sizeof(histogram_cmd));
153
- memset(histogram_dist, 0, sizeof(histogram_dist));
164
+ memset(arena->histogram_literal, 0, sizeof(arena->histogram_literal));
165
+ memset(arena->histogram_cmd, 0, sizeof(arena->histogram_cmd));
166
+ memset(arena->histogram_dist, 0, sizeof(arena->histogram_dist));
154
167
 
155
168
  for (i = 0; i < num_commands; i++) {
156
169
  size_t inslength = commands[i].insert_len_;
@@ -159,21 +172,21 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
159
172
  size_t cmdcode = commands[i].cmd_prefix_;
160
173
  size_t j;
161
174
 
162
- histogram_cmd[cmdcode]++;
163
- if (cmdcode >= 128) histogram_dist[distcode]++;
175
+ arena->histogram_cmd[cmdcode]++;
176
+ if (cmdcode >= 128) arena->histogram_dist[distcode]++;
164
177
 
165
178
  for (j = 0; j < inslength; j++) {
166
- histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
179
+ arena->histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
167
180
  }
168
181
 
169
182
  pos += inslength + copylength;
170
183
  }
171
184
 
172
- SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
173
- cost_literal);
174
- SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
185
+ SetCost(arena->histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
186
+ arena->cost_literal);
187
+ SetCost(arena->histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
175
188
  cost_cmd);
176
- SetCost(histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
189
+ SetCost(arena->histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
177
190
  self->cost_dist_);
178
191
 
179
192
  for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
@@ -188,7 +201,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
188
201
  literal_costs[0] = 0.0;
189
202
  for (i = 0; i < num_bytes; ++i) {
190
203
  literal_carry +=
191
- cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
204
+ arena->cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
192
205
  literal_costs[i + 1] = literal_costs[i] + literal_carry;
193
206
  literal_carry -= literal_costs[i + 1] - literal_costs[i];
194
207
  }
@@ -206,7 +219,8 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
206
219
  size_t num_bytes = self->num_bytes_;
207
220
  size_t i;
208
221
  BrotliEstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
209
- ringbuffer, &literal_costs[1]);
222
+ ringbuffer, self->literal_histograms,
223
+ &literal_costs[1]);
210
224
  literal_costs[0] = 0.0;
211
225
  for (i = 0; i < num_bytes; ++i) {
212
226
  literal_carry += literal_costs[i + 1];
@@ -418,7 +432,8 @@ static size_t UpdateNodes(
418
432
  size_t min_len;
419
433
  size_t result = 0;
420
434
  size_t k;
421
- size_t gap = 0;
435
+ const CompoundDictionary* addon = &params->dictionary.compound;
436
+ size_t gap = addon->total_size;
422
437
 
423
438
  EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
424
439
  starting_dist_cache, model, queue, nodes);
@@ -472,6 +487,24 @@ static size_t UpdateNodes(
472
487
  len = FindMatchLengthWithLimit(&ringbuffer[prev_ix],
473
488
  &ringbuffer[cur_ix_masked],
474
489
  max_len);
490
+ } else if (backward > dictionary_start) {
491
+ size_t d = 0;
492
+ size_t offset;
493
+ size_t limit;
494
+ const uint8_t* source;
495
+ offset = dictionary_start + 1 + addon->total_size - 1;
496
+ while (offset >= backward + addon->chunk_offsets[d + 1]) d++;
497
+ source = addon->chunk_source[d];
498
+ offset = offset - addon->chunk_offsets[d] - backward;
499
+ limit = addon->chunk_offsets[d + 1] - addon->chunk_offsets[d] - offset;
500
+ limit = limit > max_len ? max_len : limit;
501
+ if (best_len >= limit ||
502
+ continuation != source[offset + best_len]) {
503
+ continue;
504
+ }
505
+ len = FindMatchLengthWithLimit(&source[offset],
506
+ &ringbuffer[cur_ix_masked],
507
+ limit);
475
508
  } else {
476
509
  /* "Gray" area. It is addressable by decoder, but this encoder
477
510
  instance does not have that data -> should not touch it. */
@@ -577,7 +610,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes,
577
610
  size_t pos = 0;
578
611
  uint32_t offset = nodes[0].u.next;
579
612
  size_t i;
580
- size_t gap = 0;
613
+ size_t gap = params->dictionary.compound.total_size;
581
614
  for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
582
615
  const ZopfliNode* next = &nodes[pos + offset];
583
616
  size_t copy_length = ZopfliNodeCopyLength(next);
@@ -653,6 +686,23 @@ static size_t ZopfliIterate(size_t num_bytes, size_t position,
653
686
  return ComputeShortestPathFromNodes(num_bytes, nodes);
654
687
  }
655
688
 
689
+ static void MergeMatches(BackwardMatch* dst,
690
+ BackwardMatch* src1, size_t len1, BackwardMatch* src2, size_t len2) {
691
+ while (len1 > 0 && len2 > 0) {
692
+ size_t l1 = BackwardMatchLength(src1);
693
+ size_t l2 = BackwardMatchLength(src2);
694
+ if (l1 < l2 || ((l1 == l2) && (src1->distance < src2->distance))) {
695
+ *dst++ = *src1++;
696
+ len1--;
697
+ } else {
698
+ *dst++ = *src2++;
699
+ len2--;
700
+ }
701
+ }
702
+ while (len1-- > 0) *dst++ = *src1++;
703
+ while (len2-- > 0) *dst++ = *src2++;
704
+ }
705
+
656
706
  /* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
657
707
  size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
658
708
  size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
@@ -661,21 +711,26 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
661
711
  const size_t stream_offset = params->stream_offset;
662
712
  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
663
713
  const size_t max_zopfli_len = MaxZopfliLen(params);
664
- ZopfliCostModel model;
665
714
  StartPosQueue queue;
666
- BackwardMatch matches[2 * (MAX_NUM_MATCHES_H10 + 64)];
715
+ BackwardMatch* BROTLI_RESTRICT matches =
716
+ BROTLI_ALLOC(m, BackwardMatch, 2 * (MAX_NUM_MATCHES_H10 + 64));
667
717
  const size_t store_end = num_bytes >= StoreLookaheadH10() ?
668
718
  position + num_bytes - StoreLookaheadH10() + 1 : position;
669
719
  size_t i;
670
- size_t gap = 0;
671
- size_t lz_matches_offset = 0;
672
- BROTLI_UNUSED(literal_context_lut);
720
+ const CompoundDictionary* addon = &params->dictionary.compound;
721
+ size_t gap = addon->total_size;
722
+ size_t lz_matches_offset =
723
+ (addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
724
+ ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
725
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) || BROTLI_IS_NULL(matches)) {
726
+ return 0;
727
+ }
673
728
  nodes[0].length = 0;
674
729
  nodes[0].u.cost = 0;
675
- InitZopfliCostModel(m, &model, &params->dist, num_bytes);
730
+ InitZopfliCostModel(m, model, &params->dist, num_bytes);
676
731
  if (BROTLI_IS_OOM(m)) return 0;
677
732
  ZopfliCostModelSetFromLiteralCosts(
678
- &model, position, ringbuffer, ringbuffer_mask);
733
+ model, position, ringbuffer, ringbuffer_mask);
679
734
  InitStartPosQueue(&queue);
680
735
  for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
681
736
  const size_t pos = position + i;
@@ -684,17 +739,35 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
684
739
  pos + stream_offset, max_backward_limit);
685
740
  size_t skip;
686
741
  size_t num_matches;
742
+ int dict_id = 0;
743
+ if (params->dictionary.contextual.context_based) {
744
+ uint8_t p1 = pos >= 1 ?
745
+ ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
746
+ uint8_t p2 = pos >= 2 ?
747
+ ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
748
+ dict_id = params->dictionary.contextual.context_map[
749
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
750
+ }
687
751
  num_matches = FindAllMatchesH10(&hasher->privat._H10,
688
- &params->dictionary,
752
+ params->dictionary.contextual.dict[dict_id],
689
753
  ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
690
754
  dictionary_start + gap, params, &matches[lz_matches_offset]);
755
+ if (addon->num_chunks != 0) {
756
+ size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
757
+ ringbuffer, ringbuffer_mask, pos, 3, num_bytes - i,
758
+ dictionary_start, params->dist.max_distance,
759
+ &matches[lz_matches_offset - 64], 64);
760
+ MergeMatches(matches, &matches[lz_matches_offset - 64], cd_matches,
761
+ &matches[lz_matches_offset], num_matches);
762
+ num_matches += cd_matches;
763
+ }
691
764
  if (num_matches > 0 &&
692
765
  BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
693
766
  matches[0] = matches[num_matches - 1];
694
767
  num_matches = 1;
695
768
  }
696
769
  skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
697
- params, max_backward_limit, dist_cache, num_matches, matches, &model,
770
+ params, max_backward_limit, dist_cache, num_matches, matches, model,
698
771
  &queue, nodes);
699
772
  if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
700
773
  if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
@@ -710,12 +783,14 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
710
783
  i++;
711
784
  if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
712
785
  EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
713
- dist_cache, &model, &queue, nodes);
786
+ dist_cache, model, &queue, nodes);
714
787
  skip--;
715
788
  }
716
789
  }
717
790
  }
718
- CleanupZopfliCostModel(m, &model);
791
+ CleanupZopfliCostModel(m, model);
792
+ BROTLI_FREE(m, model);
793
+ BROTLI_FREE(m, matches);
719
794
  return ComputeShortestPathFromNodes(num_bytes, nodes);
720
795
  }
721
796
 
@@ -753,14 +828,15 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
753
828
  size_t orig_last_insert_len;
754
829
  int orig_dist_cache[4];
755
830
  size_t orig_num_commands;
756
- ZopfliCostModel model;
831
+ ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
757
832
  ZopfliNode* nodes;
758
833
  BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
759
- size_t gap = 0;
760
- size_t shadow_matches = 0;
761
- BROTLI_UNUSED(literal_context_lut);
762
- if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(num_matches) ||
763
- BROTLI_IS_NULL(matches)) {
834
+ const CompoundDictionary* addon = &params->dictionary.compound;
835
+ size_t gap = addon->total_size;
836
+ size_t shadow_matches =
837
+ (addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
838
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) ||
839
+ BROTLI_IS_NULL(num_matches) || BROTLI_IS_NULL(matches)) {
764
840
  return;
765
841
  }
766
842
  for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
@@ -772,15 +848,34 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
772
848
  size_t num_found_matches;
773
849
  size_t cur_match_end;
774
850
  size_t j;
851
+ int dict_id = 0;
852
+ if (params->dictionary.contextual.context_based) {
853
+ uint8_t p1 = pos >= 1 ?
854
+ ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
855
+ uint8_t p2 = pos >= 2 ?
856
+ ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
857
+ dict_id = params->dictionary.contextual.context_map[
858
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
859
+ }
775
860
  /* Ensure that we have enough free slots. */
776
861
  BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
777
862
  cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
778
863
  if (BROTLI_IS_OOM(m)) return;
779
864
  num_found_matches = FindAllMatchesH10(&hasher->privat._H10,
780
- &params->dictionary,
865
+ params->dictionary.contextual.dict[dict_id],
781
866
  ringbuffer, ringbuffer_mask, pos, max_length,
782
867
  max_distance, dictionary_start + gap, params,
783
868
  &matches[cur_match_pos + shadow_matches]);
869
+ if (addon->num_chunks != 0) {
870
+ size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
871
+ ringbuffer, ringbuffer_mask, pos, 3, max_length,
872
+ dictionary_start, params->dist.max_distance,
873
+ &matches[cur_match_pos + shadow_matches - 64], 64);
874
+ MergeMatches(&matches[cur_match_pos],
875
+ &matches[cur_match_pos + shadow_matches - 64], cd_matches,
876
+ &matches[cur_match_pos + shadow_matches], num_found_matches);
877
+ num_found_matches += cd_matches;
878
+ }
784
879
  cur_match_end = cur_match_pos + num_found_matches;
785
880
  for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
786
881
  BROTLI_DCHECK(BackwardMatchLength(&matches[j]) <=
@@ -810,15 +905,15 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
810
905
  orig_num_commands = *num_commands;
811
906
  nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
812
907
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
813
- InitZopfliCostModel(m, &model, &params->dist, num_bytes);
908
+ InitZopfliCostModel(m, model, &params->dist, num_bytes);
814
909
  if (BROTLI_IS_OOM(m)) return;
815
910
  for (i = 0; i < 2; i++) {
816
911
  BrotliInitZopfliNodes(nodes, num_bytes + 1);
817
912
  if (i == 0) {
818
913
  ZopfliCostModelSetFromLiteralCosts(
819
- &model, position, ringbuffer, ringbuffer_mask);
914
+ model, position, ringbuffer, ringbuffer_mask);
820
915
  } else {
821
- ZopfliCostModelSetFromCommands(&model, position, ringbuffer,
916
+ ZopfliCostModelSetFromCommands(model, position, ringbuffer,
822
917
  ringbuffer_mask, commands, *num_commands - orig_num_commands,
823
918
  orig_last_insert_len);
824
919
  }
@@ -827,12 +922,13 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
827
922
  *last_insert_len = orig_last_insert_len;
828
923
  memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
829
924
  *num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
830
- ringbuffer_mask, params, gap, dist_cache, &model, num_matches, matches,
925
+ ringbuffer_mask, params, gap, dist_cache, model, num_matches, matches,
831
926
  nodes);
832
927
  BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
833
928
  last_insert_len, params, commands, num_literals);
834
929
  }
835
- CleanupZopfliCostModel(m, &model);
930
+ CleanupZopfliCostModel(m, model);
931
+ BROTLI_FREE(m, model);
836
932
  BROTLI_FREE(m, nodes);
837
933
  BROTLI_FREE(m, matches);
838
934
  BROTLI_FREE(m, num_matches);
@@ -9,15 +9,16 @@
9
9
  #ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
10
10
  #define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
11
11
 
12
+ #include <brotli/types.h>
13
+
12
14
  #include "../common/constants.h"
13
15
  #include "../common/context.h"
14
16
  #include "../common/dictionary.h"
15
17
  #include "../common/platform.h"
16
- #include <brotli/types.h>
17
- #include "./command.h"
18
- #include "./hash.h"
19
- #include "./memory.h"
20
- #include "./quality.h"
18
+ #include "command.h"
19
+ #include "hash.h"
20
+ #include "memory.h"
21
+ #include "quality.h"
21
22
 
22
23
  #if defined(__cplusplus) || defined(c_plusplus)
23
24
  extern "C" {
@@ -28,13 +28,11 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
28
28
  const size_t random_heuristics_window_size =
29
29
  LiteralSpreeLengthForSparseSearch(params);
30
30
  size_t apply_random_heuristics = position + random_heuristics_window_size;
31
- const size_t gap = 0;
31
+ const size_t gap = params->dictionary.compound.total_size;
32
32
 
33
33
  /* Minimum score to accept a backward reference. */
34
34
  const score_t kMinScore = BROTLI_SCORE_BASE + 100;
35
35
 
36
- BROTLI_UNUSED(literal_context_lut);
37
-
38
36
  FN(PrepareDistanceCache)(privat, dist_cache);
39
37
 
40
38
  while (position + FN(HashTypeLength)() < pos_end) {
@@ -43,13 +41,29 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
43
41
  size_t dictionary_start = BROTLI_MIN(size_t,
44
42
  position + position_offset, max_backward_limit);
45
43
  HasherSearchResult sr;
44
+ int dict_id = 0;
45
+ uint8_t p1 = 0;
46
+ uint8_t p2 = 0;
47
+ if (params->dictionary.contextual.context_based) {
48
+ p1 = position >= 1 ?
49
+ ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
50
+ p2 = position >= 2 ?
51
+ ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
52
+ dict_id = params->dictionary.contextual.context_map[
53
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
54
+ }
46
55
  sr.len = 0;
47
56
  sr.len_code_delta = 0;
48
57
  sr.distance = 0;
49
58
  sr.score = kMinScore;
50
- FN(FindLongestMatch)(privat, &params->dictionary,
59
+ FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
51
60
  ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
52
61
  max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
62
+ if (ENABLE_COMPOUND_DICTIONARY) {
63
+ LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
64
+ ringbuffer_mask, dist_cache, position, max_length,
65
+ dictionary_start, params->dist.max_distance, &sr);
66
+ }
53
67
  if (sr.score > kMinScore) {
54
68
  /* Found a match. Let's look for something even better ahead. */
55
69
  int delayed_backward_references_in_row = 0;
@@ -65,11 +79,23 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
65
79
  max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
66
80
  dictionary_start = BROTLI_MIN(size_t,
67
81
  position + 1 + position_offset, max_backward_limit);
82
+ if (params->dictionary.contextual.context_based) {
83
+ p2 = p1;
84
+ p1 = ringbuffer[position & ringbuffer_mask];
85
+ dict_id = params->dictionary.contextual.context_map[
86
+ BROTLI_CONTEXT(p1, p2, literal_context_lut)];
87
+ }
68
88
  FN(FindLongestMatch)(privat,
69
- &params->dictionary,
89
+ params->dictionary.contextual.dict[dict_id],
70
90
  ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
71
91
  max_distance, dictionary_start + gap, params->dist.max_distance,
72
92
  &sr2);
93
+ if (ENABLE_COMPOUND_DICTIONARY) {
94
+ LookupCompoundDictionaryMatch(
95
+ &params->dictionary.compound, ringbuffer,
96
+ ringbuffer_mask, dist_cache, position + 1, max_length,
97
+ dictionary_start, params->dist.max_distance, &sr2);
98
+ }
73
99
  if (sr2.score >= sr.score + cost_diff_lazy) {
74
100
  /* Ok, let's just write one byte for now and start a match from the
75
101
  next byte. */
@@ -6,28 +6,29 @@
6
6
 
7
7
  /* Functions to estimate the bit cost of Huffman trees. */
8
8
 
9
- #include "./bit_cost.h"
9
+ #include "bit_cost.h"
10
+
11
+ #include <brotli/types.h>
10
12
 
11
13
  #include "../common/constants.h"
12
14
  #include "../common/platform.h"
13
- #include <brotli/types.h>
14
- #include "./fast_log.h"
15
- #include "./histogram.h"
15
+ #include "fast_log.h"
16
+ #include "histogram.h"
16
17
 
17
18
  #if defined(__cplusplus) || defined(c_plusplus)
18
19
  extern "C" {
19
20
  #endif
20
21
 
21
22
  #define FN(X) X ## Literal
22
- #include "./bit_cost_inc.h" /* NOLINT(build/include) */
23
+ #include "bit_cost_inc.h" /* NOLINT(build/include) */
23
24
  #undef FN
24
25
 
25
26
  #define FN(X) X ## Command
26
- #include "./bit_cost_inc.h" /* NOLINT(build/include) */
27
+ #include "bit_cost_inc.h" /* NOLINT(build/include) */
27
28
  #undef FN
28
29
 
29
30
  #define FN(X) X ## Distance
30
- #include "./bit_cost_inc.h" /* NOLINT(build/include) */
31
+ #include "bit_cost_inc.h" /* NOLINT(build/include) */
31
32
  #undef FN
32
33
 
33
34
  #if defined(__cplusplus) || defined(c_plusplus)
@@ -9,10 +9,11 @@
9
9
  #ifndef BROTLI_ENC_BIT_COST_H_
10
10
  #define BROTLI_ENC_BIT_COST_H_
11
11
 
12
- #include "../common/platform.h"
13
12
  #include <brotli/types.h>
14
- #include "./fast_log.h"
15
- #include "./histogram.h"
13
+
14
+ #include "../common/platform.h"
15
+ #include "fast_log.h"
16
+ #include "histogram.h"
16
17
 
17
18
  #if defined(__cplusplus) || defined(c_plusplus)
18
19
  extern "C" {
@@ -45,7 +46,7 @@ static BROTLI_INLINE double BitsEntropy(
45
46
  const uint32_t* population, size_t size) {
46
47
  size_t sum;
47
48
  double retval = ShannonEntropy(population, size, &sum);
48
- if (retval < sum) {
49
+ if (retval < (double)sum) {
49
50
  /* At least one bit per literal is needed. */
50
51
  retval = (double)sum;
51
52
  }
@@ -6,18 +6,18 @@
6
6
 
7
7
  /* Block split point selection utilities. */
8
8
 
9
- #include "./block_splitter.h"
9
+ #include "block_splitter.h"
10
10
 
11
11
  #include <string.h> /* memcpy, memset */
12
12
 
13
13
  #include "../common/platform.h"
14
- #include "./bit_cost.h"
15
- #include "./cluster.h"
16
- #include "./command.h"
17
- #include "./fast_log.h"
18
- #include "./histogram.h"
19
- #include "./memory.h"
20
- #include "./quality.h"
14
+ #include "bit_cost.h"
15
+ #include "cluster.h"
16
+ #include "command.h"
17
+ #include "fast_log.h"
18
+ #include "histogram.h"
19
+ #include "memory.h"
20
+ #include "quality.h"
21
21
 
22
22
  #if defined(__cplusplus) || defined(c_plusplus)
23
23
  extern "C" {
@@ -30,6 +30,7 @@ static const double kCommandBlockSwitchCost = 13.5;
30
30
  static const double kDistanceBlockSwitchCost = 14.6;
31
31
  static const size_t kLiteralStrideLength = 70;
32
32
  static const size_t kCommandStrideLength = 40;
33
+ static const size_t kDistanceStrideLength = 40;
33
34
  static const size_t kSymbolsPerLiteralHistogram = 544;
34
35
  static const size_t kSymbolsPerCommandHistogram = 530;
35
36
  static const size_t kSymbolsPerDistanceHistogram = 544;
@@ -89,19 +90,19 @@ static BROTLI_INLINE double BitCost(size_t count) {
89
90
  #define FN(X) X ## Literal
90
91
  #define DataType uint8_t
91
92
  /* NOLINTNEXTLINE(build/include) */
92
- #include "./block_splitter_inc.h"
93
+ #include "block_splitter_inc.h"
93
94
  #undef DataType
94
95
  #undef FN
95
96
 
96
97
  #define FN(X) X ## Command
97
98
  #define DataType uint16_t
98
99
  /* NOLINTNEXTLINE(build/include) */
99
- #include "./block_splitter_inc.h"
100
+ #include "block_splitter_inc.h"
100
101
  #undef FN
101
102
 
102
103
  #define FN(X) X ## Distance
103
104
  /* NOLINTNEXTLINE(build/include) */
104
- #include "./block_splitter_inc.h"
105
+ #include "block_splitter_inc.h"
105
106
  #undef DataType
106
107
  #undef FN
107
108
 
@@ -119,6 +120,8 @@ void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
119
120
  BROTLI_FREE(m, self->lengths);
120
121
  }
121
122
 
123
+ /* Extracts literals, command distance and prefix codes, then applies
124
+ * SplitByteVector to create partitioning. */
122
125
  void BrotliSplitBlock(MemoryManager* m,
123
126
  const Command* cmds,
124
127
  const size_t num_commands,
@@ -136,7 +139,9 @@ void BrotliSplitBlock(MemoryManager* m,
136
139
  /* Create a continuous array of literals. */
137
140
  CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
138
141
  /* Create the block split on the array of literals.
139
- Literal histograms have alphabet size 256. */
142
+ * Literal histograms can have alphabet size up to 256.
143
+ * Though, to accommodate context modeling, less than half of maximum size
144
+ * is allowed. */
140
145
  SplitByteVectorLiteral(
141
146
  m, literals, literals_count,
142
147
  kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
@@ -144,6 +149,10 @@ void BrotliSplitBlock(MemoryManager* m,
144
149
  literal_split);
145
150
  if (BROTLI_IS_OOM(m)) return;
146
151
  BROTLI_FREE(m, literals);
152
+ /* NB: this might be a good place for injecting extra splitting without
153
+ * increasing encoder complexity; however, output parition would be less
154
+ * optimal than one produced with forced splitting inside
155
+ * SplitByteVector (FindBlocks / ClusterBlocks). */
147
156
  }
148
157
 
149
158
  {
@@ -161,7 +170,7 @@ void BrotliSplitBlock(MemoryManager* m,
161
170
  kCommandStrideLength, kCommandBlockSwitchCost, params,
162
171
  insert_and_copy_split);
163
172
  if (BROTLI_IS_OOM(m)) return;
164
- /* TODO: reuse for distances? */
173
+ /* TODO(eustas): reuse for distances? */
165
174
  BROTLI_FREE(m, insert_and_copy_codes);
166
175
  }
167
176
 
@@ -181,13 +190,27 @@ void BrotliSplitBlock(MemoryManager* m,
181
190
  SplitByteVectorDistance(
182
191
  m, distance_prefixes, j,
183
192
  kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
184
- kCommandStrideLength, kDistanceBlockSwitchCost, params,
193
+ kDistanceStrideLength, kDistanceBlockSwitchCost, params,
185
194
  dist_split);
186
195
  if (BROTLI_IS_OOM(m)) return;
187
196
  BROTLI_FREE(m, distance_prefixes);
188
197
  }
189
198
  }
190
199
 
200
+ #if defined(BROTLI_TEST)
201
+ size_t CountLiteralsForTest(const Command*, const size_t);
202
+ size_t CountLiteralsForTest(const Command* cmds, const size_t num_commands) {
203
+ return CountLiterals(cmds, num_commands);
204
+ }
205
+
206
+ void CopyLiteralsToByteArrayForTest(const Command*,
207
+ const size_t, const uint8_t*, const size_t, const size_t, uint8_t*);
208
+ void CopyLiteralsToByteArrayForTest(const Command* cmds,
209
+ const size_t num_commands, const uint8_t* data, const size_t offset,
210
+ const size_t mask, uint8_t* literals) {
211
+ CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals);
212
+ }
213
+ #endif
191
214
 
192
215
  #if defined(__cplusplus) || defined(c_plusplus)
193
216
  } /* extern "C" */
@@ -9,11 +9,12 @@
9
9
  #ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
10
10
  #define BROTLI_ENC_BLOCK_SPLITTER_H_
11
11
 
12
- #include "../common/platform.h"
13
12
  #include <brotli/types.h>
14
- #include "./command.h"
15
- #include "./memory.h"
16
- #include "./quality.h"
13
+
14
+ #include "../common/platform.h"
15
+ #include "command.h"
16
+ #include "memory.h"
17
+ #include "quality.h"
17
18
 
18
19
  #if defined(__cplusplus) || defined(c_plusplus)
19
20
  extern "C" {