brotli 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +6 -3
  3. data/.github/workflows/publish.yml +7 -17
  4. data/.gitmodules +1 -1
  5. data/README.md +2 -2
  6. data/ext/brotli/brotli.c +1 -0
  7. data/ext/brotli/extconf.rb +6 -0
  8. data/lib/brotli/version.rb +1 -1
  9. data/test/brotli_test.rb +4 -1
  10. data/vendor/brotli/c/common/constants.c +1 -1
  11. data/vendor/brotli/c/common/constants.h +2 -1
  12. data/vendor/brotli/c/common/context.c +1 -1
  13. data/vendor/brotli/c/common/dictionary.c +5 -3
  14. data/vendor/brotli/c/common/platform.c +2 -1
  15. data/vendor/brotli/c/common/platform.h +60 -113
  16. data/vendor/brotli/c/common/shared_dictionary.c +521 -0
  17. data/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
  18. data/vendor/brotli/c/common/transform.c +1 -1
  19. data/vendor/brotli/c/common/version.h +31 -6
  20. data/vendor/brotli/c/dec/bit_reader.c +10 -8
  21. data/vendor/brotli/c/dec/bit_reader.h +172 -100
  22. data/vendor/brotli/c/dec/decode.c +467 -200
  23. data/vendor/brotli/c/dec/huffman.c +7 -4
  24. data/vendor/brotli/c/dec/huffman.h +2 -1
  25. data/vendor/brotli/c/dec/prefix.h +2 -1
  26. data/vendor/brotli/c/dec/state.c +33 -9
  27. data/vendor/brotli/c/dec/state.h +70 -35
  28. data/vendor/brotli/c/enc/backward_references.c +81 -19
  29. data/vendor/brotli/c/enc/backward_references.h +5 -4
  30. data/vendor/brotli/c/enc/backward_references_hq.c +148 -52
  31. data/vendor/brotli/c/enc/backward_references_hq.h +6 -5
  32. data/vendor/brotli/c/enc/backward_references_inc.h +31 -5
  33. data/vendor/brotli/c/enc/bit_cost.c +8 -7
  34. data/vendor/brotli/c/enc/bit_cost.h +5 -4
  35. data/vendor/brotli/c/enc/block_splitter.c +37 -14
  36. data/vendor/brotli/c/enc/block_splitter.h +5 -4
  37. data/vendor/brotli/c/enc/block_splitter_inc.h +86 -45
  38. data/vendor/brotli/c/enc/brotli_bit_stream.c +132 -110
  39. data/vendor/brotli/c/enc/brotli_bit_stream.h +11 -6
  40. data/vendor/brotli/c/enc/cluster.c +10 -9
  41. data/vendor/brotli/c/enc/cluster.h +7 -6
  42. data/vendor/brotli/c/enc/cluster_inc.h +25 -20
  43. data/vendor/brotli/c/enc/command.c +1 -1
  44. data/vendor/brotli/c/enc/command.h +5 -4
  45. data/vendor/brotli/c/enc/compound_dictionary.c +207 -0
  46. data/vendor/brotli/c/enc/compound_dictionary.h +74 -0
  47. data/vendor/brotli/c/enc/compress_fragment.c +93 -83
  48. data/vendor/brotli/c/enc/compress_fragment.h +32 -7
  49. data/vendor/brotli/c/enc/compress_fragment_two_pass.c +99 -87
  50. data/vendor/brotli/c/enc/compress_fragment_two_pass.h +21 -3
  51. data/vendor/brotli/c/enc/dictionary_hash.c +3 -1
  52. data/vendor/brotli/c/enc/encode.c +473 -404
  53. data/vendor/brotli/c/enc/encoder_dict.c +611 -4
  54. data/vendor/brotli/c/enc/encoder_dict.h +117 -3
  55. data/vendor/brotli/c/enc/entropy_encode.c +3 -2
  56. data/vendor/brotli/c/enc/entropy_encode.h +2 -1
  57. data/vendor/brotli/c/enc/entropy_encode_static.h +5 -2
  58. data/vendor/brotli/c/enc/fast_log.c +1 -1
  59. data/vendor/brotli/c/enc/fast_log.h +2 -1
  60. data/vendor/brotli/c/enc/find_match_length.h +15 -22
  61. data/vendor/brotli/c/enc/hash.h +285 -45
  62. data/vendor/brotli/c/enc/hash_composite_inc.h +26 -11
  63. data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +20 -18
  64. data/vendor/brotli/c/enc/hash_longest_match64_inc.h +34 -39
  65. data/vendor/brotli/c/enc/hash_longest_match_inc.h +6 -10
  66. data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -4
  67. data/vendor/brotli/c/enc/hash_rolling_inc.h +4 -4
  68. data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +6 -5
  69. data/vendor/brotli/c/enc/histogram.c +4 -4
  70. data/vendor/brotli/c/enc/histogram.h +7 -6
  71. data/vendor/brotli/c/enc/literal_cost.c +20 -15
  72. data/vendor/brotli/c/enc/literal_cost.h +4 -2
  73. data/vendor/brotli/c/enc/memory.c +29 -5
  74. data/vendor/brotli/c/enc/memory.h +19 -2
  75. data/vendor/brotli/c/enc/metablock.c +72 -58
  76. data/vendor/brotli/c/enc/metablock.h +9 -8
  77. data/vendor/brotli/c/enc/metablock_inc.h +8 -6
  78. data/vendor/brotli/c/enc/params.h +4 -3
  79. data/vendor/brotli/c/enc/prefix.h +3 -2
  80. data/vendor/brotli/c/enc/quality.h +40 -3
  81. data/vendor/brotli/c/enc/ringbuffer.h +4 -3
  82. data/vendor/brotli/c/enc/state.h +104 -0
  83. data/vendor/brotli/c/enc/static_dict.c +60 -4
  84. data/vendor/brotli/c/enc/static_dict.h +3 -2
  85. data/vendor/brotli/c/enc/static_dict_lut.h +2 -0
  86. data/vendor/brotli/c/enc/utf8_util.c +1 -1
  87. data/vendor/brotli/c/enc/utf8_util.h +2 -1
  88. data/vendor/brotli/c/enc/write_bits.h +2 -1
  89. data/vendor/brotli/c/include/brotli/decode.h +67 -2
  90. data/vendor/brotli/c/include/brotli/encode.h +55 -2
  91. data/vendor/brotli/c/include/brotli/port.h +28 -11
  92. data/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
  93. metadata +9 -3
@@ -10,33 +10,49 @@
10
10
  #ifndef BROTLI_ENC_HASH_H_
11
11
  #define BROTLI_ENC_HASH_H_
12
12
 
13
+ #include <stdlib.h> /* exit */
13
14
  #include <string.h> /* memcmp, memset */
14
15
 
16
+ #include <brotli/types.h>
17
+
15
18
  #include "../common/constants.h"
16
19
  #include "../common/dictionary.h"
17
20
  #include "../common/platform.h"
18
- #include <brotli/types.h>
19
- #include "./encoder_dict.h"
20
- #include "./fast_log.h"
21
- #include "./find_match_length.h"
22
- #include "./memory.h"
23
- #include "./quality.h"
24
- #include "./static_dict.h"
21
+ #include "compound_dictionary.h"
22
+ #include "encoder_dict.h"
23
+ #include "fast_log.h"
24
+ #include "find_match_length.h"
25
+ #include "memory.h"
26
+ #include "quality.h"
27
+ #include "static_dict.h"
25
28
 
26
29
  #if defined(__cplusplus) || defined(c_plusplus)
27
30
  extern "C" {
28
31
  #endif
29
32
 
30
33
  typedef struct {
31
- /* Dynamically allocated area; first member for quickest access. */
32
- void* extra;
34
+ /**
35
+ * Dynamically allocated areas; regular hasher uses one or two allocations;
36
+ * "composite" hasher uses up to 4 allocations.
37
+ */
38
+ void* extra[4];
39
+
40
+ /**
41
+ * False before the first invocation of HasherSetup (where "extra" memory
42
+ * is allocated).
43
+ */
44
+ BROTLI_BOOL is_setup_;
33
45
 
34
46
  size_t dict_num_lookups;
35
47
  size_t dict_num_matches;
36
48
 
37
49
  BrotliHasherParams params;
38
50
 
39
- /* False if hasher needs to be "prepared" before use. */
51
+ /**
52
+ * False if hasher needs to be "prepared" before use (before the first
53
+ * invocation of HasherSetup or after HasherReset). "preparation" is hasher
54
+ * data initialization (using input ringbuffer).
55
+ */
40
56
  BROTLI_BOOL is_prepared_;
41
57
  } HasherCommon;
42
58
 
@@ -62,8 +78,7 @@ typedef struct HasherSearchResult {
62
78
  for this use.
63
79
  * The number has been tuned heuristically against compression benchmarks. */
64
80
  static const uint32_t kHashMul32 = 0x1E35A7BD;
65
- static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
66
- static const uint64_t kHashMul64Long =
81
+ static const uint64_t kHashMul64 =
67
82
  BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
68
83
 
69
84
  static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
@@ -232,7 +247,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
232
247
  #define BUCKET_BITS 17
233
248
  #define MAX_TREE_SEARCH_DEPTH 64
234
249
  #define MAX_TREE_COMP_LENGTH 128
235
- #include "./hash_to_binary_tree_inc.h" /* NOLINT(build/include) */
250
+ #include "hash_to_binary_tree_inc.h" /* NOLINT(build/include) */
236
251
  #undef MAX_TREE_SEARCH_DEPTH
237
252
  #undef MAX_TREE_COMP_LENGTH
238
253
  #undef BUCKET_BITS
@@ -249,7 +264,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
249
264
  #define BUCKET_SWEEP_BITS 0
250
265
  #define HASH_LEN 5
251
266
  #define USE_DICTIONARY 1
252
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
267
+ #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
253
268
  #undef BUCKET_SWEEP_BITS
254
269
  #undef USE_DICTIONARY
255
270
  #undef HASHER
@@ -257,7 +272,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
257
272
  #define HASHER() H3
258
273
  #define BUCKET_SWEEP_BITS 1
259
274
  #define USE_DICTIONARY 0
260
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
275
+ #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
261
276
  #undef USE_DICTIONARY
262
277
  #undef BUCKET_SWEEP_BITS
263
278
  #undef BUCKET_BITS
@@ -267,7 +282,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
267
282
  #define BUCKET_BITS 17
268
283
  #define BUCKET_SWEEP_BITS 2
269
284
  #define USE_DICTIONARY 1
270
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
285
+ #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
271
286
  #undef USE_DICTIONARY
272
287
  #undef HASH_LEN
273
288
  #undef BUCKET_SWEEP_BITS
@@ -275,11 +290,11 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
275
290
  #undef HASHER
276
291
 
277
292
  #define HASHER() H5
278
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
293
+ #include "hash_longest_match_inc.h" /* NOLINT(build/include) */
279
294
  #undef HASHER
280
295
 
281
296
  #define HASHER() H6
282
- #include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */
297
+ #include "hash_longest_match64_inc.h" /* NOLINT(build/include) */
283
298
  #undef HASHER
284
299
 
285
300
  #define BUCKET_BITS 15
@@ -288,13 +303,13 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
288
303
  #define NUM_BANKS 1
289
304
  #define BANK_BITS 16
290
305
  #define HASHER() H40
291
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
306
+ #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
292
307
  #undef HASHER
293
308
  #undef NUM_LAST_DISTANCES_TO_CHECK
294
309
 
295
310
  #define NUM_LAST_DISTANCES_TO_CHECK 10
296
311
  #define HASHER() H41
297
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
312
+ #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
298
313
  #undef HASHER
299
314
  #undef NUM_LAST_DISTANCES_TO_CHECK
300
315
  #undef NUM_BANKS
@@ -304,7 +319,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
304
319
  #define NUM_BANKS 512
305
320
  #define BANK_BITS 9
306
321
  #define HASHER() H42
307
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
322
+ #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
308
323
  #undef HASHER
309
324
  #undef NUM_LAST_DISTANCES_TO_CHECK
310
325
  #undef NUM_BANKS
@@ -317,7 +332,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
317
332
  #define BUCKET_SWEEP_BITS 2
318
333
  #define HASH_LEN 7
319
334
  #define USE_DICTIONARY 0
320
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
335
+ #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
321
336
  #undef USE_DICTIONARY
322
337
  #undef HASH_LEN
323
338
  #undef BUCKET_SWEEP_BITS
@@ -331,14 +346,14 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
331
346
  #define JUMP 4
332
347
  #define NUMBUCKETS 16777216
333
348
  #define MASK ((NUMBUCKETS * 64) - 1)
334
- #include "./hash_rolling_inc.h" /* NOLINT(build/include) */
349
+ #include "hash_rolling_inc.h" /* NOLINT(build/include) */
335
350
  #undef JUMP
336
351
  #undef HASHER
337
352
 
338
353
 
339
354
  #define HASHER() HROLLING
340
355
  #define JUMP 1
341
- #include "./hash_rolling_inc.h" /* NOLINT(build/include) */
356
+ #include "hash_rolling_inc.h" /* NOLINT(build/include) */
342
357
  #undef MASK
343
358
  #undef NUMBUCKETS
344
359
  #undef JUMP
@@ -348,7 +363,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
348
363
  #define HASHER() H35
349
364
  #define HASHER_A H3
350
365
  #define HASHER_B HROLLING_FAST
351
- #include "./hash_composite_inc.h" /* NOLINT(build/include) */
366
+ #include "hash_composite_inc.h" /* NOLINT(build/include) */
352
367
  #undef HASHER_A
353
368
  #undef HASHER_B
354
369
  #undef HASHER
@@ -356,7 +371,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
356
371
  #define HASHER() H55
357
372
  #define HASHER_A H54
358
373
  #define HASHER_B HROLLING_FAST
359
- #include "./hash_composite_inc.h" /* NOLINT(build/include) */
374
+ #include "hash_composite_inc.h" /* NOLINT(build/include) */
360
375
  #undef HASHER_A
361
376
  #undef HASHER_B
362
377
  #undef HASHER
@@ -364,7 +379,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
364
379
  #define HASHER() H65
365
380
  #define HASHER_A H6
366
381
  #define HASHER_B HROLLING
367
- #include "./hash_composite_inc.h" /* NOLINT(build/include) */
382
+ #include "hash_composite_inc.h" /* NOLINT(build/include) */
368
383
  #undef HASHER_A
369
384
  #undef HASHER_B
370
385
  #undef HASHER
@@ -391,43 +406,55 @@ typedef struct {
391
406
 
392
407
  /* MUST be invoked before any other method. */
393
408
  static BROTLI_INLINE void HasherInit(Hasher* hasher) {
394
- hasher->common.extra = NULL;
409
+ hasher->common.is_setup_ = BROTLI_FALSE;
410
+ hasher->common.extra[0] = NULL;
411
+ hasher->common.extra[1] = NULL;
412
+ hasher->common.extra[2] = NULL;
413
+ hasher->common.extra[3] = NULL;
395
414
  }
396
415
 
397
416
  static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) {
398
- if (hasher->common.extra == NULL) return;
399
- BROTLI_FREE(m, hasher->common.extra);
417
+ if (hasher->common.extra[0] != NULL) BROTLI_FREE(m, hasher->common.extra[0]);
418
+ if (hasher->common.extra[1] != NULL) BROTLI_FREE(m, hasher->common.extra[1]);
419
+ if (hasher->common.extra[2] != NULL) BROTLI_FREE(m, hasher->common.extra[2]);
420
+ if (hasher->common.extra[3] != NULL) BROTLI_FREE(m, hasher->common.extra[3]);
400
421
  }
401
422
 
402
423
  static BROTLI_INLINE void HasherReset(Hasher* hasher) {
403
424
  hasher->common.is_prepared_ = BROTLI_FALSE;
404
425
  }
405
426
 
406
- static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
407
- BROTLI_BOOL one_shot, const size_t input_size) {
427
+ static BROTLI_INLINE void HasherSize(const BrotliEncoderParams* params,
428
+ BROTLI_BOOL one_shot, const size_t input_size, size_t* alloc_size) {
408
429
  switch (params->hasher.type) {
409
- #define SIZE_(N) \
410
- case N: \
411
- return HashMemAllocInBytesH ## N(params, one_shot, input_size);
430
+ #define SIZE_(N) \
431
+ case N: \
432
+ HashMemAllocInBytesH ## N(params, one_shot, input_size, alloc_size); \
433
+ break;
412
434
  FOR_ALL_HASHERS(SIZE_)
413
435
  #undef SIZE_
414
436
  default:
415
437
  break;
416
438
  }
417
- return 0; /* Default case. */
418
439
  }
419
440
 
420
441
  static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
421
442
  BrotliEncoderParams* params, const uint8_t* data, size_t position,
422
443
  size_t input_size, BROTLI_BOOL is_last) {
423
444
  BROTLI_BOOL one_shot = (position == 0 && is_last);
424
- if (hasher->common.extra == NULL) {
425
- size_t alloc_size;
445
+ if (!hasher->common.is_setup_) {
446
+ size_t alloc_size[4] = {0};
447
+ size_t i;
426
448
  ChooseHasher(params, &params->hasher);
427
- alloc_size = HasherSize(params, one_shot, input_size);
428
- hasher->common.extra = BROTLI_ALLOC(m, uint8_t, alloc_size);
429
- if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra)) return;
430
449
  hasher->common.params = params->hasher;
450
+ hasher->common.dict_num_lookups = 0;
451
+ hasher->common.dict_num_matches = 0;
452
+ HasherSize(params, one_shot, input_size, alloc_size);
453
+ for (i = 0; i < 4; ++i) {
454
+ if (alloc_size[i] == 0) continue;
455
+ hasher->common.extra[i] = BROTLI_ALLOC(m, uint8_t, alloc_size[i]);
456
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra[i])) return;
457
+ }
431
458
  switch (hasher->common.params.type) {
432
459
  #define INITIALIZE_(N) \
433
460
  case N: \
@@ -440,6 +467,7 @@ static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
440
467
  break;
441
468
  }
442
469
  HasherReset(hasher);
470
+ hasher->common.is_setup_ = BROTLI_TRUE;
443
471
  }
444
472
 
445
473
  if (!hasher->common.is_prepared_) {
@@ -454,10 +482,6 @@ static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
454
482
  #undef PREPARE_
455
483
  default: break;
456
484
  }
457
- if (position == 0) {
458
- hasher->common.dict_num_lookups = 0;
459
- hasher->common.dict_num_matches = 0;
460
- }
461
485
  hasher->common.is_prepared_ = BROTLI_TRUE;
462
486
  }
463
487
  }
@@ -481,6 +505,222 @@ static BROTLI_INLINE void InitOrStitchToPreviousBlock(
481
505
  }
482
506
  }
483
507
 
508
+ /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
509
+ to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
510
+ static BROTLI_INLINE void FindCompoundDictionaryMatch(
511
+ const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
512
+ const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
513
+ const size_t cur_ix, const size_t max_length, const size_t distance_offset,
514
+ const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
515
+ const uint32_t source_size = self->source_size;
516
+ const size_t boundary = distance_offset - source_size;
517
+ const uint32_t hash_bits = self->hash_bits;
518
+ const uint32_t bucket_bits = self->bucket_bits;
519
+ const uint32_t slot_bits = self->slot_bits;
520
+
521
+ const uint32_t hash_shift = 64u - bucket_bits;
522
+ const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
523
+ const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
524
+
525
+ const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
526
+ const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
527
+ const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
528
+ const uint8_t* source = NULL;
529
+
530
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
531
+ score_t best_score = out->score;
532
+ size_t best_len = out->len;
533
+ size_t i;
534
+ const uint64_t h =
535
+ (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
536
+ kPreparedDictionaryHashMul64Long;
537
+ const uint32_t key = (uint32_t)(h >> hash_shift);
538
+ const uint32_t slot = key & slot_mask;
539
+ const uint32_t head = heads[key];
540
+ const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
541
+ uint32_t item = (head == 0xFFFF) ? 1 : 0;
542
+
543
+ const void* tail = (void*)&items[self->num_items];
544
+ if (self->magic == kPreparedDictionaryMagic) {
545
+ source = (const uint8_t*)tail;
546
+ } else {
547
+ /* kLeanPreparedDictionaryMagic */
548
+ source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
549
+ }
550
+
551
+ for (i = 0; i < 4; ++i) {
552
+ const size_t distance = (size_t)distance_cache[i];
553
+ size_t offset;
554
+ size_t limit;
555
+ size_t len;
556
+ if (distance <= boundary || distance > distance_offset) continue;
557
+ offset = distance_offset - distance;
558
+ limit = source_size - offset;
559
+ limit = limit > max_length ? max_length : limit;
560
+ len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked],
561
+ limit);
562
+ if (len >= 2) {
563
+ score_t score = BackwardReferenceScoreUsingLastDistance(len);
564
+ if (best_score < score) {
565
+ if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
566
+ if (best_score < score) {
567
+ best_score = score;
568
+ if (len > best_len) best_len = len;
569
+ out->len = len;
570
+ out->len_code_delta = 0;
571
+ out->distance = distance;
572
+ out->score = best_score;
573
+ }
574
+ }
575
+ }
576
+ }
577
+ while (item == 0) {
578
+ size_t offset;
579
+ size_t distance;
580
+ size_t limit;
581
+ item = *chain;
582
+ chain++;
583
+ offset = item & 0x7FFFFFFF;
584
+ item &= 0x80000000;
585
+ distance = distance_offset - offset;
586
+ limit = source_size - offset;
587
+ limit = (limit > max_length) ? max_length : limit;
588
+ if (distance > max_distance) continue;
589
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
590
+ best_len >= limit ||
591
+ data[cur_ix_masked + best_len] != source[offset + best_len]) {
592
+ continue;
593
+ }
594
+ {
595
+ const size_t len = FindMatchLengthWithLimit(&source[offset],
596
+ &data[cur_ix_masked],
597
+ limit);
598
+ if (len >= 4) {
599
+ score_t score = BackwardReferenceScore(len, distance);
600
+ if (best_score < score) {
601
+ best_score = score;
602
+ best_len = len;
603
+ out->len = best_len;
604
+ out->len_code_delta = 0;
605
+ out->distance = distance;
606
+ out->score = best_score;
607
+ }
608
+ }
609
+ }
610
+ }
611
+ }
612
+
613
+ /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
614
+ to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
615
+ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
616
+ const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
617
+ const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
618
+ const size_t max_length, const size_t distance_offset,
619
+ const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
620
+ const uint32_t source_size = self->source_size;
621
+ const uint32_t hash_bits = self->hash_bits;
622
+ const uint32_t bucket_bits = self->bucket_bits;
623
+ const uint32_t slot_bits = self->slot_bits;
624
+
625
+ const uint32_t hash_shift = 64u - bucket_bits;
626
+ const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
627
+ const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
628
+
629
+ const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
630
+ const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
631
+ const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
632
+ const uint8_t* source = NULL;
633
+
634
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
635
+ size_t best_len = min_length;
636
+ const uint64_t h =
637
+ (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
638
+ kPreparedDictionaryHashMul64Long;
639
+ const uint32_t key = (uint32_t)(h >> hash_shift);
640
+ const uint32_t slot = key & slot_mask;
641
+ const uint32_t head = heads[key];
642
+ const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
643
+ uint32_t item = (head == 0xFFFF) ? 1 : 0;
644
+ size_t found = 0;
645
+
646
+ const void* tail = (void*)&items[self->num_items];
647
+ if (self->magic == kPreparedDictionaryMagic) {
648
+ source = (const uint8_t*)tail;
649
+ } else {
650
+ /* kLeanPreparedDictionaryMagic */
651
+ source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
652
+ }
653
+
654
+ while (item == 0) {
655
+ size_t offset;
656
+ size_t distance;
657
+ size_t limit;
658
+ size_t len;
659
+ item = *chain;
660
+ chain++;
661
+ offset = item & 0x7FFFFFFF;
662
+ item &= 0x80000000;
663
+ distance = distance_offset - offset;
664
+ limit = source_size - offset;
665
+ limit = (limit > max_length) ? max_length : limit;
666
+ if (distance > max_distance) continue;
667
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
668
+ best_len >= limit ||
669
+ data[cur_ix_masked + best_len] != source[offset + best_len]) {
670
+ continue;
671
+ }
672
+ len = FindMatchLengthWithLimit(
673
+ &source[offset], &data[cur_ix_masked], limit);
674
+ if (len > best_len) {
675
+ best_len = len;
676
+ InitBackwardMatch(matches++, distance, len);
677
+ found++;
678
+ if (found == match_limit) break;
679
+ }
680
+ }
681
+ return found;
682
+ }
683
+
684
+ static BROTLI_INLINE void LookupCompoundDictionaryMatch(
685
+ const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
686
+ const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
687
+ const size_t cur_ix, const size_t max_length,
688
+ const size_t max_ring_buffer_distance, const size_t max_distance,
689
+ HasherSearchResult* sr) {
690
+ size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
691
+ size_t d;
692
+ for (d = 0; d < addon->num_chunks; ++d) {
693
+ /* Only one prepared dictionary type is currently supported. */
694
+ FindCompoundDictionaryMatch(
695
+ (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
696
+ distance_cache, cur_ix, max_length,
697
+ base_offset - addon->chunk_offsets[d], max_distance, sr);
698
+ }
699
+ }
700
+
701
+ static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
702
+ const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
703
+ const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length,
704
+ const size_t max_length, const size_t max_ring_buffer_distance,
705
+ const size_t max_distance, BackwardMatch* matches,
706
+ size_t match_limit) {
707
+ size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
708
+ size_t d;
709
+ size_t total_found = 0;
710
+ for (d = 0; d < addon->num_chunks; ++d) {
711
+ /* Only one prepared dictionary type is currently supported. */
712
+ total_found += FindAllCompoundDictionaryMatches(
713
+ (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
714
+ cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d],
715
+ max_distance, matches + total_found, match_limit - total_found);
716
+ if (total_found == match_limit) break;
717
+ if (total_found > 0) {
718
+ min_length = BackwardMatchLength(&matches[total_found - 1]);
719
+ }
720
+ }
721
+ return total_found;
722
+ }
723
+
484
724
  #if defined(__cplusplus) || defined(c_plusplus)
485
725
  } /* extern "C" */
486
726
  #endif
@@ -30,10 +30,10 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
30
30
  typedef struct HashComposite {
31
31
  HASHER_A ha;
32
32
  HASHER_B hb;
33
+ HasherCommon ha_common;
33
34
  HasherCommon hb_common;
34
35
 
35
36
  /* Shortcuts. */
36
- void* extra;
37
37
  HasherCommon* common;
38
38
 
39
39
  BROTLI_BOOL fresh;
@@ -43,12 +43,12 @@ typedef struct HashComposite {
43
43
  static void FN(Initialize)(HasherCommon* common,
44
44
  HashComposite* BROTLI_RESTRICT self, const BrotliEncoderParams* params) {
45
45
  self->common = common;
46
- self->extra = common->extra;
47
46
 
47
+ self->ha_common = *self->common;
48
48
  self->hb_common = *self->common;
49
49
  self->fresh = BROTLI_TRUE;
50
50
  self->params = params;
51
- /* TODO: Initialize of the hashers is defered to Prepare (and params
51
+ /* TODO(lode): Initialize of the hashers is deferred to Prepare (and params
52
52
  remembered here) because we don't get the one_shot and input_size params
53
53
  here that are needed to know the memory size of them. Instead provide
54
54
  those params to all hashers FN(Initialize) */
@@ -59,21 +59,36 @@ static void FN(Prepare)(
59
59
  size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
60
60
  if (self->fresh) {
61
61
  self->fresh = BROTLI_FALSE;
62
- self->hb_common.extra = (uint8_t*)self->extra +
63
- FN_A(HashMemAllocInBytes)(self->params, one_shot, input_size);
64
-
65
- FN_A(Initialize)(self->common, &self->ha, self->params);
62
+ self->ha_common.extra[0] = self->common->extra[0];
63
+ self->ha_common.extra[1] = self->common->extra[1];
64
+ self->ha_common.extra[2] = NULL;
65
+ self->ha_common.extra[3] = NULL;
66
+ self->hb_common.extra[0] = self->common->extra[2];
67
+ self->hb_common.extra[1] = self->common->extra[3];
68
+ self->hb_common.extra[2] = NULL;
69
+ self->hb_common.extra[3] = NULL;
70
+
71
+ FN_A(Initialize)(&self->ha_common, &self->ha, self->params);
66
72
  FN_B(Initialize)(&self->hb_common, &self->hb, self->params);
67
73
  }
68
74
  FN_A(Prepare)(&self->ha, one_shot, input_size, data);
69
75
  FN_B(Prepare)(&self->hb, one_shot, input_size, data);
70
76
  }
71
77
 
72
- static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
78
+ static BROTLI_INLINE void FN(HashMemAllocInBytes)(
73
79
  const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
74
- size_t input_size) {
75
- return FN_A(HashMemAllocInBytes)(params, one_shot, input_size) +
76
- FN_B(HashMemAllocInBytes)(params, one_shot, input_size);
80
+ size_t input_size, size_t* alloc_size) {
81
+ size_t alloc_size_a[4] = {0};
82
+ size_t alloc_size_b[4] = {0};
83
+ FN_A(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_a);
84
+ FN_B(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_b);
85
+ /* Should never happen. */
86
+ if (alloc_size_a[2] != 0 || alloc_size_a[3] != 0) exit(EXIT_FAILURE);
87
+ if (alloc_size_b[2] != 0 || alloc_size_b[3] != 0) exit(EXIT_FAILURE);
88
+ alloc_size[0] = alloc_size_a[0];
89
+ alloc_size[1] = alloc_size_a[1];
90
+ alloc_size[2] = alloc_size_b[0];
91
+ alloc_size[3] = alloc_size_b[1];
77
92
  }
78
93
 
79
94
  static BROTLI_INLINE void FN(Store)(HashComposite* BROTLI_RESTRICT self,
@@ -49,7 +49,7 @@ typedef struct HashForgetfulChain {
49
49
  size_t max_hops;
50
50
 
51
51
  /* Shortcuts. */
52
- void* extra;
52
+ void* extra[2];
53
53
  HasherCommon* common;
54
54
 
55
55
  /* --- Dynamic size members --- */
@@ -77,14 +77,15 @@ static uint8_t* FN(TinyHash)(void* extra) {
77
77
  }
78
78
 
79
79
  static FN(Bank)* FN(Banks)(void* extra) {
80
- return (FN(Bank)*)(&FN(TinyHash)(extra)[65536]);
80
+ return (FN(Bank)*)(extra);
81
81
  }
82
82
 
83
83
  static void FN(Initialize)(
84
84
  HasherCommon* common, HashForgetfulChain* BROTLI_RESTRICT self,
85
85
  const BrotliEncoderParams* params) {
86
86
  self->common = common;
87
- self->extra = common->extra;
87
+ self->extra[0] = common->extra[0];
88
+ self->extra[1] = common->extra[1];
88
89
 
89
90
  self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
90
91
  }
@@ -92,9 +93,9 @@ static void FN(Initialize)(
92
93
  static void FN(Prepare)(
93
94
  HashForgetfulChain* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
94
95
  size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
95
- uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
96
- uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
97
- uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
96
+ uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
97
+ uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
98
+ uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
98
99
  /* Partial preparation is 100 times slower (per socket). */
99
100
  size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
100
101
  if (one_shot && input_size <= partial_prepare_threshold) {
@@ -116,24 +117,25 @@ static void FN(Prepare)(
116
117
  memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
117
118
  }
118
119
 
119
- static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
120
+ static BROTLI_INLINE void FN(HashMemAllocInBytes)(
120
121
  const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
121
- size_t input_size) {
122
+ size_t input_size, size_t* alloc_size) {
122
123
  BROTLI_UNUSED(params);
123
124
  BROTLI_UNUSED(one_shot);
124
125
  BROTLI_UNUSED(input_size);
125
- return sizeof(uint32_t) * BUCKET_SIZE + sizeof(uint16_t) * BUCKET_SIZE +
126
- sizeof(uint8_t) * 65536 + sizeof(FN(Bank)) * NUM_BANKS;
126
+ alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE +
127
+ sizeof(uint16_t) * BUCKET_SIZE + sizeof(uint8_t) * 65536;
128
+ alloc_size[1] = sizeof(FN(Bank)) * NUM_BANKS;
127
129
  }
128
130
 
129
131
  /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
130
132
  node to corresponding chain; also update tiny_hash for current position. */
131
133
  static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
132
134
  const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
133
- uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
134
- uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
135
- uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
136
- FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
135
+ uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
136
+ uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
137
+ uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
138
+ FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
137
139
  const size_t key = FN(HashBytes)(&data[ix & mask]);
138
140
  const size_t bank = key & (NUM_BANKS - 1);
139
141
  const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
@@ -196,10 +198,10 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
196
198
  const size_t cur_ix, const size_t max_length, const size_t max_backward,
197
199
  const size_t dictionary_distance, const size_t max_distance,
198
200
  HasherSearchResult* BROTLI_RESTRICT out) {
199
- uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
200
- uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
201
- uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra);
202
- FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
201
+ uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
202
+ uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
203
+ uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra[0]);
204
+ FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
203
205
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
204
206
  /* Don't accept a short copy from far away. */
205
207
  score_t min_score = out->score;