brotli 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +6 -3
  3. data/.github/workflows/publish.yml +7 -17
  4. data/.gitmodules +1 -1
  5. data/README.md +2 -2
  6. data/ext/brotli/brotli.c +8 -0
  7. data/ext/brotli/extconf.rb +6 -0
  8. data/lib/brotli/version.rb +1 -1
  9. data/test/brotli_test.rb +14 -1
  10. data/test/test_helper.rb +1 -0
  11. data/vendor/brotli/c/common/constants.c +1 -1
  12. data/vendor/brotli/c/common/constants.h +2 -1
  13. data/vendor/brotli/c/common/context.c +1 -1
  14. data/vendor/brotli/c/common/dictionary.c +5 -3
  15. data/vendor/brotli/c/common/platform.c +2 -1
  16. data/vendor/brotli/c/common/platform.h +60 -113
  17. data/vendor/brotli/c/common/shared_dictionary.c +521 -0
  18. data/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
  19. data/vendor/brotli/c/common/transform.c +1 -1
  20. data/vendor/brotli/c/common/version.h +31 -6
  21. data/vendor/brotli/c/dec/bit_reader.c +10 -8
  22. data/vendor/brotli/c/dec/bit_reader.h +172 -100
  23. data/vendor/brotli/c/dec/decode.c +467 -200
  24. data/vendor/brotli/c/dec/huffman.c +7 -4
  25. data/vendor/brotli/c/dec/huffman.h +2 -1
  26. data/vendor/brotli/c/dec/prefix.h +2 -1
  27. data/vendor/brotli/c/dec/state.c +33 -9
  28. data/vendor/brotli/c/dec/state.h +70 -35
  29. data/vendor/brotli/c/enc/backward_references.c +81 -19
  30. data/vendor/brotli/c/enc/backward_references.h +5 -4
  31. data/vendor/brotli/c/enc/backward_references_hq.c +148 -52
  32. data/vendor/brotli/c/enc/backward_references_hq.h +6 -5
  33. data/vendor/brotli/c/enc/backward_references_inc.h +31 -5
  34. data/vendor/brotli/c/enc/bit_cost.c +8 -7
  35. data/vendor/brotli/c/enc/bit_cost.h +5 -4
  36. data/vendor/brotli/c/enc/block_splitter.c +37 -14
  37. data/vendor/brotli/c/enc/block_splitter.h +5 -4
  38. data/vendor/brotli/c/enc/block_splitter_inc.h +86 -45
  39. data/vendor/brotli/c/enc/brotli_bit_stream.c +132 -110
  40. data/vendor/brotli/c/enc/brotli_bit_stream.h +11 -6
  41. data/vendor/brotli/c/enc/cluster.c +10 -9
  42. data/vendor/brotli/c/enc/cluster.h +7 -6
  43. data/vendor/brotli/c/enc/cluster_inc.h +25 -20
  44. data/vendor/brotli/c/enc/command.c +1 -1
  45. data/vendor/brotli/c/enc/command.h +5 -4
  46. data/vendor/brotli/c/enc/compound_dictionary.c +207 -0
  47. data/vendor/brotli/c/enc/compound_dictionary.h +74 -0
  48. data/vendor/brotli/c/enc/compress_fragment.c +93 -83
  49. data/vendor/brotli/c/enc/compress_fragment.h +32 -7
  50. data/vendor/brotli/c/enc/compress_fragment_two_pass.c +99 -87
  51. data/vendor/brotli/c/enc/compress_fragment_two_pass.h +21 -3
  52. data/vendor/brotli/c/enc/dictionary_hash.c +3 -1
  53. data/vendor/brotli/c/enc/encode.c +473 -404
  54. data/vendor/brotli/c/enc/encoder_dict.c +611 -4
  55. data/vendor/brotli/c/enc/encoder_dict.h +117 -3
  56. data/vendor/brotli/c/enc/entropy_encode.c +3 -2
  57. data/vendor/brotli/c/enc/entropy_encode.h +2 -1
  58. data/vendor/brotli/c/enc/entropy_encode_static.h +5 -2
  59. data/vendor/brotli/c/enc/fast_log.c +1 -1
  60. data/vendor/brotli/c/enc/fast_log.h +2 -1
  61. data/vendor/brotli/c/enc/find_match_length.h +15 -22
  62. data/vendor/brotli/c/enc/hash.h +285 -45
  63. data/vendor/brotli/c/enc/hash_composite_inc.h +26 -11
  64. data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +20 -18
  65. data/vendor/brotli/c/enc/hash_longest_match64_inc.h +34 -39
  66. data/vendor/brotli/c/enc/hash_longest_match_inc.h +6 -10
  67. data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -4
  68. data/vendor/brotli/c/enc/hash_rolling_inc.h +4 -4
  69. data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +6 -5
  70. data/vendor/brotli/c/enc/histogram.c +4 -4
  71. data/vendor/brotli/c/enc/histogram.h +7 -6
  72. data/vendor/brotli/c/enc/literal_cost.c +20 -15
  73. data/vendor/brotli/c/enc/literal_cost.h +4 -2
  74. data/vendor/brotli/c/enc/memory.c +29 -5
  75. data/vendor/brotli/c/enc/memory.h +19 -2
  76. data/vendor/brotli/c/enc/metablock.c +72 -58
  77. data/vendor/brotli/c/enc/metablock.h +9 -8
  78. data/vendor/brotli/c/enc/metablock_inc.h +8 -6
  79. data/vendor/brotli/c/enc/params.h +4 -3
  80. data/vendor/brotli/c/enc/prefix.h +3 -2
  81. data/vendor/brotli/c/enc/quality.h +40 -3
  82. data/vendor/brotli/c/enc/ringbuffer.h +4 -3
  83. data/vendor/brotli/c/enc/state.h +104 -0
  84. data/vendor/brotli/c/enc/static_dict.c +60 -4
  85. data/vendor/brotli/c/enc/static_dict.h +3 -2
  86. data/vendor/brotli/c/enc/static_dict_lut.h +2 -0
  87. data/vendor/brotli/c/enc/utf8_util.c +1 -1
  88. data/vendor/brotli/c/enc/utf8_util.h +2 -1
  89. data/vendor/brotli/c/enc/write_bits.h +2 -1
  90. data/vendor/brotli/c/include/brotli/decode.h +67 -2
  91. data/vendor/brotli/c/include/brotli/encode.h +55 -2
  92. data/vendor/brotli/c/include/brotli/port.h +28 -11
  93. data/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
  94. metadata +9 -3
@@ -10,33 +10,49 @@
10
10
  #ifndef BROTLI_ENC_HASH_H_
11
11
  #define BROTLI_ENC_HASH_H_
12
12
 
13
+ #include <stdlib.h> /* exit */
13
14
  #include <string.h> /* memcmp, memset */
14
15
 
16
+ #include <brotli/types.h>
17
+
15
18
  #include "../common/constants.h"
16
19
  #include "../common/dictionary.h"
17
20
  #include "../common/platform.h"
18
- #include <brotli/types.h>
19
- #include "./encoder_dict.h"
20
- #include "./fast_log.h"
21
- #include "./find_match_length.h"
22
- #include "./memory.h"
23
- #include "./quality.h"
24
- #include "./static_dict.h"
21
+ #include "compound_dictionary.h"
22
+ #include "encoder_dict.h"
23
+ #include "fast_log.h"
24
+ #include "find_match_length.h"
25
+ #include "memory.h"
26
+ #include "quality.h"
27
+ #include "static_dict.h"
25
28
 
26
29
  #if defined(__cplusplus) || defined(c_plusplus)
27
30
  extern "C" {
28
31
  #endif
29
32
 
30
33
  typedef struct {
31
- /* Dynamically allocated area; first member for quickest access. */
32
- void* extra;
34
+ /**
35
+ * Dynamically allocated areas; regular hasher uses one or two allocations;
36
+ * "composite" hasher uses up to 4 allocations.
37
+ */
38
+ void* extra[4];
39
+
40
+ /**
41
+ * False before the first invocation of HasherSetup (where "extra" memory
42
+ * is allocated).
43
+ */
44
+ BROTLI_BOOL is_setup_;
33
45
 
34
46
  size_t dict_num_lookups;
35
47
  size_t dict_num_matches;
36
48
 
37
49
  BrotliHasherParams params;
38
50
 
39
- /* False if hasher needs to be "prepared" before use. */
51
+ /**
52
+ * False if hasher needs to be "prepared" before use (before the first
53
+ * invocation of HasherSetup or after HasherReset). "preparation" is hasher
54
+ * data initialization (using input ringbuffer).
55
+ */
40
56
  BROTLI_BOOL is_prepared_;
41
57
  } HasherCommon;
42
58
 
@@ -62,8 +78,7 @@ typedef struct HasherSearchResult {
62
78
  for this use.
63
79
  * The number has been tuned heuristically against compression benchmarks. */
64
80
  static const uint32_t kHashMul32 = 0x1E35A7BD;
65
- static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
66
- static const uint64_t kHashMul64Long =
81
+ static const uint64_t kHashMul64 =
67
82
  BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
68
83
 
69
84
  static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
@@ -232,7 +247,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
232
247
  #define BUCKET_BITS 17
233
248
  #define MAX_TREE_SEARCH_DEPTH 64
234
249
  #define MAX_TREE_COMP_LENGTH 128
235
- #include "./hash_to_binary_tree_inc.h" /* NOLINT(build/include) */
250
+ #include "hash_to_binary_tree_inc.h" /* NOLINT(build/include) */
236
251
  #undef MAX_TREE_SEARCH_DEPTH
237
252
  #undef MAX_TREE_COMP_LENGTH
238
253
  #undef BUCKET_BITS
@@ -249,7 +264,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
249
264
  #define BUCKET_SWEEP_BITS 0
250
265
  #define HASH_LEN 5
251
266
  #define USE_DICTIONARY 1
252
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
267
+ #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
253
268
  #undef BUCKET_SWEEP_BITS
254
269
  #undef USE_DICTIONARY
255
270
  #undef HASHER
@@ -257,7 +272,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
257
272
  #define HASHER() H3
258
273
  #define BUCKET_SWEEP_BITS 1
259
274
  #define USE_DICTIONARY 0
260
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
275
+ #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
261
276
  #undef USE_DICTIONARY
262
277
  #undef BUCKET_SWEEP_BITS
263
278
  #undef BUCKET_BITS
@@ -267,7 +282,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
267
282
  #define BUCKET_BITS 17
268
283
  #define BUCKET_SWEEP_BITS 2
269
284
  #define USE_DICTIONARY 1
270
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
285
+ #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
271
286
  #undef USE_DICTIONARY
272
287
  #undef HASH_LEN
273
288
  #undef BUCKET_SWEEP_BITS
@@ -275,11 +290,11 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
275
290
  #undef HASHER
276
291
 
277
292
  #define HASHER() H5
278
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
293
+ #include "hash_longest_match_inc.h" /* NOLINT(build/include) */
279
294
  #undef HASHER
280
295
 
281
296
  #define HASHER() H6
282
- #include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */
297
+ #include "hash_longest_match64_inc.h" /* NOLINT(build/include) */
283
298
  #undef HASHER
284
299
 
285
300
  #define BUCKET_BITS 15
@@ -288,13 +303,13 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
288
303
  #define NUM_BANKS 1
289
304
  #define BANK_BITS 16
290
305
  #define HASHER() H40
291
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
306
+ #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
292
307
  #undef HASHER
293
308
  #undef NUM_LAST_DISTANCES_TO_CHECK
294
309
 
295
310
  #define NUM_LAST_DISTANCES_TO_CHECK 10
296
311
  #define HASHER() H41
297
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
312
+ #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
298
313
  #undef HASHER
299
314
  #undef NUM_LAST_DISTANCES_TO_CHECK
300
315
  #undef NUM_BANKS
@@ -304,7 +319,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
304
319
  #define NUM_BANKS 512
305
320
  #define BANK_BITS 9
306
321
  #define HASHER() H42
307
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
322
+ #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
308
323
  #undef HASHER
309
324
  #undef NUM_LAST_DISTANCES_TO_CHECK
310
325
  #undef NUM_BANKS
@@ -317,7 +332,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
317
332
  #define BUCKET_SWEEP_BITS 2
318
333
  #define HASH_LEN 7
319
334
  #define USE_DICTIONARY 0
320
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
335
+ #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
321
336
  #undef USE_DICTIONARY
322
337
  #undef HASH_LEN
323
338
  #undef BUCKET_SWEEP_BITS
@@ -331,14 +346,14 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
331
346
  #define JUMP 4
332
347
  #define NUMBUCKETS 16777216
333
348
  #define MASK ((NUMBUCKETS * 64) - 1)
334
- #include "./hash_rolling_inc.h" /* NOLINT(build/include) */
349
+ #include "hash_rolling_inc.h" /* NOLINT(build/include) */
335
350
  #undef JUMP
336
351
  #undef HASHER
337
352
 
338
353
 
339
354
  #define HASHER() HROLLING
340
355
  #define JUMP 1
341
- #include "./hash_rolling_inc.h" /* NOLINT(build/include) */
356
+ #include "hash_rolling_inc.h" /* NOLINT(build/include) */
342
357
  #undef MASK
343
358
  #undef NUMBUCKETS
344
359
  #undef JUMP
@@ -348,7 +363,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
348
363
  #define HASHER() H35
349
364
  #define HASHER_A H3
350
365
  #define HASHER_B HROLLING_FAST
351
- #include "./hash_composite_inc.h" /* NOLINT(build/include) */
366
+ #include "hash_composite_inc.h" /* NOLINT(build/include) */
352
367
  #undef HASHER_A
353
368
  #undef HASHER_B
354
369
  #undef HASHER
@@ -356,7 +371,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
356
371
  #define HASHER() H55
357
372
  #define HASHER_A H54
358
373
  #define HASHER_B HROLLING_FAST
359
- #include "./hash_composite_inc.h" /* NOLINT(build/include) */
374
+ #include "hash_composite_inc.h" /* NOLINT(build/include) */
360
375
  #undef HASHER_A
361
376
  #undef HASHER_B
362
377
  #undef HASHER
@@ -364,7 +379,7 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
364
379
  #define HASHER() H65
365
380
  #define HASHER_A H6
366
381
  #define HASHER_B HROLLING
367
- #include "./hash_composite_inc.h" /* NOLINT(build/include) */
382
+ #include "hash_composite_inc.h" /* NOLINT(build/include) */
368
383
  #undef HASHER_A
369
384
  #undef HASHER_B
370
385
  #undef HASHER
@@ -391,43 +406,55 @@ typedef struct {
391
406
 
392
407
  /* MUST be invoked before any other method. */
393
408
  static BROTLI_INLINE void HasherInit(Hasher* hasher) {
394
- hasher->common.extra = NULL;
409
+ hasher->common.is_setup_ = BROTLI_FALSE;
410
+ hasher->common.extra[0] = NULL;
411
+ hasher->common.extra[1] = NULL;
412
+ hasher->common.extra[2] = NULL;
413
+ hasher->common.extra[3] = NULL;
395
414
  }
396
415
 
397
416
  static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) {
398
- if (hasher->common.extra == NULL) return;
399
- BROTLI_FREE(m, hasher->common.extra);
417
+ if (hasher->common.extra[0] != NULL) BROTLI_FREE(m, hasher->common.extra[0]);
418
+ if (hasher->common.extra[1] != NULL) BROTLI_FREE(m, hasher->common.extra[1]);
419
+ if (hasher->common.extra[2] != NULL) BROTLI_FREE(m, hasher->common.extra[2]);
420
+ if (hasher->common.extra[3] != NULL) BROTLI_FREE(m, hasher->common.extra[3]);
400
421
  }
401
422
 
402
423
  static BROTLI_INLINE void HasherReset(Hasher* hasher) {
403
424
  hasher->common.is_prepared_ = BROTLI_FALSE;
404
425
  }
405
426
 
406
- static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
407
- BROTLI_BOOL one_shot, const size_t input_size) {
427
+ static BROTLI_INLINE void HasherSize(const BrotliEncoderParams* params,
428
+ BROTLI_BOOL one_shot, const size_t input_size, size_t* alloc_size) {
408
429
  switch (params->hasher.type) {
409
- #define SIZE_(N) \
410
- case N: \
411
- return HashMemAllocInBytesH ## N(params, one_shot, input_size);
430
+ #define SIZE_(N) \
431
+ case N: \
432
+ HashMemAllocInBytesH ## N(params, one_shot, input_size, alloc_size); \
433
+ break;
412
434
  FOR_ALL_HASHERS(SIZE_)
413
435
  #undef SIZE_
414
436
  default:
415
437
  break;
416
438
  }
417
- return 0; /* Default case. */
418
439
  }
419
440
 
420
441
  static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
421
442
  BrotliEncoderParams* params, const uint8_t* data, size_t position,
422
443
  size_t input_size, BROTLI_BOOL is_last) {
423
444
  BROTLI_BOOL one_shot = (position == 0 && is_last);
424
- if (hasher->common.extra == NULL) {
425
- size_t alloc_size;
445
+ if (!hasher->common.is_setup_) {
446
+ size_t alloc_size[4] = {0};
447
+ size_t i;
426
448
  ChooseHasher(params, &params->hasher);
427
- alloc_size = HasherSize(params, one_shot, input_size);
428
- hasher->common.extra = BROTLI_ALLOC(m, uint8_t, alloc_size);
429
- if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra)) return;
430
449
  hasher->common.params = params->hasher;
450
+ hasher->common.dict_num_lookups = 0;
451
+ hasher->common.dict_num_matches = 0;
452
+ HasherSize(params, one_shot, input_size, alloc_size);
453
+ for (i = 0; i < 4; ++i) {
454
+ if (alloc_size[i] == 0) continue;
455
+ hasher->common.extra[i] = BROTLI_ALLOC(m, uint8_t, alloc_size[i]);
456
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra[i])) return;
457
+ }
431
458
  switch (hasher->common.params.type) {
432
459
  #define INITIALIZE_(N) \
433
460
  case N: \
@@ -440,6 +467,7 @@ static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
440
467
  break;
441
468
  }
442
469
  HasherReset(hasher);
470
+ hasher->common.is_setup_ = BROTLI_TRUE;
443
471
  }
444
472
 
445
473
  if (!hasher->common.is_prepared_) {
@@ -454,10 +482,6 @@ static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
454
482
  #undef PREPARE_
455
483
  default: break;
456
484
  }
457
- if (position == 0) {
458
- hasher->common.dict_num_lookups = 0;
459
- hasher->common.dict_num_matches = 0;
460
- }
461
485
  hasher->common.is_prepared_ = BROTLI_TRUE;
462
486
  }
463
487
  }
@@ -481,6 +505,222 @@ static BROTLI_INLINE void InitOrStitchToPreviousBlock(
481
505
  }
482
506
  }
483
507
 
508
+ /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
509
+ to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
510
+ static BROTLI_INLINE void FindCompoundDictionaryMatch(
511
+ const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
512
+ const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
513
+ const size_t cur_ix, const size_t max_length, const size_t distance_offset,
514
+ const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
515
+ const uint32_t source_size = self->source_size;
516
+ const size_t boundary = distance_offset - source_size;
517
+ const uint32_t hash_bits = self->hash_bits;
518
+ const uint32_t bucket_bits = self->bucket_bits;
519
+ const uint32_t slot_bits = self->slot_bits;
520
+
521
+ const uint32_t hash_shift = 64u - bucket_bits;
522
+ const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
523
+ const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
524
+
525
+ const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
526
+ const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
527
+ const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
528
+ const uint8_t* source = NULL;
529
+
530
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
531
+ score_t best_score = out->score;
532
+ size_t best_len = out->len;
533
+ size_t i;
534
+ const uint64_t h =
535
+ (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
536
+ kPreparedDictionaryHashMul64Long;
537
+ const uint32_t key = (uint32_t)(h >> hash_shift);
538
+ const uint32_t slot = key & slot_mask;
539
+ const uint32_t head = heads[key];
540
+ const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
541
+ uint32_t item = (head == 0xFFFF) ? 1 : 0;
542
+
543
+ const void* tail = (void*)&items[self->num_items];
544
+ if (self->magic == kPreparedDictionaryMagic) {
545
+ source = (const uint8_t*)tail;
546
+ } else {
547
+ /* kLeanPreparedDictionaryMagic */
548
+ source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
549
+ }
550
+
551
+ for (i = 0; i < 4; ++i) {
552
+ const size_t distance = (size_t)distance_cache[i];
553
+ size_t offset;
554
+ size_t limit;
555
+ size_t len;
556
+ if (distance <= boundary || distance > distance_offset) continue;
557
+ offset = distance_offset - distance;
558
+ limit = source_size - offset;
559
+ limit = limit > max_length ? max_length : limit;
560
+ len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked],
561
+ limit);
562
+ if (len >= 2) {
563
+ score_t score = BackwardReferenceScoreUsingLastDistance(len);
564
+ if (best_score < score) {
565
+ if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
566
+ if (best_score < score) {
567
+ best_score = score;
568
+ if (len > best_len) best_len = len;
569
+ out->len = len;
570
+ out->len_code_delta = 0;
571
+ out->distance = distance;
572
+ out->score = best_score;
573
+ }
574
+ }
575
+ }
576
+ }
577
+ while (item == 0) {
578
+ size_t offset;
579
+ size_t distance;
580
+ size_t limit;
581
+ item = *chain;
582
+ chain++;
583
+ offset = item & 0x7FFFFFFF;
584
+ item &= 0x80000000;
585
+ distance = distance_offset - offset;
586
+ limit = source_size - offset;
587
+ limit = (limit > max_length) ? max_length : limit;
588
+ if (distance > max_distance) continue;
589
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
590
+ best_len >= limit ||
591
+ data[cur_ix_masked + best_len] != source[offset + best_len]) {
592
+ continue;
593
+ }
594
+ {
595
+ const size_t len = FindMatchLengthWithLimit(&source[offset],
596
+ &data[cur_ix_masked],
597
+ limit);
598
+ if (len >= 4) {
599
+ score_t score = BackwardReferenceScore(len, distance);
600
+ if (best_score < score) {
601
+ best_score = score;
602
+ best_len = len;
603
+ out->len = best_len;
604
+ out->len_code_delta = 0;
605
+ out->distance = distance;
606
+ out->score = best_score;
607
+ }
608
+ }
609
+ }
610
+ }
611
+ }
612
+
613
+ /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
614
+ to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
615
+ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
616
+ const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
617
+ const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
618
+ const size_t max_length, const size_t distance_offset,
619
+ const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
620
+ const uint32_t source_size = self->source_size;
621
+ const uint32_t hash_bits = self->hash_bits;
622
+ const uint32_t bucket_bits = self->bucket_bits;
623
+ const uint32_t slot_bits = self->slot_bits;
624
+
625
+ const uint32_t hash_shift = 64u - bucket_bits;
626
+ const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
627
+ const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
628
+
629
+ const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
630
+ const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
631
+ const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
632
+ const uint8_t* source = NULL;
633
+
634
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
635
+ size_t best_len = min_length;
636
+ const uint64_t h =
637
+ (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
638
+ kPreparedDictionaryHashMul64Long;
639
+ const uint32_t key = (uint32_t)(h >> hash_shift);
640
+ const uint32_t slot = key & slot_mask;
641
+ const uint32_t head = heads[key];
642
+ const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
643
+ uint32_t item = (head == 0xFFFF) ? 1 : 0;
644
+ size_t found = 0;
645
+
646
+ const void* tail = (void*)&items[self->num_items];
647
+ if (self->magic == kPreparedDictionaryMagic) {
648
+ source = (const uint8_t*)tail;
649
+ } else {
650
+ /* kLeanPreparedDictionaryMagic */
651
+ source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
652
+ }
653
+
654
+ while (item == 0) {
655
+ size_t offset;
656
+ size_t distance;
657
+ size_t limit;
658
+ size_t len;
659
+ item = *chain;
660
+ chain++;
661
+ offset = item & 0x7FFFFFFF;
662
+ item &= 0x80000000;
663
+ distance = distance_offset - offset;
664
+ limit = source_size - offset;
665
+ limit = (limit > max_length) ? max_length : limit;
666
+ if (distance > max_distance) continue;
667
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
668
+ best_len >= limit ||
669
+ data[cur_ix_masked + best_len] != source[offset + best_len]) {
670
+ continue;
671
+ }
672
+ len = FindMatchLengthWithLimit(
673
+ &source[offset], &data[cur_ix_masked], limit);
674
+ if (len > best_len) {
675
+ best_len = len;
676
+ InitBackwardMatch(matches++, distance, len);
677
+ found++;
678
+ if (found == match_limit) break;
679
+ }
680
+ }
681
+ return found;
682
+ }
683
+
684
+ static BROTLI_INLINE void LookupCompoundDictionaryMatch(
685
+ const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
686
+ const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
687
+ const size_t cur_ix, const size_t max_length,
688
+ const size_t max_ring_buffer_distance, const size_t max_distance,
689
+ HasherSearchResult* sr) {
690
+ size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
691
+ size_t d;
692
+ for (d = 0; d < addon->num_chunks; ++d) {
693
+ /* Only one prepared dictionary type is currently supported. */
694
+ FindCompoundDictionaryMatch(
695
+ (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
696
+ distance_cache, cur_ix, max_length,
697
+ base_offset - addon->chunk_offsets[d], max_distance, sr);
698
+ }
699
+ }
700
+
701
+ static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
702
+ const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
703
+ const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length,
704
+ const size_t max_length, const size_t max_ring_buffer_distance,
705
+ const size_t max_distance, BackwardMatch* matches,
706
+ size_t match_limit) {
707
+ size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
708
+ size_t d;
709
+ size_t total_found = 0;
710
+ for (d = 0; d < addon->num_chunks; ++d) {
711
+ /* Only one prepared dictionary type is currently supported. */
712
+ total_found += FindAllCompoundDictionaryMatches(
713
+ (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
714
+ cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d],
715
+ max_distance, matches + total_found, match_limit - total_found);
716
+ if (total_found == match_limit) break;
717
+ if (total_found > 0) {
718
+ min_length = BackwardMatchLength(&matches[total_found - 1]);
719
+ }
720
+ }
721
+ return total_found;
722
+ }
723
+
484
724
  #if defined(__cplusplus) || defined(c_plusplus)
485
725
  } /* extern "C" */
486
726
  #endif
@@ -30,10 +30,10 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
30
30
  typedef struct HashComposite {
31
31
  HASHER_A ha;
32
32
  HASHER_B hb;
33
+ HasherCommon ha_common;
33
34
  HasherCommon hb_common;
34
35
 
35
36
  /* Shortcuts. */
36
- void* extra;
37
37
  HasherCommon* common;
38
38
 
39
39
  BROTLI_BOOL fresh;
@@ -43,12 +43,12 @@ typedef struct HashComposite {
43
43
  static void FN(Initialize)(HasherCommon* common,
44
44
  HashComposite* BROTLI_RESTRICT self, const BrotliEncoderParams* params) {
45
45
  self->common = common;
46
- self->extra = common->extra;
47
46
 
47
+ self->ha_common = *self->common;
48
48
  self->hb_common = *self->common;
49
49
  self->fresh = BROTLI_TRUE;
50
50
  self->params = params;
51
- /* TODO: Initialize of the hashers is defered to Prepare (and params
51
+ /* TODO(lode): Initialize of the hashers is deferred to Prepare (and params
52
52
  remembered here) because we don't get the one_shot and input_size params
53
53
  here that are needed to know the memory size of them. Instead provide
54
54
  those params to all hashers FN(Initialize) */
@@ -59,21 +59,36 @@ static void FN(Prepare)(
59
59
  size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
60
60
  if (self->fresh) {
61
61
  self->fresh = BROTLI_FALSE;
62
- self->hb_common.extra = (uint8_t*)self->extra +
63
- FN_A(HashMemAllocInBytes)(self->params, one_shot, input_size);
64
-
65
- FN_A(Initialize)(self->common, &self->ha, self->params);
62
+ self->ha_common.extra[0] = self->common->extra[0];
63
+ self->ha_common.extra[1] = self->common->extra[1];
64
+ self->ha_common.extra[2] = NULL;
65
+ self->ha_common.extra[3] = NULL;
66
+ self->hb_common.extra[0] = self->common->extra[2];
67
+ self->hb_common.extra[1] = self->common->extra[3];
68
+ self->hb_common.extra[2] = NULL;
69
+ self->hb_common.extra[3] = NULL;
70
+
71
+ FN_A(Initialize)(&self->ha_common, &self->ha, self->params);
66
72
  FN_B(Initialize)(&self->hb_common, &self->hb, self->params);
67
73
  }
68
74
  FN_A(Prepare)(&self->ha, one_shot, input_size, data);
69
75
  FN_B(Prepare)(&self->hb, one_shot, input_size, data);
70
76
  }
71
77
 
72
- static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
78
+ static BROTLI_INLINE void FN(HashMemAllocInBytes)(
73
79
  const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
74
- size_t input_size) {
75
- return FN_A(HashMemAllocInBytes)(params, one_shot, input_size) +
76
- FN_B(HashMemAllocInBytes)(params, one_shot, input_size);
80
+ size_t input_size, size_t* alloc_size) {
81
+ size_t alloc_size_a[4] = {0};
82
+ size_t alloc_size_b[4] = {0};
83
+ FN_A(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_a);
84
+ FN_B(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_b);
85
+ /* Should never happen. */
86
+ if (alloc_size_a[2] != 0 || alloc_size_a[3] != 0) exit(EXIT_FAILURE);
87
+ if (alloc_size_b[2] != 0 || alloc_size_b[3] != 0) exit(EXIT_FAILURE);
88
+ alloc_size[0] = alloc_size_a[0];
89
+ alloc_size[1] = alloc_size_a[1];
90
+ alloc_size[2] = alloc_size_b[0];
91
+ alloc_size[3] = alloc_size_b[1];
77
92
  }
78
93
 
79
94
  static BROTLI_INLINE void FN(Store)(HashComposite* BROTLI_RESTRICT self,
@@ -49,7 +49,7 @@ typedef struct HashForgetfulChain {
49
49
  size_t max_hops;
50
50
 
51
51
  /* Shortcuts. */
52
- void* extra;
52
+ void* extra[2];
53
53
  HasherCommon* common;
54
54
 
55
55
  /* --- Dynamic size members --- */
@@ -77,14 +77,15 @@ static uint8_t* FN(TinyHash)(void* extra) {
77
77
  }
78
78
 
79
79
  static FN(Bank)* FN(Banks)(void* extra) {
80
- return (FN(Bank)*)(&FN(TinyHash)(extra)[65536]);
80
+ return (FN(Bank)*)(extra);
81
81
  }
82
82
 
83
83
  static void FN(Initialize)(
84
84
  HasherCommon* common, HashForgetfulChain* BROTLI_RESTRICT self,
85
85
  const BrotliEncoderParams* params) {
86
86
  self->common = common;
87
- self->extra = common->extra;
87
+ self->extra[0] = common->extra[0];
88
+ self->extra[1] = common->extra[1];
88
89
 
89
90
  self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
90
91
  }
@@ -92,9 +93,9 @@ static void FN(Initialize)(
92
93
  static void FN(Prepare)(
93
94
  HashForgetfulChain* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
94
95
  size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
95
- uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
96
- uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
97
- uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
96
+ uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
97
+ uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
98
+ uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
98
99
  /* Partial preparation is 100 times slower (per socket). */
99
100
  size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
100
101
  if (one_shot && input_size <= partial_prepare_threshold) {
@@ -116,24 +117,25 @@ static void FN(Prepare)(
116
117
  memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
117
118
  }
118
119
 
119
- static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
120
+ static BROTLI_INLINE void FN(HashMemAllocInBytes)(
120
121
  const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
121
- size_t input_size) {
122
+ size_t input_size, size_t* alloc_size) {
122
123
  BROTLI_UNUSED(params);
123
124
  BROTLI_UNUSED(one_shot);
124
125
  BROTLI_UNUSED(input_size);
125
- return sizeof(uint32_t) * BUCKET_SIZE + sizeof(uint16_t) * BUCKET_SIZE +
126
- sizeof(uint8_t) * 65536 + sizeof(FN(Bank)) * NUM_BANKS;
126
+ alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE +
127
+ sizeof(uint16_t) * BUCKET_SIZE + sizeof(uint8_t) * 65536;
128
+ alloc_size[1] = sizeof(FN(Bank)) * NUM_BANKS;
127
129
  }
128
130
 
129
131
  /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
130
132
  node to corresponding chain; also update tiny_hash for current position. */
131
133
  static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
132
134
  const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
133
- uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
134
- uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
135
- uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
136
- FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
135
+ uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
136
+ uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
137
+ uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
138
+ FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
137
139
  const size_t key = FN(HashBytes)(&data[ix & mask]);
138
140
  const size_t bank = key & (NUM_BANKS - 1);
139
141
  const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
@@ -196,10 +198,10 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
196
198
  const size_t cur_ix, const size_t max_length, const size_t max_backward,
197
199
  const size_t dictionary_distance, const size_t max_distance,
198
200
  HasherSearchResult* BROTLI_RESTRICT out) {
199
- uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
200
- uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
201
- uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra);
202
- FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
201
+ uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
202
+ uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
203
+ uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra[0]);
204
+ FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
203
205
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
204
206
  /* Don't accept a short copy from far away. */
205
207
  score_t min_score = out->score;