brotli 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +6 -3
  3. data/.github/workflows/publish.yml +7 -17
  4. data/.gitmodules +1 -1
  5. data/README.md +2 -2
  6. data/ext/brotli/brotli.c +8 -0
  7. data/ext/brotli/extconf.rb +6 -0
  8. data/lib/brotli/version.rb +1 -1
  9. data/test/brotli_test.rb +14 -1
  10. data/test/test_helper.rb +1 -0
  11. data/vendor/brotli/c/common/constants.c +1 -1
  12. data/vendor/brotli/c/common/constants.h +2 -1
  13. data/vendor/brotli/c/common/context.c +1 -1
  14. data/vendor/brotli/c/common/dictionary.c +5 -3
  15. data/vendor/brotli/c/common/platform.c +2 -1
  16. data/vendor/brotli/c/common/platform.h +60 -113
  17. data/vendor/brotli/c/common/shared_dictionary.c +521 -0
  18. data/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
  19. data/vendor/brotli/c/common/transform.c +1 -1
  20. data/vendor/brotli/c/common/version.h +31 -6
  21. data/vendor/brotli/c/dec/bit_reader.c +10 -8
  22. data/vendor/brotli/c/dec/bit_reader.h +172 -100
  23. data/vendor/brotli/c/dec/decode.c +467 -200
  24. data/vendor/brotli/c/dec/huffman.c +7 -4
  25. data/vendor/brotli/c/dec/huffman.h +2 -1
  26. data/vendor/brotli/c/dec/prefix.h +2 -1
  27. data/vendor/brotli/c/dec/state.c +33 -9
  28. data/vendor/brotli/c/dec/state.h +70 -35
  29. data/vendor/brotli/c/enc/backward_references.c +81 -19
  30. data/vendor/brotli/c/enc/backward_references.h +5 -4
  31. data/vendor/brotli/c/enc/backward_references_hq.c +148 -52
  32. data/vendor/brotli/c/enc/backward_references_hq.h +6 -5
  33. data/vendor/brotli/c/enc/backward_references_inc.h +31 -5
  34. data/vendor/brotli/c/enc/bit_cost.c +8 -7
  35. data/vendor/brotli/c/enc/bit_cost.h +5 -4
  36. data/vendor/brotli/c/enc/block_splitter.c +37 -14
  37. data/vendor/brotli/c/enc/block_splitter.h +5 -4
  38. data/vendor/brotli/c/enc/block_splitter_inc.h +86 -45
  39. data/vendor/brotli/c/enc/brotli_bit_stream.c +132 -110
  40. data/vendor/brotli/c/enc/brotli_bit_stream.h +11 -6
  41. data/vendor/brotli/c/enc/cluster.c +10 -9
  42. data/vendor/brotli/c/enc/cluster.h +7 -6
  43. data/vendor/brotli/c/enc/cluster_inc.h +25 -20
  44. data/vendor/brotli/c/enc/command.c +1 -1
  45. data/vendor/brotli/c/enc/command.h +5 -4
  46. data/vendor/brotli/c/enc/compound_dictionary.c +207 -0
  47. data/vendor/brotli/c/enc/compound_dictionary.h +74 -0
  48. data/vendor/brotli/c/enc/compress_fragment.c +93 -83
  49. data/vendor/brotli/c/enc/compress_fragment.h +32 -7
  50. data/vendor/brotli/c/enc/compress_fragment_two_pass.c +99 -87
  51. data/vendor/brotli/c/enc/compress_fragment_two_pass.h +21 -3
  52. data/vendor/brotli/c/enc/dictionary_hash.c +3 -1
  53. data/vendor/brotli/c/enc/encode.c +473 -404
  54. data/vendor/brotli/c/enc/encoder_dict.c +611 -4
  55. data/vendor/brotli/c/enc/encoder_dict.h +117 -3
  56. data/vendor/brotli/c/enc/entropy_encode.c +3 -2
  57. data/vendor/brotli/c/enc/entropy_encode.h +2 -1
  58. data/vendor/brotli/c/enc/entropy_encode_static.h +5 -2
  59. data/vendor/brotli/c/enc/fast_log.c +1 -1
  60. data/vendor/brotli/c/enc/fast_log.h +2 -1
  61. data/vendor/brotli/c/enc/find_match_length.h +15 -22
  62. data/vendor/brotli/c/enc/hash.h +285 -45
  63. data/vendor/brotli/c/enc/hash_composite_inc.h +26 -11
  64. data/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +20 -18
  65. data/vendor/brotli/c/enc/hash_longest_match64_inc.h +34 -39
  66. data/vendor/brotli/c/enc/hash_longest_match_inc.h +6 -10
  67. data/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -4
  68. data/vendor/brotli/c/enc/hash_rolling_inc.h +4 -4
  69. data/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +6 -5
  70. data/vendor/brotli/c/enc/histogram.c +4 -4
  71. data/vendor/brotli/c/enc/histogram.h +7 -6
  72. data/vendor/brotli/c/enc/literal_cost.c +20 -15
  73. data/vendor/brotli/c/enc/literal_cost.h +4 -2
  74. data/vendor/brotli/c/enc/memory.c +29 -5
  75. data/vendor/brotli/c/enc/memory.h +19 -2
  76. data/vendor/brotli/c/enc/metablock.c +72 -58
  77. data/vendor/brotli/c/enc/metablock.h +9 -8
  78. data/vendor/brotli/c/enc/metablock_inc.h +8 -6
  79. data/vendor/brotli/c/enc/params.h +4 -3
  80. data/vendor/brotli/c/enc/prefix.h +3 -2
  81. data/vendor/brotli/c/enc/quality.h +40 -3
  82. data/vendor/brotli/c/enc/ringbuffer.h +4 -3
  83. data/vendor/brotli/c/enc/state.h +104 -0
  84. data/vendor/brotli/c/enc/static_dict.c +60 -4
  85. data/vendor/brotli/c/enc/static_dict.h +3 -2
  86. data/vendor/brotli/c/enc/static_dict_lut.h +2 -0
  87. data/vendor/brotli/c/enc/utf8_util.c +1 -1
  88. data/vendor/brotli/c/enc/utf8_util.h +2 -1
  89. data/vendor/brotli/c/enc/write_bits.h +2 -1
  90. data/vendor/brotli/c/include/brotli/decode.h +67 -2
  91. data/vendor/brotli/c/include/brotli/encode.h +55 -2
  92. data/vendor/brotli/c/include/brotli/port.h +28 -11
  93. data/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
  94. metadata +9 -3
@@ -7,15 +7,56 @@
7
7
  #ifndef BROTLI_ENC_ENCODER_DICT_H_
8
8
  #define BROTLI_ENC_ENCODER_DICT_H_
9
9
 
10
+ #include <brotli/shared_dictionary.h>
11
+ #include <brotli/types.h>
12
+
10
13
  #include "../common/dictionary.h"
11
14
  #include "../common/platform.h"
12
- #include <brotli/types.h>
13
- #include "./static_dict_lut.h"
15
+ #include "compound_dictionary.h"
16
+ #include "memory.h"
17
+ #include "static_dict_lut.h"
14
18
 
15
19
  #if defined(__cplusplus) || defined(c_plusplus)
16
20
  extern "C" {
17
21
  #endif
18
22
 
23
+ /*
24
+ Dictionary hierarchy for Encoder:
25
+ -SharedEncoderDictionary
26
+ --CompoundDictionary
27
+ ---PreparedDictionary [up to 15x]
28
+ = prefix dictionary with precomputed hashes
29
+ --ContextualEncoderDictionary
30
+ ---BrotliEncoderDictionary [up to 64x]
31
+ = for each context, precomputed static dictionary with words + transforms
32
+
33
+ Dictionary hierarchy from common: similar, but without precomputed hashes
34
+ -BrotliSharedDictionary
35
+ --BrotliDictionary [up to 64x]
36
+ --BrotliTransforms [up to 64x]
37
+ --const uint8_t* prefix [up to 15x]: compound dictionaries
38
+ */
39
+
40
+ typedef struct BrotliTrieNode {
41
+ uint8_t single; /* if 1, sub is a single node for c instead of 256 */
42
+ uint8_t c;
43
+ uint8_t len_; /* untransformed length */
44
+ uint32_t idx_; /* word index + num words * transform index */
45
+ uint32_t sub; /* index of sub node(s) in the pool */
46
+ } BrotliTrieNode;
47
+
48
+ typedef struct BrotliTrie {
49
+ BrotliTrieNode* pool;
50
+ size_t pool_capacity;
51
+ size_t pool_size;
52
+ BrotliTrieNode root;
53
+ } BrotliTrie;
54
+
55
+ #if defined(BROTLI_EXPERIMENTAL)
56
+ BROTLI_INTERNAL const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
57
+ const BrotliTrieNode* node, uint8_t c);
58
+ #endif /* BROTLI_EXPERIMENTAL */
59
+
19
60
  /* Dictionary data (words and transforms) for 1 possible context */
20
61
  typedef struct BrotliEncoderDictionary {
21
62
  const BrotliDictionary* words;
@@ -32,9 +73,82 @@ typedef struct BrotliEncoderDictionary {
32
73
  /* from static_dict_lut.h, for slow encoder */
33
74
  const uint16_t* buckets;
34
75
  const DictWord* dict_words;
76
+ /* Heavy version, for use by slow encoder when there are custom transforms.
77
+ Contains every possible transformed dictionary word in a trie. It encodes
78
+ about as fast as the non-heavy encoder but consumes a lot of memory and
79
+ takes time to build. */
80
+ BrotliTrie trie;
81
+ BROTLI_BOOL has_words_heavy;
82
+
83
+ /* Reference to other dictionaries. */
84
+ const struct ContextualEncoderDictionary* parent;
85
+
86
+ /* Allocated memory, used only when not using the Brotli defaults */
87
+ uint16_t* hash_table_data_words_;
88
+ uint8_t* hash_table_data_lengths_;
89
+ size_t buckets_alloc_size_;
90
+ uint16_t* buckets_data_;
91
+ size_t dict_words_alloc_size_;
92
+ DictWord* dict_words_data_;
93
+ BrotliDictionary* words_instance_;
35
94
  } BrotliEncoderDictionary;
36
95
 
37
- BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict);
96
+ /* Dictionary data for all 64 contexts */
97
+ typedef struct ContextualEncoderDictionary {
98
+ BROTLI_BOOL context_based;
99
+ uint8_t num_dictionaries;
100
+ uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
101
+ const BrotliEncoderDictionary* dict[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
102
+
103
+ /* If num_instances_ is 1, instance_ is used, else dynamic allocation with
104
+ instances_ is used. */
105
+ size_t num_instances_;
106
+ BrotliEncoderDictionary instance_;
107
+ BrotliEncoderDictionary* instances_;
108
+ } ContextualEncoderDictionary;
109
+
110
+ typedef struct SharedEncoderDictionary {
111
+ /* Magic value to distinguish this struct from PreparedDictionary for
112
+ certain external usages. */
113
+ uint32_t magic;
114
+
115
+ /* LZ77 prefix, compound dictionary */
116
+ CompoundDictionary compound;
117
+
118
+ /* Custom static dictionary (optionally context-based) */
119
+ ContextualEncoderDictionary contextual;
120
+
121
+ /* The maximum quality the dictionary was computed for */
122
+ int max_quality;
123
+ } SharedEncoderDictionary;
124
+
125
+ typedef struct ManagedDictionary {
126
+ uint32_t magic;
127
+ MemoryManager memory_manager_;
128
+ uint32_t* dictionary;
129
+ } ManagedDictionary;
130
+
131
+ /* Initializes to the brotli built-in dictionary */
132
+ BROTLI_INTERNAL void BrotliInitSharedEncoderDictionary(
133
+ SharedEncoderDictionary* dict);
134
+
135
+ #if defined(BROTLI_EXPERIMENTAL)
136
+ /* Initializes to shared dictionary that will be parsed from
137
+ encoded_dict. Requires that you keep the encoded_dict buffer
138
+ around, parts of data will point to it. */
139
+ BROTLI_INTERNAL BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
140
+ MemoryManager* m, const uint8_t* encoded_dict, size_t size,
141
+ int quality, SharedEncoderDictionary* dict);
142
+ #endif /* BROTLI_EXPERIMENTAL */
143
+
144
+ BROTLI_INTERNAL void BrotliCleanupSharedEncoderDictionary(
145
+ MemoryManager* m, SharedEncoderDictionary* dict);
146
+
147
+ BROTLI_INTERNAL ManagedDictionary* BrotliCreateManagedDictionary(
148
+ brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
149
+
150
+ BROTLI_INTERNAL void BrotliDestroyManagedDictionary(
151
+ ManagedDictionary* dictionary);
38
152
 
39
153
  #if defined(__cplusplus) || defined(c_plusplus)
40
154
  } /* extern "C" */
@@ -6,13 +6,14 @@
6
6
 
7
7
  /* Entropy encoding (Huffman) utilities. */
8
8
 
9
- #include "./entropy_encode.h"
9
+ #include "entropy_encode.h"
10
10
 
11
11
  #include <string.h> /* memset */
12
12
 
13
+ #include <brotli/types.h>
14
+
13
15
  #include "../common/constants.h"
14
16
  #include "../common/platform.h"
15
- #include <brotli/types.h>
16
17
 
17
18
  #if defined(__cplusplus) || defined(c_plusplus)
18
19
  extern "C" {
@@ -9,9 +9,10 @@
9
9
  #ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
10
10
  #define BROTLI_ENC_ENTROPY_ENCODE_H_
11
11
 
12
- #include "../common/platform.h"
13
12
  #include <brotli/types.h>
14
13
 
14
+ #include "../common/platform.h"
15
+
15
16
  #if defined(__cplusplus) || defined(c_plusplus)
16
17
  extern "C" {
17
18
  #endif
@@ -9,10 +9,11 @@
9
9
  #ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
10
10
  #define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
11
11
 
12
+ #include <brotli/types.h>
13
+
12
14
  #include "../common/constants.h"
13
15
  #include "../common/platform.h"
14
- #include <brotli/types.h>
15
- #include "./write_bits.h"
16
+ #include "write_bits.h"
16
17
 
17
18
  #if defined(__cplusplus) || defined(c_plusplus)
18
19
  extern "C" {
@@ -76,6 +77,7 @@ static const uint8_t kStaticDistanceCodeDepth[64] = {
76
77
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
77
78
  };
78
79
 
80
+ /* GENERATED CODE START */
79
81
  static const uint32_t kCodeLengthBits[18] = {
80
82
  0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
81
83
  };
@@ -531,6 +533,7 @@ static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
531
533
  size_t* storage_ix, uint8_t* storage) {
532
534
  BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage);
533
535
  }
536
+ /* GENERATED CODE END */
534
537
 
535
538
  #if defined(__cplusplus) || defined(c_plusplus)
536
539
  } /* extern "C" */
@@ -4,7 +4,7 @@
4
4
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
5
  */
6
6
 
7
- #include "./fast_log.h"
7
+ #include "fast_log.h"
8
8
 
9
9
  #if defined(__cplusplus) || defined(c_plusplus)
10
10
  extern "C" {
@@ -11,9 +11,10 @@
11
11
 
12
12
  #include <math.h>
13
13
 
14
- #include "../common/platform.h"
15
14
  #include <brotli/types.h>
16
15
 
16
+ #include "../common/platform.h"
17
+
17
18
  #if defined(__cplusplus) || defined(c_plusplus)
18
19
  extern "C" {
19
20
  #endif
@@ -9,9 +9,10 @@
9
9
  #ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
10
10
  #define BROTLI_ENC_FIND_MATCH_LENGTH_H_
11
11
 
12
- #include "../common/platform.h"
13
12
  #include <brotli/types.h>
14
13
 
14
+ #include "../common/platform.h"
15
+
15
16
  #if defined(__cplusplus) || defined(c_plusplus)
16
17
  extern "C" {
17
18
  #endif
@@ -21,31 +22,23 @@ extern "C" {
21
22
  static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
22
23
  const uint8_t* s2,
23
24
  size_t limit) {
24
- size_t matched = 0;
25
- size_t limit2 = (limit >> 3) + 1; /* + 1 is for pre-decrement in while */
26
- while (BROTLI_PREDICT_TRUE(--limit2)) {
27
- if (BROTLI_PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64LE(s2) ==
28
- BROTLI_UNALIGNED_LOAD64LE(s1 + matched))) {
29
- s2 += 8;
30
- matched += 8;
31
- } else {
32
- uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^
33
- BROTLI_UNALIGNED_LOAD64LE(s1 + matched);
25
+ const uint8_t *s1_orig = s1;
26
+ for (; limit >= 8; limit -= 8) {
27
+ uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^
28
+ BROTLI_UNALIGNED_LOAD64LE(s1);
29
+ s2 += 8;
30
+ if (x != 0) {
34
31
  size_t matching_bits = (size_t)BROTLI_TZCNT64(x);
35
- matched += matching_bits >> 3;
36
- return matched;
32
+ return (size_t)(s1 - s1_orig) + (matching_bits >> 3);
37
33
  }
34
+ s1 += 8;
38
35
  }
39
- limit = (limit & 7) + 1; /* + 1 is for pre-decrement in while */
40
- while (--limit) {
41
- if (BROTLI_PREDICT_TRUE(s1[matched] == *s2)) {
42
- ++s2;
43
- ++matched;
44
- } else {
45
- return matched;
46
- }
36
+ while (limit && *s1 == *s2) {
37
+ limit--;
38
+ ++s2;
39
+ ++s1;
47
40
  }
48
- return matched;
41
+ return (size_t)(s1 - s1_orig);
49
42
  }
50
43
  #else
51
44
  static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,