couchbase 3.0.0.alpha.1-universal-darwin-19 → 3.0.0.alpha.2-universal-darwin-19

Files changed (176)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/tests-6.0.3.yml +49 -0
  3. data/.github/workflows/tests.yml +47 -0
  4. data/.gitmodules +3 -0
  5. data/.idea/dictionaries/gem_terms.xml +5 -0
  6. data/.idea/inspectionProfiles/Project_Default.xml +1 -0
  7. data/.idea/vcs.xml +1 -0
  8. data/Gemfile +1 -0
  9. data/README.md +55 -2
  10. data/Rakefile +18 -0
  11. data/bin/init-cluster +62 -0
  12. data/bin/setup +1 -0
  13. data/couchbase.gemspec +3 -2
  14. data/examples/crud.rb +1 -2
  15. data/examples/managing_buckets.rb +47 -0
  16. data/examples/managing_collections.rb +58 -0
  17. data/examples/managing_query_indexes.rb +63 -0
  18. data/examples/query.rb +3 -2
  19. data/examples/query_with_consistency.rb +76 -0
  20. data/examples/subdocument.rb +23 -1
  21. data/ext/.clang-format +1 -1
  22. data/ext/.idea/dictionaries/couchbase_terms.xml +2 -0
  23. data/ext/.idea/vcs.xml +1 -0
  24. data/ext/CMakeLists.txt +30 -12
  25. data/ext/build_version.hxx.in +26 -0
  26. data/ext/couchbase/bucket.hxx +69 -8
  27. data/ext/couchbase/cluster.hxx +70 -54
  28. data/ext/couchbase/collections_manifest.hxx +3 -3
  29. data/ext/couchbase/configuration.hxx +14 -0
  30. data/ext/couchbase/couchbase.cxx +2044 -383
  31. data/ext/couchbase/{operations/document_id.hxx → document_id.hxx} +5 -4
  32. data/ext/couchbase/io/http_message.hxx +5 -1
  33. data/ext/couchbase/io/http_parser.hxx +2 -1
  34. data/ext/couchbase/io/http_session.hxx +6 -3
  35. data/ext/couchbase/io/{binary_message.hxx → mcbp_message.hxx} +15 -12
  36. data/ext/couchbase/io/mcbp_parser.hxx +99 -0
  37. data/ext/couchbase/io/{key_value_session.hxx → mcbp_session.hxx} +200 -95
  38. data/ext/couchbase/io/session_manager.hxx +37 -22
  39. data/ext/couchbase/mutation_token.hxx +2 -1
  40. data/ext/couchbase/operations.hxx +38 -8
  41. data/ext/couchbase/operations/bucket_create.hxx +138 -0
  42. data/ext/couchbase/operations/bucket_drop.hxx +65 -0
  43. data/ext/couchbase/operations/bucket_flush.hxx +65 -0
  44. data/ext/couchbase/operations/bucket_get.hxx +69 -0
  45. data/ext/couchbase/operations/bucket_get_all.hxx +62 -0
  46. data/ext/couchbase/operations/bucket_settings.hxx +111 -0
  47. data/ext/couchbase/operations/bucket_update.hxx +115 -0
  48. data/ext/couchbase/operations/cluster_developer_preview_enable.hxx +60 -0
  49. data/ext/couchbase/operations/collection_create.hxx +86 -0
  50. data/ext/couchbase/operations/collection_drop.hxx +82 -0
  51. data/ext/couchbase/operations/command.hxx +10 -10
  52. data/ext/couchbase/operations/document_decrement.hxx +80 -0
  53. data/ext/couchbase/operations/document_exists.hxx +80 -0
  54. data/ext/couchbase/operations/{get.hxx → document_get.hxx} +4 -2
  55. data/ext/couchbase/operations/document_get_and_lock.hxx +64 -0
  56. data/ext/couchbase/operations/document_get_and_touch.hxx +64 -0
  57. data/ext/couchbase/operations/document_increment.hxx +80 -0
  58. data/ext/couchbase/operations/document_insert.hxx +74 -0
  59. data/ext/couchbase/operations/{lookup_in.hxx → document_lookup_in.hxx} +2 -2
  60. data/ext/couchbase/operations/{mutate_in.hxx → document_mutate_in.hxx} +11 -2
  61. data/ext/couchbase/operations/{query.hxx → document_query.hxx} +101 -6
  62. data/ext/couchbase/operations/document_remove.hxx +67 -0
  63. data/ext/couchbase/operations/document_replace.hxx +76 -0
  64. data/ext/couchbase/operations/{upsert.hxx → document_touch.hxx} +14 -14
  65. data/ext/couchbase/operations/{remove.hxx → document_unlock.hxx} +12 -10
  66. data/ext/couchbase/operations/document_upsert.hxx +74 -0
  67. data/ext/couchbase/operations/query_index_build_deferred.hxx +85 -0
  68. data/ext/couchbase/operations/query_index_create.hxx +134 -0
  69. data/ext/couchbase/operations/query_index_drop.hxx +108 -0
  70. data/ext/couchbase/operations/query_index_get_all.hxx +106 -0
  71. data/ext/couchbase/operations/scope_create.hxx +81 -0
  72. data/ext/couchbase/operations/scope_drop.hxx +79 -0
  73. data/ext/couchbase/operations/scope_get_all.hxx +72 -0
  74. data/ext/couchbase/protocol/client_opcode.hxx +35 -0
  75. data/ext/couchbase/protocol/client_request.hxx +56 -9
  76. data/ext/couchbase/protocol/client_response.hxx +52 -15
  77. data/ext/couchbase/protocol/cmd_cluster_map_change_notification.hxx +81 -0
  78. data/ext/couchbase/protocol/cmd_decrement.hxx +187 -0
  79. data/ext/couchbase/protocol/cmd_exists.hxx +171 -0
  80. data/ext/couchbase/protocol/cmd_get.hxx +31 -8
  81. data/ext/couchbase/protocol/cmd_get_and_lock.hxx +142 -0
  82. data/ext/couchbase/protocol/cmd_get_and_touch.hxx +142 -0
  83. data/ext/couchbase/protocol/cmd_get_cluster_config.hxx +16 -3
  84. data/ext/couchbase/protocol/cmd_get_collections_manifest.hxx +16 -3
  85. data/ext/couchbase/protocol/cmd_get_error_map.hxx +16 -3
  86. data/ext/couchbase/protocol/cmd_hello.hxx +24 -8
  87. data/ext/couchbase/protocol/cmd_increment.hxx +187 -0
  88. data/ext/couchbase/protocol/cmd_info.hxx +1 -0
  89. data/ext/couchbase/protocol/cmd_insert.hxx +172 -0
  90. data/ext/couchbase/protocol/cmd_lookup_in.hxx +28 -13
  91. data/ext/couchbase/protocol/cmd_mutate_in.hxx +65 -13
  92. data/ext/couchbase/protocol/cmd_remove.hxx +59 -4
  93. data/ext/couchbase/protocol/cmd_replace.hxx +172 -0
  94. data/ext/couchbase/protocol/cmd_sasl_auth.hxx +15 -3
  95. data/ext/couchbase/protocol/cmd_sasl_list_mechs.hxx +15 -3
  96. data/ext/couchbase/protocol/cmd_sasl_step.hxx +15 -3
  97. data/ext/couchbase/protocol/cmd_select_bucket.hxx +14 -2
  98. data/ext/couchbase/protocol/cmd_touch.hxx +102 -0
  99. data/ext/couchbase/protocol/cmd_unlock.hxx +95 -0
  100. data/ext/couchbase/protocol/cmd_upsert.hxx +50 -14
  101. data/ext/couchbase/protocol/durability_level.hxx +67 -0
  102. data/ext/couchbase/protocol/frame_info_id.hxx +187 -0
  103. data/ext/couchbase/protocol/hello_feature.hxx +137 -0
  104. data/ext/couchbase/protocol/server_opcode.hxx +57 -0
  105. data/ext/couchbase/protocol/server_request.hxx +122 -0
  106. data/ext/couchbase/protocol/unsigned_leb128.h +15 -15
  107. data/ext/couchbase/utils/byteswap.hxx +1 -2
  108. data/ext/couchbase/utils/url_codec.hxx +225 -0
  109. data/ext/couchbase/version.hxx +3 -1
  110. data/ext/extconf.rb +4 -1
  111. data/ext/test/main.cxx +37 -113
  112. data/ext/third_party/snappy/.appveyor.yml +36 -0
  113. data/ext/third_party/snappy/.gitignore +8 -0
  114. data/ext/third_party/snappy/.travis.yml +98 -0
  115. data/ext/third_party/snappy/AUTHORS +1 -0
  116. data/ext/third_party/snappy/CMakeLists.txt +345 -0
  117. data/ext/third_party/snappy/CONTRIBUTING.md +26 -0
  118. data/ext/third_party/snappy/COPYING +54 -0
  119. data/ext/third_party/snappy/NEWS +188 -0
  120. data/ext/third_party/snappy/README.md +148 -0
  121. data/ext/third_party/snappy/cmake/SnappyConfig.cmake.in +33 -0
  122. data/ext/third_party/snappy/cmake/config.h.in +59 -0
  123. data/ext/third_party/snappy/docs/README.md +72 -0
  124. data/ext/third_party/snappy/format_description.txt +110 -0
  125. data/ext/third_party/snappy/framing_format.txt +135 -0
  126. data/ext/third_party/snappy/snappy-c.cc +90 -0
  127. data/ext/third_party/snappy/snappy-c.h +138 -0
  128. data/ext/third_party/snappy/snappy-internal.h +315 -0
  129. data/ext/third_party/snappy/snappy-sinksource.cc +121 -0
  130. data/ext/third_party/snappy/snappy-sinksource.h +182 -0
  131. data/ext/third_party/snappy/snappy-stubs-internal.cc +42 -0
  132. data/ext/third_party/snappy/snappy-stubs-internal.h +493 -0
  133. data/ext/third_party/snappy/snappy-stubs-public.h.in +63 -0
  134. data/ext/third_party/snappy/snappy-test.cc +613 -0
  135. data/ext/third_party/snappy/snappy-test.h +526 -0
  136. data/ext/third_party/snappy/snappy.cc +1770 -0
  137. data/ext/third_party/snappy/snappy.h +209 -0
  138. data/ext/third_party/snappy/snappy_compress_fuzzer.cc +60 -0
  139. data/ext/third_party/snappy/snappy_uncompress_fuzzer.cc +58 -0
  140. data/ext/third_party/snappy/snappy_unittest.cc +1512 -0
  141. data/ext/third_party/snappy/testdata/alice29.txt +3609 -0
  142. data/ext/third_party/snappy/testdata/asyoulik.txt +4122 -0
  143. data/ext/third_party/snappy/testdata/baddata1.snappy +0 -0
  144. data/ext/third_party/snappy/testdata/baddata2.snappy +0 -0
  145. data/ext/third_party/snappy/testdata/baddata3.snappy +0 -0
  146. data/ext/third_party/snappy/testdata/fireworks.jpeg +0 -0
  147. data/ext/third_party/snappy/testdata/geo.protodata +0 -0
  148. data/ext/third_party/snappy/testdata/html +1 -0
  149. data/ext/third_party/snappy/testdata/html_x_4 +1 -0
  150. data/ext/third_party/snappy/testdata/kppkn.gtb +0 -0
  151. data/ext/third_party/snappy/testdata/lcet10.txt +7519 -0
  152. data/ext/third_party/snappy/testdata/paper-100k.pdf +600 -2
  153. data/ext/third_party/snappy/testdata/plrabn12.txt +10699 -0
  154. data/ext/third_party/snappy/testdata/urls.10K +10000 -0
  155. data/lib/couchbase/binary_collection.rb +33 -76
  156. data/lib/couchbase/binary_collection_options.rb +94 -0
  157. data/lib/couchbase/bucket.rb +9 -3
  158. data/lib/couchbase/cluster.rb +161 -23
  159. data/lib/couchbase/collection.rb +108 -191
  160. data/lib/couchbase/collection_options.rb +430 -0
  161. data/lib/couchbase/errors.rb +136 -134
  162. data/lib/couchbase/json_transcoder.rb +32 -0
  163. data/lib/couchbase/management/analytics_index_manager.rb +185 -9
  164. data/lib/couchbase/management/bucket_manager.rb +84 -33
  165. data/lib/couchbase/management/collection_manager.rb +166 -1
  166. data/lib/couchbase/management/query_index_manager.rb +261 -0
  167. data/lib/couchbase/management/search_index_manager.rb +291 -0
  168. data/lib/couchbase/management/user_manager.rb +12 -10
  169. data/lib/couchbase/management/view_index_manager.rb +151 -1
  170. data/lib/couchbase/mutation_state.rb +11 -1
  171. data/lib/couchbase/scope.rb +4 -4
  172. data/lib/couchbase/version.rb +1 -1
  173. metadata +113 -18
  174. data/.travis.yml +0 -7
  175. data/ext/couchbase/io/binary_parser.hxx +0 -64
  176. data/lib/couchbase/results.rb +0 -307
@@ -0,0 +1,1770 @@
+ // Copyright 2005 Google Inc. All Rights Reserved.
+ //
+ // Redistribution and use in source and binary forms, with or without
+ // modification, are permitted provided that the following conditions are
+ // met:
+ //
+ // * Redistributions of source code must retain the above copyright
+ // notice, this list of conditions and the following disclaimer.
+ // * Redistributions in binary form must reproduce the above
+ // copyright notice, this list of conditions and the following disclaimer
+ // in the documentation and/or other materials provided with the
+ // distribution.
+ // * Neither the name of Google Inc. nor the names of its
+ // contributors may be used to endorse or promote products derived from
+ // this software without specific prior written permission.
+ //
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ #include "snappy.h"
+ #include "snappy-internal.h"
+ #include "snappy-sinksource.h"
+
+ #if !defined(SNAPPY_HAVE_SSSE3)
+ // __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD
+ // support between SSE2 and AVX (so SSSE3 instructions require AVX support), and
+ // defines __AVX__ when AVX support is available.
+ #if defined(__SSSE3__) || defined(__AVX__)
+ #define SNAPPY_HAVE_SSSE3 1
+ #else
+ #define SNAPPY_HAVE_SSSE3 0
+ #endif
+ #endif // !defined(SNAPPY_HAVE_SSSE3)
+
+ #if !defined(SNAPPY_HAVE_BMI2)
+ // __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2
+ // specifically, but it does define __AVX2__ when AVX2 support is available.
+ // Fortunately, AVX2 was introduced in Haswell, just like BMI2.
+ //
+ // BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So,
+ // GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which
+ // case issuing BMI2 instructions results in a compiler error.
+ #if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
+ #define SNAPPY_HAVE_BMI2 1
+ #else
+ #define SNAPPY_HAVE_BMI2 0
+ #endif
+ #endif // !defined(SNAPPY_HAVE_BMI2)
+
+ #if SNAPPY_HAVE_SSSE3
+ // Please do not replace with <x86intrin.h>. or with headers that assume more
+ // advanced SSE versions without checking with all the OWNERS.
+ #include <tmmintrin.h>
+ #endif
+
+ #if SNAPPY_HAVE_BMI2
+ // Please do not replace with <x86intrin.h>. or with headers that assume more
+ // advanced SSE versions without checking with all the OWNERS.
+ #include <immintrin.h>
+ #endif
+
+ #include <algorithm>
+ #include <cstdio>
+ #include <cstring>
+ #include <string>
+ #include <vector>
+
+ namespace snappy {
+
+ // The amount of slop bytes writers are using for unconditional copies.
+ constexpr int kSlopBytes = 64;
+
+ using internal::char_table;
+ using internal::COPY_1_BYTE_OFFSET;
+ using internal::COPY_2_BYTE_OFFSET;
+ using internal::COPY_4_BYTE_OFFSET;
+ using internal::kMaximumTagLength;
+ using internal::LITERAL;
+
+ // Any hash function will produce a valid compressed bitstream, but a good
+ // hash function reduces the number of collisions and thus yields better
+ // compression for compressible input, and more speed for incompressible
+ // input. Of course, it doesn't hurt if the hash function is reasonably fast
+ // either, as it gets called a lot.
+ static inline uint32_t HashBytes(uint32_t bytes, int shift) {
+ uint32_t kMul = 0x1e35a7bd;
+ return (bytes * kMul) >> shift;
+ }
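
Editor's aside (illustrative, not part of the snappy.cc diff): the hash above is a plain multiplicative hash, and the shift is chosen so the product's top bits index a power-of-two table. A minimal sketch of that relationship, with illustrative names:

#include <cassert>
#include <cstdint>

// For a table of 1 << table_bits slots, the caller passes shift = 32 - table_bits,
// so the result always falls inside the table.
inline uint32_t example_hash(uint32_t bytes, int table_bits) {
    const uint32_t kMul = 0x1e35a7bd;          // same constant as above
    const int shift = 32 - table_bits;
    uint32_t h = (bytes * kMul) >> shift;
    assert(h < (1u << table_bits));
    return h;
}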
+
+ size_t MaxCompressedLength(size_t source_bytes) {
+ // Compressed data can be defined as:
+ // compressed := item* literal*
+ // item := literal* copy
+ //
+ // The trailing literal sequence has a space blowup of at most 62/60
+ // since a literal of length 60 needs one tag byte + one extra byte
+ // for length information.
+ //
+ // Item blowup is trickier to measure. Suppose the "copy" op copies
+ // 4 bytes of data. Because of a special check in the encoding code,
+ // we produce a 4-byte copy only if the offset is < 65536. Therefore
+ // the copy op takes 3 bytes to encode, and this type of item leads
+ // to at most the 62/60 blowup for representing literals.
+ //
+ // Suppose the "copy" op copies 5 bytes of data. If the offset is big
+ // enough, it will take 5 bytes to encode the copy op. Therefore the
+ // worst case here is a one-byte literal followed by a five-byte copy.
+ // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
+ //
+ // This last factor dominates the blowup, so the final estimate is:
+ return 32 + source_bytes + source_bytes / 6;
+ }
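
Editor's aside (illustrative, not from the diff): plugging numbers into the bound above, a full 64 KiB block can grow to at most 32 + 65536 + 65536/6 = 76490 bytes, roughly a 17% worst-case expansion.

// Compile-time check of that arithmetic, using a copy of the formula above.
#include <cstddef>
constexpr size_t example_max_compressed_length(size_t n) { return 32 + n + n / 6; }
static_assert(example_max_compressed_length(65536) == 76490, "worst case for one 64 KiB block");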
+
+ namespace {
+
+ void UnalignedCopy64(const void* src, void* dst) {
+ char tmp[8];
+ std::memcpy(tmp, src, 8);
+ std::memcpy(dst, tmp, 8);
+ }
+
+ void UnalignedCopy128(const void* src, void* dst) {
+ // std::memcpy() gets vectorized when the appropriate compiler options are
+ // used. For example, x86 compilers targeting SSE2+ will optimize to an SSE2
+ // load and store.
+ char tmp[16];
+ std::memcpy(tmp, src, 16);
+ std::memcpy(dst, tmp, 16);
+ }
+
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used
+ // for handling COPY operations where the input and output regions may overlap.
+ // For example, suppose:
+ // src == "ab"
+ // op == src + 2
+ // op_limit == op + 20
+ // After IncrementalCopySlow(src, op, op_limit), the result will have eleven
+ // copies of "ab"
+ // ababababababababababab
+ // Note that this does not match the semantics of either std::memcpy() or
+ // std::memmove().
+ inline char* IncrementalCopySlow(const char* src, char* op,
+ char* const op_limit) {
+ // TODO: Remove pragma when LLVM is aware this
+ // function is only called in cold regions and when cold regions don't get
+ // vectorized or unrolled.
+ #ifdef __clang__
+ #pragma clang loop unroll(disable)
+ #endif
+ while (op < op_limit) {
+ *op++ = *src++;
+ }
+ return op_limit;
+ }
+
+ #if SNAPPY_HAVE_SSSE3
+
+ // This is a table of shuffle control masks that can be used as the source
+ // operand for PSHUFB to permute the contents of the destination XMM register
+ // into a repeating byte pattern.
+ alignas(16) const char pshufb_fill_patterns[7][16] = {
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
+ {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
+ {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3},
+ {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0},
+ {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3},
+ {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1},
+ };
+
+ #endif // SNAPPY_HAVE_SSSE3
+
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
+ // IncrementalCopySlow. buf_limit is the address past the end of the writable
+ // region of the buffer.
+ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
+ char* const buf_limit) {
+ // Terminology:
+ //
+ // slop = buf_limit - op
+ // pat = op - src
+ // len = limit - op
+ assert(src < op);
+ assert(op <= op_limit);
+ assert(op_limit <= buf_limit);
+ // NOTE: The copy tags use 3 or 6 bits to store the copy length, so len <= 64.
+ assert(op_limit - op <= 64);
+ // NOTE: In practice the compressor always emits len >= 4, so it is ok to
+ // assume that to optimize this function, but this is not guaranteed by the
+ // compression format, so we have to also handle len < 4 in case the input
+ // does not satisfy these conditions.
+
+ size_t pattern_size = op - src;
+ // The cases are split into different branches to allow the branch predictor,
+ // FDO, and static prediction hints to work better. For each input we list the
+ // ratio of invocations that match each condition.
+ //
+ // input slop < 16 pat < 8 len > 16
+ // ------------------------------------------
+ // html|html4|cp 0% 1.01% 27.73%
+ // urls 0% 0.88% 14.79%
+ // jpg 0% 64.29% 7.14%
+ // pdf 0% 2.56% 58.06%
+ // txt[1-4] 0% 0.23% 0.97%
+ // pb 0% 0.96% 13.88%
+ // bin 0.01% 22.27% 41.17%
+ //
+ // It is very rare that we don't have enough slop for doing block copies. It
+ // is also rare that we need to expand a pattern. Small patterns are common
+ // for incompressible formats and for those we are plenty fast already.
+ // Lengths are normally not greater than 16 but they vary depending on the
+ // input. In general if we always predict len <= 16 it would be an ok
+ // prediction.
+ //
+ // In order to be fast we want a pattern >= 8 bytes and an unrolled loop
+ // copying 2x 8 bytes at a time.
+
+ // Handle the uncommon case where pattern is less than 8 bytes.
+ if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
+ #if SNAPPY_HAVE_SSSE3
+ // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB
+ // to permute the register's contents in-place into a repeating sequence of
+ // the first "pattern_size" bytes.
+ // For example, suppose:
+ // src == "abc"
+ // op == src + 3
+ // After _mm_shuffle_epi8(), "pattern" will have five copies of "abc"
+ // followed by one byte of slop: abcabcabcabcabca.
+ //
+ // The non-SSE fallback implementation suffers from store-forwarding stalls
+ // because its loads and stores partly overlap. By expanding the pattern
+ // in-place, we avoid the penalty.
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 16)) {
+ const __m128i shuffle_mask = _mm_load_si128(
+ reinterpret_cast<const __m128i*>(pshufb_fill_patterns)
+ + pattern_size - 1);
+ const __m128i pattern = _mm_shuffle_epi8(
+ _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)), shuffle_mask);
+ // Uninitialized bytes are masked out by the shuffle mask.
+ // TODO: remove annotation and macro defs once MSan is fixed.
+ SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(&pattern, sizeof(pattern));
+ pattern_size *= 16 / pattern_size;
+ char* op_end = std::min(op_limit, buf_limit - 15);
+ while (op < op_end) {
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(op), pattern);
+ op += pattern_size;
+ }
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
+ }
+ return IncrementalCopySlow(src, op, op_limit);
+ #else // !SNAPPY_HAVE_SSSE3
+ // If plenty of buffer space remains, expand the pattern to at least 8
+ // bytes. The way the following loop is written, we need 8 bytes of buffer
+ // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10
+ // bytes if pattern_size is 2. Precisely encoding that is probably not
+ // worthwhile; instead, invoke the slow path if we cannot write 11 bytes
+ // (because 11 are required in the worst case).
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) {
+ while (pattern_size < 8) {
+ UnalignedCopy64(src, op);
+ op += pattern_size;
+ pattern_size *= 2;
+ }
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
+ } else {
+ return IncrementalCopySlow(src, op, op_limit);
+ }
+ #endif // SNAPPY_HAVE_SSSE3
+ }
+ assert(pattern_size >= 8);
+
+ // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single
+ // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
+ // because expanding the pattern to at least 8 bytes guarantees that
+ // op - src >= 8.
+ //
+ // Typically, the op_limit is the gating factor so try to simplify the loop
+ // based on that.
+ if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 16)) {
+ // There is at least one, and at most four 16-byte blocks. Writing four
+ // conditionals instead of a loop allows FDO to layout the code with respect
+ // to the actual probabilities of each length.
+ // TODO: Replace with loop with trip count hint.
+ UnalignedCopy64(src, op);
+ UnalignedCopy64(src + 8, op + 8);
+
+ if (op + 16 < op_limit) {
+ UnalignedCopy64(src + 16, op + 16);
+ UnalignedCopy64(src + 24, op + 24);
+ }
+ if (op + 32 < op_limit) {
+ UnalignedCopy64(src + 32, op + 32);
+ UnalignedCopy64(src + 40, op + 40);
+ }
+ if (op + 48 < op_limit) {
+ UnalignedCopy64(src + 48, op + 48);
+ UnalignedCopy64(src + 56, op + 56);
+ }
+ return op_limit;
+ }
+
+ // Fall back to doing as much as we can with the available slop in the
+ // buffer. This code path is relatively cold however so we save code size by
+ // avoiding unrolling and vectorizing.
+ //
+ // TODO: Remove pragma when cold regions don't get vectorized
+ // or unrolled.
+ #ifdef __clang__
+ #pragma clang loop unroll(disable)
+ #endif
+ for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) {
+ UnalignedCopy64(src, op);
+ UnalignedCopy64(src + 8, op + 8);
+ }
+ if (op >= op_limit)
+ return op_limit;
+
+ // We only take this branch if we didn't have enough slop and we can do a
+ // single 8 byte copy.
+ if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
+ UnalignedCopy64(src, op);
+ src += 8;
+ op += 8;
+ }
+ return IncrementalCopySlow(src, op, op_limit);
+ }
+
+ } // namespace
+
+ template <bool allow_fast_path>
+ static inline char* EmitLiteral(char* op,
+ const char* literal,
+ int len) {
+ // The vast majority of copies are below 16 bytes, for which a
+ // call to std::memcpy() is overkill. This fast path can sometimes
+ // copy up to 15 bytes too much, but that is okay in the
+ // main loop, since we have a bit to go on for both sides:
+ //
+ // - The input will always have kInputMarginBytes = 15 extra
+ // available bytes, as long as we're in the main loop, and
+ // if not, allow_fast_path = false.
+ // - The output will always have 32 spare bytes (see
+ // MaxCompressedLength).
+ assert(len > 0); // Zero-length literals are disallowed
+ int n = len - 1;
+ if (allow_fast_path && len <= 16) {
+ // Fits in tag byte
+ *op++ = LITERAL | (n << 2);
+
+ UnalignedCopy128(literal, op);
+ return op + len;
+ }
+
+ if (n < 60) {
+ // Fits in tag byte
+ *op++ = LITERAL | (n << 2);
+ } else {
+ int count = (Bits::Log2Floor(n) >> 3) + 1;
+ assert(count >= 1);
+ assert(count <= 4);
+ *op++ = LITERAL | ((59 + count) << 2);
+ // Encode in upcoming bytes.
+ // Write 4 bytes, though we may care about only 1 of them. The output buffer
+ // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds
+ // here and there is a std::memcpy() of size 'len' below.
+ LittleEndian::Store32(op, n);
+ op += count;
+ }
+ std::memcpy(op, literal, len);
+ return op + len;
+ }
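
Editor's aside (illustrative sketch of the tag layout used above, not part of the diff): a literal tag stores len-1 in the upper six bits when the length fits, otherwise a marker value of 60..63 says how many little-endian length bytes follow.

#include <cstdint>
// Returns the number of header bytes written; handles literals up to 256 bytes,
// which covers the one-extra-byte case (tag 60 << 2) described above.
inline int example_literal_header(uint8_t* dst, uint32_t len) {
    const uint32_t n = len - 1;                  // stored biased by one, as in EmitLiteral
    if (n < 60) { dst[0] = static_cast<uint8_t>(n << 2); return 1; }
    dst[0] = 60 << 2;                            // "length follows in the next 1 byte"
    dst[1] = static_cast<uint8_t>(n);
    return 2;
}
// example_literal_header(buf, 100) writes 0xF0 0x63: tag value 60, then n = 99.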
+
+ template <bool len_less_than_12>
+ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) {
+ assert(len <= 64);
+ assert(len >= 4);
+ assert(offset < 65536);
+ assert(len_less_than_12 == (len < 12));
+
+ if (len_less_than_12) {
+ uint32_t u = (len << 2) + (offset << 8);
+ uint32_t copy1 = COPY_1_BYTE_OFFSET - (4 << 2) + ((offset >> 3) & 0xe0);
+ uint32_t copy2 = COPY_2_BYTE_OFFSET - (1 << 2);
+ // It turns out that offset < 2048 is a difficult to predict branch.
+ // `perf record` shows this is the highest percentage of branch misses in
+ // benchmarks. This code produces branch free code, the data dependency
+ // chain that bottlenecks the throughput is so long that a few extra
+ // instructions are completely free (IPC << 6 because of data deps).
+ u += offset < 2048 ? copy1 : copy2;
+ LittleEndian::Store32(op, u);
+ op += offset < 2048 ? 2 : 3;
+ } else {
+ // Write 4 bytes, though we only care about 3 of them. The output buffer
+ // is required to have some slack, so the extra byte won't overrun it.
+ uint32_t u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8);
+ LittleEndian::Store32(op, u);
+ op += 3;
+ }
+ return op;
+ }
+
+ template <bool len_less_than_12>
+ static inline char* EmitCopy(char* op, size_t offset, size_t len) {
+ assert(len_less_than_12 == (len < 12));
+ if (len_less_than_12) {
+ return EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
+ } else {
+ // A special case for len <= 64 might help, but so far measurements suggest
+ // it's in the noise.
+
+ // Emit 64 byte copies but make sure to keep at least four bytes reserved.
+ while (SNAPPY_PREDICT_FALSE(len >= 68)) {
+ op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 64);
+ len -= 64;
+ }
+
+ // One or two copies will now finish the job.
+ if (len > 64) {
+ op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 60);
+ len -= 60;
+ }
+
+ // Emit remainder.
+ if (len < 12) {
+ op = EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
+ } else {
+ op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, len);
+ }
+ return op;
+ }
+ }
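
Editor's aside (illustrative, not from the diff): the two copy forms selected above differ only in how the offset is stored, which is why the offset < 2048 test decides between a 2-byte and a 3-byte element.

#include <cstdint>
// Size in bytes of the element EmitCopyAtMost64 would emit for one call
// (assumes 4 <= len <= 64 and offset < 65536, as the asserts above require).
inline int example_copy_tag_size(uint32_t offset, uint32_t len) {
    if (len < 12 && offset < 2048) return 2;   // COPY_1_BYTE_OFFSET: 3-bit length, 11-bit offset
    return 3;                                  // COPY_2_BYTE_OFFSET: 6-bit length, 16-bit offset
}
// e.g. a 7-byte copy at offset 1000 costs 2 bytes; the same copy at offset 5000 costs 3.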
+
+ bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
+ uint32_t v = 0;
+ const char* limit = start + n;
+ if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
+ *result = v;
+ return true;
+ } else {
+ return false;
+ }
+ }
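
Editor's aside (illustrative, not from the diff): the length prefix parsed here is a little-endian base-128 varint, seven payload bits per byte with the high bit flagging "more bytes follow"; for example 300 encodes as 0xAC 0x02 and 65536 as 0x80 0x80 0x04. A hypothetical stand-alone parser for that preamble (snappy's own Varint helpers live elsewhere in the vendored sources):

#include <cstddef>
#include <cstdint>
inline bool example_parse_uvarint32(const uint8_t* p, size_t n, uint32_t* out) {
    uint32_t result = 0;
    for (size_t i = 0; i < n && i < 5; ++i) {
        result |= static_cast<uint32_t>(p[i] & 0x7f) << (7 * i);
        if ((p[i] & 0x80) == 0) { *out = result; return true; }
    }
    return false;  // truncated input or more than five bytes
}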
+
+ namespace {
+ uint32_t CalculateTableSize(uint32_t input_size) {
+ static_assert(
+ kMaxHashTableSize >= kMinHashTableSize,
+ "kMaxHashTableSize should be greater or equal to kMinHashTableSize.");
+ if (input_size > kMaxHashTableSize) {
+ return kMaxHashTableSize;
+ }
+ if (input_size < kMinHashTableSize) {
+ return kMinHashTableSize;
+ }
+ // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1.
+ // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)).
+ return 2u << Bits::Log2Floor(input_size - 1);
+ }
+ } // namespace
+
+ namespace internal {
+ WorkingMemory::WorkingMemory(size_t input_size) {
+ const size_t max_fragment_size = std::min(input_size, kBlockSize);
+ const size_t table_size = CalculateTableSize(max_fragment_size);
+ size_ = table_size * sizeof(*table_) + max_fragment_size +
+ MaxCompressedLength(max_fragment_size);
+ mem_ = std::allocator<char>().allocate(size_);
+ table_ = reinterpret_cast<uint16_t*>(mem_);
+ input_ = mem_ + table_size * sizeof(*table_);
+ output_ = input_ + max_fragment_size;
+ }
+
+ WorkingMemory::~WorkingMemory() {
+ std::allocator<char>().deallocate(mem_, size_);
+ }
+
+ uint16_t* WorkingMemory::GetHashTable(size_t fragment_size,
+ int* table_size) const {
+ const size_t htsize = CalculateTableSize(fragment_size);
+ memset(table_, 0, htsize * sizeof(*table_));
+ *table_size = htsize;
+ return table_;
+ }
+ } // end namespace internal
+
+ // Flat array compression that does not emit the "uncompressed length"
+ // prefix. Compresses "input" string to the "*op" buffer.
+ //
+ // REQUIRES: "input" is at most "kBlockSize" bytes long.
+ // REQUIRES: "op" points to an array of memory that is at least
+ // "MaxCompressedLength(input.size())" in size.
+ // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+ // REQUIRES: "table_size" is a power of two
+ //
+ // Returns an "end" pointer into "op" buffer.
+ // "end - op" is the compressed size of "input".
+ namespace internal {
+ char* CompressFragment(const char* input,
+ size_t input_size,
+ char* op,
+ uint16_t* table,
+ const int table_size) {
+ // "ip" is the input pointer, and "op" is the output pointer.
+ const char* ip = input;
+ assert(input_size <= kBlockSize);
+ assert((table_size & (table_size - 1)) == 0); // table must be power of two
+ const int shift = 32 - Bits::Log2Floor(table_size);
+ assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
+ const char* ip_end = input + input_size;
+ const char* base_ip = ip;
+
+ const size_t kInputMarginBytes = 15;
+ if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+ const char* ip_limit = input + input_size - kInputMarginBytes;
+
+ for (uint32_t preload = LittleEndian::Load32(ip + 1);;) {
+ // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
+ // [next_emit, ip_end) after the main loop.
+ const char* next_emit = ip++;
+ uint64_t data = LittleEndian::Load64(ip);
+ // The body of this loop calls EmitLiteral once and then EmitCopy one or
+ // more times. (The exception is that when we're close to exhausting
+ // the input we goto emit_remainder.)
+ //
+ // In the first iteration of this loop we're just starting, so
+ // there's nothing to copy, so calling EmitLiteral once is
+ // necessary. And we only start a new iteration when the
+ // current iteration has determined that a call to EmitLiteral will
+ // precede the next call to EmitCopy (if any).
+ //
+ // Step 1: Scan forward in the input looking for a 4-byte-long match.
+ // If we get close to exhausting the input then goto emit_remainder.
+ //
+ // Heuristic match skipping: If 32 bytes are scanned with no matches
+ // found, start looking only at every other byte. If 32 more bytes are
+ // scanned (or skipped), look at every third byte, etc.. When a match is
+ // found, immediately go back to looking at every byte. This is a small
+ // loss (~5% performance, ~0.1% density) for compressible data due to more
+ // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+ // win since the compressor quickly "realizes" the data is incompressible
+ // and doesn't bother looking for matches everywhere.
+ //
+ // The "skip" variable keeps track of how many bytes there are since the
+ // last match; dividing it by 32 (ie. right-shifting by five) gives the
+ // number of bytes to move ahead for each iteration.
+ uint32_t skip = 32;
+
+ const char* candidate;
+ if (ip_limit - ip >= 16) {
+ auto delta = ip - base_ip;
+ for (int j = 0; j < 4; ++j) {
+ for (int k = 0; k < 4; ++k) {
+ int i = 4 * j + k;
+ // These for-loops are meant to be unrolled. So we can freely
+ // special case the first iteration to use the value already
+ // loaded in preload.
+ uint32_t dword = i == 0 ? preload : static_cast<uint32_t>(data);
+ assert(dword == LittleEndian::Load32(ip + i));
+ uint32_t hash = HashBytes(dword, shift);
+ candidate = base_ip + table[hash];
+ assert(candidate >= base_ip);
+ assert(candidate < ip + i);
+ table[hash] = delta + i;
+ if (SNAPPY_PREDICT_FALSE(LittleEndian::Load32(candidate) == dword)) {
+ *op = LITERAL | (i << 2);
+ UnalignedCopy128(next_emit, op + 1);
+ ip += i;
+ op = op + i + 2;
+ goto emit_match;
+ }
+ data >>= 8;
+ }
+ data = LittleEndian::Load64(ip + 4 * j + 4);
+ }
+ ip += 16;
+ skip += 16;
+ }
+ while (true) {
+ assert(static_cast<uint32_t>(data) == LittleEndian::Load32(ip));
+ uint32_t hash = HashBytes(data, shift);
+ uint32_t bytes_between_hash_lookups = skip >> 5;
+ skip += bytes_between_hash_lookups;
+ const char* next_ip = ip + bytes_between_hash_lookups;
+ if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
+ ip = next_emit;
+ goto emit_remainder;
+ }
+ candidate = base_ip + table[hash];
+ assert(candidate >= base_ip);
+ assert(candidate < ip);
+
+ table[hash] = ip - base_ip;
+ if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
+ LittleEndian::Load32(candidate))) {
+ break;
+ }
+ data = LittleEndian::Load32(next_ip);
+ ip = next_ip;
+ }
+
+ // Step 2: A 4-byte match has been found. We'll later see if more
+ // than 4 bytes match. But, prior to the match, input
+ // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
+ assert(next_emit + 16 <= ip_end);
+ op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit, ip - next_emit);
+
+ // Step 3: Call EmitCopy, and then see if another EmitCopy could
+ // be our next move. Repeat until we find no match for the
+ // input immediately after what was consumed by the last EmitCopy call.
+ //
+ // If we exit this loop normally then we need to call EmitLiteral next,
+ // though we don't yet know how big the literal will be. We handle that
+ // by proceeding to the next iteration of the main loop. We also can exit
+ // this loop via goto if we get close to exhausting the input.
+ emit_match:
+ do {
+ // We have a 4-byte match at ip, and no need to emit any
+ // "literal bytes" prior to ip.
+ const char* base = ip;
+ std::pair<size_t, bool> p =
+ FindMatchLength(candidate + 4, ip + 4, ip_end, &data);
+ size_t matched = 4 + p.first;
+ ip += matched;
+ size_t offset = base - candidate;
+ assert(0 == memcmp(base, candidate, matched));
+ if (p.second) {
+ op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched);
+ } else {
+ op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched);
+ }
+ if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
+ goto emit_remainder;
+ }
+ // Expect 5 bytes to match
+ assert((data & 0xFFFFFFFFFF) ==
+ (LittleEndian::Load64(ip) & 0xFFFFFFFFFF));
+ // We are now looking for a 4-byte match again. We read
+ // table[Hash(ip, shift)] for that. To improve compression,
+ // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
+ table[HashBytes(LittleEndian::Load32(ip - 1), shift)] =
+ ip - base_ip - 1;
+ uint32_t hash = HashBytes(data, shift);
+ candidate = base_ip + table[hash];
+ table[hash] = ip - base_ip;
+ // Measurements on the benchmarks have shown the following probabilities
+ // for the loop to exit (ie. avg. number of iterations is reciprocal).
+ // BM_Flat/6 txt1 p = 0.3-0.4
+ // BM_Flat/7 txt2 p = 0.35
+ // BM_Flat/8 txt3 p = 0.3-0.4
+ // BM_Flat/9 txt3 p = 0.34-0.4
+ // BM_Flat/10 pb p = 0.4
+ // BM_Flat/11 gaviota p = 0.1
+ // BM_Flat/12 cp p = 0.5
+ // BM_Flat/13 c p = 0.3
+ } while (static_cast<uint32_t>(data) == LittleEndian::Load32(candidate));
+ // Because the least significant 5 bytes matched, we can utilize data
+ // for the next iteration.
+ preload = data >> 8;
+ }
+ }
+
+ emit_remainder:
+ // Emit the remaining bytes as a literal
+ if (ip < ip_end) {
+ op = EmitLiteral</*allow_fast_path=*/false>(op, ip, ip_end - ip);
+ }
+
+ return op;
+ }
+ } // end namespace internal
+
+ // Called back at every compression call to trace parameters and sizes.
+ static inline void Report(const char *algorithm, size_t compressed_size,
+ size_t uncompressed_size) {
+ // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+ (void)algorithm;
+ (void)compressed_size;
+ (void)uncompressed_size;
+ }
+
+ // Signature of output types needed by decompression code.
+ // The decompression code is templatized on a type that obeys this
+ // signature so that we do not pay virtual function call overhead in
+ // the middle of a tight decompression loop.
+ //
+ // class DecompressionWriter {
+ // public:
+ // // Called before decompression
+ // void SetExpectedLength(size_t length);
+ //
+ // // For performance a writer may choose to donate the cursor variable to the
+ // // decompression function. The decompression will inject it in all its
+ // // function calls to the writer. Keeping the important output cursor as a
+ // // function local stack variable allows the compiler to keep it in
+ // // register, which greatly aids performance by avoiding loads and stores of
+ // // this variable in the fast path loop iterations.
+ // T GetOutputPtr() const;
+ //
+ // // At end of decompression the loop donates the ownership of the cursor
+ // // variable back to the writer by calling this function.
+ // void SetOutputPtr(T op);
+ //
+ // // Called after decompression
+ // bool CheckLength() const;
+ //
+ // // Called repeatedly during decompression
+ // // Each function gets a pointer to the op (output pointer), that the writer
+ // // can use and update. Note it's important that these functions get fully
+ // // inlined so that no actual address of the local variable needs to be
+ // // taken.
+ // bool Append(const char* ip, size_t length, T* op);
+ // bool AppendFromSelf(uint32_t offset, size_t length, T* op);
+ //
+ // // The rules for how TryFastAppend differs from Append are somewhat
+ // // convoluted:
+ // //
+ // // - TryFastAppend is allowed to decline (return false) at any
+ // // time, for any reason -- just "return false" would be
+ // // a perfectly legal implementation of TryFastAppend.
+ // // The intention is for TryFastAppend to allow a fast path
+ // // in the common case of a small append.
+ // // - TryFastAppend is allowed to read up to <available> bytes
+ // // from the input buffer, whereas Append is allowed to read
+ // // <length>. However, if it returns true, it must leave
+ // // at least five (kMaximumTagLength) bytes in the input buffer
+ // // afterwards, so that there is always enough space to read the
+ // // next tag without checking for a refill.
+ // // - TryFastAppend must always decline (return false)
+ // // if <length> is 61 or more, as in this case the literal length is not
+ // // decoded fully. In practice, this should not be a big problem,
+ // // as it is unlikely that one would implement a fast path accepting
+ // // this much data.
+ // //
+ // bool TryFastAppend(const char* ip, size_t available, size_t length, T* op);
+ // };
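
Editor's aside (illustrative only): a minimal type satisfying the contract sketched above could look like the following; the real writers (SnappyIOVecWriter and friends) appear later in this file.

#include <cstddef>
#include <cstdint>
#include <cstring>
// Decompresses into a single flat buffer; every call is trivially inlinable.
struct ExampleFlatWriter {
    char* start;            // beginning of the destination buffer
    char* expected_end;     // start + expected uncompressed length
    char* final_op;         // cursor handed back via SetOutputPtr()
    explicit ExampleFlatWriter(char* dst) : start(dst), expected_end(dst), final_op(dst) {}
    void SetExpectedLength(size_t n) { expected_end = start + n; }
    char* GetOutputPtr() { return start; }
    void SetOutputPtr(char* op) { final_op = op; }
    bool CheckLength() const { return final_op == expected_end; }
    void Flush() {}
    bool TryFastAppend(const char*, size_t, size_t, char**) { return false; }  // declining is always legal
    bool Append(const char* ip, size_t len, char** op) {
        if (len > static_cast<size_t>(expected_end - *op)) return false;
        std::memcpy(*op, ip, len); *op += len; return true;
    }
    bool AppendFromSelf(uint32_t offset, size_t len, char** op) {
        if (offset == 0 || static_cast<size_t>(*op - start) < offset ||
            len > static_cast<size_t>(expected_end - *op)) return false;
        for (size_t i = 0; i < len; ++i) (*op)[i] = *(*op + i - offset);  // byte-wise: regions may overlap
        *op += len; return true;
    }
};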
+
+ static inline uint32_t ExtractLowBytes(uint32_t v, int n) {
+ assert(n >= 0);
+ assert(n <= 4);
+ #if SNAPPY_HAVE_BMI2
+ return _bzhi_u32(v, 8 * n);
+ #else
+ // This needs to be wider than uint32_t otherwise `mask << 32` will be
+ // undefined.
+ uint64_t mask = 0xffffffff;
+ return v & ~(mask << (8 * n));
+ #endif
+ }
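
Editor's aside (illustrative): the helper above keeps only the low n bytes of a 32-bit load; a portable constexpr equivalent makes the behaviour easy to check, and this is how the 1..3 trailing offset bytes of a copy tag get peeled off in DecompressAllTags below.

#include <cstdint>
constexpr uint32_t example_low_bytes(uint32_t v, int n) {
    return n >= 4 ? v : (v & ((1u << (8 * n)) - 1));   // avoids the undefined mask << 32 case
}
static_assert(example_low_bytes(0xAABBCCDDu, 2) == 0x0000CCDDu, "keep two bytes");
static_assert(example_low_bytes(0xAABBCCDDu, 0) == 0u, "keep nothing");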
+
+ static inline bool LeftShiftOverflows(uint8_t value, uint32_t shift) {
+ assert(shift < 32);
+ static const uint8_t masks[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe};
+ return (value & masks[shift]) != 0;
+ }
+
+ // Helper class for decompression
+ class SnappyDecompressor {
+ private:
+ Source* reader_; // Underlying source of bytes to decompress
+ const char* ip_; // Points to next buffered byte
+ const char* ip_limit_; // Points just past buffered bytes
+ // If ip < ip_limit_min_maxtaglen_ it's safe to read kMaxTagLength from
+ // buffer.
+ const char* ip_limit_min_maxtaglen_;
+ uint32_t peeked_; // Bytes peeked from reader (need to skip)
+ bool eof_; // Hit end of input without an error?
+ char scratch_[kMaximumTagLength]; // See RefillTag().
+
+ // Ensure that all of the tag metadata for the next tag is available
+ // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even
+ // if (ip_limit_ - ip_ < 5).
+ //
+ // Returns true on success, false on error or end of input.
+ bool RefillTag();
+
+ void ResetLimit(const char* ip) {
+ ip_limit_min_maxtaglen_ =
+ ip_limit_ - std::min<ptrdiff_t>(ip_limit_ - ip, kMaximumTagLength - 1);
+ }
+
+ public:
+ explicit SnappyDecompressor(Source* reader)
+ : reader_(reader),
+ ip_(NULL),
+ ip_limit_(NULL),
+ peeked_(0),
+ eof_(false) {
+ }
+
+ ~SnappyDecompressor() {
+ // Advance past any bytes we peeked at from the reader
+ reader_->Skip(peeked_);
+ }
+
+ // Returns true iff we have hit the end of the input without an error.
+ bool eof() const {
+ return eof_;
+ }
+
+ // Read the uncompressed length stored at the start of the compressed data.
+ // On success, stores the length in *result and returns true.
+ // On failure, returns false.
+ bool ReadUncompressedLength(uint32_t* result) {
+ assert(ip_ == NULL); // Must not have read anything yet
+ // Length is encoded in 1..5 bytes
+ *result = 0;
+ uint32_t shift = 0;
+ while (true) {
+ if (shift >= 32) return false;
+ size_t n;
+ const char* ip = reader_->Peek(&n);
+ if (n == 0) return false;
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+ reader_->Skip(1);
+ uint32_t val = c & 0x7f;
+ if (LeftShiftOverflows(static_cast<uint8_t>(val), shift)) return false;
+ *result |= val << shift;
+ if (c < 128) {
+ break;
+ }
+ shift += 7;
+ }
+ return true;
+ }
+
+ // Process the next item found in the input.
+ // Returns true if successful, false on error or end of input.
+ template <class Writer>
+ #if defined(__GNUC__) && defined(__x86_64__)
+ __attribute__((aligned(32)))
+ #endif
+ void DecompressAllTags(Writer* writer) {
+ const char* ip = ip_;
+ ResetLimit(ip);
+ auto op = writer->GetOutputPtr();
+ // We could have put this refill fragment only at the beginning of the loop.
+ // However, duplicating it at the end of each branch gives the compiler more
+ // scope to optimize the <ip_limit_ - ip> expression based on the local
+ // context, which overall increases speed.
+ #define MAYBE_REFILL() \
+ if (SNAPPY_PREDICT_FALSE(ip >= ip_limit_min_maxtaglen_)) { \
+ ip_ = ip; \
+ if (SNAPPY_PREDICT_FALSE(!RefillTag())) goto exit; \
+ ip = ip_; \
+ ResetLimit(ip); \
+ } \
+ preload = static_cast<uint8_t>(*ip)
+
+ // At the start of the for loop below the least significant byte of preload
+ // contains the tag.
+ uint32_t preload;
+ MAYBE_REFILL();
+ for ( ;; ) {
+ const uint8_t c = static_cast<uint8_t>(preload);
+ ip++;
+
+ // Ratio of iterations that have LITERAL vs non-LITERAL for different
+ // inputs.
+ //
+ // input LITERAL NON_LITERAL
+ // -----------------------------------
+ // html|html4|cp 23% 77%
+ // urls 36% 64%
+ // jpg 47% 53%
+ // pdf 19% 81%
+ // txt[1-4] 25% 75%
+ // pb 24% 76%
+ // bin 24% 76%
+ if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
+ size_t literal_length = (c >> 2) + 1u;
+ if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length, &op)) {
+ assert(literal_length < 61);
+ ip += literal_length;
+ // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend()
+ // will not return true unless there's already at least five spare
+ // bytes in addition to the literal.
+ preload = static_cast<uint8_t>(*ip);
+ continue;
+ }
+ if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
+ // Long literal.
+ const size_t literal_length_length = literal_length - 60;
+ literal_length =
+ ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) +
+ 1;
+ ip += literal_length_length;
+ }
+
+ size_t avail = ip_limit_ - ip;
+ while (avail < literal_length) {
+ if (!writer->Append(ip, avail, &op)) goto exit;
+ literal_length -= avail;
+ reader_->Skip(peeked_);
+ size_t n;
+ ip = reader_->Peek(&n);
+ avail = n;
+ peeked_ = avail;
+ if (avail == 0) goto exit;
+ ip_limit_ = ip + avail;
+ ResetLimit(ip);
+ }
+ if (!writer->Append(ip, literal_length, &op)) goto exit;
+ ip += literal_length;
+ MAYBE_REFILL();
+ } else {
+ if (SNAPPY_PREDICT_FALSE((c & 3) == COPY_4_BYTE_OFFSET)) {
+ const size_t copy_offset = LittleEndian::Load32(ip);
+ const size_t length = (c >> 2) + 1;
+ ip += 4;
+
+ if (!writer->AppendFromSelf(copy_offset, length, &op)) goto exit;
+ } else {
+ const uint32_t entry = char_table[c];
+ preload = LittleEndian::Load32(ip);
+ const uint32_t trailer = ExtractLowBytes(preload, c & 3);
+ const uint32_t length = entry & 0xff;
+
+ // copy_offset/256 is encoded in bits 8..10. By just fetching
+ // those bits, we get copy_offset (since the bit-field starts at
+ // bit 8).
+ const uint32_t copy_offset = (entry & 0x700) + trailer;
+ if (!writer->AppendFromSelf(copy_offset, length, &op)) goto exit;
+
+ ip += (c & 3);
+ // By using the result of the previous load we reduce the critical
+ // dependency chain of ip to 4 cycles.
+ preload >>= (c & 3) * 8;
+ if (ip < ip_limit_min_maxtaglen_) continue;
+ }
+ MAYBE_REFILL();
+ }
+ }
+ #undef MAYBE_REFILL
+ exit:
+ writer->SetOutputPtr(op);
+ }
+ };
+
+ bool SnappyDecompressor::RefillTag() {
+ const char* ip = ip_;
+ if (ip == ip_limit_) {
+ // Fetch a new fragment from the reader
+ reader_->Skip(peeked_); // All peeked bytes are used up
+ size_t n;
+ ip = reader_->Peek(&n);
+ peeked_ = n;
+ eof_ = (n == 0);
+ if (eof_) return false;
+ ip_limit_ = ip + n;
+ }
+
+ // Read the tag character
+ assert(ip < ip_limit_);
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+ const uint32_t entry = char_table[c];
+ const uint32_t needed = (entry >> 11) + 1; // +1 byte for 'c'
+ assert(needed <= sizeof(scratch_));
+
+ // Read more bytes from reader if needed
+ uint32_t nbuf = ip_limit_ - ip;
+ if (nbuf < needed) {
+ // Stitch together bytes from ip and reader to form the word
+ // contents. We store the needed bytes in "scratch_". They
+ // will be consumed immediately by the caller since we do not
+ // read more than we need.
+ std::memmove(scratch_, ip, nbuf);
+ reader_->Skip(peeked_); // All peeked bytes are used up
+ peeked_ = 0;
+ while (nbuf < needed) {
+ size_t length;
+ const char* src = reader_->Peek(&length);
+ if (length == 0) return false;
+ uint32_t to_add = std::min<uint32_t>(needed - nbuf, length);
+ std::memcpy(scratch_ + nbuf, src, to_add);
+ nbuf += to_add;
+ reader_->Skip(to_add);
+ }
+ assert(nbuf == needed);
+ ip_ = scratch_;
+ ip_limit_ = scratch_ + needed;
+ } else if (nbuf < kMaximumTagLength) {
+ // Have enough bytes, but move into scratch_ so that we do not
+ // read past end of input
+ std::memmove(scratch_, ip, nbuf);
+ reader_->Skip(peeked_); // All peeked bytes are used up
+ peeked_ = 0;
+ ip_ = scratch_;
+ ip_limit_ = scratch_ + nbuf;
+ } else {
+ // Pass pointer to buffer returned by reader_.
+ ip_ = ip;
+ }
+ return true;
+ }
+
+ template <typename Writer>
+ static bool InternalUncompress(Source* r, Writer* writer) {
+ // Read the uncompressed length from the front of the compressed input
+ SnappyDecompressor decompressor(r);
+ uint32_t uncompressed_len = 0;
+ if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
+
+ return InternalUncompressAllTags(&decompressor, writer, r->Available(),
+ uncompressed_len);
+ }
+
+ template <typename Writer>
+ static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
+ Writer* writer,
+ uint32_t compressed_len,
+ uint32_t uncompressed_len) {
+ Report("snappy_uncompress", compressed_len, uncompressed_len);
+
+ writer->SetExpectedLength(uncompressed_len);
+
+ // Process the entire input
+ decompressor->DecompressAllTags(writer);
+ writer->Flush();
+ return (decompressor->eof() && writer->CheckLength());
+ }
+
+ bool GetUncompressedLength(Source* source, uint32_t* result) {
+ SnappyDecompressor decompressor(source);
+ return decompressor.ReadUncompressedLength(result);
+ }
+
+ size_t Compress(Source* reader, Sink* writer) {
+ size_t written = 0;
+ size_t N = reader->Available();
+ const size_t uncompressed_size = N;
+ char ulength[Varint::kMax32];
+ char* p = Varint::Encode32(ulength, N);
+ writer->Append(ulength, p-ulength);
+ written += (p - ulength);
+
+ internal::WorkingMemory wmem(N);
+
+ while (N > 0) {
+ // Get next block to compress (without copying if possible)
+ size_t fragment_size;
+ const char* fragment = reader->Peek(&fragment_size);
+ assert(fragment_size != 0); // premature end of input
+ const size_t num_to_read = std::min(N, kBlockSize);
+ size_t bytes_read = fragment_size;
+
+ size_t pending_advance = 0;
+ if (bytes_read >= num_to_read) {
+ // Buffer returned by reader is large enough
+ pending_advance = num_to_read;
+ fragment_size = num_to_read;
+ } else {
+ char* scratch = wmem.GetScratchInput();
+ std::memcpy(scratch, fragment, bytes_read);
+ reader->Skip(bytes_read);
+
+ while (bytes_read < num_to_read) {
+ fragment = reader->Peek(&fragment_size);
+ size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
+ std::memcpy(scratch + bytes_read, fragment, n);
+ bytes_read += n;
+ reader->Skip(n);
+ }
+ assert(bytes_read == num_to_read);
+ fragment = scratch;
+ fragment_size = num_to_read;
+ }
+ assert(fragment_size == num_to_read);
+
+ // Get encoding table for compression
+ int table_size;
+ uint16_t* table = wmem.GetHashTable(num_to_read, &table_size);
+
+ // Compress input_fragment and append to dest
+ const int max_output = MaxCompressedLength(num_to_read);
+
+ // Need a scratch buffer for the output, in case the byte sink doesn't
+ // have room for us directly.
+
+ // Since we encode kBlockSize regions followed by a region
+ // which is <= kBlockSize in length, a previously allocated
+ // scratch_output[] region is big enough for this iteration.
+ char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
+ char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
+ table_size);
+ writer->Append(dest, end - dest);
+ written += (end - dest);
+
+ N -= num_to_read;
+ reader->Skip(pending_advance);
+ }
+
+ Report("snappy_compress", written, uncompressed_size);
+
+ return written;
+ }
1109
+
1110
+ // -----------------------------------------------------------------------
1111
+ // IOVec interfaces
1112
+ // -----------------------------------------------------------------------
1113
+
1114
+ // A type that writes to an iovec.
1115
+ // Note that this is not a "ByteSink", but a type that matches the
1116
+ // Writer template argument to SnappyDecompressor::DecompressAllTags().
1117
+ class SnappyIOVecWriter {
+  private:
+   // output_iov_end_ is set to iov + count and used to determine when
+   // the end of the iovs is reached.
+   const struct iovec* output_iov_end_;
+
+ #if !defined(NDEBUG)
+   const struct iovec* output_iov_;
+ #endif // !defined(NDEBUG)
+
+   // Current iov that is being written into.
+   const struct iovec* curr_iov_;
+
+   // Pointer to current iov's write location.
+   char* curr_iov_output_;
+
+   // Remaining bytes to write into curr_iov_output.
+   size_t curr_iov_remaining_;
+
+   // Total bytes decompressed into output_iov_ so far.
+   size_t total_written_;
+
+   // Maximum number of bytes that will be decompressed into output_iov_.
+   size_t output_limit_;
+
+   static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) {
+     return reinterpret_cast<char*>(iov->iov_base) + offset;
+   }
+
+  public:
+   // Does not take ownership of iov. iov must be valid during the
+   // entire lifetime of the SnappyIOVecWriter.
+   inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
+       : output_iov_end_(iov + iov_count),
+ #if !defined(NDEBUG)
+         output_iov_(iov),
+ #endif // !defined(NDEBUG)
+         curr_iov_(iov),
+         curr_iov_output_(iov_count ? reinterpret_cast<char*>(iov->iov_base)
+                                    : nullptr),
+         curr_iov_remaining_(iov_count ? iov->iov_len : 0),
+         total_written_(0),
+         output_limit_(-1) {}
+
+   inline void SetExpectedLength(size_t len) {
+     output_limit_ = len;
+   }
+
+   inline bool CheckLength() const {
+     return total_written_ == output_limit_;
+   }
+
+   inline bool Append(const char* ip, size_t len, char**) {
+     if (total_written_ + len > output_limit_) {
+       return false;
+     }
+
+     return AppendNoCheck(ip, len);
+   }
+
+   char* GetOutputPtr() { return nullptr; }
+   void SetOutputPtr(char* op) {
+     // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+     (void)op;
+   }
+
+   inline bool AppendNoCheck(const char* ip, size_t len) {
+     while (len > 0) {
+       if (curr_iov_remaining_ == 0) {
+         // This iovec is full. Go to the next one.
+         if (curr_iov_ + 1 >= output_iov_end_) {
+           return false;
+         }
+         ++curr_iov_;
+         curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+         curr_iov_remaining_ = curr_iov_->iov_len;
+       }
+
+       const size_t to_write = std::min(len, curr_iov_remaining_);
+       std::memcpy(curr_iov_output_, ip, to_write);
+       curr_iov_output_ += to_write;
+       curr_iov_remaining_ -= to_write;
+       total_written_ += to_write;
+       ip += to_write;
+       len -= to_write;
+     }
+
+     return true;
+   }
+
+   inline bool TryFastAppend(const char* ip, size_t available, size_t len,
+                             char**) {
+     const size_t space_left = output_limit_ - total_written_;
+     if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
+         curr_iov_remaining_ >= 16) {
+       // Fast path, used for the majority (about 95%) of invocations.
+       UnalignedCopy128(ip, curr_iov_output_);
+       curr_iov_output_ += len;
+       curr_iov_remaining_ -= len;
+       total_written_ += len;
+       return true;
+     }
+
+     return false;
+   }
+
+   inline bool AppendFromSelf(size_t offset, size_t len, char**) {
+     // See SnappyArrayWriter::AppendFromSelf for an explanation of
+     // the "offset - 1u" trick.
+     if (offset - 1u >= total_written_) {
+       return false;
+     }
+     const size_t space_left = output_limit_ - total_written_;
+     if (len > space_left) {
+       return false;
+     }
+
+     // Locate the iovec from which we need to start the copy.
+     const iovec* from_iov = curr_iov_;
+     size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_;
+     while (offset > 0) {
+       if (from_iov_offset >= offset) {
+         from_iov_offset -= offset;
+         break;
+       }
+
+       offset -= from_iov_offset;
+       --from_iov;
+ #if !defined(NDEBUG)
+       assert(from_iov >= output_iov_);
+ #endif // !defined(NDEBUG)
+       from_iov_offset = from_iov->iov_len;
+     }
+
+     // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
+     // the current iovec.
+     while (len > 0) {
+       assert(from_iov <= curr_iov_);
+       if (from_iov != curr_iov_) {
+         const size_t to_copy =
+             std::min(from_iov->iov_len - from_iov_offset, len);
+         AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy);
+         len -= to_copy;
+         if (len > 0) {
+           ++from_iov;
+           from_iov_offset = 0;
+         }
+       } else {
+         size_t to_copy = curr_iov_remaining_;
+         if (to_copy == 0) {
+           // This iovec is full. Go to the next one.
+           if (curr_iov_ + 1 >= output_iov_end_) {
+             return false;
+           }
+           ++curr_iov_;
+           curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+           curr_iov_remaining_ = curr_iov_->iov_len;
+           continue;
+         }
+         if (to_copy > len) {
+           to_copy = len;
+         }
+
+         IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset),
+                         curr_iov_output_, curr_iov_output_ + to_copy,
+                         curr_iov_output_ + curr_iov_remaining_);
+         curr_iov_output_ += to_copy;
+         curr_iov_remaining_ -= to_copy;
+         from_iov_offset += to_copy;
+         total_written_ += to_copy;
+         len -= to_copy;
+       }
+     }
+
+     return true;
+   }
+
+   inline void Flush() {}
+ };
+
+ bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
+                           const struct iovec* iov, size_t iov_cnt) {
+   ByteArraySource reader(compressed, compressed_length);
+   return RawUncompressToIOVec(&reader, iov, iov_cnt);
+ }
+
+ bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
+                           size_t iov_cnt) {
+   SnappyIOVecWriter output(iov, iov_cnt);
+   return InternalUncompress(compressed, &output);
+ }
+
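For reference, a minimal sketch of how the iovec entry points above can be driven; it assumes the vendored snappy.h header and POSIX struct iovec, and the wrapper name and buffer layout are illustrative rather than part of the library:

    #include <sys/uio.h>   // POSIX struct iovec (snappy supplies its own on Windows)
    #include "snappy.h"

    // Decompress `comp`/`comp_len` into two caller-owned regions without
    // concatenating them first. Names and sizes here are illustrative.
    bool uncompress_into_two_buffers(const char* comp, size_t comp_len,
                                     char* head, size_t head_len,
                                     char* tail, size_t tail_len) {
      size_t total = 0;
      if (!snappy::GetUncompressedLength(comp, comp_len, &total) ||
          total > head_len + tail_len) {
        return false;
      }
      struct iovec iov[2];
      iov[0].iov_base = head;
      iov[0].iov_len = head_len;
      iov[1].iov_base = tail;
      iov[1].iov_len = tail_len;
      return snappy::RawUncompressToIOVec(comp, comp_len, iov, 2);
    }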
+ // -----------------------------------------------------------------------
+ // Flat array interfaces
+ // -----------------------------------------------------------------------
+
+ // A type that writes to a flat array.
+ // Note that this is not a "ByteSink", but a type that matches the
+ // Writer template argument to SnappyDecompressor::DecompressAllTags().
+ class SnappyArrayWriter {
+  private:
+   char* base_;
+   char* op_;
+   char* op_limit_;
+   // If op < op_limit_min_slop_ then it's safe to unconditionally write
+   // kSlopBytes starting at op.
+   char* op_limit_min_slop_;
+
+  public:
+   inline explicit SnappyArrayWriter(char* dst)
+       : base_(dst),
+         op_(dst),
+         op_limit_(dst),
+         op_limit_min_slop_(dst) {} // Safe default see invariant.
+
+   inline void SetExpectedLength(size_t len) {
+     op_limit_ = op_ + len;
+     // Prevent pointer from being past the buffer.
+     op_limit_min_slop_ = op_limit_ - std::min<size_t>(kSlopBytes - 1, len);
+   }
+
+   inline bool CheckLength() const {
+     return op_ == op_limit_;
+   }
+
+   char* GetOutputPtr() { return op_; }
+   void SetOutputPtr(char* op) { op_ = op; }
+
+   inline bool Append(const char* ip, size_t len, char** op_p) {
+     char* op = *op_p;
+     const size_t space_left = op_limit_ - op;
+     if (space_left < len) return false;
+     std::memcpy(op, ip, len);
+     *op_p = op + len;
+     return true;
+   }
+
+   inline bool TryFastAppend(const char* ip, size_t available, size_t len,
+                             char** op_p) {
+     char* op = *op_p;
+     const size_t space_left = op_limit_ - op;
+     if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
+       // Fast path, used for the majority (about 95%) of invocations.
+       UnalignedCopy128(ip, op);
+       *op_p = op + len;
+       return true;
+     } else {
+       return false;
+     }
+   }
+
+   SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+   inline bool AppendFromSelf(size_t offset, size_t len, char** op_p) {
+     char* const op = *op_p;
+     assert(op >= base_);
+     char* const op_end = op + len;
+
+     // Check if we try to append from before the start of the buffer.
+     if (SNAPPY_PREDICT_FALSE(static_cast<size_t>(op - base_) < offset))
+       return false;
+
+     if (SNAPPY_PREDICT_FALSE((kSlopBytes < 64 && len > kSlopBytes) ||
+                              op >= op_limit_min_slop_ || offset < len)) {
+       if (op_end > op_limit_ || offset == 0) return false;
+       *op_p = IncrementalCopy(op - offset, op, op_end, op_limit_);
+       return true;
+     }
+     std::memmove(op, op - offset, kSlopBytes);
+     *op_p = op_end;
+     return true;
+   }
+   inline size_t Produced() const {
+     assert(op_ >= base_);
+     return op_ - base_;
+   }
+   inline void Flush() {}
+ };
+
+ bool RawUncompress(const char* compressed, size_t compressed_length,
+                    char* uncompressed) {
+   ByteArraySource reader(compressed, compressed_length);
+   return RawUncompress(&reader, uncompressed);
+ }
+
+ bool RawUncompress(Source* compressed, char* uncompressed) {
+   SnappyArrayWriter output(uncompressed);
+   return InternalUncompress(compressed, &output);
+ }
+
+ bool Uncompress(const char* compressed, size_t compressed_length,
+                 std::string* uncompressed) {
+   size_t ulength;
+   if (!GetUncompressedLength(compressed, compressed_length, &ulength)) {
+     return false;
+   }
+   // On 32-bit builds: max_size() < kuint32max. Check for that instead
+   // of crashing (e.g., consider externally specified compressed data).
+   if (ulength > uncompressed->max_size()) {
+     return false;
+   }
+   STLStringResizeUninitialized(uncompressed, ulength);
+   return RawUncompress(compressed, compressed_length,
+                        string_as_array(uncompressed));
+ }
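The std::string overload above pairs with the Compress() convenience function defined further down in this file; a minimal round-trip sketch, assuming only the vendored snappy.h (the helper name is illustrative):

    #include <string>
    #include "snappy.h"

    // Compress a string and verify that decompression restores it exactly.
    bool snappy_round_trip(const std::string& input) {
      std::string compressed;
      snappy::Compress(input.data(), input.size(), &compressed);

      std::string restored;
      if (!snappy::Uncompress(compressed.data(), compressed.size(), &restored)) {
        return false;  // corrupt or truncated input
      }
      return restored == input;
    }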
+
+ // A Writer that drops everything on the floor and just does validation
+ class SnappyDecompressionValidator {
+  private:
+   size_t expected_;
+   size_t produced_;
+
+  public:
+   inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
+   inline void SetExpectedLength(size_t len) {
+     expected_ = len;
+   }
+   size_t GetOutputPtr() { return produced_; }
+   void SetOutputPtr(size_t op) { produced_ = op; }
+   inline bool CheckLength() const {
+     return expected_ == produced_;
+   }
+   inline bool Append(const char* ip, size_t len, size_t* produced) {
+     // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+     (void)ip;
+
+     *produced += len;
+     return *produced <= expected_;
+   }
+   inline bool TryFastAppend(const char* ip, size_t available, size_t length,
+                             size_t* produced) {
+     // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+     (void)ip;
+     (void)available;
+     (void)length;
+     (void)produced;
+
+     return false;
+   }
+   inline bool AppendFromSelf(size_t offset, size_t len, size_t* produced) {
+     // See SnappyArrayWriter::AppendFromSelf for an explanation of
+     // the "offset - 1u" trick.
+     if (*produced <= offset - 1u) return false;
+     *produced += len;
+     return *produced <= expected_;
+   }
+   inline void Flush() {}
+ };
+
+ bool IsValidCompressedBuffer(const char* compressed, size_t compressed_length) {
+   ByteArraySource reader(compressed, compressed_length);
+   SnappyDecompressionValidator writer;
+   return InternalUncompress(&reader, &writer);
+ }
+
+ bool IsValidCompressed(Source* compressed) {
+   SnappyDecompressionValidator writer;
+   return InternalUncompress(compressed, &writer);
+ }
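The validator above is what makes IsValidCompressedBuffer() cheap: it walks the tag stream without producing any output, so a caller can reject corrupt input before allocating the decompressed buffer. A minimal sketch of that pattern, assuming the vendored snappy.h (the helper is illustrative):

    #include <vector>
    #include "snappy.h"

    // Validate first, then decompress into a caller-owned buffer.
    bool validate_then_uncompress(const char* comp, size_t comp_len,
                                  std::vector<char>* out) {
      size_t ulen = 0;
      if (!snappy::IsValidCompressedBuffer(comp, comp_len) ||
          !snappy::GetUncompressedLength(comp, comp_len, &ulen)) {
        return false;
      }
      out->resize(ulen);
      return snappy::RawUncompress(comp, comp_len, out->data());
    }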
+
+ void RawCompress(const char* input,
+                  size_t input_length,
+                  char* compressed,
+                  size_t* compressed_length) {
+   ByteArraySource reader(input, input_length);
+   UncheckedByteArraySink writer(compressed);
+   Compress(&reader, &writer);
+
+   // Compute how many bytes were added
+   *compressed_length = (writer.CurrentDestination() - compressed);
+ }
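RawCompress() above leaves buffer sizing to the caller, who is expected to reserve MaxCompressedLength(input_length) bytes up front; a minimal sketch of that contract, assuming the vendored snappy.h (the wrapper is illustrative):

    #include <vector>
    #include "snappy.h"

    // Compress into a caller-owned buffer sized with MaxCompressedLength(),
    // then shrink it to the actual compressed size.
    size_t compress_raw(const char* input, size_t input_len,
                        std::vector<char>* out) {
      out->resize(snappy::MaxCompressedLength(input_len));
      size_t compressed_len = 0;
      snappy::RawCompress(input, input_len, out->data(), &compressed_len);
      out->resize(compressed_len);
      return compressed_len;
    }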
+
+ size_t Compress(const char* input, size_t input_length,
+                 std::string* compressed) {
+   // Pre-grow the buffer to the max length of the compressed output
+   STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
+
+   size_t compressed_length;
+   RawCompress(input, input_length, string_as_array(compressed),
+               &compressed_length);
+   compressed->resize(compressed_length);
+   return compressed_length;
+ }
+
+ // -----------------------------------------------------------------------
+ // Sink interface
+ // -----------------------------------------------------------------------
+
+ // A type that decompresses into a Sink. The template parameter
+ // Allocator must export one method "char* Allocate(int size);", which
+ // allocates a buffer of "size" and appends that to the destination.
+ template <typename Allocator>
+ class SnappyScatteredWriter {
+   Allocator allocator_;
+
+   // We need random access into the data generated so far. Therefore
+   // we keep track of all of the generated data as an array of blocks.
+   // All of the blocks except the last have length kBlockSize.
+   std::vector<char*> blocks_;
+   size_t expected_;
+
+   // Total size of all fully generated blocks so far
+   size_t full_size_;
+
+   // Pointer into current output block
+   char* op_base_; // Base of output block
+   char* op_ptr_; // Pointer to next unfilled byte in block
+   char* op_limit_; // Pointer just past block
+   // If op < op_limit_min_slop_ then it's safe to unconditionally write
+   // kSlopBytes starting at op.
+   char* op_limit_min_slop_;
+
+   inline size_t Size() const {
+     return full_size_ + (op_ptr_ - op_base_);
+   }
+
+   bool SlowAppend(const char* ip, size_t len);
+   bool SlowAppendFromSelf(size_t offset, size_t len);
+
+  public:
+   inline explicit SnappyScatteredWriter(const Allocator& allocator)
+       : allocator_(allocator),
+         full_size_(0),
+         op_base_(NULL),
+         op_ptr_(NULL),
+         op_limit_(NULL) {
+   }
+   char* GetOutputPtr() { return op_ptr_; }
+   void SetOutputPtr(char* op) { op_ptr_ = op; }
+
+   inline void SetExpectedLength(size_t len) {
+     assert(blocks_.empty());
+     expected_ = len;
+   }
+
+   inline bool CheckLength() const {
+     return Size() == expected_;
+   }
+
+   // Return the number of bytes actually uncompressed so far
+   inline size_t Produced() const {
+     return Size();
+   }
+
+   inline bool Append(const char* ip, size_t len, char** op_p) {
+     char* op = *op_p;
+     size_t avail = op_limit_ - op;
+     if (len <= avail) {
+       // Fast path
+       std::memcpy(op, ip, len);
+       *op_p = op + len;
+       return true;
+     } else {
+       op_ptr_ = op;
+       bool res = SlowAppend(ip, len);
+       *op_p = op_ptr_;
+       return res;
+     }
+   }
+
+   inline bool TryFastAppend(const char* ip, size_t available, size_t length,
+                             char** op_p) {
+     char* op = *op_p;
+     const int space_left = op_limit_ - op;
+     if (length <= 16 && available >= 16 + kMaximumTagLength &&
+         space_left >= 16) {
+       // Fast path, used for the majority (about 95%) of invocations.
+       UnalignedCopy128(ip, op);
+       *op_p = op + length;
+       return true;
+     } else {
+       return false;
+     }
+   }
+
+   inline bool AppendFromSelf(size_t offset, size_t len, char** op_p) {
+     char* op = *op_p;
+     assert(op >= op_base_);
+     // Check if we try to append from before the start of the buffer.
+     if (SNAPPY_PREDICT_FALSE((kSlopBytes < 64 && len > kSlopBytes) ||
+                              static_cast<size_t>(op - op_base_) < offset ||
+                              op >= op_limit_min_slop_ || offset < len)) {
+       if (offset == 0) return false;
+       char* const op_end = op + len;
+       if (SNAPPY_PREDICT_FALSE(static_cast<size_t>(op - op_base_) < offset ||
+                                op_end > op_limit_)) {
+         op_ptr_ = op;
+         bool res = SlowAppendFromSelf(offset, len);
+         *op_p = op_ptr_;
+         return res;
+       }
+       *op_p = IncrementalCopy(op - offset, op, op_end, op_limit_);
+       return true;
+     }
+     // Fast path
+     char* const op_end = op + len;
+     std::memmove(op, op - offset, kSlopBytes);
+     *op_p = op_end;
+     return true;
+   }
+
+   // Called at the end of the decompress. We ask the allocator
+   // write all blocks to the sink.
+   inline void Flush() { allocator_.Flush(Produced()); }
+ };
+
+ template<typename Allocator>
+ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
+   size_t avail = op_limit_ - op_ptr_;
+   while (len > avail) {
+     // Completely fill this block
+     std::memcpy(op_ptr_, ip, avail);
+     op_ptr_ += avail;
+     assert(op_limit_ - op_ptr_ == 0);
+     full_size_ += (op_ptr_ - op_base_);
+     len -= avail;
+     ip += avail;
+
+     // Bounds check
+     if (full_size_ + len > expected_) return false;
+
+     // Make new block
+     size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
+     op_base_ = allocator_.Allocate(bsize);
+     op_ptr_ = op_base_;
+     op_limit_ = op_base_ + bsize;
+     op_limit_min_slop_ = op_limit_ - std::min<size_t>(kSlopBytes - 1, bsize);
+
+     blocks_.push_back(op_base_);
+     avail = bsize;
+   }
+
+   std::memcpy(op_ptr_, ip, len);
+   op_ptr_ += len;
+   return true;
+ }
+
+ template<typename Allocator>
+ bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
+                                                           size_t len) {
+   // Overflow check
+   // See SnappyArrayWriter::AppendFromSelf for an explanation of
+   // the "offset - 1u" trick.
+   const size_t cur = Size();
+   if (offset - 1u >= cur) return false;
+   if (expected_ - cur < len) return false;
+
+   // Currently we shouldn't ever hit this path because Compress() chops the
+   // input into blocks and does not create cross-block copies. However, it is
+   // nice if we do not rely on that, since we can get better compression if we
+   // allow cross-block copies and thus might want to change the compressor in
+   // the future.
+   // TODO Replace this with a properly optimized path. This is not
+   // triggered right now. But this is so super slow, that it would regress
+   // performance unacceptably if triggered.
+   size_t src = cur - offset;
+   char* op = op_ptr_;
+   while (len-- > 0) {
+     char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)];
+     if (!Append(&c, 1, &op)) {
+       op_ptr_ = op;
+       return false;
+     }
+     src++;
+   }
+   op_ptr_ = op;
+   return true;
+ }
+
+ class SnappySinkAllocator {
+  public:
+   explicit SnappySinkAllocator(Sink* dest): dest_(dest) {}
+   ~SnappySinkAllocator() {}
+
+   char* Allocate(int size) {
+     Datablock block(new char[size], size);
+     blocks_.push_back(block);
+     return block.data;
+   }
+
+   // We flush only at the end, because the writer wants
+   // random access to the blocks and once we hand the
+   // block over to the sink, we can't access it anymore.
+   // Also we don't write more than has been actually written
+   // to the blocks.
+   void Flush(size_t size) {
+     size_t size_written = 0;
+     for (Datablock& block : blocks_) {
+       size_t block_size = std::min<size_t>(block.size, size - size_written);
+       dest_->AppendAndTakeOwnership(block.data, block_size,
+                                     &SnappySinkAllocator::Deleter, NULL);
+       size_written += block_size;
+     }
+     blocks_.clear();
+   }
+
+  private:
+   struct Datablock {
+     char* data;
+     size_t size;
+     Datablock(char* p, size_t s) : data(p), size(s) {}
+   };
+
+   static void Deleter(void* arg, const char* bytes, size_t size) {
+     // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+     (void)arg;
+     (void)size;
+
+     delete[] bytes;
+   }
+
+   Sink* dest_;
+   std::vector<Datablock> blocks_;
+
+   // Note: copying this object is allowed
+ };
+
+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
+   SnappySinkAllocator allocator(uncompressed);
+   SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+   InternalUncompress(compressed, &writer);
+   return writer.Produced();
+ }
+
+ bool Uncompress(Source* compressed, Sink* uncompressed) {
+   // Read the uncompressed length from the front of the compressed input
+   SnappyDecompressor decompressor(compressed);
+   uint32_t uncompressed_len = 0;
+   if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
+     return false;
+   }
+
+   char c;
+   size_t allocated_size;
+   char* buf = uncompressed->GetAppendBufferVariable(
+       1, uncompressed_len, &c, 1, &allocated_size);
+
+   const size_t compressed_len = compressed->Available();
+   // If we can get a flat buffer, then use it, otherwise do block by block
+   // uncompression
+   if (allocated_size >= uncompressed_len) {
+     SnappyArrayWriter writer(buf);
+     bool result = InternalUncompressAllTags(&decompressor, &writer,
+                                             compressed_len, uncompressed_len);
+     uncompressed->Append(buf, writer.Produced());
+     return result;
+   } else {
+     SnappySinkAllocator allocator(uncompressed);
+     SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+     return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
+                                      uncompressed_len);
+   }
+ }
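The Source/Sink overload above can also be driven with the flat-buffer adapters used throughout this file (ByteArraySource and UncheckedByteArraySink); a minimal sketch, assuming the vendored snappy.h and snappy-sinksource.h headers (the wrapper is illustrative):

    #include <string>
    #include "snappy.h"
    #include "snappy-sinksource.h"

    // Stream-style decompression into a pre-sized std::string.
    bool uncompress_via_sink(const std::string& compressed, std::string* output) {
      size_t ulen = 0;
      if (!snappy::GetUncompressedLength(compressed.data(), compressed.size(),
                                         &ulen)) {
        return false;
      }
      output->resize(ulen);
      snappy::ByteArraySource source(compressed.data(), compressed.size());
      snappy::UncheckedByteArraySink sink(&(*output)[0]);
      return snappy::Uncompress(&source, &sink);
    }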
+
+ } // namespace snappy