snappy 0.0.17 → 0.3.0

Files changed (87)
  1. checksums.yaml +5 -5
  2. data/.dockerignore +2 -0
  3. data/.github/workflows/main.yml +34 -0
  4. data/.github/workflows/publish.yml +34 -0
  5. data/.gitignore +2 -1
  6. data/.gitmodules +1 -1
  7. data/Dockerfile +13 -0
  8. data/Gemfile +4 -0
  9. data/README.md +29 -5
  10. data/Rakefile +32 -29
  11. data/ext/api.c +6 -1
  12. data/ext/extconf.rb +23 -16
  13. data/lib/snappy/hadoop/reader.rb +62 -0
  14. data/lib/snappy/hadoop/writer.rb +51 -0
  15. data/lib/snappy/hadoop.rb +22 -0
  16. data/lib/snappy/reader.rb +14 -10
  17. data/lib/snappy/shim.rb +1 -1
  18. data/lib/snappy/version.rb +1 -1
  19. data/lib/snappy.rb +5 -4
  20. data/snappy.gemspec +13 -13
  21. data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
  22. data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
  23. data/test/snappy_hadoop_test.rb +26 -0
  24. data/test/snappy_reader_test.rb +148 -0
  25. data/test/snappy_test.rb +95 -0
  26. data/test/snappy_writer_test.rb +55 -0
  27. data/test/test_helper.rb +7 -0
  28. data/test.sh +3 -0
  29. data/vendor/snappy/CMakeLists.txt +297 -0
  30. data/vendor/snappy/CONTRIBUTING.md +26 -0
  31. data/vendor/snappy/NEWS +40 -0
  32. data/vendor/snappy/{README → README.md} +27 -18
  33. data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
  34. data/vendor/snappy/cmake/config.h.in +62 -0
  35. data/vendor/snappy/docs/README.md +72 -0
  36. data/vendor/snappy/snappy-internal.h +22 -18
  37. data/vendor/snappy/snappy-stubs-internal.cc +1 -1
  38. data/vendor/snappy/snappy-stubs-internal.h +116 -38
  39. data/vendor/snappy/snappy-stubs-public.h.in +20 -46
  40. data/vendor/snappy/snappy-test.cc +26 -22
  41. data/vendor/snappy/snappy-test.h +24 -98
  42. data/vendor/snappy/snappy.cc +380 -183
  43. data/vendor/snappy/snappy.h +14 -10
  44. data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
  45. data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
  46. data/vendor/snappy/snappy_unittest.cc +236 -261
  47. metadata +37 -92
  48. data/.travis.yml +0 -26
  49. data/smoke.sh +0 -8
  50. data/test/test-snappy-reader.rb +0 -129
  51. data/test/test-snappy-writer.rb +0 -55
  52. data/test/test-snappy.rb +0 -58
  53. data/vendor/snappy/ChangeLog +0 -2468
  54. data/vendor/snappy/INSTALL +0 -370
  55. data/vendor/snappy/Makefile +0 -982
  56. data/vendor/snappy/Makefile.am +0 -26
  57. data/vendor/snappy/Makefile.in +0 -982
  58. data/vendor/snappy/aclocal.m4 +0 -9738
  59. data/vendor/snappy/autogen.sh +0 -12
  60. data/vendor/snappy/autom4te.cache/output.0 +0 -18856
  61. data/vendor/snappy/autom4te.cache/output.1 +0 -18852
  62. data/vendor/snappy/autom4te.cache/requests +0 -297
  63. data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
  64. data/vendor/snappy/autom4te.cache/traces.1 +0 -714
  65. data/vendor/snappy/config.guess +0 -1530
  66. data/vendor/snappy/config.h +0 -135
  67. data/vendor/snappy/config.h.in +0 -134
  68. data/vendor/snappy/config.log +0 -1640
  69. data/vendor/snappy/config.status +0 -2318
  70. data/vendor/snappy/config.sub +0 -1773
  71. data/vendor/snappy/configure +0 -18852
  72. data/vendor/snappy/configure.ac +0 -134
  73. data/vendor/snappy/depcomp +0 -688
  74. data/vendor/snappy/install-sh +0 -527
  75. data/vendor/snappy/libtool +0 -10246
  76. data/vendor/snappy/ltmain.sh +0 -9661
  77. data/vendor/snappy/m4/gtest.m4 +0 -74
  78. data/vendor/snappy/m4/libtool.m4 +0 -8001
  79. data/vendor/snappy/m4/ltoptions.m4 +0 -384
  80. data/vendor/snappy/m4/ltsugar.m4 +0 -123
  81. data/vendor/snappy/m4/ltversion.m4 +0 -23
  82. data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
  83. data/vendor/snappy/missing +0 -331
  84. data/vendor/snappy/snappy-stubs-public.h +0 -100
  85. data/vendor/snappy/snappy.pc +0 -10
  86. data/vendor/snappy/snappy.pc.in +0 -10
  87. data/vendor/snappy/stamp-h1 +0 -1
@@ -30,16 +30,50 @@
  #include "snappy-internal.h"
  #include "snappy-sinksource.h"

- #if defined(__x86_64__) || defined(_M_X64)
- #include <emmintrin.h>
+ #if !defined(SNAPPY_HAVE_SSSE3)
+ // __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD
+ // support between SSE2 and AVX (so SSSE3 instructions require AVX support), and
+ // defines __AVX__ when AVX support is available.
+ #if defined(__SSSE3__) || defined(__AVX__)
+ #define SNAPPY_HAVE_SSSE3 1
+ #else
+ #define SNAPPY_HAVE_SSSE3 0
+ #endif
+ #endif // !defined(SNAPPY_HAVE_SSSE3)
+
+ #if !defined(SNAPPY_HAVE_BMI2)
+ // __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2
+ // specifically, but it does define __AVX2__ when AVX2 support is available.
+ // Fortunately, AVX2 was introduced in Haswell, just like BMI2.
+ //
+ // BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So,
+ // GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which
+ // case issuing BMI2 instructions results in a compiler error.
+ #if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
+ #define SNAPPY_HAVE_BMI2 1
+ #else
+ #define SNAPPY_HAVE_BMI2 0
  #endif
+ #endif // !defined(SNAPPY_HAVE_BMI2)
+
+ #if SNAPPY_HAVE_SSSE3
+ // Please do not replace with <x86intrin.h>. or with headers that assume more
+ // advanced SSE versions without checking with all the OWNERS.
+ #include <tmmintrin.h>
+ #endif
+
+ #if SNAPPY_HAVE_BMI2
+ // Please do not replace with <x86intrin.h>. or with headers that assume more
+ // advanced SSE versions without checking with all the OWNERS.
+ #include <immintrin.h>
+ #endif
+
  #include <stdio.h>

  #include <algorithm>
  #include <string>
  #include <vector>

-
  namespace snappy {

  using internal::COPY_1_BYTE_OFFSET;
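Note on the detection macros above: they are always defined to 0 or 1 rather than left undefined, so call sites can use a plain #if and a build can predefine SNAPPY_HAVE_SSSE3=0 to force the portable path. A minimal usage sketch (the Fill16 helper below is hypothetical, not part of snappy):

// Hypothetical sketch of how such a macro gates an intrinsic code path.
#if SNAPPY_HAVE_SSSE3
#include <emmintrin.h>
static void Fill16(char* dst, char byte) {
  // SSE2/SSSE3 build: one 16-byte store.
  _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), _mm_set1_epi8(byte));
}
#else
#include <cstring>
static void Fill16(char* dst, char byte) { std::memset(dst, byte, 16); }
#endif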
@@ -47,7 +81,6 @@ using internal::COPY_2_BYTE_OFFSET;
  using internal::LITERAL;
  using internal::char_table;
  using internal::kMaximumTagLength;
- using internal::wordmask;

  // Any hash function will produce a valid compressed bitstream, but a good
  // hash function reduces the number of collisions and thus yields better
@@ -89,18 +122,18 @@ size_t MaxCompressedLength(size_t source_len) {
  namespace {

  void UnalignedCopy64(const void* src, void* dst) {
- memcpy(dst, src, 8);
+ char tmp[8];
+ memcpy(tmp, src, 8);
+ memcpy(dst, tmp, 8);
  }

  void UnalignedCopy128(const void* src, void* dst) {
- // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
- // SSE2 moves for memcpy(dst, src, 16).
- #ifdef __SSE2__
- __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
- _mm_storeu_si128(static_cast<__m128i*>(dst), x);
- #else
- memcpy(dst, src, 16);
- #endif
+ // memcpy gets vectorized when the appropriate compiler options are used.
+ // For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load
+ // and store.
+ char tmp[16];
+ memcpy(tmp, src, 16);
+ memcpy(dst, tmp, 16);
  }

  // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used
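The temporary-buffer form of UnalignedCopy64/128 is the standard memcpy idiom for unaligned, aliasing-safe loads and stores; with optimization enabled the calls collapse into plain moves. A standalone sketch of the same idiom (illustrative only, not part of the diff):

#include <cstdint>
#include <cstring>

static inline uint64_t LoadUnaligned64(const void* p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));  // no alignment or strict-aliasing UB
  return v;
}

static inline void StoreUnaligned64(void* p, uint64_t v) {
  std::memcpy(p, &v, sizeof(v));  // compiles to a single store at -O2
}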
@@ -115,12 +148,35 @@ void UnalignedCopy128(const void* src, void* dst) {
  // Note that this does not match the semantics of either memcpy() or memmove().
  inline char* IncrementalCopySlow(const char* src, char* op,
  char* const op_limit) {
+ // TODO: Remove pragma when LLVM is aware this
+ // function is only called in cold regions and when cold regions don't get
+ // vectorized or unrolled.
+ #ifdef __clang__
+ #pragma clang loop unroll(disable)
+ #endif
  while (op < op_limit) {
  *op++ = *src++;
  }
  return op_limit;
  }

+ #if SNAPPY_HAVE_SSSE3
+
+ // This is a table of shuffle control masks that can be used as the source
+ // operand for PSHUFB to permute the contents of the destination XMM register
+ // into a repeating byte pattern.
+ alignas(16) const char pshufb_fill_patterns[7][16] = {
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
+ {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
+ {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3},
+ {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0},
+ {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3},
+ {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1},
+ };
+
+ #endif // SNAPPY_HAVE_SSSE3
+
  // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
  // IncrementalCopySlow. buf_limit is the address past the end of the writable
  // region of the buffer.
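A standalone sketch of the PSHUFB expansion these masks enable, assuming an SSSE3 build (-mssse3); the buffer contents are made up purely for illustration:

// Expand the 3-byte pattern "abc" into a repeating 16-byte block with one
// PSHUFB, the same trick IncrementalCopy uses below. Row [pattern_size - 1]
// of pshufb_fill_patterns selects source bytes 0,1,2,0,1,2,...
#include <tmmintrin.h>
#include <cstdio>

int main() {
  const char src[16] = "abc";  // only the first 3 bytes matter
  alignas(16) const char mask3[16] = {0, 1, 2, 0, 1, 2, 0, 1,
                                      2, 0, 1, 2, 0, 1, 2, 0};
  __m128i pattern = _mm_shuffle_epi8(
      _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)),
      _mm_load_si128(reinterpret_cast<const __m128i*>(mask3)));
  char out[17] = {0};
  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), pattern);
  std::printf("%s\n", out);  // prints "abcabcabcabcabca"
}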
@@ -132,9 +188,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
  // pat = op - src
  // len = limit - op
  assert(src < op);
+ assert(op <= op_limit);
  assert(op_limit <= buf_limit);
  // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
- // to optimize this function but we have to also handle these cases in case
+ // to optimize this function but we have to also handle other cases in case
  // the input does not satisfy these conditions.

  size_t pattern_size = op - src;
@@ -163,26 +220,56 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
  // copying 2x 8 bytes at a time.

  // Handle the uncommon case where pattern is less than 8 bytes.
- if (PREDICT_FALSE(pattern_size < 8)) {
- // Expand pattern to at least 8 bytes. The worse case scenario in terms of
- // buffer usage is when the pattern is size 3. ^ is the original position
- // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
+ if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
+ #if SNAPPY_HAVE_SSSE3
+ // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB
+ // to permute the register's contents in-place into a repeating sequence of
+ // the first "pattern_size" bytes.
+ // For example, suppose:
+ // src == "abc"
+ // op == src + 3
+ // After _mm_shuffle_epi8(), "pattern" will have five copies of "abc"
+ // followed by one byte of slop: abcabcabcabcabca.
  //
- // abc
- // abcabcxxxxx
- // abcabcabcabcxxxxx
- // ^
- // The last x is 14 bytes after ^.
- if (PREDICT_TRUE(op <= buf_limit - 14)) {
+ // The non-SSE fallback implementation suffers from store-forwarding stalls
+ // because its loads and stores partly overlap. By expanding the pattern
+ // in-place, we avoid the penalty.
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 16)) {
+ const __m128i shuffle_mask = _mm_load_si128(
+ reinterpret_cast<const __m128i*>(pshufb_fill_patterns)
+ + pattern_size - 1);
+ const __m128i pattern = _mm_shuffle_epi8(
+ _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)), shuffle_mask);
+ // Uninitialized bytes are masked out by the shuffle mask.
+ // TODO: remove annotation and macro defs once MSan is fixed.
+ SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(&pattern, sizeof(pattern));
+ pattern_size *= 16 / pattern_size;
+ char* op_end = std::min(op_limit, buf_limit - 15);
+ while (op < op_end) {
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(op), pattern);
+ op += pattern_size;
+ }
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
+ }
+ return IncrementalCopySlow(src, op, op_limit);
+ #else // !SNAPPY_HAVE_SSSE3
+ // If plenty of buffer space remains, expand the pattern to at least 8
+ // bytes. The way the following loop is written, we need 8 bytes of buffer
+ // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10
+ // bytes if pattern_size is 2. Precisely encoding that is probably not
+ // worthwhile; instead, invoke the slow path if we cannot write 11 bytes
+ // (because 11 are required in the worst case).
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) {
  while (pattern_size < 8) {
  UnalignedCopy64(src, op);
  op += pattern_size;
  pattern_size *= 2;
  }
- if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
  } else {
  return IncrementalCopySlow(src, op, op_limit);
  }
+ #endif // SNAPPY_HAVE_SSSE3
  }
  assert(pattern_size >= 8);

@@ -190,16 +277,51 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
  // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
  // because expanding the pattern to at least 8 bytes guarantees that
  // op - src >= 8.
- while (op <= buf_limit - 16) {
+ //
+ // Typically, the op_limit is the gating factor so try to simplify the loop
+ // based on that.
+ if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 16)) {
+ // There is at least one, and at most four 16-byte blocks. Writing four
+ // conditionals instead of a loop allows FDO to layout the code with respect
+ // to the actual probabilities of each length.
+ // TODO: Replace with loop with trip count hint.
+ UnalignedCopy64(src, op);
+ UnalignedCopy64(src + 8, op + 8);
+
+ if (op + 16 < op_limit) {
+ UnalignedCopy64(src + 16, op + 16);
+ UnalignedCopy64(src + 24, op + 24);
+ }
+ if (op + 32 < op_limit) {
+ UnalignedCopy64(src + 32, op + 32);
+ UnalignedCopy64(src + 40, op + 40);
+ }
+ if (op + 48 < op_limit) {
+ UnalignedCopy64(src + 48, op + 48);
+ UnalignedCopy64(src + 56, op + 56);
+ }
+ return op_limit;
+ }
+
+ // Fall back to doing as much as we can with the available slop in the
+ // buffer. This code path is relatively cold however so we save code size by
+ // avoiding unrolling and vectorizing.
+ //
+ // TODO: Remove pragma when cold regions don't get vectorized
+ // or unrolled.
+ #ifdef __clang__
+ #pragma clang loop unroll(disable)
+ #endif
+ for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) {
  UnalignedCopy64(src, op);
  UnalignedCopy64(src + 8, op + 8);
- src += 16;
- op += 16;
- if (PREDICT_TRUE(op >= op_limit)) return op_limit;
  }
+ if (op >= op_limit)
+ return op_limit;
+
  // We only take this branch if we didn't have enough slop and we can do a
  // single 8 byte copy.
- if (PREDICT_FALSE(op <= buf_limit - 8)) {
+ if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
  UnalignedCopy64(src, op);
  src += 8;
  op += 8;
@@ -209,10 +331,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,

  } // namespace

+ template <bool allow_fast_path>
  static inline char* EmitLiteral(char* op,
  const char* literal,
- int len,
- bool allow_fast_path) {
+ int len) {
  // The vast majority of copies are below 16 bytes, for which a
  // call to memcpy is overkill. This fast path can sometimes
  // copy up to 15 bytes too much, but that is okay in the
@@ -237,31 +359,29 @@ static inline char* EmitLiteral(char* op,
  // Fits in tag byte
  *op++ = LITERAL | (n << 2);
  } else {
- // Encode in upcoming bytes
- char* base = op;
- int count = 0;
- op++;
- while (n > 0) {
- *op++ = n & 0xff;
- n >>= 8;
- count++;
- }
+ int count = (Bits::Log2Floor(n) >> 3) + 1;
  assert(count >= 1);
  assert(count <= 4);
- *base = LITERAL | ((59+count) << 2);
+ *op++ = LITERAL | ((59 + count) << 2);
+ // Encode in upcoming bytes.
+ // Write 4 bytes, though we may care about only 1 of them. The output buffer
+ // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds
+ // here and there is a memcpy of size 'len' below.
+ LittleEndian::Store32(op, n);
+ op += count;
  }
  memcpy(op, literal, len);
  return op + len;
  }

- static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
- bool len_less_than_12) {
+ template <bool len_less_than_12>
+ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) {
  assert(len <= 64);
  assert(len >= 4);
  assert(offset < 65536);
  assert(len_less_than_12 == (len < 12));

- if (len_less_than_12 && PREDICT_TRUE(offset < 2048)) {
+ if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
  // offset fits in 11 bits. The 3 highest go in the top of the first byte,
  // and the rest go in the second byte.
  *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
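A worked sketch of the new branch-free length-byte count in EmitLiteral, for len = 1000; Log2Floor is re-derived here with a GCC/Clang builtin purely for illustration, since Bits:: is internal to snappy:

#include <cstdint>
#include <cstdio>

static int Log2Floor(uint32_t n) { return 31 - __builtin_clz(n); }

int main() {
  uint32_t n = 999;                                   // n = len - 1
  int count = (Log2Floor(n) >> 3) + 1;                // Log2Floor(999) = 9 -> count = 2
  uint8_t tag = 0 /*LITERAL*/ | ((59 + count) << 2);  // 59 + 2 = 61: "2 length bytes follow"
  // Store32 writes n little-endian; only the first `count` bytes are kept.
  uint8_t extra[4] = {uint8_t(n), uint8_t(n >> 8), uint8_t(n >> 16), uint8_t(n >> 24)};
  std::printf("tag=0x%02x extra=%02x %02x\n", tag, extra[0], extra[1]);
  // Output: tag=0xf4 extra=e7 03   (999 == 0x03e7)
}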
@@ -276,29 +396,33 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
  return op;
  }

- static inline char* EmitCopy(char* op, size_t offset, size_t len,
- bool len_less_than_12) {
+ template <bool len_less_than_12>
+ static inline char* EmitCopy(char* op, size_t offset, size_t len) {
  assert(len_less_than_12 == (len < 12));
  if (len_less_than_12) {
- return EmitCopyAtMost64(op, offset, len, true);
+ return EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
  } else {
  // A special case for len <= 64 might help, but so far measurements suggest
  // it's in the noise.

  // Emit 64 byte copies but make sure to keep at least four bytes reserved.
- while (PREDICT_FALSE(len >= 68)) {
- op = EmitCopyAtMost64(op, offset, 64, false);
+ while (SNAPPY_PREDICT_FALSE(len >= 68)) {
+ op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 64);
  len -= 64;
  }

  // One or two copies will now finish the job.
  if (len > 64) {
- op = EmitCopyAtMost64(op, offset, 60, false);
+ op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 60);
  len -= 60;
  }

  // Emit remainder.
- op = EmitCopyAtMost64(op, offset, len, len < 12);
+ if (len < 12) {
+ op = EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
+ } else {
+ op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, len);
+ }
  return op;
  }
  }
@@ -314,31 +438,45 @@ bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
  }
  }

- namespace internal {
- uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
- // Use smaller hash table when input.size() is smaller, since we
- // fill the table, incurring O(hash table size) overhead for
- // compression, and if the input is short, we won't need that
- // many hash table entries anyway.
- assert(kMaxHashTableSize >= 256);
- size_t htsize = 256;
- while (htsize < kMaxHashTableSize && htsize < input_size) {
- htsize <<= 1;
+ namespace {
+ uint32 CalculateTableSize(uint32 input_size) {
+ static_assert(
+ kMaxHashTableSize >= kMinHashTableSize,
+ "kMaxHashTableSize should be greater or equal to kMinHashTableSize.");
+ if (input_size > kMaxHashTableSize) {
+ return kMaxHashTableSize;
  }
-
- uint16* table;
- if (htsize <= ARRAYSIZE(small_table_)) {
- table = small_table_;
- } else {
- if (large_table_ == NULL) {
- large_table_ = new uint16[kMaxHashTableSize];
- }
- table = large_table_;
+ if (input_size < kMinHashTableSize) {
+ return kMinHashTableSize;
  }
+ // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1.
+ // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)).
+ return 2u << Bits::Log2Floor(input_size - 1);
+ }
+ } // namespace
+
+ namespace internal {
+ WorkingMemory::WorkingMemory(size_t input_size) {
+ const size_t max_fragment_size = std::min(input_size, kBlockSize);
+ const size_t table_size = CalculateTableSize(max_fragment_size);
+ size_ = table_size * sizeof(*table_) + max_fragment_size +
+ MaxCompressedLength(max_fragment_size);
+ mem_ = std::allocator<char>().allocate(size_);
+ table_ = reinterpret_cast<uint16*>(mem_);
+ input_ = mem_ + table_size * sizeof(*table_);
+ output_ = input_ + max_fragment_size;
+ }
+
+ WorkingMemory::~WorkingMemory() {
+ std::allocator<char>().deallocate(mem_, size_);
+ }

+ uint16* WorkingMemory::GetHashTable(size_t fragment_size,
+ int* table_size) const {
+ const size_t htsize = CalculateTableSize(fragment_size);
+ memset(table_, 0, htsize * sizeof(*table_));
  *table_size = htsize;
- memset(table, 0, htsize * sizeof(*table));
- return table;
+ return table_;
  }
  } // end namespace internal
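A sketch of the new table sizing: round the fragment size up to a power of two, clamped to [kMinHashTableSize, kMaxHashTableSize]. The clamp constants below (256 and 1 << 14) are illustrative stand-ins for the real values in snappy-internal.h, and Log2Floor is re-derived with a GCC/Clang builtin:

#include <cstdint>
#include <cstdio>

static int Log2Floor(uint32_t n) { return 31 - __builtin_clz(n); }

static uint32_t CalculateTableSize(uint32_t input_size) {
  const uint32_t kMinHashTableSize = 256, kMaxHashTableSize = 1 << 14;
  if (input_size > kMaxHashTableSize) return kMaxHashTableSize;
  if (input_size < kMinHashTableSize) return kMinHashTableSize;
  // Log2Ceiling(x) as a power of two: 2 << Log2Floor(x - 1).
  return 2u << Log2Floor(input_size - 1);
}

int main() {
  std::printf("%u %u %u\n", CalculateTableSize(100),    // 256   (clamped up)
                            CalculateTableSize(5000),   // 8192  (next power of two)
                            CalculateTableSize(70000)); // 16384 (clamped down)
}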
 
@@ -405,7 +543,7 @@ char* CompressFragment(const char* input,
  // "ip" is the input pointer, and "op" is the output pointer.
  const char* ip = input;
  assert(input_size <= kBlockSize);
- assert((table_size & (table_size - 1)) == 0); // table must be power of two
+ assert((table_size & (table_size - 1)) == 0); // table must be power of two
  const int shift = 32 - Bits::Log2Floor(table_size);
  assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
  const char* ip_end = input + input_size;
@@ -415,7 +553,7 @@ char* CompressFragment(const char* input,
  const char* next_emit = ip;

  const size_t kInputMarginBytes = 15;
- if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+ if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
  const char* ip_limit = input + input_size - kInputMarginBytes;

  for (uint32 next_hash = Hash(++ip, shift); ; ) {
@@ -456,7 +594,7 @@ char* CompressFragment(const char* input,
  uint32 bytes_between_hash_lookups = skip >> 5;
  skip += bytes_between_hash_lookups;
  next_ip = ip + bytes_between_hash_lookups;
- if (PREDICT_FALSE(next_ip > ip_limit)) {
+ if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
  goto emit_remainder;
  }
  next_hash = Hash(next_ip, shift);
@@ -465,14 +603,14 @@ char* CompressFragment(const char* input,
  assert(candidate < ip);

  table[hash] = ip - base_ip;
- } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
- UNALIGNED_LOAD32(candidate)));
+ } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
+ UNALIGNED_LOAD32(candidate)));

  // Step 2: A 4-byte match has been found. We'll later see if more
  // than 4 bytes match. But, prior to the match, input
  // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
  assert(next_emit + 16 <= ip_end);
- op = EmitLiteral(op, next_emit, ip - next_emit, true);
+ op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit, ip - next_emit);

  // Step 3: Call EmitCopy, and then see if another EmitCopy could
  // be our next move. Repeat until we find no match for the
@@ -495,9 +633,13 @@ char* CompressFragment(const char* input,
  ip += matched;
  size_t offset = base - candidate;
  assert(0 == memcmp(base, candidate, matched));
- op = EmitCopy(op, offset, matched, p.second);
+ if (p.second) {
+ op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched);
+ } else {
+ op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched);
+ }
  next_emit = ip;
- if (PREDICT_FALSE(ip >= ip_limit)) {
+ if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
  goto emit_remainder;
  }
  // We are now looking for a 4-byte match again. We read
@@ -520,13 +662,18 @@ char* CompressFragment(const char* input,
  emit_remainder:
  // Emit the remaining bytes as a literal
  if (next_emit < ip_end) {
- op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
+ op = EmitLiteral</*allow_fast_path=*/false>(op, next_emit,
+ ip_end - next_emit);
  }

  return op;
  }
  } // end namespace internal

+ // Called back at every compression call to trace parameters and sizes.
+ static inline void Report(const char *algorithm, size_t compressed_size,
+ size_t uncompressed_size) {}
+
  // Signature of output types needed by decompression code.
  // The decompression code is templatized on a type that obeys this
  // signature so that we do not pay virtual function call overhead in
@@ -567,6 +714,28 @@ char* CompressFragment(const char* input,
  // bool TryFastAppend(const char* ip, size_t available, size_t length);
  // };

+ static inline uint32 ExtractLowBytes(uint32 v, int n) {
+ assert(n >= 0);
+ assert(n <= 4);
+ #if SNAPPY_HAVE_BMI2
+ return _bzhi_u32(v, 8 * n);
+ #else
+ // This needs to be wider than uint32 otherwise `mask << 32` will be
+ // undefined.
+ uint64 mask = 0xffffffff;
+ return v & ~(mask << (8 * n));
+ #endif
+ }
+
+ static inline bool LeftShiftOverflows(uint8 value, uint32 shift) {
+ assert(shift < 32);
+ static const uint8 masks[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe};
+ return (value & masks[shift]) != 0;
+ }

  // Helper class for decompression
  class SnappyDecompressor {
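A sketch of what ExtractLowBytes computes: keep only the n low-order bytes of a 32-bit load. With BMI2 the same result comes from a single _bzhi_u32(v, 8 * n); the portable form widens the mask so that n == 4 never shifts a 32-bit value by 32 (which would be undefined behaviour). Illustrative values only:

#include <cstdint>
#include <cstdio>

static uint32_t ExtractLowBytes(uint32_t v, int n) {
  uint64_t mask = 0xffffffff;          // wider than 32 bits on purpose
  return v & ~(mask << (8 * n));
}

int main() {
  // e.g. the decoder reads 4 bytes but a "2 extra bytes" tag only owns 2 of them.
  std::printf("%08x %08x\n",
              ExtractLowBytes(0xddccbbaa, 2),   // 0000bbaa
              ExtractLowBytes(0xddccbbaa, 4));  // ddccbbaa
}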
@@ -605,7 +774,7 @@ class SnappyDecompressor {
  }

  // Read the uncompressed length stored at the start of the compressed data.
- // On succcess, stores the length in *result and returns true.
+ // On success, stores the length in *result and returns true.
  // On failure, returns false.
  bool ReadUncompressedLength(uint32* result) {
  assert(ip_ == NULL); // Must not have read anything yet
@@ -620,7 +789,7 @@ class SnappyDecompressor {
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
  reader_->Skip(1);
  uint32 val = c & 0x7f;
- if (((val << shift) >> shift) != val) return false;
+ if (LeftShiftOverflows(static_cast<uint8>(val), shift)) return false;
  *result |= val << shift;
  if (c < 128) {
  break;
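A worked sketch of the LeftShiftOverflows check used in the varint loop above: the uncompressed length is a 32-bit varint, so at shift == 28 only the low four bits of the final byte may be set. The mask values are copied from the table introduced earlier in this diff, and masks[28] == 0xf0 rejects exactly the values the old ((val << shift) >> shift) != val test rejected, without doing the shifts:

#include <cstdint>
#include <cstdio>

static bool LeftShiftOverflows(uint8_t value, uint32_t shift) {
  static const uint8_t masks[32] = {
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe};
  return (value & masks[shift]) != 0;
}

int main() {
  std::printf("%d %d\n",
              LeftShiftOverflows(0x0f, 28),   // 0: 0x0f << 28 still fits in 32 bits
              LeftShiftOverflows(0x10, 28));  // 1: bit 4 would land at bit 32
}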
@@ -633,13 +802,27 @@ class SnappyDecompressor {
  // Process the next item found in the input.
  // Returns true if successful, false on error or end of input.
  template <class Writer>
+ #if defined(__GNUC__) && defined(__x86_64__)
+ __attribute__((aligned(32)))
+ #endif
  void DecompressAllTags(Writer* writer) {
- const char* ip = ip_;
- // For position-independent executables, accessing global arrays can be
- // slow. Move wordmask array onto the stack to mitigate this.
- uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
- memcpy(wordmask, internal::wordmask, sizeof(wordmask));
+ // In x86, pad the function body to start 16 bytes later. This function has
+ // a couple of hotspots that are highly sensitive to alignment: we have
+ // observed regressions by more than 20% in some metrics just by moving the
+ // exact same code to a different position in the benchmark binary.
+ //
+ // Putting this code on a 32-byte-aligned boundary + 16 bytes makes us hit
+ // the "lucky" case consistently. Unfortunately, this is a very brittle
+ // workaround, and future differences in code generation may reintroduce
+ // this regression. If you experience a big, difficult to explain, benchmark
+ // performance regression here, first try removing this hack.
+ #if defined(__GNUC__) && defined(__x86_64__)
+ // Two 8-byte "NOP DWORD ptr [EAX + EAX*1 + 00000000H]" instructions.
+ asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
+ asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
+ #endif

+ const char* ip = ip_;
  // We could have put this refill fragment only at the beginning of the loop.
  // However, duplicating it at the end of each branch gives the compiler more
  // scope to optimize the <ip_limit_ - ip> expression based on the local
@@ -667,21 +850,22 @@ class SnappyDecompressor {
  // txt[1-4] 25% 75%
  // pb 24% 76%
  // bin 24% 76%
- if (PREDICT_FALSE((c & 0x3) == LITERAL)) {
+ if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
  size_t literal_length = (c >> 2) + 1u;
  if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
  assert(literal_length < 61);
  ip += literal_length;
- // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
+ // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend()
  // will not return true unless there's already at least five spare
  // bytes in addition to the literal.
  continue;
  }
- if (PREDICT_FALSE(literal_length >= 61)) {
+ if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
  // Long literal.
  const size_t literal_length_length = literal_length - 60;
  literal_length =
- (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
+ ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) +
+ 1;
  ip += literal_length_length;
  }

@@ -704,7 +888,8 @@ class SnappyDecompressor {
  MAYBE_REFILL();
  } else {
  const size_t entry = char_table[c];
- const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+ const size_t trailer =
+ ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11);
  const size_t length = entry & 0xff;
  ip += entry >> 11;

@@ -757,7 +942,7 @@ bool SnappyDecompressor::RefillTag() {
  size_t length;
  const char* src = reader_->Peek(&length);
  if (length == 0) return false;
- uint32 to_add = min<uint32>(needed - nbuf, length);
+ uint32 to_add = std::min<uint32>(needed - nbuf, length);
  memcpy(scratch_ + nbuf, src, to_add);
  nbuf += to_add;
  reader_->Skip(to_add);
@@ -786,13 +971,18 @@ static bool InternalUncompress(Source* r, Writer* writer) {
  SnappyDecompressor decompressor(r);
  uint32 uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
- return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
+
+ return InternalUncompressAllTags(&decompressor, writer, r->Available(),
+ uncompressed_len);
  }

  template <typename Writer>
  static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
  Writer* writer,
+ uint32 compressed_len,
  uint32 uncompressed_len) {
+ Report("snappy_uncompress", compressed_len, uncompressed_len);
+
  writer->SetExpectedLength(uncompressed_len);

  // Process the entire input
@@ -809,21 +999,20 @@ bool GetUncompressedLength(Source* source, uint32* result) {
  size_t Compress(Source* reader, Sink* writer) {
  size_t written = 0;
  size_t N = reader->Available();
+ const size_t uncompressed_size = N;
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
  writer->Append(ulength, p-ulength);
  written += (p - ulength);

- internal::WorkingMemory wmem;
- char* scratch = NULL;
- char* scratch_output = NULL;
+ internal::WorkingMemory wmem(N);

  while (N > 0) {
  // Get next block to compress (without copying if possible)
  size_t fragment_size;
  const char* fragment = reader->Peek(&fragment_size);
  assert(fragment_size != 0); // premature end of input
- const size_t num_to_read = min(N, kBlockSize);
+ const size_t num_to_read = std::min(N, kBlockSize);
  size_t bytes_read = fragment_size;

  size_t pending_advance = 0;
@@ -832,19 +1021,13 @@ size_t Compress(Source* reader, Sink* writer) {
  pending_advance = num_to_read;
  fragment_size = num_to_read;
  } else {
- // Read into scratch buffer
- if (scratch == NULL) {
- // If this is the last iteration, we want to allocate N bytes
- // of space, otherwise the max possible kBlockSize space.
- // num_to_read contains exactly the correct value
- scratch = new char[num_to_read];
- }
+ char* scratch = wmem.GetScratchInput();
  memcpy(scratch, fragment, bytes_read);
  reader->Skip(bytes_read);

  while (bytes_read < num_to_read) {
  fragment = reader->Peek(&fragment_size);
- size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
+ size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
  memcpy(scratch + bytes_read, fragment, n);
  bytes_read += n;
  reader->Skip(n);
@@ -864,16 +1047,13 @@ size_t Compress(Source* reader, Sink* writer) {

  // Need a scratch buffer for the output, in case the byte sink doesn't
  // have room for us directly.
- if (scratch_output == NULL) {
- scratch_output = new char[max_output];
- } else {
- // Since we encode kBlockSize regions followed by a region
- // which is <= kBlockSize in length, a previously allocated
- // scratch_output[] region is big enough for this iteration.
- }
- char* dest = writer->GetAppendBuffer(max_output, scratch_output);
- char* end = internal::CompressFragment(fragment, fragment_size,
- dest, table, table_size);
+
+ // Since we encode kBlockSize regions followed by a region
+ // which is <= kBlockSize in length, a previously allocated
+ // scratch_output[] region is big enough for this iteration.
+ char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
+ char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
+ table_size);
  writer->Append(dest, end - dest);
  written += (end - dest);

@@ -881,8 +1061,7 @@ size_t Compress(Source* reader, Sink* writer) {
  reader->Skip(pending_advance);
  }

- delete[] scratch;
- delete[] scratch_output;
+ Report("snappy_compress", written, uncompressed_size);

  return written;
  }
@@ -896,14 +1075,22 @@ size_t Compress(Source* reader, Sink* writer) {
  // Writer template argument to SnappyDecompressor::DecompressAllTags().
  class SnappyIOVecWriter {
  private:
+ // output_iov_end_ is set to iov + count and used to determine when
+ // the end of the iovs is reached.
+ const struct iovec* output_iov_end_;
+
+ #if !defined(NDEBUG)
  const struct iovec* output_iov_;
- const size_t output_iov_count_;
+ #endif // !defined(NDEBUG)
+
+ // Current iov that is being written into.
+ const struct iovec* curr_iov_;

- // We are currently writing into output_iov_[curr_iov_index_].
- size_t curr_iov_index_;
+ // Pointer to current iov's write location.
+ char* curr_iov_output_;

- // Bytes written to output_iov_[curr_iov_index_] so far.
- size_t curr_iov_written_;
+ // Remaining bytes to write into curr_iov_output.
+ size_t curr_iov_remaining_;

  // Total bytes decompressed into output_iov_ so far.
  size_t total_written_;
@@ -911,22 +1098,24 @@ class SnappyIOVecWriter {
  // Maximum number of bytes that will be decompressed into output_iov_.
  size_t output_limit_;

- inline char* GetIOVecPointer(size_t index, size_t offset) {
- return reinterpret_cast<char*>(output_iov_[index].iov_base) +
- offset;
+ static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) {
+ return reinterpret_cast<char*>(iov->iov_base) + offset;
  }

  public:
  // Does not take ownership of iov. iov must be valid during the
  // entire lifetime of the SnappyIOVecWriter.
  inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
- : output_iov_(iov),
- output_iov_count_(iov_count),
- curr_iov_index_(0),
- curr_iov_written_(0),
+ : output_iov_end_(iov + iov_count),
+ #if !defined(NDEBUG)
+ output_iov_(iov),
+ #endif // !defined(NDEBUG)
+ curr_iov_(iov),
+ curr_iov_output_(iov_count ? reinterpret_cast<char*>(iov->iov_base)
+ : nullptr),
+ curr_iov_remaining_(iov_count ? iov->iov_len : 0),
  total_written_(0),
- output_limit_(-1) {
- }
+ output_limit_(-1) {}

  inline void SetExpectedLength(size_t len) {
  output_limit_ = len;
@@ -941,23 +1130,25 @@ class SnappyIOVecWriter {
  return false;
  }

+ return AppendNoCheck(ip, len);
+ }
+
+ inline bool AppendNoCheck(const char* ip, size_t len) {
  while (len > 0) {
- assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
- if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
+ if (curr_iov_remaining_ == 0) {
  // This iovec is full. Go to the next one.
- if (curr_iov_index_ + 1 >= output_iov_count_) {
+ if (curr_iov_ + 1 >= output_iov_end_) {
  return false;
  }
- curr_iov_written_ = 0;
- ++curr_iov_index_;
+ ++curr_iov_;
+ curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+ curr_iov_remaining_ = curr_iov_->iov_len;
  }

- const size_t to_write = std::min(
- len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_);
- memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_),
- ip,
- to_write);
- curr_iov_written_ += to_write;
+ const size_t to_write = std::min(len, curr_iov_remaining_);
+ memcpy(curr_iov_output_, ip, to_write);
+ curr_iov_output_ += to_write;
+ curr_iov_remaining_ -= to_write;
  total_written_ += to_write;
  ip += to_write;
  len -= to_write;
@@ -969,11 +1160,11 @@ class SnappyIOVecWriter {
  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
  const size_t space_left = output_limit_ - total_written_;
  if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
- output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
+ curr_iov_remaining_ >= 16) {
  // Fast path, used for the majority (about 95%) of invocations.
- char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
- UnalignedCopy128(ip, ptr);
- curr_iov_written_ += len;
+ UnalignedCopy128(ip, curr_iov_output_);
+ curr_iov_output_ += len;
+ curr_iov_remaining_ -= len;
  total_written_ += len;
  return true;
  }
@@ -982,7 +1173,9 @@ class SnappyIOVecWriter {
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
- if (offset > total_written_ || offset == 0) {
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
+ // the "offset - 1u" trick.
+ if (offset - 1u >= total_written_) {
  return false;
  }
  const size_t space_left = output_limit_ - total_written_;
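A small sketch of the "offset - 1u" trick referenced in the new comment: unsigned wraparound folds the offset == 0 and offset > total_written_ checks into a single comparison. Values below are illustrative:

#include <cstddef>
#include <cstdio>

static bool BadCopy(size_t offset, size_t total_written) {
  // 0 - 1u wraps to SIZE_MAX, so this is true iff offset == 0 or
  // offset > total_written.
  return offset - 1u >= total_written;
}

int main() {
  std::printf("%d %d %d\n",
              BadCopy(0, 100),     // 1: wraps to SIZE_MAX, always rejected
              BadCopy(101, 100),   // 1: points before the start of the output
              BadCopy(100, 100));  // 0: valid back-reference to the first byte written
}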
@@ -991,8 +1184,8 @@ class SnappyIOVecWriter {
  }

  // Locate the iovec from which we need to start the copy.
- size_t from_iov_index = curr_iov_index_;
- size_t from_iov_offset = curr_iov_written_;
+ const iovec* from_iov = curr_iov_;
+ size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_;
  while (offset > 0) {
  if (from_iov_offset >= offset) {
  from_iov_offset -= offset;
@@ -1000,47 +1193,47 @@ class SnappyIOVecWriter {
  }

  offset -= from_iov_offset;
- assert(from_iov_index > 0);
- --from_iov_index;
- from_iov_offset = output_iov_[from_iov_index].iov_len;
+ --from_iov;
+ #if !defined(NDEBUG)
+ assert(from_iov >= output_iov_);
+ #endif // !defined(NDEBUG)
+ from_iov_offset = from_iov->iov_len;
  }

  // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
  // the current iovec.
  while (len > 0) {
- assert(from_iov_index <= curr_iov_index_);
- if (from_iov_index != curr_iov_index_) {
- const size_t to_copy = std::min(
- output_iov_[from_iov_index].iov_len - from_iov_offset,
- len);
- Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
+ assert(from_iov <= curr_iov_);
+ if (from_iov != curr_iov_) {
+ const size_t to_copy =
+ std::min(from_iov->iov_len - from_iov_offset, len);
+ AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy);
  len -= to_copy;
  if (len > 0) {
- ++from_iov_index;
+ ++from_iov;
  from_iov_offset = 0;
  }
  } else {
- assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
- size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
- curr_iov_written_,
- len);
+ size_t to_copy = curr_iov_remaining_;
  if (to_copy == 0) {
  // This iovec is full. Go to the next one.
- if (curr_iov_index_ + 1 >= output_iov_count_) {
+ if (curr_iov_ + 1 >= output_iov_end_) {
  return false;
  }
- ++curr_iov_index_;
- curr_iov_written_ = 0;
+ ++curr_iov_;
+ curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+ curr_iov_remaining_ = curr_iov_->iov_len;
  continue;
  }
  if (to_copy > len) {
  to_copy = len;
  }
- IncrementalCopySlow(
- GetIOVecPointer(from_iov_index, from_iov_offset),
- GetIOVecPointer(curr_iov_index_, curr_iov_written_),
- GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy);
- curr_iov_written_ += to_copy;
+
+ IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset),
+ curr_iov_output_, curr_iov_output_ + to_copy,
+ curr_iov_output_ + curr_iov_remaining_);
+ curr_iov_output_ += to_copy;
+ curr_iov_remaining_ -= to_copy;
  from_iov_offset += to_copy;
  total_written_ += to_copy;
  len -= to_copy;
@@ -1149,7 +1342,7 @@ bool RawUncompress(Source* compressed, char* uncompressed) {
  return InternalUncompress(compressed, &output);
  }

- bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
+ bool Uncompress(const char* compressed, size_t n, std::string* uncompressed) {
  size_t ulength;
  if (!GetUncompressedLength(compressed, n, &ulength)) {
  return false;
@@ -1217,7 +1410,8 @@ void RawCompress(const char* input,
  *compressed_length = (writer.CurrentDestination() - compressed);
  }

- size_t Compress(const char* input, size_t input_length, string* compressed) {
+ size_t Compress(const char* input, size_t input_length,
+ std::string* compressed) {
  // Pre-grow the buffer to the max length of the compressed output
  STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));

@@ -1313,7 +1507,8 @@ class SnappyScatteredWriter {
  char* const op_end = op_ptr_ + len;
  // See SnappyArrayWriter::AppendFromSelf for an explanation of
  // the "offset - 1u" trick.
- if (PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ && op_end <= op_limit_)) {
+ if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
+ op_end <= op_limit_)) {
  // Fast path: src and dst in current block.
  op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
  return true;
@@ -1344,7 +1539,7 @@ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
  }

  // Make new block
- size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+ size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
  op_base_ = allocator_.Allocate(bsize);
  op_ptr_ = op_base_;
  op_limit_ = op_base_ + bsize;
@@ -1401,7 +1596,7 @@ class SnappySinkAllocator {
  size_t size_written = 0;
  size_t block_size;
  for (int i = 0; i < blocks_.size(); ++i) {
- block_size = min<size_t>(blocks_[i].size, size - size_written);
+ block_size = std::min<size_t>(blocks_[i].size, size - size_written);
  dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
  &SnappySinkAllocator::Deleter, NULL);
  size_written += block_size;
@@ -1446,19 +1641,21 @@ bool Uncompress(Source* compressed, Sink* uncompressed) {
  char* buf = uncompressed->GetAppendBufferVariable(
  1, uncompressed_len, &c, 1, &allocated_size);

+ const size_t compressed_len = compressed->Available();
  // If we can get a flat buffer, then use it, otherwise do block by block
  // uncompression
  if (allocated_size >= uncompressed_len) {
  SnappyArrayWriter writer(buf);
- bool result = InternalUncompressAllTags(
- &decompressor, &writer, uncompressed_len);
+ bool result = InternalUncompressAllTags(&decompressor, &writer,
+ compressed_len, uncompressed_len);
  uncompressed->Append(buf, writer.Produced());
  return result;
  } else {
  SnappySinkAllocator allocator(uncompressed);
  SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
- return InternalUncompressAllTags(&decompressor, &writer, uncompressed_len);
+ return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
+ uncompressed_len);
  }
  }

- } // end namespace snappy
+ } // namespace snappy