snappy 0.0.12-java → 0.1.0-java

Files changed (57)
  1. checksums.yaml +5 -5
  2. data/.travis.yml +28 -1
  3. data/Gemfile +6 -1
  4. data/README.md +28 -4
  5. data/Rakefile +1 -0
  6. data/ext/extconf.rb +21 -24
  7. data/lib/snappy.rb +3 -1
  8. data/lib/snappy/hadoop.rb +22 -0
  9. data/lib/snappy/hadoop/reader.rb +58 -0
  10. data/lib/snappy/hadoop/writer.rb +51 -0
  11. data/lib/snappy/reader.rb +11 -7
  12. data/lib/snappy/shim.rb +30 -0
  13. data/lib/snappy/version.rb +3 -1
  14. data/lib/snappy/writer.rb +14 -9
  15. data/smoke.sh +8 -0
  16. data/snappy.gemspec +6 -30
  17. data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
  18. data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
  19. data/test/test-snappy-hadoop.rb +22 -0
  20. data/vendor/snappy/CMakeLists.txt +174 -0
  21. data/vendor/snappy/CONTRIBUTING.md +26 -0
  22. data/vendor/snappy/COPYING +1 -1
  23. data/vendor/snappy/NEWS +52 -0
  24. data/vendor/snappy/{README → README.md} +23 -9
  25. data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
  26. data/vendor/snappy/cmake/config.h.in +62 -0
  27. data/vendor/snappy/snappy-c.h +3 -3
  28. data/vendor/snappy/snappy-internal.h +101 -27
  29. data/vendor/snappy/snappy-sinksource.cc +33 -0
  30. data/vendor/snappy/snappy-sinksource.h +51 -6
  31. data/vendor/snappy/snappy-stubs-internal.h +107 -37
  32. data/vendor/snappy/snappy-stubs-public.h.in +16 -20
  33. data/vendor/snappy/snappy-test.cc +15 -9
  34. data/vendor/snappy/snappy-test.h +34 -43
  35. data/vendor/snappy/snappy.cc +529 -320
  36. data/vendor/snappy/snappy.h +23 -4
  37. data/vendor/snappy/snappy_unittest.cc +240 -185
  38. metadata +27 -74
  39. data/vendor/snappy/ChangeLog +0 -1916
  40. data/vendor/snappy/Makefile.am +0 -23
  41. data/vendor/snappy/autogen.sh +0 -7
  42. data/vendor/snappy/configure.ac +0 -133
  43. data/vendor/snappy/m4/gtest.m4 +0 -74
  44. data/vendor/snappy/testdata/alice29.txt +0 -3609
  45. data/vendor/snappy/testdata/asyoulik.txt +0 -4122
  46. data/vendor/snappy/testdata/baddata1.snappy +0 -0
  47. data/vendor/snappy/testdata/baddata2.snappy +0 -0
  48. data/vendor/snappy/testdata/baddata3.snappy +0 -0
  49. data/vendor/snappy/testdata/fireworks.jpeg +0 -0
  50. data/vendor/snappy/testdata/geo.protodata +0 -0
  51. data/vendor/snappy/testdata/html +0 -1
  52. data/vendor/snappy/testdata/html_x_4 +0 -1
  53. data/vendor/snappy/testdata/kppkn.gtb +0 -0
  54. data/vendor/snappy/testdata/lcet10.txt +0 -7519
  55. data/vendor/snappy/testdata/paper-100k.pdf +2 -600
  56. data/vendor/snappy/testdata/plrabn12.txt +0 -10699
  57. data/vendor/snappy/testdata/urls.10K +0 -10000
@@ -30,6 +30,18 @@
30
30
  #include "snappy-internal.h"
31
31
  #include "snappy-sinksource.h"
32
32
 
33
+ #ifndef SNAPPY_HAVE_SSE2
34
+ #if defined(__SSE2__) || defined(_M_X64) || \
35
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
36
+ #define SNAPPY_HAVE_SSE2 1
37
+ #else
38
+ #define SNAPPY_HAVE_SSE2 0
39
+ #endif
40
+ #endif
41
+
42
+ #if SNAPPY_HAVE_SSE2
43
+ #include <emmintrin.h>
44
+ #endif
33
45
  #include <stdio.h>
34
46
 
35
47
  #include <algorithm>
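
Note on the block added above: SNAPPY_HAVE_SSE2 is derived from compiler-defined symbols unless the build already set it, and <emmintrin.h> is pulled in only when the macro ends up true. A minimal standalone sketch of the same gating pattern (not part of this diff; copy16 is a hypothetical helper):

    // Standalone sketch of the SSE2 feature gate above (hypothetical helper).
    #include <cstring>

    #ifndef SNAPPY_HAVE_SSE2
    #if defined(__SSE2__) || defined(_M_X64) || \
        (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
    #define SNAPPY_HAVE_SSE2 1
    #else
    #define SNAPPY_HAVE_SSE2 0
    #endif
    #endif

    #if SNAPPY_HAVE_SSE2
    #include <emmintrin.h>
    #endif

    static inline void copy16(const void* src, void* dst) {
    #if SNAPPY_HAVE_SSE2
      // One unaligned 16-byte load and store.
      __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
      _mm_storeu_si128(static_cast<__m128i*>(dst), x);
    #else
      // Portable fallback; the temporary also tolerates overlapping src/dst.
      char tmp[16];
      std::memcpy(tmp, src, 16);
      std::memcpy(dst, tmp, 16);
    #endif
    }
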
@@ -39,6 +51,12 @@
39
51
 
40
52
  namespace snappy {
41
53
 
54
+ using internal::COPY_1_BYTE_OFFSET;
55
+ using internal::COPY_2_BYTE_OFFSET;
56
+ using internal::LITERAL;
57
+ using internal::char_table;
58
+ using internal::kMaximumTagLength;
59
+
42
60
  // Any hash function will produce a valid compressed bitstream, but a good
43
61
  // hash function reduces the number of collisions and thus yields better
44
62
  // compression for compressible input, and more speed for incompressible
@@ -76,79 +94,129 @@ size_t MaxCompressedLength(size_t source_len) {
76
94
  return 32 + source_len + source_len/6;
77
95
  }
78
96
 
79
- enum {
80
- LITERAL = 0,
81
- COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
82
- COPY_2_BYTE_OFFSET = 2,
83
- COPY_4_BYTE_OFFSET = 3
84
- };
85
- static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
86
-
87
- // Copy "len" bytes from "src" to "op", one byte at a time. Used for
88
- // handling COPY operations where the input and output regions may
89
- // overlap. For example, suppose:
90
- // src == "ab"
91
- // op == src + 2
92
- // len == 20
93
- // After IncrementalCopy(src, op, len), the result will have
94
- // eleven copies of "ab"
95
- // ababababababababababab
96
- // Note that this does not match the semantics of either memcpy()
97
- // or memmove().
98
- static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
99
- assert(len > 0);
100
- do {
101
- *op++ = *src++;
102
- } while (--len > 0);
97
+ namespace {
98
+
99
+ void UnalignedCopy64(const void* src, void* dst) {
100
+ char tmp[8];
101
+ memcpy(tmp, src, 8);
102
+ memcpy(dst, tmp, 8);
103
103
  }
104
104
 
105
- // Equivalent to IncrementalCopy except that it can write up to ten extra
106
- // bytes after the end of the copy, and that it is faster.
107
- //
108
- // The main part of this loop is a simple copy of eight bytes at a time until
109
- // we've copied (at least) the requested amount of bytes. However, if op and
110
- // src are less than eight bytes apart (indicating a repeating pattern of
111
- // length < 8), we first need to expand the pattern in order to get the correct
112
- // results. For instance, if the buffer looks like this, with the eight-byte
113
- // <src> and <op> patterns marked as intervals:
114
- //
115
- // abxxxxxxxxxxxx
116
- // [------] src
117
- // [------] op
118
- //
119
- // a single eight-byte copy from <src> to <op> will repeat the pattern once,
120
- // after which we can move <op> two bytes without moving <src>:
121
- //
122
- // ababxxxxxxxxxx
123
- // [------] src
124
- // [------] op
125
- //
126
- // and repeat the exercise until the two no longer overlap.
127
- //
128
- // This allows us to do very well in the special case of one single byte
129
- // repeated many times, without taking a big hit for more general cases.
130
- //
131
- // The worst case of extra writing past the end of the match occurs when
132
- // op - src == 1 and len == 1; the last copy will read from byte positions
133
- // [0..7] and write to [4..11], whereas it was only supposed to write to
134
- // position 1. Thus, ten excess bytes.
105
+ void UnalignedCopy128(const void* src, void* dst) {
106
+ // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
107
+ // SSE2 moves for memcpy(dst, src, 16).
108
+ #if SNAPPY_HAVE_SSE2
109
+ __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
110
+ _mm_storeu_si128(static_cast<__m128i*>(dst), x);
111
+ #else
112
+ char tmp[16];
113
+ memcpy(tmp, src, 16);
114
+ memcpy(dst, tmp, 16);
115
+ #endif
116
+ }
135
117
 
136
- namespace {
118
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used
119
+ // for handling COPY operations where the input and output regions may overlap.
120
+ // For example, suppose:
121
+ // src == "ab"
122
+ // op == src + 2
123
+ // op_limit == op + 20
124
+ // After IncrementalCopySlow(src, op, op_limit), the result will have eleven
125
+ // copies of "ab"
126
+ // ababababababababababab
127
+ // Note that this does not match the semantics of either memcpy() or memmove().
128
+ inline char* IncrementalCopySlow(const char* src, char* op,
129
+ char* const op_limit) {
130
+ while (op < op_limit) {
131
+ *op++ = *src++;
132
+ }
133
+ return op_limit;
134
+ }
137
135
 
138
- const int kMaxIncrementCopyOverflow = 10;
136
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
137
+ // IncrementalCopySlow. buf_limit is the address past the end of the writable
138
+ // region of the buffer.
139
+ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
140
+ char* const buf_limit) {
141
+ // Terminology:
142
+ //
143
+ // slop = buf_limit - op
144
+ // pat = op - src
145
+ // len = limit - op
146
+ assert(src < op);
147
+ assert(op_limit <= buf_limit);
148
+ // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
149
+ // to optimize this function but we have to also handle these cases in case
150
+ // the input does not satisfy these conditions.
151
+
152
+ size_t pattern_size = op - src;
153
+ // The cases are split into different branches to allow the branch predictor,
154
+ // FDO, and static prediction hints to work better. For each input we list the
155
+ // ratio of invocations that match each condition.
156
+ //
157
+ // input slop < 16 pat < 8 len > 16
158
+ // ------------------------------------------
159
+ // html|html4|cp 0% 1.01% 27.73%
160
+ // urls 0% 0.88% 14.79%
161
+ // jpg 0% 64.29% 7.14%
162
+ // pdf 0% 2.56% 58.06%
163
+ // txt[1-4] 0% 0.23% 0.97%
164
+ // pb 0% 0.96% 13.88%
165
+ // bin 0.01% 22.27% 41.17%
166
+ //
167
+ // It is very rare that we don't have enough slop for doing block copies. It
168
+ // is also rare that we need to expand a pattern. Small patterns are common
169
+ // for incompressible formats and for those we are plenty fast already.
170
+ // Lengths are normally not greater than 16 but they vary depending on the
171
+ // input. In general if we always predict len <= 16 it would be an ok
172
+ // prediction.
173
+ //
174
+ // In order to be fast we want a pattern >= 8 bytes and an unrolled loop
175
+ // copying 2x 8 bytes at a time.
176
+
177
+ // Handle the uncommon case where pattern is less than 8 bytes.
178
+ if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
179
+ // Expand pattern to at least 8 bytes. The worse case scenario in terms of
180
+ // buffer usage is when the pattern is size 3. ^ is the original position
181
+ // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
182
+ //
183
+ // abc
184
+ // abcabcxxxxx
185
+ // abcabcabcabcxxxxx
186
+ // ^
187
+ // The last x is 14 bytes after ^.
188
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 14)) {
189
+ while (pattern_size < 8) {
190
+ UnalignedCopy64(src, op);
191
+ op += pattern_size;
192
+ pattern_size *= 2;
193
+ }
194
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
195
+ } else {
196
+ return IncrementalCopySlow(src, op, op_limit);
197
+ }
198
+ }
199
+ assert(pattern_size >= 8);
139
200
 
140
- inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
141
- while (op - src < 8) {
201
+ // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single
202
+ // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
203
+ // because expanding the pattern to at least 8 bytes guarantees that
204
+ // op - src >= 8.
205
+ while (op <= buf_limit - 16) {
142
206
  UnalignedCopy64(src, op);
143
- len -= op - src;
144
- op += op - src;
207
+ UnalignedCopy64(src + 8, op + 8);
208
+ src += 16;
209
+ op += 16;
210
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
145
211
  }
146
- while (len > 0) {
212
+ // We only take this branch if we didn't have enough slop and we can do a
213
+ // single 8 byte copy.
214
+ if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
147
215
  UnalignedCopy64(src, op);
148
216
  src += 8;
149
217
  op += 8;
150
- len -= 8;
151
218
  }
219
+ return IncrementalCopySlow(src, op, op_limit);
152
220
  }
153
221
 
154
222
  } // namespace
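
For readers following the IncrementalCopy/IncrementalCopySlow comments above: the byte-at-a-time overlapping copy is what repeats a short pattern, which neither memcpy() nor memmove() does. A self-contained sketch (hypothetical names, not the diff's functions) that reproduces the "ab" example from the comment:

    // Sketch of the byte-at-a-time overlapping copy described above.
    #include <cassert>
    #include <cstdio>
    #include <cstring>

    static char* overlapping_copy(const char* src, char* op, char* const op_limit) {
      // Copying one byte at a time lets the destination re-read bytes this
      // same call has just written, which is what repeats the pattern.
      while (op < op_limit) {
        *op++ = *src++;
      }
      return op_limit;
    }

    int main() {
      char buf[32];
      std::memcpy(buf, "ab", 2);
      // src == buf, op == buf + 2, 20 bytes: expect eleven copies of "ab".
      overlapping_copy(buf, buf + 2, buf + 22);
      buf[22] = '\0';
      std::printf("%s\n", buf);          // prints "ababababababababababab"
      assert(std::strlen(buf) == 22);
      return 0;
    }
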
@@ -157,26 +225,29 @@ static inline char* EmitLiteral(char* op,
157
225
  const char* literal,
158
226
  int len,
159
227
  bool allow_fast_path) {
160
- int n = len - 1; // Zero-length literals are disallowed
161
- if (n < 60) {
228
+ // The vast majority of copies are below 16 bytes, for which a
229
+ // call to memcpy is overkill. This fast path can sometimes
230
+ // copy up to 15 bytes too much, but that is okay in the
231
+ // main loop, since we have a bit to go on for both sides:
232
+ //
233
+ // - The input will always have kInputMarginBytes = 15 extra
234
+ // available bytes, as long as we're in the main loop, and
235
+ // if not, allow_fast_path = false.
236
+ // - The output will always have 32 spare bytes (see
237
+ // MaxCompressedLength).
238
+ assert(len > 0); // Zero-length literals are disallowed
239
+ int n = len - 1;
240
+ if (allow_fast_path && len <= 16) {
162
241
  // Fits in tag byte
163
242
  *op++ = LITERAL | (n << 2);
164
243
 
165
- // The vast majority of copies are below 16 bytes, for which a
166
- // call to memcpy is overkill. This fast path can sometimes
167
- // copy up to 15 bytes too much, but that is okay in the
168
- // main loop, since we have a bit to go on for both sides:
169
- //
170
- // - The input will always have kInputMarginBytes = 15 extra
171
- // available bytes, as long as we're in the main loop, and
172
- // if not, allow_fast_path = false.
173
- // - The output will always have 32 spare bytes (see
174
- // MaxCompressedLength).
175
- if (allow_fast_path && len <= 16) {
176
- UnalignedCopy64(literal, op);
177
- UnalignedCopy64(literal + 8, op + 8);
178
- return op + len;
179
- }
244
+ UnalignedCopy128(literal, op);
245
+ return op + len;
246
+ }
247
+
248
+ if (n < 60) {
249
+ // Fits in tag byte
250
+ *op++ = LITERAL | (n << 2);
180
251
  } else {
181
252
  // Encode in upcoming bytes
182
253
  char* base = op;
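
Side note on the literal tags emitted above: a literal of length len stores n = len - 1, and any n < 60 fits directly in the upper six bits of the tag byte; the new fast path additionally copies up to 16 bytes with one UnalignedCopy128. A hedged sketch of just the short-literal encoding (hypothetical helper, plain memcpy instead of the overcopying fast path):

    // Sketch: encode the tag byte for a short literal (len <= 60).
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static char* emit_short_literal(char* op, const char* literal, int len) {
      assert(len > 0 && len <= 60);      // longer literals need extra length bytes
      const uint8_t kLiteralTag = 0;     // low two bits of the tag select LITERAL
      int n = len - 1;                   // zero-length literals are disallowed
      *op++ = static_cast<char>(kLiteralTag | (n << 2));
      std::memcpy(op, literal, len);
      return op + len;
    }

    int main() {
      char out[64];
      char* end = emit_short_literal(out, "snappy", 6);
      // Tag byte: (6 - 1) << 2 == 0x14, followed by the 6 literal bytes.
      assert(static_cast<unsigned char>(out[0]) == 0x14);
      assert(end - out == 7);
      return 0;
    }
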
@@ -195,42 +266,54 @@ static inline char* EmitLiteral(char* op,
195
266
  return op + len;
196
267
  }
197
268
 
198
- static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
269
+ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
270
+ bool len_less_than_12) {
199
271
  assert(len <= 64);
200
272
  assert(len >= 4);
201
273
  assert(offset < 65536);
274
+ assert(len_less_than_12 == (len < 12));
202
275
 
203
- if ((len < 12) && (offset < 2048)) {
204
- size_t len_minus_4 = len - 4;
205
- assert(len_minus_4 < 8); // Must fit in 3 bits
206
- *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
276
+ if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
277
+ // offset fits in 11 bits. The 3 highest go in the top of the first byte,
278
+ // and the rest go in the second byte.
279
+ *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
207
280
  *op++ = offset & 0xff;
208
281
  } else {
209
- *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
210
- LittleEndian::Store16(op, offset);
211
- op += 2;
282
+ // Write 4 bytes, though we only care about 3 of them. The output buffer
283
+ // is required to have some slack, so the extra byte won't overrun it.
284
+ uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8);
285
+ LittleEndian::Store32(op, u);
286
+ op += 3;
212
287
  }
213
288
  return op;
214
289
  }
215
290
 
216
- static inline char* EmitCopy(char* op, size_t offset, int len) {
217
- // Emit 64 byte copies but make sure to keep at least four bytes reserved
218
- while (len >= 68) {
219
- op = EmitCopyLessThan64(op, offset, 64);
220
- len -= 64;
221
- }
291
+ static inline char* EmitCopy(char* op, size_t offset, size_t len,
292
+ bool len_less_than_12) {
293
+ assert(len_less_than_12 == (len < 12));
294
+ if (len_less_than_12) {
295
+ return EmitCopyAtMost64(op, offset, len, true);
296
+ } else {
297
+ // A special case for len <= 64 might help, but so far measurements suggest
298
+ // it's in the noise.
222
299
 
223
- // Emit an extra 60 byte copy if have too much data to fit in one copy
224
- if (len > 64) {
225
- op = EmitCopyLessThan64(op, offset, 60);
226
- len -= 60;
227
- }
300
+ // Emit 64 byte copies but make sure to keep at least four bytes reserved.
301
+ while (SNAPPY_PREDICT_FALSE(len >= 68)) {
302
+ op = EmitCopyAtMost64(op, offset, 64, false);
303
+ len -= 64;
304
+ }
228
305
 
229
- // Emit remainder
230
- op = EmitCopyLessThan64(op, offset, len);
231
- return op;
232
- }
306
+ // One or two copies will now finish the job.
307
+ if (len > 64) {
308
+ op = EmitCopyAtMost64(op, offset, 60, false);
309
+ len -= 60;
310
+ }
233
311
 
312
+ // Emit remainder.
313
+ op = EmitCopyAtMost64(op, offset, len, len < 12);
314
+ return op;
315
+ }
316
+ }
234
317
 
235
318
  bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
236
319
  uint32 v = 0;
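
Side note on the EmitCopyAtMost64 change above: short copies (4 <= len < 12) with offset < 2048 use the one-byte-offset form, everything else stores len - 1 in the top six bits of the tag followed by a 16-bit offset; the new code emits that second form with a single 32-bit store and advances op by 3, relying on output slack. A sketch of the two encodings with plain byte stores (hypothetical helper, not the library function):

    // Sketch of the two copy encodings discussed above.
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    static const uint8_t kCopy1ByteOffset = 1;  // low two bits of the tag
    static const uint8_t kCopy2ByteOffset = 2;

    static char* emit_copy_upto_64(char* op, size_t offset, size_t len) {
      assert(len >= 4 && len <= 64 && offset < 65536);
      if (len < 12 && offset < 2048) {
        // 3 bits of (len - 4) plus the top 3 bits of the 11-bit offset go in
        // the tag byte; the low 8 offset bits follow.
        *op++ = static_cast<char>(kCopy1ByteOffset + ((len - 4) << 2) +
                                  ((offset >> 3) & 0xe0));
        *op++ = static_cast<char>(offset & 0xff);
      } else {
        // (len - 1) in the top six bits, then a 16-bit little-endian offset.
        *op++ = static_cast<char>(kCopy2ByteOffset + ((len - 1) << 2));
        *op++ = static_cast<char>(offset & 0xff);
        *op++ = static_cast<char>((offset >> 8) & 0xff);
      }
      return op;
    }

    int main() {
      char buf[8];
      // len 7, offset 300: fits the 1-byte-offset form -> 2 bytes of output.
      assert(emit_copy_upto_64(buf, 300, 7) - buf == 2);
      // len 40, offset 300: needs the 2-byte-offset form -> 3 bytes of output.
      assert(emit_copy_upto_64(buf, 300, 40) - buf == 3);
      return 0;
    }
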
@@ -344,7 +427,7 @@ char* CompressFragment(const char* input,
344
427
  const char* next_emit = ip;
345
428
 
346
429
  const size_t kInputMarginBytes = 15;
347
- if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
430
+ if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
348
431
  const char* ip_limit = input + input_size - kInputMarginBytes;
349
432
 
350
433
  for (uint32 next_hash = Hash(++ip, shift); ; ) {
@@ -364,9 +447,9 @@ char* CompressFragment(const char* input,
364
447
  //
365
448
  // Heuristic match skipping: If 32 bytes are scanned with no matches
366
449
  // found, start looking only at every other byte. If 32 more bytes are
367
- // scanned, look at every third byte, etc.. When a match is found,
368
- // immediately go back to looking at every byte. This is a small loss
369
- // (~5% performance, ~0.1% density) for compressible data due to more
450
+ // scanned (or skipped), look at every third byte, etc.. When a match is
451
+ // found, immediately go back to looking at every byte. This is a small
452
+ // loss (~5% performance, ~0.1% density) for compressible data due to more
370
453
  // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
371
454
  // win since the compressor quickly "realizes" the data is incompressible
372
455
  // and doesn't bother looking for matches everywhere.
@@ -382,9 +465,10 @@ char* CompressFragment(const char* input,
382
465
  ip = next_ip;
383
466
  uint32 hash = next_hash;
384
467
  assert(hash == Hash(ip, shift));
385
- uint32 bytes_between_hash_lookups = skip++ >> 5;
468
+ uint32 bytes_between_hash_lookups = skip >> 5;
469
+ skip += bytes_between_hash_lookups;
386
470
  next_ip = ip + bytes_between_hash_lookups;
387
- if (PREDICT_FALSE(next_ip > ip_limit)) {
471
+ if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
388
472
  goto emit_remainder;
389
473
  }
390
474
  next_hash = Hash(next_ip, shift);
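
On the skipping change above: the old code advanced by skip++ >> 5 per probe, the new code adds skip >> 5 back into skip, so the stride still starts at one byte for the first 32 probes and then grows. A tiny sketch of the resulting schedule (skip starting at 32 is an assumption taken from the surrounding CompressFragment code):

    // Sketch of the revised match-skipping schedule.
    #include <cstdio>

    int main() {
      unsigned skip = 32;   // assumed initial value from CompressFragment
      unsigned pos = 0;     // offset of the next probe into the input
      for (int probe = 1; probe <= 48; ++probe) {
        unsigned bytes_between_hash_lookups = skip >> 5;
        skip += bytes_between_hash_lookups;
        pos += bytes_between_hash_lookups;
        if (probe % 16 == 0) {
          // Stride is 1 byte for the first 32 probes, then starts to grow.
          std::printf("after %2d probes: offset %u, stride %u\n",
                      probe, pos, bytes_between_hash_lookups);
        }
      }
      return 0;
    }
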
@@ -393,8 +477,8 @@ char* CompressFragment(const char* input,
393
477
  assert(candidate < ip);
394
478
 
395
479
  table[hash] = ip - base_ip;
396
- } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
397
- UNALIGNED_LOAD32(candidate)));
480
+ } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
481
+ UNALIGNED_LOAD32(candidate)));
398
482
 
399
483
  // Step 2: A 4-byte match has been found. We'll later see if more
400
484
  // than 4 bytes match. But, prior to the match, input
@@ -417,19 +501,21 @@ char* CompressFragment(const char* input,
417
501
  // We have a 4-byte match at ip, and no need to emit any
418
502
  // "literal bytes" prior to ip.
419
503
  const char* base = ip;
420
- int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
504
+ std::pair<size_t, bool> p =
505
+ FindMatchLength(candidate + 4, ip + 4, ip_end);
506
+ size_t matched = 4 + p.first;
421
507
  ip += matched;
422
508
  size_t offset = base - candidate;
423
509
  assert(0 == memcmp(base, candidate, matched));
424
- op = EmitCopy(op, offset, matched);
425
- // We could immediately start working at ip now, but to improve
426
- // compression we first update table[Hash(ip - 1, ...)].
427
- const char* insert_tail = ip - 1;
510
+ op = EmitCopy(op, offset, matched, p.second);
428
511
  next_emit = ip;
429
- if (PREDICT_FALSE(ip >= ip_limit)) {
512
+ if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
430
513
  goto emit_remainder;
431
514
  }
432
- input_bytes = GetEightBytesAt(insert_tail);
515
+ // We are now looking for a 4-byte match again. We read
516
+ // table[Hash(ip, shift)] for that. To improve compression,
517
+ // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
518
+ input_bytes = GetEightBytesAt(ip - 1);
433
519
  uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
434
520
  table[prev_hash] = ip - base_ip - 1;
435
521
  uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
@@ -453,6 +539,10 @@ char* CompressFragment(const char* input,
453
539
  }
454
540
  } // end namespace internal
455
541
 
542
+ // Called back at avery compression call to trace parameters and sizes.
543
+ static inline void Report(const char *algorithm, size_t compressed_size,
544
+ size_t uncompressed_size) {}
545
+
456
546
  // Signature of output types needed by decompression code.
457
547
  // The decompression code is templatized on a type that obeys this
458
548
  // signature so that we do not pay virtual function call overhead in
@@ -493,162 +583,14 @@ char* CompressFragment(const char* input,
493
583
  // bool TryFastAppend(const char* ip, size_t available, size_t length);
494
584
  // };
495
585
 
496
- // -----------------------------------------------------------------------
497
- // Lookup table for decompression code. Generated by ComputeTable() below.
498
- // -----------------------------------------------------------------------
586
+ namespace internal {
499
587
 
500
588
  // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
501
589
  static const uint32 wordmask[] = {
502
590
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
503
591
  };
504
592
 
505
- // Data stored per entry in lookup table:
506
- // Range Bits-used Description
507
- // ------------------------------------
508
- // 1..64 0..7 Literal/copy length encoded in opcode byte
509
- // 0..7 8..10 Copy offset encoded in opcode byte / 256
510
- // 0..4 11..13 Extra bytes after opcode
511
- //
512
- // We use eight bits for the length even though 7 would have sufficed
513
- // because of efficiency reasons:
514
- // (1) Extracting a byte is faster than a bit-field
515
- // (2) It properly aligns copy offset so we do not need a <<8
516
- static const uint16 char_table[256] = {
517
- 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
518
- 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
519
- 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
520
- 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
521
- 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
522
- 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
523
- 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
524
- 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
525
- 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
526
- 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
527
- 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
528
- 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
529
- 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
530
- 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
531
- 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
532
- 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
533
- 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
534
- 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
535
- 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
536
- 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
537
- 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
538
- 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
539
- 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
540
- 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
541
- 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
542
- 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
543
- 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
544
- 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
545
- 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
546
- 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
547
- 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
548
- 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
549
- };
550
-
551
- // In debug mode, allow optional computation of the table at startup.
552
- // Also, check that the decompression table is correct.
553
- #ifndef NDEBUG
554
- DEFINE_bool(snappy_dump_decompression_table, false,
555
- "If true, we print the decompression table at startup.");
556
-
557
- static uint16 MakeEntry(unsigned int extra,
558
- unsigned int len,
559
- unsigned int copy_offset) {
560
- // Check that all of the fields fit within the allocated space
561
- assert(extra == (extra & 0x7)); // At most 3 bits
562
- assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
563
- assert(len == (len & 0x7f)); // At most 7 bits
564
- return len | (copy_offset << 8) | (extra << 11);
565
- }
566
-
567
- static void ComputeTable() {
568
- uint16 dst[256];
569
-
570
- // Place invalid entries in all places to detect missing initialization
571
- int assigned = 0;
572
- for (int i = 0; i < 256; i++) {
573
- dst[i] = 0xffff;
574
- }
575
-
576
- // Small LITERAL entries. We store (len-1) in the top 6 bits.
577
- for (unsigned int len = 1; len <= 60; len++) {
578
- dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
579
- assigned++;
580
- }
581
-
582
- // Large LITERAL entries. We use 60..63 in the high 6 bits to
583
- // encode the number of bytes of length info that follow the opcode.
584
- for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
585
- // We set the length field in the lookup table to 1 because extra
586
- // bytes encode len-1.
587
- dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
588
- assigned++;
589
- }
590
-
591
- // COPY_1_BYTE_OFFSET.
592
- //
593
- // The tag byte in the compressed data stores len-4 in 3 bits, and
594
- // offset/256 in 5 bits. offset%256 is stored in the next byte.
595
- //
596
- // This format is used for length in range [4..11] and offset in
597
- // range [0..2047]
598
- for (unsigned int len = 4; len < 12; len++) {
599
- for (unsigned int offset = 0; offset < 2048; offset += 256) {
600
- dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
601
- MakeEntry(1, len, offset>>8);
602
- assigned++;
603
- }
604
- }
605
-
606
- // COPY_2_BYTE_OFFSET.
607
- // Tag contains len-1 in top 6 bits, and offset in next two bytes.
608
- for (unsigned int len = 1; len <= 64; len++) {
609
- dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
610
- assigned++;
611
- }
612
-
613
- // COPY_4_BYTE_OFFSET.
614
- // Tag contents len-1 in top 6 bits, and offset in next four bytes.
615
- for (unsigned int len = 1; len <= 64; len++) {
616
- dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
617
- assigned++;
618
- }
619
-
620
- // Check that each entry was initialized exactly once.
621
- if (assigned != 256) {
622
- fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
623
- abort();
624
- }
625
- for (int i = 0; i < 256; i++) {
626
- if (dst[i] == 0xffff) {
627
- fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
628
- abort();
629
- }
630
- }
631
-
632
- if (FLAGS_snappy_dump_decompression_table) {
633
- printf("static const uint16 char_table[256] = {\n ");
634
- for (int i = 0; i < 256; i++) {
635
- printf("0x%04x%s",
636
- dst[i],
637
- ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
638
- }
639
- printf("};\n");
640
- }
641
-
642
- // Check that computed table matched recorded table
643
- for (int i = 0; i < 256; i++) {
644
- if (dst[i] != char_table[i]) {
645
- fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
646
- i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
647
- abort();
648
- }
649
- }
650
- }
651
- #endif /* !NDEBUG */
593
+ } // end namespace internal
652
594
 
653
595
  // Helper class for decompression
654
596
  class SnappyDecompressor {
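
The removed ComputeTable()/char_table block above documented how each 16-bit decode-table entry is packed: literal/copy length in bits 0..7, copy_offset/256 in bits 8..10, and the number of extra tag bytes in bits 11..13. A small sketch that packs and unpacks one entry to make that layout concrete (hypothetical helper mirroring the removed MakeEntry):

    // Sketch of the char_table entry packing documented above.
    #include <cassert>
    #include <cstdint>

    static uint16_t make_entry(unsigned extra, unsigned len, unsigned copy_offset) {
      assert(extra == (extra & 0x7));          // at most 3 bits
      assert(copy_offset == (copy_offset & 0x7));
      assert(len == (len & 0x7f));             // at most 7 bits
      return static_cast<uint16_t>(len | (copy_offset << 8) | (extra << 11));
    }

    int main() {
      // A COPY_1_BYTE_OFFSET tag for len == 8 and offset in [1792, 2048):
      // one trailer byte follows the tag, and copy_offset/256 == 7.
      uint16_t entry = make_entry(1, 8, 7);    // 0x0f08, as in the table above
      assert((entry & 0xff) == 8);             // length
      assert((entry & 0x700) == 0x700);        // copy_offset == 7 * 256
      assert((entry >> 11) == 1);              // one trailing byte
      return 0;
    }
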
@@ -701,7 +643,9 @@ class SnappyDecompressor {
701
643
  if (n == 0) return false;
702
644
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
703
645
  reader_->Skip(1);
704
- *result |= static_cast<uint32>(c & 0x7f) << shift;
646
+ uint32 val = c & 0x7f;
647
+ if (((val << shift) >> shift) != val) return false;
648
+ *result |= val << shift;
705
649
  if (c < 128) {
706
650
  break;
707
651
  }
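
The added guard above rejects varint length prefixes whose bits would be shifted out of the 32-bit result instead of silently truncating them. A self-contained sketch of the same check, reading from a flat buffer rather than a Source (hypothetical function):

    // Sketch of the overflow-checked varint32 parse added above.
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    static bool parse_uint32_varint(const uint8_t* p, size_t n, uint32_t* result) {
      *result = 0;
      for (uint32_t shift = 0; shift <= 28 && n > 0; shift += 7, ++p, --n) {
        uint32_t val = *p & 0x7f;
        // Reject groups whose bits would not survive the shift into 32 bits.
        if (((val << shift) >> shift) != val) return false;
        *result |= val << shift;
        if (*p < 128) return true;             // high bit clear: last group
      }
      return false;                            // truncated or too many groups
    }

    int main() {
      uint32_t v = 0;
      const uint8_t ok[] = {0xac, 0x02};       // 300 encoded as a varint
      assert(parse_uint32_varint(ok, sizeof(ok), &v) && v == 300);
      const uint8_t bad[] = {0x80, 0x80, 0x80, 0x80, 0x7f};  // exceeds 32 bits
      assert(!parse_uint32_varint(bad, sizeof(bad), &v));
      return 0;
    }
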
@@ -715,6 +659,19 @@ class SnappyDecompressor {
715
659
  template <class Writer>
716
660
  void DecompressAllTags(Writer* writer) {
717
661
  const char* ip = ip_;
662
+ // For position-independent executables, accessing global arrays can be
663
+ // slow. Move wordmask array onto the stack to mitigate this.
664
+ uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
665
+ // Do not use memcpy to copy internal::wordmask to
666
+ // wordmask. LLVM converts stack arrays to global arrays if it detects
667
+ // const stack arrays and this hurts the performance of position
668
+ // independent code. This change is temporary and can be reverted when
669
+ // https://reviews.llvm.org/D30759 is approved.
670
+ wordmask[0] = internal::wordmask[0];
671
+ wordmask[1] = internal::wordmask[1];
672
+ wordmask[2] = internal::wordmask[2];
673
+ wordmask[3] = internal::wordmask[3];
674
+ wordmask[4] = internal::wordmask[4];
718
675
 
719
676
  // We could have put this refill fragment only at the beginning of the loop.
720
677
  // However, duplicating it at the end of each branch gives the compiler more
@@ -728,10 +685,29 @@ class SnappyDecompressor {
728
685
  }
729
686
 
730
687
  MAYBE_REFILL();
688
+ // Add loop alignment directive. Without this directive, we observed
689
+ // significant performance degradation on several intel architectures
690
+ // in snappy benchmark built with LLVM. The degradation was caused by
691
+ // increased branch miss prediction.
692
+ #if defined(__clang__) && defined(__x86_64__)
693
+ asm volatile (".p2align 5");
694
+ #endif
731
695
  for ( ;; ) {
732
696
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
733
697
 
734
- if ((c & 0x3) == LITERAL) {
698
+ // Ratio of iterations that have LITERAL vs non-LITERAL for different
699
+ // inputs.
700
+ //
701
+ // input LITERAL NON_LITERAL
702
+ // -----------------------------------
703
+ // html|html4|cp 23% 77%
704
+ // urls 36% 64%
705
+ // jpg 47% 53%
706
+ // pdf 19% 81%
707
+ // txt[1-4] 25% 75%
708
+ // pb 24% 76%
709
+ // bin 24% 76%
710
+ if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
735
711
  size_t literal_length = (c >> 2) + 1u;
736
712
  if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
737
713
  assert(literal_length < 61);
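
SNAPPY_PREDICT_TRUE/SNAPPY_PREDICT_FALSE (the renamed PREDICT_* macros used throughout this diff) are branch-prediction hints. On GCC and Clang such macros are conventionally built on __builtin_expect, roughly as below; this is an assumption about snappy-stubs-internal.h, shown only so the annotations above read clearly:

    // Conventional definitions for hint macros of this kind on GCC/Clang;
    // the exact definitions live in snappy-stubs-internal.h and may differ.
    #if defined(__GNUC__) || defined(__clang__)
    #define SNAPPY_PREDICT_TRUE(x)  (__builtin_expect(!!(x), 1))
    #define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
    #else
    #define SNAPPY_PREDICT_TRUE(x)  (x)
    #define SNAPPY_PREDICT_FALSE(x) (x)
    #endif

    #include <cstddef>

    // Typical use: annotate the branch expected to be rare, as the LITERAL
    // check in DecompressAllTags does above.
    inline bool needs_refill(std::size_t available, std::size_t needed) {
      return SNAPPY_PREDICT_FALSE(available < needed);
    }
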
@@ -741,7 +717,7 @@ class SnappyDecompressor {
741
717
  // bytes in addition to the literal.
742
718
  continue;
743
719
  }
744
- if (PREDICT_FALSE(literal_length >= 61)) {
720
+ if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
745
721
  // Long literal.
746
722
  const size_t literal_length_length = literal_length - 60;
747
723
  literal_length =
@@ -767,15 +743,15 @@ class SnappyDecompressor {
767
743
  ip += literal_length;
768
744
  MAYBE_REFILL();
769
745
  } else {
770
- const uint32 entry = char_table[c];
771
- const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
772
- const uint32 length = entry & 0xff;
746
+ const size_t entry = char_table[c];
747
+ const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
748
+ const size_t length = entry & 0xff;
773
749
  ip += entry >> 11;
774
750
 
775
751
  // copy_offset/256 is encoded in bits 8..10. By just fetching
776
752
  // those bits, we get copy_offset (since the bit-field starts at
777
753
  // bit 8).
778
- const uint32 copy_offset = entry & 0x700;
754
+ const size_t copy_offset = entry & 0x700;
779
755
  if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
780
756
  return;
781
757
  }
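
In the copy branch above, the decoder speculatively loads four trailer bytes and keeps only the entry >> 11 bytes that belong to the tag by masking with wordmask. A sketch of that masking step with a portable little-endian load (hypothetical helper instead of LittleEndian::Load32):

    // Sketch of the trailer extraction in the copy branch above.
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    static const uint32_t kWordmask[] = {0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu};

    static uint32_t load32_le(const uint8_t* p) {
      return static_cast<uint32_t>(p[0]) | (static_cast<uint32_t>(p[1]) << 8) |
             (static_cast<uint32_t>(p[2]) << 16) | (static_cast<uint32_t>(p[3]) << 24);
    }

    int main() {
      // Suppose the tag needs 2 trailer bytes (a COPY_2_BYTE_OFFSET entry):
      // only the low 16 bits of the speculative 4-byte load are meaningful.
      const uint8_t stream[] = {0x2c, 0x01, 0xde, 0xad};  // trailer 0x012c, then junk
      size_t extra_bytes = 2;                             // entry >> 11 for that tag
      uint32_t trailer = load32_le(stream) & kWordmask[extra_bytes];
      assert(trailer == 0x012c);                          // offset 300
      return 0;
    }
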
@@ -795,10 +771,8 @@ bool SnappyDecompressor::RefillTag() {
795
771
  size_t n;
796
772
  ip = reader_->Peek(&n);
797
773
  peeked_ = n;
798
- if (n == 0) {
799
- eof_ = true;
800
- return false;
801
- }
774
+ eof_ = (n == 0);
775
+ if (eof_) return false;
802
776
  ip_limit_ = ip + n;
803
777
  }
804
778
 
@@ -823,7 +797,7 @@ bool SnappyDecompressor::RefillTag() {
823
797
  size_t length;
824
798
  const char* src = reader_->Peek(&length);
825
799
  if (length == 0) return false;
826
- uint32 to_add = min<uint32>(needed - nbuf, length);
800
+ uint32 to_add = std::min<uint32>(needed - nbuf, length);
827
801
  memcpy(scratch_ + nbuf, src, to_add);
828
802
  nbuf += to_add;
829
803
  reader_->Skip(to_add);
@@ -852,17 +826,23 @@ static bool InternalUncompress(Source* r, Writer* writer) {
852
826
  SnappyDecompressor decompressor(r);
853
827
  uint32 uncompressed_len = 0;
854
828
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
855
- return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
829
+
830
+ return InternalUncompressAllTags(&decompressor, writer, r->Available(),
831
+ uncompressed_len);
856
832
  }
857
833
 
858
834
  template <typename Writer>
859
835
  static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
860
836
  Writer* writer,
837
+ uint32 compressed_len,
861
838
  uint32 uncompressed_len) {
839
+ Report("snappy_uncompress", compressed_len, uncompressed_len);
840
+
862
841
  writer->SetExpectedLength(uncompressed_len);
863
842
 
864
843
  // Process the entire input
865
844
  decompressor->DecompressAllTags(writer);
845
+ writer->Flush();
866
846
  return (decompressor->eof() && writer->CheckLength());
867
847
  }
868
848
 
@@ -874,6 +854,7 @@ bool GetUncompressedLength(Source* source, uint32* result) {
874
854
  size_t Compress(Source* reader, Sink* writer) {
875
855
  size_t written = 0;
876
856
  size_t N = reader->Available();
857
+ const size_t uncompressed_size = N;
877
858
  char ulength[Varint::kMax32];
878
859
  char* p = Varint::Encode32(ulength, N);
879
860
  writer->Append(ulength, p-ulength);
@@ -888,7 +869,7 @@ size_t Compress(Source* reader, Sink* writer) {
888
869
  size_t fragment_size;
889
870
  const char* fragment = reader->Peek(&fragment_size);
890
871
  assert(fragment_size != 0); // premature end of input
891
- const size_t num_to_read = min(N, kBlockSize);
872
+ const size_t num_to_read = std::min(N, kBlockSize);
892
873
  size_t bytes_read = fragment_size;
893
874
 
894
875
  size_t pending_advance = 0;
@@ -909,7 +890,7 @@ size_t Compress(Source* reader, Sink* writer) {
909
890
 
910
891
  while (bytes_read < num_to_read) {
911
892
  fragment = reader->Peek(&fragment_size);
912
- size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
893
+ size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
913
894
  memcpy(scratch + bytes_read, fragment, n);
914
895
  bytes_read += n;
915
896
  reader->Skip(n);
@@ -946,6 +927,8 @@ size_t Compress(Source* reader, Sink* writer) {
946
927
  reader->Skip(pending_advance);
947
928
  }
948
929
 
930
+ Report("snappy_compress", written, uncompressed_size);
931
+
949
932
  delete[] scratch;
950
933
  delete[] scratch_output;
951
934
 
@@ -965,7 +948,7 @@ class SnappyIOVecWriter {
965
948
  const size_t output_iov_count_;
966
949
 
967
950
  // We are currently writing into output_iov_[curr_iov_index_].
968
- int curr_iov_index_;
951
+ size_t curr_iov_index_;
969
952
 
970
953
  // Bytes written to output_iov_[curr_iov_index_] so far.
971
954
  size_t curr_iov_written_;
@@ -976,7 +959,7 @@ class SnappyIOVecWriter {
976
959
  // Maximum number of bytes that will be decompressed into output_iov_.
977
960
  size_t output_limit_;
978
961
 
979
- inline char* GetIOVecPointer(int index, size_t offset) {
962
+ inline char* GetIOVecPointer(size_t index, size_t offset) {
980
963
  return reinterpret_cast<char*>(output_iov_[index].iov_base) +
981
964
  offset;
982
965
  }
@@ -1037,8 +1020,7 @@ class SnappyIOVecWriter {
1037
1020
  output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
1038
1021
  // Fast path, used for the majority (about 95%) of invocations.
1039
1022
  char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
1040
- UnalignedCopy64(ip, ptr);
1041
- UnalignedCopy64(ip + 8, ptr + 8);
1023
+ UnalignedCopy128(ip, ptr);
1042
1024
  curr_iov_written_ += len;
1043
1025
  total_written_ += len;
1044
1026
  return true;
@@ -1057,7 +1039,7 @@ class SnappyIOVecWriter {
1057
1039
  }
1058
1040
 
1059
1041
  // Locate the iovec from which we need to start the copy.
1060
- int from_iov_index = curr_iov_index_;
1042
+ size_t from_iov_index = curr_iov_index_;
1061
1043
  size_t from_iov_offset = curr_iov_written_;
1062
1044
  while (offset > 0) {
1063
1045
  if (from_iov_offset >= offset) {
@@ -1066,8 +1048,8 @@ class SnappyIOVecWriter {
1066
1048
  }
1067
1049
 
1068
1050
  offset -= from_iov_offset;
1051
+ assert(from_iov_index > 0);
1069
1052
  --from_iov_index;
1070
- assert(from_iov_index >= 0);
1071
1053
  from_iov_offset = output_iov_[from_iov_index].iov_len;
1072
1054
  }
1073
1055
 
@@ -1102,9 +1084,10 @@ class SnappyIOVecWriter {
1102
1084
  if (to_copy > len) {
1103
1085
  to_copy = len;
1104
1086
  }
1105
- IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
1106
- GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1107
- to_copy);
1087
+ IncrementalCopySlow(
1088
+ GetIOVecPointer(from_iov_index, from_iov_offset),
1089
+ GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1090
+ GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy);
1108
1091
  curr_iov_written_ += to_copy;
1109
1092
  from_iov_offset += to_copy;
1110
1093
  total_written_ += to_copy;
@@ -1115,6 +1098,7 @@ class SnappyIOVecWriter {
1115
1098
  return true;
1116
1099
  }
1117
1100
 
1101
+ inline void Flush() {}
1118
1102
  };
1119
1103
 
1120
1104
  bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
@@ -1145,7 +1129,8 @@ class SnappyArrayWriter {
1145
1129
  public:
1146
1130
  inline explicit SnappyArrayWriter(char* dst)
1147
1131
  : base_(dst),
1148
- op_(dst) {
1132
+ op_(dst),
1133
+ op_limit_(dst) {
1149
1134
  }
1150
1135
 
1151
1136
  inline void SetExpectedLength(size_t len) {
@@ -1172,8 +1157,7 @@ class SnappyArrayWriter {
1172
1157
  const size_t space_left = op_limit_ - op;
1173
1158
  if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
1174
1159
  // Fast path, used for the majority (about 95%) of invocations.
1175
- UnalignedCopy64(ip, op);
1176
- UnalignedCopy64(ip + 8, op + 8);
1160
+ UnalignedCopy128(ip, op);
1177
1161
  op_ = op + len;
1178
1162
  return true;
1179
1163
  } else {
@@ -1182,8 +1166,7 @@ class SnappyArrayWriter {
1182
1166
  }
1183
1167
 
1184
1168
  inline bool AppendFromSelf(size_t offset, size_t len) {
1185
- char* op = op_;
1186
- const size_t space_left = op_limit_ - op;
1169
+ char* const op_end = op_ + len;
1187
1170
 
1188
1171
  // Check if we try to append from before the start of the buffer.
1189
1172
  // Normally this would just be a check for "produced < offset",
@@ -1192,29 +1175,16 @@ class SnappyArrayWriter {
1192
1175
  // to a very big number. This is convenient, as offset==0 is another
1193
1176
  // invalid case that we also want to catch, so that we do not go
1194
1177
  // into an infinite loop.
1195
- assert(op >= base_);
1196
- size_t produced = op - base_;
1197
- if (produced <= offset - 1u) {
1198
- return false;
1199
- }
1200
- if (len <= 16 && offset >= 8 && space_left >= 16) {
1201
- // Fast path, used for the majority (70-80%) of dynamic invocations.
1202
- UnalignedCopy64(op - offset, op);
1203
- UnalignedCopy64(op - offset + 8, op + 8);
1204
- } else {
1205
- if (space_left >= len + kMaxIncrementCopyOverflow) {
1206
- IncrementalCopyFastPath(op - offset, op, len);
1207
- } else {
1208
- if (space_left < len) {
1209
- return false;
1210
- }
1211
- IncrementalCopy(op - offset, op, len);
1212
- }
1213
- }
1178
+ if (Produced() <= offset - 1u || op_end > op_limit_) return false;
1179
+ op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_);
1214
1180
 
1215
- op_ = op + len;
1216
1181
  return true;
1217
1182
  }
1183
+ inline size_t Produced() const {
1184
+ assert(op_ >= base_);
1185
+ return op_ - base_;
1186
+ }
1187
+ inline void Flush() {}
1218
1188
  };
1219
1189
 
1220
1190
  bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
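
Both writers now validate back-references with a single unsigned comparison, offset - 1u < produced, which rejects offset == 0 (it wraps to a huge value) as well as offsets reaching before the start of the output. A short sketch of why the trick works:

    // Sketch of the "offset - 1u" trick used in AppendFromSelf above.
    #include <cassert>
    #include <cstddef>

    static bool valid_copy_offset(size_t offset, size_t produced) {
      // offset == 0 wraps to SIZE_MAX, so it always fails the comparison.
      return offset - 1u < produced;
    }

    int main() {
      assert(!valid_copy_offset(0, 100));    // offset 0 is invalid
      assert(valid_copy_offset(1, 100));     // smallest legal back-reference
      assert(valid_copy_offset(100, 100));   // may reach the very first byte
      assert(!valid_copy_offset(101, 100));  // reaches before the buffer start
      return 0;
    }
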
@@ -1241,7 +1211,6 @@ bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
1241
1211
  return RawUncompress(compressed, n, string_as_array(uncompressed));
1242
1212
  }
1243
1213
 
1244
-
1245
1214
  // A Writer that drops everything on the floor and just does validation
1246
1215
  class SnappyDecompressionValidator {
1247
1216
  private:
@@ -1249,7 +1218,7 @@ class SnappyDecompressionValidator {
1249
1218
  size_t produced_;
1250
1219
 
1251
1220
  public:
1252
- inline SnappyDecompressionValidator() : produced_(0) { }
1221
+ inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
1253
1222
  inline void SetExpectedLength(size_t len) {
1254
1223
  expected_ = len;
1255
1224
  }
@@ -1270,6 +1239,7 @@ class SnappyDecompressionValidator {
1270
1239
  produced_ += len;
1271
1240
  return produced_ <= expected_;
1272
1241
  }
1242
+ inline void Flush() {}
1273
1243
  };
1274
1244
 
1275
1245
  bool IsValidCompressedBuffer(const char* compressed, size_t n) {
@@ -1278,6 +1248,11 @@ bool IsValidCompressedBuffer(const char* compressed, size_t n) {
1278
1248
  return InternalUncompress(&reader, &writer);
1279
1249
  }
1280
1250
 
1251
+ bool IsValidCompressed(Source* compressed) {
1252
+ SnappyDecompressionValidator writer;
1253
+ return InternalUncompress(compressed, &writer);
1254
+ }
1255
+
1281
1256
  void RawCompress(const char* input,
1282
1257
  size_t input_length,
1283
1258
  char* compressed,
@@ -1292,7 +1267,7 @@ void RawCompress(const char* input,
1292
1267
 
1293
1268
  size_t Compress(const char* input, size_t input_length, string* compressed) {
1294
1269
  // Pre-grow the buffer to the max length of the compressed output
1295
- compressed->resize(MaxCompressedLength(input_length));
1270
+ STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
1296
1271
 
1297
1272
  size_t compressed_length;
1298
1273
  RawCompress(input, input_length, string_as_array(compressed),
@@ -1301,6 +1276,240 @@ size_t Compress(const char* input, size_t input_length, string* compressed) {
1301
1276
  return compressed_length;
1302
1277
  }
1303
1278
 
1279
+ // -----------------------------------------------------------------------
1280
+ // Sink interface
1281
+ // -----------------------------------------------------------------------
1304
1282
 
1305
- } // end namespace snappy
1283
+ // A type that decompresses into a Sink. The template parameter
1284
+ // Allocator must export one method "char* Allocate(int size);", which
1285
+ // allocates a buffer of "size" and appends that to the destination.
1286
+ template <typename Allocator>
1287
+ class SnappyScatteredWriter {
1288
+ Allocator allocator_;
1289
+
1290
+ // We need random access into the data generated so far. Therefore
1291
+ // we keep track of all of the generated data as an array of blocks.
1292
+ // All of the blocks except the last have length kBlockSize.
1293
+ std::vector<char*> blocks_;
1294
+ size_t expected_;
1295
+
1296
+ // Total size of all fully generated blocks so far
1297
+ size_t full_size_;
1298
+
1299
+ // Pointer into current output block
1300
+ char* op_base_; // Base of output block
1301
+ char* op_ptr_; // Pointer to next unfilled byte in block
1302
+ char* op_limit_; // Pointer just past block
1303
+
1304
+ inline size_t Size() const {
1305
+ return full_size_ + (op_ptr_ - op_base_);
1306
+ }
1307
+
1308
+ bool SlowAppend(const char* ip, size_t len);
1309
+ bool SlowAppendFromSelf(size_t offset, size_t len);
1310
+
1311
+ public:
1312
+ inline explicit SnappyScatteredWriter(const Allocator& allocator)
1313
+ : allocator_(allocator),
1314
+ full_size_(0),
1315
+ op_base_(NULL),
1316
+ op_ptr_(NULL),
1317
+ op_limit_(NULL) {
1318
+ }
1319
+
1320
+ inline void SetExpectedLength(size_t len) {
1321
+ assert(blocks_.empty());
1322
+ expected_ = len;
1323
+ }
1324
+
1325
+ inline bool CheckLength() const {
1326
+ return Size() == expected_;
1327
+ }
1328
+
1329
+ // Return the number of bytes actually uncompressed so far
1330
+ inline size_t Produced() const {
1331
+ return Size();
1332
+ }
1333
+
1334
+ inline bool Append(const char* ip, size_t len) {
1335
+ size_t avail = op_limit_ - op_ptr_;
1336
+ if (len <= avail) {
1337
+ // Fast path
1338
+ memcpy(op_ptr_, ip, len);
1339
+ op_ptr_ += len;
1340
+ return true;
1341
+ } else {
1342
+ return SlowAppend(ip, len);
1343
+ }
1344
+ }
1345
+
1346
+ inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1347
+ char* op = op_ptr_;
1348
+ const int space_left = op_limit_ - op;
1349
+ if (length <= 16 && available >= 16 + kMaximumTagLength &&
1350
+ space_left >= 16) {
1351
+ // Fast path, used for the majority (about 95%) of invocations.
1352
+ UnalignedCopy128(ip, op);
1353
+ op_ptr_ = op + length;
1354
+ return true;
1355
+ } else {
1356
+ return false;
1357
+ }
1358
+ }
1359
+
1360
+ inline bool AppendFromSelf(size_t offset, size_t len) {
1361
+ char* const op_end = op_ptr_ + len;
1362
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
1363
+ // the "offset - 1u" trick.
1364
+ if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
1365
+ op_end <= op_limit_)) {
1366
+ // Fast path: src and dst in current block.
1367
+ op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
1368
+ return true;
1369
+ }
1370
+ return SlowAppendFromSelf(offset, len);
1371
+ }
1372
+
1373
+ // Called at the end of the decompress. We ask the allocator
1374
+ // write all blocks to the sink.
1375
+ inline void Flush() { allocator_.Flush(Produced()); }
1376
+ };
1377
+
1378
+ template<typename Allocator>
1379
+ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
1380
+ size_t avail = op_limit_ - op_ptr_;
1381
+ while (len > avail) {
1382
+ // Completely fill this block
1383
+ memcpy(op_ptr_, ip, avail);
1384
+ op_ptr_ += avail;
1385
+ assert(op_limit_ - op_ptr_ == 0);
1386
+ full_size_ += (op_ptr_ - op_base_);
1387
+ len -= avail;
1388
+ ip += avail;
1389
+
1390
+ // Bounds check
1391
+ if (full_size_ + len > expected_) {
1392
+ return false;
1393
+ }
1394
+
1395
+ // Make new block
1396
+ size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
1397
+ op_base_ = allocator_.Allocate(bsize);
1398
+ op_ptr_ = op_base_;
1399
+ op_limit_ = op_base_ + bsize;
1400
+ blocks_.push_back(op_base_);
1401
+ avail = bsize;
1402
+ }
1403
+
1404
+ memcpy(op_ptr_, ip, len);
1405
+ op_ptr_ += len;
1406
+ return true;
1407
+ }
1408
+
1409
+ template<typename Allocator>
1410
+ bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
1411
+ size_t len) {
1412
+ // Overflow check
1413
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
1414
+ // the "offset - 1u" trick.
1415
+ const size_t cur = Size();
1416
+ if (offset - 1u >= cur) return false;
1417
+ if (expected_ - cur < len) return false;
1418
+
1419
+ // Currently we shouldn't ever hit this path because Compress() chops the
1420
+ // input into blocks and does not create cross-block copies. However, it is
1421
+ // nice if we do not rely on that, since we can get better compression if we
1422
+ // allow cross-block copies and thus might want to change the compressor in
1423
+ // the future.
1424
+ size_t src = cur - offset;
1425
+ while (len-- > 0) {
1426
+ char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)];
1427
+ Append(&c, 1);
1428
+ src++;
1429
+ }
1430
+ return true;
1431
+ }
1432
+
1433
+ class SnappySinkAllocator {
1434
+ public:
1435
+ explicit SnappySinkAllocator(Sink* dest): dest_(dest) {}
1436
+ ~SnappySinkAllocator() {}
1437
+
1438
+ char* Allocate(int size) {
1439
+ Datablock block(new char[size], size);
1440
+ blocks_.push_back(block);
1441
+ return block.data;
1442
+ }
1443
+
1444
+ // We flush only at the end, because the writer wants
1445
+ // random access to the blocks and once we hand the
1446
+ // block over to the sink, we can't access it anymore.
1447
+ // Also we don't write more than has been actually written
1448
+ // to the blocks.
1449
+ void Flush(size_t size) {
1450
+ size_t size_written = 0;
1451
+ size_t block_size;
1452
+ for (int i = 0; i < blocks_.size(); ++i) {
1453
+ block_size = std::min<size_t>(blocks_[i].size, size - size_written);
1454
+ dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
1455
+ &SnappySinkAllocator::Deleter, NULL);
1456
+ size_written += block_size;
1457
+ }
1458
+ blocks_.clear();
1459
+ }
1460
+
1461
+ private:
1462
+ struct Datablock {
1463
+ char* data;
1464
+ size_t size;
1465
+ Datablock(char* p, size_t s) : data(p), size(s) {}
1466
+ };
1467
+
1468
+ static void Deleter(void* arg, const char* bytes, size_t size) {
1469
+ delete[] bytes;
1470
+ }
1471
+
1472
+ Sink* dest_;
1473
+ std::vector<Datablock> blocks_;
1474
+
1475
+ // Note: copying this object is allowed
1476
+ };
1306
1477
 
1478
+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
1479
+ SnappySinkAllocator allocator(uncompressed);
1480
+ SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
1481
+ InternalUncompress(compressed, &writer);
1482
+ return writer.Produced();
1483
+ }
1484
+
1485
+ bool Uncompress(Source* compressed, Sink* uncompressed) {
1486
+ // Read the uncompressed length from the front of the compressed input
1487
+ SnappyDecompressor decompressor(compressed);
1488
+ uint32 uncompressed_len = 0;
1489
+ if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
1490
+ return false;
1491
+ }
1492
+
1493
+ char c;
1494
+ size_t allocated_size;
1495
+ char* buf = uncompressed->GetAppendBufferVariable(
1496
+ 1, uncompressed_len, &c, 1, &allocated_size);
1497
+
1498
+ const size_t compressed_len = compressed->Available();
1499
+ // If we can get a flat buffer, then use it, otherwise do block by block
1500
+ // uncompression
1501
+ if (allocated_size >= uncompressed_len) {
1502
+ SnappyArrayWriter writer(buf);
1503
+ bool result = InternalUncompressAllTags(&decompressor, &writer,
1504
+ compressed_len, uncompressed_len);
1505
+ uncompressed->Append(buf, writer.Produced());
1506
+ return result;
1507
+ } else {
1508
+ SnappySinkAllocator allocator(uncompressed);
1509
+ SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
1510
+ return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
1511
+ uncompressed_len);
1512
+ }
1513
+ }
1514
+
1515
+ } // end namespace snappy
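
The new Sink-based entry points (Uncompress(Source*, Sink*), UncompressAsMuchAsPossible, IsValidCompressed) sit alongside the flat-buffer API that this file already implements. For reference, a typical round trip through that flat API (a usage sketch, not part of the diff):

    // Round-trip through the flat-buffer API implemented in this file.
    #include <cassert>
    #include <string>
    #include "snappy.h"

    int main() {
      const std::string input(1000, 'a');   // highly compressible
      std::string compressed, restored;

      snappy::Compress(input.data(), input.size(), &compressed);
      assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
      assert(snappy::Uncompress(compressed.data(), compressed.size(), &restored));
      assert(restored == input);
      return 0;
    }
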