snappy 0.0.12-java → 0.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +5 -5
  2. data/.travis.yml +28 -1
  3. data/Gemfile +6 -1
  4. data/README.md +28 -4
  5. data/Rakefile +1 -0
  6. data/ext/extconf.rb +21 -24
  7. data/lib/snappy.rb +3 -1
  8. data/lib/snappy/hadoop.rb +22 -0
  9. data/lib/snappy/hadoop/reader.rb +58 -0
  10. data/lib/snappy/hadoop/writer.rb +51 -0
  11. data/lib/snappy/reader.rb +11 -7
  12. data/lib/snappy/shim.rb +30 -0
  13. data/lib/snappy/version.rb +3 -1
  14. data/lib/snappy/writer.rb +14 -9
  15. data/smoke.sh +8 -0
  16. data/snappy.gemspec +6 -30
  17. data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
  18. data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
  19. data/test/test-snappy-hadoop.rb +22 -0
  20. data/vendor/snappy/CMakeLists.txt +174 -0
  21. data/vendor/snappy/CONTRIBUTING.md +26 -0
  22. data/vendor/snappy/COPYING +1 -1
  23. data/vendor/snappy/NEWS +52 -0
  24. data/vendor/snappy/{README → README.md} +23 -9
  25. data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
  26. data/vendor/snappy/cmake/config.h.in +62 -0
  27. data/vendor/snappy/snappy-c.h +3 -3
  28. data/vendor/snappy/snappy-internal.h +101 -27
  29. data/vendor/snappy/snappy-sinksource.cc +33 -0
  30. data/vendor/snappy/snappy-sinksource.h +51 -6
  31. data/vendor/snappy/snappy-stubs-internal.h +107 -37
  32. data/vendor/snappy/snappy-stubs-public.h.in +16 -20
  33. data/vendor/snappy/snappy-test.cc +15 -9
  34. data/vendor/snappy/snappy-test.h +34 -43
  35. data/vendor/snappy/snappy.cc +529 -320
  36. data/vendor/snappy/snappy.h +23 -4
  37. data/vendor/snappy/snappy_unittest.cc +240 -185
  38. metadata +27 -74
  39. data/vendor/snappy/ChangeLog +0 -1916
  40. data/vendor/snappy/Makefile.am +0 -23
  41. data/vendor/snappy/autogen.sh +0 -7
  42. data/vendor/snappy/configure.ac +0 -133
  43. data/vendor/snappy/m4/gtest.m4 +0 -74
  44. data/vendor/snappy/testdata/alice29.txt +0 -3609
  45. data/vendor/snappy/testdata/asyoulik.txt +0 -4122
  46. data/vendor/snappy/testdata/baddata1.snappy +0 -0
  47. data/vendor/snappy/testdata/baddata2.snappy +0 -0
  48. data/vendor/snappy/testdata/baddata3.snappy +0 -0
  49. data/vendor/snappy/testdata/fireworks.jpeg +0 -0
  50. data/vendor/snappy/testdata/geo.protodata +0 -0
  51. data/vendor/snappy/testdata/html +0 -1
  52. data/vendor/snappy/testdata/html_x_4 +0 -1
  53. data/vendor/snappy/testdata/kppkn.gtb +0 -0
  54. data/vendor/snappy/testdata/lcet10.txt +0 -7519
  55. data/vendor/snappy/testdata/paper-100k.pdf +2 -600
  56. data/vendor/snappy/testdata/plrabn12.txt +0 -10699
  57. data/vendor/snappy/testdata/urls.10K +0 -10000
@@ -30,6 +30,18 @@
  #include "snappy-internal.h"
  #include "snappy-sinksource.h"

+ #ifndef SNAPPY_HAVE_SSE2
+ #if defined(__SSE2__) || defined(_M_X64) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+ #define SNAPPY_HAVE_SSE2 1
+ #else
+ #define SNAPPY_HAVE_SSE2 0
+ #endif
+ #endif
+
+ #if SNAPPY_HAVE_SSE2
+ #include <emmintrin.h>
+ #endif
  #include <stdio.h>

  #include <algorithm>
@@ -39,6 +51,12 @@

  namespace snappy {

+ using internal::COPY_1_BYTE_OFFSET;
+ using internal::COPY_2_BYTE_OFFSET;
+ using internal::LITERAL;
+ using internal::char_table;
+ using internal::kMaximumTagLength;
+
  // Any hash function will produce a valid compressed bitstream, but a good
  // hash function reduces the number of collisions and thus yields better
  // compression for compressible input, and more speed for incompressible
@@ -76,79 +94,129 @@ size_t MaxCompressedLength(size_t source_len) {
  return 32 + source_len + source_len/6;
  }

- enum {
- LITERAL = 0,
- COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
- COPY_2_BYTE_OFFSET = 2,
- COPY_4_BYTE_OFFSET = 3
- };
- static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
-
- // Copy "len" bytes from "src" to "op", one byte at a time. Used for
- // handling COPY operations where the input and output regions may
- // overlap. For example, suppose:
- // src == "ab"
- // op == src + 2
- // len == 20
- // After IncrementalCopy(src, op, len), the result will have
- // eleven copies of "ab"
- // ababababababababababab
- // Note that this does not match the semantics of either memcpy()
- // or memmove().
- static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
- assert(len > 0);
- do {
- *op++ = *src++;
- } while (--len > 0);
+ namespace {
+
+ void UnalignedCopy64(const void* src, void* dst) {
+ char tmp[8];
+ memcpy(tmp, src, 8);
+ memcpy(dst, tmp, 8);
  }

- // Equivalent to IncrementalCopy except that it can write up to ten extra
- // bytes after the end of the copy, and that it is faster.
- //
- // The main part of this loop is a simple copy of eight bytes at a time until
- // we've copied (at least) the requested amount of bytes. However, if op and
- // src are less than eight bytes apart (indicating a repeating pattern of
- // length < 8), we first need to expand the pattern in order to get the correct
- // results. For instance, if the buffer looks like this, with the eight-byte
- // <src> and <op> patterns marked as intervals:
- //
- // abxxxxxxxxxxxx
- // [------] src
- // [------] op
- //
- // a single eight-byte copy from <src> to <op> will repeat the pattern once,
- // after which we can move <op> two bytes without moving <src>:
- //
- // ababxxxxxxxxxx
- // [------] src
- // [------] op
- //
- // and repeat the exercise until the two no longer overlap.
- //
- // This allows us to do very well in the special case of one single byte
- // repeated many times, without taking a big hit for more general cases.
- //
- // The worst case of extra writing past the end of the match occurs when
- // op - src == 1 and len == 1; the last copy will read from byte positions
- // [0..7] and write to [4..11], whereas it was only supposed to write to
- // position 1. Thus, ten excess bytes.
+ void UnalignedCopy128(const void* src, void* dst) {
+ // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
+ // SSE2 moves for memcpy(dst, src, 16).
+ #if SNAPPY_HAVE_SSE2
+ __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
+ _mm_storeu_si128(static_cast<__m128i*>(dst), x);
+ #else
+ char tmp[16];
+ memcpy(tmp, src, 16);
+ memcpy(dst, tmp, 16);
+ #endif
+ }

- namespace {
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used
+ // for handling COPY operations where the input and output regions may overlap.
+ // For example, suppose:
+ // src == "ab"
+ // op == src + 2
+ // op_limit == op + 20
+ // After IncrementalCopySlow(src, op, op_limit), the result will have eleven
+ // copies of "ab"
+ // ababababababababababab
+ // Note that this does not match the semantics of either memcpy() or memmove().
+ inline char* IncrementalCopySlow(const char* src, char* op,
+ char* const op_limit) {
+ while (op < op_limit) {
+ *op++ = *src++;
+ }
+ return op_limit;
+ }

- const int kMaxIncrementCopyOverflow = 10;
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
+ // IncrementalCopySlow. buf_limit is the address past the end of the writable
+ // region of the buffer.
+ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
+ char* const buf_limit) {
+ // Terminology:
+ //
+ // slop = buf_limit - op
+ // pat = op - src
+ // len = limit - op
+ assert(src < op);
+ assert(op_limit <= buf_limit);
+ // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
+ // to optimize this function but we have to also handle these cases in case
+ // the input does not satisfy these conditions.
+
+ size_t pattern_size = op - src;
+ // The cases are split into different branches to allow the branch predictor,
+ // FDO, and static prediction hints to work better. For each input we list the
+ // ratio of invocations that match each condition.
+ //
+ // input slop < 16 pat < 8 len > 16
+ // ------------------------------------------
+ // html|html4|cp 0% 1.01% 27.73%
+ // urls 0% 0.88% 14.79%
+ // jpg 0% 64.29% 7.14%
+ // pdf 0% 2.56% 58.06%
+ // txt[1-4] 0% 0.23% 0.97%
+ // pb 0% 0.96% 13.88%
+ // bin 0.01% 22.27% 41.17%
+ //
+ // It is very rare that we don't have enough slop for doing block copies. It
+ // is also rare that we need to expand a pattern. Small patterns are common
+ // for incompressible formats and for those we are plenty fast already.
+ // Lengths are normally not greater than 16 but they vary depending on the
+ // input. In general if we always predict len <= 16 it would be an ok
+ // prediction.
+ //
+ // In order to be fast we want a pattern >= 8 bytes and an unrolled loop
+ // copying 2x 8 bytes at a time.
+
+ // Handle the uncommon case where pattern is less than 8 bytes.
+ if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
+ // Expand pattern to at least 8 bytes. The worse case scenario in terms of
+ // buffer usage is when the pattern is size 3. ^ is the original position
+ // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
+ //
+ // abc
+ // abcabcxxxxx
+ // abcabcabcabcxxxxx
+ // ^
+ // The last x is 14 bytes after ^.
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 14)) {
+ while (pattern_size < 8) {
+ UnalignedCopy64(src, op);
+ op += pattern_size;
+ pattern_size *= 2;
+ }
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
+ } else {
+ return IncrementalCopySlow(src, op, op_limit);
+ }
+ }
+ assert(pattern_size >= 8);

- inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
- while (op - src < 8) {
+ // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single
+ // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
+ // because expanding the pattern to at least 8 bytes guarantees that
+ // op - src >= 8.
+ while (op <= buf_limit - 16) {
  UnalignedCopy64(src, op);
- len -= op - src;
- op += op - src;
+ UnalignedCopy64(src + 8, op + 8);
+ src += 16;
+ op += 16;
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
  }
- while (len > 0) {
+ // We only take this branch if we didn't have enough slop and we can do a
+ // single 8 byte copy.
+ if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
  UnalignedCopy64(src, op);
  src += 8;
  op += 8;
- len -= 8;
  }
+ return IncrementalCopySlow(src, op, op_limit);
  }

  } // namespace
@@ -157,26 +225,29 @@ static inline char* EmitLiteral(char* op,
  const char* literal,
  int len,
  bool allow_fast_path) {
- int n = len - 1; // Zero-length literals are disallowed
- if (n < 60) {
+ // The vast majority of copies are below 16 bytes, for which a
+ // call to memcpy is overkill. This fast path can sometimes
+ // copy up to 15 bytes too much, but that is okay in the
+ // main loop, since we have a bit to go on for both sides:
+ //
+ // - The input will always have kInputMarginBytes = 15 extra
+ // available bytes, as long as we're in the main loop, and
+ // if not, allow_fast_path = false.
+ // - The output will always have 32 spare bytes (see
+ // MaxCompressedLength).
+ assert(len > 0); // Zero-length literals are disallowed
+ int n = len - 1;
+ if (allow_fast_path && len <= 16) {
  // Fits in tag byte
  *op++ = LITERAL | (n << 2);

- // The vast majority of copies are below 16 bytes, for which a
- // call to memcpy is overkill. This fast path can sometimes
- // copy up to 15 bytes too much, but that is okay in the
- // main loop, since we have a bit to go on for both sides:
- //
- // - The input will always have kInputMarginBytes = 15 extra
- // available bytes, as long as we're in the main loop, and
- // if not, allow_fast_path = false.
- // - The output will always have 32 spare bytes (see
- // MaxCompressedLength).
- if (allow_fast_path && len <= 16) {
- UnalignedCopy64(literal, op);
- UnalignedCopy64(literal + 8, op + 8);
- return op + len;
- }
+ UnalignedCopy128(literal, op);
+ return op + len;
+ }
+
+ if (n < 60) {
+ // Fits in tag byte
+ *op++ = LITERAL | (n << 2);
  } else {
  // Encode in upcoming bytes
  char* base = op;
@@ -195,42 +266,54 @@ static inline char* EmitLiteral(char* op,
  return op + len;
  }

- static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
+ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
+ bool len_less_than_12) {
  assert(len <= 64);
  assert(len >= 4);
  assert(offset < 65536);
+ assert(len_less_than_12 == (len < 12));

- if ((len < 12) && (offset < 2048)) {
- size_t len_minus_4 = len - 4;
- assert(len_minus_4 < 8); // Must fit in 3 bits
- *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
+ if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
+ // offset fits in 11 bits. The 3 highest go in the top of the first byte,
+ // and the rest go in the second byte.
+ *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
  *op++ = offset & 0xff;
  } else {
- *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
- LittleEndian::Store16(op, offset);
- op += 2;
+ // Write 4 bytes, though we only care about 3 of them. The output buffer
+ // is required to have some slack, so the extra byte won't overrun it.
+ uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8);
+ LittleEndian::Store32(op, u);
+ op += 3;
  }
  return op;
  }

- static inline char* EmitCopy(char* op, size_t offset, int len) {
- // Emit 64 byte copies but make sure to keep at least four bytes reserved
- while (len >= 68) {
- op = EmitCopyLessThan64(op, offset, 64);
- len -= 64;
- }
+ static inline char* EmitCopy(char* op, size_t offset, size_t len,
+ bool len_less_than_12) {
+ assert(len_less_than_12 == (len < 12));
+ if (len_less_than_12) {
+ return EmitCopyAtMost64(op, offset, len, true);
+ } else {
+ // A special case for len <= 64 might help, but so far measurements suggest
+ // it's in the noise.

- // Emit an extra 60 byte copy if have too much data to fit in one copy
- if (len > 64) {
- op = EmitCopyLessThan64(op, offset, 60);
- len -= 60;
- }
+ // Emit 64 byte copies but make sure to keep at least four bytes reserved.
+ while (SNAPPY_PREDICT_FALSE(len >= 68)) {
+ op = EmitCopyAtMost64(op, offset, 64, false);
+ len -= 64;
+ }

- // Emit remainder
- op = EmitCopyLessThan64(op, offset, len);
- return op;
- }
+ // One or two copies will now finish the job.
+ if (len > 64) {
+ op = EmitCopyAtMost64(op, offset, 60, false);
+ len -= 60;
+ }

+ // Emit remainder.
+ op = EmitCopyAtMost64(op, offset, len, len < 12);
+ return op;
+ }
+ }

  bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
  uint32 v = 0;
@@ -344,7 +427,7 @@ char* CompressFragment(const char* input,
  const char* next_emit = ip;

  const size_t kInputMarginBytes = 15;
- if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+ if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
  const char* ip_limit = input + input_size - kInputMarginBytes;

  for (uint32 next_hash = Hash(++ip, shift); ; ) {
@@ -364,9 +447,9 @@
  //
  // Heuristic match skipping: If 32 bytes are scanned with no matches
  // found, start looking only at every other byte. If 32 more bytes are
- // scanned, look at every third byte, etc.. When a match is found,
- // immediately go back to looking at every byte. This is a small loss
- // (~5% performance, ~0.1% density) for compressible data due to more
+ // scanned (or skipped), look at every third byte, etc.. When a match is
+ // found, immediately go back to looking at every byte. This is a small
+ // loss (~5% performance, ~0.1% density) for compressible data due to more
  // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
  // win since the compressor quickly "realizes" the data is incompressible
  // and doesn't bother looking for matches everywhere.
@@ -382,9 +465,10 @@
  ip = next_ip;
  uint32 hash = next_hash;
  assert(hash == Hash(ip, shift));
- uint32 bytes_between_hash_lookups = skip++ >> 5;
+ uint32 bytes_between_hash_lookups = skip >> 5;
+ skip += bytes_between_hash_lookups;
  next_ip = ip + bytes_between_hash_lookups;
- if (PREDICT_FALSE(next_ip > ip_limit)) {
+ if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
  goto emit_remainder;
  }
  next_hash = Hash(next_ip, shift);
@@ -393,8 +477,8 @@
  assert(candidate < ip);

  table[hash] = ip - base_ip;
- } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
- UNALIGNED_LOAD32(candidate)));
+ } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
+ UNALIGNED_LOAD32(candidate)));

  // Step 2: A 4-byte match has been found. We'll later see if more
  // than 4 bytes match. But, prior to the match, input
@@ -417,19 +501,21 @@
  // We have a 4-byte match at ip, and no need to emit any
  // "literal bytes" prior to ip.
  const char* base = ip;
- int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
+ std::pair<size_t, bool> p =
+ FindMatchLength(candidate + 4, ip + 4, ip_end);
+ size_t matched = 4 + p.first;
  ip += matched;
  size_t offset = base - candidate;
  assert(0 == memcmp(base, candidate, matched));
- op = EmitCopy(op, offset, matched);
- // We could immediately start working at ip now, but to improve
- // compression we first update table[Hash(ip - 1, ...)].
- const char* insert_tail = ip - 1;
+ op = EmitCopy(op, offset, matched, p.second);
  next_emit = ip;
- if (PREDICT_FALSE(ip >= ip_limit)) {
+ if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
  goto emit_remainder;
  }
- input_bytes = GetEightBytesAt(insert_tail);
+ // We are now looking for a 4-byte match again. We read
+ // table[Hash(ip, shift)] for that. To improve compression,
+ // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
+ input_bytes = GetEightBytesAt(ip - 1);
  uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
  table[prev_hash] = ip - base_ip - 1;
  uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
@@ -453,6 +539,10 @@
  }
  } // end namespace internal

+ // Called back at avery compression call to trace parameters and sizes.
+ static inline void Report(const char *algorithm, size_t compressed_size,
+ size_t uncompressed_size) {}
+
  // Signature of output types needed by decompression code.
  // The decompression code is templatized on a type that obeys this
  // signature so that we do not pay virtual function call overhead in
@@ -493,162 +583,14 @@ char* CompressFragment(const char* input,
  // bool TryFastAppend(const char* ip, size_t available, size_t length);
  // };

- // -----------------------------------------------------------------------
- // Lookup table for decompression code. Generated by ComputeTable() below.
- // -----------------------------------------------------------------------
+ namespace internal {

  // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
  static const uint32 wordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
  };

- // Data stored per entry in lookup table:
- // Range Bits-used Description
- // ------------------------------------
- // 1..64 0..7 Literal/copy length encoded in opcode byte
- // 0..7 8..10 Copy offset encoded in opcode byte / 256
- // 0..4 11..13 Extra bytes after opcode
- //
- // We use eight bits for the length even though 7 would have sufficed
- // because of efficiency reasons:
- // (1) Extracting a byte is faster than a bit-field
- // (2) It properly aligns copy offset so we do not need a <<8
- static const uint16 char_table[256] = {
- 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
- 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
- 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
- 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
- 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
- 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
- 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
- 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
- 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
- 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
- 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
- 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
- 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
- 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
- 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
- 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
- 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
- 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
- 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
- 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
- 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
- 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
- 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
- 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
- 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
- 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
- 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
- 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
- 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
- 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
- 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
- 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
- };
-
- // In debug mode, allow optional computation of the table at startup.
- // Also, check that the decompression table is correct.
- #ifndef NDEBUG
- DEFINE_bool(snappy_dump_decompression_table, false,
- "If true, we print the decompression table at startup.");
-
- static uint16 MakeEntry(unsigned int extra,
- unsigned int len,
- unsigned int copy_offset) {
- // Check that all of the fields fit within the allocated space
- assert(extra == (extra & 0x7)); // At most 3 bits
- assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
- assert(len == (len & 0x7f)); // At most 7 bits
- return len | (copy_offset << 8) | (extra << 11);
- }
-
- static void ComputeTable() {
- uint16 dst[256];
-
- // Place invalid entries in all places to detect missing initialization
- int assigned = 0;
- for (int i = 0; i < 256; i++) {
- dst[i] = 0xffff;
- }
-
- // Small LITERAL entries. We store (len-1) in the top 6 bits.
- for (unsigned int len = 1; len <= 60; len++) {
- dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
- assigned++;
- }
-
- // Large LITERAL entries. We use 60..63 in the high 6 bits to
- // encode the number of bytes of length info that follow the opcode.
- for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
- // We set the length field in the lookup table to 1 because extra
- // bytes encode len-1.
- dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
- assigned++;
- }
-
- // COPY_1_BYTE_OFFSET.
- //
- // The tag byte in the compressed data stores len-4 in 3 bits, and
- // offset/256 in 5 bits. offset%256 is stored in the next byte.
- //
- // This format is used for length in range [4..11] and offset in
- // range [0..2047]
- for (unsigned int len = 4; len < 12; len++) {
- for (unsigned int offset = 0; offset < 2048; offset += 256) {
- dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
- MakeEntry(1, len, offset>>8);
- assigned++;
- }
- }
-
- // COPY_2_BYTE_OFFSET.
- // Tag contains len-1 in top 6 bits, and offset in next two bytes.
- for (unsigned int len = 1; len <= 64; len++) {
- dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
- assigned++;
- }
-
- // COPY_4_BYTE_OFFSET.
- // Tag contents len-1 in top 6 bits, and offset in next four bytes.
- for (unsigned int len = 1; len <= 64; len++) {
- dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
- assigned++;
- }
-
- // Check that each entry was initialized exactly once.
- if (assigned != 256) {
- fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
- abort();
- }
- for (int i = 0; i < 256; i++) {
- if (dst[i] == 0xffff) {
- fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
- abort();
- }
- }
-
- if (FLAGS_snappy_dump_decompression_table) {
- printf("static const uint16 char_table[256] = {\n ");
- for (int i = 0; i < 256; i++) {
- printf("0x%04x%s",
- dst[i],
- ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
- }
- printf("};\n");
- }
-
- // Check that computed table matched recorded table
- for (int i = 0; i < 256; i++) {
- if (dst[i] != char_table[i]) {
- fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
- i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
- abort();
- }
- }
- }
- #endif /* !NDEBUG */
+ } // end namespace internal

  // Helper class for decompression
  class SnappyDecompressor {
@@ -701,7 +643,9 @@ class SnappyDecompressor {
  if (n == 0) return false;
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
  reader_->Skip(1);
- *result |= static_cast<uint32>(c & 0x7f) << shift;
+ uint32 val = c & 0x7f;
+ if (((val << shift) >> shift) != val) return false;
+ *result |= val << shift;
  if (c < 128) {
  break;
  }
@@ -715,6 +659,19 @@
  template <class Writer>
  void DecompressAllTags(Writer* writer) {
  const char* ip = ip_;
+ // For position-independent executables, accessing global arrays can be
+ // slow. Move wordmask array onto the stack to mitigate this.
+ uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
+ // Do not use memcpy to copy internal::wordmask to
+ // wordmask. LLVM converts stack arrays to global arrays if it detects
+ // const stack arrays and this hurts the performance of position
+ // independent code. This change is temporary and can be reverted when
+ // https://reviews.llvm.org/D30759 is approved.
+ wordmask[0] = internal::wordmask[0];
+ wordmask[1] = internal::wordmask[1];
+ wordmask[2] = internal::wordmask[2];
+ wordmask[3] = internal::wordmask[3];
+ wordmask[4] = internal::wordmask[4];

  // We could have put this refill fragment only at the beginning of the loop.
  // However, duplicating it at the end of each branch gives the compiler more
@@ -728,10 +685,29 @@ class SnappyDecompressor {
  }

  MAYBE_REFILL();
+ // Add loop alignment directive. Without this directive, we observed
+ // significant performance degradation on several intel architectures
+ // in snappy benchmark built with LLVM. The degradation was caused by
+ // increased branch miss prediction.
+ #if defined(__clang__) && defined(__x86_64__)
+ asm volatile (".p2align 5");
+ #endif
  for ( ;; ) {
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

- if ((c & 0x3) == LITERAL) {
+ // Ratio of iterations that have LITERAL vs non-LITERAL for different
+ // inputs.
+ //
+ // input LITERAL NON_LITERAL
+ // -----------------------------------
+ // html|html4|cp 23% 77%
+ // urls 36% 64%
+ // jpg 47% 53%
+ // pdf 19% 81%
+ // txt[1-4] 25% 75%
+ // pb 24% 76%
+ // bin 24% 76%
+ if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
  size_t literal_length = (c >> 2) + 1u;
  if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
  assert(literal_length < 61);
@@ -741,7 +717,7 @@ class SnappyDecompressor {
  // bytes in addition to the literal.
  continue;
  }
- if (PREDICT_FALSE(literal_length >= 61)) {
+ if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
  // Long literal.
  const size_t literal_length_length = literal_length - 60;
  literal_length =
@@ -767,15 +743,15 @@ class SnappyDecompressor {
  ip += literal_length;
  MAYBE_REFILL();
  } else {
- const uint32 entry = char_table[c];
- const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
- const uint32 length = entry & 0xff;
+ const size_t entry = char_table[c];
+ const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+ const size_t length = entry & 0xff;
  ip += entry >> 11;

  // copy_offset/256 is encoded in bits 8..10. By just fetching
  // those bits, we get copy_offset (since the bit-field starts at
  // bit 8).
- const uint32 copy_offset = entry & 0x700;
+ const size_t copy_offset = entry & 0x700;
  if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
  return;
  }
@@ -795,10 +771,8 @@ bool SnappyDecompressor::RefillTag() {
  size_t n;
  ip = reader_->Peek(&n);
  peeked_ = n;
- if (n == 0) {
- eof_ = true;
- return false;
- }
+ eof_ = (n == 0);
+ if (eof_) return false;
  ip_limit_ = ip + n;
  }

@@ -823,7 +797,7 @@ bool SnappyDecompressor::RefillTag() {
  size_t length;
  const char* src = reader_->Peek(&length);
  if (length == 0) return false;
- uint32 to_add = min<uint32>(needed - nbuf, length);
+ uint32 to_add = std::min<uint32>(needed - nbuf, length);
  memcpy(scratch_ + nbuf, src, to_add);
  nbuf += to_add;
  reader_->Skip(to_add);
@@ -852,17 +826,23 @@ static bool InternalUncompress(Source* r, Writer* writer) {
  SnappyDecompressor decompressor(r);
  uint32 uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
- return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
+
+ return InternalUncompressAllTags(&decompressor, writer, r->Available(),
+ uncompressed_len);
  }

  template <typename Writer>
  static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
  Writer* writer,
+ uint32 compressed_len,
  uint32 uncompressed_len) {
+ Report("snappy_uncompress", compressed_len, uncompressed_len);
+
  writer->SetExpectedLength(uncompressed_len);

  // Process the entire input
  decompressor->DecompressAllTags(writer);
+ writer->Flush();
  return (decompressor->eof() && writer->CheckLength());
  }

@@ -874,6 +854,7 @@ bool GetUncompressedLength(Source* source, uint32* result) {
  size_t Compress(Source* reader, Sink* writer) {
  size_t written = 0;
  size_t N = reader->Available();
+ const size_t uncompressed_size = N;
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
  writer->Append(ulength, p-ulength);
@@ -888,7 +869,7 @@ size_t Compress(Source* reader, Sink* writer) {
  size_t fragment_size;
  const char* fragment = reader->Peek(&fragment_size);
  assert(fragment_size != 0); // premature end of input
- const size_t num_to_read = min(N, kBlockSize);
+ const size_t num_to_read = std::min(N, kBlockSize);
  size_t bytes_read = fragment_size;

  size_t pending_advance = 0;
@@ -909,7 +890,7 @@ size_t Compress(Source* reader, Sink* writer) {

  while (bytes_read < num_to_read) {
  fragment = reader->Peek(&fragment_size);
- size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
+ size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
  memcpy(scratch + bytes_read, fragment, n);
  bytes_read += n;
  reader->Skip(n);
@@ -946,6 +927,8 @@ size_t Compress(Source* reader, Sink* writer) {
  reader->Skip(pending_advance);
  }

+ Report("snappy_compress", written, uncompressed_size);
+
  delete[] scratch;
  delete[] scratch_output;

@@ -965,7 +948,7 @@ class SnappyIOVecWriter {
  const size_t output_iov_count_;

  // We are currently writing into output_iov_[curr_iov_index_].
- int curr_iov_index_;
+ size_t curr_iov_index_;

  // Bytes written to output_iov_[curr_iov_index_] so far.
  size_t curr_iov_written_;
@@ -976,7 +959,7 @@ class SnappyIOVecWriter {
  // Maximum number of bytes that will be decompressed into output_iov_.
  size_t output_limit_;

- inline char* GetIOVecPointer(int index, size_t offset) {
+ inline char* GetIOVecPointer(size_t index, size_t offset) {
  return reinterpret_cast<char*>(output_iov_[index].iov_base) +
  offset;
  }
@@ -1037,8 +1020,7 @@ class SnappyIOVecWriter {
  output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
  // Fast path, used for the majority (about 95%) of invocations.
  char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
- UnalignedCopy64(ip, ptr);
- UnalignedCopy64(ip + 8, ptr + 8);
+ UnalignedCopy128(ip, ptr);
  curr_iov_written_ += len;
  total_written_ += len;
  return true;
@@ -1057,7 +1039,7 @@ class SnappyIOVecWriter {
  }

  // Locate the iovec from which we need to start the copy.
- int from_iov_index = curr_iov_index_;
+ size_t from_iov_index = curr_iov_index_;
  size_t from_iov_offset = curr_iov_written_;
  while (offset > 0) {
  if (from_iov_offset >= offset) {
@@ -1066,8 +1048,8 @@ class SnappyIOVecWriter {
  }

  offset -= from_iov_offset;
+ assert(from_iov_index > 0);
  --from_iov_index;
- assert(from_iov_index >= 0);
  from_iov_offset = output_iov_[from_iov_index].iov_len;
  }

@@ -1102,9 +1084,10 @@ class SnappyIOVecWriter {
  if (to_copy > len) {
  to_copy = len;
  }
- IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
- GetIOVecPointer(curr_iov_index_, curr_iov_written_),
- to_copy);
+ IncrementalCopySlow(
+ GetIOVecPointer(from_iov_index, from_iov_offset),
+ GetIOVecPointer(curr_iov_index_, curr_iov_written_),
+ GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy);
  curr_iov_written_ += to_copy;
  from_iov_offset += to_copy;
  total_written_ += to_copy;
@@ -1115,6 +1098,7 @@ class SnappyIOVecWriter {
  return true;
  }

+ inline void Flush() {}
  };

  bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
@@ -1145,7 +1129,8 @@ class SnappyArrayWriter {
  public:
  inline explicit SnappyArrayWriter(char* dst)
  : base_(dst),
- op_(dst) {
+ op_(dst),
+ op_limit_(dst) {
  }

  inline void SetExpectedLength(size_t len) {
@@ -1172,8 +1157,7 @@ class SnappyArrayWriter {
  const size_t space_left = op_limit_ - op;
  if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
  // Fast path, used for the majority (about 95%) of invocations.
- UnalignedCopy64(ip, op);
- UnalignedCopy64(ip + 8, op + 8);
+ UnalignedCopy128(ip, op);
  op_ = op + len;
  return true;
  } else {
@@ -1182,8 +1166,7 @@ class SnappyArrayWriter {
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
- char* op = op_;
- const size_t space_left = op_limit_ - op;
+ char* const op_end = op_ + len;

  // Check if we try to append from before the start of the buffer.
  // Normally this would just be a check for "produced < offset",
@@ -1192,29 +1175,16 @@ class SnappyArrayWriter {
  // to a very big number. This is convenient, as offset==0 is another
  // invalid case that we also want to catch, so that we do not go
  // into an infinite loop.
- assert(op >= base_);
- size_t produced = op - base_;
- if (produced <= offset - 1u) {
- return false;
- }
- if (len <= 16 && offset >= 8 && space_left >= 16) {
- // Fast path, used for the majority (70-80%) of dynamic invocations.
- UnalignedCopy64(op - offset, op);
- UnalignedCopy64(op - offset + 8, op + 8);
- } else {
- if (space_left >= len + kMaxIncrementCopyOverflow) {
- IncrementalCopyFastPath(op - offset, op, len);
- } else {
- if (space_left < len) {
- return false;
- }
- IncrementalCopy(op - offset, op, len);
- }
- }
+ if (Produced() <= offset - 1u || op_end > op_limit_) return false;
+ op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_);

- op_ = op + len;
  return true;
  }
+ inline size_t Produced() const {
+ assert(op_ >= base_);
+ return op_ - base_;
+ }
+ inline void Flush() {}
  };

  bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
@@ -1241,7 +1211,6 @@ bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
  return RawUncompress(compressed, n, string_as_array(uncompressed));
  }

-
  // A Writer that drops everything on the floor and just does validation
  class SnappyDecompressionValidator {
  private:
@@ -1249,7 +1218,7 @@ class SnappyDecompressionValidator {
  size_t produced_;

  public:
- inline SnappyDecompressionValidator() : produced_(0) { }
+ inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
  inline void SetExpectedLength(size_t len) {
  expected_ = len;
  }
@@ -1270,6 +1239,7 @@ class SnappyDecompressionValidator {
  produced_ += len;
  return produced_ <= expected_;
  }
+ inline void Flush() {}
  };

  bool IsValidCompressedBuffer(const char* compressed, size_t n) {
@@ -1278,6 +1248,11 @@ bool IsValidCompressedBuffer(const char* compressed, size_t n) {
  return InternalUncompress(&reader, &writer);
  }

+ bool IsValidCompressed(Source* compressed) {
+ SnappyDecompressionValidator writer;
+ return InternalUncompress(compressed, &writer);
+ }
+
  void RawCompress(const char* input,
  size_t input_length,
  char* compressed,
@@ -1292,7 +1267,7 @@ void RawCompress(const char* input,

  size_t Compress(const char* input, size_t input_length, string* compressed) {
  // Pre-grow the buffer to the max length of the compressed output
- compressed->resize(MaxCompressedLength(input_length));
+ STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));

  size_t compressed_length;
  RawCompress(input, input_length, string_as_array(compressed),
@@ -1301,6 +1276,240 @@ size_t Compress(const char* input, size_t input_length, string* compressed) {
  return compressed_length;
  }

+ // -----------------------------------------------------------------------
+ // Sink interface
+ // -----------------------------------------------------------------------

- } // end namespace snappy
+ // A type that decompresses into a Sink. The template parameter
+ // Allocator must export one method "char* Allocate(int size);", which
+ // allocates a buffer of "size" and appends that to the destination.
+ template <typename Allocator>
+ class SnappyScatteredWriter {
+ Allocator allocator_;
+
+ // We need random access into the data generated so far. Therefore
+ // we keep track of all of the generated data as an array of blocks.
+ // All of the blocks except the last have length kBlockSize.
+ std::vector<char*> blocks_;
+ size_t expected_;
+
+ // Total size of all fully generated blocks so far
+ size_t full_size_;
+
+ // Pointer into current output block
+ char* op_base_; // Base of output block
+ char* op_ptr_; // Pointer to next unfilled byte in block
+ char* op_limit_; // Pointer just past block
+
+ inline size_t Size() const {
+ return full_size_ + (op_ptr_ - op_base_);
+ }
+
+ bool SlowAppend(const char* ip, size_t len);
+ bool SlowAppendFromSelf(size_t offset, size_t len);
+
+ public:
+ inline explicit SnappyScatteredWriter(const Allocator& allocator)
+ : allocator_(allocator),
+ full_size_(0),
+ op_base_(NULL),
+ op_ptr_(NULL),
+ op_limit_(NULL) {
+ }
+
+ inline void SetExpectedLength(size_t len) {
+ assert(blocks_.empty());
+ expected_ = len;
+ }
+
+ inline bool CheckLength() const {
+ return Size() == expected_;
+ }
+
+ // Return the number of bytes actually uncompressed so far
+ inline size_t Produced() const {
+ return Size();
+ }
+
+ inline bool Append(const char* ip, size_t len) {
+ size_t avail = op_limit_ - op_ptr_;
+ if (len <= avail) {
+ // Fast path
+ memcpy(op_ptr_, ip, len);
+ op_ptr_ += len;
+ return true;
+ } else {
+ return SlowAppend(ip, len);
+ }
+ }
+
+ inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+ char* op = op_ptr_;
+ const int space_left = op_limit_ - op;
+ if (length <= 16 && available >= 16 + kMaximumTagLength &&
+ space_left >= 16) {
+ // Fast path, used for the majority (about 95%) of invocations.
+ UnalignedCopy128(ip, op);
+ op_ptr_ = op + length;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ inline bool AppendFromSelf(size_t offset, size_t len) {
+ char* const op_end = op_ptr_ + len;
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
+ // the "offset - 1u" trick.
+ if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
+ op_end <= op_limit_)) {
+ // Fast path: src and dst in current block.
+ op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
+ return true;
+ }
+ return SlowAppendFromSelf(offset, len);
+ }
+
+ // Called at the end of the decompress. We ask the allocator
+ // write all blocks to the sink.
+ inline void Flush() { allocator_.Flush(Produced()); }
+ };
+
+ template<typename Allocator>
+ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
+ size_t avail = op_limit_ - op_ptr_;
+ while (len > avail) {
+ // Completely fill this block
+ memcpy(op_ptr_, ip, avail);
+ op_ptr_ += avail;
+ assert(op_limit_ - op_ptr_ == 0);
+ full_size_ += (op_ptr_ - op_base_);
+ len -= avail;
+ ip += avail;
+
+ // Bounds check
+ if (full_size_ + len > expected_) {
+ return false;
+ }
+
+ // Make new block
+ size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
+ op_base_ = allocator_.Allocate(bsize);
+ op_ptr_ = op_base_;
+ op_limit_ = op_base_ + bsize;
+ blocks_.push_back(op_base_);
+ avail = bsize;
+ }
+
+ memcpy(op_ptr_, ip, len);
+ op_ptr_ += len;
+ return true;
+ }
+
+ template<typename Allocator>
+ bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
+ size_t len) {
+ // Overflow check
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
+ // the "offset - 1u" trick.
+ const size_t cur = Size();
+ if (offset - 1u >= cur) return false;
+ if (expected_ - cur < len) return false;
+
+ // Currently we shouldn't ever hit this path because Compress() chops the
+ // input into blocks and does not create cross-block copies. However, it is
+ // nice if we do not rely on that, since we can get better compression if we
+ // allow cross-block copies and thus might want to change the compressor in
+ // the future.
+ size_t src = cur - offset;
+ while (len-- > 0) {
+ char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)];
+ Append(&c, 1);
+ src++;
+ }
+ return true;
+ }
+
+ class SnappySinkAllocator {
+ public:
+ explicit SnappySinkAllocator(Sink* dest): dest_(dest) {}
+ ~SnappySinkAllocator() {}
+
+ char* Allocate(int size) {
+ Datablock block(new char[size], size);
+ blocks_.push_back(block);
+ return block.data;
+ }
+
+ // We flush only at the end, because the writer wants
+ // random access to the blocks and once we hand the
+ // block over to the sink, we can't access it anymore.
+ // Also we don't write more than has been actually written
+ // to the blocks.
+ void Flush(size_t size) {
+ size_t size_written = 0;
+ size_t block_size;
+ for (int i = 0; i < blocks_.size(); ++i) {
+ block_size = std::min<size_t>(blocks_[i].size, size - size_written);
+ dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
+ &SnappySinkAllocator::Deleter, NULL);
+ size_written += block_size;
+ }
+ blocks_.clear();
+ }
+
+ private:
+ struct Datablock {
+ char* data;
+ size_t size;
+ Datablock(char* p, size_t s) : data(p), size(s) {}
+ };
+
+ static void Deleter(void* arg, const char* bytes, size_t size) {
+ delete[] bytes;
+ }
+
+ Sink* dest_;
+ std::vector<Datablock> blocks_;
+
+ // Note: copying this object is allowed
+ };

+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
+ SnappySinkAllocator allocator(uncompressed);
+ SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+ InternalUncompress(compressed, &writer);
+ return writer.Produced();
+ }
+
+ bool Uncompress(Source* compressed, Sink* uncompressed) {
+ // Read the uncompressed length from the front of the compressed input
+ SnappyDecompressor decompressor(compressed);
+ uint32 uncompressed_len = 0;
+ if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
+ return false;
+ }
+
+ char c;
+ size_t allocated_size;
+ char* buf = uncompressed->GetAppendBufferVariable(
+ 1, uncompressed_len, &c, 1, &allocated_size);
+
+ const size_t compressed_len = compressed->Available();
+ // If we can get a flat buffer, then use it, otherwise do block by block
+ // uncompression
+ if (allocated_size >= uncompressed_len) {
+ SnappyArrayWriter writer(buf);
+ bool result = InternalUncompressAllTags(&decompressor, &writer,
+ compressed_len, uncompressed_len);
+ uncompressed->Append(buf, writer.Produced());
+ return result;
+ } else {
+ SnappySinkAllocator allocator(uncompressed);
+ SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+ return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
+ uncompressed_len);
+ }
+ }
+
+ } // end namespace snappy
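For reference, the last hunk above adds Sink-based entry points to the vendored snappy (snappy::Uncompress(Source*, Sink*) and snappy::UncompressAsMuchAsPossible). Below is a minimal C++ sketch, not part of the packaged sources, of how those entry points can be exercised alongside the long-standing flat-buffer API; it assumes the vendored snappy headers (snappy.h, snappy-sinksource.h) are on the include path and that the new declarations are exported there.

// Illustrative sketch only -- not shipped with the gem or the vendored library.
#include <cassert>
#include <string>

#include "snappy.h"
#include "snappy-sinksource.h"

int main() {
  const std::string original(100000, 'x');

  // Flat-buffer API (unchanged by this release).
  std::string compressed;
  snappy::Compress(original.data(), original.size(), &compressed);

  // Source/Sink API added by the vendored snappy update above.
  std::string restored(original.size(), '\0');
  snappy::ByteArraySource source(compressed.data(), compressed.size());
  snappy::UncheckedByteArraySink sink(&restored[0]);
  const bool ok = snappy::Uncompress(&source, &sink);

  assert(ok && restored == original);
  return ok ? 0 : 1;
}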