snappy 0.0.13 → 0.1.0

Files changed (43)
  1. checksums.yaml +5 -5
  2. data/.travis.yml +28 -1
  3. data/Gemfile +6 -1
  4. data/README.md +28 -4
  5. data/Rakefile +1 -0
  6. data/ext/extconf.rb +21 -24
  7. data/lib/snappy.rb +3 -1
  8. data/lib/snappy/hadoop.rb +22 -0
  9. data/lib/snappy/hadoop/reader.rb +58 -0
  10. data/lib/snappy/hadoop/writer.rb +51 -0
  11. data/lib/snappy/reader.rb +11 -7
  12. data/lib/snappy/shim.rb +30 -0
  13. data/lib/snappy/version.rb +3 -1
  14. data/lib/snappy/writer.rb +8 -9
  15. data/smoke.sh +8 -0
  16. data/snappy.gemspec +6 -30
  17. data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
  18. data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
  19. data/test/test-snappy-hadoop.rb +22 -0
  20. data/vendor/snappy/AUTHORS +1 -0
  21. data/vendor/snappy/CMakeLists.txt +174 -0
  22. data/vendor/snappy/CONTRIBUTING.md +26 -0
  23. data/vendor/snappy/COPYING +54 -0
  24. data/vendor/snappy/NEWS +180 -0
  25. data/vendor/snappy/README.md +149 -0
  26. data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
  27. data/vendor/snappy/cmake/config.h.in +62 -0
  28. data/vendor/snappy/format_description.txt +110 -0
  29. data/vendor/snappy/framing_format.txt +135 -0
  30. data/vendor/snappy/snappy-c.cc +90 -0
  31. data/vendor/snappy/snappy-c.h +138 -0
  32. data/vendor/snappy/snappy-internal.h +224 -0
  33. data/vendor/snappy/snappy-sinksource.cc +104 -0
  34. data/vendor/snappy/snappy-sinksource.h +182 -0
  35. data/vendor/snappy/snappy-stubs-internal.cc +42 -0
  36. data/vendor/snappy/snappy-stubs-internal.h +561 -0
  37. data/vendor/snappy/snappy-stubs-public.h.in +94 -0
  38. data/vendor/snappy/snappy-test.cc +612 -0
  39. data/vendor/snappy/snappy-test.h +573 -0
  40. data/vendor/snappy/snappy.cc +1515 -0
  41. data/vendor/snappy/snappy.h +203 -0
  42. data/vendor/snappy/snappy_unittest.cc +1410 -0
  43. metadata +38 -46
data/vendor/snappy/snappy.cc
@@ -0,0 +1,1515 @@
1
+ // Copyright 2005 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ #include "snappy.h"
30
+ #include "snappy-internal.h"
31
+ #include "snappy-sinksource.h"
32
+
33
+ #ifndef SNAPPY_HAVE_SSE2
34
+ #if defined(__SSE2__) || defined(_M_X64) || \
35
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
36
+ #define SNAPPY_HAVE_SSE2 1
37
+ #else
38
+ #define SNAPPY_HAVE_SSE2 0
39
+ #endif
40
+ #endif
41
+
42
+ #if SNAPPY_HAVE_SSE2
43
+ #include <emmintrin.h>
44
+ #endif
45
+ #include <stdio.h>
46
+
47
+ #include <algorithm>
48
+ #include <string>
49
+ #include <vector>
50
+
51
+
52
+ namespace snappy {
53
+
54
+ using internal::COPY_1_BYTE_OFFSET;
55
+ using internal::COPY_2_BYTE_OFFSET;
56
+ using internal::LITERAL;
57
+ using internal::char_table;
58
+ using internal::kMaximumTagLength;
59
+
60
+ // Any hash function will produce a valid compressed bitstream, but a good
61
+ // hash function reduces the number of collisions and thus yields better
62
+ // compression for compressible input, and more speed for incompressible
63
+ // input. Of course, it doesn't hurt if the hash function is reasonably fast
64
+ // either, as it gets called a lot.
65
+ static inline uint32 HashBytes(uint32 bytes, int shift) {
66
+ uint32 kMul = 0x1e35a7bd;
67
+ return (bytes * kMul) >> shift;
68
+ }
69
+ static inline uint32 Hash(const char* p, int shift) {
70
+ return HashBytes(UNALIGNED_LOAD32(p), shift);
71
+ }
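As a rough standalone illustration of the hash above (assuming a 16384-entry table and a little-endian load of "abcd"; neither is taken from the diff itself), the shift keeps only the top bits of the 32-bit product:

#include <cstdint>
#include <cstdio>

int main() {
  const int table_size = 1 << 14;             // assumed 16384-bucket table
  const int shift = 32 - 14;                  // 32 - Log2Floor(table_size)
  const uint32_t kMul = 0x1e35a7bd;           // same multiplier as HashBytes()
  uint32_t bytes = 0x64636261;                // "abcd" loaded little-endian
  uint32_t bucket = (bytes * kMul) >> shift;  // keeps the top 14 bits
  printf("bucket = %u (< %d)\n", bucket, table_size);
  return 0;
}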
72
+
73
+ size_t MaxCompressedLength(size_t source_len) {
74
+ // Compressed data can be defined as:
75
+ // compressed := item* literal*
76
+ // item := literal* copy
77
+ //
78
+ // The trailing literal sequence has a space blowup of at most 62/60
79
+ // since a literal of length 60 needs one tag byte + one extra byte
80
+ // for length information.
81
+ //
82
+ // Item blowup is trickier to measure. Suppose the "copy" op copies
83
+ // 4 bytes of data. Because of a special check in the encoding code,
84
+ // we produce a 4-byte copy only if the offset is < 65536. Therefore
85
+ // the copy op takes 3 bytes to encode, and this type of item leads
86
+ // to at most the 62/60 blowup for representing literals.
87
+ //
88
+ // Suppose the "copy" op copies 5 bytes of data. If the offset is big
89
+ // enough, it will take 5 bytes to encode the copy op. Therefore the
90
+ // worst case here is a one-byte literal followed by a five-byte copy.
91
+ // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
92
+ //
93
+ // This last factor dominates the blowup, so the final estimate is:
94
+ return 32 + source_len + source_len/6;
95
+ }
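A small worked check of the 32 + n + n/6 bound derived above (a sketch, not part of the library's API; the sample sizes are arbitrary):

#include <cstddef>
#include <cstdio>

static size_t Bound(size_t source_len) {  // mirrors the formula returned above
  return 32 + source_len + source_len / 6;
}

int main() {
  // e.g. a 65536-byte block can expand to at most 32 + 65536 + 10922 = 76490 bytes.
  printf("%zu %zu %zu\n", Bound(0), Bound(6), Bound(65536));
  return 0;
}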
96
+
97
+ namespace {
98
+
99
+ void UnalignedCopy64(const void* src, void* dst) {
100
+ char tmp[8];
101
+ memcpy(tmp, src, 8);
102
+ memcpy(dst, tmp, 8);
103
+ }
104
+
105
+ void UnalignedCopy128(const void* src, void* dst) {
106
+ // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
107
+ // SSE2 moves for memcpy(dst, src, 16).
108
+ #if SNAPPY_HAVE_SSE2
109
+ __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
110
+ _mm_storeu_si128(static_cast<__m128i*>(dst), x);
111
+ #else
112
+ char tmp[16];
113
+ memcpy(tmp, src, 16);
114
+ memcpy(dst, tmp, 16);
115
+ #endif
116
+ }
117
+
118
+ // Copy [src, src+(op_limit-op)) to [op, op_limit) a byte at a time. Used
119
+ // for handling COPY operations where the input and output regions may overlap.
120
+ // For example, suppose:
121
+ // src == "ab"
122
+ // op == src + 2
123
+ // op_limit == op + 20
124
+ // After IncrementalCopySlow(src, op, op_limit), the result will have eleven
125
+ // copies of "ab"
126
+ // ababababababababababab
127
+ // Note that this does not match the semantics of either memcpy() or memmove().
128
+ inline char* IncrementalCopySlow(const char* src, char* op,
129
+ char* const op_limit) {
130
+ while (op < op_limit) {
131
+ *op++ = *src++;
132
+ }
133
+ return op_limit;
134
+ }
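For a concrete feel of the overlapping-copy semantics described above, here is a self-contained sketch using the same byte-at-a-time loop (the buffer contents are chosen only for illustration):

#include <cstdio>

int main() {
  char buf[32] = "ab";                   // the two-byte pattern
  char* src = buf;
  char* op = buf + 2;                    // op - src == pattern length
  char* const op_limit = op + 20;        // request 20 more bytes
  while (op < op_limit) *op++ = *src++;  // same loop as IncrementalCopySlow
  printf("%s\n", buf);                   // "ab" replicated eleven times
  return 0;
}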
135
+
136
+ // Copy [src, src+(op_limit-op)) to [op, op_limit) but faster than
137
+ // IncrementalCopySlow. buf_limit is the address past the end of the writable
138
+ // region of the buffer.
139
+ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
140
+ char* const buf_limit) {
141
+ // Terminology:
142
+ //
143
+ // slop = buf_limit - op
144
+ // pat = op - src
145
+ // len = limit - op
146
+ assert(src < op);
147
+ assert(op_limit <= buf_limit);
148
+ // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
149
+ // to optimize this function but we have to also handle these cases in case
150
+ // the input does not satisfy these conditions.
151
+
152
+ size_t pattern_size = op - src;
153
+ // The cases are split into different branches to allow the branch predictor,
154
+ // FDO, and static prediction hints to work better. For each input we list the
155
+ // ratio of invocations that match each condition.
156
+ //
157
+ // input slop < 16 pat < 8 len > 16
158
+ // ------------------------------------------
159
+ // html|html4|cp 0% 1.01% 27.73%
160
+ // urls 0% 0.88% 14.79%
161
+ // jpg 0% 64.29% 7.14%
162
+ // pdf 0% 2.56% 58.06%
163
+ // txt[1-4] 0% 0.23% 0.97%
164
+ // pb 0% 0.96% 13.88%
165
+ // bin 0.01% 22.27% 41.17%
166
+ //
167
+ // It is very rare that we don't have enough slop for doing block copies. It
168
+ // is also rare that we need to expand a pattern. Small patterns are common
169
+ // for incompressible formats and for those we are plenty fast already.
170
+ // Lengths are normally not greater than 16 but they vary depending on the
171
+ // input. In general if we always predict len <= 16 it would be an ok
172
+ // prediction.
173
+ //
174
+ // In order to be fast we want a pattern >= 8 bytes and an unrolled loop
175
+ // copying 2x 8 bytes at a time.
176
+
177
+ // Handle the uncommon case where pattern is less than 8 bytes.
178
+ if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
179
+ // Expand pattern to at least 8 bytes. The worst-case scenario in terms of
180
+ // buffer usage is when the pattern is size 3. ^ is the original position
181
+ // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
182
+ //
183
+ // abc
184
+ // abcabcxxxxx
185
+ // abcabcabcabcxxxxx
186
+ // ^
187
+ // The last x is 14 bytes after ^.
188
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 14)) {
189
+ while (pattern_size < 8) {
190
+ UnalignedCopy64(src, op);
191
+ op += pattern_size;
192
+ pattern_size *= 2;
193
+ }
194
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
195
+ } else {
196
+ return IncrementalCopySlow(src, op, op_limit);
197
+ }
198
+ }
199
+ assert(pattern_size >= 8);
200
+
201
+ // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single
202
+ // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
203
+ // because expanding the pattern to at least 8 bytes guarantees that
204
+ // op - src >= 8.
205
+ while (op <= buf_limit - 16) {
206
+ UnalignedCopy64(src, op);
207
+ UnalignedCopy64(src + 8, op + 8);
208
+ src += 16;
209
+ op += 16;
210
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
211
+ }
212
+ // We only take this branch if we didn't have enough slop and we can do a
213
+ // single 8 byte copy.
214
+ if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
215
+ UnalignedCopy64(src, op);
216
+ src += 8;
217
+ op += 8;
218
+ }
219
+ return IncrementalCopySlow(src, op, op_limit);
220
+ }
221
+
222
+ } // namespace
223
+
224
+ static inline char* EmitLiteral(char* op,
225
+ const char* literal,
226
+ int len,
227
+ bool allow_fast_path) {
228
+ // The vast majority of copies are below 16 bytes, for which a
229
+ // call to memcpy is overkill. This fast path can sometimes
230
+ // copy up to 15 bytes too much, but that is okay in the
231
+ // main loop, since we have a bit to go on for both sides:
232
+ //
233
+ // - The input will always have kInputMarginBytes = 15 extra
234
+ // available bytes, as long as we're in the main loop, and
235
+ // if not, allow_fast_path = false.
236
+ // - The output will always have 32 spare bytes (see
237
+ // MaxCompressedLength).
238
+ assert(len > 0); // Zero-length literals are disallowed
239
+ int n = len - 1;
240
+ if (allow_fast_path && len <= 16) {
241
+ // Fits in tag byte
242
+ *op++ = LITERAL | (n << 2);
243
+
244
+ UnalignedCopy128(literal, op);
245
+ return op + len;
246
+ }
247
+
248
+ if (n < 60) {
249
+ // Fits in tag byte
250
+ *op++ = LITERAL | (n << 2);
251
+ } else {
252
+ // Encode in upcoming bytes
253
+ char* base = op;
254
+ int count = 0;
255
+ op++;
256
+ while (n > 0) {
257
+ *op++ = n & 0xff;
258
+ n >>= 8;
259
+ count++;
260
+ }
261
+ assert(count >= 1);
262
+ assert(count <= 4);
263
+ *base = LITERAL | ((59+count) << 2);
264
+ }
265
+ memcpy(op, literal, len);
266
+ return op + len;
267
+ }
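A rough sketch of the short-literal tag layout EmitLiteral produces: the low two bits carry the element type (LITERAL is 0 in the vendored headers, taken here as an assumption) and the next six bits carry length - 1:

#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t kLiteral = 0x00;              // assumed value of internal::LITERAL
  const int len = 10;                         // a 10-byte literal
  uint8_t tag = kLiteral | ((len - 1) << 2);  // 9 << 2 == 0x24
  printf("tag = 0x%02x, decoded length = %d\n", tag, (tag >> 2) + 1);
  return 0;
}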
268
+
269
+ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
270
+ bool len_less_than_12) {
271
+ assert(len <= 64);
272
+ assert(len >= 4);
273
+ assert(offset < 65536);
274
+ assert(len_less_than_12 == (len < 12));
275
+
276
+ if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
277
+ // offset fits in 11 bits. The 3 highest go in the top of the first byte,
278
+ // and the rest go in the second byte.
279
+ *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
280
+ *op++ = offset & 0xff;
281
+ } else {
282
+ // Write 4 bytes, though we only care about 3 of them. The output buffer
283
+ // is required to have some slack, so the extra byte won't overrun it.
284
+ uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8);
285
+ LittleEndian::Store32(op, u);
286
+ op += 3;
287
+ }
288
+ return op;
289
+ }
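A hedged sketch of the two-byte copy encoding emitted in the first branch above (4 <= len < 12, offset < 2048); treating COPY_1_BYTE_OFFSET == 1 as an assumption taken from the vendored headers:

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t kCopy1ByteOffset = 1;  // assumed tag type value
  size_t len = 7, offset = 1000;
  uint8_t b0 = static_cast<uint8_t>(
      kCopy1ByteOffset + ((len - 4) << 2) + ((offset >> 3) & 0xe0));
  uint8_t b1 = static_cast<uint8_t>(offset & 0xff);
  // Decode: bits 2-4 hold len - 4, bits 5-7 hold offset >> 8, b1 holds offset & 0xff.
  size_t dec_len = 4 + ((b0 >> 2) & 0x7);
  size_t dec_off = (size_t(b0 >> 5) << 8) | b1;
  printf("bytes %02x %02x -> len=%zu offset=%zu\n",
         unsigned(b0), unsigned(b1), dec_len, dec_off);
  return 0;
}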
290
+
291
+ static inline char* EmitCopy(char* op, size_t offset, size_t len,
292
+ bool len_less_than_12) {
293
+ assert(len_less_than_12 == (len < 12));
294
+ if (len_less_than_12) {
295
+ return EmitCopyAtMost64(op, offset, len, true);
296
+ } else {
297
+ // A special case for len <= 64 might help, but so far measurements suggest
298
+ // it's in the noise.
299
+
300
+ // Emit 64 byte copies but make sure to keep at least four bytes reserved.
301
+ while (SNAPPY_PREDICT_FALSE(len >= 68)) {
302
+ op = EmitCopyAtMost64(op, offset, 64, false);
303
+ len -= 64;
304
+ }
305
+
306
+ // One or two copies will now finish the job.
307
+ if (len > 64) {
308
+ op = EmitCopyAtMost64(op, offset, 60, false);
309
+ len -= 60;
310
+ }
311
+
312
+ // Emit remainder.
313
+ op = EmitCopyAtMost64(op, offset, len, len < 12);
314
+ return op;
315
+ }
316
+ }
317
+
318
+ bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
319
+ uint32 v = 0;
320
+ const char* limit = start + n;
321
+ if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
322
+ *result = v;
323
+ return true;
324
+ } else {
325
+ return false;
326
+ }
327
+ }
328
+
329
+ namespace internal {
330
+ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
331
+ // Use smaller hash table when input.size() is smaller, since we
332
+ // fill the table, incurring O(hash table size) overhead for
333
+ // compression, and if the input is short, we won't need that
334
+ // many hash table entries anyway.
335
+ assert(kMaxHashTableSize >= 256);
336
+ size_t htsize = 256;
337
+ while (htsize < kMaxHashTableSize && htsize < input_size) {
338
+ htsize <<= 1;
339
+ }
340
+
341
+ uint16* table;
342
+ if (htsize <= ARRAYSIZE(small_table_)) {
343
+ table = small_table_;
344
+ } else {
345
+ if (large_table_ == NULL) {
346
+ large_table_ = new uint16[kMaxHashTableSize];
347
+ }
348
+ table = large_table_;
349
+ }
350
+
351
+ *table_size = htsize;
352
+ memset(table, 0, htsize * sizeof(*table));
353
+ return table;
354
+ }
355
+ } // end namespace internal
356
+
357
+ // For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
358
+ // equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
359
+ // empirically found that overlapping loads such as
360
+ // UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
361
+ // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
362
+ //
363
+ // We have different versions for 64- and 32-bit; ideally we would avoid the
364
+ // two functions and just inline the UNALIGNED_LOAD64 call into
365
+ // GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
366
+ // enough to avoid loading the value multiple times then. For 64-bit, the load
367
+ // is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
368
+ // done at GetUint32AtOffset() time.
369
+
370
+ #ifdef ARCH_K8
371
+
372
+ typedef uint64 EightBytesReference;
373
+
374
+ static inline EightBytesReference GetEightBytesAt(const char* ptr) {
375
+ return UNALIGNED_LOAD64(ptr);
376
+ }
377
+
378
+ static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
379
+ assert(offset >= 0);
380
+ assert(offset <= 4);
381
+ return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
382
+ }
383
+
384
+ #else
385
+
386
+ typedef const char* EightBytesReference;
387
+
388
+ static inline EightBytesReference GetEightBytesAt(const char* ptr) {
389
+ return ptr;
390
+ }
391
+
392
+ static inline uint32 GetUint32AtOffset(const char* v, int offset) {
393
+ assert(offset >= 0);
394
+ assert(offset <= 4);
395
+ return UNALIGNED_LOAD32(v + offset);
396
+ }
397
+
398
+ #endif
399
+
400
+ // Flat array compression that does not emit the "uncompressed length"
401
+ // prefix. Compresses "input" string to the "*op" buffer.
402
+ //
403
+ // REQUIRES: "input" is at most "kBlockSize" bytes long.
404
+ // REQUIRES: "op" points to an array of memory that is at least
405
+ // "MaxCompressedLength(input.size())" in size.
406
+ // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
407
+ // REQUIRES: "table_size" is a power of two
408
+ //
409
+ // Returns an "end" pointer into "op" buffer.
410
+ // "end - op" is the compressed size of "input".
411
+ namespace internal {
412
+ char* CompressFragment(const char* input,
413
+ size_t input_size,
414
+ char* op,
415
+ uint16* table,
416
+ const int table_size) {
417
+ // "ip" is the input pointer, and "op" is the output pointer.
418
+ const char* ip = input;
419
+ assert(input_size <= kBlockSize);
420
+ assert((table_size & (table_size - 1)) == 0); // table must be power of two
421
+ const int shift = 32 - Bits::Log2Floor(table_size);
422
+ assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
423
+ const char* ip_end = input + input_size;
424
+ const char* base_ip = ip;
425
+ // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
426
+ // [next_emit, ip_end) after the main loop.
427
+ const char* next_emit = ip;
428
+
429
+ const size_t kInputMarginBytes = 15;
430
+ if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
431
+ const char* ip_limit = input + input_size - kInputMarginBytes;
432
+
433
+ for (uint32 next_hash = Hash(++ip, shift); ; ) {
434
+ assert(next_emit < ip);
435
+ // The body of this loop calls EmitLiteral once and then EmitCopy one or
436
+ // more times. (The exception is that when we're close to exhausting
437
+ // the input we goto emit_remainder.)
438
+ //
439
+ // In the first iteration of this loop we're just starting, so
440
+ // there's nothing to copy, so calling EmitLiteral once is
441
+ // necessary. And we only start a new iteration when the
442
+ // current iteration has determined that a call to EmitLiteral will
443
+ // precede the next call to EmitCopy (if any).
444
+ //
445
+ // Step 1: Scan forward in the input looking for a 4-byte-long match.
446
+ // If we get close to exhausting the input then goto emit_remainder.
447
+ //
448
+ // Heuristic match skipping: If 32 bytes are scanned with no matches
449
+ // found, start looking only at every other byte. If 32 more bytes are
450
+ // scanned (or skipped), look at every third byte, etc.. When a match is
451
+ // found, immediately go back to looking at every byte. This is a small
452
+ // loss (~5% performance, ~0.1% density) for compressible data due to more
453
+ // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
454
+ // win since the compressor quickly "realizes" the data is incompressible
455
+ // and doesn't bother looking for matches everywhere.
456
+ //
457
+ // The "skip" variable keeps track of how many bytes there are since the
458
+ // last match; dividing it by 32 (i.e. right-shifting by five) gives the
459
+ // number of bytes to move ahead for each iteration.
460
+ uint32 skip = 32;
461
+
462
+ const char* next_ip = ip;
463
+ const char* candidate;
464
+ do {
465
+ ip = next_ip;
466
+ uint32 hash = next_hash;
467
+ assert(hash == Hash(ip, shift));
468
+ uint32 bytes_between_hash_lookups = skip >> 5;
469
+ skip += bytes_between_hash_lookups;
470
+ next_ip = ip + bytes_between_hash_lookups;
471
+ if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
472
+ goto emit_remainder;
473
+ }
474
+ next_hash = Hash(next_ip, shift);
475
+ candidate = base_ip + table[hash];
476
+ assert(candidate >= base_ip);
477
+ assert(candidate < ip);
478
+
479
+ table[hash] = ip - base_ip;
480
+ } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
481
+ UNALIGNED_LOAD32(candidate)));
482
+
483
+ // Step 2: A 4-byte match has been found. We'll later see if more
484
+ // than 4 bytes match. But, prior to the match, input
485
+ // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
486
+ assert(next_emit + 16 <= ip_end);
487
+ op = EmitLiteral(op, next_emit, ip - next_emit, true);
488
+
489
+ // Step 3: Call EmitCopy, and then see if another EmitCopy could
490
+ // be our next move. Repeat until we find no match for the
491
+ // input immediately after what was consumed by the last EmitCopy call.
492
+ //
493
+ // If we exit this loop normally then we need to call EmitLiteral next,
494
+ // though we don't yet know how big the literal will be. We handle that
495
+ // by proceeding to the next iteration of the main loop. We also can exit
496
+ // this loop via goto if we get close to exhausting the input.
497
+ EightBytesReference input_bytes;
498
+ uint32 candidate_bytes = 0;
499
+
500
+ do {
501
+ // We have a 4-byte match at ip, and no need to emit any
502
+ // "literal bytes" prior to ip.
503
+ const char* base = ip;
504
+ std::pair<size_t, bool> p =
505
+ FindMatchLength(candidate + 4, ip + 4, ip_end);
506
+ size_t matched = 4 + p.first;
507
+ ip += matched;
508
+ size_t offset = base - candidate;
509
+ assert(0 == memcmp(base, candidate, matched));
510
+ op = EmitCopy(op, offset, matched, p.second);
511
+ next_emit = ip;
512
+ if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
513
+ goto emit_remainder;
514
+ }
515
+ // We are now looking for a 4-byte match again. We read
516
+ // table[Hash(ip, shift)] for that. To improve compression,
517
+ // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
518
+ input_bytes = GetEightBytesAt(ip - 1);
519
+ uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
520
+ table[prev_hash] = ip - base_ip - 1;
521
+ uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
522
+ candidate = base_ip + table[cur_hash];
523
+ candidate_bytes = UNALIGNED_LOAD32(candidate);
524
+ table[cur_hash] = ip - base_ip;
525
+ } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
526
+
527
+ next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
528
+ ++ip;
529
+ }
530
+ }
531
+
532
+ emit_remainder:
533
+ // Emit the remaining bytes as a literal
534
+ if (next_emit < ip_end) {
535
+ op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
536
+ }
537
+
538
+ return op;
539
+ }
540
+ } // end namespace internal
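The skipping heuristic described in the comments inside CompressFragment can be simulated in isolation. A sketch (probe counts chosen arbitrarily) of how the stride grows as more bytes are scanned without a match:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t skip = 32, pos = 0;
  for (int probe = 1; probe <= 64; ++probe) {
    uint32_t step = skip >> 5;  // bytes_between_hash_lookups in the source
    skip += step;
    pos += step;
    if (probe == 32 || probe == 48 || probe == 64)
      printf("probe %2d: stride %u, %u bytes covered\n", probe, step, pos);
  }
  return 0;
}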
541
+
542
+ // Called back at every compression call to trace parameters and sizes.
543
+ static inline void Report(const char *algorithm, size_t compressed_size,
544
+ size_t uncompressed_size) {}
545
+
546
+ // Signature of output types needed by decompression code.
547
+ // The decompression code is templatized on a type that obeys this
548
+ // signature so that we do not pay virtual function call overhead in
549
+ // the middle of a tight decompression loop.
550
+ //
551
+ // class DecompressionWriter {
552
+ // public:
553
+ // // Called before decompression
554
+ // void SetExpectedLength(size_t length);
555
+ //
556
+ // // Called after decompression
557
+ // bool CheckLength() const;
558
+ //
559
+ // // Called repeatedly during decompression
560
+ // bool Append(const char* ip, size_t length);
561
+ // bool AppendFromSelf(uint32 offset, size_t length);
562
+ //
563
+ // // The rules for how TryFastAppend differs from Append are somewhat
564
+ // // convoluted:
565
+ // //
566
+ // // - TryFastAppend is allowed to decline (return false) at any
567
+ // // time, for any reason -- just "return false" would be
568
+ // // a perfectly legal implementation of TryFastAppend.
569
+ // // The intention is for TryFastAppend to allow a fast path
570
+ // // in the common case of a small append.
571
+ // // - TryFastAppend is allowed to read up to <available> bytes
572
+ // // from the input buffer, whereas Append is allowed to read
573
+ // // <length>. However, if it returns true, it must leave
574
+ // // at least five (kMaximumTagLength) bytes in the input buffer
575
+ // // afterwards, so that there is always enough space to read the
576
+ // // next tag without checking for a refill.
577
+ // // - TryFastAppend must always return decline (return false)
578
+ // // if <length> is 61 or more, as in this case the literal length is not
579
+ // // decoded fully. In practice, this should not be a big problem,
580
+ // // as it is unlikely that one would implement a fast path accepting
581
+ // // this much data.
582
+ // //
583
+ // bool TryFastAppend(const char* ip, size_t available, size_t length);
584
+ // };
585
+
586
+ namespace internal {
587
+
588
+ // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
589
+ static const uint32 wordmask[] = {
590
+ 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
591
+ };
592
+
593
+ } // end namespace internal
594
+
595
+ // Helper class for decompression
596
+ class SnappyDecompressor {
597
+ private:
598
+ Source* reader_; // Underlying source of bytes to decompress
599
+ const char* ip_; // Points to next buffered byte
600
+ const char* ip_limit_; // Points just past buffered bytes
601
+ uint32 peeked_; // Bytes peeked from reader (need to skip)
602
+ bool eof_; // Hit end of input without an error?
603
+ char scratch_[kMaximumTagLength]; // See RefillTag().
604
+
605
+ // Ensure that all of the tag metadata for the next tag is available
606
+ // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even
607
+ // if (ip_limit_ - ip_ < 5).
608
+ //
609
+ // Returns true on success, false on error or end of input.
610
+ bool RefillTag();
611
+
612
+ public:
613
+ explicit SnappyDecompressor(Source* reader)
614
+ : reader_(reader),
615
+ ip_(NULL),
616
+ ip_limit_(NULL),
617
+ peeked_(0),
618
+ eof_(false) {
619
+ }
620
+
621
+ ~SnappyDecompressor() {
622
+ // Advance past any bytes we peeked at from the reader
623
+ reader_->Skip(peeked_);
624
+ }
625
+
626
+ // Returns true iff we have hit the end of the input without an error.
627
+ bool eof() const {
628
+ return eof_;
629
+ }
630
+
631
+ // Read the uncompressed length stored at the start of the compressed data.
632
+ // On success, stores the length in *result and returns true.
633
+ // On failure, returns false.
634
+ bool ReadUncompressedLength(uint32* result) {
635
+ assert(ip_ == NULL); // Must not have read anything yet
636
+ // Length is encoded in 1..5 bytes
637
+ *result = 0;
638
+ uint32 shift = 0;
639
+ while (true) {
640
+ if (shift >= 32) return false;
641
+ size_t n;
642
+ const char* ip = reader_->Peek(&n);
643
+ if (n == 0) return false;
644
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
645
+ reader_->Skip(1);
646
+ uint32 val = c & 0x7f;
647
+ if (((val << shift) >> shift) != val) return false;
648
+ *result |= val << shift;
649
+ if (c < 128) {
650
+ break;
651
+ }
652
+ shift += 7;
653
+ }
654
+ return true;
655
+ }
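The length prefix parsed above is a little-endian base-128 varint with the high bit set on every byte except the last. As a sketch, 1000000 encodes as C0 84 3D and decodes with the same accumulation used in the loop above:

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned char encoded[] = {0xC0, 0x84, 0x3D};  // varint for 1000000
  uint32_t result = 0, shift = 0;
  for (unsigned char c : encoded) {
    result |= uint32_t(c & 0x7f) << shift;  // same accumulation as above
    if (c < 128) break;                     // high bit clear: last byte
    shift += 7;
  }
  printf("decoded length = %u\n", result);  // prints 1000000
  return 0;
}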
656
+
657
+ // Process all items found in the input, writing output through *writer.
658
+ // Stops on error or at end of input; the caller checks eof() and CheckLength().
659
+ template <class Writer>
660
+ void DecompressAllTags(Writer* writer) {
661
+ const char* ip = ip_;
662
+ // For position-independent executables, accessing global arrays can be
663
+ // slow. Move wordmask array onto the stack to mitigate this.
664
+ uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
665
+ // Do not use memcpy to copy internal::wordmask to
666
+ // wordmask. LLVM converts stack arrays to global arrays if it detects
667
+ // const stack arrays and this hurts the performance of position
668
+ // independent code. This change is temporary and can be reverted when
669
+ // https://reviews.llvm.org/D30759 is approved.
670
+ wordmask[0] = internal::wordmask[0];
671
+ wordmask[1] = internal::wordmask[1];
672
+ wordmask[2] = internal::wordmask[2];
673
+ wordmask[3] = internal::wordmask[3];
674
+ wordmask[4] = internal::wordmask[4];
675
+
676
+ // We could have put this refill fragment only at the beginning of the loop.
677
+ // However, duplicating it at the end of each branch gives the compiler more
678
+ // scope to optimize the <ip_limit_ - ip> expression based on the local
679
+ // context, which overall increases speed.
680
+ #define MAYBE_REFILL() \
681
+ if (ip_limit_ - ip < kMaximumTagLength) { \
682
+ ip_ = ip; \
683
+ if (!RefillTag()) return; \
684
+ ip = ip_; \
685
+ }
686
+
687
+ MAYBE_REFILL();
688
+ // Add loop alignment directive. Without this directive, we observed
689
+ // significant performance degradation on several intel architectures
690
+ // in snappy benchmark built with LLVM. The degradation was caused by
691
+ // increased branch miss prediction.
692
+ #if defined(__clang__) && defined(__x86_64__)
693
+ asm volatile (".p2align 5");
694
+ #endif
695
+ for ( ;; ) {
696
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
697
+
698
+ // Ratio of iterations that have LITERAL vs non-LITERAL for different
699
+ // inputs.
700
+ //
701
+ // input LITERAL NON_LITERAL
702
+ // -----------------------------------
703
+ // html|html4|cp 23% 77%
704
+ // urls 36% 64%
705
+ // jpg 47% 53%
706
+ // pdf 19% 81%
707
+ // txt[1-4] 25% 75%
708
+ // pb 24% 76%
709
+ // bin 24% 76%
710
+ if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
711
+ size_t literal_length = (c >> 2) + 1u;
712
+ if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
713
+ assert(literal_length < 61);
714
+ ip += literal_length;
715
+ // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
716
+ // will not return true unless there's already at least five spare
717
+ // bytes in addition to the literal.
718
+ continue;
719
+ }
720
+ if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
721
+ // Long literal.
722
+ const size_t literal_length_length = literal_length - 60;
723
+ literal_length =
724
+ (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
725
+ ip += literal_length_length;
726
+ }
727
+
728
+ size_t avail = ip_limit_ - ip;
729
+ while (avail < literal_length) {
730
+ if (!writer->Append(ip, avail)) return;
731
+ literal_length -= avail;
732
+ reader_->Skip(peeked_);
733
+ size_t n;
734
+ ip = reader_->Peek(&n);
735
+ avail = n;
736
+ peeked_ = avail;
737
+ if (avail == 0) return; // Premature end of input
738
+ ip_limit_ = ip + avail;
739
+ }
740
+ if (!writer->Append(ip, literal_length)) {
741
+ return;
742
+ }
743
+ ip += literal_length;
744
+ MAYBE_REFILL();
745
+ } else {
746
+ const size_t entry = char_table[c];
747
+ const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
748
+ const size_t length = entry & 0xff;
749
+ ip += entry >> 11;
750
+
751
+ // copy_offset/256 is encoded in bits 8..10. By just fetching
752
+ // those bits, we get copy_offset (since the bit-field starts at
753
+ // bit 8).
754
+ const size_t copy_offset = entry & 0x700;
755
+ if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
756
+ return;
757
+ }
758
+ MAYBE_REFILL();
759
+ }
760
+ }
761
+
762
+ #undef MAYBE_REFILL
763
+ }
764
+ };
765
+
766
+ bool SnappyDecompressor::RefillTag() {
767
+ const char* ip = ip_;
768
+ if (ip == ip_limit_) {
769
+ // Fetch a new fragment from the reader
770
+ reader_->Skip(peeked_); // All peeked bytes are used up
771
+ size_t n;
772
+ ip = reader_->Peek(&n);
773
+ peeked_ = n;
774
+ eof_ = (n == 0);
775
+ if (eof_) return false;
776
+ ip_limit_ = ip + n;
777
+ }
778
+
779
+ // Read the tag character
780
+ assert(ip < ip_limit_);
781
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
782
+ const uint32 entry = char_table[c];
783
+ const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c'
784
+ assert(needed <= sizeof(scratch_));
785
+
786
+ // Read more bytes from reader if needed
787
+ uint32 nbuf = ip_limit_ - ip;
788
+ if (nbuf < needed) {
789
+ // Stitch together bytes from ip and reader to form the word
790
+ // contents. We store the needed bytes in "scratch_". They
791
+ // will be consumed immediately by the caller since we do not
792
+ // read more than we need.
793
+ memmove(scratch_, ip, nbuf);
794
+ reader_->Skip(peeked_); // All peeked bytes are used up
795
+ peeked_ = 0;
796
+ while (nbuf < needed) {
797
+ size_t length;
798
+ const char* src = reader_->Peek(&length);
799
+ if (length == 0) return false;
800
+ uint32 to_add = std::min<uint32>(needed - nbuf, length);
801
+ memcpy(scratch_ + nbuf, src, to_add);
802
+ nbuf += to_add;
803
+ reader_->Skip(to_add);
804
+ }
805
+ assert(nbuf == needed);
806
+ ip_ = scratch_;
807
+ ip_limit_ = scratch_ + needed;
808
+ } else if (nbuf < kMaximumTagLength) {
809
+ // Have enough bytes, but move into scratch_ so that we do not
810
+ // read past end of input
811
+ memmove(scratch_, ip, nbuf);
812
+ reader_->Skip(peeked_); // All peeked bytes are used up
813
+ peeked_ = 0;
814
+ ip_ = scratch_;
815
+ ip_limit_ = scratch_ + nbuf;
816
+ } else {
817
+ // Pass pointer to buffer returned by reader_.
818
+ ip_ = ip;
819
+ }
820
+ return true;
821
+ }
822
+
823
+ template <typename Writer>
824
+ static bool InternalUncompress(Source* r, Writer* writer) {
825
+ // Read the uncompressed length from the front of the compressed input
826
+ SnappyDecompressor decompressor(r);
827
+ uint32 uncompressed_len = 0;
828
+ if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
829
+
830
+ return InternalUncompressAllTags(&decompressor, writer, r->Available(),
831
+ uncompressed_len);
832
+ }
833
+
834
+ template <typename Writer>
835
+ static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
836
+ Writer* writer,
837
+ uint32 compressed_len,
838
+ uint32 uncompressed_len) {
839
+ Report("snappy_uncompress", compressed_len, uncompressed_len);
840
+
841
+ writer->SetExpectedLength(uncompressed_len);
842
+
843
+ // Process the entire input
844
+ decompressor->DecompressAllTags(writer);
845
+ writer->Flush();
846
+ return (decompressor->eof() && writer->CheckLength());
847
+ }
848
+
849
+ bool GetUncompressedLength(Source* source, uint32* result) {
850
+ SnappyDecompressor decompressor(source);
851
+ return decompressor.ReadUncompressedLength(result);
852
+ }
853
+
854
+ size_t Compress(Source* reader, Sink* writer) {
855
+ size_t written = 0;
856
+ size_t N = reader->Available();
857
+ const size_t uncompressed_size = N;
858
+ char ulength[Varint::kMax32];
859
+ char* p = Varint::Encode32(ulength, N);
860
+ writer->Append(ulength, p-ulength);
861
+ written += (p - ulength);
862
+
863
+ internal::WorkingMemory wmem;
864
+ char* scratch = NULL;
865
+ char* scratch_output = NULL;
866
+
867
+ while (N > 0) {
868
+ // Get next block to compress (without copying if possible)
869
+ size_t fragment_size;
870
+ const char* fragment = reader->Peek(&fragment_size);
871
+ assert(fragment_size != 0); // premature end of input
872
+ const size_t num_to_read = std::min(N, kBlockSize);
873
+ size_t bytes_read = fragment_size;
874
+
875
+ size_t pending_advance = 0;
876
+ if (bytes_read >= num_to_read) {
877
+ // Buffer returned by reader is large enough
878
+ pending_advance = num_to_read;
879
+ fragment_size = num_to_read;
880
+ } else {
881
+ // Read into scratch buffer
882
+ if (scratch == NULL) {
883
+ // If this is the last iteration, we want to allocate N bytes
884
+ // of space, otherwise the max possible kBlockSize space.
885
+ // num_to_read contains exactly the correct value
886
+ scratch = new char[num_to_read];
887
+ }
888
+ memcpy(scratch, fragment, bytes_read);
889
+ reader->Skip(bytes_read);
890
+
891
+ while (bytes_read < num_to_read) {
892
+ fragment = reader->Peek(&fragment_size);
893
+ size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
894
+ memcpy(scratch + bytes_read, fragment, n);
895
+ bytes_read += n;
896
+ reader->Skip(n);
897
+ }
898
+ assert(bytes_read == num_to_read);
899
+ fragment = scratch;
900
+ fragment_size = num_to_read;
901
+ }
902
+ assert(fragment_size == num_to_read);
903
+
904
+ // Get encoding table for compression
905
+ int table_size;
906
+ uint16* table = wmem.GetHashTable(num_to_read, &table_size);
907
+
908
+ // Compress input_fragment and append to dest
909
+ const int max_output = MaxCompressedLength(num_to_read);
910
+
911
+ // Need a scratch buffer for the output, in case the byte sink doesn't
912
+ // have room for us directly.
913
+ if (scratch_output == NULL) {
914
+ scratch_output = new char[max_output];
915
+ } else {
916
+ // Since we encode kBlockSize regions followed by a region
917
+ // which is <= kBlockSize in length, a previously allocated
918
+ // scratch_output[] region is big enough for this iteration.
919
+ }
920
+ char* dest = writer->GetAppendBuffer(max_output, scratch_output);
921
+ char* end = internal::CompressFragment(fragment, fragment_size,
922
+ dest, table, table_size);
923
+ writer->Append(dest, end - dest);
924
+ written += (end - dest);
925
+
926
+ N -= num_to_read;
927
+ reader->Skip(pending_advance);
928
+ }
929
+
930
+ Report("snappy_compress", written, uncompressed_size);
931
+
932
+ delete[] scratch;
933
+ delete[] scratch_output;
934
+
935
+ return written;
936
+ }
937
+
938
+ // -----------------------------------------------------------------------
939
+ // IOVec interfaces
940
+ // -----------------------------------------------------------------------
941
+
942
+ // A type that writes to an iovec.
943
+ // Note that this is not a "ByteSink", but a type that matches the
944
+ // Writer template argument to SnappyDecompressor::DecompressAllTags().
945
+ class SnappyIOVecWriter {
946
+ private:
947
+ const struct iovec* output_iov_;
948
+ const size_t output_iov_count_;
949
+
950
+ // We are currently writing into output_iov_[curr_iov_index_].
951
+ size_t curr_iov_index_;
952
+
953
+ // Bytes written to output_iov_[curr_iov_index_] so far.
954
+ size_t curr_iov_written_;
955
+
956
+ // Total bytes decompressed into output_iov_ so far.
957
+ size_t total_written_;
958
+
959
+ // Maximum number of bytes that will be decompressed into output_iov_.
960
+ size_t output_limit_;
961
+
962
+ inline char* GetIOVecPointer(size_t index, size_t offset) {
963
+ return reinterpret_cast<char*>(output_iov_[index].iov_base) +
964
+ offset;
965
+ }
966
+
967
+ public:
968
+ // Does not take ownership of iov. iov must be valid during the
969
+ // entire lifetime of the SnappyIOVecWriter.
970
+ inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
971
+ : output_iov_(iov),
972
+ output_iov_count_(iov_count),
973
+ curr_iov_index_(0),
974
+ curr_iov_written_(0),
975
+ total_written_(0),
976
+ output_limit_(-1) {
977
+ }
978
+
979
+ inline void SetExpectedLength(size_t len) {
980
+ output_limit_ = len;
981
+ }
982
+
983
+ inline bool CheckLength() const {
984
+ return total_written_ == output_limit_;
985
+ }
986
+
987
+ inline bool Append(const char* ip, size_t len) {
988
+ if (total_written_ + len > output_limit_) {
989
+ return false;
990
+ }
991
+
992
+ while (len > 0) {
993
+ assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
994
+ if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
995
+ // This iovec is full. Go to the next one.
996
+ if (curr_iov_index_ + 1 >= output_iov_count_) {
997
+ return false;
998
+ }
999
+ curr_iov_written_ = 0;
1000
+ ++curr_iov_index_;
1001
+ }
1002
+
1003
+ const size_t to_write = std::min(
1004
+ len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_);
1005
+ memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1006
+ ip,
1007
+ to_write);
1008
+ curr_iov_written_ += to_write;
1009
+ total_written_ += to_write;
1010
+ ip += to_write;
1011
+ len -= to_write;
1012
+ }
1013
+
1014
+ return true;
1015
+ }
1016
+
1017
+ inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1018
+ const size_t space_left = output_limit_ - total_written_;
1019
+ if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
1020
+ output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
1021
+ // Fast path, used for the majority (about 95%) of invocations.
1022
+ char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
1023
+ UnalignedCopy128(ip, ptr);
1024
+ curr_iov_written_ += len;
1025
+ total_written_ += len;
1026
+ return true;
1027
+ }
1028
+
1029
+ return false;
1030
+ }
1031
+
1032
+ inline bool AppendFromSelf(size_t offset, size_t len) {
1033
+ if (offset > total_written_ || offset == 0) {
1034
+ return false;
1035
+ }
1036
+ const size_t space_left = output_limit_ - total_written_;
1037
+ if (len > space_left) {
1038
+ return false;
1039
+ }
1040
+
1041
+ // Locate the iovec from which we need to start the copy.
1042
+ size_t from_iov_index = curr_iov_index_;
1043
+ size_t from_iov_offset = curr_iov_written_;
1044
+ while (offset > 0) {
1045
+ if (from_iov_offset >= offset) {
1046
+ from_iov_offset -= offset;
1047
+ break;
1048
+ }
1049
+
1050
+ offset -= from_iov_offset;
1051
+ assert(from_iov_index > 0);
1052
+ --from_iov_index;
1053
+ from_iov_offset = output_iov_[from_iov_index].iov_len;
1054
+ }
1055
+
1056
+ // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
1057
+ // the current iovec.
1058
+ while (len > 0) {
1059
+ assert(from_iov_index <= curr_iov_index_);
1060
+ if (from_iov_index != curr_iov_index_) {
1061
+ const size_t to_copy = std::min(
1062
+ output_iov_[from_iov_index].iov_len - from_iov_offset,
1063
+ len);
1064
+ Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
1065
+ len -= to_copy;
1066
+ if (len > 0) {
1067
+ ++from_iov_index;
1068
+ from_iov_offset = 0;
1069
+ }
1070
+ } else {
1071
+ assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1072
+ size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
1073
+ curr_iov_written_,
1074
+ len);
1075
+ if (to_copy == 0) {
1076
+ // This iovec is full. Go to the next one.
1077
+ if (curr_iov_index_ + 1 >= output_iov_count_) {
1078
+ return false;
1079
+ }
1080
+ ++curr_iov_index_;
1081
+ curr_iov_written_ = 0;
1082
+ continue;
1083
+ }
1084
+ if (to_copy > len) {
1085
+ to_copy = len;
1086
+ }
1087
+ IncrementalCopySlow(
1088
+ GetIOVecPointer(from_iov_index, from_iov_offset),
1089
+ GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1090
+ GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy);
1091
+ curr_iov_written_ += to_copy;
1092
+ from_iov_offset += to_copy;
1093
+ total_written_ += to_copy;
1094
+ len -= to_copy;
1095
+ }
1096
+ }
1097
+
1098
+ return true;
1099
+ }
1100
+
1101
+ inline void Flush() {}
1102
+ };
1103
+
1104
+ bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
1105
+ const struct iovec* iov, size_t iov_cnt) {
1106
+ ByteArraySource reader(compressed, compressed_length);
1107
+ return RawUncompressToIOVec(&reader, iov, iov_cnt);
1108
+ }
1109
+
1110
+ bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
1111
+ size_t iov_cnt) {
1112
+ SnappyIOVecWriter output(iov, iov_cnt);
1113
+ return InternalUncompress(compressed, &output);
1114
+ }
1115
+
1116
+ // -----------------------------------------------------------------------
1117
+ // Flat array interfaces
1118
+ // -----------------------------------------------------------------------
1119
+
1120
+ // A type that writes to a flat array.
1121
+ // Note that this is not a "ByteSink", but a type that matches the
1122
+ // Writer template argument to SnappyDecompressor::DecompressAllTags().
1123
+ class SnappyArrayWriter {
1124
+ private:
1125
+ char* base_;
1126
+ char* op_;
1127
+ char* op_limit_;
1128
+
1129
+ public:
1130
+ inline explicit SnappyArrayWriter(char* dst)
1131
+ : base_(dst),
1132
+ op_(dst),
1133
+ op_limit_(dst) {
1134
+ }
1135
+
1136
+ inline void SetExpectedLength(size_t len) {
1137
+ op_limit_ = op_ + len;
1138
+ }
1139
+
1140
+ inline bool CheckLength() const {
1141
+ return op_ == op_limit_;
1142
+ }
1143
+
1144
+ inline bool Append(const char* ip, size_t len) {
1145
+ char* op = op_;
1146
+ const size_t space_left = op_limit_ - op;
1147
+ if (space_left < len) {
1148
+ return false;
1149
+ }
1150
+ memcpy(op, ip, len);
1151
+ op_ = op + len;
1152
+ return true;
1153
+ }
1154
+
1155
+ inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1156
+ char* op = op_;
1157
+ const size_t space_left = op_limit_ - op;
1158
+ if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
1159
+ // Fast path, used for the majority (about 95%) of invocations.
1160
+ UnalignedCopy128(ip, op);
1161
+ op_ = op + len;
1162
+ return true;
1163
+ } else {
1164
+ return false;
1165
+ }
1166
+ }
1167
+
1168
+ inline bool AppendFromSelf(size_t offset, size_t len) {
1169
+ char* const op_end = op_ + len;
1170
+
1171
+ // Check if we try to append from before the start of the buffer.
1172
+ // Normally this would just be a check for "produced < offset",
1173
+ // but "produced <= offset - 1u" is equivalent for every case
1174
+ // except the one where offset==0, where the right side will wrap around
1175
+ // to a very big number. This is convenient, as offset==0 is another
1176
+ // invalid case that we also want to catch, so that we do not go
1177
+ // into an infinite loop.
1178
+ if (Produced() <= offset - 1u || op_end > op_limit_) return false;
1179
+ op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_);
1180
+
1181
+ return true;
1182
+ }
1183
+ inline size_t Produced() const {
1184
+ assert(op_ >= base_);
1185
+ return op_ - base_;
1186
+ }
1187
+ inline void Flush() {}
1188
+ };
1189
+
1190
+ bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
1191
+ ByteArraySource reader(compressed, n);
1192
+ return RawUncompress(&reader, uncompressed);
1193
+ }
1194
+
1195
+ bool RawUncompress(Source* compressed, char* uncompressed) {
1196
+ SnappyArrayWriter output(uncompressed);
1197
+ return InternalUncompress(compressed, &output);
1198
+ }
1199
+
1200
+ bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
1201
+ size_t ulength;
1202
+ if (!GetUncompressedLength(compressed, n, &ulength)) {
1203
+ return false;
1204
+ }
1205
+ // On 32-bit builds: max_size() < kuint32max. Check for that instead
1206
+ // of crashing (e.g., consider externally specified compressed data).
1207
+ if (ulength > uncompressed->max_size()) {
1208
+ return false;
1209
+ }
1210
+ STLStringResizeUninitialized(uncompressed, ulength);
1211
+ return RawUncompress(compressed, n, string_as_array(uncompressed));
1212
+ }
1213
+
1214
+ // A Writer that drops everything on the floor and just does validation
1215
+ class SnappyDecompressionValidator {
1216
+ private:
1217
+ size_t expected_;
1218
+ size_t produced_;
1219
+
1220
+ public:
1221
+ inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
1222
+ inline void SetExpectedLength(size_t len) {
1223
+ expected_ = len;
1224
+ }
1225
+ inline bool CheckLength() const {
1226
+ return expected_ == produced_;
1227
+ }
1228
+ inline bool Append(const char* ip, size_t len) {
1229
+ produced_ += len;
1230
+ return produced_ <= expected_;
1231
+ }
1232
+ inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1233
+ return false;
1234
+ }
1235
+ inline bool AppendFromSelf(size_t offset, size_t len) {
1236
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
1237
+ // the "offset - 1u" trick.
1238
+ if (produced_ <= offset - 1u) return false;
1239
+ produced_ += len;
1240
+ return produced_ <= expected_;
1241
+ }
1242
+ inline void Flush() {}
1243
+ };
1244
+
1245
+ bool IsValidCompressedBuffer(const char* compressed, size_t n) {
1246
+ ByteArraySource reader(compressed, n);
1247
+ SnappyDecompressionValidator writer;
1248
+ return InternalUncompress(&reader, &writer);
1249
+ }
1250
+
1251
+ bool IsValidCompressed(Source* compressed) {
1252
+ SnappyDecompressionValidator writer;
1253
+ return InternalUncompress(compressed, &writer);
1254
+ }
1255
+
1256
+ void RawCompress(const char* input,
1257
+ size_t input_length,
1258
+ char* compressed,
1259
+ size_t* compressed_length) {
1260
+ ByteArraySource reader(input, input_length);
1261
+ UncheckedByteArraySink writer(compressed);
1262
+ Compress(&reader, &writer);
1263
+
1264
+ // Compute how many bytes were added
1265
+ *compressed_length = (writer.CurrentDestination() - compressed);
1266
+ }
1267
+
1268
+ size_t Compress(const char* input, size_t input_length, string* compressed) {
1269
+ // Pre-grow the buffer to the max length of the compressed output
1270
+ STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
1271
+
1272
+ size_t compressed_length;
1273
+ RawCompress(input, input_length, string_as_array(compressed),
1274
+ &compressed_length);
1275
+ compressed->resize(compressed_length);
1276
+ return compressed_length;
1277
+ }
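A usage sketch for the std::string entry points defined in this file, assuming the vendored snappy.h is on the include path (the input contents are arbitrary):

#include <cassert>
#include <string>
#include "snappy.h"

int main() {
  std::string input(100000, 'a');  // highly compressible input
  std::string compressed, restored;
  snappy::Compress(input.data(), input.size(), &compressed);
  assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  bool ok = snappy::Uncompress(compressed.data(), compressed.size(), &restored);
  assert(ok && restored == input);
  return 0;
}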
1278
+
1279
+ // -----------------------------------------------------------------------
1280
+ // Sink interface
1281
+ // -----------------------------------------------------------------------
1282
+
1283
+ // A type that decompresses into a Sink. The template parameter
1284
+ // Allocator must export one method "char* Allocate(int size);", which
1285
+ // allocates a buffer of "size" and appends that to the destination.
1286
+ template <typename Allocator>
1287
+ class SnappyScatteredWriter {
1288
+ Allocator allocator_;
1289
+
1290
+ // We need random access into the data generated so far. Therefore
1291
+ // we keep track of all of the generated data as an array of blocks.
1292
+ // All of the blocks except the last have length kBlockSize.
1293
+ std::vector<char*> blocks_;
1294
+ size_t expected_;
1295
+
1296
+ // Total size of all fully generated blocks so far
1297
+ size_t full_size_;
1298
+
1299
+ // Pointer into current output block
1300
+ char* op_base_; // Base of output block
1301
+ char* op_ptr_; // Pointer to next unfilled byte in block
1302
+ char* op_limit_; // Pointer just past block
1303
+
1304
+ inline size_t Size() const {
1305
+ return full_size_ + (op_ptr_ - op_base_);
1306
+ }
1307
+
1308
+ bool SlowAppend(const char* ip, size_t len);
1309
+ bool SlowAppendFromSelf(size_t offset, size_t len);
1310
+
1311
+ public:
1312
+ inline explicit SnappyScatteredWriter(const Allocator& allocator)
1313
+ : allocator_(allocator),
1314
+ full_size_(0),
1315
+ op_base_(NULL),
1316
+ op_ptr_(NULL),
1317
+ op_limit_(NULL) {
1318
+ }
1319
+
1320
+ inline void SetExpectedLength(size_t len) {
1321
+ assert(blocks_.empty());
1322
+ expected_ = len;
1323
+ }
1324
+
1325
+ inline bool CheckLength() const {
1326
+ return Size() == expected_;
1327
+ }
1328
+
1329
+ // Return the number of bytes actually uncompressed so far
1330
+ inline size_t Produced() const {
1331
+ return Size();
1332
+ }
1333
+
1334
+ inline bool Append(const char* ip, size_t len) {
1335
+ size_t avail = op_limit_ - op_ptr_;
1336
+ if (len <= avail) {
1337
+ // Fast path
1338
+ memcpy(op_ptr_, ip, len);
1339
+ op_ptr_ += len;
1340
+ return true;
1341
+ } else {
1342
+ return SlowAppend(ip, len);
1343
+ }
1344
+ }
1345
+
1346
+ inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1347
+ char* op = op_ptr_;
1348
+ const int space_left = op_limit_ - op;
1349
+ if (length <= 16 && available >= 16 + kMaximumTagLength &&
1350
+ space_left >= 16) {
1351
+ // Fast path, used for the majority (about 95%) of invocations.
1352
+ UnalignedCopy128(ip, op);
1353
+ op_ptr_ = op + length;
1354
+ return true;
1355
+ } else {
1356
+ return false;
1357
+ }
1358
+ }
1359
+
1360
+ inline bool AppendFromSelf(size_t offset, size_t len) {
1361
+ char* const op_end = op_ptr_ + len;
1362
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
1363
+ // the "offset - 1u" trick.
1364
+ if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
1365
+ op_end <= op_limit_)) {
1366
+ // Fast path: src and dst in current block.
1367
+ op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
1368
+ return true;
1369
+ }
1370
+ return SlowAppendFromSelf(offset, len);
1371
+ }
1372
+
1373
+ // Called at the end of decompression. We ask the allocator to
1374
+ // write all blocks to the sink.
1375
+ inline void Flush() { allocator_.Flush(Produced()); }
1376
+ };
1377
+
1378
+ template<typename Allocator>
1379
+ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
1380
+ size_t avail = op_limit_ - op_ptr_;
1381
+ while (len > avail) {
1382
+ // Completely fill this block
1383
+ memcpy(op_ptr_, ip, avail);
1384
+ op_ptr_ += avail;
1385
+ assert(op_limit_ - op_ptr_ == 0);
1386
+ full_size_ += (op_ptr_ - op_base_);
1387
+ len -= avail;
1388
+ ip += avail;
1389
+
1390
+ // Bounds check
1391
+ if (full_size_ + len > expected_) {
1392
+ return false;
1393
+ }
1394
+
1395
+ // Make new block
1396
+ size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
1397
+ op_base_ = allocator_.Allocate(bsize);
1398
+ op_ptr_ = op_base_;
1399
+ op_limit_ = op_base_ + bsize;
1400
+ blocks_.push_back(op_base_);
1401
+ avail = bsize;
1402
+ }
1403
+
1404
+ memcpy(op_ptr_, ip, len);
1405
+ op_ptr_ += len;
1406
+ return true;
1407
+ }
1408
+
1409
+ template<typename Allocator>
1410
+ bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
1411
+ size_t len) {
1412
+ // Overflow check
1413
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
1414
+ // the "offset - 1u" trick.
1415
+ const size_t cur = Size();
1416
+ if (offset - 1u >= cur) return false;
1417
+ if (expected_ - cur < len) return false;
1418
+
1419
+ // Currently we shouldn't ever hit this path because Compress() chops the
1420
+ // input into blocks and does not create cross-block copies. However, it is
1421
+ // nice if we do not rely on that, since we can get better compression if we
1422
+ // allow cross-block copies and thus might want to change the compressor in
1423
+ // the future.
1424
+ size_t src = cur - offset;
1425
+ while (len-- > 0) {
1426
+ char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)];
1427
+ Append(&c, 1);
1428
+ src++;
1429
+ }
1430
+ return true;
1431
+ }
1432
+
1433
+ class SnappySinkAllocator {
1434
+ public:
1435
+ explicit SnappySinkAllocator(Sink* dest): dest_(dest) {}
1436
+ ~SnappySinkAllocator() {}
1437
+
1438
+ char* Allocate(int size) {
1439
+ Datablock block(new char[size], size);
1440
+ blocks_.push_back(block);
1441
+ return block.data;
1442
+ }
1443
+
1444
+ // We flush only at the end, because the writer wants
1445
+ // random access to the blocks and once we hand the
1446
+ // block over to the sink, we can't access it anymore.
1447
+ // Also we don't write more than has been actually written
1448
+ // to the blocks.
1449
+ void Flush(size_t size) {
1450
+ size_t size_written = 0;
1451
+ size_t block_size;
1452
+ for (size_t i = 0; i < blocks_.size(); ++i) {
1453
+ block_size = std::min<size_t>(blocks_[i].size, size - size_written);
1454
+ dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
1455
+ &SnappySinkAllocator::Deleter, NULL);
1456
+ size_written += block_size;
1457
+ }
1458
+ blocks_.clear();
1459
+ }
1460
+
1461
+ private:
1462
+ struct Datablock {
1463
+ char* data;
1464
+ size_t size;
1465
+ Datablock(char* p, size_t s) : data(p), size(s) {}
1466
+ };
1467
+
1468
+ static void Deleter(void* arg, const char* bytes, size_t size) {
1469
+ delete[] bytes;
1470
+ }
1471
+
1472
+ Sink* dest_;
1473
+ std::vector<Datablock> blocks_;
1474
+
1475
+ // Note: copying this object is allowed
1476
+ };
1477
+
1478
+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
1479
+ SnappySinkAllocator allocator(uncompressed);
1480
+ SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
1481
+ InternalUncompress(compressed, &writer);
1482
+ return writer.Produced();
1483
+ }
1484
+
1485
+ bool Uncompress(Source* compressed, Sink* uncompressed) {
1486
+ // Read the uncompressed length from the front of the compressed input
1487
+ SnappyDecompressor decompressor(compressed);
1488
+ uint32 uncompressed_len = 0;
1489
+ if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
1490
+ return false;
1491
+ }
1492
+
1493
+ char c;
1494
+ size_t allocated_size;
1495
+ char* buf = uncompressed->GetAppendBufferVariable(
1496
+ 1, uncompressed_len, &c, 1, &allocated_size);
1497
+
1498
+ const size_t compressed_len = compressed->Available();
1499
+ // If we can get a flat buffer, then use it, otherwise do block by block
1500
+ // uncompression
1501
+ if (allocated_size >= uncompressed_len) {
1502
+ SnappyArrayWriter writer(buf);
1503
+ bool result = InternalUncompressAllTags(&decompressor, &writer,
1504
+ compressed_len, uncompressed_len);
1505
+ uncompressed->Append(buf, writer.Produced());
1506
+ return result;
1507
+ } else {
1508
+ SnappySinkAllocator allocator(uncompressed);
1509
+ SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
1510
+ return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
1511
+ uncompressed_len);
1512
+ }
1513
+ }
1514
+
1515
+ } // end namespace snappy