snappy 0.0.15-java → 0.0.16-java

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
@@ -33,8 +33,8 @@
  // which is a public header. Instead, snappy-stubs-public.h is generated by
  // from snappy-stubs-public.h.in at configure time.

- #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
- #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_

  #if @ac_cv_have_stdint_h@
  #include <stdint.h>
@@ -80,9 +80,11 @@ typedef unsigned long long uint64;

  typedef std::string string;

+ #ifndef DISALLOW_COPY_AND_ASSIGN
  #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&); \
  void operator=(const TypeName&)
+ #endif

  #if !@ac_cv_have_sys_uio_h@
  // Windows does not have an iovec type, yet the concept is universally useful.
@@ -95,4 +97,4 @@ struct iovec {

  } // namespace snappy

- #endif // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+ #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
@@ -28,13 +28,16 @@
  //
  // Various stubs for the unit tests for the open-source version of Snappy.

- #include "snappy-test.h"
+ #ifdef HAVE_CONFIG_H
+ #include "config.h"
+ #endif

  #ifdef HAVE_WINDOWS_H
- #define WIN32_LEAN_AND_MEAN
  #include <windows.h>
  #endif

+ #include "snappy-test.h"
+
  #include <algorithm>

  DEFINE_bool(run_microbenchmarks, true,
@@ -28,8 +28,8 @@
  //
  // Various stubs for the unit tests for the open-source version of Snappy.

- #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
- #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_

  #include <iostream>
  #include <string>
@@ -52,7 +52,6 @@
  #endif

  #ifdef HAVE_WINDOWS_H
- #define WIN32_LEAN_AND_MEAN
  #include <windows.h>
  #endif

@@ -132,7 +131,7 @@ namespace File {
  } // namespace File

  namespace file {
- int Defaults() { }
+ int Defaults() { return 0; }

  class DummyStatus {
  public:
@@ -158,6 +157,8 @@ namespace file {
  }

  fclose(fp);
+
+ return DummyStatus();
  }

  DummyStatus SetContents(const string& filename,
@@ -176,6 +177,8 @@ namespace file {
  }

  fclose(fp);
+
+ return DummyStatus();
  }
  } // namespace file

@@ -193,6 +196,7 @@ void Test_Snappy_RandomData();
  void Test_Snappy_FourByteOffset();
  void Test_SnappyCorruption_TruncatedVarint();
  void Test_SnappyCorruption_UnterminatedVarint();
+ void Test_SnappyCorruption_OverflowingVarint();
  void Test_Snappy_ReadPastEndOfBuffer();
  void Test_Snappy_FindMatchLength();
  void Test_Snappy_FindMatchLengthRandom();
@@ -497,6 +501,7 @@ static inline int RUN_ALL_TESTS() {
  snappy::Test_Snappy_FourByteOffset();
  snappy::Test_SnappyCorruption_TruncatedVarint();
  snappy::Test_SnappyCorruption_UnterminatedVarint();
+ snappy::Test_SnappyCorruption_OverflowingVarint();
  snappy::Test_Snappy_ReadPastEndOfBuffer();
  snappy::Test_Snappy_FindMatchLength();
  snappy::Test_Snappy_FindMatchLengthRandom();
@@ -544,6 +549,13 @@ class LogMessage {
  PREDICT_TRUE(condition) ? (void)0 : \
  snappy::LogMessageVoidify() & snappy::LogMessageCrash()

+ #ifdef _MSC_VER
+ // ~LogMessageCrash calls abort() and therefore never exits. This is by design
+ // so temporarily disable warning C4722.
+ #pragma warning(push)
+ #pragma warning(disable:4722)
+ #endif
+
  class LogMessageCrash : public LogMessage {
  public:
  LogMessageCrash() { }
@@ -553,6 +565,10 @@ class LogMessageCrash : public LogMessage {
  }
  };

+ #ifdef _MSC_VER
+ #pragma warning(pop)
+ #endif
+
  // This class is used to explicitly ignore values in the conditional
  // logging macros. This avoids compiler warnings like "value computed
  // is not used" and "statement has no effect".
@@ -572,6 +588,7 @@ class LogMessageVoidify {
  #define CHECK_NE(a, b) CRASH_UNLESS((a) != (b))
  #define CHECK_LT(a, b) CRASH_UNLESS((a) < (b))
  #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
+ #define CHECK_OK(cond) (cond).CheckSuccess()

  } // namespace

@@ -579,4 +596,4 @@ using snappy::CompressFile;
  using snappy::UncompressFile;
  using snappy::MeasureFile;

- #endif // UTIL_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
+ #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
@@ -30,6 +30,9 @@
  #include "snappy-internal.h"
  #include "snappy-sinksource.h"

+ #if defined(__x86_64__) || defined(_M_X64)
+ #include <emmintrin.h>
+ #endif
  #include <stdio.h>

  #include <algorithm>
@@ -39,6 +42,13 @@

  namespace snappy {

+ using internal::COPY_1_BYTE_OFFSET;
+ using internal::COPY_2_BYTE_OFFSET;
+ using internal::LITERAL;
+ using internal::char_table;
+ using internal::kMaximumTagLength;
+ using internal::wordmask;
+
  // Any hash function will produce a valid compressed bitstream, but a good
  // hash function reduces the number of collisions and thus yields better
  // compression for compressible input, and more speed for incompressible
@@ -76,79 +86,125 @@ size_t MaxCompressedLength(size_t source_len) {
  return 32 + source_len + source_len/6;
  }

- enum {
- LITERAL = 0,
- COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
- COPY_2_BYTE_OFFSET = 2,
- COPY_4_BYTE_OFFSET = 3
- };
- static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
-
- // Copy "len" bytes from "src" to "op", one byte at a time. Used for
- // handling COPY operations where the input and output regions may
- // overlap. For example, suppose:
- // src == "ab"
- // op == src + 2
- // len == 20
- // After IncrementalCopy(src, op, len), the result will have
- // eleven copies of "ab"
- // ababababababababababab
- // Note that this does not match the semantics of either memcpy()
- // or memmove().
- static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
- assert(len > 0);
- do {
- *op++ = *src++;
- } while (--len > 0);
+ namespace {
+
+ void UnalignedCopy64(const void* src, void* dst) {
+ memcpy(dst, src, 8);
  }

- // Equivalent to IncrementalCopy except that it can write up to ten extra
- // bytes after the end of the copy, and that it is faster.
- //
- // The main part of this loop is a simple copy of eight bytes at a time until
- // we've copied (at least) the requested amount of bytes. However, if op and
- // src are less than eight bytes apart (indicating a repeating pattern of
- // length < 8), we first need to expand the pattern in order to get the correct
- // results. For instance, if the buffer looks like this, with the eight-byte
- // <src> and <op> patterns marked as intervals:
- //
- // abxxxxxxxxxxxx
- // [------] src
- // [------] op
- //
- // a single eight-byte copy from <src> to <op> will repeat the pattern once,
- // after which we can move <op> two bytes without moving <src>:
- //
- // ababxxxxxxxxxx
- // [------] src
- // [------] op
- //
- // and repeat the exercise until the two no longer overlap.
- //
- // This allows us to do very well in the special case of one single byte
- // repeated many times, without taking a big hit for more general cases.
- //
- // The worst case of extra writing past the end of the match occurs when
- // op - src == 1 and len == 1; the last copy will read from byte positions
- // [0..7] and write to [4..11], whereas it was only supposed to write to
- // position 1. Thus, ten excess bytes.
+ void UnalignedCopy128(const void* src, void* dst) {
+ // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
+ // SSE2 moves for memcpy(dst, src, 16).
+ #ifdef __SSE2__
+ __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
+ _mm_storeu_si128(static_cast<__m128i*>(dst), x);
+ #else
+ memcpy(dst, src, 16);
+ #endif
+ }

- namespace {
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used
+ // for handling COPY operations where the input and output regions may overlap.
+ // For example, suppose:
+ // src == "ab"
+ // op == src + 2
+ // op_limit == op + 20
+ // After IncrementalCopySlow(src, op, op_limit), the result will have eleven
+ // copies of "ab"
+ // ababababababababababab
+ // Note that this does not match the semantics of either memcpy() or memmove().
+ inline char* IncrementalCopySlow(const char* src, char* op,
+ char* const op_limit) {
+ while (op < op_limit) {
+ *op++ = *src++;
+ }
+ return op_limit;
+ }

- const int kMaxIncrementCopyOverflow = 10;
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
+ // IncrementalCopySlow. buf_limit is the address past the end of the writable
+ // region of the buffer.
+ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
+ char* const buf_limit) {
+ // Terminology:
+ //
+ // slop = buf_limit - op
+ // pat = op - src
+ // len = limit - op
+ assert(src < op);
+ assert(op_limit <= buf_limit);
+ // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
+ // to optimize this function but we have to also handle these cases in case
+ // the input does not satisfy these conditions.
+
+ size_t pattern_size = op - src;
+ // The cases are split into different branches to allow the branch predictor,
+ // FDO, and static prediction hints to work better. For each input we list the
+ // ratio of invocations that match each condition.
+ //
+ // input slop < 16 pat < 8 len > 16
+ // ------------------------------------------
+ // html|html4|cp 0% 1.01% 27.73%
+ // urls 0% 0.88% 14.79%
+ // jpg 0% 64.29% 7.14%
+ // pdf 0% 2.56% 58.06%
+ // txt[1-4] 0% 0.23% 0.97%
+ // pb 0% 0.96% 13.88%
+ // bin 0.01% 22.27% 41.17%
+ //
+ // It is very rare that we don't have enough slop for doing block copies. It
+ // is also rare that we need to expand a pattern. Small patterns are common
+ // for incompressible formats and for those we are plenty fast already.
+ // Lengths are normally not greater than 16 but they vary depending on the
+ // input. In general if we always predict len <= 16 it would be an ok
+ // prediction.
+ //
+ // In order to be fast we want a pattern >= 8 bytes and an unrolled loop
+ // copying 2x 8 bytes at a time.
+
+ // Handle the uncommon case where pattern is less than 8 bytes.
+ if (PREDICT_FALSE(pattern_size < 8)) {
+ // Expand pattern to at least 8 bytes. The worse case scenario in terms of
+ // buffer usage is when the pattern is size 3. ^ is the original position
+ // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
+ //
+ // abc
+ // abcabcxxxxx
+ // abcabcabcabcxxxxx
+ // ^
+ // The last x is 14 bytes after ^.
+ if (PREDICT_TRUE(op <= buf_limit - 14)) {
+ while (pattern_size < 8) {
+ UnalignedCopy64(src, op);
+ op += pattern_size;
+ pattern_size *= 2;
+ }
+ if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+ } else {
+ return IncrementalCopySlow(src, op, op_limit);
+ }
+ }
+ assert(pattern_size >= 8);

- inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
- while (op - src < 8) {
+ // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single
+ // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
+ // because expanding the pattern to at least 8 bytes guarantees that
+ // op - src >= 8.
+ while (op <= buf_limit - 16) {
  UnalignedCopy64(src, op);
- len -= op - src;
- op += op - src;
+ UnalignedCopy64(src + 8, op + 8);
+ src += 16;
+ op += 16;
+ if (PREDICT_TRUE(op >= op_limit)) return op_limit;
  }
- while (len > 0) {
+ // We only take this branch if we didn't have enough slop and we can do a
+ // single 8 byte copy.
+ if (PREDICT_FALSE(op <= buf_limit - 8)) {
  UnalignedCopy64(src, op);
  src += 8;
  op += 8;
- len -= 8;
  }
+ return IncrementalCopySlow(src, op, op_limit);
  }

  } // namespace
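Illustration (not part of the package diff): the rewritten IncrementalCopy above first expands a short repeating pattern by copying it onto itself with overlapping 8-byte block copies, doubling the pattern each time, and only then switches to wide block copies. A minimal sketch of that doubling step, using plain memmove in place of the package's UnalignedCopy64 and hypothetical local names:

    #include <cstdio>
    #include <cstring>

    int main() {
      // A 3-byte pattern "abc" sits at the front of the buffer; the bytes after
      // it must become repetitions of that pattern.
      char buf[64] = "abc";
      const char* src = buf;   // pattern start (never moves)
      char* op = buf + 3;      // write cursor, so op - src == pattern size
      size_t pattern_size = 3;

      // Same loop shape as the hunk above: each 8-byte copy from the pattern
      // start doubles the valid pattern in front of the cursor (3 -> 6 -> 12),
      // writing a few junk bytes past the cursor that later copies overwrite.
      while (pattern_size < 8) {
        memmove(op, src, 8);   // read-then-write, like UnalignedCopy64
        op += pattern_size;
        pattern_size *= 2;
      }

      // op - src is now >= 8, so wide non-overlapping block copies are safe.
      buf[12] = '\0';
      printf("%s\n", buf);     // prints "abcabcabcabc"
      return 0;
    }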
@@ -157,26 +213,29 @@ static inline char* EmitLiteral(char* op,
  const char* literal,
  int len,
  bool allow_fast_path) {
- int n = len - 1; // Zero-length literals are disallowed
- if (n < 60) {
+ // The vast majority of copies are below 16 bytes, for which a
+ // call to memcpy is overkill. This fast path can sometimes
+ // copy up to 15 bytes too much, but that is okay in the
+ // main loop, since we have a bit to go on for both sides:
+ //
+ // - The input will always have kInputMarginBytes = 15 extra
+ // available bytes, as long as we're in the main loop, and
+ // if not, allow_fast_path = false.
+ // - The output will always have 32 spare bytes (see
+ // MaxCompressedLength).
+ assert(len > 0); // Zero-length literals are disallowed
+ int n = len - 1;
+ if (allow_fast_path && len <= 16) {
  // Fits in tag byte
  *op++ = LITERAL | (n << 2);

- // The vast majority of copies are below 16 bytes, for which a
- // call to memcpy is overkill. This fast path can sometimes
- // copy up to 15 bytes too much, but that is okay in the
- // main loop, since we have a bit to go on for both sides:
- //
- // - The input will always have kInputMarginBytes = 15 extra
- // available bytes, as long as we're in the main loop, and
- // if not, allow_fast_path = false.
- // - The output will always have 32 spare bytes (see
- // MaxCompressedLength).
- if (allow_fast_path && len <= 16) {
- UnalignedCopy64(literal, op);
- UnalignedCopy64(literal + 8, op + 8);
- return op + len;
- }
+ UnalignedCopy128(literal, op);
+ return op + len;
+ }
+
+ if (n < 60) {
+ // Fits in tag byte
+ *op++ = LITERAL | (n << 2);
  } else {
  // Encode in upcoming bytes
  char* base = op;
@@ -195,42 +254,54 @@ static inline char* EmitLiteral(char* op,
  return op + len;
  }

- static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
+ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
+ bool len_less_than_12) {
  assert(len <= 64);
  assert(len >= 4);
  assert(offset < 65536);
+ assert(len_less_than_12 == (len < 12));

- if ((len < 12) && (offset < 2048)) {
- size_t len_minus_4 = len - 4;
- assert(len_minus_4 < 8); // Must fit in 3 bits
- *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
+ if (len_less_than_12 && PREDICT_TRUE(offset < 2048)) {
+ // offset fits in 11 bits. The 3 highest go in the top of the first byte,
+ // and the rest go in the second byte.
+ *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
  *op++ = offset & 0xff;
  } else {
- *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
- LittleEndian::Store16(op, offset);
- op += 2;
+ // Write 4 bytes, though we only care about 3 of them. The output buffer
+ // is required to have some slack, so the extra byte won't overrun it.
+ uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8);
+ LittleEndian::Store32(op, u);
+ op += 3;
  }
  return op;
  }

- static inline char* EmitCopy(char* op, size_t offset, int len) {
- // Emit 64 byte copies but make sure to keep at least four bytes reserved
- while (len >= 68) {
- op = EmitCopyLessThan64(op, offset, 64);
- len -= 64;
- }
+ static inline char* EmitCopy(char* op, size_t offset, size_t len,
+ bool len_less_than_12) {
+ assert(len_less_than_12 == (len < 12));
+ if (len_less_than_12) {
+ return EmitCopyAtMost64(op, offset, len, true);
+ } else {
+ // A special case for len <= 64 might help, but so far measurements suggest
+ // it's in the noise.

- // Emit an extra 60 byte copy if have too much data to fit in one copy
- if (len > 64) {
- op = EmitCopyLessThan64(op, offset, 60);
- len -= 60;
- }
+ // Emit 64 byte copies but make sure to keep at least four bytes reserved.
+ while (PREDICT_FALSE(len >= 68)) {
+ op = EmitCopyAtMost64(op, offset, 64, false);
+ len -= 64;
+ }

- // Emit remainder
- op = EmitCopyLessThan64(op, offset, len);
- return op;
- }
+ // One or two copies will now finish the job.
+ if (len > 64) {
+ op = EmitCopyAtMost64(op, offset, 60, false);
+ len -= 60;
+ }

+ // Emit remainder.
+ op = EmitCopyAtMost64(op, offset, len, len < 12);
+ return op;
+ }
+ }

  bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
  uint32 v = 0;
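Illustration (not part of the package diff): EmitCopyAtMost64 above packs a short copy as a tag byte holding len - 4 in bits 2..4 and the top three offset bits in bits 5..7, followed by the low offset byte; longer copies are written with a single 32-bit store of which only three bytes matter. A small sketch that round-trips the 1-byte-offset layout; the enum and decode lines are written out locally for the example:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Snappy tag types live in the low two bits of every tag byte.
    enum { LITERAL = 0, COPY_1_BYTE_OFFSET = 1, COPY_2_BYTE_OFFSET = 2 };

    int main() {
      // 1-byte-offset copy: length 4..11, offset 0..2047.
      size_t len = 7, offset = 0x5A3;  // 1443, fits in 11 bits
      uint8_t tag = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
      uint8_t low = offset & 0xff;     // remaining 8 offset bits

      // Decode the two bytes back the way the decompressor's table lookup does.
      size_t dec_len = ((tag >> 2) & 0x7) + 4;
      size_t dec_off = ((tag & 0xe0) << 3) | low;
      assert(dec_len == len && dec_off == offset);
      printf("len=%zu offset=%zu round-trips\n", dec_len, dec_off);
      return 0;
    }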
@@ -364,9 +435,9 @@ char* CompressFragment(const char* input,
  //
  // Heuristic match skipping: If 32 bytes are scanned with no matches
  // found, start looking only at every other byte. If 32 more bytes are
- // scanned, look at every third byte, etc.. When a match is found,
- // immediately go back to looking at every byte. This is a small loss
- // (~5% performance, ~0.1% density) for compressible data due to more
+ // scanned (or skipped), look at every third byte, etc.. When a match is
+ // found, immediately go back to looking at every byte. This is a small
+ // loss (~5% performance, ~0.1% density) for compressible data due to more
  // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
  // win since the compressor quickly "realizes" the data is incompressible
  // and doesn't bother looking for matches everywhere.
@@ -382,7 +453,8 @@ char* CompressFragment(const char* input,
  ip = next_ip;
  uint32 hash = next_hash;
  assert(hash == Hash(ip, shift));
- uint32 bytes_between_hash_lookups = skip++ >> 5;
+ uint32 bytes_between_hash_lookups = skip >> 5;
+ skip += bytes_between_hash_lookups;
  next_ip = ip + bytes_between_hash_lookups;
  if (PREDICT_FALSE(next_ip > ip_limit)) {
  goto emit_remainder;
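Illustration (not part of the package diff): the change above makes the skip counter grow by the number of bytes just skipped rather than by one per lookup, so the stride between hash probes ramps up with bytes covered instead of with probe count, and incompressible input is abandoned sooner. A small standalone comparison of the two schedules; variable names are hypothetical:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // How far has each heuristic advanced through match-free input after the
      // same number of failed hash lookups?
      uint32_t old_skip = 32, new_skip = 32;
      uint64_t old_pos = 0, new_pos = 0;

      printf("lookups  old bytes advanced  new bytes advanced\n");
      for (int lookup = 1; lookup <= 160; ++lookup) {
        old_pos += old_skip++ >> 5;        // 0.0.15 behaviour: skip++ >> 5
        uint32_t step = new_skip >> 5;     // 0.0.16 behaviour
        new_skip += step;                  // grow by bytes skipped
        new_pos += step;
        if (lookup % 32 == 0) {
          printf("%7d  %19llu  %18llu\n", lookup,
                 (unsigned long long)old_pos, (unsigned long long)new_pos);
        }
      }
      return 0;
    }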
@@ -417,19 +489,21 @@ char* CompressFragment(const char* input,
  // We have a 4-byte match at ip, and no need to emit any
  // "literal bytes" prior to ip.
  const char* base = ip;
- int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
+ std::pair<size_t, bool> p =
+ FindMatchLength(candidate + 4, ip + 4, ip_end);
+ size_t matched = 4 + p.first;
  ip += matched;
  size_t offset = base - candidate;
  assert(0 == memcmp(base, candidate, matched));
- op = EmitCopy(op, offset, matched);
- // We could immediately start working at ip now, but to improve
- // compression we first update table[Hash(ip - 1, ...)].
- const char* insert_tail = ip - 1;
+ op = EmitCopy(op, offset, matched, p.second);
  next_emit = ip;
  if (PREDICT_FALSE(ip >= ip_limit)) {
  goto emit_remainder;
  }
- input_bytes = GetEightBytesAt(insert_tail);
+ // We are now looking for a 4-byte match again. We read
+ // table[Hash(ip, shift)] for that. To improve compression,
+ // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
+ input_bytes = GetEightBytesAt(ip - 1);
  uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
  table[prev_hash] = ip - base_ip - 1;
  uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
@@ -493,162 +567,6 @@ char* CompressFragment(const char* input,
  // bool TryFastAppend(const char* ip, size_t available, size_t length);
  // };

- // -----------------------------------------------------------------------
- // Lookup table for decompression code. Generated by ComputeTable() below.
- // -----------------------------------------------------------------------
-
- // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
- static const uint32 wordmask[] = {
- 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
- };
-
- // Data stored per entry in lookup table:
- // Range Bits-used Description
- // ------------------------------------
- // 1..64 0..7 Literal/copy length encoded in opcode byte
- // 0..7 8..10 Copy offset encoded in opcode byte / 256
- // 0..4 11..13 Extra bytes after opcode
- //
- // We use eight bits for the length even though 7 would have sufficed
- // because of efficiency reasons:
- // (1) Extracting a byte is faster than a bit-field
- // (2) It properly aligns copy offset so we do not need a <<8
- static const uint16 char_table[256] = {
- 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
- 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
- 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
- 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
- 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
- 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
- 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
- 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
- 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
- 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
- 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
- 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
- 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
- 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
- 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
- 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
- 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
- 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
- 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
- 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
- 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
- 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
- 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
- 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
- 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
- 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
- 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
- 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
- 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
- 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
- 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
- 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
- };
-
- // In debug mode, allow optional computation of the table at startup.
- // Also, check that the decompression table is correct.
- #ifndef NDEBUG
- DEFINE_bool(snappy_dump_decompression_table, false,
- "If true, we print the decompression table at startup.");
-
- static uint16 MakeEntry(unsigned int extra,
- unsigned int len,
- unsigned int copy_offset) {
- // Check that all of the fields fit within the allocated space
- assert(extra == (extra & 0x7)); // At most 3 bits
- assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
- assert(len == (len & 0x7f)); // At most 7 bits
- return len | (copy_offset << 8) | (extra << 11);
- }
-
- static void ComputeTable() {
- uint16 dst[256];
-
- // Place invalid entries in all places to detect missing initialization
- int assigned = 0;
- for (int i = 0; i < 256; i++) {
- dst[i] = 0xffff;
- }
-
- // Small LITERAL entries. We store (len-1) in the top 6 bits.
- for (unsigned int len = 1; len <= 60; len++) {
- dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
- assigned++;
- }
-
- // Large LITERAL entries. We use 60..63 in the high 6 bits to
- // encode the number of bytes of length info that follow the opcode.
- for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
- // We set the length field in the lookup table to 1 because extra
- // bytes encode len-1.
- dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
- assigned++;
- }
-
- // COPY_1_BYTE_OFFSET.
- //
- // The tag byte in the compressed data stores len-4 in 3 bits, and
- // offset/256 in 5 bits. offset%256 is stored in the next byte.
- //
- // This format is used for length in range [4..11] and offset in
- // range [0..2047]
- for (unsigned int len = 4; len < 12; len++) {
- for (unsigned int offset = 0; offset < 2048; offset += 256) {
- dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
- MakeEntry(1, len, offset>>8);
- assigned++;
- }
- }
-
- // COPY_2_BYTE_OFFSET.
- // Tag contains len-1 in top 6 bits, and offset in next two bytes.
- for (unsigned int len = 1; len <= 64; len++) {
- dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
- assigned++;
- }
-
- // COPY_4_BYTE_OFFSET.
- // Tag contents len-1 in top 6 bits, and offset in next four bytes.
- for (unsigned int len = 1; len <= 64; len++) {
- dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
- assigned++;
- }
-
- // Check that each entry was initialized exactly once.
- if (assigned != 256) {
- fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
- abort();
- }
- for (int i = 0; i < 256; i++) {
- if (dst[i] == 0xffff) {
- fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
- abort();
- }
- }
-
- if (FLAGS_snappy_dump_decompression_table) {
- printf("static const uint16 char_table[256] = {\n ");
- for (int i = 0; i < 256; i++) {
- printf("0x%04x%s",
- dst[i],
- ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
- }
- printf("};\n");
- }
-
- // Check that computed table matched recorded table
- for (int i = 0; i < 256; i++) {
- if (dst[i] != char_table[i]) {
- fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
- i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
- abort();
- }
- }
- }
- #endif /* !NDEBUG */

  // Helper class for decompression
  class SnappyDecompressor {
@@ -701,7 +619,9 @@ class SnappyDecompressor {
  if (n == 0) return false;
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
  reader_->Skip(1);
- *result |= static_cast<uint32>(c & 0x7f) << shift;
+ uint32 val = c & 0x7f;
+ if (((val << shift) >> shift) != val) return false;
+ *result |= val << shift;
  if (c < 128) {
  break;
  }
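Illustration (not part of the package diff): the added guard rejects varints whose continuation bytes would shift payload bits past bit 31; if ((val << shift) >> shift) != val, bits were lost and the stream is treated as corrupt, which is what the new Test_SnappyCorruption_OverflowingVarint test exercises. A minimal standalone decoder using the same guard; the function name and buffers below are illustrative:

    #include <cstdint>
    #include <cstdio>

    // Decode a little-endian base-128 varint into a uint32, rejecting encodings
    // that do not fit in 32 bits. Returns bytes consumed, or 0 on error.
    static size_t DecodeVarint32(const uint8_t* p, size_t n, uint32_t* result) {
      uint32_t value = 0;
      for (size_t i = 0, shift = 0; i < n && shift <= 28; ++i, shift += 7) {
        uint32_t val = p[i] & 0x7f;
        // Same overflow guard as the hunk above: if shifting the 7 payload bits
        // up by `shift` and back changes them, they do not fit in 32 bits.
        if (((val << shift) >> shift) != val) return 0;
        value |= val << shift;
        if (p[i] < 128) {          // high bit clear: last byte of the varint
          *result = value;
          return i + 1;
        }
      }
      return 0;  // ran out of input or too many continuation bytes
    }

    int main() {
      const uint8_t ok[] = {0xfe, 0xff, 0xff, 0xff, 0x0f};   // 0xfffffffe
      const uint8_t bad[] = {0xff, 0xff, 0xff, 0xff, 0x7f};  // needs > 32 bits
      uint32_t v = 0;
      size_t used_ok = DecodeVarint32(ok, sizeof(ok), &v);
      printf("ok:  consumed=%zu value=0x%x\n", used_ok, (unsigned)v);
      size_t used_bad = DecodeVarint32(bad, sizeof(bad), &v);
      printf("bad: consumed=%zu\n", used_bad);
      return 0;
    }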
@@ -715,6 +635,10 @@ class SnappyDecompressor {
  template <class Writer>
  void DecompressAllTags(Writer* writer) {
  const char* ip = ip_;
+ // For position-independent executables, accessing global arrays can be
+ // slow. Move wordmask array onto the stack to mitigate this.
+ uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
+ memcpy(wordmask, internal::wordmask, sizeof(wordmask));

  // We could have put this refill fragment only at the beginning of the loop.
  // However, duplicating it at the end of each branch gives the compiler more
@@ -731,7 +655,19 @@ class SnappyDecompressor {
  for ( ;; ) {
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

- if ((c & 0x3) == LITERAL) {
+ // Ratio of iterations that have LITERAL vs non-LITERAL for different
+ // inputs.
+ //
+ // input LITERAL NON_LITERAL
+ // -----------------------------------
+ // html|html4|cp 23% 77%
+ // urls 36% 64%
+ // jpg 47% 53%
+ // pdf 19% 81%
+ // txt[1-4] 25% 75%
+ // pb 24% 76%
+ // bin 24% 76%
+ if (PREDICT_FALSE((c & 0x3) == LITERAL)) {
  size_t literal_length = (c >> 2) + 1u;
  if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
  assert(literal_length < 61);
@@ -767,15 +703,15 @@ class SnappyDecompressor {
  ip += literal_length;
  MAYBE_REFILL();
  } else {
- const uint32 entry = char_table[c];
- const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
- const uint32 length = entry & 0xff;
+ const size_t entry = char_table[c];
+ const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+ const size_t length = entry & 0xff;
  ip += entry >> 11;

  // copy_offset/256 is encoded in bits 8..10. By just fetching
  // those bits, we get copy_offset (since the bit-field starts at
  // bit 8).
- const uint32 copy_offset = entry & 0x700;
+ const size_t copy_offset = entry & 0x700;
  if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
  return;
  }
@@ -795,10 +731,8 @@ bool SnappyDecompressor::RefillTag() {
  size_t n;
  ip = reader_->Peek(&n);
  peeked_ = n;
- if (n == 0) {
- eof_ = true;
- return false;
- }
+ eof_ = (n == 0);
+ if (eof_) return false;
  ip_limit_ = ip + n;
  }

@@ -863,6 +797,7 @@ static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,

  // Process the entire input
  decompressor->DecompressAllTags(writer);
+ writer->Flush();
  return (decompressor->eof() && writer->CheckLength());
  }

@@ -965,7 +900,7 @@ class SnappyIOVecWriter {
  const size_t output_iov_count_;

  // We are currently writing into output_iov_[curr_iov_index_].
- int curr_iov_index_;
+ size_t curr_iov_index_;

  // Bytes written to output_iov_[curr_iov_index_] so far.
  size_t curr_iov_written_;
@@ -976,7 +911,7 @@ class SnappyIOVecWriter {
  // Maximum number of bytes that will be decompressed into output_iov_.
  size_t output_limit_;

- inline char* GetIOVecPointer(int index, size_t offset) {
+ inline char* GetIOVecPointer(size_t index, size_t offset) {
  return reinterpret_cast<char*>(output_iov_[index].iov_base) +
  offset;
  }
@@ -1037,8 +972,7 @@ class SnappyIOVecWriter {
  output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
  // Fast path, used for the majority (about 95%) of invocations.
  char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
- UnalignedCopy64(ip, ptr);
- UnalignedCopy64(ip + 8, ptr + 8);
+ UnalignedCopy128(ip, ptr);
  curr_iov_written_ += len;
  total_written_ += len;
  return true;
@@ -1057,7 +991,7 @@ class SnappyIOVecWriter {
  }

  // Locate the iovec from which we need to start the copy.
- int from_iov_index = curr_iov_index_;
+ size_t from_iov_index = curr_iov_index_;
  size_t from_iov_offset = curr_iov_written_;
  while (offset > 0) {
  if (from_iov_offset >= offset) {
@@ -1066,8 +1000,8 @@ class SnappyIOVecWriter {
  }

  offset -= from_iov_offset;
+ assert(from_iov_index > 0);
  --from_iov_index;
- assert(from_iov_index >= 0);
  from_iov_offset = output_iov_[from_iov_index].iov_len;
  }

@@ -1102,9 +1036,10 @@ class SnappyIOVecWriter {
  if (to_copy > len) {
  to_copy = len;
  }
- IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
- GetIOVecPointer(curr_iov_index_, curr_iov_written_),
- to_copy);
+ IncrementalCopySlow(
+ GetIOVecPointer(from_iov_index, from_iov_offset),
+ GetIOVecPointer(curr_iov_index_, curr_iov_written_),
+ GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy);
  curr_iov_written_ += to_copy;
  from_iov_offset += to_copy;
  total_written_ += to_copy;
@@ -1115,6 +1050,7 @@ class SnappyIOVecWriter {
  return true;
  }

+ inline void Flush() {}
  };

  bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
@@ -1145,7 +1081,8 @@ class SnappyArrayWriter {
  public:
  inline explicit SnappyArrayWriter(char* dst)
  : base_(dst),
- op_(dst) {
+ op_(dst),
+ op_limit_(dst) {
  }

  inline void SetExpectedLength(size_t len) {
@@ -1172,8 +1109,7 @@ class SnappyArrayWriter {
  const size_t space_left = op_limit_ - op;
  if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
  // Fast path, used for the majority (about 95%) of invocations.
- UnalignedCopy64(ip, op);
- UnalignedCopy64(ip + 8, op + 8);
+ UnalignedCopy128(ip, op);
  op_ = op + len;
  return true;
  } else {
@@ -1182,8 +1118,7 @@ class SnappyArrayWriter {
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
- char* op = op_;
- const size_t space_left = op_limit_ - op;
+ char* const op_end = op_ + len;

  // Check if we try to append from before the start of the buffer.
  // Normally this would just be a check for "produced < offset",
@@ -1192,29 +1127,16 @@ class SnappyArrayWriter {
  // to a very big number. This is convenient, as offset==0 is another
  // invalid case that we also want to catch, so that we do not go
  // into an infinite loop.
- assert(op >= base_);
- size_t produced = op - base_;
- if (produced <= offset - 1u) {
- return false;
- }
- if (len <= 16 && offset >= 8 && space_left >= 16) {
- // Fast path, used for the majority (70-80%) of dynamic invocations.
- UnalignedCopy64(op - offset, op);
- UnalignedCopy64(op - offset + 8, op + 8);
- } else {
- if (space_left >= len + kMaxIncrementCopyOverflow) {
- IncrementalCopyFastPath(op - offset, op, len);
- } else {
- if (space_left < len) {
- return false;
- }
- IncrementalCopy(op - offset, op, len);
- }
- }
+ if (Produced() <= offset - 1u || op_end > op_limit_) return false;
+ op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_);

- op_ = op + len;
  return true;
  }
+ inline size_t Produced() const {
+ assert(op_ >= base_);
+ return op_ - base_;
+ }
+ inline void Flush() {}
  };

  bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
@@ -1241,7 +1163,6 @@ bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
  return RawUncompress(compressed, n, string_as_array(uncompressed));
  }

-
  // A Writer that drops everything on the floor and just does validation
  class SnappyDecompressionValidator {
  private:
@@ -1249,7 +1170,7 @@ class SnappyDecompressionValidator {
  size_t produced_;

  public:
- inline SnappyDecompressionValidator() : produced_(0) { }
+ inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
  inline void SetExpectedLength(size_t len) {
  expected_ = len;
  }
@@ -1270,6 +1191,7 @@ class SnappyDecompressionValidator {
  produced_ += len;
  return produced_ <= expected_;
  }
+ inline void Flush() {}
  };

  bool IsValidCompressedBuffer(const char* compressed, size_t n) {
@@ -1278,6 +1200,11 @@ bool IsValidCompressedBuffer(const char* compressed, size_t n) {
  return InternalUncompress(&reader, &writer);
  }

+ bool IsValidCompressed(Source* compressed) {
+ SnappyDecompressionValidator writer;
+ return InternalUncompress(compressed, &writer);
+ }
+
  void RawCompress(const char* input,
  size_t input_length,
  char* compressed,
@@ -1292,7 +1219,7 @@ void RawCompress(const char* input,

  size_t Compress(const char* input, size_t input_length, string* compressed) {
  // Pre-grow the buffer to the max length of the compressed output
- compressed->resize(MaxCompressedLength(input_length));
+ STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));

  size_t compressed_length;
  RawCompress(input, input_length, string_as_array(compressed),
@@ -1301,6 +1228,237 @@ size_t Compress(const char* input, size_t input_length, string* compressed) {
  return compressed_length;
  }

+ // -----------------------------------------------------------------------
+ // Sink interface
+ // -----------------------------------------------------------------------

- } // end namespace snappy
+ // A type that decompresses into a Sink. The template parameter
+ // Allocator must export one method "char* Allocate(int size);", which
+ // allocates a buffer of "size" and appends that to the destination.
+ template <typename Allocator>
+ class SnappyScatteredWriter {
+ Allocator allocator_;
+
+ // We need random access into the data generated so far. Therefore
+ // we keep track of all of the generated data as an array of blocks.
+ // All of the blocks except the last have length kBlockSize.
+ std::vector<char*> blocks_;
+ size_t expected_;
+
+ // Total size of all fully generated blocks so far
+ size_t full_size_;
+
+ // Pointer into current output block
+ char* op_base_; // Base of output block
+ char* op_ptr_; // Pointer to next unfilled byte in block
+ char* op_limit_; // Pointer just past block
+
+ inline size_t Size() const {
+ return full_size_ + (op_ptr_ - op_base_);
+ }
+
+ bool SlowAppend(const char* ip, size_t len);
+ bool SlowAppendFromSelf(size_t offset, size_t len);
+
+ public:
+ inline explicit SnappyScatteredWriter(const Allocator& allocator)
+ : allocator_(allocator),
+ full_size_(0),
+ op_base_(NULL),
+ op_ptr_(NULL),
+ op_limit_(NULL) {
+ }
+
+ inline void SetExpectedLength(size_t len) {
+ assert(blocks_.empty());
+ expected_ = len;
+ }
+
+ inline bool CheckLength() const {
+ return Size() == expected_;
+ }
+
+ // Return the number of bytes actually uncompressed so far
+ inline size_t Produced() const {
+ return Size();
+ }
+
+ inline bool Append(const char* ip, size_t len) {
+ size_t avail = op_limit_ - op_ptr_;
+ if (len <= avail) {
+ // Fast path
+ memcpy(op_ptr_, ip, len);
+ op_ptr_ += len;
+ return true;
+ } else {
+ return SlowAppend(ip, len);
+ }
+ }
+
+ inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+ char* op = op_ptr_;
+ const int space_left = op_limit_ - op;
+ if (length <= 16 && available >= 16 + kMaximumTagLength &&
+ space_left >= 16) {
+ // Fast path, used for the majority (about 95%) of invocations.
+ UnalignedCopy128(ip, op);
+ op_ptr_ = op + length;
+ return true;
+ } else {
+ return false;
+ }
+ }

+ inline bool AppendFromSelf(size_t offset, size_t len) {
+ char* const op_end = op_ptr_ + len;
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
+ // the "offset - 1u" trick.
+ if (PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ && op_end <= op_limit_)) {
+ // Fast path: src and dst in current block.
+ op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
+ return true;
+ }
+ return SlowAppendFromSelf(offset, len);
+ }
+
+ // Called at the end of the decompress. We ask the allocator
+ // write all blocks to the sink.
+ inline void Flush() { allocator_.Flush(Produced()); }
+ };
+
+ template<typename Allocator>
+ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
+ size_t avail = op_limit_ - op_ptr_;
+ while (len > avail) {
+ // Completely fill this block
+ memcpy(op_ptr_, ip, avail);
+ op_ptr_ += avail;
+ assert(op_limit_ - op_ptr_ == 0);
+ full_size_ += (op_ptr_ - op_base_);
+ len -= avail;
+ ip += avail;
+
+ // Bounds check
+ if (full_size_ + len > expected_) {
+ return false;
+ }
+
+ // Make new block
+ size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+ op_base_ = allocator_.Allocate(bsize);
+ op_ptr_ = op_base_;
+ op_limit_ = op_base_ + bsize;
+ blocks_.push_back(op_base_);
+ avail = bsize;
+ }
+
+ memcpy(op_ptr_, ip, len);
+ op_ptr_ += len;
+ return true;
+ }
+
+ template<typename Allocator>
+ bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
+ size_t len) {
+ // Overflow check
+ // See SnappyArrayWriter::AppendFromSelf for an explanation of
+ // the "offset - 1u" trick.
+ const size_t cur = Size();
+ if (offset - 1u >= cur) return false;
+ if (expected_ - cur < len) return false;
+
+ // Currently we shouldn't ever hit this path because Compress() chops the
+ // input into blocks and does not create cross-block copies. However, it is
+ // nice if we do not rely on that, since we can get better compression if we
+ // allow cross-block copies and thus might want to change the compressor in
+ // the future.
+ size_t src = cur - offset;
+ while (len-- > 0) {
+ char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)];
+ Append(&c, 1);
+ src++;
+ }
+ return true;
+ }
+
+ class SnappySinkAllocator {
+ public:
+ explicit SnappySinkAllocator(Sink* dest): dest_(dest) {}
+ ~SnappySinkAllocator() {}
+
+ char* Allocate(int size) {
+ Datablock block(new char[size], size);
+ blocks_.push_back(block);
+ return block.data;
+ }
+
+ // We flush only at the end, because the writer wants
+ // random access to the blocks and once we hand the
+ // block over to the sink, we can't access it anymore.
+ // Also we don't write more than has been actually written
+ // to the blocks.
+ void Flush(size_t size) {
+ size_t size_written = 0;
+ size_t block_size;
+ for (int i = 0; i < blocks_.size(); ++i) {
+ block_size = min<size_t>(blocks_[i].size, size - size_written);
+ dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
+ &SnappySinkAllocator::Deleter, NULL);
+ size_written += block_size;
+ }
+ blocks_.clear();
+ }
+
+ private:
+ struct Datablock {
+ char* data;
+ size_t size;
+ Datablock(char* p, size_t s) : data(p), size(s) {}
+ };
+
+ static void Deleter(void* arg, const char* bytes, size_t size) {
+ delete[] bytes;
+ }
+
+ Sink* dest_;
+ std::vector<Datablock> blocks_;
+
+ // Note: copying this object is allowed
+ };
+
+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
+ SnappySinkAllocator allocator(uncompressed);
+ SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+ InternalUncompress(compressed, &writer);
+ return writer.Produced();
+ }
+
+ bool Uncompress(Source* compressed, Sink* uncompressed) {
+ // Read the uncompressed length from the front of the compressed input
+ SnappyDecompressor decompressor(compressed);
+ uint32 uncompressed_len = 0;
+ if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
+ return false;
+ }
+
+ char c;
+ size_t allocated_size;
+ char* buf = uncompressed->GetAppendBufferVariable(
+ 1, uncompressed_len, &c, 1, &allocated_size);
+
+ // If we can get a flat buffer, then use it, otherwise do block by block
+ // uncompression
+ if (allocated_size >= uncompressed_len) {
+ SnappyArrayWriter writer(buf);
+ bool result = InternalUncompressAllTags(
+ &decompressor, &writer, uncompressed_len);
+ uncompressed->Append(buf, writer.Produced());
+ return result;
+ } else {
+ SnappySinkAllocator allocator(uncompressed);
+ SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+ return InternalUncompressAllTags(&decompressor, &writer, uncompressed_len);
+ }
+ }
+
+ } // end namespace snappy
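Illustration (not part of the package diff): the new Sink-based entry points above (Uncompress(Source*, Sink*), UncompressAsMuchAsPossible, IsValidCompressed) let callers decompress without owning a single flat buffer up front. A usage sketch, assuming the ByteArraySource and UncheckedByteArraySink helpers declared in snappy-sinksource.h and a build that links against this version of the library:

    #include <cstdio>
    #include <string>

    #include "snappy.h"
    #include "snappy-sinksource.h"

    int main() {
      const std::string original(1000, 'x');
      std::string compressed;
      snappy::Compress(original.data(), original.size(), &compressed);

      // Wrap the compressed bytes in a Source and decompress through the
      // Sink-based API into a preallocated buffer.
      std::string output(original.size(), '\0');
      snappy::ByteArraySource source(compressed.data(), compressed.size());
      snappy::UncheckedByteArraySink sink(&output[0]);
      bool ok = snappy::Uncompress(&source, &sink);

      printf("ok=%d round_trip=%d\n", ok, static_cast<int>(output == original));
      return 0;
    }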