snappy 0.0.15 → 0.0.16

Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +23 -1
  3. data/ext/extconf.rb +1 -9
  4. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/AUTHORS +0 -0
  5. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/COPYING +1 -1
  6. data/home/travis/build/miyucy/snappy/vendor/snappy/ChangeLog +2468 -0
  7. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/Makefile.am +3 -0
  8. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/NEWS +20 -0
  9. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/README +10 -6
  10. data/home/travis/build/miyucy/snappy/vendor/snappy/autogen.sh +12 -0
  11. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/configure.ac +4 -3
  12. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/format_description.txt +0 -0
  13. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/framing_format.txt +0 -0
  14. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/m4/gtest.m4 +0 -0
  15. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-c.cc +0 -0
  16. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-c.h +3 -3
  17. data/home/travis/build/miyucy/snappy/vendor/snappy/snappy-internal.h +227 -0
  18. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-sinksource.cc +33 -0
  19. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-sinksource.h +51 -6
  20. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-stubs-internal.cc +0 -0
  21. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-stubs-internal.h +44 -7
  22. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-stubs-public.h.in +5 -3
  23. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-test.cc +5 -2
  24. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-test.h +22 -5
  25. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy.cc +474 -316
  26. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy.h +23 -4
  27. data/home/travis/build/miyucy/snappy/vendor/snappy/snappy.pc.in +10 -0
  28. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy_unittest.cc +225 -49
  29. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/alice29.txt +0 -0
  30. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/asyoulik.txt +0 -0
  31. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/baddata1.snappy +0 -0
  32. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/baddata2.snappy +0 -0
  33. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/baddata3.snappy +0 -0
  34. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/fireworks.jpeg +0 -0
  35. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/geo.protodata +0 -0
  36. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/html +0 -0
  37. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/html_x_4 +0 -0
  38. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/kppkn.gtb +0 -0
  39. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/lcet10.txt +0 -0
  40. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/paper-100k.pdf +0 -0
  41. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/plrabn12.txt +0 -0
  42. data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/urls.10K +0 -0
  43. data/lib/snappy.rb +2 -1
  44. data/lib/snappy/reader.rb +7 -3
  45. data/lib/snappy/shim.rb +30 -0
  46. data/lib/snappy/version.rb +3 -1
  47. data/lib/snappy/writer.rb +8 -9
  48. data/smoke.sh +8 -0
  49. metadata +44 -41
  50. data/vendor/snappy/ChangeLog +0 -1916
  51. data/vendor/snappy/autogen.sh +0 -7
  52. data/vendor/snappy/snappy-internal.h +0 -150
@@ -28,8 +28,8 @@
  //
  // Various stubs for the open-source version of Snappy.

- #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
- #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_

  #ifdef HAVE_CONFIG_H
  #include "config.h"
@@ -116,6 +116,15 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
  // sub-architectures.
  //
  // This is a mess, but there's not much we can do about it.
+ //
+ // To further complicate matters, only LDR instructions (single reads) are
+ // allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
+ // explicitly tell the compiler that these accesses can be unaligned, it can and
+ // will combine accesses. On armcc, the way to signal this is done by accessing
+ // through the type (uint32 __packed *), but GCC has no such attribute
+ // (it ignores __attribute__((packed)) on individual variables). However,
+ // we can tell it that a _struct_ is unaligned, which has the same effect,
+ // so we do that.

  #elif defined(__arm__) && \
        !defined(__ARM_ARCH_4__) && \
@@ -131,11 +140,39 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
        !defined(__ARM_ARCH_6ZK__) && \
        !defined(__ARM_ARCH_6T2__)

- #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
- #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+ #if __GNUC__
+ #define ATTRIBUTE_PACKED __attribute__((__packed__))
+ #else
+ #define ATTRIBUTE_PACKED
+ #endif

- #define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
- #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+ namespace base {
+ namespace internal {
+
+ struct Unaligned16Struct {
+   uint16 value;
+   uint8 dummy;  // To make the size non-power-of-two.
+ } ATTRIBUTE_PACKED;
+
+ struct Unaligned32Struct {
+   uint32 value;
+   uint8 dummy;  // To make the size non-power-of-two.
+ } ATTRIBUTE_PACKED;
+
+ }  // namespace internal
+ }  // namespace base
+
+ #define UNALIGNED_LOAD16(_p) \
+     ((reinterpret_cast<const ::snappy::base::internal::Unaligned16Struct *>(_p))->value)
+ #define UNALIGNED_LOAD32(_p) \
+     ((reinterpret_cast<const ::snappy::base::internal::Unaligned32Struct *>(_p))->value)
+
+ #define UNALIGNED_STORE16(_p, _val) \
+     ((reinterpret_cast< ::snappy::base::internal::Unaligned16Struct *>(_p))->value = \
+      (_val))
+ #define UNALIGNED_STORE32(_p, _val) \
+     ((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>(_p))->value = \
+      (_val))

  // TODO(user): NEON supports unaligned 64-bit loads and stores.
  // See if that would be more efficient on platforms supporting it,
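Editorial illustration (not part of the gem diff): the packed-struct trick used above can be shown in a few self-contained lines. The names UnalignedU32 and LoadU32Unaligned are invented for this sketch; only the __attribute__((__packed__)) mechanism itself comes from the patch.

  #include <stdint.h>

  // A packed wrapper struct: the dummy byte makes sizeof() non-power-of-two,
  // and the packed attribute tells GCC the 'value' field may be misaligned.
  struct UnalignedU32 {
    uint32_t value;
    uint8_t dummy;
  } __attribute__((__packed__));

  inline uint32_t LoadU32Unaligned(const void* p) {
    // Because the struct is packed, the compiler emits byte/halfword loads
    // (or single LDRs) instead of an instruction requiring natural alignment.
    return reinterpret_cast<const UnalignedU32*>(p)->value;
  }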
@@ -488,4 +525,4 @@ inline char* string_as_array(string* str) {

  }  // namespace snappy

- #endif  // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+ #endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
@@ -33,8 +33,8 @@
  // which is a public header. Instead, snappy-stubs-public.h is generated by
  // from snappy-stubs-public.h.in at configure time.

- #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
- #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_

  #if @ac_cv_have_stdint_h@
  #include <stdint.h>
@@ -80,9 +80,11 @@ typedef unsigned long long uint64;

  typedef std::string string;

+ #ifndef DISALLOW_COPY_AND_ASSIGN
  #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
    TypeName(const TypeName&);               \
    void operator=(const TypeName&)
+ #endif

  #if !@ac_cv_have_sys_uio_h@
  // Windows does not have an iovec type, yet the concept is universally useful.
@@ -95,4 +97,4 @@ struct iovec {

  }  // namespace snappy

- #endif  // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+ #endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
@@ -28,13 +28,16 @@
  //
  // Various stubs for the unit tests for the open-source version of Snappy.

- #include "snappy-test.h"
+ #ifdef HAVE_CONFIG_H
+ #include "config.h"
+ #endif

  #ifdef HAVE_WINDOWS_H
- #define WIN32_LEAN_AND_MEAN
  #include <windows.h>
  #endif

+ #include "snappy-test.h"
+
  #include <algorithm>

  DEFINE_bool(run_microbenchmarks, true,
@@ -28,8 +28,8 @@
  //
  // Various stubs for the unit tests for the open-source version of Snappy.

- #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
- #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_

  #include <iostream>
  #include <string>
@@ -52,7 +52,6 @@
  #endif

  #ifdef HAVE_WINDOWS_H
- #define WIN32_LEAN_AND_MEAN
  #include <windows.h>
  #endif

@@ -132,7 +131,7 @@ namespace File {
  }  // namespace File

  namespace file {
-   int Defaults() { }
+   int Defaults() { return 0; }

    class DummyStatus {
     public:
@@ -158,6 +157,8 @@ namespace file {
    }

    fclose(fp);
+
+   return DummyStatus();
  }

  DummyStatus SetContents(const string& filename,
@@ -176,6 +177,8 @@ namespace file {
    }

    fclose(fp);
+
+   return DummyStatus();
  }
  }  // namespace file

@@ -193,6 +196,7 @@ void Test_Snappy_RandomData();
  void Test_Snappy_FourByteOffset();
  void Test_SnappyCorruption_TruncatedVarint();
  void Test_SnappyCorruption_UnterminatedVarint();
+ void Test_SnappyCorruption_OverflowingVarint();
  void Test_Snappy_ReadPastEndOfBuffer();
  void Test_Snappy_FindMatchLength();
  void Test_Snappy_FindMatchLengthRandom();
@@ -497,6 +501,7 @@ static inline int RUN_ALL_TESTS() {
  snappy::Test_Snappy_FourByteOffset();
  snappy::Test_SnappyCorruption_TruncatedVarint();
  snappy::Test_SnappyCorruption_UnterminatedVarint();
+ snappy::Test_SnappyCorruption_OverflowingVarint();
  snappy::Test_Snappy_ReadPastEndOfBuffer();
  snappy::Test_Snappy_FindMatchLength();
  snappy::Test_Snappy_FindMatchLengthRandom();
@@ -544,6 +549,13 @@ class LogMessage {
      PREDICT_TRUE(condition) ? (void)0 : \
      snappy::LogMessageVoidify() & snappy::LogMessageCrash()

+ #ifdef _MSC_VER
+ // ~LogMessageCrash calls abort() and therefore never exits. This is by design
+ // so temporarily disable warning C4722.
+ #pragma warning(push)
+ #pragma warning(disable:4722)
+ #endif
+
  class LogMessageCrash : public LogMessage {
   public:
    LogMessageCrash() { }
@@ -553,6 +565,10 @@ class LogMessageCrash : public LogMessage {
    }
  };

+ #ifdef _MSC_VER
+ #pragma warning(pop)
+ #endif
+
  // This class is used to explicitly ignore values in the conditional
  // logging macros. This avoids compiler warnings like "value computed
  // is not used" and "statement has no effect".
@@ -572,6 +588,7 @@ class LogMessageVoidify {
  #define CHECK_NE(a, b) CRASH_UNLESS((a) != (b))
  #define CHECK_LT(a, b) CRASH_UNLESS((a) < (b))
  #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
+ #define CHECK_OK(cond) (cond).CheckSuccess()

  }  // namespace

@@ -579,4 +596,4 @@ using snappy::CompressFile;
  using snappy::UncompressFile;
  using snappy::MeasureFile;

- #endif  // UTIL_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
+ #endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
@@ -30,6 +30,9 @@
  #include "snappy-internal.h"
  #include "snappy-sinksource.h"

+ #if defined(__x86_64__) || defined(_M_X64)
+ #include <emmintrin.h>
+ #endif
  #include <stdio.h>

  #include <algorithm>
@@ -39,6 +42,13 @@

  namespace snappy {

+ using internal::COPY_1_BYTE_OFFSET;
+ using internal::COPY_2_BYTE_OFFSET;
+ using internal::LITERAL;
+ using internal::char_table;
+ using internal::kMaximumTagLength;
+ using internal::wordmask;
+
  // Any hash function will produce a valid compressed bitstream, but a good
  // hash function reduces the number of collisions and thus yields better
  // compression for compressible input, and more speed for incompressible
@@ -76,79 +86,125 @@ size_t MaxCompressedLength(size_t source_len) {
    return 32 + source_len + source_len/6;
  }

- enum {
-   LITERAL = 0,
-   COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
-   COPY_2_BYTE_OFFSET = 2,
-   COPY_4_BYTE_OFFSET = 3
- };
- static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual offset.
-
- // Copy "len" bytes from "src" to "op", one byte at a time. Used for
- // handling COPY operations where the input and output regions may
- // overlap. For example, suppose:
- //    src    == "ab"
- //    op     == src + 2
- //    len    == 20
- // After IncrementalCopy(src, op, len), the result will have
- // eleven copies of "ab"
- //    ababababababababababab
- // Note that this does not match the semantics of either memcpy()
- // or memmove().
- static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
-   assert(len > 0);
-   do {
-     *op++ = *src++;
-   } while (--len > 0);
+ namespace {
+
+ void UnalignedCopy64(const void* src, void* dst) {
+   memcpy(dst, src, 8);
  }

- // Equivalent to IncrementalCopy except that it can write up to ten extra
- // bytes after the end of the copy, and that it is faster.
- //
- // The main part of this loop is a simple copy of eight bytes at a time until
- // we've copied (at least) the requested amount of bytes. However, if op and
- // src are less than eight bytes apart (indicating a repeating pattern of
- // length < 8), we first need to expand the pattern in order to get the correct
- // results. For instance, if the buffer looks like this, with the eight-byte
- // <src> and <op> patterns marked as intervals:
- //
- //    abxxxxxxxxxxxx
- //    [------]           src
- //      [------]         op
- //
- // a single eight-byte copy from <src> to <op> will repeat the pattern once,
- // after which we can move <op> two bytes without moving <src>:
- //
- //    ababxxxxxxxxxx
- //    [------]           src
- //        [------]       op
- //
- // and repeat the exercise until the two no longer overlap.
- //
- // This allows us to do very well in the special case of one single byte
- // repeated many times, without taking a big hit for more general cases.
- //
- // The worst case of extra writing past the end of the match occurs when
- // op - src == 1 and len == 1; the last copy will read from byte positions
- // [0..7] and write to [4..11], whereas it was only supposed to write to
- // position 1. Thus, ten excess bytes.
+ void UnalignedCopy128(const void* src, void* dst) {
+   // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
+   // SSE2 moves for memcpy(dst, src, 16).
+ #ifdef __SSE2__
+   __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
+   _mm_storeu_si128(static_cast<__m128i*>(dst), x);
+ #else
+   memcpy(dst, src, 16);
+ #endif
+ }

- namespace {
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used
+ // for handling COPY operations where the input and output regions may overlap.
+ // For example, suppose:
+ //    src       == "ab"
+ //    op        == src + 2
+ //    op_limit  == op + 20
+ // After IncrementalCopySlow(src, op, op_limit), the result will have eleven
+ // copies of "ab"
+ //    ababababababababababab
+ // Note that this does not match the semantics of either memcpy() or memmove().
+ inline char* IncrementalCopySlow(const char* src, char* op,
+                                  char* const op_limit) {
+   while (op < op_limit) {
+     *op++ = *src++;
+   }
+   return op_limit;
+ }

- const int kMaxIncrementCopyOverflow = 10;
+ // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
+ // IncrementalCopySlow. buf_limit is the address past the end of the writable
+ // region of the buffer.
+ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
+                              char* const buf_limit) {
+   // Terminology:
+   //
+   // slop = buf_limit - op
+   // pat  = op - src
+   // len  = limit - op
+   assert(src < op);
+   assert(op_limit <= buf_limit);
+   // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
+   // to optimize this function but we have to also handle these cases in case
+   // the input does not satisfy these conditions.
+
+   size_t pattern_size = op - src;
+   // The cases are split into different branches to allow the branch predictor,
+   // FDO, and static prediction hints to work better. For each input we list the
+   // ratio of invocations that match each condition.
+   //
+   // input          slop < 16   pat < 8    len > 16
+   // ------------------------------------------
+   // html|html4|cp      0%        1.01%     27.73%
+   // urls               0%        0.88%     14.79%
+   // jpg                0%       64.29%      7.14%
+   // pdf                0%        2.56%     58.06%
+   // txt[1-4]           0%        0.23%      0.97%
+   // pb                 0%        0.96%     13.88%
+   // bin                0.01%    22.27%     41.17%
+   //
+   // It is very rare that we don't have enough slop for doing block copies. It
+   // is also rare that we need to expand a pattern. Small patterns are common
+   // for incompressible formats and for those we are plenty fast already.
+   // Lengths are normally not greater than 16 but they vary depending on the
+   // input. In general if we always predict len <= 16 it would be an ok
+   // prediction.
+   //
+   // In order to be fast we want a pattern >= 8 bytes and an unrolled loop
+   // copying 2x 8 bytes at a time.
+
+   // Handle the uncommon case where pattern is less than 8 bytes.
+   if (PREDICT_FALSE(pattern_size < 8)) {
+     // Expand pattern to at least 8 bytes. The worse case scenario in terms of
+     // buffer usage is when the pattern is size 3. ^ is the original position
+     // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
+     //
+     // abc
+     // abcabcxxxxx
+     // abcabcabcabcxxxxx
+     //    ^
+     // The last x is 14 bytes after ^.
+     if (PREDICT_TRUE(op <= buf_limit - 14)) {
+       while (pattern_size < 8) {
+         UnalignedCopy64(src, op);
+         op += pattern_size;
+         pattern_size *= 2;
+       }
+       if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+     } else {
+       return IncrementalCopySlow(src, op, op_limit);
+     }
+   }
+   assert(pattern_size >= 8);

- inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
-   while (op - src < 8) {
+   // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single
+   // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
+   // because expanding the pattern to at least 8 bytes guarantees that
+   // op - src >= 8.
+   while (op <= buf_limit - 16) {
      UnalignedCopy64(src, op);
-     len -= op - src;
-     op += op - src;
+     UnalignedCopy64(src + 8, op + 8);
+     src += 16;
+     op += 16;
+     if (PREDICT_TRUE(op >= op_limit)) return op_limit;
    }
-   while (len > 0) {
+   // We only take this branch if we didn't have enough slop and we can do a
+   // single 8 byte copy.
+   if (PREDICT_FALSE(op <= buf_limit - 8)) {
      UnalignedCopy64(src, op);
      src += 8;
      op += 8;
-     len -= 8;
    }
+   return IncrementalCopySlow(src, op, op_limit);
  }

  }  // namespace
157
213
  const char* literal,
158
214
  int len,
159
215
  bool allow_fast_path) {
160
- int n = len - 1; // Zero-length literals are disallowed
161
- if (n < 60) {
216
+ // The vast majority of copies are below 16 bytes, for which a
217
+ // call to memcpy is overkill. This fast path can sometimes
218
+ // copy up to 15 bytes too much, but that is okay in the
219
+ // main loop, since we have a bit to go on for both sides:
220
+ //
221
+ // - The input will always have kInputMarginBytes = 15 extra
222
+ // available bytes, as long as we're in the main loop, and
223
+ // if not, allow_fast_path = false.
224
+ // - The output will always have 32 spare bytes (see
225
+ // MaxCompressedLength).
226
+ assert(len > 0); // Zero-length literals are disallowed
227
+ int n = len - 1;
228
+ if (allow_fast_path && len <= 16) {
162
229
  // Fits in tag byte
163
230
  *op++ = LITERAL | (n << 2);
164
231
 
165
- // The vast majority of copies are below 16 bytes, for which a
166
- // call to memcpy is overkill. This fast path can sometimes
167
- // copy up to 15 bytes too much, but that is okay in the
168
- // main loop, since we have a bit to go on for both sides:
169
- //
170
- // - The input will always have kInputMarginBytes = 15 extra
171
- // available bytes, as long as we're in the main loop, and
172
- // if not, allow_fast_path = false.
173
- // - The output will always have 32 spare bytes (see
174
- // MaxCompressedLength).
175
- if (allow_fast_path && len <= 16) {
176
- UnalignedCopy64(literal, op);
177
- UnalignedCopy64(literal + 8, op + 8);
178
- return op + len;
179
- }
232
+ UnalignedCopy128(literal, op);
233
+ return op + len;
234
+ }
235
+
236
+ if (n < 60) {
237
+ // Fits in tag byte
238
+ *op++ = LITERAL | (n << 2);
180
239
  } else {
181
240
  // Encode in upcoming bytes
182
241
  char* base = op;
@@ -195,42 +254,54 @@ static inline char* EmitLiteral(char* op,
195
254
  return op + len;
196
255
  }
197
256
 
198
- static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
257
+ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
258
+ bool len_less_than_12) {
199
259
  assert(len <= 64);
200
260
  assert(len >= 4);
201
261
  assert(offset < 65536);
262
+ assert(len_less_than_12 == (len < 12));
202
263
 
203
- if ((len < 12) && (offset < 2048)) {
204
- size_t len_minus_4 = len - 4;
205
- assert(len_minus_4 < 8); // Must fit in 3 bits
206
- *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
264
+ if (len_less_than_12 && PREDICT_TRUE(offset < 2048)) {
265
+ // offset fits in 11 bits. The 3 highest go in the top of the first byte,
266
+ // and the rest go in the second byte.
267
+ *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
207
268
  *op++ = offset & 0xff;
208
269
  } else {
209
- *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
210
- LittleEndian::Store16(op, offset);
211
- op += 2;
270
+ // Write 4 bytes, though we only care about 3 of them. The output buffer
271
+ // is required to have some slack, so the extra byte won't overrun it.
272
+ uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8);
273
+ LittleEndian::Store32(op, u);
274
+ op += 3;
212
275
  }
213
276
  return op;
214
277
  }
215
278
 
216
- static inline char* EmitCopy(char* op, size_t offset, int len) {
217
- // Emit 64 byte copies but make sure to keep at least four bytes reserved
218
- while (len >= 68) {
219
- op = EmitCopyLessThan64(op, offset, 64);
220
- len -= 64;
221
- }
279
+ static inline char* EmitCopy(char* op, size_t offset, size_t len,
280
+ bool len_less_than_12) {
281
+ assert(len_less_than_12 == (len < 12));
282
+ if (len_less_than_12) {
283
+ return EmitCopyAtMost64(op, offset, len, true);
284
+ } else {
285
+ // A special case for len <= 64 might help, but so far measurements suggest
286
+ // it's in the noise.
222
287
 
223
- // Emit an extra 60 byte copy if have too much data to fit in one copy
224
- if (len > 64) {
225
- op = EmitCopyLessThan64(op, offset, 60);
226
- len -= 60;
227
- }
288
+ // Emit 64 byte copies but make sure to keep at least four bytes reserved.
289
+ while (PREDICT_FALSE(len >= 68)) {
290
+ op = EmitCopyAtMost64(op, offset, 64, false);
291
+ len -= 64;
292
+ }
228
293
 
229
- // Emit remainder
230
- op = EmitCopyLessThan64(op, offset, len);
231
- return op;
232
- }
294
+ // One or two copies will now finish the job.
295
+ if (len > 64) {
296
+ op = EmitCopyAtMost64(op, offset, 60, false);
297
+ len -= 60;
298
+ }
233
299
 
300
+ // Emit remainder.
301
+ op = EmitCopyAtMost64(op, offset, len, len < 12);
302
+ return op;
303
+ }
304
+ }
234
305
 
235
306
  bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
236
307
  uint32 v = 0;
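Editorial worked example (not part of the gem diff): encoding a copy with len == 7 and offset == 1500 in the 1-byte-offset form emitted by EmitCopyAtMost64. COPY_1_BYTE_OFFSET == 1 per the enum elsewhere in this diff; (len - 4) occupies bits 2..4 of the tag, the top 3 bits of the 11-bit offset occupy bits 5..7, and the low 8 offset bits go in the second byte.

  #include <stdint.h>
  #include <stdio.h>

  int main() {
    const unsigned len = 7, offset = 1500;   // 1500 = 0b101'1101'1100
    uint8_t tag  = 1 + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
    uint8_t next = offset & 0xff;
    printf("%02x %02x\n", tag, next);        // prints "ad dc"
    return 0;
  }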
@@ -364,9 +435,9 @@ char* CompressFragment(const char* input,
      //
      // Heuristic match skipping: If 32 bytes are scanned with no matches
      // found, start looking only at every other byte. If 32 more bytes are
-     // scanned, look at every third byte, etc.. When a match is found,
-     // immediately go back to looking at every byte. This is a small loss
-     // (~5% performance, ~0.1% density) for compressible data due to more
+     // scanned (or skipped), look at every third byte, etc.. When a match is
+     // found, immediately go back to looking at every byte. This is a small
+     // loss (~5% performance, ~0.1% density) for compressible data due to more
      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
      // win since the compressor quickly "realizes" the data is incompressible
      // and doesn't bother looking for matches everywhere.
@@ -382,7 +453,8 @@ char* CompressFragment(const char* input,
        ip = next_ip;
        uint32 hash = next_hash;
        assert(hash == Hash(ip, shift));
-       uint32 bytes_between_hash_lookups = skip++ >> 5;
+       uint32 bytes_between_hash_lookups = skip >> 5;
+       skip += bytes_between_hash_lookups;
        next_ip = ip + bytes_between_hash_lookups;
        if (PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
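Editorial illustration (not part of the gem diff): comparing the two skip schedules above. The old rule (skip++ >> 5) grows the step by one every 32 misses, so total advance grows roughly quadratically; the new rule (skip += skip >> 5) grows the step by about 3% per miss, so the scan accelerates geometrically through incompressible regions. The loop bounds and starting value of 32 are chosen for this sketch.

  #include <stdio.h>

  int main() {
    unsigned old_skip = 32, new_skip = 32;
    unsigned long long old_pos = 0, new_pos = 0;
    for (int miss = 1; miss <= 512; ++miss) {
      old_pos += old_skip++ >> 5;          // old schedule
      unsigned step = new_skip >> 5;       // new schedule
      new_skip += step;
      new_pos += step;
      if (miss % 128 == 0)
        printf("after %3d misses: old advanced %llu bytes, new advanced %llu bytes\n",
               miss, old_pos, new_pos);
    }
    return 0;
  }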
@@ -417,19 +489,21 @@ char* CompressFragment(const char* input,
        // We have a 4-byte match at ip, and no need to emit any
        // "literal bytes" prior to ip.
        const char* base = ip;
-       int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
+       std::pair<size_t, bool> p =
+           FindMatchLength(candidate + 4, ip + 4, ip_end);
+       size_t matched = 4 + p.first;
        ip += matched;
        size_t offset = base - candidate;
        assert(0 == memcmp(base, candidate, matched));
-       op = EmitCopy(op, offset, matched);
-       // We could immediately start working at ip now, but to improve
-       // compression we first update table[Hash(ip - 1, ...)].
-       const char* insert_tail = ip - 1;
+       op = EmitCopy(op, offset, matched, p.second);
        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
-       input_bytes = GetEightBytesAt(insert_tail);
+       // We are now looking for a 4-byte match again. We read
+       // table[Hash(ip, shift)] for that. To improve compression,
+       // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
+       input_bytes = GetEightBytesAt(ip - 1);
        uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
        table[prev_hash] = ip - base_ip - 1;
        uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
@@ -493,162 +567,6 @@ char* CompressFragment(const char* input,
  //   bool TryFastAppend(const char* ip, size_t available, size_t length);
  // };

- // -----------------------------------------------------------------------
- // Lookup table for decompression code. Generated by ComputeTable() below.
- // -----------------------------------------------------------------------
-
- // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
- static const uint32 wordmask[] = {
-   0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
- };
-
- // Data stored per entry in lookup table:
- //      Range   Bits-used       Description
- //      ------------------------------------
- //      1..64   0..7            Literal/copy length encoded in opcode byte
- //      0..7    8..10           Copy offset encoded in opcode byte / 256
- //      0..4    11..13          Extra bytes after opcode
- //
- // We use eight bits for the length even though 7 would have sufficed
- // because of efficiency reasons:
- //      (1) Extracting a byte is faster than a bit-field
- //      (2) It properly aligns copy offset so we do not need a <<8
- static const uint16 char_table[256] = {
-   0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
-   0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
-   0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
-   0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
-   0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
-   0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
-   0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
-   0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
-   0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
-   0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
-   0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
-   0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
-   0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
-   0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
-   0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
-   0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
-   0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
-   0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
-   0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
-   0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
-   0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
-   0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
-   0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
-   0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
-   0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
-   0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
-   0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
-   0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
-   0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
-   0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
-   0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
-   0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
- };
-
- // In debug mode, allow optional computation of the table at startup.
- // Also, check that the decompression table is correct.
- #ifndef NDEBUG
- DEFINE_bool(snappy_dump_decompression_table, false,
-             "If true, we print the decompression table at startup.");
-
- static uint16 MakeEntry(unsigned int extra,
-                         unsigned int len,
-                         unsigned int copy_offset) {
-   // Check that all of the fields fit within the allocated space
-   assert(extra       == (extra & 0x7));      // At most 3 bits
-   assert(copy_offset == (copy_offset & 0x7));  // At most 3 bits
-   assert(len         == (len & 0x7f));       // At most 7 bits
-   return len | (copy_offset << 8) | (extra << 11);
- }
-
- static void ComputeTable() {
-   uint16 dst[256];
-
-   // Place invalid entries in all places to detect missing initialization
-   int assigned = 0;
-   for (int i = 0; i < 256; i++) {
-     dst[i] = 0xffff;
-   }
-
-   // Small LITERAL entries.  We store (len-1) in the top 6 bits.
-   for (unsigned int len = 1; len <= 60; len++) {
-     dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
-     assigned++;
-   }
-
-   // Large LITERAL entries.  We use 60..63 in the high 6 bits to
-   // encode the number of bytes of length info that follow the opcode.
-   for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
-     // We set the length field in the lookup table to 1 because extra
-     // bytes encode len-1.
-     dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
-     assigned++;
-   }
-
-   // COPY_1_BYTE_OFFSET.
-   //
-   // The tag byte in the compressed data stores len-4 in 3 bits, and
-   // offset/256 in 5 bits.  offset%256 is stored in the next byte.
-   //
-   // This format is used for length in range [4..11] and offset in
-   // range [0..2047]
-   for (unsigned int len = 4; len < 12; len++) {
-     for (unsigned int offset = 0; offset < 2048; offset += 256) {
-       dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
-           MakeEntry(1, len, offset>>8);
-       assigned++;
-     }
-   }
-
-   // COPY_2_BYTE_OFFSET.
-   // Tag contains len-1 in top 6 bits, and offset in next two bytes.
-   for (unsigned int len = 1; len <= 64; len++) {
-     dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
-     assigned++;
-   }
-
-   // COPY_4_BYTE_OFFSET.
-   // Tag contents len-1 in top 6 bits, and offset in next four bytes.
-   for (unsigned int len = 1; len <= 64; len++) {
-     dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
-     assigned++;
-   }
-
-   // Check that each entry was initialized exactly once.
-   if (assigned != 256) {
-     fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
-     abort();
-   }
-   for (int i = 0; i < 256; i++) {
-     if (dst[i] == 0xffff) {
-       fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
-       abort();
-     }
-   }
-
-   if (FLAGS_snappy_dump_decompression_table) {
-     printf("static const uint16 char_table[256] = {\n  ");
-     for (int i = 0; i < 256; i++) {
-       printf("0x%04x%s",
-              dst[i],
-              ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
-     }
-     printf("};\n");
-   }
-
-   // Check that computed table matched recorded table
-   for (int i = 0; i < 256; i++) {
-     if (dst[i] != char_table[i]) {
-       fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
-               i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
-       abort();
-     }
-   }
- }
- #endif /* !NDEBUG */

  // Helper class for decompression
  class SnappyDecompressor {
@@ -701,7 +619,9 @@ class SnappyDecompressor {
      if (n == 0) return false;
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
      reader_->Skip(1);
-     *result |= static_cast<uint32>(c & 0x7f) << shift;
+     uint32 val = c & 0x7f;
+     if (((val << shift) >> shift) != val) return false;
+     *result |= val << shift;
      if (c < 128) {
        break;
      }
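Editorial sketch (not part of the gem diff) of the overflow guard added above: a 32-bit little-endian varint uses at most five bytes, and the fifth byte may only contribute four low bits. Shifting each 7-bit group up and back down detects bits that would fall off the top of the 32-bit result. The function name and buffers are invented for this example.

  #include <stdint.h>
  #include <stdio.h>

  static bool ParseVarint32(const uint8_t* p, const uint8_t* end, uint32_t* out) {
    uint32_t result = 0;
    for (uint32_t shift = 0; shift <= 28 && p < end; shift += 7) {
      uint32_t val = *p & 0x7f;
      if (((val << shift) >> shift) != val) return false;  // overflowing group
      result |= val << shift;
      if (*p++ < 128) { *out = result; return true; }
    }
    return false;  // truncated, or longer than five bytes
  }

  int main() {
    const uint8_t ok[]  = {0xff, 0xff, 0xff, 0xff, 0x0f};  // decodes to 0xffffffff
    const uint8_t bad[] = {0xff, 0xff, 0xff, 0xff, 0x7f};  // overflows 32 bits
    uint32_t v;
    printf("%d %d\n", ParseVarint32(ok, ok + 5, &v), ParseVarint32(bad, bad + 5, &v));
    return 0;
  }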
@@ -715,6 +635,10 @@ class SnappyDecompressor {
  template <class Writer>
  void DecompressAllTags(Writer* writer) {
    const char* ip = ip_;
+   // For position-independent executables, accessing global arrays can be
+   // slow. Move wordmask array onto the stack to mitigate this.
+   uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
+   memcpy(wordmask, internal::wordmask, sizeof(wordmask));

    // We could have put this refill fragment only at the beginning of the loop.
    // However, duplicating it at the end of each branch gives the compiler more
@@ -731,7 +655,19 @@ class SnappyDecompressor {
    for ( ;; ) {
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

-     if ((c & 0x3) == LITERAL) {
+     // Ratio of iterations that have LITERAL vs non-LITERAL for different
+     // inputs.
+     //
+     // input          LITERAL  NON_LITERAL
+     // -----------------------------------
+     // html|html4|cp    23%      77%
+     // urls             36%      64%
+     // jpg              47%      53%
+     // pdf              19%      81%
+     // txt[1-4]         25%      75%
+     // pb               24%      76%
+     // bin              24%      76%
+     if (PREDICT_FALSE((c & 0x3) == LITERAL)) {
        size_t literal_length = (c >> 2) + 1u;
        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
          assert(literal_length < 61);
@@ -767,15 +703,15 @@ class SnappyDecompressor {
        ip += literal_length;
        MAYBE_REFILL();
      } else {
-       const uint32 entry = char_table[c];
-       const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
-       const uint32 length = entry & 0xff;
+       const size_t entry = char_table[c];
+       const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+       const size_t length = entry & 0xff;
        ip += entry >> 11;

        // copy_offset/256 is encoded in bits 8..10. By just fetching
        // those bits, we get copy_offset (since the bit-field starts at
        // bit 8).
-       const uint32 copy_offset = entry & 0x700;
+       const size_t copy_offset = entry & 0x700;
        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
          return;
        }
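Editorial worked example (not part of the gem diff): decoding the same copy that was encoded earlier, tag byte 0xad followed by 0xdc. Per the char_table removed from snappy.cc above (and now pulled in through the using internal::char_table declaration, i.e. from the new snappy-internal.h), char_table[0xad] == 0x0d07: the low 8 bits give the length, bits 8..10 give the high part of the offset, bits 11..13 say one extra tag byte follows.

  #include <stdio.h>

  int main() {
    const unsigned entry     = 0x0d07;           // char_table[0xad]
    const unsigned extra     = 0xdc;             // byte following the tag
    const unsigned wordmask1 = 0xff;             // mask for one extra byte
    unsigned length      = entry & 0xff;         // 7
    unsigned copy_offset = entry & 0x700;        // 0x500 == 1280
    unsigned trailer     = extra & wordmask1;    // 220
    printf("len=%u offset=%u\n", length, copy_offset + trailer);  // len=7 offset=1500
    return 0;
  }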
@@ -795,10 +731,8 @@ bool SnappyDecompressor::RefillTag() {
      size_t n;
      ip = reader_->Peek(&n);
      peeked_ = n;
-     if (n == 0) {
-       eof_ = true;
-       return false;
-     }
+     eof_ = (n == 0);
+     if (eof_) return false;
      ip_limit_ = ip + n;
    }

@@ -863,6 +797,7 @@ static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,

    // Process the entire input
    decompressor->DecompressAllTags(writer);
+   writer->Flush();
    return (decompressor->eof() && writer->CheckLength());
  }

@@ -965,7 +900,7 @@ class SnappyIOVecWriter {
    const size_t output_iov_count_;

    // We are currently writing into output_iov_[curr_iov_index_].
-   int curr_iov_index_;
+   size_t curr_iov_index_;

    // Bytes written to output_iov_[curr_iov_index_] so far.
    size_t curr_iov_written_;
@@ -976,7 +911,7 @@ class SnappyIOVecWriter {
    // Maximum number of bytes that will be decompressed into output_iov_.
    size_t output_limit_;

-   inline char* GetIOVecPointer(int index, size_t offset) {
+   inline char* GetIOVecPointer(size_t index, size_t offset) {
      return reinterpret_cast<char*>(output_iov_[index].iov_base) +
          offset;
    }
@@ -1037,8 +972,7 @@ class SnappyIOVecWriter {
        output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
      // Fast path, used for the majority (about 95%) of invocations.
      char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
-     UnalignedCopy64(ip, ptr);
-     UnalignedCopy64(ip + 8, ptr + 8);
+     UnalignedCopy128(ip, ptr);
      curr_iov_written_ += len;
      total_written_ += len;
      return true;
@@ -1057,7 +991,7 @@ class SnappyIOVecWriter {
    }

    // Locate the iovec from which we need to start the copy.
-   int from_iov_index = curr_iov_index_;
+   size_t from_iov_index = curr_iov_index_;
    size_t from_iov_offset = curr_iov_written_;
    while (offset > 0) {
      if (from_iov_offset >= offset) {
@@ -1066,8 +1000,8 @@ class SnappyIOVecWriter {
      }

      offset -= from_iov_offset;
+     assert(from_iov_index > 0);
      --from_iov_index;
-     assert(from_iov_index >= 0);
      from_iov_offset = output_iov_[from_iov_index].iov_len;
    }

@@ -1102,9 +1036,10 @@ class SnappyIOVecWriter {
      if (to_copy > len) {
        to_copy = len;
      }
-     IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
-                     GetIOVecPointer(curr_iov_index_, curr_iov_written_),
-                     to_copy);
+     IncrementalCopySlow(
+         GetIOVecPointer(from_iov_index, from_iov_offset),
+         GetIOVecPointer(curr_iov_index_, curr_iov_written_),
+         GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy);
      curr_iov_written_ += to_copy;
      from_iov_offset += to_copy;
      total_written_ += to_copy;
@@ -1115,6 +1050,7 @@ class SnappyIOVecWriter {
      return true;
    }

+   inline void Flush() {}
  };

  bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
@@ -1145,7 +1081,8 @@ class SnappyArrayWriter {
   public:
    inline explicit SnappyArrayWriter(char* dst)
        : base_(dst),
-         op_(dst) {
+         op_(dst),
+         op_limit_(dst) {
    }

    inline void SetExpectedLength(size_t len) {
@@ -1172,8 +1109,7 @@
      const size_t space_left = op_limit_ - op;
      if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
        // Fast path, used for the majority (about 95%) of invocations.
-       UnalignedCopy64(ip, op);
-       UnalignedCopy64(ip + 8, op + 8);
+       UnalignedCopy128(ip, op);
        op_ = op + len;
        return true;
      } else {
@@ -1182,8 +1118,7 @@
    }

    inline bool AppendFromSelf(size_t offset, size_t len) {
-     char* op = op_;
-     const size_t space_left = op_limit_ - op;
+     char* const op_end = op_ + len;

      // Check if we try to append from before the start of the buffer.
      // Normally this would just be a check for "produced < offset",
@@ -1192,29 +1127,16 @@
      // to a very big number. This is convenient, as offset==0 is another
      // invalid case that we also want to catch, so that we do not go
      // into an infinite loop.
-     assert(op >= base_);
-     size_t produced = op - base_;
-     if (produced <= offset - 1u) {
-       return false;
-     }
-     if (len <= 16 && offset >= 8 && space_left >= 16) {
-       // Fast path, used for the majority (70-80%) of dynamic invocations.
-       UnalignedCopy64(op - offset, op);
-       UnalignedCopy64(op - offset + 8, op + 8);
-     } else {
-       if (space_left >= len + kMaxIncrementCopyOverflow) {
-         IncrementalCopyFastPath(op - offset, op, len);
-       } else {
-         if (space_left < len) {
-           return false;
-         }
-         IncrementalCopy(op - offset, op, len);
-       }
-     }
+     if (Produced() <= offset - 1u || op_end > op_limit_) return false;
+     op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_);

-     op_ = op + len;
      return true;
    }
+   inline size_t Produced() const {
+     assert(op_ >= base_);
+     return op_ - base_;
+   }
+   inline void Flush() {}
  };

  bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
@@ -1241,7 +1163,6 @@ bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
    return RawUncompress(compressed, n, string_as_array(uncompressed));
  }

-
  // A Writer that drops everything on the floor and just does validation
  class SnappyDecompressionValidator {
   private:
@@ -1249,7 +1170,7 @@ class SnappyDecompressionValidator {
    size_t produced_;

   public:
-   inline SnappyDecompressionValidator() : produced_(0) { }
+   inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
    inline void SetExpectedLength(size_t len) {
      expected_ = len;
    }
@@ -1270,6 +1191,7 @@ class SnappyDecompressionValidator {
      produced_ += len;
      return produced_ <= expected_;
    }
+   inline void Flush() {}
  };

  bool IsValidCompressedBuffer(const char* compressed, size_t n) {
@@ -1278,6 +1200,11 @@ bool IsValidCompressedBuffer(const char* compressed, size_t n) {
    return InternalUncompress(&reader, &writer);
  }

+ bool IsValidCompressed(Source* compressed) {
+   SnappyDecompressionValidator writer;
+   return InternalUncompress(compressed, &writer);
+ }
+
  void RawCompress(const char* input,
                   size_t input_length,
                   char* compressed,
@@ -1292,7 +1219,7 @@ void RawCompress(const char* input,

  size_t Compress(const char* input, size_t input_length, string* compressed) {
    // Pre-grow the buffer to the max length of the compressed output
-   compressed->resize(MaxCompressedLength(input_length));
+   STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));

    size_t compressed_length;
    RawCompress(input, input_length, string_as_array(compressed),
@@ -1301,6 +1228,237 @@ size_t Compress(const char* input, size_t input_length, string* compressed) {
    return compressed_length;
  }

+ // -----------------------------------------------------------------------
+ // Sink interface
+ // -----------------------------------------------------------------------

- } // end namespace snappy
+ // A type that decompresses into a Sink. The template parameter
+ // Allocator must export one method "char* Allocate(int size);", which
+ // allocates a buffer of "size" and appends that to the destination.
+ template <typename Allocator>
+ class SnappyScatteredWriter {
+   Allocator allocator_;
+
+   // We need random access into the data generated so far. Therefore
+   // we keep track of all of the generated data as an array of blocks.
+   // All of the blocks except the last have length kBlockSize.
+   std::vector<char*> blocks_;
+   size_t expected_;
+
+   // Total size of all fully generated blocks so far
+   size_t full_size_;
+
+   // Pointer into current output block
+   char* op_base_;       // Base of output block
+   char* op_ptr_;        // Pointer to next unfilled byte in block
+   char* op_limit_;      // Pointer just past block
+
+   inline size_t Size() const {
+     return full_size_ + (op_ptr_ - op_base_);
+   }
+
+   bool SlowAppend(const char* ip, size_t len);
+   bool SlowAppendFromSelf(size_t offset, size_t len);
+
+  public:
+   inline explicit SnappyScatteredWriter(const Allocator& allocator)
+       : allocator_(allocator),
+         full_size_(0),
+         op_base_(NULL),
+         op_ptr_(NULL),
+         op_limit_(NULL) {
+   }
+
+   inline void SetExpectedLength(size_t len) {
+     assert(blocks_.empty());
+     expected_ = len;
+   }
+
+   inline bool CheckLength() const {
+     return Size() == expected_;
+   }
+
+   // Return the number of bytes actually uncompressed so far
+   inline size_t Produced() const {
+     return Size();
+   }
+
+   inline bool Append(const char* ip, size_t len) {
+     size_t avail = op_limit_ - op_ptr_;
+     if (len <= avail) {
+       // Fast path
+       memcpy(op_ptr_, ip, len);
+       op_ptr_ += len;
+       return true;
+     } else {
+       return SlowAppend(ip, len);
+     }
+   }
+
+   inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+     char* op = op_ptr_;
+     const int space_left = op_limit_ - op;
+     if (length <= 16 && available >= 16 + kMaximumTagLength &&
+         space_left >= 16) {
+       // Fast path, used for the majority (about 95%) of invocations.
+       UnalignedCopy128(ip, op);
+       op_ptr_ = op + length;
+       return true;
+     } else {
+       return false;
+     }
+   }

+   inline bool AppendFromSelf(size_t offset, size_t len) {
+     char* const op_end = op_ptr_ + len;
+     // See SnappyArrayWriter::AppendFromSelf for an explanation of
+     // the "offset - 1u" trick.
+     if (PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ && op_end <= op_limit_)) {
+       // Fast path: src and dst in current block.
+       op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
+       return true;
+     }
+     return SlowAppendFromSelf(offset, len);
+   }
+
+   // Called at the end of the decompress. We ask the allocator
+   // write all blocks to the sink.
+   inline void Flush() { allocator_.Flush(Produced()); }
+ };
+
+ template<typename Allocator>
+ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
+   size_t avail = op_limit_ - op_ptr_;
+   while (len > avail) {
+     // Completely fill this block
+     memcpy(op_ptr_, ip, avail);
+     op_ptr_ += avail;
+     assert(op_limit_ - op_ptr_ == 0);
+     full_size_ += (op_ptr_ - op_base_);
+     len -= avail;
+     ip += avail;
+
+     // Bounds check
+     if (full_size_ + len > expected_) {
+       return false;
+     }
+
+     // Make new block
+     size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+     op_base_ = allocator_.Allocate(bsize);
+     op_ptr_ = op_base_;
+     op_limit_ = op_base_ + bsize;
+     blocks_.push_back(op_base_);
+     avail = bsize;
+   }
+
+   memcpy(op_ptr_, ip, len);
+   op_ptr_ += len;
+   return true;
+ }
+
+ template<typename Allocator>
+ bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
+                                                           size_t len) {
+   // Overflow check
+   // See SnappyArrayWriter::AppendFromSelf for an explanation of
+   // the "offset - 1u" trick.
+   const size_t cur = Size();
+   if (offset - 1u >= cur) return false;
+   if (expected_ - cur < len) return false;
+
+   // Currently we shouldn't ever hit this path because Compress() chops the
+   // input into blocks and does not create cross-block copies. However, it is
+   // nice if we do not rely on that, since we can get better compression if we
+   // allow cross-block copies and thus might want to change the compressor in
+   // the future.
+   size_t src = cur - offset;
+   while (len-- > 0) {
+     char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)];
+     Append(&c, 1);
+     src++;
+   }
+   return true;
+ }
+
+ class SnappySinkAllocator {
+  public:
+   explicit SnappySinkAllocator(Sink* dest): dest_(dest) {}
+   ~SnappySinkAllocator() {}
+
+   char* Allocate(int size) {
+     Datablock block(new char[size], size);
+     blocks_.push_back(block);
+     return block.data;
+   }
+
+   // We flush only at the end, because the writer wants
+   // random access to the blocks and once we hand the
+   // block over to the sink, we can't access it anymore.
+   // Also we don't write more than has been actually written
+   // to the blocks.
+   void Flush(size_t size) {
+     size_t size_written = 0;
+     size_t block_size;
+     for (int i = 0; i < blocks_.size(); ++i) {
+       block_size = min<size_t>(blocks_[i].size, size - size_written);
+       dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
+                                     &SnappySinkAllocator::Deleter, NULL);
+       size_written += block_size;
+     }
+     blocks_.clear();
+   }
+
+  private:
+   struct Datablock {
+     char* data;
+     size_t size;
+     Datablock(char* p, size_t s) : data(p), size(s) {}
+   };
+
+   static void Deleter(void* arg, const char* bytes, size_t size) {
+     delete[] bytes;
+   }
+
+   Sink* dest_;
+   std::vector<Datablock> blocks_;
+
+   // Note: copying this object is allowed
+ };
+
+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
+   SnappySinkAllocator allocator(uncompressed);
+   SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+   InternalUncompress(compressed, &writer);
+   return writer.Produced();
+ }
+
+ bool Uncompress(Source* compressed, Sink* uncompressed) {
+   // Read the uncompressed length from the front of the compressed input
+   SnappyDecompressor decompressor(compressed);
+   uint32 uncompressed_len = 0;
+   if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
+     return false;
+   }
+
+   char c;
+   size_t allocated_size;
+   char* buf = uncompressed->GetAppendBufferVariable(
+       1, uncompressed_len, &c, 1, &allocated_size);
+
+   // If we can get a flat buffer, then use it, otherwise do block by block
+   // uncompression
+   if (allocated_size >= uncompressed_len) {
+     SnappyArrayWriter writer(buf);
+     bool result = InternalUncompressAllTags(
+         &decompressor, &writer, uncompressed_len);
+     uncompressed->Append(buf, writer.Produced());
+     return result;
+   } else {
+     SnappySinkAllocator allocator(uncompressed);
+     SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+     return InternalUncompressAllTags(&decompressor, &writer, uncompressed_len);
+   }
+ }
+
+ }  // end namespace snappy
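Editorial usage sketch (not part of the gem diff): driving the new Sink-based Uncompress() overload added above. It assumes the updated snappy.h declares this overload (as the +23/-4 header diff suggests) and uses the ByteArraySource and UncheckedByteArraySink helpers from snappy-sinksource.h; the wrapper function name is invented for this example.

  #include <string>
  #include "snappy.h"
  #include "snappy-sinksource.h"

  bool DecompressToString(const char* data, size_t n, std::string* out) {
    size_t uncompressed_len = 0;
    if (!snappy::GetUncompressedLength(data, n, &uncompressed_len)) return false;
    if (uncompressed_len == 0) {
      out->clear();
      return true;
    }
    out->resize(uncompressed_len);
    snappy::ByteArraySource source(data, n);
    snappy::UncheckedByteArraySink sink(&(*out)[0]);  // sized above, so "unchecked" is safe
    return snappy::Uncompress(&source, &sink);
  }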