snappy 0.0.10-java → 0.0.11-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.gitmodules +3 -0
  3. data/Rakefile +12 -13
  4. data/ext/extconf.rb +22 -31
  5. data/lib/snappy/reader.rb +10 -7
  6. data/lib/snappy/version.rb +1 -1
  7. data/snappy.gemspec +24 -0
  8. data/test/test-snappy-reader.rb +16 -0
  9. data/vendor/snappy/AUTHORS +1 -0
  10. data/vendor/snappy/COPYING +54 -0
  11. data/vendor/snappy/ChangeLog +1916 -0
  12. data/vendor/snappy/Makefile.am +23 -0
  13. data/vendor/snappy/NEWS +128 -0
  14. data/vendor/snappy/README +135 -0
  15. data/vendor/snappy/autogen.sh +7 -0
  16. data/vendor/snappy/configure.ac +133 -0
  17. data/vendor/snappy/format_description.txt +110 -0
  18. data/vendor/snappy/framing_format.txt +135 -0
  19. data/vendor/snappy/m4/gtest.m4 +74 -0
  20. data/vendor/snappy/snappy-c.cc +90 -0
  21. data/vendor/snappy/snappy-c.h +138 -0
  22. data/vendor/snappy/snappy-internal.h +150 -0
  23. data/vendor/snappy/snappy-sinksource.cc +71 -0
  24. data/vendor/snappy/snappy-sinksource.h +137 -0
  25. data/vendor/snappy/snappy-stubs-internal.cc +42 -0
  26. data/vendor/snappy/snappy-stubs-internal.h +491 -0
  27. data/vendor/snappy/snappy-stubs-public.h.in +98 -0
  28. data/vendor/snappy/snappy-test.cc +606 -0
  29. data/vendor/snappy/snappy-test.h +582 -0
  30. data/vendor/snappy/snappy.cc +1306 -0
  31. data/vendor/snappy/snappy.h +184 -0
  32. data/vendor/snappy/snappy_unittest.cc +1355 -0
  33. data/vendor/snappy/testdata/alice29.txt +3609 -0
  34. data/vendor/snappy/testdata/asyoulik.txt +4122 -0
  35. data/vendor/snappy/testdata/baddata1.snappy +0 -0
  36. data/vendor/snappy/testdata/baddata2.snappy +0 -0
  37. data/vendor/snappy/testdata/baddata3.snappy +0 -0
  38. data/vendor/snappy/testdata/fireworks.jpeg +0 -0
  39. data/vendor/snappy/testdata/geo.protodata +0 -0
  40. data/vendor/snappy/testdata/html +1 -0
  41. data/vendor/snappy/testdata/html_x_4 +1 -0
  42. data/vendor/snappy/testdata/kppkn.gtb +0 -0
  43. data/vendor/snappy/testdata/lcet10.txt +7519 -0
  44. data/vendor/snappy/testdata/paper-100k.pdf +600 -2
  45. data/vendor/snappy/testdata/plrabn12.txt +10699 -0
  46. data/vendor/snappy/testdata/urls.10K +10000 -0
  47. metadata +57 -18
@@ -0,0 +1,184 @@
1
+ // Copyright 2005 and onwards Google Inc.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // A light-weight compression algorithm. It is designed for speed of
30
+ // compression and decompression, rather than for the utmost in space
31
+ // savings.
32
+ //
33
+ // For getting better compression ratios when you are compressing data
34
+ // with long repeated sequences or compressing data that is similar to
35
+ // other data, while still compressing fast, you might look at first
36
+ // using BMDiff and then compressing the output of BMDiff with
37
+ // Snappy.
38
+
39
+ #ifndef UTIL_SNAPPY_SNAPPY_H__
40
+ #define UTIL_SNAPPY_SNAPPY_H__
41
+
42
+ #include <stddef.h>
43
+ #include <string>
44
+
45
+ #include "snappy-stubs-public.h"
46
+
47
+ namespace snappy {
48
+ class Source;
49
+ class Sink;
50
+
51
+ // ------------------------------------------------------------------------
52
+ // Generic compression/decompression routines.
53
+ // ------------------------------------------------------------------------
54
+
55
+ // Compress the bytes read from "*source" and append to "*sink". Return the
56
+ // number of bytes written.
57
+ size_t Compress(Source* source, Sink* sink);
58
+
59
+ // Find the uncompressed length of the given stream, as given by the header.
60
+ // Note that the true length could deviate from this; the stream could e.g.
61
+ // be truncated.
62
+ //
63
+ // Also note that this leaves "*source" in a state that is unsuitable for
64
+ // further operations, such as RawUncompress(). You will need to rewind
65
+ // or recreate the source yourself before attempting any further calls.
66
+ bool GetUncompressedLength(Source* source, uint32* result);
67
+
68
+ // ------------------------------------------------------------------------
69
+ // Higher-level string based routines (should be sufficient for most users)
70
+ // ------------------------------------------------------------------------
71
+
72
+ // Sets "*output" to the compressed version of "input[0,input_length-1]".
73
+ // Original contents of *output are lost.
74
+ //
75
+ // REQUIRES: "input[]" is not an alias of "*output".
76
+ size_t Compress(const char* input, size_t input_length, string* output);
77
+
78
+ // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
79
+ // Original contents of "*uncompressed" are lost.
80
+ //
81
+ // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
82
+ //
83
+ // returns false if the message is corrupted and could not be decompressed
84
+ bool Uncompress(const char* compressed, size_t compressed_length,
85
+ string* uncompressed);
86
+
87
+
88
+ // ------------------------------------------------------------------------
89
+ // Lower-level character array based routines. May be useful for
90
+ // efficiency reasons in certain circumstances.
91
+ // ------------------------------------------------------------------------
92
+
93
+ // REQUIRES: "compressed" must point to an area of memory that is at
94
+ // least "MaxCompressedLength(input_length)" bytes in length.
95
+ //
96
+ // Takes the data stored in "input[0..input_length-1]" and stores
97
+ // it in the array pointed to by "compressed".
98
+ //
99
+ // "*compressed_length" is set to the length of the compressed output.
100
+ //
101
+ // Example:
102
+ // char* output = new char[snappy::MaxCompressedLength(input_length)];
103
+ // size_t output_length;
104
+ // RawCompress(input, input_length, output, &output_length);
105
+ // ... Process(output, output_length) ...
106
+ // delete [] output;
107
+ void RawCompress(const char* input,
108
+ size_t input_length,
109
+ char* compressed,
110
+ size_t* compressed_length);
111
+
112
+ // Given data in "compressed[0..compressed_length-1]" generated by
113
+ // calling the Snappy::Compress routine, this routine
114
+ // stores the uncompressed data to
115
+ // uncompressed[0..GetUncompressedLength(compressed)-1]
116
+ // returns false if the message is corrupted and could not be decompressed
117
+ bool RawUncompress(const char* compressed, size_t compressed_length,
118
+ char* uncompressed);
119
+
120
+ // Given data from the byte source 'compressed' generated by calling
121
+ // the Snappy::Compress routine, this routine stores the uncompressed
122
+ // data to
123
+ // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
124
+ // returns false if the message is corrupted and could not be decompressed
125
+ bool RawUncompress(Source* compressed, char* uncompressed);
126
+
127
+ // Given data in "compressed[0..compressed_length-1]" generated by
128
+ // calling the Snappy::Compress routine, this routine
129
+ // stores the uncompressed data to the iovec "iov". The number of physical
130
+ // buffers in "iov" is given by iov_cnt and their cumulative size
131
+ // must be at least GetUncompressedLength(compressed). The individual buffers
132
+ // in "iov" must not overlap with each other.
133
+ //
134
+ // returns false if the message is corrupted and could not be decompressed
135
+ bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
136
+ const struct iovec* iov, size_t iov_cnt);
137
+
138
+ // Given data from the byte source 'compressed' generated by calling
139
+ // the Snappy::Compress routine, this routine stores the uncompressed
140
+ // data to the iovec "iov". The number of physical
141
+ // buffers in "iov" is given by iov_cnt and their cumulative size
142
+ // must be at least GetUncompressedLength(compressed). The individual buffers
143
+ // in "iov" must not overlap with each other.
144
+ //
145
+ // returns false if the message is corrupted and could not be decompressed
146
+ bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
147
+ size_t iov_cnt);
148
+
149
+ // Returns the maximal size of the compressed representation of
150
+ // input data that is "source_bytes" bytes in length.
151
+ size_t MaxCompressedLength(size_t source_bytes);
152
+
153
+ // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
154
+ // Returns true and stores the length of the uncompressed data in
155
+ // *result normally. Returns false on parsing error.
156
+ // This operation takes O(1) time.
157
+ bool GetUncompressedLength(const char* compressed, size_t compressed_length,
158
+ size_t* result);
159
+
160
+ // Returns true iff the contents of "compressed[]" can be uncompressed
161
+ // successfully. Does not return the uncompressed data. Takes
162
+ // time proportional to compressed_length, but is usually at least
163
+ // a factor of four faster than actual decompression.
164
+ bool IsValidCompressedBuffer(const char* compressed,
165
+ size_t compressed_length);
166
+
167
+ // The size of a compression block. Note that many parts of the compression
168
+ // code assume that kBlockSize <= 65536; in particular, the hash table
169
+ // can only store 16-bit offsets, and EmitCopy() also assumes the offset
170
+ // is 65535 bytes or less. Note also that if you change this, it will
171
+ // affect the framing format (see framing_format.txt).
172
+ //
173
+ // Note that there might be older data around that is compressed with larger
174
+ // block sizes, so the decompression code should not rely on the
175
+ // non-existence of long backreferences.
176
+ static const int kBlockLog = 16;
177
+ static const size_t kBlockSize = 1 << kBlockLog;
178
+
179
+ static const int kMaxHashTableBits = 14;
180
+ static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
181
+ } // end namespace snappy
182
+
183
+
184
+ #endif // UTIL_SNAPPY_SNAPPY_H__
@@ -0,0 +1,1355 @@
1
+ // Copyright 2005 and onwards Google Inc.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ #include <math.h>
30
+ #include <stdlib.h>
31
+
32
+
33
+ #include <algorithm>
34
+ #include <string>
35
+ #include <vector>
36
+
37
+ #include "snappy.h"
38
+ #include "snappy-internal.h"
39
+ #include "snappy-test.h"
40
+ #include "snappy-sinksource.h"
41
+
42
+ DEFINE_int32(start_len, -1,
43
+ "Starting prefix size for testing (-1: just full file contents)");
44
+ DEFINE_int32(end_len, -1,
45
+ "Starting prefix size for testing (-1: just full file contents)");
46
+ DEFINE_int32(bytes, 10485760,
47
+ "How many bytes to compress/uncompress per file for timing");
48
+
49
+ DEFINE_bool(zlib, false,
50
+ "Run zlib compression (http://www.zlib.net)");
51
+ DEFINE_bool(lzo, false,
52
+ "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
53
+ DEFINE_bool(quicklz, false,
54
+ "Run quickLZ compression (http://www.quicklz.com/)");
55
+ DEFINE_bool(liblzf, false,
56
+ "Run libLZF compression "
57
+ "(http://www.goof.com/pcg/marc/liblzf.html)");
58
+ DEFINE_bool(fastlz, false,
59
+ "Run FastLZ compression (http://www.fastlz.org/");
60
+ DEFINE_bool(snappy, true, "Run snappy compression");
61
+
62
+
63
+ DEFINE_bool(write_compressed, false,
64
+ "Write compressed versions of each file to <file>.comp");
65
+ DEFINE_bool(write_uncompressed, false,
66
+ "Write uncompressed versions of each file to <file>.uncomp");
67
+
68
+ namespace snappy {
69
+
70
+
71
+ #ifdef HAVE_FUNC_MMAP
72
+
73
+ // To test against code that reads beyond its input, this class copies a
74
+ // string to a newly allocated group of pages, the last of which
75
+ // is made unreadable via mprotect. Note that we need to allocate the
76
+ // memory with mmap(), as POSIX allows mprotect() only on memory allocated
77
+ // with mmap(), and some malloc/posix_memalign implementations expect to
78
+ // be able to read previously allocated memory while doing heap allocations.
79
+ class DataEndingAtUnreadablePage {
80
+ public:
81
+ explicit DataEndingAtUnreadablePage(const string& s) {
82
+ const size_t page_size = getpagesize();
83
+ const size_t size = s.size();
84
+ // Round up space for string to a multiple of page_size.
85
+ size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
86
+ alloc_size_ = space_for_string + page_size;
87
+ mem_ = mmap(NULL, alloc_size_,
88
+ PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
89
+ CHECK_NE(MAP_FAILED, mem_);
90
+ protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
91
+ char* dst = protected_page_ - size;
92
+ memcpy(dst, s.data(), size);
93
+ data_ = dst;
94
+ size_ = size;
95
+ // Make guard page unreadable.
96
+ CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
97
+ }
98
+
99
+ ~DataEndingAtUnreadablePage() {
100
+ // Undo the mprotect.
101
+ CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
102
+ CHECK_EQ(0, munmap(mem_, alloc_size_));
103
+ }
104
+
105
+ const char* data() const { return data_; }
106
+ size_t size() const { return size_; }
107
+
108
+ private:
109
+ size_t alloc_size_;
110
+ void* mem_;
111
+ char* protected_page_;
112
+ const char* data_;
113
+ size_t size_;
114
+ };
115
+
116
+ #else // HAVE_FUNC_MMAP
117
+
118
+ // Fallback for systems without mmap.
119
+ typedef string DataEndingAtUnreadablePage;
120
+
121
+ #endif
122
+
123
// Compression libraries the benchmark knows about. The enumerator values are
// used as indices into names[] below, so the two lists must stay in the same
// order.
enum CompressorType {
  ZLIB,
  LZO,
  LIBLZF,
  QUICKLZ,
  FASTLZ,
  SNAPPY
};

// Printable name for each CompressorType, indexed by enumerator value.
const char* names[] = {
  "ZLIB",
  "LZO",
  "LIBLZF",
  "QUICKLZ",
  "FASTLZ",
  "SNAPPY"
};
130
+
131
+ static size_t MinimumRequiredOutputSpace(size_t input_size,
132
+ CompressorType comp) {
133
+ switch (comp) {
134
+ #ifdef ZLIB_VERSION
135
+ case ZLIB:
136
+ return ZLib::MinCompressbufSize(input_size);
137
+ #endif // ZLIB_VERSION
138
+
139
+ #ifdef LZO_VERSION
140
+ case LZO:
141
+ return input_size + input_size/64 + 16 + 3;
142
+ #endif // LZO_VERSION
143
+
144
+ #ifdef LZF_VERSION
145
+ case LIBLZF:
146
+ return input_size;
147
+ #endif // LZF_VERSION
148
+
149
+ #ifdef QLZ_VERSION_MAJOR
150
+ case QUICKLZ:
151
+ return input_size + 36000; // 36000 is used for scratch.
152
+ #endif // QLZ_VERSION_MAJOR
153
+
154
+ #ifdef FASTLZ_VERSION
155
+ case FASTLZ:
156
+ return max(static_cast<int>(ceil(input_size * 1.05)), 66);
157
+ #endif // FASTLZ_VERSION
158
+
159
+ case SNAPPY:
160
+ return snappy::MaxCompressedLength(input_size);
161
+
162
+ default:
163
+ LOG(FATAL) << "Unknown compression type number " << comp;
164
+ }
165
+ }
166
+
167
+ // Returns true if we successfully compressed, false otherwise.
168
+ //
169
+ // If compressed_is_preallocated is set, do not resize the compressed buffer.
170
+ // This is typically what you want for a benchmark, in order to not spend
171
+ // time in the memory allocator. If you do set this flag, however,
172
+ // "compressed" must be preinitialized to at least MinCompressbufSize(comp)
173
+ // number of bytes, and may contain junk bytes at the end after return.
174
+ static bool Compress(const char* input, size_t input_size, CompressorType comp,
175
+ string* compressed, bool compressed_is_preallocated) {
176
+ if (!compressed_is_preallocated) {
177
+ compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
178
+ }
179
+
180
+ switch (comp) {
181
+ #ifdef ZLIB_VERSION
182
+ case ZLIB: {
183
+ ZLib zlib;
184
+ uLongf destlen = compressed->size();
185
+ int ret = zlib.Compress(
186
+ reinterpret_cast<Bytef*>(string_as_array(compressed)),
187
+ &destlen,
188
+ reinterpret_cast<const Bytef*>(input),
189
+ input_size);
190
+ CHECK_EQ(Z_OK, ret);
191
+ if (!compressed_is_preallocated) {
192
+ compressed->resize(destlen);
193
+ }
194
+ return true;
195
+ }
196
+ #endif // ZLIB_VERSION
197
+
198
+ #ifdef LZO_VERSION
199
+ case LZO: {
200
+ unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
201
+ lzo_uint destlen;
202
+ int ret = lzo1x_1_15_compress(
203
+ reinterpret_cast<const uint8*>(input),
204
+ input_size,
205
+ reinterpret_cast<uint8*>(string_as_array(compressed)),
206
+ &destlen,
207
+ mem);
208
+ CHECK_EQ(LZO_E_OK, ret);
209
+ delete[] mem;
210
+ if (!compressed_is_preallocated) {
211
+ compressed->resize(destlen);
212
+ }
213
+ break;
214
+ }
215
+ #endif // LZO_VERSION
216
+
217
+ #ifdef LZF_VERSION
218
+ case LIBLZF: {
219
+ int destlen = lzf_compress(input,
220
+ input_size,
221
+ string_as_array(compressed),
222
+ input_size);
223
+ if (destlen == 0) {
224
+ // lzf *can* cause lots of blowup when compressing, so they
225
+ // recommend to limit outsize to insize, and just not compress
226
+ // if it's bigger. Ideally, we'd just swap input and output.
227
+ compressed->assign(input, input_size);
228
+ destlen = input_size;
229
+ }
230
+ if (!compressed_is_preallocated) {
231
+ compressed->resize(destlen);
232
+ }
233
+ break;
234
+ }
235
+ #endif // LZF_VERSION
236
+
237
+ #ifdef QLZ_VERSION_MAJOR
238
+ case QUICKLZ: {
239
+ qlz_state_compress *state_compress = new qlz_state_compress;
240
+ int destlen = qlz_compress(input,
241
+ string_as_array(compressed),
242
+ input_size,
243
+ state_compress);
244
+ delete state_compress;
245
+ CHECK_NE(0, destlen);
246
+ if (!compressed_is_preallocated) {
247
+ compressed->resize(destlen);
248
+ }
249
+ break;
250
+ }
251
+ #endif // QLZ_VERSION_MAJOR
252
+
253
+ #ifdef FASTLZ_VERSION
254
+ case FASTLZ: {
255
+ // Use level 1 compression since we mostly care about speed.
256
+ int destlen = fastlz_compress_level(
257
+ 1,
258
+ input,
259
+ input_size,
260
+ string_as_array(compressed));
261
+ if (!compressed_is_preallocated) {
262
+ compressed->resize(destlen);
263
+ }
264
+ CHECK_NE(destlen, 0);
265
+ break;
266
+ }
267
+ #endif // FASTLZ_VERSION
268
+
269
+ case SNAPPY: {
270
+ size_t destlen;
271
+ snappy::RawCompress(input, input_size,
272
+ string_as_array(compressed),
273
+ &destlen);
274
+ CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
275
+ if (!compressed_is_preallocated) {
276
+ compressed->resize(destlen);
277
+ }
278
+ break;
279
+ }
280
+
281
+
282
+ default: {
283
+ return false; // the asked-for library wasn't compiled in
284
+ }
285
+ }
286
+ return true;
287
+ }
288
+
289
+ static bool Uncompress(const string& compressed, CompressorType comp,
290
+ int size, string* output) {
291
+ switch (comp) {
292
+ #ifdef ZLIB_VERSION
293
+ case ZLIB: {
294
+ output->resize(size);
295
+ ZLib zlib;
296
+ uLongf destlen = output->size();
297
+ int ret = zlib.Uncompress(
298
+ reinterpret_cast<Bytef*>(string_as_array(output)),
299
+ &destlen,
300
+ reinterpret_cast<const Bytef*>(compressed.data()),
301
+ compressed.size());
302
+ CHECK_EQ(Z_OK, ret);
303
+ CHECK_EQ(static_cast<uLongf>(size), destlen);
304
+ break;
305
+ }
306
+ #endif // ZLIB_VERSION
307
+
308
+ #ifdef LZO_VERSION
309
+ case LZO: {
310
+ output->resize(size);
311
+ lzo_uint destlen;
312
+ int ret = lzo1x_decompress(
313
+ reinterpret_cast<const uint8*>(compressed.data()),
314
+ compressed.size(),
315
+ reinterpret_cast<uint8*>(string_as_array(output)),
316
+ &destlen,
317
+ NULL);
318
+ CHECK_EQ(LZO_E_OK, ret);
319
+ CHECK_EQ(static_cast<lzo_uint>(size), destlen);
320
+ break;
321
+ }
322
+ #endif // LZO_VERSION
323
+
324
+ #ifdef LZF_VERSION
325
+ case LIBLZF: {
326
+ output->resize(size);
327
+ int destlen = lzf_decompress(compressed.data(),
328
+ compressed.size(),
329
+ string_as_array(output),
330
+ output->size());
331
+ if (destlen == 0) {
332
+ // This error probably means we had decided not to compress,
333
+ // and thus have stored input in output directly.
334
+ output->assign(compressed.data(), compressed.size());
335
+ destlen = compressed.size();
336
+ }
337
+ CHECK_EQ(destlen, size);
338
+ break;
339
+ }
340
+ #endif // LZF_VERSION
341
+
342
+ #ifdef QLZ_VERSION_MAJOR
343
+ case QUICKLZ: {
344
+ output->resize(size);
345
+ qlz_state_decompress *state_decompress = new qlz_state_decompress;
346
+ int destlen = qlz_decompress(compressed.data(),
347
+ string_as_array(output),
348
+ state_decompress);
349
+ delete state_decompress;
350
+ CHECK_EQ(destlen, size);
351
+ break;
352
+ }
353
+ #endif // QLZ_VERSION_MAJOR
354
+
355
+ #ifdef FASTLZ_VERSION
356
+ case FASTLZ: {
357
+ output->resize(size);
358
+ int destlen = fastlz_decompress(compressed.data(),
359
+ compressed.length(),
360
+ string_as_array(output),
361
+ size);
362
+ CHECK_EQ(destlen, size);
363
+ break;
364
+ }
365
+ #endif // FASTLZ_VERSION
366
+
367
+ case SNAPPY: {
368
+ snappy::RawUncompress(compressed.data(), compressed.size(),
369
+ string_as_array(output));
370
+ break;
371
+ }
372
+
373
+
374
+ default: {
375
+ return false; // the asked-for library wasn't compiled in
376
+ }
377
+ }
378
+ return true;
379
+ }
380
+
381
+ static void Measure(const char* data,
382
+ size_t length,
383
+ CompressorType comp,
384
+ int repeats,
385
+ int block_size) {
386
+ // Run tests a few time and pick median running times
387
+ static const int kRuns = 5;
388
+ double ctime[kRuns];
389
+ double utime[kRuns];
390
+ int compressed_size = 0;
391
+
392
+ {
393
+ // Chop the input into blocks
394
+ int num_blocks = (length + block_size - 1) / block_size;
395
+ vector<const char*> input(num_blocks);
396
+ vector<size_t> input_length(num_blocks);
397
+ vector<string> compressed(num_blocks);
398
+ vector<string> output(num_blocks);
399
+ for (int b = 0; b < num_blocks; b++) {
400
+ int input_start = b * block_size;
401
+ int input_limit = min<int>((b+1)*block_size, length);
402
+ input[b] = data+input_start;
403
+ input_length[b] = input_limit-input_start;
404
+
405
+ // Pre-grow the output buffer so we don't measure string append time.
406
+ compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
407
+ }
408
+
409
+ // First, try one trial compression to make sure the code is compiled in
410
+ if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
411
+ LOG(WARNING) << "Skipping " << names[comp] << ": "
412
+ << "library not compiled in";
413
+ return;
414
+ }
415
+
416
+ for (int run = 0; run < kRuns; run++) {
417
+ CycleTimer ctimer, utimer;
418
+
419
+ for (int b = 0; b < num_blocks; b++) {
420
+ // Pre-grow the output buffer so we don't measure string append time.
421
+ compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
422
+ }
423
+
424
+ ctimer.Start();
425
+ for (int b = 0; b < num_blocks; b++)
426
+ for (int i = 0; i < repeats; i++)
427
+ Compress(input[b], input_length[b], comp, &compressed[b], true);
428
+ ctimer.Stop();
429
+
430
+ // Compress once more, with resizing, so we don't leave junk
431
+ // at the end that will confuse the decompressor.
432
+ for (int b = 0; b < num_blocks; b++) {
433
+ Compress(input[b], input_length[b], comp, &compressed[b], false);
434
+ }
435
+
436
+ for (int b = 0; b < num_blocks; b++) {
437
+ output[b].resize(input_length[b]);
438
+ }
439
+
440
+ utimer.Start();
441
+ for (int i = 0; i < repeats; i++)
442
+ for (int b = 0; b < num_blocks; b++)
443
+ Uncompress(compressed[b], comp, input_length[b], &output[b]);
444
+ utimer.Stop();
445
+
446
+ ctime[run] = ctimer.Get();
447
+ utime[run] = utimer.Get();
448
+ }
449
+
450
+ compressed_size = 0;
451
+ for (int i = 0; i < compressed.size(); i++) {
452
+ compressed_size += compressed[i].size();
453
+ }
454
+ }
455
+
456
+ sort(ctime, ctime + kRuns);
457
+ sort(utime, utime + kRuns);
458
+ const int med = kRuns/2;
459
+
460
+ float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
461
+ float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
462
+ string x = names[comp];
463
+ x += ":";
464
+ string urate = (uncomp_rate >= 0)
465
+ ? StringPrintf("%.1f", uncomp_rate)
466
+ : string("?");
467
+ printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
468
+ "comp %5.1f MB/s uncomp %5s MB/s\n",
469
+ x.c_str(),
470
+ block_size/(1<<20),
471
+ static_cast<int>(length), static_cast<uint32>(compressed_size),
472
+ (compressed_size * 100.0) / max<int>(1, length),
473
+ comp_rate,
474
+ urate.c_str());
475
+ }
476
+
477
+
478
+ static int VerifyString(const string& input) {
479
+ string compressed;
480
+ DataEndingAtUnreadablePage i(input);
481
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
482
+ CHECK_EQ(written, compressed.size());
483
+ CHECK_LE(compressed.size(),
484
+ snappy::MaxCompressedLength(input.size()));
485
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
486
+
487
+ string uncompressed;
488
+ DataEndingAtUnreadablePage c(compressed);
489
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
490
+ CHECK_EQ(uncompressed, input);
491
+ return uncompressed.size();
492
+ }
493
+
494
+
495
+ static void VerifyIOVec(const string& input) {
496
+ string compressed;
497
+ DataEndingAtUnreadablePage i(input);
498
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
499
+ CHECK_EQ(written, compressed.size());
500
+ CHECK_LE(compressed.size(),
501
+ snappy::MaxCompressedLength(input.size()));
502
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
503
+
504
+ // Try uncompressing into an iovec containing a random number of entries
505
+ // ranging from 1 to 10.
506
+ char* buf = new char[input.size()];
507
+ ACMRandom rnd(input.size());
508
+ int num = rnd.Next() % 10 + 1;
509
+ if (input.size() < num) {
510
+ num = input.size();
511
+ }
512
+ struct iovec* iov = new iovec[num];
513
+ int used_so_far = 0;
514
+ for (int i = 0; i < num; ++i) {
515
+ iov[i].iov_base = buf + used_so_far;
516
+ if (i == num - 1) {
517
+ iov[i].iov_len = input.size() - used_so_far;
518
+ } else {
519
+ // Randomly choose to insert a 0 byte entry.
520
+ if (rnd.OneIn(5)) {
521
+ iov[i].iov_len = 0;
522
+ } else {
523
+ iov[i].iov_len = rnd.Uniform(input.size());
524
+ }
525
+ }
526
+ used_so_far += iov[i].iov_len;
527
+ }
528
+ CHECK(snappy::RawUncompressToIOVec(
529
+ compressed.data(), compressed.size(), iov, num));
530
+ CHECK(!memcmp(buf, input.data(), input.size()));
531
+ delete[] iov;
532
+ delete[] buf;
533
+ }
534
+
535
+ // Test that data compressed by a compressor that does not
536
+ // obey block sizes is uncompressed properly.
537
+ static void VerifyNonBlockedCompression(const string& input) {
538
+ if (input.length() > snappy::kBlockSize) {
539
+ // We cannot test larger blocks than the maximum block size, obviously.
540
+ return;
541
+ }
542
+
543
+ string prefix;
544
+ Varint::Append32(&prefix, input.size());
545
+
546
+ // Setup compression table
547
+ snappy::internal::WorkingMemory wmem;
548
+ int table_size;
549
+ uint16* table = wmem.GetHashTable(input.size(), &table_size);
550
+
551
+ // Compress entire input in one shot
552
+ string compressed;
553
+ compressed += prefix;
554
+ compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
555
+ char* dest = string_as_array(&compressed) + prefix.size();
556
+ char* end = snappy::internal::CompressFragment(input.data(), input.size(),
557
+ dest, table, table_size);
558
+ compressed.resize(end - compressed.data());
559
+
560
+ // Uncompress into string
561
+ string uncomp_str;
562
+ CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
563
+ CHECK_EQ(uncomp_str, input);
564
+
565
+ }
566
+
567
+ // Expand the input so that it is at least K times as big as block size
568
+ static string Expand(const string& input) {
569
+ static const int K = 3;
570
+ string data = input;
571
+ while (data.size() < K * snappy::kBlockSize) {
572
+ data += input;
573
+ }
574
+ return data;
575
+ }
576
+
577
+ static int Verify(const string& input) {
578
+ VLOG(1) << "Verifying input of size " << input.size();
579
+
580
+ // Compress using string based routines
581
+ const int result = VerifyString(input);
582
+
583
+
584
+ VerifyNonBlockedCompression(input);
585
+ VerifyIOVec(input);
586
+ if (!input.empty()) {
587
+ const string expanded = Expand(input);
588
+ VerifyNonBlockedCompression(expanded);
589
+ VerifyIOVec(input);
590
+ }
591
+
592
+
593
+ return result;
594
+ }
595
+
596
+ // This test checks to ensure that snappy doesn't coredump if it gets
597
+ // corrupted data.
598
+
599
+ static bool IsValidCompressedBuffer(const string& c) {
600
+ return snappy::IsValidCompressedBuffer(c.data(), c.size());
601
+ }
602
+ static bool Uncompress(const string& c, string* u) {
603
+ return snappy::Uncompress(c.data(), c.size(), u);
604
+ }
605
+
606
+ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
607
+ string source = "making sure we don't crash with corrupted input";
608
+ VLOG(1) << source;
609
+ string dest;
610
+ TypeParam uncmp;
611
+ snappy::Compress(source.data(), source.size(), &dest);
612
+
613
+ // Mess around with the data. It's hard to simulate all possible
614
+ // corruptions; this is just one example ...
615
+ CHECK_GT(dest.size(), 3);
616
+ dest[1]--;
617
+ dest[3]++;
618
+ // this really ought to fail.
619
+ CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
620
+ CHECK(!Uncompress(TypeParam(dest), &uncmp));
621
+
622
+ // This is testing for a security bug - a buffer that decompresses to 100k
623
+ // but we lie in the snappy header and only reserve 0 bytes of memory :)
624
+ source.resize(100000);
625
+ for (int i = 0; i < source.length(); ++i) {
626
+ source[i] = 'A';
627
+ }
628
+ snappy::Compress(source.data(), source.size(), &dest);
629
+ dest[0] = dest[1] = dest[2] = dest[3] = 0;
630
+ CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
631
+ CHECK(!Uncompress(TypeParam(dest), &uncmp));
632
+
633
+ if (sizeof(void *) == 4) {
634
+ // Another security check; check a crazy big length can't DoS us with an
635
+ // over-allocation.
636
+ // Currently this is done only for 32-bit builds. On 64-bit builds,
637
+ // where 3 GB might be an acceptable allocation size, Uncompress()
638
+ // attempts to decompress, and sometimes causes the test to run out of
639
+ // memory.
640
+ dest[0] = dest[1] = dest[2] = dest[3] = 0xff;
641
+ // This decodes to a really large size, i.e., about 3 GB.
642
+ dest[4] = 'k';
643
+ CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
644
+ CHECK(!Uncompress(TypeParam(dest), &uncmp));
645
+ } else {
646
+ LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
647
+ }
648
+
649
+ // This decodes to about 2 MB; much smaller, but should still fail.
650
+ dest[0] = dest[1] = dest[2] = 0xff;
651
+ dest[3] = 0x00;
652
+ CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
653
+ CHECK(!Uncompress(TypeParam(dest), &uncmp));
654
+
655
+ // try reading stuff in from a bad file.
656
+ for (int i = 1; i <= 3; ++i) {
657
+ string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(),
658
+ 0);
659
+ string uncmp;
660
+ // check that we don't return a crazy length
661
+ size_t ulen;
662
+ CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
663
+ || (ulen < (1<<20)));
664
+ uint32 ulen2;
665
+ snappy::ByteArraySource source(data.data(), data.size());
666
+ CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
667
+ (ulen2 < (1<<20)));
668
+ CHECK(!IsValidCompressedBuffer(TypeParam(data)));
669
+ CHECK(!Uncompress(TypeParam(data), &uncmp));
670
+ }
671
+ }
672
+
673
+ // Helper routines to construct arbitrary compressed strings.
674
+ // These mirror the compression code in snappy.cc, but are copied
675
+ // here so that we can bypass some limitations in how snappy.cc
676
+ // invokes these routines.
677
// Appends a literal element holding `literal` to `*dst`.  Nothing is written
// for an empty literal.
//
// With n = literal.size() - 1: if n < 60 it is stored in the upper six bits
// of the tag byte; otherwise the low-order bytes of n follow the tag (least
// significant first) and the tag's upper bits hold 59 + the byte count.
static void AppendLiteral(string* dst, const string& literal) {
  if (literal.empty()) return;

  int n = literal.size() - 1;
  if (n >= 60) {
    // Length does not fit in the tag: spill it into the following bytes.
    char number[4];
    int count = 0;
    for (; n > 0; n >>= 8) {
      number[count++] = n & 0xff;
    }
    dst->push_back(0 | ((59+count) << 2));
    dst->append(number, count);
  } else {
    // Length fits in the tag byte itself.
    dst->push_back(0 | (n << 2));
  }
  dst->append(literal);
}
696
+
697
// Appends copy elements to `*dst` that together copy `length` bytes from
// `offset` bytes back.  The copy is emitted in pieces of at most 64 bytes;
// each piece uses the one-byte-offset form when it is 4..11 bytes long and
// the offset is below 2048, the two-byte-offset form when the offset is
// below 65536, and the four-byte-offset form otherwise.
static void AppendCopy(string* dst, int offset, int length) {
  while (length > 0) {
    // Decide how many bytes this element covers.
    int chunk = length;
    if (length >= 68) {
      chunk = 64;
    } else if (length > 64) {
      chunk = 60;
    }
    length -= chunk;

    const bool one_byte_offset =
        (chunk >= 4) && (chunk < 12) && (offset < 2048);
    if (one_byte_offset) {
      assert(chunk-4 < 8);            // Must fit in 3 bits
      dst->push_back(1 | ((chunk-4) << 2) | ((offset >> 8) << 5));
      dst->push_back(offset & 0xff);
      continue;
    }

    if (offset < 65536) {
      dst->push_back(2 | ((chunk-1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back(offset >> 8);
      continue;
    }

    // Four-byte offset, least significant byte first.
    dst->push_back(3 | ((chunk-1) << 2));
    for (int shift = 0; shift < 32; shift += 8) {
      dst->push_back((offset >> shift) & 0xff);
    }
  }
}
727
+
728
+ TEST(Snappy, SimpleTests) {
729
+ Verify("");
730
+ Verify("a");
731
+ Verify("ab");
732
+ Verify("abc");
733
+
734
+ Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
735
+ Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
736
+ Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
737
+ Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
738
+ Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
739
+ }
740
+
741
+ // Verify max blowup (lots of four-byte copies)
742
+ TEST(Snappy, MaxBlowup) {
743
+ string input;
744
+ for (int i = 0; i < 20000; i++) {
745
+ ACMRandom rnd(i);
746
+ uint32 bytes = static_cast<uint32>(rnd.Next());
747
+ input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
748
+ }
749
+ for (int i = 19999; i >= 0; i--) {
750
+ ACMRandom rnd(i);
751
+ uint32 bytes = static_cast<uint32>(rnd.Next());
752
+ input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
753
+ }
754
+ Verify(input);
755
+ }
756
+
757
+ TEST(Snappy, RandomData) {
758
+ ACMRandom rnd(FLAGS_test_random_seed);
759
+
760
+ const int num_ops = 20000;
761
+ for (int i = 0; i < num_ops; i++) {
762
+ if ((i % 1000) == 0) {
763
+ VLOG(0) << "Random op " << i << " of " << num_ops;
764
+ }
765
+
766
+ string x;
767
+ int len = rnd.Uniform(4096);
768
+ if (i < 100) {
769
+ len = 65536 + rnd.Uniform(65536);
770
+ }
771
+ while (x.size() < len) {
772
+ int run_len = 1;
773
+ if (rnd.OneIn(10)) {
774
+ run_len = rnd.Skewed(8);
775
+ }
776
+ char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
777
+ while (run_len-- > 0 && x.size() < len) {
778
+ x += c;
779
+ }
780
+ }
781
+
782
+ Verify(x);
783
+ }
784
+ }
785
+
786
+ TEST(Snappy, FourByteOffset) {
787
+ // The new compressor cannot generate four-byte offsets since
788
+ // it chops up the input into 32KB pieces. So we hand-emit the
789
+ // copy manually.
790
+
791
+ // The two fragments that make up the input string.
792
+ string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
793
+ string fragment2 = "some other string";
794
+
795
+ // How many times each fragment is emitted.
796
+ const int n1 = 2;
797
+ const int n2 = 100000 / fragment2.size();
798
+ const int length = n1 * fragment1.size() + n2 * fragment2.size();
799
+
800
+ string compressed;
801
+ Varint::Append32(&compressed, length);
802
+
803
+ AppendLiteral(&compressed, fragment1);
804
+ string src = fragment1;
805
+ for (int i = 0; i < n2; i++) {
806
+ AppendLiteral(&compressed, fragment2);
807
+ src += fragment2;
808
+ }
809
+ AppendCopy(&compressed, src.size(), fragment1.size());
810
+ src += fragment1;
811
+ CHECK_EQ(length, src.size());
812
+
813
+ string uncompressed;
814
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
815
+ CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
816
+ &uncompressed));
817
+ CHECK_EQ(uncompressed, src);
818
+ }
819
+
820
+ TEST(Snappy, IOVecEdgeCases) {
821
+ // Test some tricky edge cases in the iovec output that are not necessarily
822
+ // exercised by random tests.
823
+
824
+ // Our output blocks look like this initially (the last iovec is bigger
825
+ // than depicted):
826
+ // [ ] [ ] [ ] [ ] [ ]
827
+ static const int kLengths[] = { 2, 1, 4, 8, 128 };
828
+
829
+ struct iovec iov[ARRAYSIZE(kLengths)];
830
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
831
+ iov[i].iov_base = new char[kLengths[i]];
832
+ iov[i].iov_len = kLengths[i];
833
+ }
834
+
835
+ string compressed;
836
+ Varint::Append32(&compressed, 22);
837
+
838
+ // A literal whose output crosses three blocks.
839
+ // [ab] [c] [123 ] [ ] [ ]
840
+ AppendLiteral(&compressed, "abc123");
841
+
842
+ // A copy whose output crosses two blocks (source and destination
843
+ // segments marked).
844
+ // [ab] [c] [1231] [23 ] [ ]
845
+ // ^--^ --
846
+ AppendCopy(&compressed, 3, 3);
847
+
848
+ // A copy where the input is, at first, in the block before the output:
849
+ //
850
+ // [ab] [c] [1231] [231231 ] [ ]
851
+ // ^--- ^---
852
+ // Then during the copy, the pointers move such that the input and
853
+ // output pointers are in the same block:
854
+ //
855
+ // [ab] [c] [1231] [23123123] [ ]
856
+ // ^- ^-
857
+ // And then they move again, so that the output pointer is no longer
858
+ // in the same block as the input pointer:
859
+ // [ab] [c] [1231] [23123123] [123 ]
860
+ // ^-- ^--
861
+ AppendCopy(&compressed, 6, 9);
862
+
863
+ // Finally, a copy where the input is from several blocks back,
864
+ // and it also crosses three blocks:
865
+ //
866
+ // [ab] [c] [1231] [23123123] [123b ]
867
+ // ^ ^
868
+ // [ab] [c] [1231] [23123123] [123bc ]
869
+ // ^ ^
870
+ // [ab] [c] [1231] [23123123] [123bc12 ]
871
+ // ^- ^-
872
+ AppendCopy(&compressed, 17, 4);
873
+
874
+ CHECK(snappy::RawUncompressToIOVec(
875
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
876
+ CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
877
+ CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
878
+ CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
879
+ CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
880
+ CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));
881
+
882
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
883
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
884
+ }
885
+ }
886
+
887
+ TEST(Snappy, IOVecLiteralOverflow) {
888
+ static const int kLengths[] = { 3, 4 };
889
+
890
+ struct iovec iov[ARRAYSIZE(kLengths)];
891
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
892
+ iov[i].iov_base = new char[kLengths[i]];
893
+ iov[i].iov_len = kLengths[i];
894
+ }
895
+
896
+ string compressed;
897
+ Varint::Append32(&compressed, 8);
898
+
899
+ AppendLiteral(&compressed, "12345678");
900
+
901
+ CHECK(!snappy::RawUncompressToIOVec(
902
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
903
+
904
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
905
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
906
+ }
907
+ }
908
+
909
+ TEST(Snappy, IOVecCopyOverflow) {
910
+ static const int kLengths[] = { 3, 4 };
911
+
912
+ struct iovec iov[ARRAYSIZE(kLengths)];
913
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
914
+ iov[i].iov_base = new char[kLengths[i]];
915
+ iov[i].iov_len = kLengths[i];
916
+ }
917
+
918
+ string compressed;
919
+ Varint::Append32(&compressed, 8);
920
+
921
+ AppendLiteral(&compressed, "123");
922
+ AppendCopy(&compressed, 3, 5);
923
+
924
+ CHECK(!snappy::RawUncompressToIOVec(
925
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
926
+
927
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
928
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
929
+ }
930
+ }
931
+
932
+
933
+ static bool CheckUncompressedLength(const string& compressed,
934
+ size_t* ulength) {
935
+ const bool result1 = snappy::GetUncompressedLength(compressed.data(),
936
+ compressed.size(),
937
+ ulength);
938
+
939
+ snappy::ByteArraySource source(compressed.data(), compressed.size());
940
+ uint32 length;
941
+ const bool result2 = snappy::GetUncompressedLength(&source, &length);
942
+ CHECK_EQ(result1, result2);
943
+ return result1;
944
+ }
945
+
946
+ TEST(SnappyCorruption, TruncatedVarint) {
947
+ string compressed, uncompressed;
948
+ size_t ulength;
949
+ compressed.push_back('\xf0');
950
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
951
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
952
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
953
+ &uncompressed));
954
+ }
955
+
956
+ TEST(SnappyCorruption, UnterminatedVarint) {
957
+ string compressed, uncompressed;
958
+ size_t ulength;
959
+ compressed.push_back(128);
960
+ compressed.push_back(128);
961
+ compressed.push_back(128);
962
+ compressed.push_back(128);
963
+ compressed.push_back(128);
964
+ compressed.push_back(10);
965
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
966
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
967
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
968
+ &uncompressed));
969
+ }
970
+
971
+ TEST(Snappy, ReadPastEndOfBuffer) {
972
+ // Check that we do not read past end of input
973
+
974
+ // Make a compressed string that ends with a single-byte literal
975
+ string compressed;
976
+ Varint::Append32(&compressed, 1);
977
+ AppendLiteral(&compressed, "x");
978
+
979
+ string uncompressed;
980
+ DataEndingAtUnreadablePage c(compressed);
981
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
982
+ CHECK_EQ(uncompressed, string("x"));
983
+ }
984
+
985
+ // Check for an infinite loop caused by a copy with offset==0
986
+ TEST(Snappy, ZeroOffsetCopy) {
987
+ const char* compressed = "\x40\x12\x00\x00";
988
+ // \x40 Length (must be > kMaxIncrementCopyOverflow)
989
+ // \x12\x00\x00 Copy with offset==0, length==5
990
+ char uncompressed[100];
991
+ EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
992
+ }
993
+
994
+ TEST(Snappy, ZeroOffsetCopyValidation) {
995
+ const char* compressed = "\x05\x12\x00\x00";
996
+ // \x05 Length
997
+ // \x12\x00\x00 Copy with offset==0, length==5
998
+ EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
999
+ }
1000
+
1001
+
1002
+ namespace {
1003
+
1004
+ int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
1005
+ return snappy::internal::FindMatchLength(s1, s2, s2 + length);
1006
+ }
1007
+
1008
+ } // namespace
1009
+
1010
+ TEST(Snappy, FindMatchLength) {
1011
+ // Exercise all different code paths through the function.
1012
+ // 64-bit version:
1013
+
1014
+ // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
1015
+ EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
1016
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
1017
+
1018
+ // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
1019
+ EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
1020
+
1021
+ // Same, but edge cases.
1022
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
1023
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
1024
+
1025
+ // Find non-match at once in first loop.
1026
+ EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
1027
+ EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
1028
+ EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
1029
+ EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
1030
+
1031
+ // Find non-match in first loop after one block.
1032
+ EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1033
+ "abcdefgh?1234567xxxxxxxx", 24));
1034
+ EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1035
+ "abcdefgh0?234567xxxxxxxx", 24));
1036
+ EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1037
+ "abcdefgh01237654xxxxxxxx", 24));
1038
+ EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1039
+ "abcdefgh0123456?xxxxxxxx", 24));
1040
+
1041
+ // 32-bit version:
1042
+
1043
+ // Short matches.
1044
+ EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
1045
+ EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
1046
+ EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
1047
+ EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
1048
+ EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
1049
+ EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
1050
+ EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
1051
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
1052
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
1053
+ EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
1054
+
1055
+ // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
1056
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
1057
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
1058
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
1059
+
1060
+ // Same, but edge cases.
1061
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
1062
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
1063
+
1064
+ // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
1065
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
1066
+
1067
+ // Find non-match at once in first loop.
1068
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1069
+ "xxxxxx?123xxxxxxxx", 18));
1070
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1071
+ "xxxxxx0?23xxxxxxxx", 18));
1072
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1073
+ "xxxxxx0132xxxxxxxx", 18));
1074
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1075
+ "xxxxxx012?xxxxxxxx", 18));
1076
+
1077
+ // Same, but edge cases.
1078
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
1079
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
1080
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
1081
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
1082
+
1083
+ // Find non-match in first loop after one block.
1084
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
1085
+ "xxxxxxabcd?123xx", 16));
1086
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
1087
+ "xxxxxxabcd0?23xx", 16));
1088
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
1089
+ "xxxxxxabcd0132xx", 16));
1090
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
1091
+ "xxxxxxabcd012?xx", 16));
1092
+
1093
+ // Same, but edge cases.
1094
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
1095
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
1096
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
1097
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
1098
+ }
1099
+
1100
+ TEST(Snappy, FindMatchLengthRandom) {
1101
+ const int kNumTrials = 10000;
1102
+ const int kTypicalLength = 10;
1103
+ ACMRandom rnd(FLAGS_test_random_seed);
1104
+
1105
+ for (int i = 0; i < kNumTrials; i++) {
1106
+ string s, t;
1107
+ char a = rnd.Rand8();
1108
+ char b = rnd.Rand8();
1109
+ while (!rnd.OneIn(kTypicalLength)) {
1110
+ s.push_back(rnd.OneIn(2) ? a : b);
1111
+ t.push_back(rnd.OneIn(2) ? a : b);
1112
+ }
1113
+ DataEndingAtUnreadablePage u(s);
1114
+ DataEndingAtUnreadablePage v(t);
1115
+ int matched = snappy::internal::FindMatchLength(
1116
+ u.data(), v.data(), v.data() + t.size());
1117
+ if (matched == t.size()) {
1118
+ EXPECT_EQ(s, t);
1119
+ } else {
1120
+ EXPECT_NE(s[matched], t[matched]);
1121
+ for (int j = 0; j < matched; j++) {
1122
+ EXPECT_EQ(s[j], t[j]);
1123
+ }
1124
+ }
1125
+ }
1126
+ }
1127
+
1128
+
1129
+ static void CompressFile(const char* fname) {
1130
+ string fullinput;
1131
+ file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1132
+
1133
+ string compressed;
1134
+ Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1135
+
1136
+ file::SetContents(string(fname).append(".comp"), compressed, file::Defaults())
1137
+ .CheckSuccess();
1138
+ }
1139
+
1140
+ static void UncompressFile(const char* fname) {
1141
+ string fullinput;
1142
+ file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1143
+
1144
+ size_t uncompLength;
1145
+ CHECK(CheckUncompressedLength(fullinput, &uncompLength));
1146
+
1147
+ string uncompressed;
1148
+ uncompressed.resize(uncompLength);
1149
+ CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1150
+
1151
+ file::SetContents(string(fname).append(".uncomp"), uncompressed,
1152
+ file::Defaults()).CheckSuccess();
1153
+ }
1154
+
1155
+ static void MeasureFile(const char* fname) {
1156
+ string fullinput;
1157
+ file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1158
+ printf("%-40s :\n", fname);
1159
+
1160
+ int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
1161
+ int end_len = fullinput.size();
1162
+ if (FLAGS_end_len >= 0) {
1163
+ end_len = min<int>(fullinput.size(), FLAGS_end_len);
1164
+ }
1165
+ for (int len = start_len; len <= end_len; len++) {
1166
+ const char* const input = fullinput.data();
1167
+ int repeats = (FLAGS_bytes + len) / (len + 1);
1168
+ if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
1169
+ if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
1170
+ if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
1171
+ if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
1172
+ if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
1173
+ if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
1174
+
1175
+ // For block-size based measurements
1176
+ if (0 && FLAGS_snappy) {
1177
+ Measure(input, len, SNAPPY, repeats, 8<<10);
1178
+ Measure(input, len, SNAPPY, repeats, 16<<10);
1179
+ Measure(input, len, SNAPPY, repeats, 32<<10);
1180
+ Measure(input, len, SNAPPY, repeats, 64<<10);
1181
+ Measure(input, len, SNAPPY, repeats, 256<<10);
1182
+ Measure(input, len, SNAPPY, repeats, 1024<<10);
1183
+ }
1184
+ }
1185
+ }
1186
+
1187
+ static struct {
1188
+ const char* label;
1189
+ const char* filename;
1190
+ size_t size_limit;
1191
+ } files[] = {
1192
+ { "html", "html", 0 },
1193
+ { "urls", "urls.10K", 0 },
1194
+ { "jpg", "fireworks.jpeg", 0 },
1195
+ { "jpg_200", "fireworks.jpeg", 200 },
1196
+ { "pdf", "paper-100k.pdf", 0 },
1197
+ { "html4", "html_x_4", 0 },
1198
+ { "txt1", "alice29.txt", 0 },
1199
+ { "txt2", "asyoulik.txt", 0 },
1200
+ { "txt3", "lcet10.txt", 0 },
1201
+ { "txt4", "plrabn12.txt", 0 },
1202
+ { "pb", "geo.protodata", 0 },
1203
+ { "gaviota", "kppkn.gtb", 0 },
1204
+ };
1205
+
1206
+ static void BM_UFlat(int iters, int arg) {
1207
+ StopBenchmarkTiming();
1208
+
1209
+ // Pick file to process based on "arg"
1210
+ CHECK_GE(arg, 0);
1211
+ CHECK_LT(arg, ARRAYSIZE(files));
1212
+ string contents = ReadTestDataFile(files[arg].filename,
1213
+ files[arg].size_limit);
1214
+
1215
+ string zcontents;
1216
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1217
+ char* dst = new char[contents.size()];
1218
+
1219
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1220
+ static_cast<int64>(contents.size()));
1221
+ SetBenchmarkLabel(files[arg].label);
1222
+ StartBenchmarkTiming();
1223
+ while (iters-- > 0) {
1224
+ CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
1225
+ }
1226
+ StopBenchmarkTiming();
1227
+
1228
+ delete[] dst;
1229
+ }
1230
+ BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1231
+
1232
+ static void BM_UValidate(int iters, int arg) {
1233
+ StopBenchmarkTiming();
1234
+
1235
+ // Pick file to process based on "arg"
1236
+ CHECK_GE(arg, 0);
1237
+ CHECK_LT(arg, ARRAYSIZE(files));
1238
+ string contents = ReadTestDataFile(files[arg].filename,
1239
+ files[arg].size_limit);
1240
+
1241
+ string zcontents;
1242
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1243
+
1244
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1245
+ static_cast<int64>(contents.size()));
1246
+ SetBenchmarkLabel(files[arg].label);
1247
+ StartBenchmarkTiming();
1248
+ while (iters-- > 0) {
1249
+ CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
1250
+ }
1251
+ StopBenchmarkTiming();
1252
+ }
1253
+ BENCHMARK(BM_UValidate)->DenseRange(0, 4);
1254
+
1255
+ static void BM_UIOVec(int iters, int arg) {
1256
+ StopBenchmarkTiming();
1257
+
1258
+ // Pick file to process based on "arg"
1259
+ CHECK_GE(arg, 0);
1260
+ CHECK_LT(arg, ARRAYSIZE(files));
1261
+ string contents = ReadTestDataFile(files[arg].filename,
1262
+ files[arg].size_limit);
1263
+
1264
+ string zcontents;
1265
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1266
+
1267
+ // Uncompress into an iovec containing ten entries.
1268
+ const int kNumEntries = 10;
1269
+ struct iovec iov[kNumEntries];
1270
+ char *dst = new char[contents.size()];
1271
+ int used_so_far = 0;
1272
+ for (int i = 0; i < kNumEntries; ++i) {
1273
+ iov[i].iov_base = dst + used_so_far;
1274
+ if (used_so_far == contents.size()) {
1275
+ iov[i].iov_len = 0;
1276
+ continue;
1277
+ }
1278
+
1279
+ if (i == kNumEntries - 1) {
1280
+ iov[i].iov_len = contents.size() - used_so_far;
1281
+ } else {
1282
+ iov[i].iov_len = contents.size() / kNumEntries;
1283
+ }
1284
+ used_so_far += iov[i].iov_len;
1285
+ }
1286
+
1287
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1288
+ static_cast<int64>(contents.size()));
1289
+ SetBenchmarkLabel(files[arg].label);
1290
+ StartBenchmarkTiming();
1291
+ while (iters-- > 0) {
1292
+ CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
1293
+ kNumEntries));
1294
+ }
1295
+ StopBenchmarkTiming();
1296
+
1297
+ delete[] dst;
1298
+ }
1299
+ BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1300
+
1301
+
1302
+ static void BM_ZFlat(int iters, int arg) {
1303
+ StopBenchmarkTiming();
1304
+
1305
+ // Pick file to process based on "arg"
1306
+ CHECK_GE(arg, 0);
1307
+ CHECK_LT(arg, ARRAYSIZE(files));
1308
+ string contents = ReadTestDataFile(files[arg].filename,
1309
+ files[arg].size_limit);
1310
+
1311
+ char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1312
+
1313
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1314
+ static_cast<int64>(contents.size()));
1315
+ StartBenchmarkTiming();
1316
+
1317
+ size_t zsize = 0;
1318
+ while (iters-- > 0) {
1319
+ snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
1320
+ }
1321
+ StopBenchmarkTiming();
1322
+ const double compression_ratio =
1323
+ static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1324
+ SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
1325
+ files[arg].label, 100.0 * compression_ratio));
1326
+ VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
1327
+ files[arg].label, contents.size(), zsize);
1328
+ delete[] dst;
1329
+ }
1330
+ BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1331
+
1332
+
1333
+ } // namespace snappy
1334
+
1335
+
1336
+ int main(int argc, char** argv) {
1337
+ InitGoogle(argv[0], &argc, &argv, true);
1338
+ RunSpecifiedBenchmarks();
1339
+
1340
+
1341
+ if (argc >= 2) {
1342
+ for (int arg = 1; arg < argc; arg++) {
1343
+ if (FLAGS_write_compressed) {
1344
+ CompressFile(argv[arg]);
1345
+ } else if (FLAGS_write_uncompressed) {
1346
+ UncompressFile(argv[arg]);
1347
+ } else {
1348
+ MeasureFile(argv[arg]);
1349
+ }
1350
+ }
1351
+ return 0;
1352
+ }
1353
+
1354
+ return RUN_ALL_TESTS();
1355
+ }