snappy 0.0.12 → 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/lib/snappy/version.rb +1 -1
  3. data/lib/snappy/writer.rb +6 -0
  4. metadata +3 -41
  5. data/vendor/snappy/AUTHORS +0 -1
  6. data/vendor/snappy/COPYING +0 -54
  7. data/vendor/snappy/ChangeLog +0 -1916
  8. data/vendor/snappy/Makefile.am +0 -23
  9. data/vendor/snappy/NEWS +0 -128
  10. data/vendor/snappy/README +0 -135
  11. data/vendor/snappy/autogen.sh +0 -7
  12. data/vendor/snappy/configure.ac +0 -133
  13. data/vendor/snappy/format_description.txt +0 -110
  14. data/vendor/snappy/framing_format.txt +0 -135
  15. data/vendor/snappy/m4/gtest.m4 +0 -74
  16. data/vendor/snappy/snappy-c.cc +0 -90
  17. data/vendor/snappy/snappy-c.h +0 -138
  18. data/vendor/snappy/snappy-internal.h +0 -150
  19. data/vendor/snappy/snappy-sinksource.cc +0 -71
  20. data/vendor/snappy/snappy-sinksource.h +0 -137
  21. data/vendor/snappy/snappy-stubs-internal.cc +0 -42
  22. data/vendor/snappy/snappy-stubs-internal.h +0 -491
  23. data/vendor/snappy/snappy-stubs-public.h.in +0 -98
  24. data/vendor/snappy/snappy-test.cc +0 -606
  25. data/vendor/snappy/snappy-test.h +0 -582
  26. data/vendor/snappy/snappy.cc +0 -1306
  27. data/vendor/snappy/snappy.h +0 -184
  28. data/vendor/snappy/snappy_unittest.cc +0 -1355
  29. data/vendor/snappy/testdata/alice29.txt +0 -3609
  30. data/vendor/snappy/testdata/asyoulik.txt +0 -4122
  31. data/vendor/snappy/testdata/baddata1.snappy +0 -0
  32. data/vendor/snappy/testdata/baddata2.snappy +0 -0
  33. data/vendor/snappy/testdata/baddata3.snappy +0 -0
  34. data/vendor/snappy/testdata/fireworks.jpeg +0 -0
  35. data/vendor/snappy/testdata/geo.protodata +0 -0
  36. data/vendor/snappy/testdata/html +0 -1
  37. data/vendor/snappy/testdata/html_x_4 +0 -1
  38. data/vendor/snappy/testdata/kppkn.gtb +0 -0
  39. data/vendor/snappy/testdata/lcet10.txt +0 -7519
  40. data/vendor/snappy/testdata/paper-100k.pdf +2 -600
  41. data/vendor/snappy/testdata/plrabn12.txt +0 -10699
  42. data/vendor/snappy/testdata/urls.10K +0 -10000
@@ -1,184 +0,0 @@
1
- // Copyright 2005 and onwards Google Inc.
2
- //
3
- // Redistribution and use in source and binary forms, with or without
4
- // modification, are permitted provided that the following conditions are
5
- // met:
6
- //
7
- // * Redistributions of source code must retain the above copyright
8
- // notice, this list of conditions and the following disclaimer.
9
- // * Redistributions in binary form must reproduce the above
10
- // copyright notice, this list of conditions and the following disclaimer
11
- // in the documentation and/or other materials provided with the
12
- // distribution.
13
- // * Neither the name of Google Inc. nor the names of its
14
- // contributors may be used to endorse or promote products derived from
15
- // this software without specific prior written permission.
16
- //
17
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
- //
29
- // A light-weight compression algorithm. It is designed for speed of
30
- // compression and decompression, rather than for the utmost in space
31
- // savings.
32
- //
33
- // For getting better compression ratios when you are compressing data
34
- // with long repeated sequences or compressing data that is similar to
35
- // other data, while still compressing fast, you might look at first
36
- // using BMDiff and then compressing the output of BMDiff with
37
- // Snappy.
38
-
39
- #ifndef UTIL_SNAPPY_SNAPPY_H__
40
- #define UTIL_SNAPPY_SNAPPY_H__
41
-
42
- #include <stddef.h>
43
- #include <string>
44
-
45
- #include "snappy-stubs-public.h"
46
-
47
- namespace snappy {
48
- class Source;
49
- class Sink;
50
-
51
- // ------------------------------------------------------------------------
52
- // Generic compression/decompression routines.
53
- // ------------------------------------------------------------------------
54
-
55
- // Compress the bytes read from "*source" and append to "*sink". Return the
56
- // number of bytes written.
57
- size_t Compress(Source* source, Sink* sink);
58
-
59
- // Find the uncompressed length of the given stream, as given by the header.
60
- // Note that the true length could deviate from this; the stream could e.g.
61
- // be truncated.
62
- //
63
- // Also note that this leaves "*source" in a state that is unsuitable for
64
- // further operations, such as RawUncompress(). You will need to rewind
65
- // or recreate the source yourself before attempting any further calls.
66
- bool GetUncompressedLength(Source* source, uint32* result);
67
-
68
- // ------------------------------------------------------------------------
69
- // Higher-level string based routines (should be sufficient for most users)
70
- // ------------------------------------------------------------------------
71
-
72
- // Sets "*output" to the compressed version of "input[0,input_length-1]".
73
- // Original contents of *output are lost.
74
- //
75
- // REQUIRES: "input[]" is not an alias of "*output".
76
- size_t Compress(const char* input, size_t input_length, string* output);
77
-
78
- // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
79
- // Original contents of "*uncompressed" are lost.
80
- //
81
- // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
82
- //
83
- // returns false if the message is corrupted and could not be decompressed
84
- bool Uncompress(const char* compressed, size_t compressed_length,
85
- string* uncompressed);
86
-
87
-
88
- // ------------------------------------------------------------------------
89
- // Lower-level character array based routines. May be useful for
90
- // efficiency reasons in certain circumstances.
91
- // ------------------------------------------------------------------------
92
-
93
- // REQUIRES: "compressed" must point to an area of memory that is at
94
- // least "MaxCompressedLength(input_length)" bytes in length.
95
- //
96
- // Takes the data stored in "input[0..input_length-1]" and stores
97
- // it in the array pointed to by "compressed".
98
- //
99
- // "*compressed_length" is set to the length of the compressed output.
100
- //
101
- // Example:
102
- // char* output = new char[snappy::MaxCompressedLength(input_length)];
103
- // size_t output_length;
104
- // RawCompress(input, input_length, output, &output_length);
105
- // ... Process(output, output_length) ...
106
- // delete [] output;
107
- void RawCompress(const char* input,
108
- size_t input_length,
109
- char* compressed,
110
- size_t* compressed_length);
111
-
112
- // Given data in "compressed[0..compressed_length-1]" generated by
113
- // calling the Snappy::Compress routine, this routine
114
- // stores the uncompressed data to
115
- // uncompressed[0..GetUncompressedLength(compressed)-1]
116
- // returns false if the message is corrupted and could not be decompressed
117
- bool RawUncompress(const char* compressed, size_t compressed_length,
118
- char* uncompressed);
119
-
120
- // Given data from the byte source 'compressed' generated by calling
121
- // the Snappy::Compress routine, this routine stores the uncompressed
122
- // data to
123
- // uncompressed[0..GetUncompressedLength(compressed)-1]
124
- // returns false if the message is corrupted and could not be decompressed
125
- bool RawUncompress(Source* compressed, char* uncompressed);
126
-
127
- // Given data in "compressed[0..compressed_length-1]" generated by
128
- // calling the Snappy::Compress routine, this routine
129
- // stores the uncompressed data to the iovec "iov". The number of physical
130
- // buffers in "iov" is given by iov_cnt and their cumulative size
131
- // must be at least GetUncompressedLength(compressed). The individual buffers
132
- // in "iov" must not overlap with each other.
133
- //
134
- // returns false if the message is corrupted and could not be decompressed
135
- bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
136
- const struct iovec* iov, size_t iov_cnt);
137
-
138
- // Given data from the byte source 'compressed' generated by calling
139
- // the Snappy::Compress routine, this routine stores the uncompressed
140
- // data to the iovec "iov". The number of physical
141
- // buffers in "iov" is given by iov_cnt and their cumulative size
142
- // must be at least GetUncompressedLength(compressed). The individual buffers
143
- // in "iov" must not overlap with each other.
144
- //
145
- // returns false if the message is corrupted and could not be decompressed
146
- bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
147
- size_t iov_cnt);
148
-
149
- // Returns the maximal size of the compressed representation of
150
- // input data that is "source_bytes" bytes in length.
151
- size_t MaxCompressedLength(size_t source_bytes);
152
-
153
- // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
154
- // Returns true and stores the length of the uncompressed data in
155
- // *result normally. Returns false on parsing error.
156
- // This operation takes O(1) time.
157
- bool GetUncompressedLength(const char* compressed, size_t compressed_length,
158
- size_t* result);
159
-
160
- // Returns true iff the contents of "compressed[]" can be uncompressed
161
- // successfully. Does not return the uncompressed data. Takes
162
- // time proportional to compressed_length, but is usually at least
163
- // a factor of four faster than actual decompression.
164
- bool IsValidCompressedBuffer(const char* compressed,
165
- size_t compressed_length);
166
-
167
- // The size of a compression block. Note that many parts of the compression
168
- // code assume that kBlockSize <= 65536; in particular, the hash table
169
- // can only store 16-bit offsets, and EmitCopy() also assumes the offset
170
- // is 65535 bytes or less. Note also that if you change this, it will
171
- // affect the framing format (see framing_format.txt).
172
- //
173
- // Note that there might be older data around that is compressed with larger
174
- // block sizes, so the decompression code should not rely on the
175
- // non-existence of long backreferences.
176
- static const int kBlockLog = 16;
177
- static const size_t kBlockSize = 1 << kBlockLog;
178
-
179
- static const int kMaxHashTableBits = 14;
180
- static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
181
- } // end namespace snappy
182
-
183
-
184
- #endif // UTIL_SNAPPY_SNAPPY_H__
@@ -1,1355 +0,0 @@
1
- // Copyright 2005 and onwards Google Inc.
2
- //
3
- // Redistribution and use in source and binary forms, with or without
4
- // modification, are permitted provided that the following conditions are
5
- // met:
6
- //
7
- // * Redistributions of source code must retain the above copyright
8
- // notice, this list of conditions and the following disclaimer.
9
- // * Redistributions in binary form must reproduce the above
10
- // copyright notice, this list of conditions and the following disclaimer
11
- // in the documentation and/or other materials provided with the
12
- // distribution.
13
- // * Neither the name of Google Inc. nor the names of its
14
- // contributors may be used to endorse or promote products derived from
15
- // this software without specific prior written permission.
16
- //
17
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
-
29
- #include <math.h>
30
- #include <stdlib.h>
31
-
32
-
33
- #include <algorithm>
34
- #include <string>
35
- #include <vector>
36
-
37
- #include "snappy.h"
38
- #include "snappy-internal.h"
39
- #include "snappy-test.h"
40
- #include "snappy-sinksource.h"
41
-
42
- DEFINE_int32(start_len, -1,
43
- "Starting prefix size for testing (-1: just full file contents)");
44
- DEFINE_int32(end_len, -1,
45
- "Starting prefix size for testing (-1: just full file contents)");
46
- DEFINE_int32(bytes, 10485760,
47
- "How many bytes to compress/uncompress per file for timing");
48
-
49
- DEFINE_bool(zlib, false,
50
- "Run zlib compression (http://www.zlib.net)");
51
- DEFINE_bool(lzo, false,
52
- "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
53
- DEFINE_bool(quicklz, false,
54
- "Run quickLZ compression (http://www.quicklz.com/)");
55
- DEFINE_bool(liblzf, false,
56
- "Run libLZF compression "
57
- "(http://www.goof.com/pcg/marc/liblzf.html)");
58
- DEFINE_bool(fastlz, false,
59
- "Run FastLZ compression (http://www.fastlz.org/");
60
- DEFINE_bool(snappy, true, "Run snappy compression");
61
-
62
-
63
- DEFINE_bool(write_compressed, false,
64
- "Write compressed versions of each file to <file>.comp");
65
- DEFINE_bool(write_uncompressed, false,
66
- "Write uncompressed versions of each file to <file>.uncomp");
67
-
68
- namespace snappy {
69
-
70
-
71
- #ifdef HAVE_FUNC_MMAP
72
-
73
- // To test against code that reads beyond its input, this class copies a
74
- // string to a newly allocated group of pages, the last of which
75
- // is made unreadable via mprotect. Note that we need to allocate the
76
- // memory with mmap(), as POSIX allows mprotect() only on memory allocated
77
- // with mmap(), and some malloc/posix_memalign implementations expect to
78
- // be able to read previously allocated memory while doing heap allocations.
79
- class DataEndingAtUnreadablePage {
80
- public:
81
- explicit DataEndingAtUnreadablePage(const string& s) {
82
- const size_t page_size = getpagesize();
83
- const size_t size = s.size();
84
- // Round up space for string to a multiple of page_size.
85
- size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
86
- alloc_size_ = space_for_string + page_size;
87
- mem_ = mmap(NULL, alloc_size_,
88
- PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
89
- CHECK_NE(MAP_FAILED, mem_);
90
- protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
91
- char* dst = protected_page_ - size;
92
- memcpy(dst, s.data(), size);
93
- data_ = dst;
94
- size_ = size;
95
- // Make guard page unreadable.
96
- CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
97
- }
98
-
99
- ~DataEndingAtUnreadablePage() {
100
- // Undo the mprotect.
101
- CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
102
- CHECK_EQ(0, munmap(mem_, alloc_size_));
103
- }
104
-
105
- const char* data() const { return data_; }
106
- size_t size() const { return size_; }
107
-
108
- private:
109
- size_t alloc_size_;
110
- void* mem_;
111
- char* protected_page_;
112
- const char* data_;
113
- size_t size_;
114
- };
115
-
116
- #else // HAVE_FUNC_MMAP
117
-
118
- // Fallback for systems without mmap.
119
- typedef string DataEndingAtUnreadablePage;
120
-
121
- #endif
122
-
123
- enum CompressorType {
124
- ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY
125
- };
126
-
127
- const char* names[] = {
128
- "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY"
129
- };
130
-
131
- static size_t MinimumRequiredOutputSpace(size_t input_size,
132
- CompressorType comp) {
133
- switch (comp) {
134
- #ifdef ZLIB_VERSION
135
- case ZLIB:
136
- return ZLib::MinCompressbufSize(input_size);
137
- #endif // ZLIB_VERSION
138
-
139
- #ifdef LZO_VERSION
140
- case LZO:
141
- return input_size + input_size/64 + 16 + 3;
142
- #endif // LZO_VERSION
143
-
144
- #ifdef LZF_VERSION
145
- case LIBLZF:
146
- return input_size;
147
- #endif // LZF_VERSION
148
-
149
- #ifdef QLZ_VERSION_MAJOR
150
- case QUICKLZ:
151
- return input_size + 36000; // 36000 is used for scratch.
152
- #endif // QLZ_VERSION_MAJOR
153
-
154
- #ifdef FASTLZ_VERSION
155
- case FASTLZ:
156
- return max(static_cast<int>(ceil(input_size * 1.05)), 66);
157
- #endif // FASTLZ_VERSION
158
-
159
- case SNAPPY:
160
- return snappy::MaxCompressedLength(input_size);
161
-
162
- default:
163
- LOG(FATAL) << "Unknown compression type number " << comp;
164
- }
165
- }
166
-
167
- // Returns true if we successfully compressed, false otherwise.
168
- //
169
- // If compressed_is_preallocated is set, do not resize the compressed buffer.
170
- // This is typically what you want for a benchmark, in order to not spend
171
- // time in the memory allocator. If you do set this flag, however,
172
- // "compressed" must be preinitialized to at least MinimumRequiredOutputSpace(input_size, comp)
173
- // number of bytes, and may contain junk bytes at the end after return.
174
- static bool Compress(const char* input, size_t input_size, CompressorType comp,
175
- string* compressed, bool compressed_is_preallocated) {
176
- if (!compressed_is_preallocated) {
177
- compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
178
- }
179
-
180
- switch (comp) {
181
- #ifdef ZLIB_VERSION
182
- case ZLIB: {
183
- ZLib zlib;
184
- uLongf destlen = compressed->size();
185
- int ret = zlib.Compress(
186
- reinterpret_cast<Bytef*>(string_as_array(compressed)),
187
- &destlen,
188
- reinterpret_cast<const Bytef*>(input),
189
- input_size);
190
- CHECK_EQ(Z_OK, ret);
191
- if (!compressed_is_preallocated) {
192
- compressed->resize(destlen);
193
- }
194
- return true;
195
- }
196
- #endif // ZLIB_VERSION
197
-
198
- #ifdef LZO_VERSION
199
- case LZO: {
200
- unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
201
- lzo_uint destlen;
202
- int ret = lzo1x_1_15_compress(
203
- reinterpret_cast<const uint8*>(input),
204
- input_size,
205
- reinterpret_cast<uint8*>(string_as_array(compressed)),
206
- &destlen,
207
- mem);
208
- CHECK_EQ(LZO_E_OK, ret);
209
- delete[] mem;
210
- if (!compressed_is_preallocated) {
211
- compressed->resize(destlen);
212
- }
213
- break;
214
- }
215
- #endif // LZO_VERSION
216
-
217
- #ifdef LZF_VERSION
218
- case LIBLZF: {
219
- int destlen = lzf_compress(input,
220
- input_size,
221
- string_as_array(compressed),
222
- input_size);
223
- if (destlen == 0) {
224
- // lzf *can* cause lots of blowup when compressing, so they
225
- // recommend to limit outsize to insize, and just not compress
226
- // if it's bigger. Ideally, we'd just swap input and output.
227
- compressed->assign(input, input_size);
228
- destlen = input_size;
229
- }
230
- if (!compressed_is_preallocated) {
231
- compressed->resize(destlen);
232
- }
233
- break;
234
- }
235
- #endif // LZF_VERSION
236
-
237
- #ifdef QLZ_VERSION_MAJOR
238
- case QUICKLZ: {
239
- qlz_state_compress *state_compress = new qlz_state_compress;
240
- int destlen = qlz_compress(input,
241
- string_as_array(compressed),
242
- input_size,
243
- state_compress);
244
- delete state_compress;
245
- CHECK_NE(0, destlen);
246
- if (!compressed_is_preallocated) {
247
- compressed->resize(destlen);
248
- }
249
- break;
250
- }
251
- #endif // QLZ_VERSION_MAJOR
252
-
253
- #ifdef FASTLZ_VERSION
254
- case FASTLZ: {
255
- // Use level 1 compression since we mostly care about speed.
256
- int destlen = fastlz_compress_level(
257
- 1,
258
- input,
259
- input_size,
260
- string_as_array(compressed));
261
- if (!compressed_is_preallocated) {
262
- compressed->resize(destlen);
263
- }
264
- CHECK_NE(destlen, 0);
265
- break;
266
- }
267
- #endif // FASTLZ_VERSION
268
-
269
- case SNAPPY: {
270
- size_t destlen;
271
- snappy::RawCompress(input, input_size,
272
- string_as_array(compressed),
273
- &destlen);
274
- CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
275
- if (!compressed_is_preallocated) {
276
- compressed->resize(destlen);
277
- }
278
- break;
279
- }
280
-
281
-
282
- default: {
283
- return false; // the asked-for library wasn't compiled in
284
- }
285
- }
286
- return true;
287
- }
288
-
289
- static bool Uncompress(const string& compressed, CompressorType comp,
290
- int size, string* output) {
291
- switch (comp) {
292
- #ifdef ZLIB_VERSION
293
- case ZLIB: {
294
- output->resize(size);
295
- ZLib zlib;
296
- uLongf destlen = output->size();
297
- int ret = zlib.Uncompress(
298
- reinterpret_cast<Bytef*>(string_as_array(output)),
299
- &destlen,
300
- reinterpret_cast<const Bytef*>(compressed.data()),
301
- compressed.size());
302
- CHECK_EQ(Z_OK, ret);
303
- CHECK_EQ(static_cast<uLongf>(size), destlen);
304
- break;
305
- }
306
- #endif // ZLIB_VERSION
307
-
308
- #ifdef LZO_VERSION
309
- case LZO: {
310
- output->resize(size);
311
- lzo_uint destlen;
312
- int ret = lzo1x_decompress(
313
- reinterpret_cast<const uint8*>(compressed.data()),
314
- compressed.size(),
315
- reinterpret_cast<uint8*>(string_as_array(output)),
316
- &destlen,
317
- NULL);
318
- CHECK_EQ(LZO_E_OK, ret);
319
- CHECK_EQ(static_cast<lzo_uint>(size), destlen);
320
- break;
321
- }
322
- #endif // LZO_VERSION
323
-
324
- #ifdef LZF_VERSION
325
- case LIBLZF: {
326
- output->resize(size);
327
- int destlen = lzf_decompress(compressed.data(),
328
- compressed.size(),
329
- string_as_array(output),
330
- output->size());
331
- if (destlen == 0) {
332
- // This error probably means we had decided not to compress,
333
- // and thus have stored input in output directly.
334
- output->assign(compressed.data(), compressed.size());
335
- destlen = compressed.size();
336
- }
337
- CHECK_EQ(destlen, size);
338
- break;
339
- }
340
- #endif // LZF_VERSION
341
-
342
- #ifdef QLZ_VERSION_MAJOR
343
- case QUICKLZ: {
344
- output->resize(size);
345
- qlz_state_decompress *state_decompress = new qlz_state_decompress;
346
- int destlen = qlz_decompress(compressed.data(),
347
- string_as_array(output),
348
- state_decompress);
349
- delete state_decompress;
350
- CHECK_EQ(destlen, size);
351
- break;
352
- }
353
- #endif // QLZ_VERSION_MAJOR
354
-
355
- #ifdef FASTLZ_VERSION
356
- case FASTLZ: {
357
- output->resize(size);
358
- int destlen = fastlz_decompress(compressed.data(),
359
- compressed.length(),
360
- string_as_array(output),
361
- size);
362
- CHECK_EQ(destlen, size);
363
- break;
364
- }
365
- #endif // FASTLZ_VERSION
366
-
367
- case SNAPPY: {
368
- snappy::RawUncompress(compressed.data(), compressed.size(),
369
- string_as_array(output));
370
- break;
371
- }
372
-
373
-
374
- default: {
375
- return false; // the asked-for library wasn't compiled in
376
- }
377
- }
378
- return true;
379
- }
380
-
381
- static void Measure(const char* data,
382
- size_t length,
383
- CompressorType comp,
384
- int repeats,
385
- int block_size) {
386
- // Run tests a few times and pick the median running times
387
- static const int kRuns = 5;
388
- double ctime[kRuns];
389
- double utime[kRuns];
390
- int compressed_size = 0;
391
-
392
- {
393
- // Chop the input into blocks
394
- int num_blocks = (length + block_size - 1) / block_size;
395
- vector<const char*> input(num_blocks);
396
- vector<size_t> input_length(num_blocks);
397
- vector<string> compressed(num_blocks);
398
- vector<string> output(num_blocks);
399
- for (int b = 0; b < num_blocks; b++) {
400
- int input_start = b * block_size;
401
- int input_limit = min<int>((b+1)*block_size, length);
402
- input[b] = data+input_start;
403
- input_length[b] = input_limit-input_start;
404
-
405
- // Pre-grow the output buffer so we don't measure string append time.
406
- compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
407
- }
408
-
409
- // First, try one trial compression to make sure the code is compiled in
410
- if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
411
- LOG(WARNING) << "Skipping " << names[comp] << ": "
412
- << "library not compiled in";
413
- return;
414
- }
415
-
416
- for (int run = 0; run < kRuns; run++) {
417
- CycleTimer ctimer, utimer;
418
-
419
- for (int b = 0; b < num_blocks; b++) {
420
- // Pre-grow the output buffer so we don't measure string append time.
421
- compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
422
- }
423
-
424
- ctimer.Start();
425
- for (int b = 0; b < num_blocks; b++)
426
- for (int i = 0; i < repeats; i++)
427
- Compress(input[b], input_length[b], comp, &compressed[b], true);
428
- ctimer.Stop();
429
-
430
- // Compress once more, with resizing, so we don't leave junk
431
- // at the end that will confuse the decompressor.
432
- for (int b = 0; b < num_blocks; b++) {
433
- Compress(input[b], input_length[b], comp, &compressed[b], false);
434
- }
435
-
436
- for (int b = 0; b < num_blocks; b++) {
437
- output[b].resize(input_length[b]);
438
- }
439
-
440
- utimer.Start();
441
- for (int i = 0; i < repeats; i++)
442
- for (int b = 0; b < num_blocks; b++)
443
- Uncompress(compressed[b], comp, input_length[b], &output[b]);
444
- utimer.Stop();
445
-
446
- ctime[run] = ctimer.Get();
447
- utime[run] = utimer.Get();
448
- }
449
-
450
- compressed_size = 0;
451
- for (int i = 0; i < compressed.size(); i++) {
452
- compressed_size += compressed[i].size();
453
- }
454
- }
455
-
456
- sort(ctime, ctime + kRuns);
457
- sort(utime, utime + kRuns);
458
- const int med = kRuns/2;
459
-
460
- float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
461
- float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
462
- string x = names[comp];
463
- x += ":";
464
- string urate = (uncomp_rate >= 0)
465
- ? StringPrintf("%.1f", uncomp_rate)
466
- : string("?");
467
- printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
468
- "comp %5.1f MB/s uncomp %5s MB/s\n",
469
- x.c_str(),
470
- block_size/(1<<20),
471
- static_cast<int>(length), static_cast<uint32>(compressed_size),
472
- (compressed_size * 100.0) / max<int>(1, length),
473
- comp_rate,
474
- urate.c_str());
475
- }
476
-
477
-
478
- static int VerifyString(const string& input) {
479
- string compressed;
480
- DataEndingAtUnreadablePage i(input);
481
- const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
482
- CHECK_EQ(written, compressed.size());
483
- CHECK_LE(compressed.size(),
484
- snappy::MaxCompressedLength(input.size()));
485
- CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
486
-
487
- string uncompressed;
488
- DataEndingAtUnreadablePage c(compressed);
489
- CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
490
- CHECK_EQ(uncompressed, input);
491
- return uncompressed.size();
492
- }
493
-
494
-
495
- static void VerifyIOVec(const string& input) {
496
- string compressed;
497
- DataEndingAtUnreadablePage i(input);
498
- const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
499
- CHECK_EQ(written, compressed.size());
500
- CHECK_LE(compressed.size(),
501
- snappy::MaxCompressedLength(input.size()));
502
- CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
503
-
504
- // Try uncompressing into an iovec containing a random number of entries
505
- // ranging from 1 to 10.
506
- char* buf = new char[input.size()];
507
- ACMRandom rnd(input.size());
508
- int num = rnd.Next() % 10 + 1;
509
- if (input.size() < num) {
510
- num = input.size();
511
- }
512
- struct iovec* iov = new iovec[num];
513
- int used_so_far = 0;
514
- for (int i = 0; i < num; ++i) {
515
- iov[i].iov_base = buf + used_so_far;
516
- if (i == num - 1) {
517
- iov[i].iov_len = input.size() - used_so_far;
518
- } else {
519
- // Randomly choose to insert a 0 byte entry.
520
- if (rnd.OneIn(5)) {
521
- iov[i].iov_len = 0;
522
- } else {
523
- iov[i].iov_len = rnd.Uniform(input.size());
524
- }
525
- }
526
- used_so_far += iov[i].iov_len;
527
- }
528
- CHECK(snappy::RawUncompressToIOVec(
529
- compressed.data(), compressed.size(), iov, num));
530
- CHECK(!memcmp(buf, input.data(), input.size()));
531
- delete[] iov;
532
- delete[] buf;
533
- }
534
-
535
- // Test that data compressed by a compressor that does not
536
- // obey block sizes is uncompressed properly.
537
- static void VerifyNonBlockedCompression(const string& input) {
538
- if (input.length() > snappy::kBlockSize) {
539
- // We cannot test larger blocks than the maximum block size, obviously.
540
- return;
541
- }
542
-
543
- string prefix;
544
- Varint::Append32(&prefix, input.size());
545
-
546
- // Setup compression table
547
- snappy::internal::WorkingMemory wmem;
548
- int table_size;
549
- uint16* table = wmem.GetHashTable(input.size(), &table_size);
550
-
551
- // Compress entire input in one shot
552
- string compressed;
553
- compressed += prefix;
554
- compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
555
- char* dest = string_as_array(&compressed) + prefix.size();
556
- char* end = snappy::internal::CompressFragment(input.data(), input.size(),
557
- dest, table, table_size);
558
- compressed.resize(end - compressed.data());
559
-
560
- // Uncompress into string
561
- string uncomp_str;
562
- CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
563
- CHECK_EQ(uncomp_str, input);
564
-
565
- }
566
-
567
- // Expand the input so that it is at least K times as big as block size
568
- static string Expand(const string& input) {
569
- static const int K = 3;
570
- string data = input;
571
- while (data.size() < K * snappy::kBlockSize) {
572
- data += input;
573
- }
574
- return data;
575
- }
576
-
577
- static int Verify(const string& input) {
578
- VLOG(1) << "Verifying input of size " << input.size();
579
-
580
- // Compress using string based routines
581
- const int result = VerifyString(input);
582
-
583
-
584
- VerifyNonBlockedCompression(input);
585
- VerifyIOVec(input);
586
- if (!input.empty()) {
587
- const string expanded = Expand(input);
588
- VerifyNonBlockedCompression(expanded);
589
- VerifyIOVec(input);
590
- }
591
-
592
-
593
- return result;
594
- }
595
-
596
- // This test checks to ensure that snappy doesn't coredump if it gets
597
- // corrupted data.
598
-
599
- static bool IsValidCompressedBuffer(const string& c) {
600
- return snappy::IsValidCompressedBuffer(c.data(), c.size());
601
- }
602
- static bool Uncompress(const string& c, string* u) {
603
- return snappy::Uncompress(c.data(), c.size(), u);
604
- }
605
-
606
- TYPED_TEST(CorruptedTest, VerifyCorrupted) {
607
- string source = "making sure we don't crash with corrupted input";
608
- VLOG(1) << source;
609
- string dest;
610
- TypeParam uncmp;
611
- snappy::Compress(source.data(), source.size(), &dest);
612
-
613
- // Mess around with the data. It's hard to simulate all possible
614
- // corruptions; this is just one example ...
615
- CHECK_GT(dest.size(), 3);
616
- dest[1]--;
617
- dest[3]++;
618
- // this really ought to fail.
619
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
620
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
621
-
622
- // This is testing for a security bug - a buffer that decompresses to 100k
623
- // but we lie in the snappy header and only reserve 0 bytes of memory :)
624
- source.resize(100000);
625
- for (int i = 0; i < source.length(); ++i) {
626
- source[i] = 'A';
627
- }
628
- snappy::Compress(source.data(), source.size(), &dest);
629
- dest[0] = dest[1] = dest[2] = dest[3] = 0;
630
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
631
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
632
-
633
- if (sizeof(void *) == 4) {
634
- // Another security check; check a crazy big length can't DoS us with an
635
- // over-allocation.
636
- // Currently this is done only for 32-bit builds. On 64-bit builds,
637
- // where 3 GB might be an acceptable allocation size, Uncompress()
638
- // attempts to decompress, and sometimes causes the test to run out of
639
- // memory.
640
- dest[0] = dest[1] = dest[2] = dest[3] = 0xff;
641
- // This decodes to a really large size, i.e., about 3 GB.
642
- dest[4] = 'k';
643
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
644
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
645
- } else {
646
- LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
647
- }
648
-
649
- // This decodes to about 2 MB; much smaller, but should still fail.
650
- dest[0] = dest[1] = dest[2] = 0xff;
651
- dest[3] = 0x00;
652
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
653
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
654
-
655
- // try reading stuff in from a bad file.
656
- for (int i = 1; i <= 3; ++i) {
657
- string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(),
658
- 0);
659
- string uncmp;
660
- // check that we don't return a crazy length
661
- size_t ulen;
662
- CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
663
- || (ulen < (1<<20)));
664
- uint32 ulen2;
665
- snappy::ByteArraySource source(data.data(), data.size());
666
- CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
667
- (ulen2 < (1<<20)));
668
- CHECK(!IsValidCompressedBuffer(TypeParam(data)));
669
- CHECK(!Uncompress(TypeParam(data), &uncmp));
670
- }
671
- }
672
-
673
- // Helper routines to construct arbitrary compressed strings.
674
- // These mirror the compression code in snappy.cc, but are copied
675
- // here so that we can bypass some limitations in the how snappy.cc
676
- // invokes these routines.
677
- static void AppendLiteral(string* dst, const string& literal) {
678
- if (literal.empty()) return;
679
- int n = literal.size() - 1;
680
- if (n < 60) {
681
- // Fit length in tag byte
682
- dst->push_back(0 | (n << 2));
683
- } else {
684
- // Encode in upcoming bytes
685
- char number[4];
686
- int count = 0;
687
- while (n > 0) {
688
- number[count++] = n & 0xff;
689
- n >>= 8;
690
- }
691
- dst->push_back(0 | ((59+count) << 2));
692
- *dst += string(number, count);
693
- }
694
- *dst += literal;
695
- }
696
-
697
- static void AppendCopy(string* dst, int offset, int length) {
698
- while (length > 0) {
699
- // Figure out how much to copy in one shot
700
- int to_copy;
701
- if (length >= 68) {
702
- to_copy = 64;
703
- } else if (length > 64) {
704
- to_copy = 60;
705
- } else {
706
- to_copy = length;
707
- }
708
- length -= to_copy;
709
-
710
- if ((to_copy >= 4) && (to_copy < 12) && (offset < 2048)) {
711
- assert(to_copy-4 < 8); // Must fit in 3 bits
712
- dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5));
713
- dst->push_back(offset & 0xff);
714
- } else if (offset < 65536) {
715
- dst->push_back(2 | ((to_copy-1) << 2));
716
- dst->push_back(offset & 0xff);
717
- dst->push_back(offset >> 8);
718
- } else {
719
- dst->push_back(3 | ((to_copy-1) << 2));
720
- dst->push_back(offset & 0xff);
721
- dst->push_back((offset >> 8) & 0xff);
722
- dst->push_back((offset >> 16) & 0xff);
723
- dst->push_back((offset >> 24) & 0xff);
724
- }
725
- }
726
- }
727
-
728
- TEST(Snappy, SimpleTests) {
729
- Verify("");
730
- Verify("a");
731
- Verify("ab");
732
- Verify("abc");
733
-
734
- Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
735
- Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
736
- Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
737
- Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
738
- Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
739
- }
740
-
741
- // Verify max blowup (lots of four-byte copies)
742
- TEST(Snappy, MaxBlowup) {
743
- string input;
744
- for (int i = 0; i < 20000; i++) {
745
- ACMRandom rnd(i);
746
- uint32 bytes = static_cast<uint32>(rnd.Next());
747
- input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
748
- }
749
- for (int i = 19999; i >= 0; i--) {
750
- ACMRandom rnd(i);
751
- uint32 bytes = static_cast<uint32>(rnd.Next());
752
- input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
753
- }
754
- Verify(input);
755
- }
756
-
757
- TEST(Snappy, RandomData) {
758
- ACMRandom rnd(FLAGS_test_random_seed);
759
-
760
- const int num_ops = 20000;
761
- for (int i = 0; i < num_ops; i++) {
762
- if ((i % 1000) == 0) {
763
- VLOG(0) << "Random op " << i << " of " << num_ops;
764
- }
765
-
766
- string x;
767
- int len = rnd.Uniform(4096);
768
- if (i < 100) {
769
- len = 65536 + rnd.Uniform(65536);
770
- }
771
- while (x.size() < len) {
772
- int run_len = 1;
773
- if (rnd.OneIn(10)) {
774
- run_len = rnd.Skewed(8);
775
- }
776
- char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
777
- while (run_len-- > 0 && x.size() < len) {
778
- x += c;
779
- }
780
- }
781
-
782
- Verify(x);
783
- }
784
- }
785
-
786
- TEST(Snappy, FourByteOffset) {
787
- // The new compressor cannot generate four-byte offsets since
788
- // it chops up the input into 32KB pieces. So we hand-emit the
789
- // copy manually.
790
-
791
- // The two fragments that make up the input string.
792
- string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
793
- string fragment2 = "some other string";
794
-
795
- // How many times each fragment is emitted.
796
- const int n1 = 2;
797
- const int n2 = 100000 / fragment2.size();
798
- const int length = n1 * fragment1.size() + n2 * fragment2.size();
799
-
800
- string compressed;
801
- Varint::Append32(&compressed, length);
802
-
803
- AppendLiteral(&compressed, fragment1);
804
- string src = fragment1;
805
- for (int i = 0; i < n2; i++) {
806
- AppendLiteral(&compressed, fragment2);
807
- src += fragment2;
808
- }
809
- AppendCopy(&compressed, src.size(), fragment1.size());
810
- src += fragment1;
811
- CHECK_EQ(length, src.size());
812
-
813
- string uncompressed;
814
- CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
815
- CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
816
- &uncompressed));
817
- CHECK_EQ(uncompressed, src);
818
- }
819
-
820
- TEST(Snappy, IOVecEdgeCases) {
821
- // Test some tricky edge cases in the iovec output that are not necessarily
822
- // exercised by random tests.
823
-
824
- // Our output blocks look like this initially (the last iovec is bigger
825
- // than depicted):
826
- // [ ] [ ] [ ] [ ] [ ]
827
- static const int kLengths[] = { 2, 1, 4, 8, 128 };
828
-
829
- struct iovec iov[ARRAYSIZE(kLengths)];
830
- for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
831
- iov[i].iov_base = new char[kLengths[i]];
832
- iov[i].iov_len = kLengths[i];
833
- }
834
-
835
- string compressed;
836
- Varint::Append32(&compressed, 22);
837
-
838
- // A literal whose output crosses three blocks.
839
- // [ab] [c] [123 ] [ ] [ ]
840
- AppendLiteral(&compressed, "abc123");
841
-
842
- // A copy whose output crosses two blocks (source and destination
843
- // segments marked).
844
- // [ab] [c] [1231] [23 ] [ ]
845
- // ^--^ --
846
- AppendCopy(&compressed, 3, 3);
847
-
848
- // A copy where the input is, at first, in the block before the output:
849
- //
850
- // [ab] [c] [1231] [231231 ] [ ]
851
- // ^--- ^---
852
- // Then during the copy, the pointers move such that the input and
853
- // output pointers are in the same block:
854
- //
855
- // [ab] [c] [1231] [23123123] [ ]
856
- // ^- ^-
857
- // And then they move again, so that the output pointer is no longer
858
- // in the same block as the input pointer:
859
- // [ab] [c] [1231] [23123123] [123 ]
860
- // ^-- ^--
861
- AppendCopy(&compressed, 6, 9);
862
-
863
- // Finally, a copy where the input is from several blocks back,
864
- // and it also crosses three blocks:
865
- //
866
- // [ab] [c] [1231] [23123123] [123b ]
867
- // ^ ^
868
- // [ab] [c] [1231] [23123123] [123bc ]
869
- // ^ ^
870
- // [ab] [c] [1231] [23123123] [123bc12 ]
871
- // ^- ^-
872
- AppendCopy(&compressed, 17, 4);
873
-
874
- CHECK(snappy::RawUncompressToIOVec(
875
- compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
876
- CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
877
- CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
878
- CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
879
- CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
880
- CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));
881
-
882
- for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
883
- delete[] reinterpret_cast<char *>(iov[i].iov_base);
884
- }
885
- }
886
-
887
- TEST(Snappy, IOVecLiteralOverflow) {
888
- static const int kLengths[] = { 3, 4 };
889
-
890
- struct iovec iov[ARRAYSIZE(kLengths)];
891
- for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
892
- iov[i].iov_base = new char[kLengths[i]];
893
- iov[i].iov_len = kLengths[i];
894
- }
895
-
896
- string compressed;
897
- Varint::Append32(&compressed, 8);
898
-
899
- AppendLiteral(&compressed, "12345678");
900
-
901
- CHECK(!snappy::RawUncompressToIOVec(
902
- compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
903
-
904
- for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
905
- delete[] reinterpret_cast<char *>(iov[i].iov_base);
906
- }
907
- }
908
-
909
- TEST(Snappy, IOVecCopyOverflow) {
910
- static const int kLengths[] = { 3, 4 };
911
-
912
- struct iovec iov[ARRAYSIZE(kLengths)];
913
- for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
914
- iov[i].iov_base = new char[kLengths[i]];
915
- iov[i].iov_len = kLengths[i];
916
- }
917
-
918
- string compressed;
919
- Varint::Append32(&compressed, 8);
920
-
921
- AppendLiteral(&compressed, "123");
922
- AppendCopy(&compressed, 3, 5);
923
-
924
- CHECK(!snappy::RawUncompressToIOVec(
925
- compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
926
-
927
- for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
928
- delete[] reinterpret_cast<char *>(iov[i].iov_base);
929
- }
930
- }
931
-
932
-
933
- static bool CheckUncompressedLength(const string& compressed,
934
- size_t* ulength) {
935
- const bool result1 = snappy::GetUncompressedLength(compressed.data(),
936
- compressed.size(),
937
- ulength);
938
-
939
- snappy::ByteArraySource source(compressed.data(), compressed.size());
940
- uint32 length;
941
- const bool result2 = snappy::GetUncompressedLength(&source, &length);
942
- CHECK_EQ(result1, result2);
943
- return result1;
944
- }
945
-
946
- TEST(SnappyCorruption, TruncatedVarint) {
947
- string compressed, uncompressed;
948
- size_t ulength;
949
- compressed.push_back('\xf0');
950
- CHECK(!CheckUncompressedLength(compressed, &ulength));
951
- CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
952
- CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
953
- &uncompressed));
954
- }
955
-
956
- TEST(SnappyCorruption, UnterminatedVarint) {
957
- string compressed, uncompressed;
958
- size_t ulength;
959
- compressed.push_back(128);
960
- compressed.push_back(128);
961
- compressed.push_back(128);
962
- compressed.push_back(128);
963
- compressed.push_back(128);
964
- compressed.push_back(10);
965
- CHECK(!CheckUncompressedLength(compressed, &ulength));
966
- CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
967
- CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
968
- &uncompressed));
969
- }
970
-
971
- TEST(Snappy, ReadPastEndOfBuffer) {
972
- // Check that we do not read past end of input
973
-
974
- // Make a compressed string that ends with a single-byte literal
975
- string compressed;
976
- Varint::Append32(&compressed, 1);
977
- AppendLiteral(&compressed, "x");
978
-
979
- string uncompressed;
980
- DataEndingAtUnreadablePage c(compressed);
981
- CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
982
- CHECK_EQ(uncompressed, string("x"));
983
- }
984
-
985
- // Check for an infinite loop caused by a copy with offset==0
986
- TEST(Snappy, ZeroOffsetCopy) {
987
- const char* compressed = "\x40\x12\x00\x00";
988
- // \x40 Length (must be > kMaxIncrementCopyOverflow)
989
- // \x12\x00\x00 Copy with offset==0, length==5
990
- char uncompressed[100];
991
- EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
992
- }
993
-
994
- TEST(Snappy, ZeroOffsetCopyValidation) {
995
- const char* compressed = "\x05\x12\x00\x00";
996
- // \x05 Length
997
- // \x12\x00\x00 Copy with offset==0, length==5
998
- EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
999
- }
1000
-
1001
-
1002
- namespace {
1003
-
1004
- int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
1005
- return snappy::internal::FindMatchLength(s1, s2, s2 + length);
1006
- }
1007
-
1008
- } // namespace
1009
-
1010
- TEST(Snappy, FindMatchLength) {
1011
- // Exercise all different code paths through the function.
1012
- // 64-bit version:
1013
-
1014
- // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
1015
- EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
1016
- EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
1017
-
1018
- // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
1019
- EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
1020
-
1021
- // Same, but edge cases.
1022
- EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
1023
- EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
1024
-
1025
- // Find non-match at once in first loop.
1026
- EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
1027
- EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
1028
- EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
1029
- EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
1030
-
1031
- // Find non-match in first loop after one block.
1032
- EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1033
- "abcdefgh?1234567xxxxxxxx", 24));
1034
- EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1035
- "abcdefgh0?234567xxxxxxxx", 24));
1036
- EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1037
- "abcdefgh01237654xxxxxxxx", 24));
1038
- EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1039
- "abcdefgh0123456?xxxxxxxx", 24));
1040
-
1041
- // 32-bit version:
1042
-
1043
- // Short matches.
1044
- EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
1045
- EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
1046
- EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
1047
- EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
1048
- EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
1049
- EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
1050
- EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
1051
- EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
1052
- EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
1053
- EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
1054
-
1055
- // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
1056
- EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
1057
- EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
1058
- EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
1059
-
1060
- // Same, but edge cases.
1061
- EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
1062
- EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
1063
-
1064
- // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
1065
- EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
1066
-
1067
- // Find non-match at once in first loop.
1068
- EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1069
- "xxxxxx?123xxxxxxxx", 18));
1070
- EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1071
- "xxxxxx0?23xxxxxxxx", 18));
1072
- EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1073
- "xxxxxx0132xxxxxxxx", 18));
1074
- EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1075
- "xxxxxx012?xxxxxxxx", 18));
1076
-
1077
- // Same, but edge cases.
1078
- EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
1079
- EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
1080
- EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
1081
- EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
1082
-
1083
- // Find non-match in first loop after one block.
1084
- EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
1085
- "xxxxxxabcd?123xx", 16));
1086
- EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
1087
- "xxxxxxabcd0?23xx", 16));
1088
- EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
1089
- "xxxxxxabcd0132xx", 16));
1090
- EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
1091
- "xxxxxxabcd012?xx", 16));
1092
-
1093
- // Same, but edge cases.
1094
- EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
1095
- EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
1096
- EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
1097
- EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
1098
- }
1099
-
1100
- TEST(Snappy, FindMatchLengthRandom) {
1101
- const int kNumTrials = 10000;
1102
- const int kTypicalLength = 10;
1103
- ACMRandom rnd(FLAGS_test_random_seed);
1104
-
1105
- for (int i = 0; i < kNumTrials; i++) {
1106
- string s, t;
1107
- char a = rnd.Rand8();
1108
- char b = rnd.Rand8();
1109
- while (!rnd.OneIn(kTypicalLength)) {
1110
- s.push_back(rnd.OneIn(2) ? a : b);
1111
- t.push_back(rnd.OneIn(2) ? a : b);
1112
- }
1113
- DataEndingAtUnreadablePage u(s);
1114
- DataEndingAtUnreadablePage v(t);
1115
- int matched = snappy::internal::FindMatchLength(
1116
- u.data(), v.data(), v.data() + t.size());
1117
- if (matched == t.size()) {
1118
- EXPECT_EQ(s, t);
1119
- } else {
1120
- EXPECT_NE(s[matched], t[matched]);
1121
- for (int j = 0; j < matched; j++) {
1122
- EXPECT_EQ(s[j], t[j]);
1123
- }
1124
- }
1125
- }
1126
- }
1127
-
1128
-
1129
- static void CompressFile(const char* fname) {
1130
- string fullinput;
1131
- file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1132
-
1133
- string compressed;
1134
- Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1135
-
1136
- file::SetContents(string(fname).append(".comp"), compressed, file::Defaults())
1137
- .CheckSuccess();
1138
- }
1139
-
1140
- static void UncompressFile(const char* fname) {
1141
- string fullinput;
1142
- file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1143
-
1144
- size_t uncompLength;
1145
- CHECK(CheckUncompressedLength(fullinput, &uncompLength));
1146
-
1147
- string uncompressed;
1148
- uncompressed.resize(uncompLength);
1149
- CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1150
-
1151
- file::SetContents(string(fname).append(".uncomp"), uncompressed,
1152
- file::Defaults()).CheckSuccess();
1153
- }
1154
-
1155
- static void MeasureFile(const char* fname) {
1156
- string fullinput;
1157
- file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1158
- printf("%-40s :\n", fname);
1159
-
1160
- int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
1161
- int end_len = fullinput.size();
1162
- if (FLAGS_end_len >= 0) {
1163
- end_len = min<int>(fullinput.size(), FLAGS_end_len);
1164
- }
1165
- for (int len = start_len; len <= end_len; len++) {
1166
- const char* const input = fullinput.data();
1167
- int repeats = (FLAGS_bytes + len) / (len + 1);
1168
- if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
1169
- if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
1170
- if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
1171
- if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
1172
- if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
1173
- if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
1174
-
1175
- // For block-size based measurements
1176
- if (0 && FLAGS_snappy) {
1177
- Measure(input, len, SNAPPY, repeats, 8<<10);
1178
- Measure(input, len, SNAPPY, repeats, 16<<10);
1179
- Measure(input, len, SNAPPY, repeats, 32<<10);
1180
- Measure(input, len, SNAPPY, repeats, 64<<10);
1181
- Measure(input, len, SNAPPY, repeats, 256<<10);
1182
- Measure(input, len, SNAPPY, repeats, 1024<<10);
1183
- }
1184
- }
1185
- }
1186
-
1187
- static struct {
1188
- const char* label;
1189
- const char* filename;
1190
- size_t size_limit;
1191
- } files[] = {
1192
- { "html", "html", 0 },
1193
- { "urls", "urls.10K", 0 },
1194
- { "jpg", "fireworks.jpeg", 0 },
1195
- { "jpg_200", "fireworks.jpeg", 200 },
1196
- { "pdf", "paper-100k.pdf", 0 },
1197
- { "html4", "html_x_4", 0 },
1198
- { "txt1", "alice29.txt", 0 },
1199
- { "txt2", "asyoulik.txt", 0 },
1200
- { "txt3", "lcet10.txt", 0 },
1201
- { "txt4", "plrabn12.txt", 0 },
1202
- { "pb", "geo.protodata", 0 },
1203
- { "gaviota", "kppkn.gtb", 0 },
1204
- };
1205
-
1206
- static void BM_UFlat(int iters, int arg) {
1207
- StopBenchmarkTiming();
1208
-
1209
- // Pick file to process based on "arg"
1210
- CHECK_GE(arg, 0);
1211
- CHECK_LT(arg, ARRAYSIZE(files));
1212
- string contents = ReadTestDataFile(files[arg].filename,
1213
- files[arg].size_limit);
1214
-
1215
- string zcontents;
1216
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1217
- char* dst = new char[contents.size()];
1218
-
1219
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1220
- static_cast<int64>(contents.size()));
1221
- SetBenchmarkLabel(files[arg].label);
1222
- StartBenchmarkTiming();
1223
- while (iters-- > 0) {
1224
- CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
1225
- }
1226
- StopBenchmarkTiming();
1227
-
1228
- delete[] dst;
1229
- }
1230
- BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1231
-
1232
- static void BM_UValidate(int iters, int arg) {
1233
- StopBenchmarkTiming();
1234
-
1235
- // Pick file to process based on "arg"
1236
- CHECK_GE(arg, 0);
1237
- CHECK_LT(arg, ARRAYSIZE(files));
1238
- string contents = ReadTestDataFile(files[arg].filename,
1239
- files[arg].size_limit);
1240
-
1241
- string zcontents;
1242
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1243
-
1244
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1245
- static_cast<int64>(contents.size()));
1246
- SetBenchmarkLabel(files[arg].label);
1247
- StartBenchmarkTiming();
1248
- while (iters-- > 0) {
1249
- CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
1250
- }
1251
- StopBenchmarkTiming();
1252
- }
1253
- BENCHMARK(BM_UValidate)->DenseRange(0, 4);
1254
-
1255
- static void BM_UIOVec(int iters, int arg) {
1256
- StopBenchmarkTiming();
1257
-
1258
- // Pick file to process based on "arg"
1259
- CHECK_GE(arg, 0);
1260
- CHECK_LT(arg, ARRAYSIZE(files));
1261
- string contents = ReadTestDataFile(files[arg].filename,
1262
- files[arg].size_limit);
1263
-
1264
- string zcontents;
1265
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1266
-
1267
- // Uncompress into an iovec containing ten entries.
1268
- const int kNumEntries = 10;
1269
- struct iovec iov[kNumEntries];
1270
- char *dst = new char[contents.size()];
1271
- int used_so_far = 0;
1272
- for (int i = 0; i < kNumEntries; ++i) {
1273
- iov[i].iov_base = dst + used_so_far;
1274
- if (used_so_far == contents.size()) {
1275
- iov[i].iov_len = 0;
1276
- continue;
1277
- }
1278
-
1279
- if (i == kNumEntries - 1) {
1280
- iov[i].iov_len = contents.size() - used_so_far;
1281
- } else {
1282
- iov[i].iov_len = contents.size() / kNumEntries;
1283
- }
1284
- used_so_far += iov[i].iov_len;
1285
- }
1286
-
1287
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1288
- static_cast<int64>(contents.size()));
1289
- SetBenchmarkLabel(files[arg].label);
1290
- StartBenchmarkTiming();
1291
- while (iters-- > 0) {
1292
- CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
1293
- kNumEntries));
1294
- }
1295
- StopBenchmarkTiming();
1296
-
1297
- delete[] dst;
1298
- }
1299
- BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1300
-
1301
-
1302
- static void BM_ZFlat(int iters, int arg) {
1303
- StopBenchmarkTiming();
1304
-
1305
- // Pick file to process based on "arg"
1306
- CHECK_GE(arg, 0);
1307
- CHECK_LT(arg, ARRAYSIZE(files));
1308
- string contents = ReadTestDataFile(files[arg].filename,
1309
- files[arg].size_limit);
1310
-
1311
- char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1312
-
1313
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1314
- static_cast<int64>(contents.size()));
1315
- StartBenchmarkTiming();
1316
-
1317
- size_t zsize = 0;
1318
- while (iters-- > 0) {
1319
- snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
1320
- }
1321
- StopBenchmarkTiming();
1322
- const double compression_ratio =
1323
- static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1324
- SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
1325
- files[arg].label, 100.0 * compression_ratio));
1326
- VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
1327
- files[arg].label, contents.size(), zsize);
1328
- delete[] dst;
1329
- }
1330
- BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1331
-
1332
-
1333
- } // namespace snappy
1334
-
1335
-
1336
- int main(int argc, char** argv) {
1337
- InitGoogle(argv[0], &argc, &argv, true);
1338
- RunSpecifiedBenchmarks();
1339
-
1340
-
1341
- if (argc >= 2) {
1342
- for (int arg = 1; arg < argc; arg++) {
1343
- if (FLAGS_write_compressed) {
1344
- CompressFile(argv[arg]);
1345
- } else if (FLAGS_write_uncompressed) {
1346
- UncompressFile(argv[arg]);
1347
- } else {
1348
- MeasureFile(argv[arg]);
1349
- }
1350
- }
1351
- return 0;
1352
- }
1353
-
1354
- return RUN_ALL_TESTS();
1355
- }