snappy 0.0.13 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +28 -1
  3. data/Gemfile +6 -1
  4. data/README.md +28 -4
  5. data/Rakefile +1 -0
  6. data/ext/extconf.rb +21 -24
  7. data/lib/snappy.rb +3 -1
  8. data/lib/snappy/hadoop.rb +22 -0
  9. data/lib/snappy/hadoop/reader.rb +58 -0
  10. data/lib/snappy/hadoop/writer.rb +51 -0
  11. data/lib/snappy/reader.rb +11 -7
  12. data/lib/snappy/shim.rb +30 -0
  13. data/lib/snappy/version.rb +3 -1
  14. data/lib/snappy/writer.rb +8 -9
  15. data/smoke.sh +8 -0
  16. data/snappy.gemspec +6 -30
  17. data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
  18. data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
  19. data/test/test-snappy-hadoop.rb +22 -0
  20. data/vendor/snappy/AUTHORS +1 -0
  21. data/vendor/snappy/CMakeLists.txt +174 -0
  22. data/vendor/snappy/CONTRIBUTING.md +26 -0
  23. data/vendor/snappy/COPYING +54 -0
  24. data/vendor/snappy/NEWS +180 -0
  25. data/vendor/snappy/README.md +149 -0
  26. data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
  27. data/vendor/snappy/cmake/config.h.in +62 -0
  28. data/vendor/snappy/format_description.txt +110 -0
  29. data/vendor/snappy/framing_format.txt +135 -0
  30. data/vendor/snappy/snappy-c.cc +90 -0
  31. data/vendor/snappy/snappy-c.h +138 -0
  32. data/vendor/snappy/snappy-internal.h +224 -0
  33. data/vendor/snappy/snappy-sinksource.cc +104 -0
  34. data/vendor/snappy/snappy-sinksource.h +182 -0
  35. data/vendor/snappy/snappy-stubs-internal.cc +42 -0
  36. data/vendor/snappy/snappy-stubs-internal.h +561 -0
  37. data/vendor/snappy/snappy-stubs-public.h.in +94 -0
  38. data/vendor/snappy/snappy-test.cc +612 -0
  39. data/vendor/snappy/snappy-test.h +573 -0
  40. data/vendor/snappy/snappy.cc +1515 -0
  41. data/vendor/snappy/snappy.h +203 -0
  42. data/vendor/snappy/snappy_unittest.cc +1410 -0
  43. metadata +38 -46
@@ -0,0 +1,203 @@
1
+ // Copyright 2005 and onwards Google Inc.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // A light-weight compression algorithm. It is designed for speed of
30
+ // compression and decompression, rather than for the utmost in space
31
+ // savings.
32
+ //
33
+ // For getting better compression ratios when you are compressing data
34
+ // with long repeated sequences or compressing data that is similar to
35
+ // other data, while still compressing fast, you might look at first
36
+ // using BMDiff and then compressing the output of BMDiff with
37
+ // Snappy.
38
+
39
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
40
+ #define THIRD_PARTY_SNAPPY_SNAPPY_H__
41
+
42
+ #include <stddef.h>
43
+ #include <string>
44
+
45
+ #include "snappy-stubs-public.h"
46
+
47
+ namespace snappy {
48
+ class Source;
49
+ class Sink;
50
+
51
+ // ------------------------------------------------------------------------
52
+ // Generic compression/decompression routines.
53
+ // ------------------------------------------------------------------------
54
+
55
+ // Compress the bytes read from "*source" and append to "*sink". Return the
56
+ // number of bytes written.
57
+ size_t Compress(Source* source, Sink* sink);
58
+
59
+ // Find the uncompressed length of the given stream, as given by the header.
60
+ // Note that the true length could deviate from this; the stream could e.g.
61
+ // be truncated.
62
+ //
63
+ // Also note that this leaves "*source" in a state that is unsuitable for
64
+ // further operations, such as RawUncompress(). You will need to rewind
65
+ // or recreate the source yourself before attempting any further calls.
66
+ bool GetUncompressedLength(Source* source, uint32* result);
67
+
68
+ // ------------------------------------------------------------------------
69
+ // Higher-level string based routines (should be sufficient for most users)
70
+ // ------------------------------------------------------------------------
71
+
72
+ // Sets "*output" to the compressed version of "input[0,input_length-1]".
73
+ // Original contents of *output are lost.
74
+ //
75
+ // REQUIRES: "input[]" is not an alias of "*output".
76
+ size_t Compress(const char* input, size_t input_length, string* output);
77
+
78
+ // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
79
+ // Original contents of "*uncompressed" are lost.
80
+ //
81
+ // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
82
+ //
83
+ // returns false if the message is corrupted and could not be decompressed
84
+ bool Uncompress(const char* compressed, size_t compressed_length,
85
+ string* uncompressed);
86
+
87
+ // Decompresses "compressed" to "*uncompressed".
88
+ //
89
+ // returns false if the message is corrupted and could not be decompressed
90
+ bool Uncompress(Source* compressed, Sink* uncompressed);
91
+
92
+ // This routine uncompresses as much of the "compressed" as possible
93
+ // into sink. It returns the number of valid bytes added to sink
94
+ // (extra invalid bytes may have been added due to errors; the caller
95
+ // should ignore those). The emitted data typically has length
96
+ // GetUncompressedLength(), but may be shorter if an error is
97
+ // encountered.
98
+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed);
99
+
100
+ // ------------------------------------------------------------------------
101
+ // Lower-level character array based routines. May be useful for
102
+ // efficiency reasons in certain circumstances.
103
+ // ------------------------------------------------------------------------
104
+
105
+ // REQUIRES: "compressed" must point to an area of memory that is at
106
+ // least "MaxCompressedLength(input_length)" bytes in length.
107
+ //
108
+ // Takes the data stored in "input[0..input_length]" and stores
109
+ // it in the array pointed to by "compressed".
110
+ //
111
+ // "*compressed_length" is set to the length of the compressed output.
112
+ //
113
+ // Example:
114
+ // char* output = new char[snappy::MaxCompressedLength(input_length)];
115
+ // size_t output_length;
116
+ // RawCompress(input, input_length, output, &output_length);
117
+ // ... Process(output, output_length) ...
118
+ // delete [] output;
119
+ void RawCompress(const char* input,
120
+ size_t input_length,
121
+ char* compressed,
122
+ size_t* compressed_length);
123
+
124
+ // Given data in "compressed[0..compressed_length-1]" generated by
125
+ // calling the Snappy::Compress routine, this routine
126
+ // stores the uncompressed data to
127
+ // uncompressed[0..GetUncompressedLength(compressed)-1]
128
+ // returns false if the message is corrupted and could not be decrypted
129
+ bool RawUncompress(const char* compressed, size_t compressed_length,
130
+ char* uncompressed);
131
+
132
+ // Given data from the byte source 'compressed' generated by calling
133
+ // the Snappy::Compress routine, this routine stores the uncompressed
134
+ // data to
135
+ // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
136
+ // returns false if the message is corrupted and could not be decrypted
137
+ bool RawUncompress(Source* compressed, char* uncompressed);
138
+
139
+ // Given data in "compressed[0..compressed_length-1]" generated by
140
+ // calling the Snappy::Compress routine, this routine
141
+ // stores the uncompressed data to the iovec "iov". The number of physical
142
+ // buffers in "iov" is given by iov_cnt and their cumulative size
143
+ // must be at least GetUncompressedLength(compressed). The individual buffers
144
+ // in "iov" must not overlap with each other.
145
+ //
146
+ // returns false if the message is corrupted and could not be decrypted
147
+ bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
148
+ const struct iovec* iov, size_t iov_cnt);
149
+
150
+ // Given data from the byte source 'compressed' generated by calling
151
+ // the Snappy::Compress routine, this routine stores the uncompressed
152
+ // data to the iovec "iov". The number of physical
153
+ // buffers in "iov" is given by iov_cnt and their cumulative size
154
+ // must be at least GetUncompressedLength(compressed). The individual buffers
155
+ // in "iov" must not overlap with each other.
156
+ //
157
+ // returns false if the message is corrupted and could not be decrypted
158
+ bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
159
+ size_t iov_cnt);
160
+
161
+ // Returns the maximal size of the compressed representation of
162
+ // input data that is "source_bytes" bytes in length;
163
+ size_t MaxCompressedLength(size_t source_bytes);
164
+
165
+ // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
166
+ // Returns true and stores the length of the uncompressed data in
167
+ // *result normally. Returns false on parsing error.
168
+ // This operation takes O(1) time.
169
+ bool GetUncompressedLength(const char* compressed, size_t compressed_length,
170
+ size_t* result);
171
+
172
+ // Returns true iff the contents of "compressed[]" can be uncompressed
173
+ // successfully. Does not return the uncompressed data. Takes
174
+ // time proportional to compressed_length, but is usually at least
175
+ // a factor of four faster than actual decompression.
176
+ bool IsValidCompressedBuffer(const char* compressed,
177
+ size_t compressed_length);
178
+
179
+ // Returns true iff the contents of "compressed" can be uncompressed
180
+ // successfully. Does not return the uncompressed data. Takes
181
+ // time proportional to *compressed length, but is usually at least
182
+ // a factor of four faster than actual decompression.
183
+ // On success, consumes all of *compressed. On failure, consumes an
184
+ // unspecified prefix of *compressed.
185
+ bool IsValidCompressed(Source* compressed);
186
+
187
+ // The size of a compression block. Note that many parts of the compression
188
+ // code assumes that kBlockSize <= 65536; in particular, the hash table
189
+ // can only store 16-bit offsets, and EmitCopy() also assumes the offset
190
+ // is 65535 bytes or less. Note also that if you change this, it will
191
+ // affect the framing format (see framing_format.txt).
192
+ //
193
+ // Note that there might be older data around that is compressed with larger
194
+ // block sizes, so the decompression code should not rely on the
195
+ // non-existence of long backreferences.
196
+ static const int kBlockLog = 16;
197
+ static const size_t kBlockSize = 1 << kBlockLog;
198
+
199
+ static const int kMaxHashTableBits = 14;
200
+ static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
201
+ } // end namespace snappy
202
+
203
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
@@ -0,0 +1,1410 @@
1
+ // Copyright 2005 and onwards Google Inc.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ #include <math.h>
30
+ #include <stdlib.h>
31
+
32
+
33
+ #include <algorithm>
34
+ #include <string>
35
+ #include <utility>
36
+ #include <vector>
37
+
38
+ #include "snappy.h"
39
+ #include "snappy-internal.h"
40
+ #include "snappy-test.h"
41
+ #include "snappy-sinksource.h"
42
+
43
+ DEFINE_int32(start_len, -1,
44
+ "Starting prefix size for testing (-1: just full file contents)");
45
+ DEFINE_int32(end_len, -1,
46
+ "Starting prefix size for testing (-1: just full file contents)");
47
+ DEFINE_int32(bytes, 10485760,
48
+ "How many bytes to compress/uncompress per file for timing");
49
+
50
+ DEFINE_bool(zlib, false,
51
+ "Run zlib compression (http://www.zlib.net)");
52
+ DEFINE_bool(lzo, false,
53
+ "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
54
+ DEFINE_bool(snappy, true, "Run snappy compression");
55
+
56
+ DEFINE_bool(write_compressed, false,
57
+ "Write compressed versions of each file to <file>.comp");
58
+ DEFINE_bool(write_uncompressed, false,
59
+ "Write uncompressed versions of each file to <file>.uncomp");
60
+
61
+ DEFINE_bool(snappy_dump_decompression_table, false,
62
+ "If true, we print the decompression table during tests.");
63
+
64
+ namespace snappy {
65
+
66
+ #if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
67
+
68
+ // To test against code that reads beyond its input, this class copies a
69
+ // string to a newly allocated group of pages, the last of which
70
+ // is made unreadable via mprotect. Note that we need to allocate the
71
+ // memory with mmap(), as POSIX allows mprotect() only on memory allocated
72
+ // with mmap(), and some malloc/posix_memalign implementations expect to
73
+ // be able to read previously allocated memory while doing heap allocations.
74
+ class DataEndingAtUnreadablePage {
75
+ public:
76
+ explicit DataEndingAtUnreadablePage(const string& s) {
77
+ const size_t page_size = sysconf(_SC_PAGESIZE);
78
+ const size_t size = s.size();
79
+ // Round up space for string to a multiple of page_size.
80
+ size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
81
+ alloc_size_ = space_for_string + page_size;
82
+ mem_ = mmap(NULL, alloc_size_,
83
+ PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
84
+ CHECK_NE(MAP_FAILED, mem_);
85
+ protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
86
+ char* dst = protected_page_ - size;
87
+ memcpy(dst, s.data(), size);
88
+ data_ = dst;
89
+ size_ = size;
90
+ // Make guard page unreadable.
91
+ CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
92
+ }
93
+
94
+ ~DataEndingAtUnreadablePage() {
95
+ const size_t page_size = sysconf(_SC_PAGESIZE);
96
+ // Undo the mprotect.
97
+ CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
98
+ CHECK_EQ(0, munmap(mem_, alloc_size_));
99
+ }
100
+
101
+ const char* data() const { return data_; }
102
+ size_t size() const { return size_; }
103
+
104
+ private:
105
+ size_t alloc_size_;
106
+ void* mem_;
107
+ char* protected_page_;
108
+ const char* data_;
109
+ size_t size_;
110
+ };
111
+
112
+ #else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
113
+
114
+ // Fallback for systems without mmap.
115
+ typedef string DataEndingAtUnreadablePage;
116
+
117
+ #endif
118
+
119
+ enum CompressorType {
120
+ ZLIB, LZO, SNAPPY
121
+ };
122
+
123
+ const char* names[] = {
124
+ "ZLIB", "LZO", "SNAPPY"
125
+ };
126
+
127
+ static size_t MinimumRequiredOutputSpace(size_t input_size,
128
+ CompressorType comp) {
129
+ switch (comp) {
130
+ #ifdef ZLIB_VERSION
131
+ case ZLIB:
132
+ return ZLib::MinCompressbufSize(input_size);
133
+ #endif // ZLIB_VERSION
134
+
135
+ #ifdef LZO_VERSION
136
+ case LZO:
137
+ return input_size + input_size/64 + 16 + 3;
138
+ #endif // LZO_VERSION
139
+
140
+ case SNAPPY:
141
+ return snappy::MaxCompressedLength(input_size);
142
+
143
+ default:
144
+ LOG(FATAL) << "Unknown compression type number " << comp;
145
+ return 0;
146
+ }
147
+ }
148
+
149
+ // Returns true if we successfully compressed, false otherwise.
150
+ //
151
+ // If compressed_is_preallocated is set, do not resize the compressed buffer.
152
+ // This is typically what you want for a benchmark, in order to not spend
153
+ // time in the memory allocator. If you do set this flag, however,
154
+ // "compressed" must be preinitialized to at least MinCompressbufSize(comp)
155
+ // number of bytes, and may contain junk bytes at the end after return.
156
+ static bool Compress(const char* input, size_t input_size, CompressorType comp,
157
+ string* compressed, bool compressed_is_preallocated) {
158
+ if (!compressed_is_preallocated) {
159
+ compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
160
+ }
161
+
162
+ switch (comp) {
163
+ #ifdef ZLIB_VERSION
164
+ case ZLIB: {
165
+ ZLib zlib;
166
+ uLongf destlen = compressed->size();
167
+ int ret = zlib.Compress(
168
+ reinterpret_cast<Bytef*>(string_as_array(compressed)),
169
+ &destlen,
170
+ reinterpret_cast<const Bytef*>(input),
171
+ input_size);
172
+ CHECK_EQ(Z_OK, ret);
173
+ if (!compressed_is_preallocated) {
174
+ compressed->resize(destlen);
175
+ }
176
+ return true;
177
+ }
178
+ #endif // ZLIB_VERSION
179
+
180
+ #ifdef LZO_VERSION
181
+ case LZO: {
182
+ unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
183
+ lzo_uint destlen;
184
+ int ret = lzo1x_1_15_compress(
185
+ reinterpret_cast<const uint8*>(input),
186
+ input_size,
187
+ reinterpret_cast<uint8*>(string_as_array(compressed)),
188
+ &destlen,
189
+ mem);
190
+ CHECK_EQ(LZO_E_OK, ret);
191
+ delete[] mem;
192
+ if (!compressed_is_preallocated) {
193
+ compressed->resize(destlen);
194
+ }
195
+ break;
196
+ }
197
+ #endif // LZO_VERSION
198
+
199
+ case SNAPPY: {
200
+ size_t destlen;
201
+ snappy::RawCompress(input, input_size,
202
+ string_as_array(compressed),
203
+ &destlen);
204
+ CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
205
+ if (!compressed_is_preallocated) {
206
+ compressed->resize(destlen);
207
+ }
208
+ break;
209
+ }
210
+
211
+ default: {
212
+ return false; // the asked-for library wasn't compiled in
213
+ }
214
+ }
215
+ return true;
216
+ }
217
+
218
+ static bool Uncompress(const string& compressed, CompressorType comp,
219
+ int size, string* output) {
220
+ switch (comp) {
221
+ #ifdef ZLIB_VERSION
222
+ case ZLIB: {
223
+ output->resize(size);
224
+ ZLib zlib;
225
+ uLongf destlen = output->size();
226
+ int ret = zlib.Uncompress(
227
+ reinterpret_cast<Bytef*>(string_as_array(output)),
228
+ &destlen,
229
+ reinterpret_cast<const Bytef*>(compressed.data()),
230
+ compressed.size());
231
+ CHECK_EQ(Z_OK, ret);
232
+ CHECK_EQ(static_cast<uLongf>(size), destlen);
233
+ break;
234
+ }
235
+ #endif // ZLIB_VERSION
236
+
237
+ #ifdef LZO_VERSION
238
+ case LZO: {
239
+ output->resize(size);
240
+ lzo_uint destlen;
241
+ int ret = lzo1x_decompress(
242
+ reinterpret_cast<const uint8*>(compressed.data()),
243
+ compressed.size(),
244
+ reinterpret_cast<uint8*>(string_as_array(output)),
245
+ &destlen,
246
+ NULL);
247
+ CHECK_EQ(LZO_E_OK, ret);
248
+ CHECK_EQ(static_cast<lzo_uint>(size), destlen);
249
+ break;
250
+ }
251
+ #endif // LZO_VERSION
252
+
253
+ case SNAPPY: {
254
+ snappy::RawUncompress(compressed.data(), compressed.size(),
255
+ string_as_array(output));
256
+ break;
257
+ }
258
+
259
+ default: {
260
+ return false; // the asked-for library wasn't compiled in
261
+ }
262
+ }
263
+ return true;
264
+ }
265
+
266
+ static void Measure(const char* data,
267
+ size_t length,
268
+ CompressorType comp,
269
+ int repeats,
270
+ int block_size) {
271
+ // Run tests a few time and pick median running times
272
+ static const int kRuns = 5;
273
+ double ctime[kRuns];
274
+ double utime[kRuns];
275
+ int compressed_size = 0;
276
+
277
+ {
278
+ // Chop the input into blocks
279
+ int num_blocks = (length + block_size - 1) / block_size;
280
+ std::vector<const char*> input(num_blocks);
281
+ std::vector<size_t> input_length(num_blocks);
282
+ std::vector<string> compressed(num_blocks);
283
+ std::vector<string> output(num_blocks);
284
+ for (int b = 0; b < num_blocks; b++) {
285
+ int input_start = b * block_size;
286
+ int input_limit = std::min<int>((b+1)*block_size, length);
287
+ input[b] = data+input_start;
288
+ input_length[b] = input_limit-input_start;
289
+
290
+ // Pre-grow the output buffer so we don't measure string append time.
291
+ compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
292
+ }
293
+
294
+ // First, try one trial compression to make sure the code is compiled in
295
+ if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
296
+ LOG(WARNING) << "Skipping " << names[comp] << ": "
297
+ << "library not compiled in";
298
+ return;
299
+ }
300
+
301
+ for (int run = 0; run < kRuns; run++) {
302
+ CycleTimer ctimer, utimer;
303
+
304
+ for (int b = 0; b < num_blocks; b++) {
305
+ // Pre-grow the output buffer so we don't measure string append time.
306
+ compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
307
+ }
308
+
309
+ ctimer.Start();
310
+ for (int b = 0; b < num_blocks; b++)
311
+ for (int i = 0; i < repeats; i++)
312
+ Compress(input[b], input_length[b], comp, &compressed[b], true);
313
+ ctimer.Stop();
314
+
315
+ // Compress once more, with resizing, so we don't leave junk
316
+ // at the end that will confuse the decompressor.
317
+ for (int b = 0; b < num_blocks; b++) {
318
+ Compress(input[b], input_length[b], comp, &compressed[b], false);
319
+ }
320
+
321
+ for (int b = 0; b < num_blocks; b++) {
322
+ output[b].resize(input_length[b]);
323
+ }
324
+
325
+ utimer.Start();
326
+ for (int i = 0; i < repeats; i++)
327
+ for (int b = 0; b < num_blocks; b++)
328
+ Uncompress(compressed[b], comp, input_length[b], &output[b]);
329
+ utimer.Stop();
330
+
331
+ ctime[run] = ctimer.Get();
332
+ utime[run] = utimer.Get();
333
+ }
334
+
335
+ compressed_size = 0;
336
+ for (size_t i = 0; i < compressed.size(); i++) {
337
+ compressed_size += compressed[i].size();
338
+ }
339
+ }
340
+
341
+ std::sort(ctime, ctime + kRuns);
342
+ std::sort(utime, utime + kRuns);
343
+ const int med = kRuns/2;
344
+
345
+ float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
346
+ float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
347
+ string x = names[comp];
348
+ x += ":";
349
+ string urate = (uncomp_rate >= 0)
350
+ ? StringPrintf("%.1f", uncomp_rate)
351
+ : string("?");
352
+ printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
353
+ "comp %5.1f MB/s uncomp %5s MB/s\n",
354
+ x.c_str(),
355
+ block_size/(1<<20),
356
+ static_cast<int>(length), static_cast<uint32>(compressed_size),
357
+ (compressed_size * 100.0) / std::max<int>(1, length),
358
+ comp_rate,
359
+ urate.c_str());
360
+ }
361
+
362
+ static int VerifyString(const string& input) {
363
+ string compressed;
364
+ DataEndingAtUnreadablePage i(input);
365
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
366
+ CHECK_EQ(written, compressed.size());
367
+ CHECK_LE(compressed.size(),
368
+ snappy::MaxCompressedLength(input.size()));
369
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
370
+
371
+ string uncompressed;
372
+ DataEndingAtUnreadablePage c(compressed);
373
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
374
+ CHECK_EQ(uncompressed, input);
375
+ return uncompressed.size();
376
+ }
377
+
378
+ static void VerifyStringSink(const string& input) {
379
+ string compressed;
380
+ DataEndingAtUnreadablePage i(input);
381
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
382
+ CHECK_EQ(written, compressed.size());
383
+ CHECK_LE(compressed.size(),
384
+ snappy::MaxCompressedLength(input.size()));
385
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
386
+
387
+ string uncompressed;
388
+ uncompressed.resize(input.size());
389
+ snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
390
+ DataEndingAtUnreadablePage c(compressed);
391
+ snappy::ByteArraySource source(c.data(), c.size());
392
+ CHECK(snappy::Uncompress(&source, &sink));
393
+ CHECK_EQ(uncompressed, input);
394
+ }
395
+
396
+ static void VerifyIOVec(const string& input) {
397
+ string compressed;
398
+ DataEndingAtUnreadablePage i(input);
399
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
400
+ CHECK_EQ(written, compressed.size());
401
+ CHECK_LE(compressed.size(),
402
+ snappy::MaxCompressedLength(input.size()));
403
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
404
+
405
+ // Try uncompressing into an iovec containing a random number of entries
406
+ // ranging from 1 to 10.
407
+ char* buf = new char[input.size()];
408
+ ACMRandom rnd(input.size());
409
+ size_t num = rnd.Next() % 10 + 1;
410
+ if (input.size() < num) {
411
+ num = input.size();
412
+ }
413
+ struct iovec* iov = new iovec[num];
414
+ int used_so_far = 0;
415
+ for (size_t i = 0; i < num; ++i) {
416
+ iov[i].iov_base = buf + used_so_far;
417
+ if (i == num - 1) {
418
+ iov[i].iov_len = input.size() - used_so_far;
419
+ } else {
420
+ // Randomly choose to insert a 0 byte entry.
421
+ if (rnd.OneIn(5)) {
422
+ iov[i].iov_len = 0;
423
+ } else {
424
+ iov[i].iov_len = rnd.Uniform(input.size());
425
+ }
426
+ }
427
+ used_so_far += iov[i].iov_len;
428
+ }
429
+ CHECK(snappy::RawUncompressToIOVec(
430
+ compressed.data(), compressed.size(), iov, num));
431
+ CHECK(!memcmp(buf, input.data(), input.size()));
432
+ delete[] iov;
433
+ delete[] buf;
434
+ }
435
+
436
+ // Test that data compressed by a compressor that does not
437
+ // obey block sizes is uncompressed properly.
438
+ static void VerifyNonBlockedCompression(const string& input) {
439
+ if (input.length() > snappy::kBlockSize) {
440
+ // We cannot test larger blocks than the maximum block size, obviously.
441
+ return;
442
+ }
443
+
444
+ string prefix;
445
+ Varint::Append32(&prefix, input.size());
446
+
447
+ // Setup compression table
448
+ snappy::internal::WorkingMemory wmem;
449
+ int table_size;
450
+ uint16* table = wmem.GetHashTable(input.size(), &table_size);
451
+
452
+ // Compress entire input in one shot
453
+ string compressed;
454
+ compressed += prefix;
455
+ compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
456
+ char* dest = string_as_array(&compressed) + prefix.size();
457
+ char* end = snappy::internal::CompressFragment(input.data(), input.size(),
458
+ dest, table, table_size);
459
+ compressed.resize(end - compressed.data());
460
+
461
+ // Uncompress into string
462
+ string uncomp_str;
463
+ CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
464
+ CHECK_EQ(uncomp_str, input);
465
+
466
+ // Uncompress using source/sink
467
+ string uncomp_str2;
468
+ uncomp_str2.resize(input.size());
469
+ snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
470
+ snappy::ByteArraySource source(compressed.data(), compressed.size());
471
+ CHECK(snappy::Uncompress(&source, &sink));
472
+ CHECK_EQ(uncomp_str2, input);
473
+
474
+ // Uncompress into iovec
475
+ {
476
+ static const int kNumBlocks = 10;
477
+ struct iovec vec[kNumBlocks];
478
+ const int block_size = 1 + input.size() / kNumBlocks;
479
+ string iovec_data(block_size * kNumBlocks, 'x');
480
+ for (int i = 0; i < kNumBlocks; i++) {
481
+ vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
482
+ vec[i].iov_len = block_size;
483
+ }
484
+ CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
485
+ vec, kNumBlocks));
486
+ CHECK_EQ(string(iovec_data.data(), input.size()), input);
487
+ }
488
+ }
489
+
490
+ // Expand the input so that it is at least K times as big as block size
491
+ static string Expand(const string& input) {
492
+ static const int K = 3;
493
+ string data = input;
494
+ while (data.size() < K * snappy::kBlockSize) {
495
+ data += input;
496
+ }
497
+ return data;
498
+ }
499
+
500
+ static int Verify(const string& input) {
501
+ VLOG(1) << "Verifying input of size " << input.size();
502
+
503
+ // Compress using string based routines
504
+ const int result = VerifyString(input);
505
+
506
+ // Verify using sink based routines
507
+ VerifyStringSink(input);
508
+
509
+ VerifyNonBlockedCompression(input);
510
+ VerifyIOVec(input);
511
+ if (!input.empty()) {
512
+ const string expanded = Expand(input);
513
+ VerifyNonBlockedCompression(expanded);
514
+ VerifyIOVec(input);
515
+ }
516
+
517
+ return result;
518
+ }
519
+
520
+
521
+ static bool IsValidCompressedBuffer(const string& c) {
522
+ return snappy::IsValidCompressedBuffer(c.data(), c.size());
523
+ }
524
+ static bool Uncompress(const string& c, string* u) {
525
+ return snappy::Uncompress(c.data(), c.size(), u);
526
+ }
527
+
528
+ // This test checks to ensure that snappy doesn't coredump if it gets
529
+ // corrupted data.
530
+ TEST(CorruptedTest, VerifyCorrupted) {
531
+ string source = "making sure we don't crash with corrupted input";
532
+ VLOG(1) << source;
533
+ string dest;
534
+ string uncmp;
535
+ snappy::Compress(source.data(), source.size(), &dest);
536
+
537
+ // Mess around with the data. It's hard to simulate all possible
538
+ // corruptions; this is just one example ...
539
+ CHECK_GT(dest.size(), 3);
540
+ dest[1]--;
541
+ dest[3]++;
542
+ // this really ought to fail.
543
+ CHECK(!IsValidCompressedBuffer(dest));
544
+ CHECK(!Uncompress(dest, &uncmp));
545
+
546
+ // This is testing for a security bug - a buffer that decompresses to 100k
547
+ // but we lie in the snappy header and only reserve 0 bytes of memory :)
548
+ source.resize(100000);
549
+ for (size_t i = 0; i < source.length(); ++i) {
550
+ source[i] = 'A';
551
+ }
552
+ snappy::Compress(source.data(), source.size(), &dest);
553
+ dest[0] = dest[1] = dest[2] = dest[3] = 0;
554
+ CHECK(!IsValidCompressedBuffer(dest));
555
+ CHECK(!Uncompress(dest, &uncmp));
556
+
557
+ if (sizeof(void *) == 4) {
558
+ // Another security check; check a crazy big length can't DoS us with an
559
+ // over-allocation.
560
+ // Currently this is done only for 32-bit builds. On 64-bit builds,
561
+ // where 3 GB might be an acceptable allocation size, Uncompress()
562
+ // attempts to decompress, and sometimes causes the test to run out of
563
+ // memory.
564
+ dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
565
+ // This decodes to a really large size, i.e., about 3 GB.
566
+ dest[4] = 'k';
567
+ CHECK(!IsValidCompressedBuffer(dest));
568
+ CHECK(!Uncompress(dest, &uncmp));
569
+ } else {
570
+ LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
571
+ }
572
+
573
+ // This decodes to about 2 MB; much smaller, but should still fail.
574
+ dest[0] = dest[1] = dest[2] = '\xff';
575
+ dest[3] = 0x00;
576
+ CHECK(!IsValidCompressedBuffer(dest));
577
+ CHECK(!Uncompress(dest, &uncmp));
578
+
579
+ // try reading stuff in from a bad file.
580
+ for (int i = 1; i <= 3; ++i) {
581
+ string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(),
582
+ 0);
583
+ string uncmp;
584
+ // check that we don't return a crazy length
585
+ size_t ulen;
586
+ CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
587
+ || (ulen < (1<<20)));
588
+ uint32 ulen2;
589
+ snappy::ByteArraySource source(data.data(), data.size());
590
+ CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
591
+ (ulen2 < (1<<20)));
592
+ CHECK(!IsValidCompressedBuffer(data));
593
+ CHECK(!Uncompress(data, &uncmp));
594
+ }
595
+ }
596
+
597
+ // Helper routines to construct arbitrary compressed strings.
598
+ // These mirror the compression code in snappy.cc, but are copied
599
+ // here so that we can bypass some limitations in the how snappy.cc
600
+ // invokes these routines.
601
+ static void AppendLiteral(string* dst, const string& literal) {
602
+ if (literal.empty()) return;
603
+ int n = literal.size() - 1;
604
+ if (n < 60) {
605
+ // Fit length in tag byte
606
+ dst->push_back(0 | (n << 2));
607
+ } else {
608
+ // Encode in upcoming bytes
609
+ char number[4];
610
+ int count = 0;
611
+ while (n > 0) {
612
+ number[count++] = n & 0xff;
613
+ n >>= 8;
614
+ }
615
+ dst->push_back(0 | ((59+count) << 2));
616
+ *dst += string(number, count);
617
+ }
618
+ *dst += literal;
619
+ }
620
+
621
+ static void AppendCopy(string* dst, int offset, int length) {
622
+ while (length > 0) {
623
+ // Figure out how much to copy in one shot
624
+ int to_copy;
625
+ if (length >= 68) {
626
+ to_copy = 64;
627
+ } else if (length > 64) {
628
+ to_copy = 60;
629
+ } else {
630
+ to_copy = length;
631
+ }
632
+ length -= to_copy;
633
+
634
+ if ((to_copy >= 4) && (to_copy < 12) && (offset < 2048)) {
635
+ assert(to_copy-4 < 8); // Must fit in 3 bits
636
+ dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5));
637
+ dst->push_back(offset & 0xff);
638
+ } else if (offset < 65536) {
639
+ dst->push_back(2 | ((to_copy-1) << 2));
640
+ dst->push_back(offset & 0xff);
641
+ dst->push_back(offset >> 8);
642
+ } else {
643
+ dst->push_back(3 | ((to_copy-1) << 2));
644
+ dst->push_back(offset & 0xff);
645
+ dst->push_back((offset >> 8) & 0xff);
646
+ dst->push_back((offset >> 16) & 0xff);
647
+ dst->push_back((offset >> 24) & 0xff);
648
+ }
649
+ }
650
+ }
651
+
652
+ TEST(Snappy, SimpleTests) {
653
+ Verify("");
654
+ Verify("a");
655
+ Verify("ab");
656
+ Verify("abc");
657
+
658
+ Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
659
+ Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
660
+ Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
661
+ Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
662
+ Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
663
+ }
664
+
665
+ // Verify max blowup (lots of four-byte copies)
666
+ TEST(Snappy, MaxBlowup) {
667
+ string input;
668
+ for (int i = 0; i < 20000; i++) {
669
+ ACMRandom rnd(i);
670
+ uint32 bytes = static_cast<uint32>(rnd.Next());
671
+ input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
672
+ }
673
+ for (int i = 19999; i >= 0; i--) {
674
+ ACMRandom rnd(i);
675
+ uint32 bytes = static_cast<uint32>(rnd.Next());
676
+ input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
677
+ }
678
+ Verify(input);
679
+ }
680
+
681
+ TEST(Snappy, RandomData) {
682
+ ACMRandom rnd(FLAGS_test_random_seed);
683
+
684
+ const int num_ops = 20000;
685
+ for (int i = 0; i < num_ops; i++) {
686
+ if ((i % 1000) == 0) {
687
+ VLOG(0) << "Random op " << i << " of " << num_ops;
688
+ }
689
+
690
+ string x;
691
+ size_t len = rnd.Uniform(4096);
692
+ if (i < 100) {
693
+ len = 65536 + rnd.Uniform(65536);
694
+ }
695
+ while (x.size() < len) {
696
+ int run_len = 1;
697
+ if (rnd.OneIn(10)) {
698
+ run_len = rnd.Skewed(8);
699
+ }
700
+ char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
701
+ while (run_len-- > 0 && x.size() < len) {
702
+ x += c;
703
+ }
704
+ }
705
+
706
+ Verify(x);
707
+ }
708
+ }
709
+
710
+ TEST(Snappy, FourByteOffset) {
711
+ // The new compressor cannot generate four-byte offsets since
712
+ // it chops up the input into 32KB pieces. So we hand-emit the
713
+ // copy manually.
714
+
715
+ // The two fragments that make up the input string.
716
+ string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
717
+ string fragment2 = "some other string";
718
+
719
+ // How many times each fragment is emitted.
720
+ const int n1 = 2;
721
+ const int n2 = 100000 / fragment2.size();
722
+ const int length = n1 * fragment1.size() + n2 * fragment2.size();
723
+
724
+ string compressed;
725
+ Varint::Append32(&compressed, length);
726
+
727
+ AppendLiteral(&compressed, fragment1);
728
+ string src = fragment1;
729
+ for (int i = 0; i < n2; i++) {
730
+ AppendLiteral(&compressed, fragment2);
731
+ src += fragment2;
732
+ }
733
+ AppendCopy(&compressed, src.size(), fragment1.size());
734
+ src += fragment1;
735
+ CHECK_EQ(length, src.size());
736
+
737
+ string uncompressed;
738
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
739
+ CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
740
+ &uncompressed));
741
+ CHECK_EQ(uncompressed, src);
742
+ }
743
+
744
+ TEST(Snappy, IOVecEdgeCases) {
745
+ // Test some tricky edge cases in the iovec output that are not necessarily
746
+ // exercised by random tests.
747
+
748
+ // Our output blocks look like this initially (the last iovec is bigger
749
+ // than depicted):
750
+ // [ ] [ ] [ ] [ ] [ ]
751
+ static const int kLengths[] = { 2, 1, 4, 8, 128 };
752
+
753
+ struct iovec iov[ARRAYSIZE(kLengths)];
754
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
755
+ iov[i].iov_base = new char[kLengths[i]];
756
+ iov[i].iov_len = kLengths[i];
757
+ }
758
+
759
+ string compressed;
760
+ Varint::Append32(&compressed, 22);
761
+
762
+ // A literal whose output crosses three blocks.
763
+ // [ab] [c] [123 ] [ ] [ ]
764
+ AppendLiteral(&compressed, "abc123");
765
+
766
+ // A copy whose output crosses two blocks (source and destination
767
+ // segments marked).
768
+ // [ab] [c] [1231] [23 ] [ ]
769
+ // ^--^ --
770
+ AppendCopy(&compressed, 3, 3);
771
+
772
+ // A copy where the input is, at first, in the block before the output:
773
+ //
774
+ // [ab] [c] [1231] [231231 ] [ ]
775
+ // ^--- ^---
776
+ // Then during the copy, the pointers move such that the input and
777
+ // output pointers are in the same block:
778
+ //
779
+ // [ab] [c] [1231] [23123123] [ ]
780
+ // ^- ^-
781
+ // And then they move again, so that the output pointer is no longer
782
+ // in the same block as the input pointer:
783
+ // [ab] [c] [1231] [23123123] [123 ]
784
+ // ^-- ^--
785
+ AppendCopy(&compressed, 6, 9);
786
+
787
+ // Finally, a copy where the input is from several blocks back,
788
+ // and it also crosses three blocks:
789
+ //
790
+ // [ab] [c] [1231] [23123123] [123b ]
791
+ // ^ ^
792
+ // [ab] [c] [1231] [23123123] [123bc ]
793
+ // ^ ^
794
+ // [ab] [c] [1231] [23123123] [123bc12 ]
795
+ // ^- ^-
796
+ AppendCopy(&compressed, 17, 4);
797
+
798
+ CHECK(snappy::RawUncompressToIOVec(
799
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
800
+ CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
801
+ CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
802
+ CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
803
+ CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
804
+ CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));
805
+
806
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
807
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
808
+ }
809
+ }
810
+
811
+ TEST(Snappy, IOVecLiteralOverflow) {
812
+ static const int kLengths[] = { 3, 4 };
813
+
814
+ struct iovec iov[ARRAYSIZE(kLengths)];
815
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
816
+ iov[i].iov_base = new char[kLengths[i]];
817
+ iov[i].iov_len = kLengths[i];
818
+ }
819
+
820
+ string compressed;
821
+ Varint::Append32(&compressed, 8);
822
+
823
+ AppendLiteral(&compressed, "12345678");
824
+
825
+ CHECK(!snappy::RawUncompressToIOVec(
826
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
827
+
828
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
829
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
830
+ }
831
+ }
832
+
833
+ TEST(Snappy, IOVecCopyOverflow) {
834
+ static const int kLengths[] = { 3, 4 };
835
+
836
+ struct iovec iov[ARRAYSIZE(kLengths)];
837
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
838
+ iov[i].iov_base = new char[kLengths[i]];
839
+ iov[i].iov_len = kLengths[i];
840
+ }
841
+
842
+ string compressed;
843
+ Varint::Append32(&compressed, 8);
844
+
845
+ AppendLiteral(&compressed, "123");
846
+ AppendCopy(&compressed, 3, 5);
847
+
848
+ CHECK(!snappy::RawUncompressToIOVec(
849
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
850
+
851
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
852
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
853
+ }
854
+ }
855
+
856
+ static bool CheckUncompressedLength(const string& compressed,
857
+ size_t* ulength) {
858
+ const bool result1 = snappy::GetUncompressedLength(compressed.data(),
859
+ compressed.size(),
860
+ ulength);
861
+
862
+ snappy::ByteArraySource source(compressed.data(), compressed.size());
863
+ uint32 length;
864
+ const bool result2 = snappy::GetUncompressedLength(&source, &length);
865
+ CHECK_EQ(result1, result2);
866
+ return result1;
867
+ }
868
+
869
+ TEST(SnappyCorruption, TruncatedVarint) {
870
+ string compressed, uncompressed;
871
+ size_t ulength;
872
+ compressed.push_back('\xf0');
873
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
874
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
875
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
876
+ &uncompressed));
877
+ }
878
+
879
+ TEST(SnappyCorruption, UnterminatedVarint) {
880
+ string compressed, uncompressed;
881
+ size_t ulength;
882
+ compressed.push_back('\x80');
883
+ compressed.push_back('\x80');
884
+ compressed.push_back('\x80');
885
+ compressed.push_back('\x80');
886
+ compressed.push_back('\x80');
887
+ compressed.push_back(10);
888
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
889
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
890
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
891
+ &uncompressed));
892
+ }
893
+
894
+ TEST(SnappyCorruption, OverflowingVarint) {
895
+ string compressed, uncompressed;
896
+ size_t ulength;
897
+ compressed.push_back('\xfb');
898
+ compressed.push_back('\xff');
899
+ compressed.push_back('\xff');
900
+ compressed.push_back('\xff');
901
+ compressed.push_back('\x7f');
902
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
903
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
904
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
905
+ &uncompressed));
906
+ }
907
+
908
+ TEST(Snappy, ReadPastEndOfBuffer) {
909
+ // Check that we do not read past end of input
910
+
911
+ // Make a compressed string that ends with a single-byte literal
912
+ string compressed;
913
+ Varint::Append32(&compressed, 1);
914
+ AppendLiteral(&compressed, "x");
915
+
916
+ string uncompressed;
917
+ DataEndingAtUnreadablePage c(compressed);
918
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
919
+ CHECK_EQ(uncompressed, string("x"));
920
+ }
921
+
922
+ // Check for an infinite loop caused by a copy with offset==0
923
+ TEST(Snappy, ZeroOffsetCopy) {
924
+ const char* compressed = "\x40\x12\x00\x00";
925
+ // \x40 Length (must be > kMaxIncrementCopyOverflow)
926
+ // \x12\x00\x00 Copy with offset==0, length==5
927
+ char uncompressed[100];
928
+ EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
929
+ }
930
+
931
+ TEST(Snappy, ZeroOffsetCopyValidation) {
932
+ const char* compressed = "\x05\x12\x00\x00";
933
+ // \x05 Length
934
+ // \x12\x00\x00 Copy with offset==0, length==5
935
+ EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
936
+ }
937
+
938
+ namespace {
939
+
940
+ int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
941
+ std::pair<size_t, bool> p =
942
+ snappy::internal::FindMatchLength(s1, s2, s2 + length);
943
+ CHECK_EQ(p.first < 8, p.second);
944
+ return p.first;
945
+ }
946
+
947
+ } // namespace
948
+
949
+ TEST(Snappy, FindMatchLength) {
950
+ // Exercise all different code paths through the function.
951
+ // 64-bit version:
952
+
953
+ // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
954
+ EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
955
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
956
+
957
+ // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
958
+ EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
959
+
960
+ // Same, but edge cases.
961
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
962
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
963
+
964
+ // Find non-match at once in first loop.
965
+ EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
966
+ EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
967
+ EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
968
+ EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
969
+
970
+ // Find non-match in first loop after one block.
971
+ EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
972
+ "abcdefgh?1234567xxxxxxxx", 24));
973
+ EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
974
+ "abcdefgh0?234567xxxxxxxx", 24));
975
+ EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
976
+ "abcdefgh01237654xxxxxxxx", 24));
977
+ EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
978
+ "abcdefgh0123456?xxxxxxxx", 24));
979
+
980
+ // 32-bit version:
981
+
982
+ // Short matches.
983
+ EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
984
+ EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
985
+ EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
986
+ EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
987
+ EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
988
+ EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
989
+ EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
990
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
991
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
992
+ EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
993
+
994
+ // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
995
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
996
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
997
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
998
+
999
+ // Same, but edge cases.
1000
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
1001
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
1002
+
1003
+ // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
1004
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
1005
+
1006
+ // Find non-match at once in first loop.
1007
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1008
+ "xxxxxx?123xxxxxxxx", 18));
1009
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1010
+ "xxxxxx0?23xxxxxxxx", 18));
1011
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1012
+ "xxxxxx0132xxxxxxxx", 18));
1013
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1014
+ "xxxxxx012?xxxxxxxx", 18));
1015
+
1016
+ // Same, but edge cases.
1017
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
1018
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
1019
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
1020
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
1021
+
1022
+ // Find non-match in first loop after one block.
1023
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
1024
+ "xxxxxxabcd?123xx", 16));
1025
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
1026
+ "xxxxxxabcd0?23xx", 16));
1027
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
1028
+ "xxxxxxabcd0132xx", 16));
1029
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
1030
+ "xxxxxxabcd012?xx", 16));
1031
+
1032
+ // Same, but edge cases.
1033
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
1034
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
1035
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
1036
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
1037
+ }
1038
+
1039
+ TEST(Snappy, FindMatchLengthRandom) {
1040
+ const int kNumTrials = 10000;
1041
+ const int kTypicalLength = 10;
1042
+ ACMRandom rnd(FLAGS_test_random_seed);
1043
+
1044
+ for (int i = 0; i < kNumTrials; i++) {
1045
+ string s, t;
1046
+ char a = rnd.Rand8();
1047
+ char b = rnd.Rand8();
1048
+ while (!rnd.OneIn(kTypicalLength)) {
1049
+ s.push_back(rnd.OneIn(2) ? a : b);
1050
+ t.push_back(rnd.OneIn(2) ? a : b);
1051
+ }
1052
+ DataEndingAtUnreadablePage u(s);
1053
+ DataEndingAtUnreadablePage v(t);
1054
+ int matched = TestFindMatchLength(u.data(), v.data(), t.size());
1055
+ if (matched == t.size()) {
1056
+ EXPECT_EQ(s, t);
1057
+ } else {
1058
+ EXPECT_NE(s[matched], t[matched]);
1059
+ for (int j = 0; j < matched; j++) {
1060
+ EXPECT_EQ(s[j], t[j]);
1061
+ }
1062
+ }
1063
+ }
1064
+ }
1065
+
1066
+ static uint16 MakeEntry(unsigned int extra,
1067
+ unsigned int len,
1068
+ unsigned int copy_offset) {
1069
+ // Check that all of the fields fit within the allocated space
1070
+ assert(extra == (extra & 0x7)); // At most 3 bits
1071
+ assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
1072
+ assert(len == (len & 0x7f)); // At most 7 bits
1073
+ return len | (copy_offset << 8) | (extra << 11);
1074
+ }
1075
+
1076
+ // Check that the decompression table is correct, and optionally print out
1077
+ // the computed one.
1078
+ TEST(Snappy, VerifyCharTable) {
1079
+ using snappy::internal::LITERAL;
1080
+ using snappy::internal::COPY_1_BYTE_OFFSET;
1081
+ using snappy::internal::COPY_2_BYTE_OFFSET;
1082
+ using snappy::internal::COPY_4_BYTE_OFFSET;
1083
+ using snappy::internal::char_table;
1084
+
1085
+ uint16 dst[256];
1086
+
1087
+ // Place invalid entries in all places to detect missing initialization
1088
+ int assigned = 0;
1089
+ for (int i = 0; i < 256; i++) {
1090
+ dst[i] = 0xffff;
1091
+ }
1092
+
1093
+ // Small LITERAL entries. We store (len-1) in the top 6 bits.
1094
+ for (unsigned int len = 1; len <= 60; len++) {
1095
+ dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
1096
+ assigned++;
1097
+ }
1098
+
1099
+ // Large LITERAL entries. We use 60..63 in the high 6 bits to
1100
+ // encode the number of bytes of length info that follow the opcode.
1101
+ for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
1102
+ // We set the length field in the lookup table to 1 because extra
1103
+ // bytes encode len-1.
1104
+ dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
1105
+ assigned++;
1106
+ }
1107
+
1108
+ // COPY_1_BYTE_OFFSET.
1109
+ //
1110
+ // The tag byte in the compressed data stores len-4 in 3 bits, and
1111
+ // offset/256 in 5 bits. offset%256 is stored in the next byte.
1112
+ //
1113
+ // This format is used for length in range [4..11] and offset in
1114
+ // range [0..2047]
1115
+ for (unsigned int len = 4; len < 12; len++) {
1116
+ for (unsigned int offset = 0; offset < 2048; offset += 256) {
1117
+ dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
1118
+ MakeEntry(1, len, offset>>8);
1119
+ assigned++;
1120
+ }
1121
+ }
1122
+
1123
+ // COPY_2_BYTE_OFFSET.
1124
+ // Tag contains len-1 in top 6 bits, and offset in next two bytes.
1125
+ for (unsigned int len = 1; len <= 64; len++) {
1126
+ dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
1127
+ assigned++;
1128
+ }
1129
+
1130
+ // COPY_4_BYTE_OFFSET.
1131
+ // Tag contents len-1 in top 6 bits, and offset in next four bytes.
1132
+ for (unsigned int len = 1; len <= 64; len++) {
1133
+ dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
1134
+ assigned++;
1135
+ }
1136
+
1137
+ // Check that each entry was initialized exactly once.
1138
+ EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
1139
+ for (int i = 0; i < 256; i++) {
1140
+ EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
1141
+ }
1142
+
1143
+ if (FLAGS_snappy_dump_decompression_table) {
1144
+ printf("static const uint16 char_table[256] = {\n ");
1145
+ for (int i = 0; i < 256; i++) {
1146
+ printf("0x%04x%s",
1147
+ dst[i],
1148
+ ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
1149
+ }
1150
+ printf("};\n");
1151
+ }
1152
+
1153
+ // Check that computed table matched recorded table.
1154
+ for (int i = 0; i < 256; i++) {
1155
+ EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
1156
+ }
1157
+ }
1158
+
1159
+ static void CompressFile(const char* fname) {
1160
+ string fullinput;
1161
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1162
+
1163
+ string compressed;
1164
+ Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1165
+
1166
+ CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
1167
+ file::Defaults()));
1168
+ }
1169
+
1170
+ static void UncompressFile(const char* fname) {
1171
+ string fullinput;
1172
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1173
+
1174
+ size_t uncompLength;
1175
+ CHECK(CheckUncompressedLength(fullinput, &uncompLength));
1176
+
1177
+ string uncompressed;
1178
+ uncompressed.resize(uncompLength);
1179
+ CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1180
+
1181
+ CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
1182
+ file::Defaults()));
1183
+ }
1184
+
1185
+ static void MeasureFile(const char* fname) {
1186
+ string fullinput;
1187
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1188
+ printf("%-40s :\n", fname);
1189
+
1190
+ int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
1191
+ int end_len = fullinput.size();
1192
+ if (FLAGS_end_len >= 0) {
1193
+ end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
1194
+ }
1195
+ for (int len = start_len; len <= end_len; len++) {
1196
+ const char* const input = fullinput.data();
1197
+ int repeats = (FLAGS_bytes + len) / (len + 1);
1198
+ if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
1199
+ if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
1200
+ if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
1201
+
1202
+ // For block-size based measurements
1203
+ if (0 && FLAGS_snappy) {
1204
+ Measure(input, len, SNAPPY, repeats, 8<<10);
1205
+ Measure(input, len, SNAPPY, repeats, 16<<10);
1206
+ Measure(input, len, SNAPPY, repeats, 32<<10);
1207
+ Measure(input, len, SNAPPY, repeats, 64<<10);
1208
+ Measure(input, len, SNAPPY, repeats, 256<<10);
1209
+ Measure(input, len, SNAPPY, repeats, 1024<<10);
1210
+ }
1211
+ }
1212
+ }
1213
+
1214
+ static struct {
1215
+ const char* label;
1216
+ const char* filename;
1217
+ size_t size_limit;
1218
+ } files[] = {
1219
+ { "html", "html", 0 },
1220
+ { "urls", "urls.10K", 0 },
1221
+ { "jpg", "fireworks.jpeg", 0 },
1222
+ { "jpg_200", "fireworks.jpeg", 200 },
1223
+ { "pdf", "paper-100k.pdf", 0 },
1224
+ { "html4", "html_x_4", 0 },
1225
+ { "txt1", "alice29.txt", 0 },
1226
+ { "txt2", "asyoulik.txt", 0 },
1227
+ { "txt3", "lcet10.txt", 0 },
1228
+ { "txt4", "plrabn12.txt", 0 },
1229
+ { "pb", "geo.protodata", 0 },
1230
+ { "gaviota", "kppkn.gtb", 0 },
1231
+ };
1232
+
1233
+ static void BM_UFlat(int iters, int arg) {
1234
+ StopBenchmarkTiming();
1235
+
1236
+ // Pick file to process based on "arg"
1237
+ CHECK_GE(arg, 0);
1238
+ CHECK_LT(arg, ARRAYSIZE(files));
1239
+ string contents = ReadTestDataFile(files[arg].filename,
1240
+ files[arg].size_limit);
1241
+
1242
+ string zcontents;
1243
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1244
+ char* dst = new char[contents.size()];
1245
+
1246
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1247
+ static_cast<int64>(contents.size()));
1248
+ SetBenchmarkLabel(files[arg].label);
1249
+ StartBenchmarkTiming();
1250
+ while (iters-- > 0) {
1251
+ CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
1252
+ }
1253
+ StopBenchmarkTiming();
1254
+
1255
+ delete[] dst;
1256
+ }
1257
+ BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1258
+
1259
+ static void BM_UValidate(int iters, int arg) {
1260
+ StopBenchmarkTiming();
1261
+
1262
+ // Pick file to process based on "arg"
1263
+ CHECK_GE(arg, 0);
1264
+ CHECK_LT(arg, ARRAYSIZE(files));
1265
+ string contents = ReadTestDataFile(files[arg].filename,
1266
+ files[arg].size_limit);
1267
+
1268
+ string zcontents;
1269
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1270
+
1271
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1272
+ static_cast<int64>(contents.size()));
1273
+ SetBenchmarkLabel(files[arg].label);
1274
+ StartBenchmarkTiming();
1275
+ while (iters-- > 0) {
1276
+ CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
1277
+ }
1278
+ StopBenchmarkTiming();
1279
+ }
1280
+ BENCHMARK(BM_UValidate)->DenseRange(0, 4);
1281
+
1282
+ static void BM_UIOVec(int iters, int arg) {
1283
+ StopBenchmarkTiming();
1284
+
1285
+ // Pick file to process based on "arg"
1286
+ CHECK_GE(arg, 0);
1287
+ CHECK_LT(arg, ARRAYSIZE(files));
1288
+ string contents = ReadTestDataFile(files[arg].filename,
1289
+ files[arg].size_limit);
1290
+
1291
+ string zcontents;
1292
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1293
+
1294
+ // Uncompress into an iovec containing ten entries.
1295
+ const int kNumEntries = 10;
1296
+ struct iovec iov[kNumEntries];
1297
+ char *dst = new char[contents.size()];
1298
+ int used_so_far = 0;
1299
+ for (int i = 0; i < kNumEntries; ++i) {
1300
+ iov[i].iov_base = dst + used_so_far;
1301
+ if (used_so_far == contents.size()) {
1302
+ iov[i].iov_len = 0;
1303
+ continue;
1304
+ }
1305
+
1306
+ if (i == kNumEntries - 1) {
1307
+ iov[i].iov_len = contents.size() - used_so_far;
1308
+ } else {
1309
+ iov[i].iov_len = contents.size() / kNumEntries;
1310
+ }
1311
+ used_so_far += iov[i].iov_len;
1312
+ }
1313
+
1314
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1315
+ static_cast<int64>(contents.size()));
1316
+ SetBenchmarkLabel(files[arg].label);
1317
+ StartBenchmarkTiming();
1318
+ while (iters-- > 0) {
1319
+ CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
1320
+ kNumEntries));
1321
+ }
1322
+ StopBenchmarkTiming();
1323
+
1324
+ delete[] dst;
1325
+ }
1326
+ BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1327
+
1328
+ static void BM_UFlatSink(int iters, int arg) {
1329
+ StopBenchmarkTiming();
1330
+
1331
+ // Pick file to process based on "arg"
1332
+ CHECK_GE(arg, 0);
1333
+ CHECK_LT(arg, ARRAYSIZE(files));
1334
+ string contents = ReadTestDataFile(files[arg].filename,
1335
+ files[arg].size_limit);
1336
+
1337
+ string zcontents;
1338
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1339
+ char* dst = new char[contents.size()];
1340
+
1341
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1342
+ static_cast<int64>(contents.size()));
1343
+ SetBenchmarkLabel(files[arg].label);
1344
+ StartBenchmarkTiming();
1345
+ while (iters-- > 0) {
1346
+ snappy::ByteArraySource source(zcontents.data(), zcontents.size());
1347
+ snappy::UncheckedByteArraySink sink(dst);
1348
+ CHECK(snappy::Uncompress(&source, &sink));
1349
+ }
1350
+ StopBenchmarkTiming();
1351
+
1352
+ string s(dst, contents.size());
1353
+ CHECK_EQ(contents, s);
1354
+
1355
+ delete[] dst;
1356
+ }
1357
+
1358
+ BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
1359
+
1360
+ static void BM_ZFlat(int iters, int arg) {
1361
+ StopBenchmarkTiming();
1362
+
1363
+ // Pick file to process based on "arg"
1364
+ CHECK_GE(arg, 0);
1365
+ CHECK_LT(arg, ARRAYSIZE(files));
1366
+ string contents = ReadTestDataFile(files[arg].filename,
1367
+ files[arg].size_limit);
1368
+
1369
+ char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1370
+
1371
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1372
+ static_cast<int64>(contents.size()));
1373
+ StartBenchmarkTiming();
1374
+
1375
+ size_t zsize = 0;
1376
+ while (iters-- > 0) {
1377
+ snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
1378
+ }
1379
+ StopBenchmarkTiming();
1380
+ const double compression_ratio =
1381
+ static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1382
+ SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
1383
+ files[arg].label, 100.0 * compression_ratio));
1384
+ VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
1385
+ files[arg].label, contents.size(), zsize);
1386
+ delete[] dst;
1387
+ }
1388
+ BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1389
+
1390
+ } // namespace snappy
1391
+
1392
+ int main(int argc, char** argv) {
1393
+ InitGoogle(argv[0], &argc, &argv, true);
1394
+ RunSpecifiedBenchmarks();
1395
+
1396
+ if (argc >= 2) {
1397
+ for (int arg = 1; arg < argc; arg++) {
1398
+ if (FLAGS_write_compressed) {
1399
+ snappy::CompressFile(argv[arg]);
1400
+ } else if (FLAGS_write_uncompressed) {
1401
+ snappy::UncompressFile(argv[arg]);
1402
+ } else {
1403
+ snappy::MeasureFile(argv[arg]);
1404
+ }
1405
+ }
1406
+ return 0;
1407
+ }
1408
+
1409
+ return RUN_ALL_TESTS();
1410
+ }