snappy_ext 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/ext/snappy/extconf.rb +36 -0
  2. data/ext/snappy/snappy_ext.cc +131 -0
  3. data/ext/snappy/vendor/snappy-1.0.0/AUTHORS +1 -0
  4. data/ext/snappy/vendor/snappy-1.0.0/COPYING +28 -0
  5. data/ext/snappy/vendor/snappy-1.0.0/ChangeLog +3 -0
  6. data/ext/snappy/vendor/snappy-1.0.0/INSTALL +230 -0
  7. data/ext/snappy/vendor/snappy-1.0.0/Makefile.am +24 -0
  8. data/ext/snappy/vendor/snappy-1.0.0/Makefile.in +926 -0
  9. data/ext/snappy/vendor/snappy-1.0.0/NEWS +3 -0
  10. data/ext/snappy/vendor/snappy-1.0.0/README +132 -0
  11. data/ext/snappy/vendor/snappy-1.0.0/aclocal.m4 +9076 -0
  12. data/ext/snappy/vendor/snappy-1.0.0/autogen.sh +8 -0
  13. data/ext/snappy/vendor/snappy-1.0.0/compile +99 -0
  14. data/ext/snappy/vendor/snappy-1.0.0/config.guess +1466 -0
  15. data/ext/snappy/vendor/snappy-1.0.0/config.h.in +107 -0
  16. data/ext/snappy/vendor/snappy-1.0.0/config.sub +1579 -0
  17. data/ext/snappy/vendor/snappy-1.0.0/configure +17962 -0
  18. data/ext/snappy/vendor/snappy-1.0.0/configure.ac +99 -0
  19. data/ext/snappy/vendor/snappy-1.0.0/depcomp +530 -0
  20. data/ext/snappy/vendor/snappy-1.0.0/install-sh +323 -0
  21. data/ext/snappy/vendor/snappy-1.0.0/ltmain.sh +8413 -0
  22. data/ext/snappy/vendor/snappy-1.0.0/m4/gtest.m4 +74 -0
  23. data/ext/snappy/vendor/snappy-1.0.0/missing +360 -0
  24. data/ext/snappy/vendor/snappy-1.0.0/mkinstalldirs +158 -0
  25. data/ext/snappy/vendor/snappy-1.0.0/snappy-internal.h +136 -0
  26. data/ext/snappy/vendor/snappy-1.0.0/snappy-sinksource.cc +46 -0
  27. data/ext/snappy/vendor/snappy-1.0.0/snappy-sinksource.h +110 -0
  28. data/ext/snappy/vendor/snappy-1.0.0/snappy-stubs-internal.cc +28 -0
  29. data/ext/snappy/vendor/snappy-1.0.0/snappy-stubs-internal.h +457 -0
  30. data/ext/snappy/vendor/snappy-1.0.0/snappy-stubs-public.h +59 -0
  31. data/ext/snappy/vendor/snappy-1.0.0/snappy-stubs-public.h.in +59 -0
  32. data/ext/snappy/vendor/snappy-1.0.0/snappy-test.cc +523 -0
  33. data/ext/snappy/vendor/snappy-1.0.0/snappy-test.h +458 -0
  34. data/ext/snappy/vendor/snappy-1.0.0/snappy.cc +1001 -0
  35. data/ext/snappy/vendor/snappy-1.0.0/snappy.h +141 -0
  36. data/ext/snappy/vendor/snappy-1.0.0/snappy_unittest.cc +1073 -0
  37. data/ext/snappy/version.h +4 -0
  38. data/snappy_ext.gemspec +58 -0
  39. metadata +99 -0
@@ -0,0 +1,141 @@
1
+ // Copyright 2005 and onwards Google Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // A light-weight compression algorithm. It is designed for speed of
16
+ // compression and decompression, rather than for the utmost in space
17
+ // savings.
18
+ //
19
+ // For getting better compression ratios when you are compressing data
20
+ // with long repeated sequences or compressing data that is similar to
21
+ // other data, while still compressing fast, you might look at first
22
+ // using BMDiff and then compressing the output of BMDiff with
23
+ // Snappy.
24
+
25
+ #ifndef UTIL_SNAPPY_SNAPPY_H__
26
+ #define UTIL_SNAPPY_SNAPPY_H__
27
+
28
+ #include <stddef.h>
29
+ #include <string>
30
+
31
+ #include "snappy-stubs-public.h"
32
+
33
+ namespace snappy {
34
+ class Source;
35
+ class Sink;
36
+
37
+ // ------------------------------------------------------------------------
38
+ // Generic compression/decompression routines.
39
+ // ------------------------------------------------------------------------
40
+
41
+ // Compress the bytes read from "*source" and append to "*sink". Return the
42
+ // number of bytes written.
43
+ size_t Compress(Source* source, Sink* sink);
44
+
45
+ bool GetUncompressedLength(Source* source, uint32* result);
46
+
47
+ // ------------------------------------------------------------------------
48
+ // Higher-level string based routines (should be sufficient for most users)
49
+ // ------------------------------------------------------------------------
50
+
51
+ // Sets "*output" to the compressed version of "input[0,input_length-1]".
52
+ // Original contents of *output are lost.
53
+ //
54
+ // REQUIRES: "input[]" is not an alias of "*output".
55
+ size_t Compress(const char* input, size_t input_length, string* output);
56
+
57
+ // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
58
+ // Original contents of "*uncompressed" are lost.
59
+ //
60
+ // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
61
+ //
62
+ // returns false if the message is corrupted and could not be decompressed
63
+ bool Uncompress(const char* compressed, size_t compressed_length,
64
+ string* uncompressed);
65
+
66
+
67
+ // ------------------------------------------------------------------------
68
+ // Lower-level character array based routines. May be useful for
69
+ // efficiency reasons in certain circumstances.
70
+ // ------------------------------------------------------------------------
71
+
72
+ // REQUIRES: "compressed" must point to an area of memory that is at
73
+ // least "MaxCompressedLength(input_length)" bytes in length.
74
+ //
75
+ // Takes the data stored in "input[0..input_length-1]" and stores
76
+ // it in the array pointed to by "compressed".
77
+ //
78
+ // "*compressed_length" is set to the length of the compressed output.
79
+ //
80
+ // Example:
81
+ // char* output = new char[snappy::MaxCompressedLength(input_length)];
82
+ // size_t output_length;
83
+ // RawCompress(input, input_length, output, &output_length);
84
+ // ... Process(output, output_length) ...
85
+ // delete [] output;
86
+ void RawCompress(const char* input,
87
+ size_t input_length,
88
+ char* compressed,
89
+ size_t* compressed_length);
90
+
91
+ // Given data in "compressed[0..compressed_length-1]" generated by
92
+ // calling the Snappy::Compress routine, this routine
93
+ // stores the uncompressed data to
94
+ // uncompressed[0..GetUncompressedLength(compressed)-1]
95
+ // returns false if the message is corrupted and could not be decompressed
96
+ bool RawUncompress(const char* compressed, size_t compressed_length,
97
+ char* uncompressed);
98
+
99
+ // Given data from the byte source 'compressed' generated by calling
100
+ // the Snappy::Compress routine, this routine stores the uncompressed
101
+ // data to
102
+ // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
103
+ // returns false if the message is corrupted and could not be decompressed
104
+ bool RawUncompress(Source* compressed, char* uncompressed);
105
+
106
+ // Returns the maximal size of the compressed representation of
107
+ // input data that is "source_bytes" bytes in length.
108
+ size_t MaxCompressedLength(size_t source_bytes);
109
+
110
+ // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
111
+ // Returns true and stores the length of the uncompressed data in
112
+ // *result normally. Returns false on parsing error.
113
+ // This operation takes O(1) time.
114
+ bool GetUncompressedLength(const char* compressed, size_t compressed_length,
115
+ size_t* result);
116
+
117
+ // Returns true iff the contents of "compressed[]" can be uncompressed
118
+ // successfully. Does not return the uncompressed data. Takes
119
+ // time proportional to compressed_length, but is usually at least
120
+ // a factor of four faster than actual decompression.
121
+ bool IsValidCompressedBuffer(const char* compressed,
122
+ size_t compressed_length);
123
+
124
+ // *** DO NOT CHANGE THE VALUE OF kBlockSize ***
125
+ //
126
+ // New Compression code chops up the input into blocks of at most
127
+ // the following size. This ensures that back-references in the
128
+ // output never cross kBlockSize block boundaries. This can be
129
+ // helpful in implementing blocked decompression. However the
130
+ // decompression code should not rely on this guarantee since older
131
+ // compression code may not obey it.
132
+ static const int kBlockLog = 15;
133
+ static const int kBlockSize = 1 << kBlockLog;
134
+
135
+ static const int kMaxHashTableBits = 14;
136
+ static const int kMaxHashTableSize = 1 << kMaxHashTableBits;
137
+
138
+ } // end namespace snappy
139
+
140
+
141
+ #endif // UTIL_SNAPPY_SNAPPY_H__
@@ -0,0 +1,1073 @@
1
+ // Copyright 2005 and onwards Google Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ #include <math.h>
16
+ #include <stdlib.h>
17
+ #include <sys/mman.h>
18
+
19
+ #include <algorithm>
20
+ #include <string>
21
+ #include <vector>
22
+
23
+ #include "snappy.h"
24
+ #include "snappy-internal.h"
25
+ #include "snappy-test.h"
26
+ #include "snappy-sinksource.h"
27
+
28
+ DEFINE_int32(start_len, -1,
29
+ "Starting prefix size for testing (-1: just full file contents)");
30
+ DEFINE_int32(end_len, -1,
31
+ "Starting prefix size for testing (-1: just full file contents)");
32
+ DEFINE_int32(bytes, 10485760,
33
+ "How many bytes to compress/uncompress per file for timing");
34
+
35
+ DEFINE_bool(zlib, false,
36
+ "Run zlib compression (http://www.zlib.net)");
37
+ DEFINE_bool(lzo, false,
38
+ "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
39
+ DEFINE_bool(quicklz, false,
40
+ "Run quickLZ compression (http://www.quicklz.com/)");
41
+ DEFINE_bool(liblzf, false,
42
+ "Run libLZF compression "
43
+ "(http://www.goof.com/pcg/marc/liblzf.html)");
44
+ DEFINE_bool(fastlz, false,
45
+ "Run FastLZ compression (http://www.fastlz.org/");
46
+ DEFINE_bool(snappy, true, "Run snappy compression");
47
+
48
+
49
+ DEFINE_bool(write_compressed, false,
50
+ "Write compressed versions of each file to <file>.comp");
51
+ DEFINE_bool(write_uncompressed, false,
52
+ "Write uncompressed versions of each file to <file>.uncomp");
53
+
54
+ namespace snappy {
55
+
56
+
57
+ // To test against code that reads beyond its input, this class copies a
58
+ // string to a newly allocated group of pages, the last of which
59
+ // is made unreadable via mprotect. Note that we need to allocate the
60
+ // memory with mmap(), as POSIX allows mprotect() only on memory allocated
61
+ // with mmap(), and some malloc/posix_memalign implementations expect to
62
+ // be able to read previously allocated memory while doing heap allocations.
63
+ //
64
+ // TODO(user): Add support for running the unittest without the protection
65
+ // checks if the target system doesn't have mmap.
66
+ class DataEndingAtUnreadablePage {
67
+ public:
68
+ explicit DataEndingAtUnreadablePage(const string& s) {
69
+ const size_t page_size = getpagesize();
70
+ const size_t size = s.size();
71
+ // Round up space for string to a multiple of page_size.
72
+ size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
73
+ alloc_size_ = space_for_string + page_size;
74
+ mem_ = mmap(NULL, alloc_size_,
75
+ PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
76
+ CHECK_NE(MAP_FAILED, mem_);
77
+ protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
78
+ char* dst = protected_page_ - size;
79
+ memcpy(dst, s.data(), size);
80
+ data_ = dst;
81
+ size_ = size;
82
+ // Make guard page unreadable.
83
+ CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
84
+ }
85
+
86
+ ~DataEndingAtUnreadablePage() {
87
+ // Undo the mprotect.
88
+ CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
89
+ CHECK_EQ(0, munmap(mem_, alloc_size_));
90
+ }
91
+
92
+ const char* data() const { return data_; }
93
+ size_t size() const { return size_; }
94
+
95
+ private:
96
+ size_t alloc_size_;
97
+ void* mem_;
98
+ char* protected_page_;
99
+ const char* data_;
100
+ size_t size_;
101
+ };
102
+
103
+ enum CompressorType {
104
+ ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY,
105
+ };
106
+
107
+ const char* names[] = {
108
+ "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY",
109
+ };
110
+
111
+ // Returns true if we successfully compressed, false otherwise
112
+ static bool Compress(const char* input, size_t input_size, CompressorType comp,
113
+ string* compressed) {
114
+ switch (comp) {
115
+ #ifdef ZLIB_VERSION
116
+ case ZLIB: {
117
+ compressed->resize(ZLib::MinCompressbufSize(input_size));
118
+ ZLib zlib;
119
+ uLongf destlen = compressed->size();
120
+ int ret = zlib.Compress(
121
+ reinterpret_cast<Bytef*>(string_as_array(compressed)),
122
+ &destlen,
123
+ reinterpret_cast<const Bytef*>(input),
124
+ input_size);
125
+ CHECK_EQ(Z_OK, ret);
126
+ compressed->resize(destlen);
127
+ return true;
128
+ }
129
+ #endif // ZLIB_VERSION
130
+
131
+ #ifdef LZO_VERSION
132
+ case LZO: {
133
+ compressed->resize(input_size + input_size/64 + 16 + 3);
134
+ unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
135
+ lzo_uint destlen;
136
+ int ret = lzo1x_1_15_compress(
137
+ reinterpret_cast<const uint8*>(input),
138
+ input_size,
139
+ reinterpret_cast<uint8*>(string_as_array(compressed)),
140
+ &destlen,
141
+ mem);
142
+ CHECK_EQ(LZO_E_OK, ret);
143
+ delete[] mem;
144
+ compressed->resize(destlen);
145
+ break;
146
+ }
147
+ #endif // LZO_VERSION
148
+
149
+ #ifdef LZF_VERSION
150
+ case LIBLZF: {
151
+ compressed->resize(input_size - 1);
152
+ int destlen = lzf_compress(input,
153
+ input_size,
154
+ string_as_array(compressed),
155
+ input_size - 1);
156
+ if (destlen == 0) {
157
+ // lzf *can* cause lots of blowup when compressing, so they
158
+ // recommend to limit outsize to insize, and just not compress
159
+ // if it's bigger. Ideally, we'd just swap input and output.
160
+ compressed->assign(input, input_size);
161
+ destlen = input_size;
162
+ }
163
+ compressed->resize(destlen);
164
+ break;
165
+ }
166
+ #endif // LZF_VERSION
167
+
168
+ #ifdef QLZ_VERSION_MAJOR
169
+ case QUICKLZ: {
170
+ compressed->resize(input_size + 36000); // 36000 is used for scratch
171
+ qlz_state_compress *state_compress = new qlz_state_compress;
172
+ int destlen = qlz_compress(input,
173
+ string_as_array(compressed),
174
+ input_size,
175
+ state_compress);
176
+ delete state_compress;
177
+ CHECK_NE(0, destlen);
178
+ compressed->resize(destlen);
179
+ break;
180
+ }
181
+ #endif // QLZ_VERSION_MAJOR
182
+
183
+ #ifdef FASTLZ_VERSION
184
+ case FASTLZ: {
185
+ int compressed_size = max(static_cast<int>(ceil(input_size * 1.05)),
186
+ 66);
187
+ compressed->resize(compressed_size);
188
+ // Use level 1 compression since we mostly care about speed.
189
+ int destlen = fastlz_compress_level(
190
+ 1,
191
+ input,
192
+ input_size,
193
+ string_as_array(compressed));
194
+ compressed->resize(destlen);
195
+ CHECK_NE(destlen, 0);
196
+ break;
197
+ }
198
+ #endif // FASTLZ_VERSION
199
+
200
+ case SNAPPY: {
201
+ size_t destlen;
202
+ snappy::RawCompress(input, input_size,
203
+ string_as_array(compressed),
204
+ &destlen);
205
+ CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
206
+ compressed->resize(destlen);
207
+ break;
208
+ }
209
+
210
+
211
+ default: {
212
+ return false; // the asked-for library wasn't compiled in
213
+ }
214
+ }
215
+ return true;
216
+ }
217
+
218
+ static bool Uncompress(const string& compressed, CompressorType comp,
219
+ int size, string* output) {
220
+ switch (comp) {
221
+ #ifdef ZLIB_VERSION
222
+ case ZLIB: {
223
+ output->resize(size);
224
+ ZLib zlib;
225
+ uLongf destlen = output->size();
226
+ int ret = zlib.Uncompress(
227
+ reinterpret_cast<Bytef*>(string_as_array(output)),
228
+ &destlen,
229
+ reinterpret_cast<const Bytef*>(compressed.data()),
230
+ compressed.size());
231
+ CHECK_EQ(Z_OK, ret);
232
+ CHECK_EQ(destlen, size);
233
+ break;
234
+ }
235
+ #endif // ZLIB_VERSION
236
+
237
+ #ifdef LZO_VERSION
238
+ case LZO: {
239
+ output->resize(size);
240
+ lzo_uint destlen;
241
+ int ret = lzo1x_decompress(
242
+ reinterpret_cast<const uint8*>(compressed.data()),
243
+ compressed.size(),
244
+ reinterpret_cast<uint8*>(string_as_array(output)),
245
+ &destlen,
246
+ NULL);
247
+ CHECK_EQ(LZO_E_OK, ret);
248
+ CHECK_EQ(destlen, size);
249
+ break;
250
+ }
251
+ #endif // LZO_VERSION
252
+
253
+ #ifdef LZF_VERSION
254
+ case LIBLZF: {
255
+ output->resize(size);
256
+ int destlen = lzf_decompress(compressed.data(),
257
+ compressed.size(),
258
+ string_as_array(output),
259
+ output->size());
260
+ if (destlen == 0) {
261
+ // This error probably means we had decided not to compress,
262
+ // and thus have stored input in output directly.
263
+ output->assign(compressed.data(), compressed.size());
264
+ destlen = compressed.size();
265
+ }
266
+ CHECK_EQ(destlen, size);
267
+ break;
268
+ }
269
+ #endif // LZF_VERSION
270
+
271
+ #ifdef QLZ_VERSION_MAJOR
272
+ case QUICKLZ: {
273
+ output->resize(size);
274
+ qlz_state_decompress *state_decompress = new qlz_state_decompress;
275
+ int destlen = qlz_decompress(compressed.data(),
276
+ string_as_array(output),
277
+ state_decompress);
278
+ delete state_decompress;
279
+ CHECK_EQ(destlen, size);
280
+ break;
281
+ }
282
+ #endif // QLZ_VERSION_MAJOR
283
+
284
+ #ifdef FASTLZ_VERSION
285
+ case FASTLZ: {
286
+ output->resize(size);
287
+ int destlen = fastlz_decompress(compressed.data(),
288
+ compressed.length(),
289
+ string_as_array(output),
290
+ size);
291
+ CHECK_EQ(destlen, size);
292
+ break;
293
+ }
294
+ #endif // FASTLZ_VERSION
295
+
296
+ case SNAPPY: {
297
+ snappy::RawUncompress(compressed.data(), compressed.size(),
298
+ string_as_array(output));
299
+ break;
300
+ }
301
+
302
+
303
+ default: {
304
+ return false; // the asked-for library wasn't compiled in
305
+ }
306
+ }
307
+ return true;
308
+ }
309
+
310
+ static void Measure(const char* data,
311
+ size_t length,
312
+ CompressorType comp,
313
+ int repeats,
314
+ int block_size) {
315
+ // Run the tests a few times and pick the median running times
316
+ static const int kRuns = 5;
317
+ double ctime[kRuns];
318
+ double utime[kRuns];
319
+ int compressed_size = 0;
320
+
321
+ {
322
+ // Chop the input into blocks
323
+ int num_blocks = (length + block_size - 1) / block_size;
324
+ vector<const char*> input(num_blocks);
325
+ vector<size_t> input_length(num_blocks);
326
+ vector<string> compressed(num_blocks);
327
+ vector<string> output(num_blocks);
328
+ for (int b = 0; b < num_blocks; b++) {
329
+ int input_start = b * block_size;
330
+ int input_limit = min<int>((b+1)*block_size, length);
331
+ input[b] = data+input_start;
332
+ input_length[b] = input_limit-input_start;
333
+
334
+ // Pre-grow the output buffer so we don't measure stupid string
335
+ // append time
336
+ compressed[b].resize(block_size * 2);
337
+ }
338
+
339
+ // First, try one trial compression to make sure the code is compiled in
340
+ if (!Compress(input[0], input_length[0], comp, &compressed[0])) {
341
+ LOG(WARNING) << "Skipping " << names[comp] << ": "
342
+ << "library not compiled in";
343
+ return;
344
+ }
345
+
346
+ for (int run = 0; run < kRuns; run++) {
347
+ CycleTimer ctimer, utimer;
348
+
349
+ ctimer.Start();
350
+ for (int b = 0; b < num_blocks; b++)
351
+ for (int i = 0; i < repeats; i++)
352
+ Compress(input[b], input_length[b], comp, &compressed[b]);
353
+ ctimer.Stop();
354
+
355
+ for (int b = 0; b < num_blocks; b++) {
356
+ output[b].resize(input_length[b]);
357
+ }
358
+
359
+ utimer.Start();
360
+ for (int i = 0; i < repeats; i++)
361
+ for (int b = 0; b < num_blocks; b++)
362
+ Uncompress(compressed[b], comp, input_length[b], &output[b]);
363
+ utimer.Stop();
364
+
365
+ ctime[run] = ctimer.Get();
366
+ utime[run] = utimer.Get();
367
+ }
368
+
369
+ compressed_size = 0;
370
+ for (int i = 0; i < compressed.size(); i++) {
371
+ compressed_size += compressed[i].size();
372
+ }
373
+ }
374
+
375
+ sort(ctime, ctime + kRuns);
376
+ sort(utime, utime + kRuns);
377
+ const int med = kRuns/2;
378
+
379
+ float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
380
+ float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
381
+ string x = names[comp];
382
+ x += ":";
383
+ string urate = (uncomp_rate >= 0)
384
+ ? StringPrintf("%.1f", uncomp_rate)
385
+ : string("?");
386
+ printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
387
+ "comp %5.1f MB/s uncomp %5s MB/s\n",
388
+ x.c_str(),
389
+ block_size/(1<<20),
390
+ static_cast<int>(length), static_cast<uint32>(compressed_size),
391
+ (compressed_size * 100.0) / max<int>(1, length),
392
+ comp_rate,
393
+ urate.c_str());
394
+ }
395
+
396
+
397
+ static int VerifyString(const string& input) {
398
+ string compressed;
399
+ DataEndingAtUnreadablePage i(input);
400
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
401
+ CHECK_EQ(written, compressed.size());
402
+ CHECK_LE(compressed.size(),
403
+ snappy::MaxCompressedLength(input.size()));
404
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
405
+
406
+ string uncompressed;
407
+ DataEndingAtUnreadablePage c(compressed);
408
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
409
+ CHECK_EQ(uncompressed, input);
410
+ return uncompressed.size();
411
+ }
412
+
413
+
414
+ // Test that data compressed by a compressor that does not
415
+ // obey block sizes is uncompressed properly.
416
+ static void VerifyNonBlockedCompression(const string& input) {
417
+ if (input.length() > snappy::kBlockSize) {
418
+ // We cannot test larger blocks than the maximum block size, obviously.
419
+ return;
420
+ }
421
+
422
+ string prefix;
423
+ Varint::Append32(&prefix, input.size());
424
+
425
+ // Setup compression table
426
+ snappy::internal::WorkingMemory wmem;
427
+ int table_size;
428
+ uint16* table = wmem.GetHashTable(input.size(), &table_size);
429
+
430
+ // Compress entire input in one shot
431
+ string compressed;
432
+ compressed += prefix;
433
+ compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
434
+ char* dest = string_as_array(&compressed) + prefix.size();
435
+ char* end = snappy::internal::CompressFragment(input.data(), input.size(),
436
+ dest, table, table_size);
437
+ compressed.resize(end - compressed.data());
438
+
439
+ // Uncompress into string
440
+ string uncomp_str;
441
+ CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
442
+ CHECK_EQ(uncomp_str, input);
443
+
444
+ }
445
+
446
+ // Expand the input so that it is at least K times as big as block size
447
+ static string Expand(const string& input) {
448
+ static const int K = 3;
449
+ string data = input;
450
+ while (data.size() < K * snappy::kBlockSize) {
451
+ data += input;
452
+ }
453
+ return data;
454
+ }
455
+
456
+ static int Verify(const string& input) {
457
+ VLOG(1) << "Verifying input of size " << input.size();
458
+
459
+ // Compress using string based routines
460
+ const int result = VerifyString(input);
461
+
462
+
463
+ VerifyNonBlockedCompression(input);
464
+ if (!input.empty()) {
465
+ VerifyNonBlockedCompression(Expand(input));
466
+ }
467
+
468
+
469
+ return result;
470
+ }
471
+
472
+ // This test checks to ensure that snappy doesn't coredump if it gets
473
+ // corrupted data.
474
+
475
+ static bool IsValidCompressedBuffer(const string& c) {
476
+ return snappy::IsValidCompressedBuffer(c.data(), c.size());
477
+ }
478
+ static bool Uncompress(const string& c, string* u) {
479
+ return snappy::Uncompress(c.data(), c.size(), u);
480
+ }
481
+
482
+ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
483
+ string source = "making sure we don't crash with corrupted input";
484
+ VLOG(1) << source;
485
+ string dest;
486
+ TypeParam uncmp;
487
+ snappy::Compress(source.data(), source.size(), &dest);
488
+
489
+ // Mess around with the data. It's hard to simulate all possible
490
+ // corruptions; this is just one example ...
491
+ CHECK_GT(dest.size(), 3);
492
+ dest[1]--;
493
+ dest[3]++;
494
+ // this really ought to fail.
495
+ CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
496
+ CHECK(!Uncompress(TypeParam(dest), &uncmp));
497
+
498
+ // This is testing for a security bug - a buffer that decompresses to 100k
499
+ // but we lie in the snappy header and only reserve 0 bytes of memory :)
500
+ source.resize(100000);
501
+ for (int i = 0; i < source.length(); ++i) {
502
+ source[i] = 'A';
503
+ }
504
+ snappy::Compress(source.data(), source.size(), &dest);
505
+ dest[0] = dest[1] = dest[2] = dest[3] = 0;
506
+ CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
507
+ CHECK(!Uncompress(TypeParam(dest), &uncmp));
508
+
509
+ if (sizeof(void *) == 4) {
510
+ // Another security check; check a crazy big length can't DoS us with an
511
+ // over-allocation.
512
+ // Currently this is done only for 32-bit builds. On 64-bit builds,
513
+ // where 3GBytes might be an acceptable allocation size, Uncompress()
514
+ // attempts to decompress, and sometimes causes the test to run out of
515
+ // memory.
516
+ dest[0] = dest[1] = dest[2] = dest[3] = 0xff;
517
+ // This decodes to a really large size, i.e., 3221225471 bytes
518
+ dest[4] = 'k';
519
+ CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
520
+ CHECK(!Uncompress(TypeParam(dest), &uncmp));
521
+ dest[0] = dest[1] = dest[2] = 0xff;
522
+ dest[3] = 0x7f;
523
+ CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
524
+ CHECK(!Uncompress(TypeParam(dest), &uncmp));
525
+ } else {
526
+ LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
527
+ }
528
+
529
+ // try reading stuff in from a bad file.
530
+ for (int i = 1; i <= 3; ++i) {
531
+ string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str());
532
+ string uncmp;
533
+ // check that we don't return a crazy length
534
+ size_t ulen;
535
+ CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
536
+ || (ulen < (1<<20)));
537
+ uint32 ulen2;
538
+ snappy::ByteArraySource source(data.data(), data.size());
539
+ CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
540
+ (ulen2 < (1<<20)));
541
+ CHECK(!IsValidCompressedBuffer(TypeParam(data)));
542
+ CHECK(!Uncompress(TypeParam(data), &uncmp));
543
+ }
544
+ }
545
+
546
+ // Helper routines to construct arbitrary compressed strings.
547
+ // These mirror the compression code in snappy.cc, but are copied
548
+ // here so that we can bypass some limitations in how snappy.cc
549
+ // invokes these routines.
550
+ static void AppendLiteral(string* dst, const string& literal) {
551
+ if (literal.empty()) return;
552
+ int n = literal.size() - 1;
553
+ if (n < 60) {
554
+ // Fit length in tag byte
555
+ dst->push_back(0 | (n << 2));
556
+ } else {
557
+ // Encode in upcoming bytes
558
+ char number[4];
559
+ int count = 0;
560
+ while (n > 0) {
561
+ number[count++] = n & 0xff;
562
+ n >>= 8;
563
+ }
564
+ dst->push_back(0 | ((59+count) << 2));
565
+ *dst += string(number, count);
566
+ }
567
+ *dst += literal;
568
+ }
569
+
570
+ static void AppendCopy(string* dst, int offset, int length) {
571
+ while (length > 0) {
572
+ // Figure out how much to copy in one shot
573
+ int to_copy;
574
+ if (length >= 68) {
575
+ to_copy = 64;
576
+ } else if (length > 64) {
577
+ to_copy = 60;
578
+ } else {
579
+ to_copy = length;
580
+ }
581
+ length -= to_copy;
582
+
583
+ if ((to_copy < 12) && (offset < 2048)) {
584
+ assert(to_copy-4 < 8); // Must fit in 3 bits
585
+ dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5));
586
+ dst->push_back(offset & 0xff);
587
+ } else if (offset < 65536) {
588
+ dst->push_back(2 | ((to_copy-1) << 2));
589
+ dst->push_back(offset & 0xff);
590
+ dst->push_back(offset >> 8);
591
+ } else {
592
+ dst->push_back(3 | ((to_copy-1) << 2));
593
+ dst->push_back(offset & 0xff);
594
+ dst->push_back((offset >> 8) & 0xff);
595
+ dst->push_back((offset >> 16) & 0xff);
596
+ dst->push_back((offset >> 24) & 0xff);
597
+ }
598
+ }
599
+ }
600
+
601
+ TEST(Snappy, SimpleTests) {
602
+ Verify("");
603
+ Verify("a");
604
+ Verify("ab");
605
+ Verify("abc");
606
+
607
+ Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
608
+ Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
609
+ Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
610
+ Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
611
+ Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
612
+ }
613
+
614
+ // Verify max blowup (lots of four-byte copies)
615
+ TEST(Snappy, MaxBlowup) {
616
+ string input;
617
+ for (int i = 0; i < 20000; i++) {
618
+ ACMRandom rnd(i);
619
+ uint32 bytes = static_cast<uint32>(rnd.Next());
620
+ input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
621
+ }
622
+ for (int i = 19999; i >= 0; i--) {
623
+ ACMRandom rnd(i);
624
+ uint32 bytes = static_cast<uint32>(rnd.Next());
625
+ input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
626
+ }
627
+ Verify(input);
628
+ }
629
+
630
+ TEST(Snappy, RandomData) {
631
+ ACMRandom rnd(FLAGS_test_random_seed);
632
+
633
+ const int num_ops = 20000;
634
+ for (int i = 0; i < num_ops; i++) {
635
+ if ((i % 1000) == 0) {
636
+ VLOG(0) << "Random op " << i << " of " << num_ops;
637
+ }
638
+
639
+ string x;
640
+ int len = rnd.Uniform(4096);
641
+ if (i < 100) {
642
+ len = 65536 + rnd.Uniform(65536);
643
+ }
644
+ while (x.size() < len) {
645
+ int run_len = 1;
646
+ if (rnd.OneIn(10)) {
647
+ run_len = rnd.Skewed(8);
648
+ }
649
+ char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
650
+ while (run_len-- > 0 && x.size() < len) {
651
+ x += c;
652
+ }
653
+ }
654
+
655
+ Verify(x);
656
+ }
657
+ }
658
+
659
+ TEST(Snappy, FourByteOffset) {
660
+ // The new compressor cannot generate four-byte offsets since
661
+ // it chops up the input into 32KB pieces. So we hand-emit the
662
+ // copy manually.
663
+
664
+ // The two fragments that make up the input string
665
+ string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
666
+ string fragment2 = "some other string";
667
+
668
+ // How many times each fragment is emitted
669
+ const int n1 = 2;
670
+ const int n2 = 100000 / fragment2.size();
671
+ const int length = n1 * fragment1.size() + n2 * fragment2.size();
672
+
673
+ string compressed;
674
+ Varint::Append32(&compressed, length);
675
+
676
+ AppendLiteral(&compressed, fragment1);
677
+ string src = fragment1;
678
+ for (int i = 0; i < n2; i++) {
679
+ AppendLiteral(&compressed, fragment2);
680
+ src += fragment2;
681
+ }
682
+ AppendCopy(&compressed, src.size(), fragment1.size());
683
+ src += fragment1;
684
+ CHECK_EQ(length, src.size());
685
+
686
+ string uncompressed;
687
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
688
+ CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed));
689
+ CHECK_EQ(uncompressed, src);
690
+ }
691
+
692
+
693
+ static bool CheckUncompressedLength(const string& compressed,
694
+ size_t* ulength) {
695
+ const bool result1 = snappy::GetUncompressedLength(compressed.data(),
696
+ compressed.size(),
697
+ ulength);
698
+
699
+ snappy::ByteArraySource source(compressed.data(), compressed.size());
700
+ uint32 length;
701
+ const bool result2 = snappy::GetUncompressedLength(&source, &length);
702
+ CHECK_EQ(result1, result2);
703
+ return result1;
704
+ }
705
+
706
+ TEST(SnappyCorruption, TruncatedVarint) {
707
+ string compressed, uncompressed;
708
+ size_t ulength;
709
+ compressed.push_back('\xf0');
710
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
711
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
712
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
713
+ &uncompressed));
714
+ }
715
+
716
+ TEST(SnappyCorruption, UnterminatedVarint) {
717
+ string compressed, uncompressed;
718
+ size_t ulength;
719
+ compressed.push_back(128);
720
+ compressed.push_back(128);
721
+ compressed.push_back(128);
722
+ compressed.push_back(128);
723
+ compressed.push_back(128);
724
+ compressed.push_back(10);
725
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
726
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
727
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
728
+ &uncompressed));
729
+ }
730
+
731
+ TEST(Snappy, ReadPastEndOfBuffer) {
732
+ // Check that we do not read past end of input
733
+
734
+ // Make a compressed string that ends with a single-byte literal
735
+ string compressed;
736
+ Varint::Append32(&compressed, 1);
737
+ AppendLiteral(&compressed, "x");
738
+
739
+ string uncompressed;
740
+ DataEndingAtUnreadablePage c(compressed);
741
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
742
+ CHECK_EQ(uncompressed, string("x"));
743
+ }
744
+
745
+ // Check for an infinite loop caused by a copy with offset==0
746
+ TEST(Snappy, ZeroOffsetCopy) {
747
+ const char* compressed = "\x40\x12\x00\x00";
748
+ // \x40 Length (must be > kMaxIncrementCopyOverflow)
749
+ // \x12\x00\x00 Copy with offset==0, length==5
750
+ char uncompressed[100];
751
+ EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
752
+ }
753
+
754
+ TEST(Snappy, ZeroOffsetCopyValidation) {
755
+ const char* compressed = "\x05\x12\x00\x00";
756
+ // \x05 Length
757
+ // \x12\x00\x00 Copy with offset==0, length==5
758
+ EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
759
+ }
760
+
761
+
762
+ namespace {
763
+
764
+ int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
765
+ return snappy::internal::FindMatchLength(s1, s2, s2 + length);
766
+ }
767
+
768
+ } // namespace
769
+
770
+ TEST(Snappy, FindMatchLength) {
771
+ // Exercise all different code paths through the function.
772
+ // 64-bit version:
773
+
774
+ // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
775
+ EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
776
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
777
+
778
+ // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
779
+ EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
780
+
781
+ // Same, but edge cases.
782
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
783
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
784
+
785
+ // Find non-match at once in first loop.
786
+ EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
787
+ EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
788
+ EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
789
+ EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
790
+
791
+ // Find non-match in first loop after one block.
792
+ EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
793
+ "abcdefgh?1234567xxxxxxxx", 24));
794
+ EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
795
+ "abcdefgh0?234567xxxxxxxx", 24));
796
+ EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
797
+ "abcdefgh01237654xxxxxxxx", 24));
798
+ EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
799
+ "abcdefgh0123456?xxxxxxxx", 24));
800
+
801
+ // 32-bit version:
802
+
803
+ // Short matches.
804
+ EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
805
+ EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
806
+ EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
807
+ EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
808
+ EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
809
+ EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
810
+ EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
811
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
812
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
813
+ EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
814
+
815
+ // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
816
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
817
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
818
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
819
+
820
+ // Same, but edge cases.
821
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
822
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
823
+
824
+ // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
825
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
826
+
827
+ // Find non-match at once in first loop.
828
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
829
+ "xxxxxx?123xxxxxxxx", 18));
830
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
831
+ "xxxxxx0?23xxxxxxxx", 18));
832
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
833
+ "xxxxxx0132xxxxxxxx", 18));
834
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
835
+ "xxxxxx012?xxxxxxxx", 18));
836
+
837
+ // Same, but edge cases.
838
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
839
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
840
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
841
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
842
+
843
+ // Find non-match in first loop after one block.
844
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
845
+ "xxxxxxabcd?123xx", 16));
846
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
847
+ "xxxxxxabcd0?23xx", 16));
848
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
849
+ "xxxxxxabcd0132xx", 16));
850
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
851
+ "xxxxxxabcd012?xx", 16));
852
+
853
+ // Same, but edge cases.
854
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
855
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
856
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
857
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
858
+ }
859
+
860
+ TEST(Snappy, FindMatchLengthRandom) {
861
+ const int kNumTrials = 10000;
862
+ const int kTypicalLength = 10;
863
+ ACMRandom rnd(FLAGS_test_random_seed);
864
+
865
+ for (int i = 0; i < kNumTrials; i++) {
866
+ string s, t;
867
+ char a = rnd.Rand8();
868
+ char b = rnd.Rand8();
869
+ while (!rnd.OneIn(kTypicalLength)) {
870
+ s.push_back(rnd.OneIn(2) ? a : b);
871
+ t.push_back(rnd.OneIn(2) ? a : b);
872
+ }
873
+ DataEndingAtUnreadablePage u(s);
874
+ DataEndingAtUnreadablePage v(t);
875
+ int matched = snappy::internal::FindMatchLength(
876
+ u.data(), v.data(), v.data() + t.size());
877
+ if (matched == t.size()) {
878
+ EXPECT_EQ(s, t);
879
+ } else {
880
+ EXPECT_NE(s[matched], t[matched]);
881
+ for (int j = 0; j < matched; j++) {
882
+ EXPECT_EQ(s[j], t[j]);
883
+ }
884
+ }
885
+ }
886
+ }
887
+
888
+
889
+ static void CompressFile(const char* fname) {
890
+ string fullinput;
891
+ File::ReadFileToStringOrDie(fname, &fullinput);
892
+
893
+ string compressed;
894
+ compressed.resize(fullinput.size() * 2);
895
+ Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed);
896
+
897
+ File::WriteStringToFileOrDie(compressed,
898
+ string(fname).append(".comp").c_str());
899
+ }
900
+
901
+ static void UncompressFile(const char* fname) {
902
+ string fullinput;
903
+ File::ReadFileToStringOrDie(fname, &fullinput);
904
+
905
+ size_t uncompLength;
906
+ CHECK(CheckUncompressedLength(fullinput, &uncompLength));
907
+
908
+ string uncompressed;
909
+ uncompressed.resize(uncompLength);
910
+ CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
911
+
912
+ File::WriteStringToFileOrDie(uncompressed,
913
+ string(fname).append(".uncomp").c_str());
914
+ }
915
+
916
+ static void MeasureFile(const char* fname) {
917
+ string fullinput;
918
+ File::ReadFileToStringOrDie(fname, &fullinput);
919
+ printf("%-40s :\n", fname);
920
+
921
+ int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
922
+ int end_len = fullinput.size();
923
+ if (FLAGS_end_len >= 0) {
924
+ end_len = min<int>(fullinput.size(), FLAGS_end_len);
925
+ }
926
+ for (int len = start_len; len <= end_len; len++) {
927
+ const char* const input = fullinput.data();
928
+ int repeats = (FLAGS_bytes + len) / (len + 1);
929
+ if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
930
+ if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
931
+ if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
932
+ if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
933
+ if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
934
+ if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
935
+
936
+ // For block-size based measurements
937
+ if (0 && FLAGS_snappy) {
938
+ Measure(input, len, SNAPPY, repeats, 8<<10);
939
+ Measure(input, len, SNAPPY, repeats, 16<<10);
940
+ Measure(input, len, SNAPPY, repeats, 32<<10);
941
+ Measure(input, len, SNAPPY, repeats, 64<<10);
942
+ Measure(input, len, SNAPPY, repeats, 256<<10);
943
+ Measure(input, len, SNAPPY, repeats, 1024<<10);
944
+ }
945
+ }
946
+ }
947
+
948
// Test-data files used by the BM_* benchmarks below, which select an entry by
// index. The table has 18 entries (indices 0..17).
static struct {
  const char* label;     // short name passed to SetBenchmarkLabel()
  const char* filename;  // name handed to ReadTestDataFile()
} files[] = {
  { "html",    "html" },
  { "urls",    "urls.10K" },
  { "jpg",     "house.jpg" },
  { "pdf",     "mapreduce-osdi-1.pdf" },
  { "html4",   "html_x_4" },
  { "cp",      "cp.html" },
  { "c",       "fields.c" },
  { "lsp",     "grammar.lsp" },
  { "xls",     "kennedy.xls" },
  { "txt1",    "alice29.txt" },
  { "txt2",    "asyoulik.txt" },
  { "txt3",    "lcet10.txt" },
  { "txt4",    "plrabn12.txt" },
  { "bin",     "ptt5" },
  { "sum",     "sum" },
  { "man",     "xargs.1" },
  { "pb",      "geo.protodata" },
  { "gaviota", "kppkn.gtb" },
};
971
+
972
+ static void BM_UFlat(int iters, int arg) {
973
+ StopBenchmarkTiming();
974
+
975
+ // Pick file to process based on "arg"
976
+ CHECK_GE(arg, 0);
977
+ CHECK_LT(arg, ARRAYSIZE(files));
978
+ string contents = ReadTestDataFile(files[arg].filename);
979
+
980
+ string zcontents;
981
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
982
+ char* dst = new char[contents.size()];
983
+
984
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
985
+ static_cast<int64>(contents.size()));
986
+ SetBenchmarkLabel(files[arg].label);
987
+ StartBenchmarkTiming();
988
+ while (iters-- > 0) {
989
+ CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
990
+ }
991
+ StopBenchmarkTiming();
992
+
993
+ delete[] dst;
994
+ }
995
+ BENCHMARK(BM_UFlat)->DenseRange(0, 17);
996
+
997
+ static void BM_UValidate(int iters, int arg) {
998
+ StopBenchmarkTiming();
999
+
1000
+ // Pick file to process based on "arg"
1001
+ CHECK_GE(arg, 0);
1002
+ CHECK_LT(arg, ARRAYSIZE(files));
1003
+ string contents = ReadTestDataFile(files[arg].filename);
1004
+
1005
+ string zcontents;
1006
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1007
+
1008
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1009
+ static_cast<int64>(contents.size()));
1010
+ SetBenchmarkLabel(files[arg].label);
1011
+ StartBenchmarkTiming();
1012
+ while (iters-- > 0) {
1013
+ CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
1014
+ }
1015
+ StopBenchmarkTiming();
1016
+ }
1017
+ BENCHMARK(BM_UValidate)->DenseRange(0, 4);
1018
+
1019
+
1020
+ static void BM_ZFlat(int iters, int arg) {
1021
+ StopBenchmarkTiming();
1022
+
1023
+ // Pick file to process based on "arg"
1024
+ CHECK_GE(arg, 0);
1025
+ CHECK_LT(arg, ARRAYSIZE(files));
1026
+ string contents = ReadTestDataFile(files[arg].filename);
1027
+
1028
+ char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1029
+
1030
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1031
+ static_cast<int64>(contents.size()));
1032
+ StartBenchmarkTiming();
1033
+
1034
+ size_t zsize = 0;
1035
+ while (iters-- > 0) {
1036
+ snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
1037
+ }
1038
+ StopBenchmarkTiming();
1039
+ const double compression_ratio =
1040
+ static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1041
+ SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
1042
+ files[arg].label, 100.0 * compression_ratio));
1043
+ VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
1044
+ files[arg].label, contents.size(), zsize);
1045
+ delete[] dst;
1046
+ }
1047
+ BENCHMARK(BM_ZFlat)->DenseRange(0, 17);
1048
+
1049
+
1050
+ } // namespace snappy
1051
+
1052
+
1053
+ int main(int argc, char** argv) {
1054
+ InitGoogle(argv[0], &argc, &argv, true);
1055
+ File::Init();
1056
+ RunSpecifiedBenchmarks();
1057
+
1058
+
1059
+ if (argc >= 2) {
1060
+ for (int arg = 1; arg < argc; arg++) {
1061
+ if (FLAGS_write_compressed) {
1062
+ CompressFile(argv[arg]);
1063
+ } else if (FLAGS_write_uncompressed) {
1064
+ UncompressFile(argv[arg]);
1065
+ } else {
1066
+ MeasureFile(argv[arg]);
1067
+ }
1068
+ }
1069
+ return 0;
1070
+ }
1071
+
1072
+ return RUN_ALL_TESTS();
1073
+ }