snappy 0.2.0 → 0.4.0

@@ -0,0 +1,471 @@
+ // Copyright 2020 Google Inc. All Rights Reserved.
+ //
+ // Redistribution and use in source and binary forms, with or without
+ // modification, are permitted provided that the following conditions are
+ // met:
+ //
+ //     * Redistributions of source code must retain the above copyright
+ // notice, this list of conditions and the following disclaimer.
+ //     * Redistributions in binary form must reproduce the above
+ // copyright notice, this list of conditions and the following disclaimer
+ // in the documentation and/or other materials provided with the
+ // distribution.
+ //     * Neither the name of Google Inc. nor the names of its
+ // contributors may be used to endorse or promote products derived from
+ // this software without specific prior written permission.
+ //
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ #include <algorithm>
+ #include <cmath>
+ #include <cstdint>
+ #include <cstdlib>
+ #include <random>
+ #include <string>
+ #include <utility>
+ #include <vector>
+
+ #include "snappy-test.h"
+
+ #include "snappy-internal.h"
+ #include "snappy-sinksource.h"
+ #include "snappy.h"
+ #include "snappy_test_data.h"
+
+ SNAPPY_FLAG(int32_t, start_len, -1,
+             "Starting prefix size for testing (-1: just full file contents)");
+ SNAPPY_FLAG(int32_t, end_len, -1,
+             "Ending prefix size for testing (-1: just full file contents)");
+ SNAPPY_FLAG(int32_t, bytes, 10485760,
+             "How many bytes to compress/uncompress per file for timing");
+
+ SNAPPY_FLAG(bool, zlib, true,
+             "Run zlib compression (http://www.zlib.net)");
+ SNAPPY_FLAG(bool, lzo, true,
+             "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
+ SNAPPY_FLAG(bool, lz4, true,
+             "Run LZ4 compression (https://github.com/lz4/lz4)");
+ SNAPPY_FLAG(bool, snappy, true, "Run snappy compression");
+
+ SNAPPY_FLAG(bool, write_compressed, false,
+             "Write compressed versions of each file to <file>.comp");
+ SNAPPY_FLAG(bool, write_uncompressed, false,
+             "Write uncompressed versions of each file to <file>.uncomp");
+
+ namespace snappy {
+
+ namespace {
+
+ #if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
+
+ // To test against code that reads beyond its input, this class copies a
+ // string to a newly allocated group of pages, the last of which
+ // is made unreadable via mprotect. Note that we need to allocate the
+ // memory with mmap(), as POSIX allows mprotect() only on memory allocated
+ // with mmap(), and some malloc/posix_memalign implementations expect to
+ // be able to read previously allocated memory while doing heap allocations.
+ class DataEndingAtUnreadablePage {
+  public:
+   explicit DataEndingAtUnreadablePage(const std::string& s) {
+     const size_t page_size = sysconf(_SC_PAGESIZE);
+     const size_t size = s.size();
+     // Round up space for string to a multiple of page_size.
+     size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
+     alloc_size_ = space_for_string + page_size;
+     mem_ = mmap(NULL, alloc_size_,
+                 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+     CHECK_NE(MAP_FAILED, mem_);
+     protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
+     char* dst = protected_page_ - size;
+     std::memcpy(dst, s.data(), size);
+     data_ = dst;
+     size_ = size;
+     // Make guard page unreadable.
+     CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
+   }
+
+   ~DataEndingAtUnreadablePage() {
+     const size_t page_size = sysconf(_SC_PAGESIZE);
+     // Undo the mprotect.
+     CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
+     CHECK_EQ(0, munmap(mem_, alloc_size_));
+   }
+
+   const char* data() const { return data_; }
+   size_t size() const { return size_; }
+
+  private:
+   size_t alloc_size_;
+   void* mem_;
+   char* protected_page_;
+   const char* data_;
+   size_t size_;
+ };
+
+ #else  // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
+
+ // Fallback for systems without mmap.
+ using DataEndingAtUnreadablePage = std::string;
+
+ #endif
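
For context, a minimal sketch (editorial, not part of the diff) of how a round-trip test can lean on this guard-page wrapper. snappy::Compress, snappy::Uncompress, CHECK and CHECK_EQ are the APIs already used in this file; RoundTripAgainstGuardPage itself is a hypothetical helper.

// Illustrative only: decompress from a buffer whose last byte sits flush
// against an unreadable page, so any read past the input's end faults.
void RoundTripAgainstGuardPage(const std::string& original) {
  std::string compressed;
  snappy::Compress(original.data(), original.size(), &compressed);
  // Falls back to a plain std::string copy on platforms without mmap().
  DataEndingAtUnreadablePage guarded(compressed);
  std::string uncompressed;
  CHECK(snappy::Uncompress(guarded.data(), guarded.size(), &uncompressed));
  CHECK_EQ(original, uncompressed);
}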
+
+ enum CompressorType { ZLIB, LZO, LZ4, SNAPPY };
+
+ const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"};
+
+ size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) {
+   switch (comp) {
+ #ifdef ZLIB_VERSION
+     case ZLIB:
+       return ZLib::MinCompressbufSize(input_size);
+ #endif  // ZLIB_VERSION
+
+ #ifdef LZO_VERSION
+     case LZO:
+       return input_size + input_size/64 + 16 + 3;
+ #endif  // LZO_VERSION
+
+ #ifdef LZ4_VERSION_NUMBER
+     case LZ4:
+       return LZ4_compressBound(input_size);
+ #endif  // LZ4_VERSION_NUMBER
+
+     case SNAPPY:
+       return snappy::MaxCompressedLength(input_size);
+
+     default:
+       LOG(FATAL) << "Unknown compression type number " << comp;
+       return 0;
+   }
+ }
+
+ // Returns true if we successfully compressed, false otherwise.
+ //
+ // If compressed_is_preallocated is set, do not resize the compressed buffer.
+ // This is typically what you want for a benchmark, in order to not spend
+ // time in the memory allocator. If you do set this flag, however,
+ // "compressed" must be preinitialized to at least MinimumRequiredOutputSpace()
+ // bytes for that input, and may contain junk bytes at the end after return.
+ bool Compress(const char* input, size_t input_size, CompressorType comp,
+               std::string* compressed, bool compressed_is_preallocated) {
+   if (!compressed_is_preallocated) {
+     compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
+   }
+
+   switch (comp) {
+ #ifdef ZLIB_VERSION
+     case ZLIB: {
+       ZLib zlib;
+       uLongf destlen = compressed->size();
+       int ret = zlib.Compress(
+           reinterpret_cast<Bytef*>(string_as_array(compressed)),
+           &destlen,
+           reinterpret_cast<const Bytef*>(input),
+           input_size);
+       CHECK_EQ(Z_OK, ret);
+       if (!compressed_is_preallocated) {
+         compressed->resize(destlen);
+       }
+       return true;
+     }
+ #endif  // ZLIB_VERSION
+
+ #ifdef LZO_VERSION
+     case LZO: {
+       unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
+       lzo_uint destlen;
+       int ret = lzo1x_1_15_compress(
+           reinterpret_cast<const uint8_t*>(input),
+           input_size,
+           reinterpret_cast<uint8_t*>(string_as_array(compressed)),
+           &destlen,
+           mem);
+       CHECK_EQ(LZO_E_OK, ret);
+       delete[] mem;
+       if (!compressed_is_preallocated) {
+         compressed->resize(destlen);
+       }
+       break;
+     }
+ #endif  // LZO_VERSION
+
+ #ifdef LZ4_VERSION_NUMBER
+     case LZ4: {
+       int destlen = compressed->size();
+       destlen = LZ4_compress_default(input, string_as_array(compressed),
+                                      input_size, destlen);
+       CHECK_NE(destlen, 0);
+       if (!compressed_is_preallocated) {
+         compressed->resize(destlen);
+       }
+       break;
+     }
+ #endif  // LZ4_VERSION_NUMBER
+
+     case SNAPPY: {
+       size_t destlen;
+       snappy::RawCompress(input, input_size,
+                           string_as_array(compressed),
+                           &destlen);
+       CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
+       if (!compressed_is_preallocated) {
+         compressed->resize(destlen);
+       }
+       break;
+     }
+
+     default: {
+       return false;  // the asked-for library wasn't compiled in
+     }
+   }
+   return true;
+ }
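
As a hedged illustration of the two calling modes the comment above describes (the free-standing helper and its name are mine, not part of the diff):

// Illustrative only: benchmark-style vs. one-shot use of Compress().
void CompressBothModes(const char* input, size_t input_size) {
  // Benchmark mode: preallocate the worst-case buffer once and reuse it;
  // the buffer keeps its full size and may hold junk past the payload.
  std::string preallocated;
  preallocated.resize(MinimumRequiredOutputSpace(input_size, SNAPPY));
  Compress(input, input_size, SNAPPY, &preallocated,
           /*compressed_is_preallocated=*/true);

  // One-shot mode: Compress() sizes the buffer and trims it to the exact
  // compressed length on return.
  std::string exact;
  Compress(input, input_size, SNAPPY, &exact,
           /*compressed_is_preallocated=*/false);
}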
+
+ bool Uncompress(const std::string& compressed, CompressorType comp, int size,
+                 std::string* output) {
+   // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+   (void)size;
+   switch (comp) {
+ #ifdef ZLIB_VERSION
+     case ZLIB: {
+       output->resize(size);
+       ZLib zlib;
+       uLongf destlen = output->size();
+       int ret = zlib.Uncompress(
+           reinterpret_cast<Bytef*>(string_as_array(output)),
+           &destlen,
+           reinterpret_cast<const Bytef*>(compressed.data()),
+           compressed.size());
+       CHECK_EQ(Z_OK, ret);
+       CHECK_EQ(static_cast<uLongf>(size), destlen);
+       break;
+     }
+ #endif  // ZLIB_VERSION
+
+ #ifdef LZO_VERSION
+     case LZO: {
+       output->resize(size);
+       lzo_uint destlen;
+       int ret = lzo1x_decompress(
+           reinterpret_cast<const uint8_t*>(compressed.data()),
+           compressed.size(),
+           reinterpret_cast<uint8_t*>(string_as_array(output)),
+           &destlen,
+           NULL);
+       CHECK_EQ(LZO_E_OK, ret);
+       CHECK_EQ(static_cast<lzo_uint>(size), destlen);
+       break;
+     }
+ #endif  // LZO_VERSION
+
+ #ifdef LZ4_VERSION_NUMBER
+     case LZ4: {
+       output->resize(size);
+       int destlen = output->size();
+       destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output),
+                                     compressed.size(), destlen);
+       CHECK_NE(destlen, 0);
+       CHECK_EQ(size, destlen);
+       break;
+     }
+ #endif  // LZ4_VERSION_NUMBER
+     case SNAPPY: {
+       snappy::RawUncompress(compressed.data(), compressed.size(),
+                             string_as_array(output));
+       break;
+     }
+
+     default: {
+       return false;  // the asked-for library wasn't compiled in
+     }
+   }
+   return true;
+ }
+
+ void Measure(const char* data, size_t length, CompressorType comp, int repeats,
+              int block_size) {
+   // Run tests a few times and pick the median running times.
+   static const int kRuns = 5;
+   double ctime[kRuns];
+   double utime[kRuns];
+   int compressed_size = 0;
+
+   {
+     // Chop the input into blocks.
+     int num_blocks = (length + block_size - 1) / block_size;
+     std::vector<const char*> input(num_blocks);
+     std::vector<size_t> input_length(num_blocks);
+     std::vector<std::string> compressed(num_blocks);
+     std::vector<std::string> output(num_blocks);
+     for (int b = 0; b < num_blocks; ++b) {
+       int input_start = b * block_size;
+       int input_limit = std::min<int>((b+1)*block_size, length);
+       input[b] = data+input_start;
+       input_length[b] = input_limit-input_start;
+     }
+
+     // Pre-grow the output buffers so we don't measure string append time.
+     for (std::string& compressed_block : compressed) {
+       compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
+     }
+
+     // First, try one trial compression to make sure the code is compiled in.
+     if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
+       LOG(WARNING) << "Skipping " << names[comp] << ": "
+                    << "library not compiled in";
+       return;
+     }
+
+     for (int run = 0; run < kRuns; ++run) {
+       CycleTimer ctimer, utimer;
+
+       // Pre-grow the output buffers so we don't measure string append time.
+       for (std::string& compressed_block : compressed) {
+         compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
+       }
+
+       ctimer.Start();
+       for (int b = 0; b < num_blocks; ++b) {
+         for (int i = 0; i < repeats; ++i)
+           Compress(input[b], input_length[b], comp, &compressed[b], true);
+       }
+       ctimer.Stop();
+
+       // Compress once more, with resizing, so we don't leave junk
+       // at the end that will confuse the decompressor.
+       for (int b = 0; b < num_blocks; ++b) {
+         Compress(input[b], input_length[b], comp, &compressed[b], false);
+       }
+
+       for (int b = 0; b < num_blocks; ++b) {
+         output[b].resize(input_length[b]);
+       }
+
+       utimer.Start();
+       for (int i = 0; i < repeats; ++i) {
+         for (int b = 0; b < num_blocks; ++b)
+           Uncompress(compressed[b], comp, input_length[b], &output[b]);
+       }
+       utimer.Stop();
+
+       ctime[run] = ctimer.Get();
+       utime[run] = utimer.Get();
+     }
+
+     compressed_size = 0;
+     for (const std::string& compressed_item : compressed) {
+       compressed_size += compressed_item.size();
+     }
+   }
+
+   std::sort(ctime, ctime + kRuns);
+   std::sort(utime, utime + kRuns);
+   const int med = kRuns/2;
+
+   float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
+   float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
+   std::string x = names[comp];
+   x += ":";
+   std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
+                                          : std::string("?");
+   std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
+               "comp %5.1f MB/s uncomp %5s MB/s\n",
+               x.c_str(),
+               block_size/(1<<20),
+               static_cast<int>(length), static_cast<uint32_t>(compressed_size),
+               (compressed_size * 100.0) / std::max<int>(1, length),
+               comp_rate,
+               urate.c_str());
+ }
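
The MB/s figures above come from the median of the five runs; here is a small sketch of the same arithmetic (the helper below is illustrative, not from the diff):

// Illustrative only: throughput as printed by Measure(), in MiB/s.
// Each timed pass processes `length` bytes `repeats` times, so the rate is
// total bytes over the median elapsed time, with 1 MiB = 1048576 bytes.
double MegabytesPerSecond(size_t length, int repeats, double median_seconds) {
  return (static_cast<double>(length) / median_seconds) * repeats / 1048576.0;
}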
+
+ void CompressFile(const char* fname) {
+   std::string fullinput;
+   CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
+
+   std::string compressed;
+   Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
+
+   CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
+                              file::Defaults()));
+ }
+
+ void UncompressFile(const char* fname) {
+   std::string fullinput;
+   CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
+
+   size_t uncompLength;
+   CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(),
+                                       &uncompLength));
+
+   std::string uncompressed;
+   uncompressed.resize(uncompLength);
+   CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
+
+   CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
+                              file::Defaults()));
+ }
+
+ void MeasureFile(const char* fname) {
+   std::string fullinput;
+   CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
+   std::printf("%-40s :\n", fname);
+
+   int start_len = (snappy::GetFlag(FLAGS_start_len) < 0)
+                       ? fullinput.size()
+                       : snappy::GetFlag(FLAGS_start_len);
+   int end_len = fullinput.size();
+   if (snappy::GetFlag(FLAGS_end_len) >= 0) {
+     end_len = std::min<int>(fullinput.size(), snappy::GetFlag(FLAGS_end_len));
+   }
+   for (int len = start_len; len <= end_len; ++len) {
+     const char* const input = fullinput.data();
+     int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1);
+     if (snappy::GetFlag(FLAGS_zlib))
+       Measure(input, len, ZLIB, repeats, 1024 << 10);
+     if (snappy::GetFlag(FLAGS_lzo))
+       Measure(input, len, LZO, repeats, 1024 << 10);
+     if (snappy::GetFlag(FLAGS_lz4))
+       Measure(input, len, LZ4, repeats, 1024 << 10);
+     if (snappy::GetFlag(FLAGS_snappy))
+       Measure(input, len, SNAPPY, repeats, 4096 << 10);
+
+     // For block-size based measurements
+     if (0 && snappy::GetFlag(FLAGS_snappy)) {
+       Measure(input, len, SNAPPY, repeats, 8<<10);
+       Measure(input, len, SNAPPY, repeats, 16<<10);
+       Measure(input, len, SNAPPY, repeats, 32<<10);
+       Measure(input, len, SNAPPY, repeats, 64<<10);
+       Measure(input, len, SNAPPY, repeats, 256<<10);
+       Measure(input, len, SNAPPY, repeats, 1024<<10);
+     }
+   }
+ }
+
+ }  // namespace
+
+ }  // namespace snappy
+
+ int main(int argc, char** argv) {
+   InitGoogle(argv[0], &argc, &argv, true);
+
+   for (int arg = 1; arg < argc; ++arg) {
+     if (snappy::GetFlag(FLAGS_write_compressed)) {
+       snappy::CompressFile(argv[arg]);
+     } else if (snappy::GetFlag(FLAGS_write_uncompressed)) {
+       snappy::UncompressFile(argv[arg]);
+     } else {
+       snappy::MeasureFile(argv[arg]);
+     }
+   }
+   return 0;
+ }
@@ -28,9 +28,10 @@
  //
  // libFuzzer harness for fuzzing snappy's decompression code.
 
+ #include <stddef.h>
+ #include <stdint.h>
+
  #include <cassert>
- #include <cstddef>
- #include <cstdint>
  #include <string>
 
  #include "snappy.h"