snappy 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/Gemfile +3 -4
- data/Rakefile +32 -30
- data/ext/api.c +6 -1
- data/lib/snappy.rb +5 -5
- data/lib/snappy/hadoop/reader.rb +6 -2
- data/lib/snappy/reader.rb +11 -7
- data/lib/snappy/shim.rb +1 -1
- data/lib/snappy/version.rb +1 -1
- data/snappy.gemspec +13 -9
- data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
- data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
- data/test/snappy_hadoop_test.rb +26 -0
- data/test/snappy_reader_test.rb +148 -0
- data/test/snappy_test.rb +95 -0
- data/test/snappy_writer_test.rb +55 -0
- data/test/test_helper.rb +7 -0
- data/vendor/snappy/CMakeLists.txt +177 -54
- data/vendor/snappy/NEWS +8 -0
- data/vendor/snappy/README.md +19 -20
- data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/vendor/snappy/cmake/config.h.in +6 -6
- data/vendor/snappy/docs/README.md +72 -0
- data/vendor/snappy/snappy-internal.h +12 -5
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +60 -15
- data/vendor/snappy/snappy-stubs-public.h.in +16 -36
- data/vendor/snappy/snappy-test.cc +16 -15
- data/vendor/snappy/snappy-test.h +12 -60
- data/vendor/snappy/snappy.cc +333 -187
- data/vendor/snappy/snappy.h +14 -10
- data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
- data/vendor/snappy/snappy_unittest.cc +220 -124
- metadata +26 -20
- data/.travis.yml +0 -31
- data/smoke.sh +0 -8
- data/test/hadoop/test-snappy-hadoop-reader.rb +0 -103
- data/test/hadoop/test-snappy-hadoop-writer.rb +0 -48
- data/test/test-snappy-hadoop.rb +0 -22
- data/test/test-snappy-reader.rb +0 -129
- data/test/test-snappy-writer.rb +0 -55
- data/test/test-snappy.rb +0 -58
- data/vendor/snappy/cmake/SnappyConfig.cmake +0 -1
data/vendor/snappy/snappy.h
CHANGED
@@ -39,7 +39,7 @@
|
|
39
39
|
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
|
40
40
|
#define THIRD_PARTY_SNAPPY_SNAPPY_H__
|
41
41
|
|
42
|
-
#include <
|
42
|
+
#include <cstddef>
|
43
43
|
#include <string>
|
44
44
|
|
45
45
|
#include "snappy-stubs-public.h"
|
@@ -69,11 +69,12 @@ namespace snappy {
|
|
69
69
|
// Higher-level string based routines (should be sufficient for most users)
|
70
70
|
// ------------------------------------------------------------------------
|
71
71
|
|
72
|
-
// Sets "*
|
73
|
-
// Original contents of *
|
72
|
+
// Sets "*compressed" to the compressed version of "input[0,input_length-1]".
|
73
|
+
// Original contents of *compressed are lost.
|
74
74
|
//
|
75
|
-
// REQUIRES: "input[]" is not an alias of "*
|
76
|
-
size_t Compress(const char* input, size_t input_length,
|
75
|
+
// REQUIRES: "input[]" is not an alias of "*compressed".
|
76
|
+
size_t Compress(const char* input, size_t input_length,
|
77
|
+
std::string* compressed);
|
77
78
|
|
78
79
|
// Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
|
79
80
|
// Original contents of "*uncompressed" are lost.
|
@@ -82,7 +83,7 @@ namespace snappy {
|
|
82
83
|
//
|
83
84
|
// returns false if the message is corrupted and could not be decompressed
|
84
85
|
bool Uncompress(const char* compressed, size_t compressed_length,
|
85
|
-
string* uncompressed);
|
86
|
+
std::string* uncompressed);
|
86
87
|
|
87
88
|
// Decompresses "compressed" to "*uncompressed".
|
88
89
|
//
|
@@ -193,11 +194,14 @@ namespace snappy {
|
|
193
194
|
// Note that there might be older data around that is compressed with larger
|
194
195
|
// block sizes, so the decompression code should not rely on the
|
195
196
|
// non-existence of long backreferences.
|
196
|
-
static
|
197
|
-
static
|
197
|
+
static constexpr int kBlockLog = 16;
|
198
|
+
static constexpr size_t kBlockSize = 1 << kBlockLog;
|
198
199
|
|
199
|
-
static
|
200
|
-
static
|
200
|
+
static constexpr int kMinHashTableBits = 8;
|
201
|
+
static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
|
202
|
+
|
203
|
+
static constexpr int kMaxHashTableBits = 14;
|
204
|
+
static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
|
201
205
|
} // end namespace snappy
|
202
206
|
|
203
207
|
#endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
|
@@ -0,0 +1,59 @@
|
|
1
|
+
// Copyright 2019 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are
|
5
|
+
// met:
|
6
|
+
//
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
11
|
+
// in the documentation and/or other materials provided with the
|
12
|
+
// distribution.
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
15
|
+
// this software without specific prior written permission.
|
16
|
+
//
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
//
|
29
|
+
// libFuzzer harness for fuzzing snappy compression code.
|
30
|
+
|
31
|
+
#include <cassert>
|
32
|
+
#include <cstddef>
|
33
|
+
#include <cstdint>
|
34
|
+
#include <string>
|
35
|
+
|
36
|
+
#include "snappy.h"
|
37
|
+
|
38
|
+
// Entry point for LibFuzzer.
|
39
|
+
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
40
|
+
std::string input(reinterpret_cast<const char*>(data), size);
|
41
|
+
|
42
|
+
std::string compressed;
|
43
|
+
size_t compressed_size =
|
44
|
+
snappy::Compress(input.data(), input.size(), &compressed);
|
45
|
+
|
46
|
+
(void)compressed_size; // Variable only used in debug builds.
|
47
|
+
assert(compressed_size == compressed.size());
|
48
|
+
assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
|
49
|
+
assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
50
|
+
|
51
|
+
std::string uncompressed_after_compress;
|
52
|
+
bool uncompress_succeeded = snappy::Uncompress(
|
53
|
+
compressed.data(), compressed.size(), &uncompressed_after_compress);
|
54
|
+
|
55
|
+
(void)uncompress_succeeded; // Variable only used in debug builds.
|
56
|
+
assert(uncompress_succeeded);
|
57
|
+
assert(input == uncompressed_after_compress);
|
58
|
+
return 0;
|
59
|
+
}
|
@@ -0,0 +1,57 @@
|
|
1
|
+
// Copyright 2019 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are
|
5
|
+
// met:
|
6
|
+
//
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
11
|
+
// in the documentation and/or other materials provided with the
|
12
|
+
// distribution.
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
15
|
+
// this software without specific prior written permission.
|
16
|
+
//
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
//
|
29
|
+
// libFuzzer harness for fuzzing snappy's decompression code.
|
30
|
+
|
31
|
+
#include <cassert>
|
32
|
+
#include <cstddef>
|
33
|
+
#include <cstdint>
|
34
|
+
#include <string>
|
35
|
+
|
36
|
+
#include "snappy.h"
|
37
|
+
|
38
|
+
// Entry point for LibFuzzer.
|
39
|
+
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
40
|
+
std::string input(reinterpret_cast<const char*>(data), size);
|
41
|
+
|
42
|
+
// Avoid self-crafted decompression bombs.
|
43
|
+
size_t uncompressed_size;
|
44
|
+
constexpr size_t kMaxUncompressedSize = 1 << 20;
|
45
|
+
bool get_uncompressed_length_succeeded = snappy::GetUncompressedLength(
|
46
|
+
input.data(), input.size(), &uncompressed_size);
|
47
|
+
if (!get_uncompressed_length_succeeded ||
|
48
|
+
(uncompressed_size > kMaxUncompressedSize)) {
|
49
|
+
return 0;
|
50
|
+
}
|
51
|
+
|
52
|
+
std::string uncompressed;
|
53
|
+
// The return value of snappy::Uncompress() is ignored because decompression
|
54
|
+
// will fail on invalid inputs.
|
55
|
+
snappy::Uncompress(input.data(), input.size(), &uncompressed);
|
56
|
+
return 0;
|
57
|
+
}
|
@@ -29,8 +29,8 @@
|
|
29
29
|
#include <math.h>
|
30
30
|
#include <stdlib.h>
|
31
31
|
|
32
|
-
|
33
32
|
#include <algorithm>
|
33
|
+
#include <random>
|
34
34
|
#include <string>
|
35
35
|
#include <utility>
|
36
36
|
#include <vector>
|
@@ -73,7 +73,7 @@ namespace snappy {
|
|
73
73
|
// be able to read previously allocated memory while doing heap allocations.
|
74
74
|
class DataEndingAtUnreadablePage {
|
75
75
|
public:
|
76
|
-
explicit DataEndingAtUnreadablePage(const string& s) {
|
76
|
+
explicit DataEndingAtUnreadablePage(const std::string& s) {
|
77
77
|
const size_t page_size = sysconf(_SC_PAGESIZE);
|
78
78
|
const size_t size = s.size();
|
79
79
|
// Round up space for string to a multiple of page_size.
|
@@ -112,7 +112,7 @@ class DataEndingAtUnreadablePage {
|
|
112
112
|
#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
|
113
113
|
|
114
114
|
// Fallback for systems without mmap.
|
115
|
-
|
115
|
+
using DataEndingAtUnreadablePage = std::string;
|
116
116
|
|
117
117
|
#endif
|
118
118
|
|
@@ -154,7 +154,7 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
|
|
154
154
|
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
|
155
155
|
// number of bytes, and may contain junk bytes at the end after return.
|
156
156
|
static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
157
|
-
string* compressed, bool compressed_is_preallocated) {
|
157
|
+
std::string* compressed, bool compressed_is_preallocated) {
|
158
158
|
if (!compressed_is_preallocated) {
|
159
159
|
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
|
160
160
|
}
|
@@ -215,8 +215,8 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
215
215
|
return true;
|
216
216
|
}
|
217
217
|
|
218
|
-
static bool Uncompress(const string& compressed, CompressorType comp,
|
219
|
-
int size, string* output) {
|
218
|
+
static bool Uncompress(const std::string& compressed, CompressorType comp,
|
219
|
+
int size, std::string* output) {
|
220
220
|
switch (comp) {
|
221
221
|
#ifdef ZLIB_VERSION
|
222
222
|
case ZLIB: {
|
@@ -279,8 +279,8 @@ static void Measure(const char* data,
|
|
279
279
|
int num_blocks = (length + block_size - 1) / block_size;
|
280
280
|
std::vector<const char*> input(num_blocks);
|
281
281
|
std::vector<size_t> input_length(num_blocks);
|
282
|
-
std::vector<string> compressed(num_blocks);
|
283
|
-
std::vector<string> output(num_blocks);
|
282
|
+
std::vector<std::string> compressed(num_blocks);
|
283
|
+
std::vector<std::string> output(num_blocks);
|
284
284
|
for (int b = 0; b < num_blocks; b++) {
|
285
285
|
int input_start = b * block_size;
|
286
286
|
int input_limit = std::min<int>((b+1)*block_size, length);
|
@@ -344,11 +344,10 @@ static void Measure(const char* data,
|
|
344
344
|
|
345
345
|
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
|
346
346
|
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
|
347
|
-
string x = names[comp];
|
347
|
+
std::string x = names[comp];
|
348
348
|
x += ":";
|
349
|
-
string urate = (uncomp_rate >= 0)
|
350
|
-
|
351
|
-
: string("?");
|
349
|
+
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
|
350
|
+
: std::string("?");
|
352
351
|
printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
|
353
352
|
"comp %5.1f MB/s uncomp %5s MB/s\n",
|
354
353
|
x.c_str(),
|
@@ -359,8 +358,8 @@ static void Measure(const char* data,
|
|
359
358
|
urate.c_str());
|
360
359
|
}
|
361
360
|
|
362
|
-
static int VerifyString(const string& input) {
|
363
|
-
string compressed;
|
361
|
+
static int VerifyString(const std::string& input) {
|
362
|
+
std::string compressed;
|
364
363
|
DataEndingAtUnreadablePage i(input);
|
365
364
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
366
365
|
CHECK_EQ(written, compressed.size());
|
@@ -368,15 +367,15 @@ static int VerifyString(const string& input) {
|
|
368
367
|
snappy::MaxCompressedLength(input.size()));
|
369
368
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
370
369
|
|
371
|
-
string uncompressed;
|
370
|
+
std::string uncompressed;
|
372
371
|
DataEndingAtUnreadablePage c(compressed);
|
373
372
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
374
373
|
CHECK_EQ(uncompressed, input);
|
375
374
|
return uncompressed.size();
|
376
375
|
}
|
377
376
|
|
378
|
-
static void VerifyStringSink(const string& input) {
|
379
|
-
string compressed;
|
377
|
+
static void VerifyStringSink(const std::string& input) {
|
378
|
+
std::string compressed;
|
380
379
|
DataEndingAtUnreadablePage i(input);
|
381
380
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
382
381
|
CHECK_EQ(written, compressed.size());
|
@@ -384,7 +383,7 @@ static void VerifyStringSink(const string& input) {
|
|
384
383
|
snappy::MaxCompressedLength(input.size()));
|
385
384
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
386
385
|
|
387
|
-
string uncompressed;
|
386
|
+
std::string uncompressed;
|
388
387
|
uncompressed.resize(input.size());
|
389
388
|
snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
|
390
389
|
DataEndingAtUnreadablePage c(compressed);
|
@@ -393,8 +392,8 @@ static void VerifyStringSink(const string& input) {
|
|
393
392
|
CHECK_EQ(uncompressed, input);
|
394
393
|
}
|
395
394
|
|
396
|
-
static void VerifyIOVec(const string& input) {
|
397
|
-
string compressed;
|
395
|
+
static void VerifyIOVec(const std::string& input) {
|
396
|
+
std::string compressed;
|
398
397
|
DataEndingAtUnreadablePage i(input);
|
399
398
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
400
399
|
CHECK_EQ(written, compressed.size());
|
@@ -405,23 +404,28 @@ static void VerifyIOVec(const string& input) {
|
|
405
404
|
// Try uncompressing into an iovec containing a random number of entries
|
406
405
|
// ranging from 1 to 10.
|
407
406
|
char* buf = new char[input.size()];
|
408
|
-
|
409
|
-
size_t
|
407
|
+
std::minstd_rand0 rng(input.size());
|
408
|
+
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
|
409
|
+
size_t num = uniform_1_to_10(rng);
|
410
410
|
if (input.size() < num) {
|
411
411
|
num = input.size();
|
412
412
|
}
|
413
413
|
struct iovec* iov = new iovec[num];
|
414
414
|
int used_so_far = 0;
|
415
|
+
std::bernoulli_distribution one_in_five(1.0 / 5);
|
415
416
|
for (size_t i = 0; i < num; ++i) {
|
417
|
+
assert(used_so_far < input.size());
|
416
418
|
iov[i].iov_base = buf + used_so_far;
|
417
419
|
if (i == num - 1) {
|
418
420
|
iov[i].iov_len = input.size() - used_so_far;
|
419
421
|
} else {
|
420
422
|
// Randomly choose to insert a 0 byte entry.
|
421
|
-
if (
|
423
|
+
if (one_in_five(rng)) {
|
422
424
|
iov[i].iov_len = 0;
|
423
425
|
} else {
|
424
|
-
|
426
|
+
std::uniform_int_distribution<size_t> uniform_not_used_so_far(
|
427
|
+
0, input.size() - used_so_far - 1);
|
428
|
+
iov[i].iov_len = uniform_not_used_so_far(rng);
|
425
429
|
}
|
426
430
|
}
|
427
431
|
used_so_far += iov[i].iov_len;
|
@@ -435,22 +439,22 @@ static void VerifyIOVec(const string& input) {
|
|
435
439
|
|
436
440
|
// Test that data compressed by a compressor that does not
|
437
441
|
// obey block sizes is uncompressed properly.
|
438
|
-
static void VerifyNonBlockedCompression(const string& input) {
|
442
|
+
static void VerifyNonBlockedCompression(const std::string& input) {
|
439
443
|
if (input.length() > snappy::kBlockSize) {
|
440
444
|
// We cannot test larger blocks than the maximum block size, obviously.
|
441
445
|
return;
|
442
446
|
}
|
443
447
|
|
444
|
-
string prefix;
|
448
|
+
std::string prefix;
|
445
449
|
Varint::Append32(&prefix, input.size());
|
446
450
|
|
447
451
|
// Setup compression table
|
448
|
-
snappy::internal::WorkingMemory wmem;
|
452
|
+
snappy::internal::WorkingMemory wmem(input.size());
|
449
453
|
int table_size;
|
450
454
|
uint16* table = wmem.GetHashTable(input.size(), &table_size);
|
451
455
|
|
452
456
|
// Compress entire input in one shot
|
453
|
-
string compressed;
|
457
|
+
std::string compressed;
|
454
458
|
compressed += prefix;
|
455
459
|
compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
|
456
460
|
char* dest = string_as_array(&compressed) + prefix.size();
|
@@ -458,13 +462,13 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
458
462
|
dest, table, table_size);
|
459
463
|
compressed.resize(end - compressed.data());
|
460
464
|
|
461
|
-
// Uncompress into string
|
462
|
-
string uncomp_str;
|
465
|
+
// Uncompress into std::string
|
466
|
+
std::string uncomp_str;
|
463
467
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
|
464
468
|
CHECK_EQ(uncomp_str, input);
|
465
469
|
|
466
470
|
// Uncompress using source/sink
|
467
|
-
string uncomp_str2;
|
471
|
+
std::string uncomp_str2;
|
468
472
|
uncomp_str2.resize(input.size());
|
469
473
|
snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
|
470
474
|
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
@@ -476,28 +480,28 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
476
480
|
static const int kNumBlocks = 10;
|
477
481
|
struct iovec vec[kNumBlocks];
|
478
482
|
const int block_size = 1 + input.size() / kNumBlocks;
|
479
|
-
string iovec_data(block_size * kNumBlocks, 'x');
|
483
|
+
std::string iovec_data(block_size * kNumBlocks, 'x');
|
480
484
|
for (int i = 0; i < kNumBlocks; i++) {
|
481
485
|
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
482
486
|
vec[i].iov_len = block_size;
|
483
487
|
}
|
484
488
|
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
|
485
489
|
vec, kNumBlocks));
|
486
|
-
CHECK_EQ(string(iovec_data.data(), input.size()), input);
|
490
|
+
CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
|
487
491
|
}
|
488
492
|
}
|
489
493
|
|
490
494
|
// Expand the input so that it is at least K times as big as block size
|
491
|
-
static string Expand(const string& input) {
|
495
|
+
static std::string Expand(const std::string& input) {
|
492
496
|
static const int K = 3;
|
493
|
-
string data = input;
|
497
|
+
std::string data = input;
|
494
498
|
while (data.size() < K * snappy::kBlockSize) {
|
495
499
|
data += input;
|
496
500
|
}
|
497
501
|
return data;
|
498
502
|
}
|
499
503
|
|
500
|
-
static int Verify(const string& input) {
|
504
|
+
static int Verify(const std::string& input) {
|
501
505
|
VLOG(1) << "Verifying input of size " << input.size();
|
502
506
|
|
503
507
|
// Compress using string based routines
|
@@ -509,7 +513,7 @@ static int Verify(const string& input) {
|
|
509
513
|
VerifyNonBlockedCompression(input);
|
510
514
|
VerifyIOVec(input);
|
511
515
|
if (!input.empty()) {
|
512
|
-
const string expanded = Expand(input);
|
516
|
+
const std::string expanded = Expand(input);
|
513
517
|
VerifyNonBlockedCompression(expanded);
|
514
518
|
VerifyIOVec(input);
|
515
519
|
}
|
@@ -517,21 +521,20 @@ static int Verify(const string& input) {
|
|
517
521
|
return result;
|
518
522
|
}
|
519
523
|
|
520
|
-
|
521
|
-
static bool IsValidCompressedBuffer(const string& c) {
|
524
|
+
static bool IsValidCompressedBuffer(const std::string& c) {
|
522
525
|
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
523
526
|
}
|
524
|
-
static bool Uncompress(const string& c, string* u) {
|
527
|
+
static bool Uncompress(const std::string& c, std::string* u) {
|
525
528
|
return snappy::Uncompress(c.data(), c.size(), u);
|
526
529
|
}
|
527
530
|
|
528
531
|
// This test checks to ensure that snappy doesn't coredump if it gets
|
529
532
|
// corrupted data.
|
530
533
|
TEST(CorruptedTest, VerifyCorrupted) {
|
531
|
-
string source = "making sure we don't crash with corrupted input";
|
534
|
+
std::string source = "making sure we don't crash with corrupted input";
|
532
535
|
VLOG(1) << source;
|
533
|
-
string dest;
|
534
|
-
string uncmp;
|
536
|
+
std::string dest;
|
537
|
+
std::string uncmp;
|
535
538
|
snappy::Compress(source.data(), source.size(), &dest);
|
536
539
|
|
537
540
|
// Mess around with the data. It's hard to simulate all possible
|
@@ -578,9 +581,9 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
578
581
|
|
579
582
|
// try reading stuff in from a bad file.
|
580
583
|
for (int i = 1; i <= 3; ++i) {
|
581
|
-
string data =
|
582
|
-
|
583
|
-
string uncmp;
|
584
|
+
std::string data =
|
585
|
+
ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
|
586
|
+
std::string uncmp;
|
584
587
|
// check that we don't return a crazy length
|
585
588
|
size_t ulen;
|
586
589
|
CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
|
@@ -598,7 +601,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
598
601
|
// These mirror the compression code in snappy.cc, but are copied
|
599
602
|
// here so that we can bypass some limitations in the how snappy.cc
|
600
603
|
// invokes these routines.
|
601
|
-
static void AppendLiteral(string* dst, const string& literal) {
|
604
|
+
static void AppendLiteral(std::string* dst, const std::string& literal) {
|
602
605
|
if (literal.empty()) return;
|
603
606
|
int n = literal.size() - 1;
|
604
607
|
if (n < 60) {
|
@@ -613,12 +616,12 @@ static void AppendLiteral(string* dst, const string& literal) {
|
|
613
616
|
n >>= 8;
|
614
617
|
}
|
615
618
|
dst->push_back(0 | ((59+count) << 2));
|
616
|
-
*dst += string(number, count);
|
619
|
+
*dst += std::string(number, count);
|
617
620
|
}
|
618
621
|
*dst += literal;
|
619
622
|
}
|
620
623
|
|
621
|
-
static void AppendCopy(string* dst, int offset, int length) {
|
624
|
+
static void AppendCopy(std::string* dst, int offset, int length) {
|
622
625
|
while (length > 0) {
|
623
626
|
// Figure out how much to copy in one shot
|
624
627
|
int to_copy;
|
@@ -655,51 +658,67 @@ TEST(Snappy, SimpleTests) {
|
|
655
658
|
Verify("ab");
|
656
659
|
Verify("abc");
|
657
660
|
|
658
|
-
Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
|
659
|
-
Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
|
660
|
-
Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
|
661
|
-
Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
662
|
-
Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
661
|
+
Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
|
662
|
+
Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
|
663
|
+
Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
|
664
|
+
Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
665
|
+
Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
663
666
|
}
|
664
667
|
|
665
668
|
// Verify max blowup (lots of four-byte copies)
|
666
669
|
TEST(Snappy, MaxBlowup) {
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
input.
|
672
|
-
|
673
|
-
for (int i =
|
674
|
-
|
675
|
-
|
676
|
-
input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
|
670
|
+
std::mt19937 rng;
|
671
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
672
|
+
std::string input;
|
673
|
+
for (int i = 0; i < 80000; ++i)
|
674
|
+
input.push_back(static_cast<char>(uniform_byte(rng)));
|
675
|
+
|
676
|
+
for (int i = 0; i < 80000; i += 4) {
|
677
|
+
std::string four_bytes(input.end() - i - 4, input.end() - i);
|
678
|
+
input.append(four_bytes);
|
677
679
|
}
|
678
680
|
Verify(input);
|
679
681
|
}
|
680
682
|
|
681
683
|
TEST(Snappy, RandomData) {
|
682
|
-
|
683
|
-
|
684
|
-
|
684
|
+
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
685
|
+
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
|
686
|
+
std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
|
687
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
688
|
+
std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
|
689
|
+
std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
|
690
|
+
std::bernoulli_distribution one_in_ten(1.0 / 10);
|
691
|
+
|
692
|
+
constexpr int num_ops = 20000;
|
685
693
|
for (int i = 0; i < num_ops; i++) {
|
686
694
|
if ((i % 1000) == 0) {
|
687
695
|
VLOG(0) << "Random op " << i << " of " << num_ops;
|
688
696
|
}
|
689
697
|
|
690
|
-
string x;
|
691
|
-
size_t len =
|
698
|
+
std::string x;
|
699
|
+
size_t len = uniform_4k(rng);
|
692
700
|
if (i < 100) {
|
693
|
-
len = 65536 +
|
701
|
+
len = 65536 + uniform_64k(rng);
|
694
702
|
}
|
695
703
|
while (x.size() < len) {
|
696
704
|
int run_len = 1;
|
697
|
-
if (
|
698
|
-
|
705
|
+
if (one_in_ten(rng)) {
|
706
|
+
int skewed_bits = uniform_0_to_8(rng);
|
707
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
|
708
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
709
|
+
(1 << skewed_bits) - 1);
|
710
|
+
run_len = skewed_low(rng);
|
711
|
+
}
|
712
|
+
char c = static_cast<char>(uniform_byte(rng));
|
713
|
+
if (i >= 100) {
|
714
|
+
int skewed_bits = uniform_0_to_3(rng);
|
715
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
|
716
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
717
|
+
(1 << skewed_bits) - 1);
|
718
|
+
c = static_cast<char>(skewed_low(rng));
|
699
719
|
}
|
700
|
-
char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
|
701
720
|
while (run_len-- > 0 && x.size() < len) {
|
702
|
-
x
|
721
|
+
x.push_back(c);
|
703
722
|
}
|
704
723
|
}
|
705
724
|
|
@@ -713,19 +732,19 @@ TEST(Snappy, FourByteOffset) {
|
|
713
732
|
// copy manually.
|
714
733
|
|
715
734
|
// The two fragments that make up the input string.
|
716
|
-
string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
717
|
-
string fragment2 = "some other string";
|
735
|
+
std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
736
|
+
std::string fragment2 = "some other string";
|
718
737
|
|
719
738
|
// How many times each fragment is emitted.
|
720
739
|
const int n1 = 2;
|
721
740
|
const int n2 = 100000 / fragment2.size();
|
722
741
|
const int length = n1 * fragment1.size() + n2 * fragment2.size();
|
723
742
|
|
724
|
-
string compressed;
|
743
|
+
std::string compressed;
|
725
744
|
Varint::Append32(&compressed, length);
|
726
745
|
|
727
746
|
AppendLiteral(&compressed, fragment1);
|
728
|
-
string src = fragment1;
|
747
|
+
std::string src = fragment1;
|
729
748
|
for (int i = 0; i < n2; i++) {
|
730
749
|
AppendLiteral(&compressed, fragment2);
|
731
750
|
src += fragment2;
|
@@ -734,7 +753,7 @@ TEST(Snappy, FourByteOffset) {
|
|
734
753
|
src += fragment1;
|
735
754
|
CHECK_EQ(length, src.size());
|
736
755
|
|
737
|
-
string uncompressed;
|
756
|
+
std::string uncompressed;
|
738
757
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
739
758
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
|
740
759
|
&uncompressed));
|
@@ -756,7 +775,7 @@ TEST(Snappy, IOVecEdgeCases) {
|
|
756
775
|
iov[i].iov_len = kLengths[i];
|
757
776
|
}
|
758
777
|
|
759
|
-
string compressed;
|
778
|
+
std::string compressed;
|
760
779
|
Varint::Append32(&compressed, 22);
|
761
780
|
|
762
781
|
// A literal whose output crosses three blocks.
|
@@ -817,7 +836,7 @@ TEST(Snappy, IOVecLiteralOverflow) {
|
|
817
836
|
iov[i].iov_len = kLengths[i];
|
818
837
|
}
|
819
838
|
|
820
|
-
string compressed;
|
839
|
+
std::string compressed;
|
821
840
|
Varint::Append32(&compressed, 8);
|
822
841
|
|
823
842
|
AppendLiteral(&compressed, "12345678");
|
@@ -839,7 +858,7 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
839
858
|
iov[i].iov_len = kLengths[i];
|
840
859
|
}
|
841
860
|
|
842
|
-
string compressed;
|
861
|
+
std::string compressed;
|
843
862
|
Varint::Append32(&compressed, 8);
|
844
863
|
|
845
864
|
AppendLiteral(&compressed, "123");
|
@@ -853,7 +872,7 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
853
872
|
}
|
854
873
|
}
|
855
874
|
|
856
|
-
static bool CheckUncompressedLength(const string& compressed,
|
875
|
+
static bool CheckUncompressedLength(const std::string& compressed,
|
857
876
|
size_t* ulength) {
|
858
877
|
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
859
878
|
compressed.size(),
|
@@ -867,7 +886,7 @@ static bool CheckUncompressedLength(const string& compressed,
|
|
867
886
|
}
|
868
887
|
|
869
888
|
TEST(SnappyCorruption, TruncatedVarint) {
|
870
|
-
string compressed, uncompressed;
|
889
|
+
std::string compressed, uncompressed;
|
871
890
|
size_t ulength;
|
872
891
|
compressed.push_back('\xf0');
|
873
892
|
CHECK(!CheckUncompressedLength(compressed, &ulength));
|
@@ -877,7 +896,7 @@ TEST(SnappyCorruption, TruncatedVarint) {
|
|
877
896
|
}
|
878
897
|
|
879
898
|
TEST(SnappyCorruption, UnterminatedVarint) {
|
880
|
-
string compressed, uncompressed;
|
899
|
+
std::string compressed, uncompressed;
|
881
900
|
size_t ulength;
|
882
901
|
compressed.push_back('\x80');
|
883
902
|
compressed.push_back('\x80');
|
@@ -892,7 +911,7 @@ TEST(SnappyCorruption, UnterminatedVarint) {
|
|
892
911
|
}
|
893
912
|
|
894
913
|
TEST(SnappyCorruption, OverflowingVarint) {
|
895
|
-
string compressed, uncompressed;
|
914
|
+
std::string compressed, uncompressed;
|
896
915
|
size_t ulength;
|
897
916
|
compressed.push_back('\xfb');
|
898
917
|
compressed.push_back('\xff');
|
@@ -909,14 +928,14 @@ TEST(Snappy, ReadPastEndOfBuffer) {
|
|
909
928
|
// Check that we do not read past end of input
|
910
929
|
|
911
930
|
// Make a compressed string that ends with a single-byte literal
|
912
|
-
string compressed;
|
931
|
+
std::string compressed;
|
913
932
|
Varint::Append32(&compressed, 1);
|
914
933
|
AppendLiteral(&compressed, "x");
|
915
934
|
|
916
|
-
string uncompressed;
|
935
|
+
std::string uncompressed;
|
917
936
|
DataEndingAtUnreadablePage c(compressed);
|
918
937
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
919
|
-
CHECK_EQ(uncompressed, string("x"));
|
938
|
+
CHECK_EQ(uncompressed, std::string("x"));
|
920
939
|
}
|
921
940
|
|
922
941
|
// Check for an infinite loop caused by a copy with offset==0
|
@@ -1037,17 +1056,20 @@ TEST(Snappy, FindMatchLength) {
|
|
1037
1056
|
}
|
1038
1057
|
|
1039
1058
|
TEST(Snappy, FindMatchLengthRandom) {
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1059
|
+
constexpr int kNumTrials = 10000;
|
1060
|
+
constexpr int kTypicalLength = 10;
|
1061
|
+
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
1062
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
1063
|
+
std::bernoulli_distribution one_in_two(1.0 / 2);
|
1064
|
+
std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
|
1043
1065
|
|
1044
1066
|
for (int i = 0; i < kNumTrials; i++) {
|
1045
|
-
string s, t;
|
1046
|
-
char a =
|
1047
|
-
char b =
|
1048
|
-
while (!
|
1049
|
-
s.push_back(
|
1050
|
-
t.push_back(
|
1067
|
+
std::string s, t;
|
1068
|
+
char a = static_cast<char>(uniform_byte(rng));
|
1069
|
+
char b = static_cast<char>(uniform_byte(rng));
|
1070
|
+
while (!one_in_typical_length(rng)) {
|
1071
|
+
s.push_back(one_in_two(rng) ? a : b);
|
1072
|
+
t.push_back(one_in_two(rng) ? a : b);
|
1051
1073
|
}
|
1052
1074
|
DataEndingAtUnreadablePage u(s);
|
1053
1075
|
DataEndingAtUnreadablePage v(t);
|
@@ -1157,33 +1179,33 @@ TEST(Snappy, VerifyCharTable) {
|
|
1157
1179
|
}
|
1158
1180
|
|
1159
1181
|
static void CompressFile(const char* fname) {
|
1160
|
-
string fullinput;
|
1182
|
+
std::string fullinput;
|
1161
1183
|
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1162
1184
|
|
1163
|
-
string compressed;
|
1185
|
+
std::string compressed;
|
1164
1186
|
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
|
1165
1187
|
|
1166
|
-
CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
|
1188
|
+
CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
|
1167
1189
|
file::Defaults()));
|
1168
1190
|
}
|
1169
1191
|
|
1170
1192
|
static void UncompressFile(const char* fname) {
|
1171
|
-
string fullinput;
|
1193
|
+
std::string fullinput;
|
1172
1194
|
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1173
1195
|
|
1174
1196
|
size_t uncompLength;
|
1175
1197
|
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
|
1176
1198
|
|
1177
|
-
string uncompressed;
|
1199
|
+
std::string uncompressed;
|
1178
1200
|
uncompressed.resize(uncompLength);
|
1179
1201
|
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
|
1180
1202
|
|
1181
|
-
CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
|
1203
|
+
CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
|
1182
1204
|
file::Defaults()));
|
1183
1205
|
}
|
1184
1206
|
|
1185
1207
|
static void MeasureFile(const char* fname) {
|
1186
|
-
string fullinput;
|
1208
|
+
std::string fullinput;
|
1187
1209
|
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1188
1210
|
printf("%-40s :\n", fname);
|
1189
1211
|
|
@@ -1236,10 +1258,10 @@ static void BM_UFlat(int iters, int arg) {
|
|
1236
1258
|
// Pick file to process based on "arg"
|
1237
1259
|
CHECK_GE(arg, 0);
|
1238
1260
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1239
|
-
string contents =
|
1240
|
-
|
1261
|
+
std::string contents =
|
1262
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1241
1263
|
|
1242
|
-
string zcontents;
|
1264
|
+
std::string zcontents;
|
1243
1265
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1244
1266
|
char* dst = new char[contents.size()];
|
1245
1267
|
|
@@ -1262,10 +1284,10 @@ static void BM_UValidate(int iters, int arg) {
|
|
1262
1284
|
// Pick file to process based on "arg"
|
1263
1285
|
CHECK_GE(arg, 0);
|
1264
1286
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1265
|
-
string contents =
|
1266
|
-
|
1287
|
+
std::string contents =
|
1288
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1267
1289
|
|
1268
|
-
string zcontents;
|
1290
|
+
std::string zcontents;
|
1269
1291
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1270
1292
|
|
1271
1293
|
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
@@ -1285,10 +1307,10 @@ static void BM_UIOVec(int iters, int arg) {
|
|
1285
1307
|
// Pick file to process based on "arg"
|
1286
1308
|
CHECK_GE(arg, 0);
|
1287
1309
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1288
|
-
string contents =
|
1289
|
-
|
1310
|
+
std::string contents =
|
1311
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1290
1312
|
|
1291
|
-
string zcontents;
|
1313
|
+
std::string zcontents;
|
1292
1314
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1293
1315
|
|
1294
1316
|
// Uncompress into an iovec containing ten entries.
|
@@ -1331,10 +1353,10 @@ static void BM_UFlatSink(int iters, int arg) {
|
|
1331
1353
|
// Pick file to process based on "arg"
|
1332
1354
|
CHECK_GE(arg, 0);
|
1333
1355
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1334
|
-
string contents =
|
1335
|
-
|
1356
|
+
std::string contents =
|
1357
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1336
1358
|
|
1337
|
-
string zcontents;
|
1359
|
+
std::string zcontents;
|
1338
1360
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1339
1361
|
char* dst = new char[contents.size()];
|
1340
1362
|
|
@@ -1349,7 +1371,7 @@ static void BM_UFlatSink(int iters, int arg) {
|
|
1349
1371
|
}
|
1350
1372
|
StopBenchmarkTiming();
|
1351
1373
|
|
1352
|
-
string s(dst, contents.size());
|
1374
|
+
std::string s(dst, contents.size());
|
1353
1375
|
CHECK_EQ(contents, s);
|
1354
1376
|
|
1355
1377
|
delete[] dst;
|
@@ -1363,8 +1385,8 @@ static void BM_ZFlat(int iters, int arg) {
|
|
1363
1385
|
// Pick file to process based on "arg"
|
1364
1386
|
CHECK_GE(arg, 0);
|
1365
1387
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1366
|
-
string contents =
|
1367
|
-
|
1388
|
+
std::string contents =
|
1389
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1368
1390
|
|
1369
1391
|
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
1370
1392
|
|
@@ -1379,14 +1401,88 @@ static void BM_ZFlat(int iters, int arg) {
|
|
1379
1401
|
StopBenchmarkTiming();
|
1380
1402
|
const double compression_ratio =
|
1381
1403
|
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
1382
|
-
SetBenchmarkLabel(
|
1383
|
-
|
1384
|
-
VLOG(0) <<
|
1385
|
-
|
1404
|
+
SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
|
1405
|
+
100.0 * compression_ratio));
|
1406
|
+
VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
|
1407
|
+
files[arg].label, static_cast<int>(contents.size()),
|
1408
|
+
static_cast<int>(zsize));
|
1386
1409
|
delete[] dst;
|
1387
1410
|
}
|
1388
1411
|
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1389
1412
|
|
1413
|
+
static void BM_ZFlatAll(int iters, int arg) {
|
1414
|
+
StopBenchmarkTiming();
|
1415
|
+
|
1416
|
+
CHECK_EQ(arg, 0);
|
1417
|
+
const int num_files = ARRAYSIZE(files);
|
1418
|
+
|
1419
|
+
std::vector<std::string> contents(num_files);
|
1420
|
+
std::vector<char*> dst(num_files);
|
1421
|
+
|
1422
|
+
int64 total_contents_size = 0;
|
1423
|
+
for (int i = 0; i < num_files; ++i) {
|
1424
|
+
contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
|
1425
|
+
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
|
1426
|
+
total_contents_size += contents[i].size();
|
1427
|
+
}
|
1428
|
+
|
1429
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1430
|
+
StartBenchmarkTiming();
|
1431
|
+
|
1432
|
+
size_t zsize = 0;
|
1433
|
+
while (iters-- > 0) {
|
1434
|
+
for (int i = 0; i < num_files; ++i) {
|
1435
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1436
|
+
&zsize);
|
1437
|
+
}
|
1438
|
+
}
|
1439
|
+
StopBenchmarkTiming();
|
1440
|
+
|
1441
|
+
for (int i = 0; i < num_files; ++i) {
|
1442
|
+
delete[] dst[i];
|
1443
|
+
}
|
1444
|
+
SetBenchmarkLabel(StrFormat("%d files", num_files));
|
1445
|
+
}
|
1446
|
+
BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
|
1447
|
+
|
1448
|
+
static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
|
1449
|
+
StopBenchmarkTiming();
|
1450
|
+
|
1451
|
+
CHECK_EQ(arg, 0);
|
1452
|
+
CHECK_GT(ARRAYSIZE(files), 0);
|
1453
|
+
const std::string base_content =
|
1454
|
+
ReadTestDataFile(files[0].filename, files[0].size_limit);
|
1455
|
+
|
1456
|
+
std::vector<std::string> contents;
|
1457
|
+
std::vector<char*> dst;
|
1458
|
+
int64 total_contents_size = 0;
|
1459
|
+
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
|
1460
|
+
++table_bits) {
|
1461
|
+
std::string content = base_content;
|
1462
|
+
content.resize(1 << table_bits);
|
1463
|
+
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
|
1464
|
+
total_contents_size += content.size();
|
1465
|
+
contents.push_back(std::move(content));
|
1466
|
+
}
|
1467
|
+
|
1468
|
+
size_t zsize = 0;
|
1469
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1470
|
+
StartBenchmarkTiming();
|
1471
|
+
while (iters-- > 0) {
|
1472
|
+
for (int i = 0; i < contents.size(); ++i) {
|
1473
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1474
|
+
&zsize);
|
1475
|
+
}
|
1476
|
+
}
|
1477
|
+
StopBenchmarkTiming();
|
1478
|
+
|
1479
|
+
for (int i = 0; i < dst.size(); ++i) {
|
1480
|
+
delete[] dst[i];
|
1481
|
+
}
|
1482
|
+
SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
|
1483
|
+
}
|
1484
|
+
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
|
1485
|
+
|
1390
1486
|
} // namespace snappy
|
1391
1487
|
|
1392
1488
|
int main(int argc, char** argv) {
|