snappy 0.0.14-java → 0.2.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/Gemfile +4 -0
- data/README.md +28 -4
- data/Rakefile +32 -29
- data/ext/api.c +6 -1
- data/ext/extconf.rb +21 -24
- data/lib/snappy.rb +6 -4
- data/lib/snappy/hadoop.rb +22 -0
- data/lib/snappy/hadoop/reader.rb +62 -0
- data/lib/snappy/hadoop/writer.rb +51 -0
- data/lib/snappy/reader.rb +19 -11
- data/lib/snappy/shim.rb +30 -0
- data/lib/snappy/version.rb +3 -1
- data/lib/snappy/writer.rb +8 -9
- data/snappy.gemspec +17 -37
- data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
- data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
- data/test/snappy_hadoop_test.rb +26 -0
- data/test/snappy_reader_test.rb +148 -0
- data/test/snappy_test.rb +95 -0
- data/test/snappy_writer_test.rb +55 -0
- data/test/test_helper.rb +7 -0
- data/vendor/snappy/CMakeLists.txt +297 -0
- data/vendor/snappy/CONTRIBUTING.md +26 -0
- data/vendor/snappy/COPYING +1 -1
- data/vendor/snappy/NEWS +60 -0
- data/vendor/snappy/{README → README.md} +29 -16
- data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/vendor/snappy/cmake/config.h.in +62 -0
- data/vendor/snappy/docs/README.md +72 -0
- data/vendor/snappy/snappy-c.h +3 -3
- data/vendor/snappy/snappy-internal.h +113 -32
- data/vendor/snappy/snappy-sinksource.cc +33 -0
- data/vendor/snappy/snappy-sinksource.h +51 -6
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +160 -45
- data/vendor/snappy/snappy-stubs-public.h.in +23 -47
- data/vendor/snappy/snappy-test.cc +31 -24
- data/vendor/snappy/snappy-test.h +46 -103
- data/vendor/snappy/snappy.cc +786 -431
- data/vendor/snappy/snappy.h +37 -14
- data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
- data/vendor/snappy/snappy_unittest.cc +441 -290
- metadata +35 -75
- data/.travis.yml +0 -4
- data/test/test-snappy-reader.rb +0 -129
- data/test/test-snappy-writer.rb +0 -55
- data/test/test-snappy.rb +0 -58
- data/vendor/snappy/ChangeLog +0 -1916
- data/vendor/snappy/Makefile.am +0 -23
- data/vendor/snappy/autogen.sh +0 -7
- data/vendor/snappy/configure.ac +0 -133
- data/vendor/snappy/m4/gtest.m4 +0 -74
- data/vendor/snappy/testdata/alice29.txt +0 -3609
- data/vendor/snappy/testdata/asyoulik.txt +0 -4122
- data/vendor/snappy/testdata/baddata1.snappy +0 -0
- data/vendor/snappy/testdata/baddata2.snappy +0 -0
- data/vendor/snappy/testdata/baddata3.snappy +0 -0
- data/vendor/snappy/testdata/fireworks.jpeg +0 -0
- data/vendor/snappy/testdata/geo.protodata +0 -0
- data/vendor/snappy/testdata/html +0 -1
- data/vendor/snappy/testdata/html_x_4 +0 -1
- data/vendor/snappy/testdata/kppkn.gtb +0 -0
- data/vendor/snappy/testdata/lcet10.txt +0 -7519
- data/vendor/snappy/testdata/paper-100k.pdf +2 -600
- data/vendor/snappy/testdata/plrabn12.txt +0 -10699
- data/vendor/snappy/testdata/urls.10K +0 -10000
data/vendor/snappy/snappy.h
CHANGED
@@ -36,10 +36,10 @@
|
|
36
36
|
// using BMDiff and then compressing the output of BMDiff with
|
37
37
|
// Snappy.
|
38
38
|
|
39
|
-
#ifndef
|
40
|
-
#define
|
39
|
+
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
|
40
|
+
#define THIRD_PARTY_SNAPPY_SNAPPY_H__
|
41
41
|
|
42
|
-
#include <
|
42
|
+
#include <cstddef>
|
43
43
|
#include <string>
|
44
44
|
|
45
45
|
#include "snappy-stubs-public.h"
|
@@ -69,11 +69,12 @@ namespace snappy {
|
|
69
69
|
// Higher-level string based routines (should be sufficient for most users)
|
70
70
|
// ------------------------------------------------------------------------
|
71
71
|
|
72
|
-
// Sets "*
|
73
|
-
// Original contents of *
|
72
|
+
// Sets "*compressed" to the compressed version of "input[0,input_length-1]".
|
73
|
+
// Original contents of *compressed are lost.
|
74
74
|
//
|
75
|
-
// REQUIRES: "input[]" is not an alias of "*
|
76
|
-
size_t Compress(const char* input, size_t input_length,
|
75
|
+
// REQUIRES: "input[]" is not an alias of "*compressed".
|
76
|
+
size_t Compress(const char* input, size_t input_length,
|
77
|
+
std::string* compressed);
|
77
78
|
|
78
79
|
// Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
|
79
80
|
// Original contents of "*uncompressed" are lost.
|
@@ -82,8 +83,20 @@ namespace snappy {
|
|
82
83
|
//
|
83
84
|
// returns false if the message is corrupted and could not be decompressed
|
84
85
|
bool Uncompress(const char* compressed, size_t compressed_length,
|
85
|
-
string* uncompressed);
|
86
|
+
std::string* uncompressed);
|
86
87
|
|
88
|
+
// Decompresses "compressed" to "*uncompressed".
|
89
|
+
//
|
90
|
+
// returns false if the message is corrupted and could not be decompressed
|
91
|
+
bool Uncompress(Source* compressed, Sink* uncompressed);
|
92
|
+
|
93
|
+
// This routine uncompresses as much of the "compressed" as possible
|
94
|
+
// into sink. It returns the number of valid bytes added to sink
|
95
|
+
// (extra invalid bytes may have been added due to errors; the caller
|
96
|
+
// should ignore those). The emitted data typically has length
|
97
|
+
// GetUncompressedLength(), but may be shorter if an error is
|
98
|
+
// encountered.
|
99
|
+
size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed);
|
87
100
|
|
88
101
|
// ------------------------------------------------------------------------
|
89
102
|
// Lower-level character array based routines. May be useful for
|
@@ -164,6 +177,14 @@ namespace snappy {
|
|
164
177
|
bool IsValidCompressedBuffer(const char* compressed,
|
165
178
|
size_t compressed_length);
|
166
179
|
|
180
|
+
// Returns true iff the contents of "compressed" can be uncompressed
|
181
|
+
// successfully. Does not return the uncompressed data. Takes
|
182
|
+
// time proportional to *compressed length, but is usually at least
|
183
|
+
// a factor of four faster than actual decompression.
|
184
|
+
// On success, consumes all of *compressed. On failure, consumes an
|
185
|
+
// unspecified prefix of *compressed.
|
186
|
+
bool IsValidCompressed(Source* compressed);
|
187
|
+
|
167
188
|
// The size of a compression block. Note that many parts of the compression
|
168
189
|
// code assumes that kBlockSize <= 65536; in particular, the hash table
|
169
190
|
// can only store 16-bit offsets, and EmitCopy() also assumes the offset
|
@@ -173,12 +194,14 @@ namespace snappy {
|
|
173
194
|
// Note that there might be older data around that is compressed with larger
|
174
195
|
// block sizes, so the decompression code should not rely on the
|
175
196
|
// non-existence of long backreferences.
|
176
|
-
static
|
177
|
-
static
|
197
|
+
static constexpr int kBlockLog = 16;
|
198
|
+
static constexpr size_t kBlockSize = 1 << kBlockLog;
|
178
199
|
|
179
|
-
static
|
180
|
-
static
|
181
|
-
} // end namespace snappy
|
200
|
+
static constexpr int kMinHashTableBits = 8;
|
201
|
+
static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
|
182
202
|
|
203
|
+
static constexpr int kMaxHashTableBits = 14;
|
204
|
+
static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
|
205
|
+
} // end namespace snappy
|
183
206
|
|
184
|
-
#endif //
|
207
|
+
#endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
|
@@ -0,0 +1,59 @@
|
|
1
|
+
// Copyright 2019 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are
|
5
|
+
// met:
|
6
|
+
//
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
11
|
+
// in the documentation and/or other materials provided with the
|
12
|
+
// distribution.
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
15
|
+
// this software without specific prior written permission.
|
16
|
+
//
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
//
|
29
|
+
// libFuzzer harness for fuzzing snappy compression code.
|
30
|
+
|
31
|
+
#include <cassert>
|
32
|
+
#include <cstddef>
|
33
|
+
#include <cstdint>
|
34
|
+
#include <string>
|
35
|
+
|
36
|
+
#include "snappy.h"
|
37
|
+
|
38
|
+
// Entry point for LibFuzzer.
|
39
|
+
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
40
|
+
std::string input(reinterpret_cast<const char*>(data), size);
|
41
|
+
|
42
|
+
std::string compressed;
|
43
|
+
size_t compressed_size =
|
44
|
+
snappy::Compress(input.data(), input.size(), &compressed);
|
45
|
+
|
46
|
+
(void)compressed_size; // Variable only used in debug builds.
|
47
|
+
assert(compressed_size == compressed.size());
|
48
|
+
assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
|
49
|
+
assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
50
|
+
|
51
|
+
std::string uncompressed_after_compress;
|
52
|
+
bool uncompress_succeeded = snappy::Uncompress(
|
53
|
+
compressed.data(), compressed.size(), &uncompressed_after_compress);
|
54
|
+
|
55
|
+
(void)uncompress_succeeded; // Variable only used in debug builds.
|
56
|
+
assert(uncompress_succeeded);
|
57
|
+
assert(input == uncompressed_after_compress);
|
58
|
+
return 0;
|
59
|
+
}
|
@@ -0,0 +1,57 @@
|
|
1
|
+
// Copyright 2019 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are
|
5
|
+
// met:
|
6
|
+
//
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
11
|
+
// in the documentation and/or other materials provided with the
|
12
|
+
// distribution.
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
15
|
+
// this software without specific prior written permission.
|
16
|
+
//
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
//
|
29
|
+
// libFuzzer harness for fuzzing snappy's decompression code.
|
30
|
+
|
31
|
+
#include <cassert>
|
32
|
+
#include <cstddef>
|
33
|
+
#include <cstdint>
|
34
|
+
#include <string>
|
35
|
+
|
36
|
+
#include "snappy.h"
|
37
|
+
|
38
|
+
// Entry point for LibFuzzer.
|
39
|
+
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
40
|
+
std::string input(reinterpret_cast<const char*>(data), size);
|
41
|
+
|
42
|
+
// Avoid self-crafted decompression bombs.
|
43
|
+
size_t uncompressed_size;
|
44
|
+
constexpr size_t kMaxUncompressedSize = 1 << 20;
|
45
|
+
bool get_uncompressed_length_succeeded = snappy::GetUncompressedLength(
|
46
|
+
input.data(), input.size(), &uncompressed_size);
|
47
|
+
if (!get_uncompressed_length_succeeded ||
|
48
|
+
(uncompressed_size > kMaxUncompressedSize)) {
|
49
|
+
return 0;
|
50
|
+
}
|
51
|
+
|
52
|
+
std::string uncompressed;
|
53
|
+
// The return value of snappy::Uncompress() is ignored because decompression
|
54
|
+
// will fail on invalid inputs.
|
55
|
+
snappy::Uncompress(input.data(), input.size(), &uncompressed);
|
56
|
+
return 0;
|
57
|
+
}
|
@@ -29,9 +29,10 @@
|
|
29
29
|
#include <math.h>
|
30
30
|
#include <stdlib.h>
|
31
31
|
|
32
|
-
|
33
32
|
#include <algorithm>
|
33
|
+
#include <random>
|
34
34
|
#include <string>
|
35
|
+
#include <utility>
|
35
36
|
#include <vector>
|
36
37
|
|
37
38
|
#include "snappy.h"
|
@@ -50,25 +51,19 @@ DEFINE_bool(zlib, false,
|
|
50
51
|
"Run zlib compression (http://www.zlib.net)");
|
51
52
|
DEFINE_bool(lzo, false,
|
52
53
|
"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
|
53
|
-
DEFINE_bool(quicklz, false,
|
54
|
-
"Run quickLZ compression (http://www.quicklz.com/)");
|
55
|
-
DEFINE_bool(liblzf, false,
|
56
|
-
"Run libLZF compression "
|
57
|
-
"(http://www.goof.com/pcg/marc/liblzf.html)");
|
58
|
-
DEFINE_bool(fastlz, false,
|
59
|
-
"Run FastLZ compression (http://www.fastlz.org/");
|
60
54
|
DEFINE_bool(snappy, true, "Run snappy compression");
|
61
55
|
|
62
|
-
|
63
56
|
DEFINE_bool(write_compressed, false,
|
64
57
|
"Write compressed versions of each file to <file>.comp");
|
65
58
|
DEFINE_bool(write_uncompressed, false,
|
66
59
|
"Write uncompressed versions of each file to <file>.uncomp");
|
67
60
|
|
68
|
-
|
61
|
+
DEFINE_bool(snappy_dump_decompression_table, false,
|
62
|
+
"If true, we print the decompression table during tests.");
|
69
63
|
|
64
|
+
namespace snappy {
|
70
65
|
|
71
|
-
#
|
66
|
+
#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
|
72
67
|
|
73
68
|
// To test against code that reads beyond its input, this class copies a
|
74
69
|
// string to a newly allocated group of pages, the last of which
|
@@ -78,8 +73,8 @@ namespace snappy {
|
|
78
73
|
// be able to read previously allocated memory while doing heap allocations.
|
79
74
|
class DataEndingAtUnreadablePage {
|
80
75
|
public:
|
81
|
-
explicit DataEndingAtUnreadablePage(const string& s) {
|
82
|
-
const size_t page_size =
|
76
|
+
explicit DataEndingAtUnreadablePage(const std::string& s) {
|
77
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
83
78
|
const size_t size = s.size();
|
84
79
|
// Round up space for string to a multiple of page_size.
|
85
80
|
size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
|
@@ -97,8 +92,9 @@ class DataEndingAtUnreadablePage {
|
|
97
92
|
}
|
98
93
|
|
99
94
|
~DataEndingAtUnreadablePage() {
|
95
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
100
96
|
// Undo the mprotect.
|
101
|
-
CHECK_EQ(0, mprotect(protected_page_,
|
97
|
+
CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
|
102
98
|
CHECK_EQ(0, munmap(mem_, alloc_size_));
|
103
99
|
}
|
104
100
|
|
@@ -113,19 +109,19 @@ class DataEndingAtUnreadablePage {
|
|
113
109
|
size_t size_;
|
114
110
|
};
|
115
111
|
|
116
|
-
#else // HAVE_FUNC_MMAP
|
112
|
+
#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
|
117
113
|
|
118
114
|
// Fallback for systems without mmap.
|
119
|
-
|
115
|
+
using DataEndingAtUnreadablePage = std::string;
|
120
116
|
|
121
117
|
#endif
|
122
118
|
|
123
119
|
enum CompressorType {
|
124
|
-
ZLIB, LZO,
|
120
|
+
ZLIB, LZO, SNAPPY
|
125
121
|
};
|
126
122
|
|
127
123
|
const char* names[] = {
|
128
|
-
"ZLIB", "LZO", "
|
124
|
+
"ZLIB", "LZO", "SNAPPY"
|
129
125
|
};
|
130
126
|
|
131
127
|
static size_t MinimumRequiredOutputSpace(size_t input_size,
|
@@ -141,26 +137,12 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
|
|
141
137
|
return input_size + input_size/64 + 16 + 3;
|
142
138
|
#endif // LZO_VERSION
|
143
139
|
|
144
|
-
#ifdef LZF_VERSION
|
145
|
-
case LIBLZF:
|
146
|
-
return input_size;
|
147
|
-
#endif // LZF_VERSION
|
148
|
-
|
149
|
-
#ifdef QLZ_VERSION_MAJOR
|
150
|
-
case QUICKLZ:
|
151
|
-
return input_size + 36000; // 36000 is used for scratch.
|
152
|
-
#endif // QLZ_VERSION_MAJOR
|
153
|
-
|
154
|
-
#ifdef FASTLZ_VERSION
|
155
|
-
case FASTLZ:
|
156
|
-
return max(static_cast<int>(ceil(input_size * 1.05)), 66);
|
157
|
-
#endif // FASTLZ_VERSION
|
158
|
-
|
159
140
|
case SNAPPY:
|
160
141
|
return snappy::MaxCompressedLength(input_size);
|
161
142
|
|
162
143
|
default:
|
163
144
|
LOG(FATAL) << "Unknown compression type number " << comp;
|
145
|
+
return 0;
|
164
146
|
}
|
165
147
|
}
|
166
148
|
|
@@ -172,7 +154,7 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
|
|
172
154
|
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
|
173
155
|
// number of bytes, and may contain junk bytes at the end after return.
|
174
156
|
static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
175
|
-
string* compressed, bool compressed_is_preallocated) {
|
157
|
+
std::string* compressed, bool compressed_is_preallocated) {
|
176
158
|
if (!compressed_is_preallocated) {
|
177
159
|
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
|
178
160
|
}
|
@@ -214,58 +196,6 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
214
196
|
}
|
215
197
|
#endif // LZO_VERSION
|
216
198
|
|
217
|
-
#ifdef LZF_VERSION
|
218
|
-
case LIBLZF: {
|
219
|
-
int destlen = lzf_compress(input,
|
220
|
-
input_size,
|
221
|
-
string_as_array(compressed),
|
222
|
-
input_size);
|
223
|
-
if (destlen == 0) {
|
224
|
-
// lzf *can* cause lots of blowup when compressing, so they
|
225
|
-
// recommend to limit outsize to insize, and just not compress
|
226
|
-
// if it's bigger. Ideally, we'd just swap input and output.
|
227
|
-
compressed->assign(input, input_size);
|
228
|
-
destlen = input_size;
|
229
|
-
}
|
230
|
-
if (!compressed_is_preallocated) {
|
231
|
-
compressed->resize(destlen);
|
232
|
-
}
|
233
|
-
break;
|
234
|
-
}
|
235
|
-
#endif // LZF_VERSION
|
236
|
-
|
237
|
-
#ifdef QLZ_VERSION_MAJOR
|
238
|
-
case QUICKLZ: {
|
239
|
-
qlz_state_compress *state_compress = new qlz_state_compress;
|
240
|
-
int destlen = qlz_compress(input,
|
241
|
-
string_as_array(compressed),
|
242
|
-
input_size,
|
243
|
-
state_compress);
|
244
|
-
delete state_compress;
|
245
|
-
CHECK_NE(0, destlen);
|
246
|
-
if (!compressed_is_preallocated) {
|
247
|
-
compressed->resize(destlen);
|
248
|
-
}
|
249
|
-
break;
|
250
|
-
}
|
251
|
-
#endif // QLZ_VERSION_MAJOR
|
252
|
-
|
253
|
-
#ifdef FASTLZ_VERSION
|
254
|
-
case FASTLZ: {
|
255
|
-
// Use level 1 compression since we mostly care about speed.
|
256
|
-
int destlen = fastlz_compress_level(
|
257
|
-
1,
|
258
|
-
input,
|
259
|
-
input_size,
|
260
|
-
string_as_array(compressed));
|
261
|
-
if (!compressed_is_preallocated) {
|
262
|
-
compressed->resize(destlen);
|
263
|
-
}
|
264
|
-
CHECK_NE(destlen, 0);
|
265
|
-
break;
|
266
|
-
}
|
267
|
-
#endif // FASTLZ_VERSION
|
268
|
-
|
269
199
|
case SNAPPY: {
|
270
200
|
size_t destlen;
|
271
201
|
snappy::RawCompress(input, input_size,
|
@@ -278,7 +208,6 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
278
208
|
break;
|
279
209
|
}
|
280
210
|
|
281
|
-
|
282
211
|
default: {
|
283
212
|
return false; // the asked-for library wasn't compiled in
|
284
213
|
}
|
@@ -286,8 +215,8 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
286
215
|
return true;
|
287
216
|
}
|
288
217
|
|
289
|
-
static bool Uncompress(const string& compressed, CompressorType comp,
|
290
|
-
int size, string* output) {
|
218
|
+
static bool Uncompress(const std::string& compressed, CompressorType comp,
|
219
|
+
int size, std::string* output) {
|
291
220
|
switch (comp) {
|
292
221
|
#ifdef ZLIB_VERSION
|
293
222
|
case ZLIB: {
|
@@ -321,56 +250,12 @@ static bool Uncompress(const string& compressed, CompressorType comp,
|
|
321
250
|
}
|
322
251
|
#endif // LZO_VERSION
|
323
252
|
|
324
|
-
#ifdef LZF_VERSION
|
325
|
-
case LIBLZF: {
|
326
|
-
output->resize(size);
|
327
|
-
int destlen = lzf_decompress(compressed.data(),
|
328
|
-
compressed.size(),
|
329
|
-
string_as_array(output),
|
330
|
-
output->size());
|
331
|
-
if (destlen == 0) {
|
332
|
-
// This error probably means we had decided not to compress,
|
333
|
-
// and thus have stored input in output directly.
|
334
|
-
output->assign(compressed.data(), compressed.size());
|
335
|
-
destlen = compressed.size();
|
336
|
-
}
|
337
|
-
CHECK_EQ(destlen, size);
|
338
|
-
break;
|
339
|
-
}
|
340
|
-
#endif // LZF_VERSION
|
341
|
-
|
342
|
-
#ifdef QLZ_VERSION_MAJOR
|
343
|
-
case QUICKLZ: {
|
344
|
-
output->resize(size);
|
345
|
-
qlz_state_decompress *state_decompress = new qlz_state_decompress;
|
346
|
-
int destlen = qlz_decompress(compressed.data(),
|
347
|
-
string_as_array(output),
|
348
|
-
state_decompress);
|
349
|
-
delete state_decompress;
|
350
|
-
CHECK_EQ(destlen, size);
|
351
|
-
break;
|
352
|
-
}
|
353
|
-
#endif // QLZ_VERSION_MAJOR
|
354
|
-
|
355
|
-
#ifdef FASTLZ_VERSION
|
356
|
-
case FASTLZ: {
|
357
|
-
output->resize(size);
|
358
|
-
int destlen = fastlz_decompress(compressed.data(),
|
359
|
-
compressed.length(),
|
360
|
-
string_as_array(output),
|
361
|
-
size);
|
362
|
-
CHECK_EQ(destlen, size);
|
363
|
-
break;
|
364
|
-
}
|
365
|
-
#endif // FASTLZ_VERSION
|
366
|
-
|
367
253
|
case SNAPPY: {
|
368
254
|
snappy::RawUncompress(compressed.data(), compressed.size(),
|
369
255
|
string_as_array(output));
|
370
256
|
break;
|
371
257
|
}
|
372
258
|
|
373
|
-
|
374
259
|
default: {
|
375
260
|
return false; // the asked-for library wasn't compiled in
|
376
261
|
}
|
@@ -392,13 +277,13 @@ static void Measure(const char* data,
|
|
392
277
|
{
|
393
278
|
// Chop the input into blocks
|
394
279
|
int num_blocks = (length + block_size - 1) / block_size;
|
395
|
-
vector<const char*> input(num_blocks);
|
396
|
-
vector<size_t> input_length(num_blocks);
|
397
|
-
vector<string> compressed(num_blocks);
|
398
|
-
vector<string> output(num_blocks);
|
280
|
+
std::vector<const char*> input(num_blocks);
|
281
|
+
std::vector<size_t> input_length(num_blocks);
|
282
|
+
std::vector<std::string> compressed(num_blocks);
|
283
|
+
std::vector<std::string> output(num_blocks);
|
399
284
|
for (int b = 0; b < num_blocks; b++) {
|
400
285
|
int input_start = b * block_size;
|
401
|
-
int input_limit = min<int>((b+1)*block_size, length);
|
286
|
+
int input_limit = std::min<int>((b+1)*block_size, length);
|
402
287
|
input[b] = data+input_start;
|
403
288
|
input_length[b] = input_limit-input_start;
|
404
289
|
|
@@ -448,35 +333,33 @@ static void Measure(const char* data,
|
|
448
333
|
}
|
449
334
|
|
450
335
|
compressed_size = 0;
|
451
|
-
for (
|
336
|
+
for (size_t i = 0; i < compressed.size(); i++) {
|
452
337
|
compressed_size += compressed[i].size();
|
453
338
|
}
|
454
339
|
}
|
455
340
|
|
456
|
-
sort(ctime, ctime + kRuns);
|
457
|
-
sort(utime, utime + kRuns);
|
341
|
+
std::sort(ctime, ctime + kRuns);
|
342
|
+
std::sort(utime, utime + kRuns);
|
458
343
|
const int med = kRuns/2;
|
459
344
|
|
460
345
|
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
|
461
346
|
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
|
462
|
-
string x = names[comp];
|
347
|
+
std::string x = names[comp];
|
463
348
|
x += ":";
|
464
|
-
string urate = (uncomp_rate >= 0)
|
465
|
-
|
466
|
-
: string("?");
|
349
|
+
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
|
350
|
+
: std::string("?");
|
467
351
|
printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
|
468
352
|
"comp %5.1f MB/s uncomp %5s MB/s\n",
|
469
353
|
x.c_str(),
|
470
354
|
block_size/(1<<20),
|
471
355
|
static_cast<int>(length), static_cast<uint32>(compressed_size),
|
472
|
-
(compressed_size * 100.0) / max<int>(1, length),
|
356
|
+
(compressed_size * 100.0) / std::max<int>(1, length),
|
473
357
|
comp_rate,
|
474
358
|
urate.c_str());
|
475
359
|
}
|
476
360
|
|
477
|
-
|
478
|
-
|
479
|
-
string compressed;
|
361
|
+
static int VerifyString(const std::string& input) {
|
362
|
+
std::string compressed;
|
480
363
|
DataEndingAtUnreadablePage i(input);
|
481
364
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
482
365
|
CHECK_EQ(written, compressed.size());
|
@@ -484,16 +367,33 @@ static int VerifyString(const string& input) {
|
|
484
367
|
snappy::MaxCompressedLength(input.size()));
|
485
368
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
486
369
|
|
487
|
-
string uncompressed;
|
370
|
+
std::string uncompressed;
|
488
371
|
DataEndingAtUnreadablePage c(compressed);
|
489
372
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
490
373
|
CHECK_EQ(uncompressed, input);
|
491
374
|
return uncompressed.size();
|
492
375
|
}
|
493
376
|
|
377
|
+
static void VerifyStringSink(const std::string& input) {
|
378
|
+
std::string compressed;
|
379
|
+
DataEndingAtUnreadablePage i(input);
|
380
|
+
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
381
|
+
CHECK_EQ(written, compressed.size());
|
382
|
+
CHECK_LE(compressed.size(),
|
383
|
+
snappy::MaxCompressedLength(input.size()));
|
384
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
494
385
|
|
495
|
-
|
496
|
-
|
386
|
+
std::string uncompressed;
|
387
|
+
uncompressed.resize(input.size());
|
388
|
+
snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
|
389
|
+
DataEndingAtUnreadablePage c(compressed);
|
390
|
+
snappy::ByteArraySource source(c.data(), c.size());
|
391
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
392
|
+
CHECK_EQ(uncompressed, input);
|
393
|
+
}
|
394
|
+
|
395
|
+
static void VerifyIOVec(const std::string& input) {
|
396
|
+
std::string compressed;
|
497
397
|
DataEndingAtUnreadablePage i(input);
|
498
398
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
499
399
|
CHECK_EQ(written, compressed.size());
|
@@ -504,23 +404,28 @@ static void VerifyIOVec(const string& input) {
|
|
504
404
|
// Try uncompressing into an iovec containing a random number of entries
|
505
405
|
// ranging from 1 to 10.
|
506
406
|
char* buf = new char[input.size()];
|
507
|
-
|
508
|
-
|
407
|
+
std::minstd_rand0 rng(input.size());
|
408
|
+
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
|
409
|
+
size_t num = uniform_1_to_10(rng);
|
509
410
|
if (input.size() < num) {
|
510
411
|
num = input.size();
|
511
412
|
}
|
512
413
|
struct iovec* iov = new iovec[num];
|
513
414
|
int used_so_far = 0;
|
514
|
-
|
415
|
+
std::bernoulli_distribution one_in_five(1.0 / 5);
|
416
|
+
for (size_t i = 0; i < num; ++i) {
|
417
|
+
assert(used_so_far < input.size());
|
515
418
|
iov[i].iov_base = buf + used_so_far;
|
516
419
|
if (i == num - 1) {
|
517
420
|
iov[i].iov_len = input.size() - used_so_far;
|
518
421
|
} else {
|
519
422
|
// Randomly choose to insert a 0 byte entry.
|
520
|
-
if (
|
423
|
+
if (one_in_five(rng)) {
|
521
424
|
iov[i].iov_len = 0;
|
522
425
|
} else {
|
523
|
-
|
426
|
+
std::uniform_int_distribution<size_t> uniform_not_used_so_far(
|
427
|
+
0, input.size() - used_so_far - 1);
|
428
|
+
iov[i].iov_len = uniform_not_used_so_far(rng);
|
524
429
|
}
|
525
430
|
}
|
526
431
|
used_so_far += iov[i].iov_len;
|
@@ -534,22 +439,22 @@ static void VerifyIOVec(const string& input) {
|
|
534
439
|
|
535
440
|
// Test that data compressed by a compressor that does not
|
536
441
|
// obey block sizes is uncompressed properly.
|
537
|
-
static void VerifyNonBlockedCompression(const string& input) {
|
442
|
+
static void VerifyNonBlockedCompression(const std::string& input) {
|
538
443
|
if (input.length() > snappy::kBlockSize) {
|
539
444
|
// We cannot test larger blocks than the maximum block size, obviously.
|
540
445
|
return;
|
541
446
|
}
|
542
447
|
|
543
|
-
string prefix;
|
448
|
+
std::string prefix;
|
544
449
|
Varint::Append32(&prefix, input.size());
|
545
450
|
|
546
451
|
// Setup compression table
|
547
|
-
snappy::internal::WorkingMemory wmem;
|
452
|
+
snappy::internal::WorkingMemory wmem(input.size());
|
548
453
|
int table_size;
|
549
454
|
uint16* table = wmem.GetHashTable(input.size(), &table_size);
|
550
455
|
|
551
456
|
// Compress entire input in one shot
|
552
|
-
string compressed;
|
457
|
+
std::string compressed;
|
553
458
|
compressed += prefix;
|
554
459
|
compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
|
555
460
|
char* dest = string_as_array(&compressed) + prefix.size();
|
@@ -557,57 +462,79 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
557
462
|
dest, table, table_size);
|
558
463
|
compressed.resize(end - compressed.data());
|
559
464
|
|
560
|
-
// Uncompress into string
|
561
|
-
string uncomp_str;
|
465
|
+
// Uncompress into std::string
|
466
|
+
std::string uncomp_str;
|
562
467
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
|
563
468
|
CHECK_EQ(uncomp_str, input);
|
564
469
|
|
470
|
+
// Uncompress using source/sink
|
471
|
+
std::string uncomp_str2;
|
472
|
+
uncomp_str2.resize(input.size());
|
473
|
+
snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
|
474
|
+
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
475
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
476
|
+
CHECK_EQ(uncomp_str2, input);
|
477
|
+
|
478
|
+
// Uncompress into iovec
|
479
|
+
{
|
480
|
+
static const int kNumBlocks = 10;
|
481
|
+
struct iovec vec[kNumBlocks];
|
482
|
+
const int block_size = 1 + input.size() / kNumBlocks;
|
483
|
+
std::string iovec_data(block_size * kNumBlocks, 'x');
|
484
|
+
for (int i = 0; i < kNumBlocks; i++) {
|
485
|
+
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
486
|
+
vec[i].iov_len = block_size;
|
487
|
+
}
|
488
|
+
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
|
489
|
+
vec, kNumBlocks));
|
490
|
+
CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
|
491
|
+
}
|
565
492
|
}
|
566
493
|
|
567
494
|
// Expand the input so that it is at least K times as big as block size
|
568
|
-
static string Expand(const string& input) {
|
495
|
+
static std::string Expand(const std::string& input) {
|
569
496
|
static const int K = 3;
|
570
|
-
string data = input;
|
497
|
+
std::string data = input;
|
571
498
|
while (data.size() < K * snappy::kBlockSize) {
|
572
499
|
data += input;
|
573
500
|
}
|
574
501
|
return data;
|
575
502
|
}
|
576
503
|
|
577
|
-
static int Verify(const string& input) {
|
504
|
+
static int Verify(const std::string& input) {
|
578
505
|
VLOG(1) << "Verifying input of size " << input.size();
|
579
506
|
|
580
507
|
// Compress using string based routines
|
581
508
|
const int result = VerifyString(input);
|
582
509
|
|
510
|
+
// Verify using sink based routines
|
511
|
+
VerifyStringSink(input);
|
583
512
|
|
584
513
|
VerifyNonBlockedCompression(input);
|
585
514
|
VerifyIOVec(input);
|
586
515
|
if (!input.empty()) {
|
587
|
-
const string expanded = Expand(input);
|
516
|
+
const std::string expanded = Expand(input);
|
588
517
|
VerifyNonBlockedCompression(expanded);
|
589
518
|
VerifyIOVec(input);
|
590
519
|
}
|
591
520
|
|
592
|
-
|
593
521
|
return result;
|
594
522
|
}
|
595
523
|
|
596
|
-
|
597
|
-
// corrupted data.
|
598
|
-
|
599
|
-
static bool IsValidCompressedBuffer(const string& c) {
|
524
|
+
static bool IsValidCompressedBuffer(const std::string& c) {
|
600
525
|
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
601
526
|
}
|
602
|
-
static bool Uncompress(const string& c, string* u) {
|
527
|
+
static bool Uncompress(const std::string& c, std::string* u) {
|
603
528
|
return snappy::Uncompress(c.data(), c.size(), u);
|
604
529
|
}
|
605
530
|
|
606
|
-
|
607
|
-
|
531
|
+
// This test checks to ensure that snappy doesn't coredump if it gets
|
532
|
+
// corrupted data.
|
533
|
+
TEST(CorruptedTest, VerifyCorrupted) {
|
534
|
+
std::string source = "making sure we don't crash with corrupted input";
|
608
535
|
VLOG(1) << source;
|
609
|
-
string dest;
|
610
|
-
|
536
|
+
std::string dest;
|
537
|
+
std::string uncmp;
|
611
538
|
snappy::Compress(source.data(), source.size(), &dest);
|
612
539
|
|
613
540
|
// Mess around with the data. It's hard to simulate all possible
|
@@ -616,19 +543,19 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
|
|
616
543
|
dest[1]--;
|
617
544
|
dest[3]++;
|
618
545
|
// this really ought to fail.
|
619
|
-
CHECK(!IsValidCompressedBuffer(
|
620
|
-
CHECK(!Uncompress(
|
546
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
547
|
+
CHECK(!Uncompress(dest, &uncmp));
|
621
548
|
|
622
549
|
// This is testing for a security bug - a buffer that decompresses to 100k
|
623
550
|
// but we lie in the snappy header and only reserve 0 bytes of memory :)
|
624
551
|
source.resize(100000);
|
625
|
-
for (
|
552
|
+
for (size_t i = 0; i < source.length(); ++i) {
|
626
553
|
source[i] = 'A';
|
627
554
|
}
|
628
555
|
snappy::Compress(source.data(), source.size(), &dest);
|
629
556
|
dest[0] = dest[1] = dest[2] = dest[3] = 0;
|
630
|
-
CHECK(!IsValidCompressedBuffer(
|
631
|
-
CHECK(!Uncompress(
|
557
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
558
|
+
CHECK(!Uncompress(dest, &uncmp));
|
632
559
|
|
633
560
|
if (sizeof(void *) == 4) {
|
634
561
|
// Another security check; check a crazy big length can't DoS us with an
|
@@ -637,26 +564,26 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
|
|
637
564
|
// where 3 GB might be an acceptable allocation size, Uncompress()
|
638
565
|
// attempts to decompress, and sometimes causes the test to run out of
|
639
566
|
// memory.
|
640
|
-
dest[0] = dest[1] = dest[2] = dest[3] =
|
567
|
+
dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
|
641
568
|
// This decodes to a really large size, i.e., about 3 GB.
|
642
569
|
dest[4] = 'k';
|
643
|
-
CHECK(!IsValidCompressedBuffer(
|
644
|
-
CHECK(!Uncompress(
|
570
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
571
|
+
CHECK(!Uncompress(dest, &uncmp));
|
645
572
|
} else {
|
646
573
|
LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
|
647
574
|
}
|
648
575
|
|
649
576
|
// This decodes to about 2 MB; much smaller, but should still fail.
|
650
|
-
dest[0] = dest[1] = dest[2] =
|
577
|
+
dest[0] = dest[1] = dest[2] = '\xff';
|
651
578
|
dest[3] = 0x00;
|
652
|
-
CHECK(!IsValidCompressedBuffer(
|
653
|
-
CHECK(!Uncompress(
|
579
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
580
|
+
CHECK(!Uncompress(dest, &uncmp));
|
654
581
|
|
655
582
|
// try reading stuff in from a bad file.
|
656
583
|
for (int i = 1; i <= 3; ++i) {
|
657
|
-
string data =
|
658
|
-
|
659
|
-
string uncmp;
|
584
|
+
std::string data =
|
585
|
+
ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
|
586
|
+
std::string uncmp;
|
660
587
|
// check that we don't return a crazy length
|
661
588
|
size_t ulen;
|
662
589
|
CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
|
@@ -665,8 +592,8 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
|
|
665
592
|
snappy::ByteArraySource source(data.data(), data.size());
|
666
593
|
CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
|
667
594
|
(ulen2 < (1<<20)));
|
668
|
-
CHECK(!IsValidCompressedBuffer(
|
669
|
-
CHECK(!Uncompress(
|
595
|
+
CHECK(!IsValidCompressedBuffer(data));
|
596
|
+
CHECK(!Uncompress(data, &uncmp));
|
670
597
|
}
|
671
598
|
}
|
672
599
|
|
@@ -674,7 +601,7 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
|
|
674
601
|
// These mirror the compression code in snappy.cc, but are copied
|
675
602
|
// here so that we can bypass some limitations in the how snappy.cc
|
676
603
|
// invokes these routines.
|
677
|
-
static void AppendLiteral(string* dst, const string& literal) {
|
604
|
+
static void AppendLiteral(std::string* dst, const std::string& literal) {
|
678
605
|
if (literal.empty()) return;
|
679
606
|
int n = literal.size() - 1;
|
680
607
|
if (n < 60) {
|
@@ -689,12 +616,12 @@ static void AppendLiteral(string* dst, const string& literal) {
|
|
689
616
|
n >>= 8;
|
690
617
|
}
|
691
618
|
dst->push_back(0 | ((59+count) << 2));
|
692
|
-
*dst += string(number, count);
|
619
|
+
*dst += std::string(number, count);
|
693
620
|
}
|
694
621
|
*dst += literal;
|
695
622
|
}
|
696
623
|
|
697
|
-
static void AppendCopy(string* dst, int offset, int length) {
|
624
|
+
static void AppendCopy(std::string* dst, int offset, int length) {
|
698
625
|
while (length > 0) {
|
699
626
|
// Figure out how much to copy in one shot
|
700
627
|
int to_copy;
|
@@ -731,51 +658,67 @@ TEST(Snappy, SimpleTests) {
|
|
731
658
|
Verify("ab");
|
732
659
|
Verify("abc");
|
733
660
|
|
734
|
-
Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
|
735
|
-
Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
|
736
|
-
Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
|
737
|
-
Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
738
|
-
Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
661
|
+
Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
|
662
|
+
Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
|
663
|
+
Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
|
664
|
+
Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
665
|
+
Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
739
666
|
}
|
740
667
|
|
741
668
|
// Verify max blowup (lots of four-byte copies)
|
742
669
|
TEST(Snappy, MaxBlowup) {
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
input.
|
748
|
-
|
749
|
-
for (int i =
|
750
|
-
|
751
|
-
|
752
|
-
input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
|
670
|
+
std::mt19937 rng;
|
671
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
672
|
+
std::string input;
|
673
|
+
for (int i = 0; i < 80000; ++i)
|
674
|
+
input.push_back(static_cast<char>(uniform_byte(rng)));
|
675
|
+
|
676
|
+
for (int i = 0; i < 80000; i += 4) {
|
677
|
+
std::string four_bytes(input.end() - i - 4, input.end() - i);
|
678
|
+
input.append(four_bytes);
|
753
679
|
}
|
754
680
|
Verify(input);
|
755
681
|
}
|
756
682
|
|
757
683
|
TEST(Snappy, RandomData) {
|
758
|
-
|
759
|
-
|
760
|
-
|
684
|
+
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
685
|
+
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
|
686
|
+
std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
|
687
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
688
|
+
std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
|
689
|
+
std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
|
690
|
+
std::bernoulli_distribution one_in_ten(1.0 / 10);
|
691
|
+
|
692
|
+
constexpr int num_ops = 20000;
|
761
693
|
for (int i = 0; i < num_ops; i++) {
|
762
694
|
if ((i % 1000) == 0) {
|
763
695
|
VLOG(0) << "Random op " << i << " of " << num_ops;
|
764
696
|
}
|
765
697
|
|
766
|
-
string x;
|
767
|
-
|
698
|
+
std::string x;
|
699
|
+
size_t len = uniform_4k(rng);
|
768
700
|
if (i < 100) {
|
769
|
-
len = 65536 +
|
701
|
+
len = 65536 + uniform_64k(rng);
|
770
702
|
}
|
771
703
|
while (x.size() < len) {
|
772
704
|
int run_len = 1;
|
773
|
-
if (
|
774
|
-
|
705
|
+
if (one_in_ten(rng)) {
|
706
|
+
int skewed_bits = uniform_0_to_8(rng);
|
707
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
|
708
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
709
|
+
(1 << skewed_bits) - 1);
|
710
|
+
run_len = skewed_low(rng);
|
711
|
+
}
|
712
|
+
char c = static_cast<char>(uniform_byte(rng));
|
713
|
+
if (i >= 100) {
|
714
|
+
int skewed_bits = uniform_0_to_3(rng);
|
715
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
|
716
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
717
|
+
(1 << skewed_bits) - 1);
|
718
|
+
c = static_cast<char>(skewed_low(rng));
|
775
719
|
}
|
776
|
-
char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
|
777
720
|
while (run_len-- > 0 && x.size() < len) {
|
778
|
-
x
|
721
|
+
x.push_back(c);
|
779
722
|
}
|
780
723
|
}
|
781
724
|
|
@@ -789,19 +732,19 @@ TEST(Snappy, FourByteOffset) {
|
|
789
732
|
// copy manually.
|
790
733
|
|
791
734
|
// The two fragments that make up the input string.
|
792
|
-
string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
793
|
-
string fragment2 = "some other string";
|
735
|
+
std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
736
|
+
std::string fragment2 = "some other string";
|
794
737
|
|
795
738
|
// How many times each fragment is emitted.
|
796
739
|
const int n1 = 2;
|
797
740
|
const int n2 = 100000 / fragment2.size();
|
798
741
|
const int length = n1 * fragment1.size() + n2 * fragment2.size();
|
799
742
|
|
800
|
-
string compressed;
|
743
|
+
std::string compressed;
|
801
744
|
Varint::Append32(&compressed, length);
|
802
745
|
|
803
746
|
AppendLiteral(&compressed, fragment1);
|
804
|
-
string src = fragment1;
|
747
|
+
std::string src = fragment1;
|
805
748
|
for (int i = 0; i < n2; i++) {
|
806
749
|
AppendLiteral(&compressed, fragment2);
|
807
750
|
src += fragment2;
|
@@ -810,7 +753,7 @@ TEST(Snappy, FourByteOffset) {
|
|
810
753
|
src += fragment1;
|
811
754
|
CHECK_EQ(length, src.size());
|
812
755
|
|
813
|
-
string uncompressed;
|
756
|
+
std::string uncompressed;
|
814
757
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
815
758
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
|
816
759
|
&uncompressed));
|
@@ -832,7 +775,7 @@ TEST(Snappy, IOVecEdgeCases) {
|
|
832
775
|
iov[i].iov_len = kLengths[i];
|
833
776
|
}
|
834
777
|
|
835
|
-
string compressed;
|
778
|
+
std::string compressed;
|
836
779
|
Varint::Append32(&compressed, 22);
|
837
780
|
|
838
781
|
// A literal whose output crosses three blocks.
|
@@ -893,7 +836,7 @@ TEST(Snappy, IOVecLiteralOverflow) {
|
|
893
836
|
iov[i].iov_len = kLengths[i];
|
894
837
|
}
|
895
838
|
|
896
|
-
string compressed;
|
839
|
+
std::string compressed;
|
897
840
|
Varint::Append32(&compressed, 8);
|
898
841
|
|
899
842
|
AppendLiteral(&compressed, "12345678");
|
@@ -915,7 +858,7 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
915
858
|
iov[i].iov_len = kLengths[i];
|
916
859
|
}
|
917
860
|
|
918
|
-
string compressed;
|
861
|
+
std::string compressed;
|
919
862
|
Varint::Append32(&compressed, 8);
|
920
863
|
|
921
864
|
AppendLiteral(&compressed, "123");
|
@@ -929,8 +872,7 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
929
872
|
}
|
930
873
|
}
|
931
874
|
|
932
|
-
|
933
|
-
static bool CheckUncompressedLength(const string& compressed,
|
875
|
+
static bool CheckUncompressedLength(const std::string& compressed,
|
934
876
|
size_t* ulength) {
|
935
877
|
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
936
878
|
compressed.size(),
|
@@ -944,7 +886,7 @@ static bool CheckUncompressedLength(const string& compressed,
|
|
944
886
|
}
|
945
887
|
|
946
888
|
TEST(SnappyCorruption, TruncatedVarint) {
|
947
|
-
string compressed, uncompressed;
|
889
|
+
std::string compressed, uncompressed;
|
948
890
|
size_t ulength;
|
949
891
|
compressed.push_back('\xf0');
|
950
892
|
CHECK(!CheckUncompressedLength(compressed, &ulength));
|
@@ -954,13 +896,13 @@ TEST(SnappyCorruption, TruncatedVarint) {
|
|
954
896
|
}
|
955
897
|
|
956
898
|
TEST(SnappyCorruption, UnterminatedVarint) {
|
957
|
-
string compressed, uncompressed;
|
899
|
+
std::string compressed, uncompressed;
|
958
900
|
size_t ulength;
|
959
|
-
compressed.push_back(
|
960
|
-
compressed.push_back(
|
961
|
-
compressed.push_back(
|
962
|
-
compressed.push_back(
|
963
|
-
compressed.push_back(
|
901
|
+
compressed.push_back('\x80');
|
902
|
+
compressed.push_back('\x80');
|
903
|
+
compressed.push_back('\x80');
|
904
|
+
compressed.push_back('\x80');
|
905
|
+
compressed.push_back('\x80');
|
964
906
|
compressed.push_back(10);
|
965
907
|
CHECK(!CheckUncompressedLength(compressed, &ulength));
|
966
908
|
CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
@@ -968,18 +910,32 @@ TEST(SnappyCorruption, UnterminatedVarint) {
|
|
968
910
|
&uncompressed));
|
969
911
|
}
|
970
912
|
|
913
|
+
TEST(SnappyCorruption, OverflowingVarint) {
|
914
|
+
std::string compressed, uncompressed;
|
915
|
+
size_t ulength;
|
916
|
+
compressed.push_back('\xfb');
|
917
|
+
compressed.push_back('\xff');
|
918
|
+
compressed.push_back('\xff');
|
919
|
+
compressed.push_back('\xff');
|
920
|
+
compressed.push_back('\x7f');
|
921
|
+
CHECK(!CheckUncompressedLength(compressed, &ulength));
|
922
|
+
CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
923
|
+
CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
|
924
|
+
&uncompressed));
|
925
|
+
}
|
926
|
+
|
971
927
|
TEST(Snappy, ReadPastEndOfBuffer) {
|
972
928
|
// Check that we do not read past end of input
|
973
929
|
|
974
930
|
// Make a compressed string that ends with a single-byte literal
|
975
|
-
string compressed;
|
931
|
+
std::string compressed;
|
976
932
|
Varint::Append32(&compressed, 1);
|
977
933
|
AppendLiteral(&compressed, "x");
|
978
934
|
|
979
|
-
string uncompressed;
|
935
|
+
std::string uncompressed;
|
980
936
|
DataEndingAtUnreadablePage c(compressed);
|
981
937
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
982
|
-
CHECK_EQ(uncompressed, string("x"));
|
938
|
+
CHECK_EQ(uncompressed, std::string("x"));
|
983
939
|
}
|
984
940
|
|
985
941
|
// Check for an infinite loop caused by a copy with offset==0
|
@@ -998,11 +954,13 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
|
|
998
954
|
EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
|
999
955
|
}
|
1000
956
|
|
1001
|
-
|
1002
957
|
namespace {
|
1003
958
|
|
1004
959
|
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
|
1005
|
-
|
960
|
+
std::pair<size_t, bool> p =
|
961
|
+
snappy::internal::FindMatchLength(s1, s2, s2 + length);
|
962
|
+
CHECK_EQ(p.first < 8, p.second);
|
963
|
+
return p.first;
|
1006
964
|
}
|
1007
965
|
|
1008
966
|
} // namespace
|
@@ -1098,22 +1056,24 @@ TEST(Snappy, FindMatchLength) {
|
|
1098
1056
|
}
|
1099
1057
|
|
1100
1058
|
TEST(Snappy, FindMatchLengthRandom) {
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1059
|
+
constexpr int kNumTrials = 10000;
|
1060
|
+
constexpr int kTypicalLength = 10;
|
1061
|
+
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
1062
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
1063
|
+
std::bernoulli_distribution one_in_two(1.0 / 2);
|
1064
|
+
std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
|
1104
1065
|
|
1105
1066
|
for (int i = 0; i < kNumTrials; i++) {
|
1106
|
-
string s, t;
|
1107
|
-
char a =
|
1108
|
-
char b =
|
1109
|
-
while (!
|
1110
|
-
s.push_back(
|
1111
|
-
t.push_back(
|
1067
|
+
std::string s, t;
|
1068
|
+
char a = static_cast<char>(uniform_byte(rng));
|
1069
|
+
char b = static_cast<char>(uniform_byte(rng));
|
1070
|
+
while (!one_in_typical_length(rng)) {
|
1071
|
+
s.push_back(one_in_two(rng) ? a : b);
|
1072
|
+
t.push_back(one_in_two(rng) ? a : b);
|
1112
1073
|
}
|
1113
1074
|
DataEndingAtUnreadablePage u(s);
|
1114
1075
|
DataEndingAtUnreadablePage v(t);
|
1115
|
-
int matched =
|
1116
|
-
u.data(), v.data(), v.data() + t.size());
|
1076
|
+
int matched = TestFindMatchLength(u.data(), v.data(), t.size());
|
1117
1077
|
if (matched == t.size()) {
|
1118
1078
|
EXPECT_EQ(s, t);
|
1119
1079
|
} else {
|
@@ -1125,51 +1085,140 @@ TEST(Snappy, FindMatchLengthRandom) {
|
|
1125
1085
|
}
|
1126
1086
|
}
|
1127
1087
|
|
1088
|
+
static uint16 MakeEntry(unsigned int extra,
|
1089
|
+
unsigned int len,
|
1090
|
+
unsigned int copy_offset) {
|
1091
|
+
// Check that all of the fields fit within the allocated space
|
1092
|
+
assert(extra == (extra & 0x7)); // At most 3 bits
|
1093
|
+
assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
|
1094
|
+
assert(len == (len & 0x7f)); // At most 7 bits
|
1095
|
+
return len | (copy_offset << 8) | (extra << 11);
|
1096
|
+
}
|
1097
|
+
|
1098
|
+
// Check that the decompression table is correct, and optionally print out
|
1099
|
+
// the computed one.
|
1100
|
+
TEST(Snappy, VerifyCharTable) {
|
1101
|
+
using snappy::internal::LITERAL;
|
1102
|
+
using snappy::internal::COPY_1_BYTE_OFFSET;
|
1103
|
+
using snappy::internal::COPY_2_BYTE_OFFSET;
|
1104
|
+
using snappy::internal::COPY_4_BYTE_OFFSET;
|
1105
|
+
using snappy::internal::char_table;
|
1106
|
+
|
1107
|
+
uint16 dst[256];
|
1108
|
+
|
1109
|
+
// Place invalid entries in all places to detect missing initialization
|
1110
|
+
int assigned = 0;
|
1111
|
+
for (int i = 0; i < 256; i++) {
|
1112
|
+
dst[i] = 0xffff;
|
1113
|
+
}
|
1114
|
+
|
1115
|
+
// Small LITERAL entries. We store (len-1) in the top 6 bits.
|
1116
|
+
for (unsigned int len = 1; len <= 60; len++) {
|
1117
|
+
dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
|
1118
|
+
assigned++;
|
1119
|
+
}
|
1120
|
+
|
1121
|
+
// Large LITERAL entries. We use 60..63 in the high 6 bits to
|
1122
|
+
// encode the number of bytes of length info that follow the opcode.
|
1123
|
+
for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
|
1124
|
+
// We set the length field in the lookup table to 1 because extra
|
1125
|
+
// bytes encode len-1.
|
1126
|
+
dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
1127
|
+
assigned++;
|
1128
|
+
}
|
1129
|
+
|
1130
|
+
// COPY_1_BYTE_OFFSET.
|
1131
|
+
//
|
1132
|
+
// The tag byte in the compressed data stores len-4 in 3 bits, and
|
1133
|
+
// offset/256 in 5 bits. offset%256 is stored in the next byte.
|
1134
|
+
//
|
1135
|
+
// This format is used for length in range [4..11] and offset in
|
1136
|
+
// range [0..2047]
|
1137
|
+
for (unsigned int len = 4; len < 12; len++) {
|
1138
|
+
for (unsigned int offset = 0; offset < 2048; offset += 256) {
|
1139
|
+
dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
|
1140
|
+
MakeEntry(1, len, offset>>8);
|
1141
|
+
assigned++;
|
1142
|
+
}
|
1143
|
+
}
|
1144
|
+
|
1145
|
+
// COPY_2_BYTE_OFFSET.
|
1146
|
+
// Tag contains len-1 in top 6 bits, and offset in next two bytes.
|
1147
|
+
for (unsigned int len = 1; len <= 64; len++) {
|
1148
|
+
dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
|
1149
|
+
assigned++;
|
1150
|
+
}
|
1151
|
+
|
1152
|
+
// COPY_4_BYTE_OFFSET.
|
1153
|
+
// Tag contents len-1 in top 6 bits, and offset in next four bytes.
|
1154
|
+
for (unsigned int len = 1; len <= 64; len++) {
|
1155
|
+
dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
|
1156
|
+
assigned++;
|
1157
|
+
}
|
1158
|
+
|
1159
|
+
// Check that each entry was initialized exactly once.
|
1160
|
+
EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
|
1161
|
+
for (int i = 0; i < 256; i++) {
|
1162
|
+
EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
|
1163
|
+
}
|
1164
|
+
|
1165
|
+
if (FLAGS_snappy_dump_decompression_table) {
|
1166
|
+
printf("static const uint16 char_table[256] = {\n ");
|
1167
|
+
for (int i = 0; i < 256; i++) {
|
1168
|
+
printf("0x%04x%s",
|
1169
|
+
dst[i],
|
1170
|
+
((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
|
1171
|
+
}
|
1172
|
+
printf("};\n");
|
1173
|
+
}
|
1174
|
+
|
1175
|
+
// Check that computed table matched recorded table.
|
1176
|
+
for (int i = 0; i < 256; i++) {
|
1177
|
+
EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
|
1178
|
+
}
|
1179
|
+
}
|
1128
1180
|
|
1129
1181
|
static void CompressFile(const char* fname) {
|
1130
|
-
string fullinput;
|
1131
|
-
file::GetContents(fname, &fullinput, file::Defaults())
|
1182
|
+
std::string fullinput;
|
1183
|
+
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1132
1184
|
|
1133
|
-
string compressed;
|
1185
|
+
std::string compressed;
|
1134
1186
|
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
|
1135
1187
|
|
1136
|
-
file::SetContents(string(fname).append(".comp"), compressed,
|
1137
|
-
|
1188
|
+
CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
|
1189
|
+
file::Defaults()));
|
1138
1190
|
}
|
1139
1191
|
|
1140
1192
|
static void UncompressFile(const char* fname) {
|
1141
|
-
string fullinput;
|
1142
|
-
file::GetContents(fname, &fullinput, file::Defaults())
|
1193
|
+
std::string fullinput;
|
1194
|
+
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1143
1195
|
|
1144
1196
|
size_t uncompLength;
|
1145
1197
|
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
|
1146
1198
|
|
1147
|
-
string uncompressed;
|
1199
|
+
std::string uncompressed;
|
1148
1200
|
uncompressed.resize(uncompLength);
|
1149
1201
|
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
|
1150
1202
|
|
1151
|
-
file::SetContents(string(fname).append(".uncomp"), uncompressed,
|
1152
|
-
|
1203
|
+
CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
|
1204
|
+
file::Defaults()));
|
1153
1205
|
}
|
1154
1206
|
|
1155
1207
|
static void MeasureFile(const char* fname) {
|
1156
|
-
string fullinput;
|
1157
|
-
file::GetContents(fname, &fullinput, file::Defaults())
|
1208
|
+
std::string fullinput;
|
1209
|
+
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1158
1210
|
printf("%-40s :\n", fname);
|
1159
1211
|
|
1160
1212
|
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
|
1161
1213
|
int end_len = fullinput.size();
|
1162
1214
|
if (FLAGS_end_len >= 0) {
|
1163
|
-
end_len = min<int>(fullinput.size(), FLAGS_end_len);
|
1215
|
+
end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
|
1164
1216
|
}
|
1165
1217
|
for (int len = start_len; len <= end_len; len++) {
|
1166
1218
|
const char* const input = fullinput.data();
|
1167
1219
|
int repeats = (FLAGS_bytes + len) / (len + 1);
|
1168
1220
|
if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
|
1169
1221
|
if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
|
1170
|
-
if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
|
1171
|
-
if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
|
1172
|
-
if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
|
1173
1222
|
if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
|
1174
1223
|
|
1175
1224
|
// For block-size based measurements
|
@@ -1209,10 +1258,10 @@ static void BM_UFlat(int iters, int arg) {
|
|
1209
1258
|
// Pick file to process based on "arg"
|
1210
1259
|
CHECK_GE(arg, 0);
|
1211
1260
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1212
|
-
string contents =
|
1213
|
-
|
1261
|
+
std::string contents =
|
1262
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1214
1263
|
|
1215
|
-
string zcontents;
|
1264
|
+
std::string zcontents;
|
1216
1265
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1217
1266
|
char* dst = new char[contents.size()];
|
1218
1267
|
|
@@ -1235,10 +1284,10 @@ static void BM_UValidate(int iters, int arg) {
|
|
1235
1284
|
// Pick file to process based on "arg"
|
1236
1285
|
CHECK_GE(arg, 0);
|
1237
1286
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1238
|
-
string contents =
|
1239
|
-
|
1287
|
+
std::string contents =
|
1288
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1240
1289
|
|
1241
|
-
string zcontents;
|
1290
|
+
std::string zcontents;
|
1242
1291
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1243
1292
|
|
1244
1293
|
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
@@ -1258,10 +1307,10 @@ static void BM_UIOVec(int iters, int arg) {
|
|
1258
1307
|
// Pick file to process based on "arg"
|
1259
1308
|
CHECK_GE(arg, 0);
|
1260
1309
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1261
|
-
string contents =
|
1262
|
-
|
1310
|
+
std::string contents =
|
1311
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1263
1312
|
|
1264
|
-
string zcontents;
|
1313
|
+
std::string zcontents;
|
1265
1314
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1266
1315
|
|
1267
1316
|
// Uncompress into an iovec containing ten entries.
|
@@ -1298,6 +1347,37 @@ static void BM_UIOVec(int iters, int arg) {
|
|
1298
1347
|
}
|
1299
1348
|
BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
|
1300
1349
|
|
1350
|
+
static void BM_UFlatSink(int iters, int arg) {
|
1351
|
+
StopBenchmarkTiming();
|
1352
|
+
|
1353
|
+
// Pick file to process based on "arg"
|
1354
|
+
CHECK_GE(arg, 0);
|
1355
|
+
CHECK_LT(arg, ARRAYSIZE(files));
|
1356
|
+
std::string contents =
|
1357
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1358
|
+
|
1359
|
+
std::string zcontents;
|
1360
|
+
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1361
|
+
char* dst = new char[contents.size()];
|
1362
|
+
|
1363
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1364
|
+
static_cast<int64>(contents.size()));
|
1365
|
+
SetBenchmarkLabel(files[arg].label);
|
1366
|
+
StartBenchmarkTiming();
|
1367
|
+
while (iters-- > 0) {
|
1368
|
+
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
|
1369
|
+
snappy::UncheckedByteArraySink sink(dst);
|
1370
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
1371
|
+
}
|
1372
|
+
StopBenchmarkTiming();
|
1373
|
+
|
1374
|
+
std::string s(dst, contents.size());
|
1375
|
+
CHECK_EQ(contents, s);
|
1376
|
+
|
1377
|
+
delete[] dst;
|
1378
|
+
}
|
1379
|
+
|
1380
|
+
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1301
1381
|
|
1302
1382
|
static void BM_ZFlat(int iters, int arg) {
|
1303
1383
|
StopBenchmarkTiming();
|
@@ -1305,8 +1385,8 @@ static void BM_ZFlat(int iters, int arg) {
|
|
1305
1385
|
// Pick file to process based on "arg"
|
1306
1386
|
CHECK_GE(arg, 0);
|
1307
1387
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1308
|
-
string contents =
|
1309
|
-
|
1388
|
+
std::string contents =
|
1389
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1310
1390
|
|
1311
1391
|
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
1312
1392
|
|
@@ -1321,31 +1401,102 @@ static void BM_ZFlat(int iters, int arg) {
|
|
1321
1401
|
StopBenchmarkTiming();
|
1322
1402
|
const double compression_ratio =
|
1323
1403
|
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
1324
|
-
SetBenchmarkLabel(
|
1325
|
-
|
1326
|
-
VLOG(0) <<
|
1327
|
-
|
1404
|
+
SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
|
1405
|
+
100.0 * compression_ratio));
|
1406
|
+
VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
|
1407
|
+
files[arg].label, static_cast<int>(contents.size()),
|
1408
|
+
static_cast<int>(zsize));
|
1328
1409
|
delete[] dst;
|
1329
1410
|
}
|
1330
1411
|
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1331
1412
|
|
1413
|
+
static void BM_ZFlatAll(int iters, int arg) {
|
1414
|
+
StopBenchmarkTiming();
|
1332
1415
|
|
1333
|
-
|
1416
|
+
CHECK_EQ(arg, 0);
|
1417
|
+
const int num_files = ARRAYSIZE(files);
|
1334
1418
|
|
1419
|
+
std::vector<std::string> contents(num_files);
|
1420
|
+
std::vector<char*> dst(num_files);
|
1421
|
+
|
1422
|
+
int64 total_contents_size = 0;
|
1423
|
+
for (int i = 0; i < num_files; ++i) {
|
1424
|
+
contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
|
1425
|
+
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
|
1426
|
+
total_contents_size += contents[i].size();
|
1427
|
+
}
|
1428
|
+
|
1429
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1430
|
+
StartBenchmarkTiming();
|
1431
|
+
|
1432
|
+
size_t zsize = 0;
|
1433
|
+
while (iters-- > 0) {
|
1434
|
+
for (int i = 0; i < num_files; ++i) {
|
1435
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1436
|
+
&zsize);
|
1437
|
+
}
|
1438
|
+
}
|
1439
|
+
StopBenchmarkTiming();
|
1440
|
+
|
1441
|
+
for (int i = 0; i < num_files; ++i) {
|
1442
|
+
delete[] dst[i];
|
1443
|
+
}
|
1444
|
+
SetBenchmarkLabel(StrFormat("%d files", num_files));
|
1445
|
+
}
|
1446
|
+
BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
|
1447
|
+
|
1448
|
+
static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
|
1449
|
+
StopBenchmarkTiming();
|
1450
|
+
|
1451
|
+
CHECK_EQ(arg, 0);
|
1452
|
+
CHECK_GT(ARRAYSIZE(files), 0);
|
1453
|
+
const std::string base_content =
|
1454
|
+
ReadTestDataFile(files[0].filename, files[0].size_limit);
|
1455
|
+
|
1456
|
+
std::vector<std::string> contents;
|
1457
|
+
std::vector<char*> dst;
|
1458
|
+
int64 total_contents_size = 0;
|
1459
|
+
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
|
1460
|
+
++table_bits) {
|
1461
|
+
std::string content = base_content;
|
1462
|
+
content.resize(1 << table_bits);
|
1463
|
+
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
|
1464
|
+
total_contents_size += content.size();
|
1465
|
+
contents.push_back(std::move(content));
|
1466
|
+
}
|
1467
|
+
|
1468
|
+
size_t zsize = 0;
|
1469
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1470
|
+
StartBenchmarkTiming();
|
1471
|
+
while (iters-- > 0) {
|
1472
|
+
for (int i = 0; i < contents.size(); ++i) {
|
1473
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1474
|
+
&zsize);
|
1475
|
+
}
|
1476
|
+
}
|
1477
|
+
StopBenchmarkTiming();
|
1478
|
+
|
1479
|
+
for (int i = 0; i < dst.size(); ++i) {
|
1480
|
+
delete[] dst[i];
|
1481
|
+
}
|
1482
|
+
SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
|
1483
|
+
}
|
1484
|
+
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
|
1485
|
+
|
1486
|
+
} // namespace snappy
|
1335
1487
|
|
1336
1488
|
int main(int argc, char** argv) {
|
1337
1489
|
InitGoogle(argv[0], &argc, &argv, true);
|
1338
1490
|
RunSpecifiedBenchmarks();
|
1339
1491
|
|
1340
|
-
|
1341
1492
|
if (argc >= 2) {
|
1342
1493
|
for (int arg = 1; arg < argc; arg++) {
|
1343
1494
|
if (FLAGS_write_compressed) {
|
1344
|
-
CompressFile(argv[arg]);
|
1495
|
+
snappy::CompressFile(argv[arg]);
|
1345
1496
|
} else if (FLAGS_write_uncompressed) {
|
1346
|
-
UncompressFile(argv[arg]);
|
1497
|
+
snappy::UncompressFile(argv[arg]);
|
1347
1498
|
} else {
|
1348
|
-
MeasureFile(argv[arg]);
|
1499
|
+
snappy::MeasureFile(argv[arg]);
|
1349
1500
|
}
|
1350
1501
|
}
|
1351
1502
|
return 0;
|