snappy 0.2.0-java → 0.4.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.dockerignore +2 -0
- data/.github/workflows/main.yml +2 -2
- data/.gitignore +2 -1
- data/.gitmodules +1 -1
- data/Dockerfile +13 -0
- data/README.md +18 -2
- data/ext/extconf.rb +15 -11
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy_ext.jar +0 -0
- data/snappy.gemspec +1 -0
- data/test.sh +3 -0
- data/vendor/snappy/CMakeLists.txt +150 -27
- data/vendor/snappy/CONTRIBUTING.md +9 -4
- data/vendor/snappy/NEWS +12 -0
- data/vendor/snappy/README.md +52 -35
- data/vendor/snappy/cmake/config.h.in +28 -24
- data/vendor/snappy/snappy-internal.h +189 -25
- data/vendor/snappy/snappy-sinksource.cc +26 -9
- data/vendor/snappy/snappy-sinksource.h +11 -11
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +227 -308
- data/vendor/snappy/snappy-stubs-public.h.in +0 -11
- data/vendor/snappy/snappy-test.cc +88 -198
- data/vendor/snappy/snappy-test.h +102 -285
- data/vendor/snappy/snappy.cc +1176 -410
- data/vendor/snappy/snappy.h +19 -4
- data/vendor/snappy/snappy_benchmark.cc +378 -0
- data/vendor/snappy/snappy_compress_fuzzer.cc +3 -2
- data/vendor/snappy/snappy_test_data.cc +57 -0
- data/vendor/snappy/snappy_test_data.h +68 -0
- data/vendor/snappy/snappy_test_tool.cc +471 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +3 -2
- data/vendor/snappy/snappy_unittest.cc +170 -666
- metadata +10 -3
@@ -26,44 +26,31 @@
|
|
26
26
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
27
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
28
|
|
29
|
-
#include <math.h>
|
30
|
-
#include <stdlib.h>
|
31
|
-
|
32
29
|
#include <algorithm>
|
30
|
+
#include <cmath>
|
31
|
+
#include <cstdlib>
|
33
32
|
#include <random>
|
34
33
|
#include <string>
|
35
34
|
#include <utility>
|
36
35
|
#include <vector>
|
37
36
|
|
38
|
-
#include "snappy.h"
|
39
|
-
#include "snappy-internal.h"
|
40
37
|
#include "snappy-test.h"
|
38
|
+
|
39
|
+
#include "gtest/gtest.h"
|
40
|
+
|
41
|
+
#include "snappy-internal.h"
|
41
42
|
#include "snappy-sinksource.h"
|
43
|
+
#include "snappy.h"
|
44
|
+
#include "snappy_test_data.h"
|
42
45
|
|
43
|
-
|
44
|
-
"Starting prefix size for testing (-1: just full file contents)");
|
45
|
-
DEFINE_int32(end_len, -1,
|
46
|
-
"Starting prefix size for testing (-1: just full file contents)");
|
47
|
-
DEFINE_int32(bytes, 10485760,
|
48
|
-
"How many bytes to compress/uncompress per file for timing");
|
49
|
-
|
50
|
-
DEFINE_bool(zlib, false,
|
51
|
-
"Run zlib compression (http://www.zlib.net)");
|
52
|
-
DEFINE_bool(lzo, false,
|
53
|
-
"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
|
54
|
-
DEFINE_bool(snappy, true, "Run snappy compression");
|
55
|
-
|
56
|
-
DEFINE_bool(write_compressed, false,
|
57
|
-
"Write compressed versions of each file to <file>.comp");
|
58
|
-
DEFINE_bool(write_uncompressed, false,
|
59
|
-
"Write uncompressed versions of each file to <file>.uncomp");
|
60
|
-
|
61
|
-
DEFINE_bool(snappy_dump_decompression_table, false,
|
46
|
+
SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
|
62
47
|
"If true, we print the decompression table during tests.");
|
63
48
|
|
64
49
|
namespace snappy {
|
65
50
|
|
66
|
-
|
51
|
+
namespace {
|
52
|
+
|
53
|
+
#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
|
67
54
|
|
68
55
|
// To test against code that reads beyond its input, this class copies a
|
69
56
|
// string to a newly allocated group of pages, the last of which
|
@@ -84,7 +71,7 @@ class DataEndingAtUnreadablePage {
|
|
84
71
|
CHECK_NE(MAP_FAILED, mem_);
|
85
72
|
protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
|
86
73
|
char* dst = protected_page_ - size;
|
87
|
-
memcpy(dst, s.data(), size);
|
74
|
+
std::memcpy(dst, s.data(), size);
|
88
75
|
data_ = dst;
|
89
76
|
size_ = size;
|
90
77
|
// Make guard page unreadable.
|
@@ -109,256 +96,14 @@ class DataEndingAtUnreadablePage {
|
|
109
96
|
size_t size_;
|
110
97
|
};
|
111
98
|
|
112
|
-
#else //
|
99
|
+
#else // HAVE_FUNC_MMAP) && HAVE_FUNC_SYSCONF
|
113
100
|
|
114
101
|
// Fallback for systems without mmap.
|
115
102
|
using DataEndingAtUnreadablePage = std::string;
|
116
103
|
|
117
104
|
#endif
|
118
105
|
|
119
|
-
|
120
|
-
ZLIB, LZO, SNAPPY
|
121
|
-
};
|
122
|
-
|
123
|
-
const char* names[] = {
|
124
|
-
"ZLIB", "LZO", "SNAPPY"
|
125
|
-
};
|
126
|
-
|
127
|
-
static size_t MinimumRequiredOutputSpace(size_t input_size,
|
128
|
-
CompressorType comp) {
|
129
|
-
switch (comp) {
|
130
|
-
#ifdef ZLIB_VERSION
|
131
|
-
case ZLIB:
|
132
|
-
return ZLib::MinCompressbufSize(input_size);
|
133
|
-
#endif // ZLIB_VERSION
|
134
|
-
|
135
|
-
#ifdef LZO_VERSION
|
136
|
-
case LZO:
|
137
|
-
return input_size + input_size/64 + 16 + 3;
|
138
|
-
#endif // LZO_VERSION
|
139
|
-
|
140
|
-
case SNAPPY:
|
141
|
-
return snappy::MaxCompressedLength(input_size);
|
142
|
-
|
143
|
-
default:
|
144
|
-
LOG(FATAL) << "Unknown compression type number " << comp;
|
145
|
-
return 0;
|
146
|
-
}
|
147
|
-
}
|
148
|
-
|
149
|
-
// Returns true if we successfully compressed, false otherwise.
|
150
|
-
//
|
151
|
-
// If compressed_is_preallocated is set, do not resize the compressed buffer.
|
152
|
-
// This is typically what you want for a benchmark, in order to not spend
|
153
|
-
// time in the memory allocator. If you do set this flag, however,
|
154
|
-
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
|
155
|
-
// number of bytes, and may contain junk bytes at the end after return.
|
156
|
-
static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
157
|
-
std::string* compressed, bool compressed_is_preallocated) {
|
158
|
-
if (!compressed_is_preallocated) {
|
159
|
-
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
|
160
|
-
}
|
161
|
-
|
162
|
-
switch (comp) {
|
163
|
-
#ifdef ZLIB_VERSION
|
164
|
-
case ZLIB: {
|
165
|
-
ZLib zlib;
|
166
|
-
uLongf destlen = compressed->size();
|
167
|
-
int ret = zlib.Compress(
|
168
|
-
reinterpret_cast<Bytef*>(string_as_array(compressed)),
|
169
|
-
&destlen,
|
170
|
-
reinterpret_cast<const Bytef*>(input),
|
171
|
-
input_size);
|
172
|
-
CHECK_EQ(Z_OK, ret);
|
173
|
-
if (!compressed_is_preallocated) {
|
174
|
-
compressed->resize(destlen);
|
175
|
-
}
|
176
|
-
return true;
|
177
|
-
}
|
178
|
-
#endif // ZLIB_VERSION
|
179
|
-
|
180
|
-
#ifdef LZO_VERSION
|
181
|
-
case LZO: {
|
182
|
-
unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
|
183
|
-
lzo_uint destlen;
|
184
|
-
int ret = lzo1x_1_15_compress(
|
185
|
-
reinterpret_cast<const uint8*>(input),
|
186
|
-
input_size,
|
187
|
-
reinterpret_cast<uint8*>(string_as_array(compressed)),
|
188
|
-
&destlen,
|
189
|
-
mem);
|
190
|
-
CHECK_EQ(LZO_E_OK, ret);
|
191
|
-
delete[] mem;
|
192
|
-
if (!compressed_is_preallocated) {
|
193
|
-
compressed->resize(destlen);
|
194
|
-
}
|
195
|
-
break;
|
196
|
-
}
|
197
|
-
#endif // LZO_VERSION
|
198
|
-
|
199
|
-
case SNAPPY: {
|
200
|
-
size_t destlen;
|
201
|
-
snappy::RawCompress(input, input_size,
|
202
|
-
string_as_array(compressed),
|
203
|
-
&destlen);
|
204
|
-
CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
|
205
|
-
if (!compressed_is_preallocated) {
|
206
|
-
compressed->resize(destlen);
|
207
|
-
}
|
208
|
-
break;
|
209
|
-
}
|
210
|
-
|
211
|
-
default: {
|
212
|
-
return false; // the asked-for library wasn't compiled in
|
213
|
-
}
|
214
|
-
}
|
215
|
-
return true;
|
216
|
-
}
|
217
|
-
|
218
|
-
static bool Uncompress(const std::string& compressed, CompressorType comp,
|
219
|
-
int size, std::string* output) {
|
220
|
-
switch (comp) {
|
221
|
-
#ifdef ZLIB_VERSION
|
222
|
-
case ZLIB: {
|
223
|
-
output->resize(size);
|
224
|
-
ZLib zlib;
|
225
|
-
uLongf destlen = output->size();
|
226
|
-
int ret = zlib.Uncompress(
|
227
|
-
reinterpret_cast<Bytef*>(string_as_array(output)),
|
228
|
-
&destlen,
|
229
|
-
reinterpret_cast<const Bytef*>(compressed.data()),
|
230
|
-
compressed.size());
|
231
|
-
CHECK_EQ(Z_OK, ret);
|
232
|
-
CHECK_EQ(static_cast<uLongf>(size), destlen);
|
233
|
-
break;
|
234
|
-
}
|
235
|
-
#endif // ZLIB_VERSION
|
236
|
-
|
237
|
-
#ifdef LZO_VERSION
|
238
|
-
case LZO: {
|
239
|
-
output->resize(size);
|
240
|
-
lzo_uint destlen;
|
241
|
-
int ret = lzo1x_decompress(
|
242
|
-
reinterpret_cast<const uint8*>(compressed.data()),
|
243
|
-
compressed.size(),
|
244
|
-
reinterpret_cast<uint8*>(string_as_array(output)),
|
245
|
-
&destlen,
|
246
|
-
NULL);
|
247
|
-
CHECK_EQ(LZO_E_OK, ret);
|
248
|
-
CHECK_EQ(static_cast<lzo_uint>(size), destlen);
|
249
|
-
break;
|
250
|
-
}
|
251
|
-
#endif // LZO_VERSION
|
252
|
-
|
253
|
-
case SNAPPY: {
|
254
|
-
snappy::RawUncompress(compressed.data(), compressed.size(),
|
255
|
-
string_as_array(output));
|
256
|
-
break;
|
257
|
-
}
|
258
|
-
|
259
|
-
default: {
|
260
|
-
return false; // the asked-for library wasn't compiled in
|
261
|
-
}
|
262
|
-
}
|
263
|
-
return true;
|
264
|
-
}
|
265
|
-
|
266
|
-
static void Measure(const char* data,
|
267
|
-
size_t length,
|
268
|
-
CompressorType comp,
|
269
|
-
int repeats,
|
270
|
-
int block_size) {
|
271
|
-
// Run tests a few time and pick median running times
|
272
|
-
static const int kRuns = 5;
|
273
|
-
double ctime[kRuns];
|
274
|
-
double utime[kRuns];
|
275
|
-
int compressed_size = 0;
|
276
|
-
|
277
|
-
{
|
278
|
-
// Chop the input into blocks
|
279
|
-
int num_blocks = (length + block_size - 1) / block_size;
|
280
|
-
std::vector<const char*> input(num_blocks);
|
281
|
-
std::vector<size_t> input_length(num_blocks);
|
282
|
-
std::vector<std::string> compressed(num_blocks);
|
283
|
-
std::vector<std::string> output(num_blocks);
|
284
|
-
for (int b = 0; b < num_blocks; b++) {
|
285
|
-
int input_start = b * block_size;
|
286
|
-
int input_limit = std::min<int>((b+1)*block_size, length);
|
287
|
-
input[b] = data+input_start;
|
288
|
-
input_length[b] = input_limit-input_start;
|
289
|
-
|
290
|
-
// Pre-grow the output buffer so we don't measure string append time.
|
291
|
-
compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
|
292
|
-
}
|
293
|
-
|
294
|
-
// First, try one trial compression to make sure the code is compiled in
|
295
|
-
if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
|
296
|
-
LOG(WARNING) << "Skipping " << names[comp] << ": "
|
297
|
-
<< "library not compiled in";
|
298
|
-
return;
|
299
|
-
}
|
300
|
-
|
301
|
-
for (int run = 0; run < kRuns; run++) {
|
302
|
-
CycleTimer ctimer, utimer;
|
303
|
-
|
304
|
-
for (int b = 0; b < num_blocks; b++) {
|
305
|
-
// Pre-grow the output buffer so we don't measure string append time.
|
306
|
-
compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
|
307
|
-
}
|
308
|
-
|
309
|
-
ctimer.Start();
|
310
|
-
for (int b = 0; b < num_blocks; b++)
|
311
|
-
for (int i = 0; i < repeats; i++)
|
312
|
-
Compress(input[b], input_length[b], comp, &compressed[b], true);
|
313
|
-
ctimer.Stop();
|
314
|
-
|
315
|
-
// Compress once more, with resizing, so we don't leave junk
|
316
|
-
// at the end that will confuse the decompressor.
|
317
|
-
for (int b = 0; b < num_blocks; b++) {
|
318
|
-
Compress(input[b], input_length[b], comp, &compressed[b], false);
|
319
|
-
}
|
320
|
-
|
321
|
-
for (int b = 0; b < num_blocks; b++) {
|
322
|
-
output[b].resize(input_length[b]);
|
323
|
-
}
|
324
|
-
|
325
|
-
utimer.Start();
|
326
|
-
for (int i = 0; i < repeats; i++)
|
327
|
-
for (int b = 0; b < num_blocks; b++)
|
328
|
-
Uncompress(compressed[b], comp, input_length[b], &output[b]);
|
329
|
-
utimer.Stop();
|
330
|
-
|
331
|
-
ctime[run] = ctimer.Get();
|
332
|
-
utime[run] = utimer.Get();
|
333
|
-
}
|
334
|
-
|
335
|
-
compressed_size = 0;
|
336
|
-
for (size_t i = 0; i < compressed.size(); i++) {
|
337
|
-
compressed_size += compressed[i].size();
|
338
|
-
}
|
339
|
-
}
|
340
|
-
|
341
|
-
std::sort(ctime, ctime + kRuns);
|
342
|
-
std::sort(utime, utime + kRuns);
|
343
|
-
const int med = kRuns/2;
|
344
|
-
|
345
|
-
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
|
346
|
-
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
|
347
|
-
std::string x = names[comp];
|
348
|
-
x += ":";
|
349
|
-
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
|
350
|
-
: std::string("?");
|
351
|
-
printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
|
352
|
-
"comp %5.1f MB/s uncomp %5s MB/s\n",
|
353
|
-
x.c_str(),
|
354
|
-
block_size/(1<<20),
|
355
|
-
static_cast<int>(length), static_cast<uint32>(compressed_size),
|
356
|
-
(compressed_size * 100.0) / std::max<int>(1, length),
|
357
|
-
comp_rate,
|
358
|
-
urate.c_str());
|
359
|
-
}
|
360
|
-
|
361
|
-
static int VerifyString(const std::string& input) {
|
106
|
+
int VerifyString(const std::string& input) {
|
362
107
|
std::string compressed;
|
363
108
|
DataEndingAtUnreadablePage i(input);
|
364
109
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
@@ -374,7 +119,7 @@ static int VerifyString(const std::string& input) {
|
|
374
119
|
return uncompressed.size();
|
375
120
|
}
|
376
121
|
|
377
|
-
|
122
|
+
void VerifyStringSink(const std::string& input) {
|
378
123
|
std::string compressed;
|
379
124
|
DataEndingAtUnreadablePage i(input);
|
380
125
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
@@ -392,26 +137,15 @@ static void VerifyStringSink(const std::string& input) {
|
|
392
137
|
CHECK_EQ(uncompressed, input);
|
393
138
|
}
|
394
139
|
|
395
|
-
|
396
|
-
std::string compressed;
|
397
|
-
DataEndingAtUnreadablePage i(input);
|
398
|
-
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
399
|
-
CHECK_EQ(written, compressed.size());
|
400
|
-
CHECK_LE(compressed.size(),
|
401
|
-
snappy::MaxCompressedLength(input.size()));
|
402
|
-
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
403
|
-
|
404
|
-
// Try uncompressing into an iovec containing a random number of entries
|
405
|
-
// ranging from 1 to 10.
|
406
|
-
char* buf = new char[input.size()];
|
140
|
+
struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) {
|
407
141
|
std::minstd_rand0 rng(input.size());
|
408
142
|
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
|
409
|
-
|
143
|
+
num = uniform_1_to_10(rng);
|
410
144
|
if (input.size() < num) {
|
411
145
|
num = input.size();
|
412
146
|
}
|
413
147
|
struct iovec* iov = new iovec[num];
|
414
|
-
|
148
|
+
size_t used_so_far = 0;
|
415
149
|
std::bernoulli_distribution one_in_five(1.0 / 5);
|
416
150
|
for (size_t i = 0; i < num; ++i) {
|
417
151
|
assert(used_so_far < input.size());
|
@@ -430,8 +164,40 @@ static void VerifyIOVec(const std::string& input) {
|
|
430
164
|
}
|
431
165
|
used_so_far += iov[i].iov_len;
|
432
166
|
}
|
433
|
-
|
434
|
-
|
167
|
+
return iov;
|
168
|
+
}
|
169
|
+
|
170
|
+
int VerifyIOVecSource(const std::string& input) {
|
171
|
+
std::string compressed;
|
172
|
+
std::string copy = input;
|
173
|
+
char* buf = const_cast<char*>(copy.data());
|
174
|
+
size_t num = 0;
|
175
|
+
struct iovec* iov = GetIOVec(input, buf, num);
|
176
|
+
const size_t written = snappy::CompressFromIOVec(iov, num, &compressed);
|
177
|
+
CHECK_EQ(written, compressed.size());
|
178
|
+
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
|
179
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
180
|
+
|
181
|
+
std::string uncompressed;
|
182
|
+
DataEndingAtUnreadablePage c(compressed);
|
183
|
+
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
184
|
+
CHECK_EQ(uncompressed, input);
|
185
|
+
delete[] iov;
|
186
|
+
return uncompressed.size();
|
187
|
+
}
|
188
|
+
|
189
|
+
void VerifyIOVecSink(const std::string& input) {
|
190
|
+
std::string compressed;
|
191
|
+
DataEndingAtUnreadablePage i(input);
|
192
|
+
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
193
|
+
CHECK_EQ(written, compressed.size());
|
194
|
+
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
|
195
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
196
|
+
char* buf = new char[input.size()];
|
197
|
+
size_t num = 0;
|
198
|
+
struct iovec* iov = GetIOVec(input, buf, num);
|
199
|
+
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov,
|
200
|
+
num));
|
435
201
|
CHECK(!memcmp(buf, input.data(), input.size()));
|
436
202
|
delete[] iov;
|
437
203
|
delete[] buf;
|
@@ -439,7 +205,7 @@ static void VerifyIOVec(const std::string& input) {
|
|
439
205
|
|
440
206
|
// Test that data compressed by a compressor that does not
|
441
207
|
// obey block sizes is uncompressed properly.
|
442
|
-
|
208
|
+
void VerifyNonBlockedCompression(const std::string& input) {
|
443
209
|
if (input.length() > snappy::kBlockSize) {
|
444
210
|
// We cannot test larger blocks than the maximum block size, obviously.
|
445
211
|
return;
|
@@ -451,7 +217,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
|
|
451
217
|
// Setup compression table
|
452
218
|
snappy::internal::WorkingMemory wmem(input.size());
|
453
219
|
int table_size;
|
454
|
-
|
220
|
+
uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
|
455
221
|
|
456
222
|
// Compress entire input in one shot
|
457
223
|
std::string compressed;
|
@@ -481,7 +247,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
|
|
481
247
|
struct iovec vec[kNumBlocks];
|
482
248
|
const int block_size = 1 + input.size() / kNumBlocks;
|
483
249
|
std::string iovec_data(block_size * kNumBlocks, 'x');
|
484
|
-
for (int i = 0; i < kNumBlocks; i
|
250
|
+
for (int i = 0; i < kNumBlocks; ++i) {
|
485
251
|
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
486
252
|
vec[i].iov_len = block_size;
|
487
253
|
}
|
@@ -492,7 +258,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
|
|
492
258
|
}
|
493
259
|
|
494
260
|
// Expand the input so that it is at least K times as big as block size
|
495
|
-
|
261
|
+
std::string Expand(const std::string& input) {
|
496
262
|
static const int K = 3;
|
497
263
|
std::string data = input;
|
498
264
|
while (data.size() < K * snappy::kBlockSize) {
|
@@ -501,30 +267,33 @@ static std::string Expand(const std::string& input) {
|
|
501
267
|
return data;
|
502
268
|
}
|
503
269
|
|
504
|
-
|
270
|
+
int Verify(const std::string& input) {
|
505
271
|
VLOG(1) << "Verifying input of size " << input.size();
|
506
272
|
|
507
273
|
// Compress using string based routines
|
508
274
|
const int result = VerifyString(input);
|
509
275
|
|
276
|
+
// Compress using `iovec`-based routines.
|
277
|
+
CHECK_EQ(VerifyIOVecSource(input), result);
|
278
|
+
|
510
279
|
// Verify using sink based routines
|
511
280
|
VerifyStringSink(input);
|
512
281
|
|
513
282
|
VerifyNonBlockedCompression(input);
|
514
|
-
|
283
|
+
VerifyIOVecSink(input);
|
515
284
|
if (!input.empty()) {
|
516
285
|
const std::string expanded = Expand(input);
|
517
286
|
VerifyNonBlockedCompression(expanded);
|
518
|
-
|
287
|
+
VerifyIOVecSink(input);
|
519
288
|
}
|
520
289
|
|
521
290
|
return result;
|
522
291
|
}
|
523
292
|
|
524
|
-
|
293
|
+
bool IsValidCompressedBuffer(const std::string& c) {
|
525
294
|
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
526
295
|
}
|
527
|
-
|
296
|
+
bool Uncompress(const std::string& c, std::string* u) {
|
528
297
|
return snappy::Uncompress(c.data(), c.size(), u);
|
529
298
|
}
|
530
299
|
|
@@ -549,8 +318,8 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
549
318
|
// This is testing for a security bug - a buffer that decompresses to 100k
|
550
319
|
// but we lie in the snappy header and only reserve 0 bytes of memory :)
|
551
320
|
source.resize(100000);
|
552
|
-
for (
|
553
|
-
|
321
|
+
for (char& source_char : source) {
|
322
|
+
source_char = 'A';
|
554
323
|
}
|
555
324
|
snappy::Compress(source.data(), source.size(), &dest);
|
556
325
|
dest[0] = dest[1] = dest[2] = dest[3] = 0;
|
@@ -588,7 +357,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
588
357
|
size_t ulen;
|
589
358
|
CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
|
590
359
|
|| (ulen < (1<<20)));
|
591
|
-
|
360
|
+
uint32_t ulen2;
|
592
361
|
snappy::ByteArraySource source(data.data(), data.size());
|
593
362
|
CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
|
594
363
|
(ulen2 < (1<<20)));
|
@@ -601,7 +370,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
601
370
|
// These mirror the compression code in snappy.cc, but are copied
|
602
371
|
// here so that we can bypass some limitations in the how snappy.cc
|
603
372
|
// invokes these routines.
|
604
|
-
|
373
|
+
void AppendLiteral(std::string* dst, const std::string& literal) {
|
605
374
|
if (literal.empty()) return;
|
606
375
|
int n = literal.size() - 1;
|
607
376
|
if (n < 60) {
|
@@ -621,7 +390,7 @@ static void AppendLiteral(std::string* dst, const std::string& literal) {
|
|
621
390
|
*dst += literal;
|
622
391
|
}
|
623
392
|
|
624
|
-
|
393
|
+
void AppendCopy(std::string* dst, int offset, int length) {
|
625
394
|
while (length > 0) {
|
626
395
|
// Figure out how much to copy in one shot
|
627
396
|
int to_copy;
|
@@ -665,6 +434,41 @@ TEST(Snappy, SimpleTests) {
|
|
665
434
|
Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
666
435
|
}
|
667
436
|
|
437
|
+
// Regression test for cr/345340892.
|
438
|
+
TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
|
439
|
+
Verify("abcabcabcabcabcabcab");
|
440
|
+
Verify("abcabcabcabcabcabcab0123456789ABCDEF");
|
441
|
+
|
442
|
+
Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
|
443
|
+
Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
|
444
|
+
}
|
445
|
+
|
446
|
+
// Regression test for cr/345340892.
|
447
|
+
TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
|
448
|
+
std::mt19937 rng;
|
449
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
450
|
+
for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
|
451
|
+
for (int length = 1; length <= 64; ++length) {
|
452
|
+
for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
|
453
|
+
const int size = pattern_size + length + extra_bytes_after_pattern;
|
454
|
+
std::string input;
|
455
|
+
input.resize(size);
|
456
|
+
for (int i = 0; i < pattern_size; ++i) {
|
457
|
+
input[i] = 'a' + i;
|
458
|
+
}
|
459
|
+
for (int i = 0; i < length; ++i) {
|
460
|
+
input[pattern_size + i] = input[i];
|
461
|
+
}
|
462
|
+
for (int i = 0; i < extra_bytes_after_pattern; ++i) {
|
463
|
+
input[pattern_size + length + i] =
|
464
|
+
static_cast<char>(uniform_byte(rng));
|
465
|
+
}
|
466
|
+
Verify(input);
|
467
|
+
}
|
468
|
+
}
|
469
|
+
}
|
470
|
+
}
|
471
|
+
|
668
472
|
// Verify max blowup (lots of four-byte copies)
|
669
473
|
TEST(Snappy, MaxBlowup) {
|
670
474
|
std::mt19937 rng;
|
@@ -681,7 +485,7 @@ TEST(Snappy, MaxBlowup) {
|
|
681
485
|
}
|
682
486
|
|
683
487
|
TEST(Snappy, RandomData) {
|
684
|
-
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
488
|
+
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
|
685
489
|
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
|
686
490
|
std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
|
687
491
|
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
@@ -690,7 +494,7 @@ TEST(Snappy, RandomData) {
|
|
690
494
|
std::bernoulli_distribution one_in_ten(1.0 / 10);
|
691
495
|
|
692
496
|
constexpr int num_ops = 20000;
|
693
|
-
for (int i = 0; i < num_ops; i
|
497
|
+
for (int i = 0; i < num_ops; ++i) {
|
694
498
|
if ((i % 1000) == 0) {
|
695
499
|
VLOG(0) << "Random op " << i << " of " << num_ops;
|
696
500
|
}
|
@@ -738,14 +542,14 @@ TEST(Snappy, FourByteOffset) {
|
|
738
542
|
// How many times each fragment is emitted.
|
739
543
|
const int n1 = 2;
|
740
544
|
const int n2 = 100000 / fragment2.size();
|
741
|
-
const
|
545
|
+
const size_t length = n1 * fragment1.size() + n2 * fragment2.size();
|
742
546
|
|
743
547
|
std::string compressed;
|
744
548
|
Varint::Append32(&compressed, length);
|
745
549
|
|
746
550
|
AppendLiteral(&compressed, fragment1);
|
747
551
|
std::string src = fragment1;
|
748
|
-
for (int i = 0; i < n2; i
|
552
|
+
for (int i = 0; i < n2; ++i) {
|
749
553
|
AppendLiteral(&compressed, fragment2);
|
750
554
|
src += fragment2;
|
751
555
|
}
|
@@ -760,7 +564,27 @@ TEST(Snappy, FourByteOffset) {
|
|
760
564
|
CHECK_EQ(uncompressed, src);
|
761
565
|
}
|
762
566
|
|
763
|
-
TEST(Snappy,
|
567
|
+
TEST(Snappy, IOVecSourceEdgeCases) {
|
568
|
+
// Validate that empty leading, trailing, and in-between iovecs are handled:
|
569
|
+
// [] [] ['a'] [] ['b'] [].
|
570
|
+
std::string data = "ab";
|
571
|
+
char* buf = const_cast<char*>(data.data());
|
572
|
+
size_t used_so_far = 0;
|
573
|
+
static const int kLengths[] = {0, 0, 1, 0, 1, 0};
|
574
|
+
struct iovec iov[ARRAYSIZE(kLengths)];
|
575
|
+
for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
|
576
|
+
iov[i].iov_base = buf + used_so_far;
|
577
|
+
iov[i].iov_len = kLengths[i];
|
578
|
+
used_so_far += kLengths[i];
|
579
|
+
}
|
580
|
+
std::string compressed;
|
581
|
+
snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed);
|
582
|
+
std::string uncompressed;
|
583
|
+
snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);
|
584
|
+
CHECK_EQ(data, uncompressed);
|
585
|
+
}
|
586
|
+
|
587
|
+
TEST(Snappy, IOVecSinkEdgeCases) {
|
764
588
|
// Test some tricky edge cases in the iovec output that are not necessarily
|
765
589
|
// exercised by random tests.
|
766
590
|
|
@@ -872,14 +696,13 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
872
696
|
}
|
873
697
|
}
|
874
698
|
|
875
|
-
|
876
|
-
size_t* ulength) {
|
699
|
+
bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
|
877
700
|
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
878
701
|
compressed.size(),
|
879
702
|
ulength);
|
880
703
|
|
881
704
|
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
882
|
-
|
705
|
+
uint32_t length;
|
883
706
|
const bool result2 = snappy::GetUncompressedLength(&source, &length);
|
884
707
|
CHECK_EQ(result1, result2);
|
885
708
|
return result1;
|
@@ -954,17 +777,14 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
|
|
954
777
|
EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
|
955
778
|
}
|
956
779
|
|
957
|
-
namespace {
|
958
|
-
|
959
780
|
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
|
781
|
+
uint64_t data;
|
960
782
|
std::pair<size_t, bool> p =
|
961
|
-
snappy::internal::FindMatchLength(s1, s2, s2 + length);
|
783
|
+
snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
|
962
784
|
CHECK_EQ(p.first < 8, p.second);
|
963
785
|
return p.first;
|
964
786
|
}
|
965
787
|
|
966
|
-
} // namespace
|
967
|
-
|
968
788
|
TEST(Snappy, FindMatchLength) {
|
969
789
|
// Exercise all different code paths through the function.
|
970
790
|
// 64-bit version:
|
@@ -1058,12 +878,12 @@ TEST(Snappy, FindMatchLength) {
|
|
1058
878
|
TEST(Snappy, FindMatchLengthRandom) {
|
1059
879
|
constexpr int kNumTrials = 10000;
|
1060
880
|
constexpr int kTypicalLength = 10;
|
1061
|
-
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
881
|
+
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
|
1062
882
|
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
1063
883
|
std::bernoulli_distribution one_in_two(1.0 / 2);
|
1064
884
|
std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
|
1065
885
|
|
1066
|
-
for (int i = 0; i < kNumTrials; i
|
886
|
+
for (int i = 0; i < kNumTrials; ++i) {
|
1067
887
|
std::string s, t;
|
1068
888
|
char a = static_cast<char>(uniform_byte(rng));
|
1069
889
|
char b = static_cast<char>(uniform_byte(rng));
|
@@ -1073,21 +893,20 @@ TEST(Snappy, FindMatchLengthRandom) {
|
|
1073
893
|
}
|
1074
894
|
DataEndingAtUnreadablePage u(s);
|
1075
895
|
DataEndingAtUnreadablePage v(t);
|
1076
|
-
|
896
|
+
size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
|
1077
897
|
if (matched == t.size()) {
|
1078
898
|
EXPECT_EQ(s, t);
|
1079
899
|
} else {
|
1080
900
|
EXPECT_NE(s[matched], t[matched]);
|
1081
|
-
for (
|
901
|
+
for (size_t j = 0; j < matched; ++j) {
|
1082
902
|
EXPECT_EQ(s[j], t[j]);
|
1083
903
|
}
|
1084
904
|
}
|
1085
905
|
}
|
1086
906
|
}
|
1087
907
|
|
1088
|
-
|
1089
|
-
|
1090
|
-
unsigned int copy_offset) {
|
908
|
+
uint16_t MakeEntry(unsigned int extra, unsigned int len,
|
909
|
+
unsigned int copy_offset) {
|
1091
910
|
// Check that all of the fields fit within the allocated space
|
1092
911
|
assert(extra == (extra & 0x7)); // At most 3 bits
|
1093
912
|
assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
|
@@ -1104,403 +923,88 @@ TEST(Snappy, VerifyCharTable) {
|
|
1104
923
|
using snappy::internal::COPY_4_BYTE_OFFSET;
|
1105
924
|
using snappy::internal::char_table;
|
1106
925
|
|
1107
|
-
|
926
|
+
uint16_t dst[256];
|
1108
927
|
|
1109
928
|
// Place invalid entries in all places to detect missing initialization
|
1110
929
|
int assigned = 0;
|
1111
|
-
for (int i = 0; i < 256; i
|
930
|
+
for (int i = 0; i < 256; ++i) {
|
1112
931
|
dst[i] = 0xffff;
|
1113
932
|
}
|
1114
933
|
|
1115
934
|
// Small LITERAL entries. We store (len-1) in the top 6 bits.
|
1116
|
-
for (
|
1117
|
-
dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
|
935
|
+
for (uint8_t len = 1; len <= 60; ++len) {
|
936
|
+
dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
|
1118
937
|
assigned++;
|
1119
938
|
}
|
1120
939
|
|
1121
940
|
// Large LITERAL entries. We use 60..63 in the high 6 bits to
|
1122
941
|
// encode the number of bytes of length info that follow the opcode.
|
1123
|
-
for (
|
942
|
+
for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
|
1124
943
|
// We set the length field in the lookup table to 1 because extra
|
1125
944
|
// bytes encode len-1.
|
1126
|
-
dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
945
|
+
dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
1127
946
|
assigned++;
|
1128
947
|
}
|
1129
948
|
|
1130
949
|
// COPY_1_BYTE_OFFSET.
|
1131
950
|
//
|
1132
951
|
// The tag byte in the compressed data stores len-4 in 3 bits, and
|
1133
|
-
// offset/256 in
|
952
|
+
// offset/256 in 3 bits. offset%256 is stored in the next byte.
|
1134
953
|
//
|
1135
954
|
// This format is used for length in range [4..11] and offset in
|
1136
955
|
// range [0..2047]
|
1137
|
-
for (
|
1138
|
-
for (
|
1139
|
-
|
1140
|
-
|
956
|
+
for (uint8_t len = 4; len < 12; ++len) {
|
957
|
+
for (uint16_t offset = 0; offset < 2048; offset += 256) {
|
958
|
+
uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
|
959
|
+
dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
|
960
|
+
MakeEntry(1, len, offset_high);
|
1141
961
|
assigned++;
|
1142
962
|
}
|
1143
963
|
}
|
1144
964
|
|
1145
965
|
// COPY_2_BYTE_OFFSET.
|
1146
966
|
// Tag contains len-1 in top 6 bits, and offset in next two bytes.
|
1147
|
-
for (
|
1148
|
-
dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
|
967
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
968
|
+
dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
|
1149
969
|
assigned++;
|
1150
970
|
}
|
1151
971
|
|
1152
972
|
// COPY_4_BYTE_OFFSET.
|
1153
973
|
// Tag contents len-1 in top 6 bits, and offset in next four bytes.
|
1154
|
-
for (
|
1155
|
-
dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
|
974
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
975
|
+
dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
|
1156
976
|
assigned++;
|
1157
977
|
}
|
1158
978
|
|
1159
979
|
// Check that each entry was initialized exactly once.
|
1160
980
|
EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
|
1161
|
-
for (int i = 0; i < 256; i
|
981
|
+
for (int i = 0; i < 256; ++i) {
|
1162
982
|
EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
|
1163
983
|
}
|
1164
984
|
|
1165
|
-
if (FLAGS_snappy_dump_decompression_table) {
|
1166
|
-
printf("static const
|
1167
|
-
for (int i = 0; i < 256; i
|
1168
|
-
printf("0x%04x%s",
|
1169
|
-
|
1170
|
-
|
985
|
+
if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
|
986
|
+
std::printf("static const uint16_t char_table[256] = {\n ");
|
987
|
+
for (int i = 0; i < 256; ++i) {
|
988
|
+
std::printf("0x%04x%s",
|
989
|
+
dst[i],
|
990
|
+
((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
|
1171
991
|
}
|
1172
|
-
printf("};\n");
|
992
|
+
std::printf("};\n");
|
1173
993
|
}
|
1174
994
|
|
1175
995
|
// Check that computed table matched recorded table.
|
1176
|
-
for (int i = 0; i < 256; i
|
996
|
+
for (int i = 0; i < 256; ++i) {
|
1177
997
|
EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
|
1178
998
|
}
|
1179
999
|
}
|
1180
1000
|
|
1181
|
-
|
1182
|
-
|
1183
|
-
|
1184
|
-
|
1185
|
-
std::string compressed;
|
1186
|
-
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
|
1187
|
-
|
1188
|
-
CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
|
1189
|
-
file::Defaults()));
|
1190
|
-
}
|
1191
|
-
|
1192
|
-
static void UncompressFile(const char* fname) {
|
1193
|
-
std::string fullinput;
|
1194
|
-
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1195
|
-
|
1196
|
-
size_t uncompLength;
|
1197
|
-
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
|
1198
|
-
|
1199
|
-
std::string uncompressed;
|
1200
|
-
uncompressed.resize(uncompLength);
|
1201
|
-
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
|
1202
|
-
|
1203
|
-
CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
|
1204
|
-
file::Defaults()));
|
1205
|
-
}
|
1206
|
-
|
1207
|
-
static void MeasureFile(const char* fname) {
|
1208
|
-
std::string fullinput;
|
1209
|
-
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1210
|
-
printf("%-40s :\n", fname);
|
1211
|
-
|
1212
|
-
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
|
1213
|
-
int end_len = fullinput.size();
|
1214
|
-
if (FLAGS_end_len >= 0) {
|
1215
|
-
end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
|
1216
|
-
}
|
1217
|
-
for (int len = start_len; len <= end_len; len++) {
|
1218
|
-
const char* const input = fullinput.data();
|
1219
|
-
int repeats = (FLAGS_bytes + len) / (len + 1);
|
1220
|
-
if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
|
1221
|
-
if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
|
1222
|
-
if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
|
1223
|
-
|
1224
|
-
// For block-size based measurements
|
1225
|
-
if (0 && FLAGS_snappy) {
|
1226
|
-
Measure(input, len, SNAPPY, repeats, 8<<10);
|
1227
|
-
Measure(input, len, SNAPPY, repeats, 16<<10);
|
1228
|
-
Measure(input, len, SNAPPY, repeats, 32<<10);
|
1229
|
-
Measure(input, len, SNAPPY, repeats, 64<<10);
|
1230
|
-
Measure(input, len, SNAPPY, repeats, 256<<10);
|
1231
|
-
Measure(input, len, SNAPPY, repeats, 1024<<10);
|
1232
|
-
}
|
1001
|
+
TEST(Snappy, TestBenchmarkFiles) {
|
1002
|
+
for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
|
1003
|
+
Verify(ReadTestDataFile(kTestDataFiles[i].filename,
|
1004
|
+
kTestDataFiles[i].size_limit));
|
1233
1005
|
}
|
1234
1006
|
}
|
1235
1007
|
|
1236
|
-
|
1237
|
-
const char* label;
|
1238
|
-
const char* filename;
|
1239
|
-
size_t size_limit;
|
1240
|
-
} files[] = {
|
1241
|
-
{ "html", "html", 0 },
|
1242
|
-
{ "urls", "urls.10K", 0 },
|
1243
|
-
{ "jpg", "fireworks.jpeg", 0 },
|
1244
|
-
{ "jpg_200", "fireworks.jpeg", 200 },
|
1245
|
-
{ "pdf", "paper-100k.pdf", 0 },
|
1246
|
-
{ "html4", "html_x_4", 0 },
|
1247
|
-
{ "txt1", "alice29.txt", 0 },
|
1248
|
-
{ "txt2", "asyoulik.txt", 0 },
|
1249
|
-
{ "txt3", "lcet10.txt", 0 },
|
1250
|
-
{ "txt4", "plrabn12.txt", 0 },
|
1251
|
-
{ "pb", "geo.protodata", 0 },
|
1252
|
-
{ "gaviota", "kppkn.gtb", 0 },
|
1253
|
-
};
|
1254
|
-
|
1255
|
-
static void BM_UFlat(int iters, int arg) {
|
1256
|
-
StopBenchmarkTiming();
|
1257
|
-
|
1258
|
-
// Pick file to process based on "arg"
|
1259
|
-
CHECK_GE(arg, 0);
|
1260
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1261
|
-
std::string contents =
|
1262
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1263
|
-
|
1264
|
-
std::string zcontents;
|
1265
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1266
|
-
char* dst = new char[contents.size()];
|
1267
|
-
|
1268
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1269
|
-
static_cast<int64>(contents.size()));
|
1270
|
-
SetBenchmarkLabel(files[arg].label);
|
1271
|
-
StartBenchmarkTiming();
|
1272
|
-
while (iters-- > 0) {
|
1273
|
-
CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
|
1274
|
-
}
|
1275
|
-
StopBenchmarkTiming();
|
1276
|
-
|
1277
|
-
delete[] dst;
|
1278
|
-
}
|
1279
|
-
BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1280
|
-
|
1281
|
-
static void BM_UValidate(int iters, int arg) {
|
1282
|
-
StopBenchmarkTiming();
|
1283
|
-
|
1284
|
-
// Pick file to process based on "arg"
|
1285
|
-
CHECK_GE(arg, 0);
|
1286
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1287
|
-
std::string contents =
|
1288
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1289
|
-
|
1290
|
-
std::string zcontents;
|
1291
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1292
|
-
|
1293
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1294
|
-
static_cast<int64>(contents.size()));
|
1295
|
-
SetBenchmarkLabel(files[arg].label);
|
1296
|
-
StartBenchmarkTiming();
|
1297
|
-
while (iters-- > 0) {
|
1298
|
-
CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
|
1299
|
-
}
|
1300
|
-
StopBenchmarkTiming();
|
1301
|
-
}
|
1302
|
-
BENCHMARK(BM_UValidate)->DenseRange(0, 4);
|
1303
|
-
|
1304
|
-
static void BM_UIOVec(int iters, int arg) {
|
1305
|
-
StopBenchmarkTiming();
|
1306
|
-
|
1307
|
-
// Pick file to process based on "arg"
|
1308
|
-
CHECK_GE(arg, 0);
|
1309
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1310
|
-
std::string contents =
|
1311
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1312
|
-
|
1313
|
-
std::string zcontents;
|
1314
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1315
|
-
|
1316
|
-
// Uncompress into an iovec containing ten entries.
|
1317
|
-
const int kNumEntries = 10;
|
1318
|
-
struct iovec iov[kNumEntries];
|
1319
|
-
char *dst = new char[contents.size()];
|
1320
|
-
int used_so_far = 0;
|
1321
|
-
for (int i = 0; i < kNumEntries; ++i) {
|
1322
|
-
iov[i].iov_base = dst + used_so_far;
|
1323
|
-
if (used_so_far == contents.size()) {
|
1324
|
-
iov[i].iov_len = 0;
|
1325
|
-
continue;
|
1326
|
-
}
|
1327
|
-
|
1328
|
-
if (i == kNumEntries - 1) {
|
1329
|
-
iov[i].iov_len = contents.size() - used_so_far;
|
1330
|
-
} else {
|
1331
|
-
iov[i].iov_len = contents.size() / kNumEntries;
|
1332
|
-
}
|
1333
|
-
used_so_far += iov[i].iov_len;
|
1334
|
-
}
|
1335
|
-
|
1336
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1337
|
-
static_cast<int64>(contents.size()));
|
1338
|
-
SetBenchmarkLabel(files[arg].label);
|
1339
|
-
StartBenchmarkTiming();
|
1340
|
-
while (iters-- > 0) {
|
1341
|
-
CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
|
1342
|
-
kNumEntries));
|
1343
|
-
}
|
1344
|
-
StopBenchmarkTiming();
|
1345
|
-
|
1346
|
-
delete[] dst;
|
1347
|
-
}
|
1348
|
-
BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
|
1349
|
-
|
1350
|
-
static void BM_UFlatSink(int iters, int arg) {
|
1351
|
-
StopBenchmarkTiming();
|
1352
|
-
|
1353
|
-
// Pick file to process based on "arg"
|
1354
|
-
CHECK_GE(arg, 0);
|
1355
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1356
|
-
std::string contents =
|
1357
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1358
|
-
|
1359
|
-
std::string zcontents;
|
1360
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1361
|
-
char* dst = new char[contents.size()];
|
1362
|
-
|
1363
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1364
|
-
static_cast<int64>(contents.size()));
|
1365
|
-
SetBenchmarkLabel(files[arg].label);
|
1366
|
-
StartBenchmarkTiming();
|
1367
|
-
while (iters-- > 0) {
|
1368
|
-
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
|
1369
|
-
snappy::UncheckedByteArraySink sink(dst);
|
1370
|
-
CHECK(snappy::Uncompress(&source, &sink));
|
1371
|
-
}
|
1372
|
-
StopBenchmarkTiming();
|
1373
|
-
|
1374
|
-
std::string s(dst, contents.size());
|
1375
|
-
CHECK_EQ(contents, s);
|
1376
|
-
|
1377
|
-
delete[] dst;
|
1378
|
-
}
|
1379
|
-
|
1380
|
-
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1381
|
-
|
1382
|
-
static void BM_ZFlat(int iters, int arg) {
|
1383
|
-
StopBenchmarkTiming();
|
1384
|
-
|
1385
|
-
// Pick file to process based on "arg"
|
1386
|
-
CHECK_GE(arg, 0);
|
1387
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1388
|
-
std::string contents =
|
1389
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1390
|
-
|
1391
|
-
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
1392
|
-
|
1393
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1394
|
-
static_cast<int64>(contents.size()));
|
1395
|
-
StartBenchmarkTiming();
|
1396
|
-
|
1397
|
-
size_t zsize = 0;
|
1398
|
-
while (iters-- > 0) {
|
1399
|
-
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
|
1400
|
-
}
|
1401
|
-
StopBenchmarkTiming();
|
1402
|
-
const double compression_ratio =
|
1403
|
-
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
1404
|
-
SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
|
1405
|
-
100.0 * compression_ratio));
|
1406
|
-
VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
|
1407
|
-
files[arg].label, static_cast<int>(contents.size()),
|
1408
|
-
static_cast<int>(zsize));
|
1409
|
-
delete[] dst;
|
1410
|
-
}
|
1411
|
-
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1412
|
-
|
1413
|
-
static void BM_ZFlatAll(int iters, int arg) {
|
1414
|
-
StopBenchmarkTiming();
|
1415
|
-
|
1416
|
-
CHECK_EQ(arg, 0);
|
1417
|
-
const int num_files = ARRAYSIZE(files);
|
1418
|
-
|
1419
|
-
std::vector<std::string> contents(num_files);
|
1420
|
-
std::vector<char*> dst(num_files);
|
1421
|
-
|
1422
|
-
int64 total_contents_size = 0;
|
1423
|
-
for (int i = 0; i < num_files; ++i) {
|
1424
|
-
contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
|
1425
|
-
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
|
1426
|
-
total_contents_size += contents[i].size();
|
1427
|
-
}
|
1428
|
-
|
1429
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1430
|
-
StartBenchmarkTiming();
|
1431
|
-
|
1432
|
-
size_t zsize = 0;
|
1433
|
-
while (iters-- > 0) {
|
1434
|
-
for (int i = 0; i < num_files; ++i) {
|
1435
|
-
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1436
|
-
&zsize);
|
1437
|
-
}
|
1438
|
-
}
|
1439
|
-
StopBenchmarkTiming();
|
1440
|
-
|
1441
|
-
for (int i = 0; i < num_files; ++i) {
|
1442
|
-
delete[] dst[i];
|
1443
|
-
}
|
1444
|
-
SetBenchmarkLabel(StrFormat("%d files", num_files));
|
1445
|
-
}
|
1446
|
-
BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
|
1447
|
-
|
1448
|
-
static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
|
1449
|
-
StopBenchmarkTiming();
|
1450
|
-
|
1451
|
-
CHECK_EQ(arg, 0);
|
1452
|
-
CHECK_GT(ARRAYSIZE(files), 0);
|
1453
|
-
const std::string base_content =
|
1454
|
-
ReadTestDataFile(files[0].filename, files[0].size_limit);
|
1455
|
-
|
1456
|
-
std::vector<std::string> contents;
|
1457
|
-
std::vector<char*> dst;
|
1458
|
-
int64 total_contents_size = 0;
|
1459
|
-
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
|
1460
|
-
++table_bits) {
|
1461
|
-
std::string content = base_content;
|
1462
|
-
content.resize(1 << table_bits);
|
1463
|
-
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
|
1464
|
-
total_contents_size += content.size();
|
1465
|
-
contents.push_back(std::move(content));
|
1466
|
-
}
|
1467
|
-
|
1468
|
-
size_t zsize = 0;
|
1469
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1470
|
-
StartBenchmarkTiming();
|
1471
|
-
while (iters-- > 0) {
|
1472
|
-
for (int i = 0; i < contents.size(); ++i) {
|
1473
|
-
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1474
|
-
&zsize);
|
1475
|
-
}
|
1476
|
-
}
|
1477
|
-
StopBenchmarkTiming();
|
1478
|
-
|
1479
|
-
for (int i = 0; i < dst.size(); ++i) {
|
1480
|
-
delete[] dst[i];
|
1481
|
-
}
|
1482
|
-
SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
|
1483
|
-
}
|
1484
|
-
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
|
1008
|
+
} // namespace
|
1485
1009
|
|
1486
1010
|
} // namespace snappy
|
1487
|
-
|
1488
|
-
int main(int argc, char** argv) {
|
1489
|
-
InitGoogle(argv[0], &argc, &argv, true);
|
1490
|
-
RunSpecifiedBenchmarks();
|
1491
|
-
|
1492
|
-
if (argc >= 2) {
|
1493
|
-
for (int arg = 1; arg < argc; arg++) {
|
1494
|
-
if (FLAGS_write_compressed) {
|
1495
|
-
snappy::CompressFile(argv[arg]);
|
1496
|
-
} else if (FLAGS_write_uncompressed) {
|
1497
|
-
snappy::UncompressFile(argv[arg]);
|
1498
|
-
} else {
|
1499
|
-
snappy::MeasureFile(argv[arg]);
|
1500
|
-
}
|
1501
|
-
}
|
1502
|
-
return 0;
|
1503
|
-
}
|
1504
|
-
|
1505
|
-
return RUN_ALL_TESTS();
|
1506
|
-
}
|