snappy 0.0.17 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.dockerignore +2 -0
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/.gitignore +2 -1
- data/.gitmodules +1 -1
- data/Dockerfile +13 -0
- data/Gemfile +4 -0
- data/README.md +29 -5
- data/Rakefile +32 -29
- data/ext/api.c +6 -1
- data/ext/extconf.rb +23 -16
- data/lib/snappy/hadoop/reader.rb +62 -0
- data/lib/snappy/hadoop/writer.rb +51 -0
- data/lib/snappy/hadoop.rb +22 -0
- data/lib/snappy/reader.rb +14 -10
- data/lib/snappy/shim.rb +1 -1
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy.rb +5 -4
- data/snappy.gemspec +13 -13
- data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
- data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
- data/test/snappy_hadoop_test.rb +26 -0
- data/test/snappy_reader_test.rb +148 -0
- data/test/snappy_test.rb +95 -0
- data/test/snappy_writer_test.rb +55 -0
- data/test/test_helper.rb +7 -0
- data/test.sh +3 -0
- data/vendor/snappy/CMakeLists.txt +297 -0
- data/vendor/snappy/CONTRIBUTING.md +26 -0
- data/vendor/snappy/NEWS +40 -0
- data/vendor/snappy/{README → README.md} +27 -18
- data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/vendor/snappy/cmake/config.h.in +62 -0
- data/vendor/snappy/docs/README.md +72 -0
- data/vendor/snappy/snappy-internal.h +22 -18
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +116 -38
- data/vendor/snappy/snappy-stubs-public.h.in +20 -46
- data/vendor/snappy/snappy-test.cc +26 -22
- data/vendor/snappy/snappy-test.h +24 -98
- data/vendor/snappy/snappy.cc +380 -183
- data/vendor/snappy/snappy.h +14 -10
- data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
- data/vendor/snappy/snappy_unittest.cc +236 -261
- metadata +37 -92
- data/.travis.yml +0 -26
- data/smoke.sh +0 -8
- data/test/test-snappy-reader.rb +0 -129
- data/test/test-snappy-writer.rb +0 -55
- data/test/test-snappy.rb +0 -58
- data/vendor/snappy/ChangeLog +0 -2468
- data/vendor/snappy/INSTALL +0 -370
- data/vendor/snappy/Makefile +0 -982
- data/vendor/snappy/Makefile.am +0 -26
- data/vendor/snappy/Makefile.in +0 -982
- data/vendor/snappy/aclocal.m4 +0 -9738
- data/vendor/snappy/autogen.sh +0 -12
- data/vendor/snappy/autom4te.cache/output.0 +0 -18856
- data/vendor/snappy/autom4te.cache/output.1 +0 -18852
- data/vendor/snappy/autom4te.cache/requests +0 -297
- data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
- data/vendor/snappy/autom4te.cache/traces.1 +0 -714
- data/vendor/snappy/config.guess +0 -1530
- data/vendor/snappy/config.h +0 -135
- data/vendor/snappy/config.h.in +0 -134
- data/vendor/snappy/config.log +0 -1640
- data/vendor/snappy/config.status +0 -2318
- data/vendor/snappy/config.sub +0 -1773
- data/vendor/snappy/configure +0 -18852
- data/vendor/snappy/configure.ac +0 -134
- data/vendor/snappy/depcomp +0 -688
- data/vendor/snappy/install-sh +0 -527
- data/vendor/snappy/libtool +0 -10246
- data/vendor/snappy/ltmain.sh +0 -9661
- data/vendor/snappy/m4/gtest.m4 +0 -74
- data/vendor/snappy/m4/libtool.m4 +0 -8001
- data/vendor/snappy/m4/ltoptions.m4 +0 -384
- data/vendor/snappy/m4/ltsugar.m4 +0 -123
- data/vendor/snappy/m4/ltversion.m4 +0 -23
- data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
- data/vendor/snappy/missing +0 -331
- data/vendor/snappy/snappy-stubs-public.h +0 -100
- data/vendor/snappy/snappy.pc +0 -10
- data/vendor/snappy/snappy.pc.in +0 -10
- data/vendor/snappy/stamp-h1 +0 -1
@@ -29,9 +29,10 @@
|
|
29
29
|
#include <math.h>
|
30
30
|
#include <stdlib.h>
|
31
31
|
|
32
|
-
|
33
32
|
#include <algorithm>
|
33
|
+
#include <random>
|
34
34
|
#include <string>
|
35
|
+
#include <utility>
|
35
36
|
#include <vector>
|
36
37
|
|
37
38
|
#include "snappy.h"
|
@@ -50,13 +51,6 @@ DEFINE_bool(zlib, false,
|
|
50
51
|
"Run zlib compression (http://www.zlib.net)");
|
51
52
|
DEFINE_bool(lzo, false,
|
52
53
|
"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
|
53
|
-
DEFINE_bool(quicklz, false,
|
54
|
-
"Run quickLZ compression (http://www.quicklz.com/)");
|
55
|
-
DEFINE_bool(liblzf, false,
|
56
|
-
"Run libLZF compression "
|
57
|
-
"(http://www.goof.com/pcg/marc/liblzf.html)");
|
58
|
-
DEFINE_bool(fastlz, false,
|
59
|
-
"Run FastLZ compression (http://www.fastlz.org/");
|
60
54
|
DEFINE_bool(snappy, true, "Run snappy compression");
|
61
55
|
|
62
56
|
DEFINE_bool(write_compressed, false,
|
@@ -69,8 +63,7 @@ DEFINE_bool(snappy_dump_decompression_table, false,
|
|
69
63
|
|
70
64
|
namespace snappy {
|
71
65
|
|
72
|
-
|
73
|
-
#ifdef HAVE_FUNC_MMAP
|
66
|
+
#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
|
74
67
|
|
75
68
|
// To test against code that reads beyond its input, this class copies a
|
76
69
|
// string to a newly allocated group of pages, the last of which
|
@@ -80,8 +73,8 @@ namespace snappy {
|
|
80
73
|
// be able to read previously allocated memory while doing heap allocations.
|
81
74
|
class DataEndingAtUnreadablePage {
|
82
75
|
public:
|
83
|
-
explicit DataEndingAtUnreadablePage(const string& s) {
|
84
|
-
const size_t page_size =
|
76
|
+
explicit DataEndingAtUnreadablePage(const std::string& s) {
|
77
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
85
78
|
const size_t size = s.size();
|
86
79
|
// Round up space for string to a multiple of page_size.
|
87
80
|
size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
|
@@ -99,8 +92,9 @@ class DataEndingAtUnreadablePage {
|
|
99
92
|
}
|
100
93
|
|
101
94
|
~DataEndingAtUnreadablePage() {
|
95
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
102
96
|
// Undo the mprotect.
|
103
|
-
CHECK_EQ(0, mprotect(protected_page_,
|
97
|
+
CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
|
104
98
|
CHECK_EQ(0, munmap(mem_, alloc_size_));
|
105
99
|
}
|
106
100
|
|
@@ -115,19 +109,19 @@ class DataEndingAtUnreadablePage {
|
|
115
109
|
size_t size_;
|
116
110
|
};
|
117
111
|
|
118
|
-
#else // HAVE_FUNC_MMAP
|
112
|
+
#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
|
119
113
|
|
120
114
|
// Fallback for systems without mmap.
|
121
|
-
|
115
|
+
using DataEndingAtUnreadablePage = std::string;
|
122
116
|
|
123
117
|
#endif
|
124
118
|
|
125
119
|
enum CompressorType {
|
126
|
-
ZLIB, LZO,
|
120
|
+
ZLIB, LZO, SNAPPY
|
127
121
|
};
|
128
122
|
|
129
123
|
const char* names[] = {
|
130
|
-
"ZLIB", "LZO", "
|
124
|
+
"ZLIB", "LZO", "SNAPPY"
|
131
125
|
};
|
132
126
|
|
133
127
|
static size_t MinimumRequiredOutputSpace(size_t input_size,
|
@@ -143,21 +137,6 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
|
|
143
137
|
return input_size + input_size/64 + 16 + 3;
|
144
138
|
#endif // LZO_VERSION
|
145
139
|
|
146
|
-
#ifdef LZF_VERSION
|
147
|
-
case LIBLZF:
|
148
|
-
return input_size;
|
149
|
-
#endif // LZF_VERSION
|
150
|
-
|
151
|
-
#ifdef QLZ_VERSION_MAJOR
|
152
|
-
case QUICKLZ:
|
153
|
-
return input_size + 36000; // 36000 is used for scratch.
|
154
|
-
#endif // QLZ_VERSION_MAJOR
|
155
|
-
|
156
|
-
#ifdef FASTLZ_VERSION
|
157
|
-
case FASTLZ:
|
158
|
-
return max(static_cast<int>(ceil(input_size * 1.05)), 66);
|
159
|
-
#endif // FASTLZ_VERSION
|
160
|
-
|
161
140
|
case SNAPPY:
|
162
141
|
return snappy::MaxCompressedLength(input_size);
|
163
142
|
|
@@ -175,7 +154,7 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
|
|
175
154
|
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
|
176
155
|
// number of bytes, and may contain junk bytes at the end after return.
|
177
156
|
static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
178
|
-
string* compressed, bool compressed_is_preallocated) {
|
157
|
+
std::string* compressed, bool compressed_is_preallocated) {
|
179
158
|
if (!compressed_is_preallocated) {
|
180
159
|
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
|
181
160
|
}
|
@@ -217,58 +196,6 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
217
196
|
}
|
218
197
|
#endif // LZO_VERSION
|
219
198
|
|
220
|
-
#ifdef LZF_VERSION
|
221
|
-
case LIBLZF: {
|
222
|
-
int destlen = lzf_compress(input,
|
223
|
-
input_size,
|
224
|
-
string_as_array(compressed),
|
225
|
-
input_size);
|
226
|
-
if (destlen == 0) {
|
227
|
-
// lzf *can* cause lots of blowup when compressing, so they
|
228
|
-
// recommend to limit outsize to insize, and just not compress
|
229
|
-
// if it's bigger. Ideally, we'd just swap input and output.
|
230
|
-
compressed->assign(input, input_size);
|
231
|
-
destlen = input_size;
|
232
|
-
}
|
233
|
-
if (!compressed_is_preallocated) {
|
234
|
-
compressed->resize(destlen);
|
235
|
-
}
|
236
|
-
break;
|
237
|
-
}
|
238
|
-
#endif // LZF_VERSION
|
239
|
-
|
240
|
-
#ifdef QLZ_VERSION_MAJOR
|
241
|
-
case QUICKLZ: {
|
242
|
-
qlz_state_compress *state_compress = new qlz_state_compress;
|
243
|
-
int destlen = qlz_compress(input,
|
244
|
-
string_as_array(compressed),
|
245
|
-
input_size,
|
246
|
-
state_compress);
|
247
|
-
delete state_compress;
|
248
|
-
CHECK_NE(0, destlen);
|
249
|
-
if (!compressed_is_preallocated) {
|
250
|
-
compressed->resize(destlen);
|
251
|
-
}
|
252
|
-
break;
|
253
|
-
}
|
254
|
-
#endif // QLZ_VERSION_MAJOR
|
255
|
-
|
256
|
-
#ifdef FASTLZ_VERSION
|
257
|
-
case FASTLZ: {
|
258
|
-
// Use level 1 compression since we mostly care about speed.
|
259
|
-
int destlen = fastlz_compress_level(
|
260
|
-
1,
|
261
|
-
input,
|
262
|
-
input_size,
|
263
|
-
string_as_array(compressed));
|
264
|
-
if (!compressed_is_preallocated) {
|
265
|
-
compressed->resize(destlen);
|
266
|
-
}
|
267
|
-
CHECK_NE(destlen, 0);
|
268
|
-
break;
|
269
|
-
}
|
270
|
-
#endif // FASTLZ_VERSION
|
271
|
-
|
272
199
|
case SNAPPY: {
|
273
200
|
size_t destlen;
|
274
201
|
snappy::RawCompress(input, input_size,
|
@@ -288,8 +215,8 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
288
215
|
return true;
|
289
216
|
}
|
290
217
|
|
291
|
-
static bool Uncompress(const string& compressed, CompressorType comp,
|
292
|
-
int size, string* output) {
|
218
|
+
static bool Uncompress(const std::string& compressed, CompressorType comp,
|
219
|
+
int size, std::string* output) {
|
293
220
|
switch (comp) {
|
294
221
|
#ifdef ZLIB_VERSION
|
295
222
|
case ZLIB: {
|
@@ -323,49 +250,6 @@ static bool Uncompress(const string& compressed, CompressorType comp,
|
|
323
250
|
}
|
324
251
|
#endif // LZO_VERSION
|
325
252
|
|
326
|
-
#ifdef LZF_VERSION
|
327
|
-
case LIBLZF: {
|
328
|
-
output->resize(size);
|
329
|
-
int destlen = lzf_decompress(compressed.data(),
|
330
|
-
compressed.size(),
|
331
|
-
string_as_array(output),
|
332
|
-
output->size());
|
333
|
-
if (destlen == 0) {
|
334
|
-
// This error probably means we had decided not to compress,
|
335
|
-
// and thus have stored input in output directly.
|
336
|
-
output->assign(compressed.data(), compressed.size());
|
337
|
-
destlen = compressed.size();
|
338
|
-
}
|
339
|
-
CHECK_EQ(destlen, size);
|
340
|
-
break;
|
341
|
-
}
|
342
|
-
#endif // LZF_VERSION
|
343
|
-
|
344
|
-
#ifdef QLZ_VERSION_MAJOR
|
345
|
-
case QUICKLZ: {
|
346
|
-
output->resize(size);
|
347
|
-
qlz_state_decompress *state_decompress = new qlz_state_decompress;
|
348
|
-
int destlen = qlz_decompress(compressed.data(),
|
349
|
-
string_as_array(output),
|
350
|
-
state_decompress);
|
351
|
-
delete state_decompress;
|
352
|
-
CHECK_EQ(destlen, size);
|
353
|
-
break;
|
354
|
-
}
|
355
|
-
#endif // QLZ_VERSION_MAJOR
|
356
|
-
|
357
|
-
#ifdef FASTLZ_VERSION
|
358
|
-
case FASTLZ: {
|
359
|
-
output->resize(size);
|
360
|
-
int destlen = fastlz_decompress(compressed.data(),
|
361
|
-
compressed.length(),
|
362
|
-
string_as_array(output),
|
363
|
-
size);
|
364
|
-
CHECK_EQ(destlen, size);
|
365
|
-
break;
|
366
|
-
}
|
367
|
-
#endif // FASTLZ_VERSION
|
368
|
-
|
369
253
|
case SNAPPY: {
|
370
254
|
snappy::RawUncompress(compressed.data(), compressed.size(),
|
371
255
|
string_as_array(output));
|
@@ -395,11 +279,11 @@ static void Measure(const char* data,
|
|
395
279
|
int num_blocks = (length + block_size - 1) / block_size;
|
396
280
|
std::vector<const char*> input(num_blocks);
|
397
281
|
std::vector<size_t> input_length(num_blocks);
|
398
|
-
std::vector<string> compressed(num_blocks);
|
399
|
-
std::vector<string> output(num_blocks);
|
282
|
+
std::vector<std::string> compressed(num_blocks);
|
283
|
+
std::vector<std::string> output(num_blocks);
|
400
284
|
for (int b = 0; b < num_blocks; b++) {
|
401
285
|
int input_start = b * block_size;
|
402
|
-
int input_limit = min<int>((b+1)*block_size, length);
|
286
|
+
int input_limit = std::min<int>((b+1)*block_size, length);
|
403
287
|
input[b] = data+input_start;
|
404
288
|
input_length[b] = input_limit-input_start;
|
405
289
|
|
@@ -454,29 +338,28 @@ static void Measure(const char* data,
|
|
454
338
|
}
|
455
339
|
}
|
456
340
|
|
457
|
-
sort(ctime, ctime + kRuns);
|
458
|
-
sort(utime, utime + kRuns);
|
341
|
+
std::sort(ctime, ctime + kRuns);
|
342
|
+
std::sort(utime, utime + kRuns);
|
459
343
|
const int med = kRuns/2;
|
460
344
|
|
461
345
|
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
|
462
346
|
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
|
463
|
-
string x = names[comp];
|
347
|
+
std::string x = names[comp];
|
464
348
|
x += ":";
|
465
|
-
string urate = (uncomp_rate >= 0)
|
466
|
-
|
467
|
-
: string("?");
|
349
|
+
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
|
350
|
+
: std::string("?");
|
468
351
|
printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
|
469
352
|
"comp %5.1f MB/s uncomp %5s MB/s\n",
|
470
353
|
x.c_str(),
|
471
354
|
block_size/(1<<20),
|
472
355
|
static_cast<int>(length), static_cast<uint32>(compressed_size),
|
473
|
-
(compressed_size * 100.0) / max<int>(1, length),
|
356
|
+
(compressed_size * 100.0) / std::max<int>(1, length),
|
474
357
|
comp_rate,
|
475
358
|
urate.c_str());
|
476
359
|
}
|
477
360
|
|
478
|
-
static int VerifyString(const string& input) {
|
479
|
-
string compressed;
|
361
|
+
static int VerifyString(const std::string& input) {
|
362
|
+
std::string compressed;
|
480
363
|
DataEndingAtUnreadablePage i(input);
|
481
364
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
482
365
|
CHECK_EQ(written, compressed.size());
|
@@ -484,15 +367,15 @@ static int VerifyString(const string& input) {
|
|
484
367
|
snappy::MaxCompressedLength(input.size()));
|
485
368
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
486
369
|
|
487
|
-
string uncompressed;
|
370
|
+
std::string uncompressed;
|
488
371
|
DataEndingAtUnreadablePage c(compressed);
|
489
372
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
490
373
|
CHECK_EQ(uncompressed, input);
|
491
374
|
return uncompressed.size();
|
492
375
|
}
|
493
376
|
|
494
|
-
static void VerifyStringSink(const string& input) {
|
495
|
-
string compressed;
|
377
|
+
static void VerifyStringSink(const std::string& input) {
|
378
|
+
std::string compressed;
|
496
379
|
DataEndingAtUnreadablePage i(input);
|
497
380
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
498
381
|
CHECK_EQ(written, compressed.size());
|
@@ -500,7 +383,7 @@ static void VerifyStringSink(const string& input) {
|
|
500
383
|
snappy::MaxCompressedLength(input.size()));
|
501
384
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
502
385
|
|
503
|
-
string uncompressed;
|
386
|
+
std::string uncompressed;
|
504
387
|
uncompressed.resize(input.size());
|
505
388
|
snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
|
506
389
|
DataEndingAtUnreadablePage c(compressed);
|
@@ -509,8 +392,8 @@ static void VerifyStringSink(const string& input) {
|
|
509
392
|
CHECK_EQ(uncompressed, input);
|
510
393
|
}
|
511
394
|
|
512
|
-
static void VerifyIOVec(const string& input) {
|
513
|
-
string compressed;
|
395
|
+
static void VerifyIOVec(const std::string& input) {
|
396
|
+
std::string compressed;
|
514
397
|
DataEndingAtUnreadablePage i(input);
|
515
398
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
516
399
|
CHECK_EQ(written, compressed.size());
|
@@ -521,23 +404,28 @@ static void VerifyIOVec(const string& input) {
|
|
521
404
|
// Try uncompressing into an iovec containing a random number of entries
|
522
405
|
// ranging from 1 to 10.
|
523
406
|
char* buf = new char[input.size()];
|
524
|
-
|
525
|
-
size_t
|
407
|
+
std::minstd_rand0 rng(input.size());
|
408
|
+
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
|
409
|
+
size_t num = uniform_1_to_10(rng);
|
526
410
|
if (input.size() < num) {
|
527
411
|
num = input.size();
|
528
412
|
}
|
529
413
|
struct iovec* iov = new iovec[num];
|
530
414
|
int used_so_far = 0;
|
415
|
+
std::bernoulli_distribution one_in_five(1.0 / 5);
|
531
416
|
for (size_t i = 0; i < num; ++i) {
|
417
|
+
assert(used_so_far < input.size());
|
532
418
|
iov[i].iov_base = buf + used_so_far;
|
533
419
|
if (i == num - 1) {
|
534
420
|
iov[i].iov_len = input.size() - used_so_far;
|
535
421
|
} else {
|
536
422
|
// Randomly choose to insert a 0 byte entry.
|
537
|
-
if (
|
423
|
+
if (one_in_five(rng)) {
|
538
424
|
iov[i].iov_len = 0;
|
539
425
|
} else {
|
540
|
-
|
426
|
+
std::uniform_int_distribution<size_t> uniform_not_used_so_far(
|
427
|
+
0, input.size() - used_so_far - 1);
|
428
|
+
iov[i].iov_len = uniform_not_used_so_far(rng);
|
541
429
|
}
|
542
430
|
}
|
543
431
|
used_so_far += iov[i].iov_len;
|
@@ -551,22 +439,22 @@ static void VerifyIOVec(const string& input) {
|
|
551
439
|
|
552
440
|
// Test that data compressed by a compressor that does not
|
553
441
|
// obey block sizes is uncompressed properly.
|
554
|
-
static void VerifyNonBlockedCompression(const string& input) {
|
442
|
+
static void VerifyNonBlockedCompression(const std::string& input) {
|
555
443
|
if (input.length() > snappy::kBlockSize) {
|
556
444
|
// We cannot test larger blocks than the maximum block size, obviously.
|
557
445
|
return;
|
558
446
|
}
|
559
447
|
|
560
|
-
string prefix;
|
448
|
+
std::string prefix;
|
561
449
|
Varint::Append32(&prefix, input.size());
|
562
450
|
|
563
451
|
// Setup compression table
|
564
|
-
snappy::internal::WorkingMemory wmem;
|
452
|
+
snappy::internal::WorkingMemory wmem(input.size());
|
565
453
|
int table_size;
|
566
454
|
uint16* table = wmem.GetHashTable(input.size(), &table_size);
|
567
455
|
|
568
456
|
// Compress entire input in one shot
|
569
|
-
string compressed;
|
457
|
+
std::string compressed;
|
570
458
|
compressed += prefix;
|
571
459
|
compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
|
572
460
|
char* dest = string_as_array(&compressed) + prefix.size();
|
@@ -574,13 +462,13 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
574
462
|
dest, table, table_size);
|
575
463
|
compressed.resize(end - compressed.data());
|
576
464
|
|
577
|
-
// Uncompress into string
|
578
|
-
string uncomp_str;
|
465
|
+
// Uncompress into std::string
|
466
|
+
std::string uncomp_str;
|
579
467
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
|
580
468
|
CHECK_EQ(uncomp_str, input);
|
581
469
|
|
582
470
|
// Uncompress using source/sink
|
583
|
-
string uncomp_str2;
|
471
|
+
std::string uncomp_str2;
|
584
472
|
uncomp_str2.resize(input.size());
|
585
473
|
snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
|
586
474
|
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
@@ -592,28 +480,28 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
592
480
|
static const int kNumBlocks = 10;
|
593
481
|
struct iovec vec[kNumBlocks];
|
594
482
|
const int block_size = 1 + input.size() / kNumBlocks;
|
595
|
-
string iovec_data(block_size * kNumBlocks, 'x');
|
483
|
+
std::string iovec_data(block_size * kNumBlocks, 'x');
|
596
484
|
for (int i = 0; i < kNumBlocks; i++) {
|
597
485
|
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
598
486
|
vec[i].iov_len = block_size;
|
599
487
|
}
|
600
488
|
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
|
601
489
|
vec, kNumBlocks));
|
602
|
-
CHECK_EQ(string(iovec_data.data(), input.size()), input);
|
490
|
+
CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
|
603
491
|
}
|
604
492
|
}
|
605
493
|
|
606
494
|
// Expand the input so that it is at least K times as big as block size
|
607
|
-
static string Expand(const string& input) {
|
495
|
+
static std::string Expand(const std::string& input) {
|
608
496
|
static const int K = 3;
|
609
|
-
string data = input;
|
497
|
+
std::string data = input;
|
610
498
|
while (data.size() < K * snappy::kBlockSize) {
|
611
499
|
data += input;
|
612
500
|
}
|
613
501
|
return data;
|
614
502
|
}
|
615
503
|
|
616
|
-
static int Verify(const string& input) {
|
504
|
+
static int Verify(const std::string& input) {
|
617
505
|
VLOG(1) << "Verifying input of size " << input.size();
|
618
506
|
|
619
507
|
// Compress using string based routines
|
@@ -625,7 +513,7 @@ static int Verify(const string& input) {
|
|
625
513
|
VerifyNonBlockedCompression(input);
|
626
514
|
VerifyIOVec(input);
|
627
515
|
if (!input.empty()) {
|
628
|
-
const string expanded = Expand(input);
|
516
|
+
const std::string expanded = Expand(input);
|
629
517
|
VerifyNonBlockedCompression(expanded);
|
630
518
|
VerifyIOVec(input);
|
631
519
|
}
|
@@ -633,21 +521,20 @@ static int Verify(const string& input) {
|
|
633
521
|
return result;
|
634
522
|
}
|
635
523
|
|
636
|
-
|
637
|
-
static bool IsValidCompressedBuffer(const string& c) {
|
524
|
+
static bool IsValidCompressedBuffer(const std::string& c) {
|
638
525
|
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
639
526
|
}
|
640
|
-
static bool Uncompress(const string& c, string* u) {
|
527
|
+
static bool Uncompress(const std::string& c, std::string* u) {
|
641
528
|
return snappy::Uncompress(c.data(), c.size(), u);
|
642
529
|
}
|
643
530
|
|
644
531
|
// This test checks to ensure that snappy doesn't coredump if it gets
|
645
532
|
// corrupted data.
|
646
533
|
TEST(CorruptedTest, VerifyCorrupted) {
|
647
|
-
string source = "making sure we don't crash with corrupted input";
|
534
|
+
std::string source = "making sure we don't crash with corrupted input";
|
648
535
|
VLOG(1) << source;
|
649
|
-
string dest;
|
650
|
-
string uncmp;
|
536
|
+
std::string dest;
|
537
|
+
std::string uncmp;
|
651
538
|
snappy::Compress(source.data(), source.size(), &dest);
|
652
539
|
|
653
540
|
// Mess around with the data. It's hard to simulate all possible
|
@@ -694,9 +581,9 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
694
581
|
|
695
582
|
// try reading stuff in from a bad file.
|
696
583
|
for (int i = 1; i <= 3; ++i) {
|
697
|
-
string data =
|
698
|
-
|
699
|
-
string uncmp;
|
584
|
+
std::string data =
|
585
|
+
ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
|
586
|
+
std::string uncmp;
|
700
587
|
// check that we don't return a crazy length
|
701
588
|
size_t ulen;
|
702
589
|
CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
|
@@ -714,7 +601,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
714
601
|
// These mirror the compression code in snappy.cc, but are copied
|
715
602
|
// here so that we can bypass some limitations in the how snappy.cc
|
716
603
|
// invokes these routines.
|
717
|
-
static void AppendLiteral(string* dst, const string& literal) {
|
604
|
+
static void AppendLiteral(std::string* dst, const std::string& literal) {
|
718
605
|
if (literal.empty()) return;
|
719
606
|
int n = literal.size() - 1;
|
720
607
|
if (n < 60) {
|
@@ -729,12 +616,12 @@ static void AppendLiteral(string* dst, const string& literal) {
|
|
729
616
|
n >>= 8;
|
730
617
|
}
|
731
618
|
dst->push_back(0 | ((59+count) << 2));
|
732
|
-
*dst += string(number, count);
|
619
|
+
*dst += std::string(number, count);
|
733
620
|
}
|
734
621
|
*dst += literal;
|
735
622
|
}
|
736
623
|
|
737
|
-
static void AppendCopy(string* dst, int offset, int length) {
|
624
|
+
static void AppendCopy(std::string* dst, int offset, int length) {
|
738
625
|
while (length > 0) {
|
739
626
|
// Figure out how much to copy in one shot
|
740
627
|
int to_copy;
|
@@ -771,51 +658,67 @@ TEST(Snappy, SimpleTests) {
|
|
771
658
|
Verify("ab");
|
772
659
|
Verify("abc");
|
773
660
|
|
774
|
-
Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
|
775
|
-
Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
|
776
|
-
Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
|
777
|
-
Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
778
|
-
Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
661
|
+
Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
|
662
|
+
Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
|
663
|
+
Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
|
664
|
+
Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
665
|
+
Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
779
666
|
}
|
780
667
|
|
781
668
|
// Verify max blowup (lots of four-byte copies)
|
782
669
|
TEST(Snappy, MaxBlowup) {
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
input.
|
788
|
-
|
789
|
-
for (int i =
|
790
|
-
|
791
|
-
|
792
|
-
input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
|
670
|
+
std::mt19937 rng;
|
671
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
672
|
+
std::string input;
|
673
|
+
for (int i = 0; i < 80000; ++i)
|
674
|
+
input.push_back(static_cast<char>(uniform_byte(rng)));
|
675
|
+
|
676
|
+
for (int i = 0; i < 80000; i += 4) {
|
677
|
+
std::string four_bytes(input.end() - i - 4, input.end() - i);
|
678
|
+
input.append(four_bytes);
|
793
679
|
}
|
794
680
|
Verify(input);
|
795
681
|
}
|
796
682
|
|
797
683
|
TEST(Snappy, RandomData) {
|
798
|
-
|
799
|
-
|
800
|
-
|
684
|
+
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
685
|
+
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
|
686
|
+
std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
|
687
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
688
|
+
std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
|
689
|
+
std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
|
690
|
+
std::bernoulli_distribution one_in_ten(1.0 / 10);
|
691
|
+
|
692
|
+
constexpr int num_ops = 20000;
|
801
693
|
for (int i = 0; i < num_ops; i++) {
|
802
694
|
if ((i % 1000) == 0) {
|
803
695
|
VLOG(0) << "Random op " << i << " of " << num_ops;
|
804
696
|
}
|
805
697
|
|
806
|
-
string x;
|
807
|
-
size_t len =
|
698
|
+
std::string x;
|
699
|
+
size_t len = uniform_4k(rng);
|
808
700
|
if (i < 100) {
|
809
|
-
len = 65536 +
|
701
|
+
len = 65536 + uniform_64k(rng);
|
810
702
|
}
|
811
703
|
while (x.size() < len) {
|
812
704
|
int run_len = 1;
|
813
|
-
if (
|
814
|
-
|
705
|
+
if (one_in_ten(rng)) {
|
706
|
+
int skewed_bits = uniform_0_to_8(rng);
|
707
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
|
708
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
709
|
+
(1 << skewed_bits) - 1);
|
710
|
+
run_len = skewed_low(rng);
|
711
|
+
}
|
712
|
+
char c = static_cast<char>(uniform_byte(rng));
|
713
|
+
if (i >= 100) {
|
714
|
+
int skewed_bits = uniform_0_to_3(rng);
|
715
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
|
716
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
717
|
+
(1 << skewed_bits) - 1);
|
718
|
+
c = static_cast<char>(skewed_low(rng));
|
815
719
|
}
|
816
|
-
char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
|
817
720
|
while (run_len-- > 0 && x.size() < len) {
|
818
|
-
x
|
721
|
+
x.push_back(c);
|
819
722
|
}
|
820
723
|
}
|
821
724
|
|
@@ -829,19 +732,19 @@ TEST(Snappy, FourByteOffset) {
|
|
829
732
|
// copy manually.
|
830
733
|
|
831
734
|
// The two fragments that make up the input string.
|
832
|
-
string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
833
|
-
string fragment2 = "some other string";
|
735
|
+
std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
736
|
+
std::string fragment2 = "some other string";
|
834
737
|
|
835
738
|
// How many times each fragment is emitted.
|
836
739
|
const int n1 = 2;
|
837
740
|
const int n2 = 100000 / fragment2.size();
|
838
741
|
const int length = n1 * fragment1.size() + n2 * fragment2.size();
|
839
742
|
|
840
|
-
string compressed;
|
743
|
+
std::string compressed;
|
841
744
|
Varint::Append32(&compressed, length);
|
842
745
|
|
843
746
|
AppendLiteral(&compressed, fragment1);
|
844
|
-
string src = fragment1;
|
747
|
+
std::string src = fragment1;
|
845
748
|
for (int i = 0; i < n2; i++) {
|
846
749
|
AppendLiteral(&compressed, fragment2);
|
847
750
|
src += fragment2;
|
@@ -850,7 +753,7 @@ TEST(Snappy, FourByteOffset) {
|
|
850
753
|
src += fragment1;
|
851
754
|
CHECK_EQ(length, src.size());
|
852
755
|
|
853
|
-
string uncompressed;
|
756
|
+
std::string uncompressed;
|
854
757
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
855
758
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
|
856
759
|
&uncompressed));
|
@@ -872,7 +775,7 @@ TEST(Snappy, IOVecEdgeCases) {
|
|
872
775
|
iov[i].iov_len = kLengths[i];
|
873
776
|
}
|
874
777
|
|
875
|
-
string compressed;
|
778
|
+
std::string compressed;
|
876
779
|
Varint::Append32(&compressed, 22);
|
877
780
|
|
878
781
|
// A literal whose output crosses three blocks.
|
@@ -933,7 +836,7 @@ TEST(Snappy, IOVecLiteralOverflow) {
|
|
933
836
|
iov[i].iov_len = kLengths[i];
|
934
837
|
}
|
935
838
|
|
936
|
-
string compressed;
|
839
|
+
std::string compressed;
|
937
840
|
Varint::Append32(&compressed, 8);
|
938
841
|
|
939
842
|
AppendLiteral(&compressed, "12345678");
|
@@ -955,7 +858,7 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
955
858
|
iov[i].iov_len = kLengths[i];
|
956
859
|
}
|
957
860
|
|
958
|
-
string compressed;
|
861
|
+
std::string compressed;
|
959
862
|
Varint::Append32(&compressed, 8);
|
960
863
|
|
961
864
|
AppendLiteral(&compressed, "123");
|
@@ -969,7 +872,7 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
969
872
|
}
|
970
873
|
}
|
971
874
|
|
972
|
-
static bool CheckUncompressedLength(const string& compressed,
|
875
|
+
static bool CheckUncompressedLength(const std::string& compressed,
|
973
876
|
size_t* ulength) {
|
974
877
|
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
975
878
|
compressed.size(),
|
@@ -983,7 +886,7 @@ static bool CheckUncompressedLength(const string& compressed,
|
|
983
886
|
}
|
984
887
|
|
985
888
|
TEST(SnappyCorruption, TruncatedVarint) {
|
986
|
-
string compressed, uncompressed;
|
889
|
+
std::string compressed, uncompressed;
|
987
890
|
size_t ulength;
|
988
891
|
compressed.push_back('\xf0');
|
989
892
|
CHECK(!CheckUncompressedLength(compressed, &ulength));
|
@@ -993,7 +896,7 @@ TEST(SnappyCorruption, TruncatedVarint) {
|
|
993
896
|
}
|
994
897
|
|
995
898
|
TEST(SnappyCorruption, UnterminatedVarint) {
|
996
|
-
string compressed, uncompressed;
|
899
|
+
std::string compressed, uncompressed;
|
997
900
|
size_t ulength;
|
998
901
|
compressed.push_back('\x80');
|
999
902
|
compressed.push_back('\x80');
|
@@ -1008,7 +911,7 @@ TEST(SnappyCorruption, UnterminatedVarint) {
|
|
1008
911
|
}
|
1009
912
|
|
1010
913
|
TEST(SnappyCorruption, OverflowingVarint) {
|
1011
|
-
string compressed, uncompressed;
|
914
|
+
std::string compressed, uncompressed;
|
1012
915
|
size_t ulength;
|
1013
916
|
compressed.push_back('\xfb');
|
1014
917
|
compressed.push_back('\xff');
|
@@ -1025,14 +928,14 @@ TEST(Snappy, ReadPastEndOfBuffer) {
|
|
1025
928
|
// Check that we do not read past end of input
|
1026
929
|
|
1027
930
|
// Make a compressed string that ends with a single-byte literal
|
1028
|
-
string compressed;
|
931
|
+
std::string compressed;
|
1029
932
|
Varint::Append32(&compressed, 1);
|
1030
933
|
AppendLiteral(&compressed, "x");
|
1031
934
|
|
1032
|
-
string uncompressed;
|
935
|
+
std::string uncompressed;
|
1033
936
|
DataEndingAtUnreadablePage c(compressed);
|
1034
937
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
1035
|
-
CHECK_EQ(uncompressed, string("x"));
|
938
|
+
CHECK_EQ(uncompressed, std::string("x"));
|
1036
939
|
}
|
1037
940
|
|
1038
941
|
// Check for an infinite loop caused by a copy with offset==0
|
@@ -1153,17 +1056,20 @@ TEST(Snappy, FindMatchLength) {
|
|
1153
1056
|
}
|
1154
1057
|
|
1155
1058
|
TEST(Snappy, FindMatchLengthRandom) {
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1059
|
+
constexpr int kNumTrials = 10000;
|
1060
|
+
constexpr int kTypicalLength = 10;
|
1061
|
+
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
1062
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
1063
|
+
std::bernoulli_distribution one_in_two(1.0 / 2);
|
1064
|
+
std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
|
1159
1065
|
|
1160
1066
|
for (int i = 0; i < kNumTrials; i++) {
|
1161
|
-
string s, t;
|
1162
|
-
char a =
|
1163
|
-
char b =
|
1164
|
-
while (!
|
1165
|
-
s.push_back(
|
1166
|
-
t.push_back(
|
1067
|
+
std::string s, t;
|
1068
|
+
char a = static_cast<char>(uniform_byte(rng));
|
1069
|
+
char b = static_cast<char>(uniform_byte(rng));
|
1070
|
+
while (!one_in_typical_length(rng)) {
|
1071
|
+
s.push_back(one_in_two(rng) ? a : b);
|
1072
|
+
t.push_back(one_in_two(rng) ? a : b);
|
1167
1073
|
}
|
1168
1074
|
DataEndingAtUnreadablePage u(s);
|
1169
1075
|
DataEndingAtUnreadablePage v(t);
|
@@ -1197,7 +1103,6 @@ TEST(Snappy, VerifyCharTable) {
|
|
1197
1103
|
using snappy::internal::COPY_2_BYTE_OFFSET;
|
1198
1104
|
using snappy::internal::COPY_4_BYTE_OFFSET;
|
1199
1105
|
using snappy::internal::char_table;
|
1200
|
-
using snappy::internal::wordmask;
|
1201
1106
|
|
1202
1107
|
uint16 dst[256];
|
1203
1108
|
|
@@ -1274,49 +1179,46 @@ TEST(Snappy, VerifyCharTable) {
|
|
1274
1179
|
}
|
1275
1180
|
|
1276
1181
|
static void CompressFile(const char* fname) {
|
1277
|
-
string fullinput;
|
1182
|
+
std::string fullinput;
|
1278
1183
|
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1279
1184
|
|
1280
|
-
string compressed;
|
1185
|
+
std::string compressed;
|
1281
1186
|
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
|
1282
1187
|
|
1283
|
-
CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
|
1188
|
+
CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
|
1284
1189
|
file::Defaults()));
|
1285
1190
|
}
|
1286
1191
|
|
1287
1192
|
static void UncompressFile(const char* fname) {
|
1288
|
-
string fullinput;
|
1193
|
+
std::string fullinput;
|
1289
1194
|
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1290
1195
|
|
1291
1196
|
size_t uncompLength;
|
1292
1197
|
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
|
1293
1198
|
|
1294
|
-
string uncompressed;
|
1199
|
+
std::string uncompressed;
|
1295
1200
|
uncompressed.resize(uncompLength);
|
1296
1201
|
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
|
1297
1202
|
|
1298
|
-
CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
|
1203
|
+
CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
|
1299
1204
|
file::Defaults()));
|
1300
1205
|
}
|
1301
1206
|
|
1302
1207
|
static void MeasureFile(const char* fname) {
|
1303
|
-
string fullinput;
|
1208
|
+
std::string fullinput;
|
1304
1209
|
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1305
1210
|
printf("%-40s :\n", fname);
|
1306
1211
|
|
1307
1212
|
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
|
1308
1213
|
int end_len = fullinput.size();
|
1309
1214
|
if (FLAGS_end_len >= 0) {
|
1310
|
-
end_len = min<int>(fullinput.size(), FLAGS_end_len);
|
1215
|
+
end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
|
1311
1216
|
}
|
1312
1217
|
for (int len = start_len; len <= end_len; len++) {
|
1313
1218
|
const char* const input = fullinput.data();
|
1314
1219
|
int repeats = (FLAGS_bytes + len) / (len + 1);
|
1315
1220
|
if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
|
1316
1221
|
if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
|
1317
|
-
if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
|
1318
|
-
if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
|
1319
|
-
if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
|
1320
1222
|
if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
|
1321
1223
|
|
1322
1224
|
// For block-size based measurements
|
@@ -1356,10 +1258,10 @@ static void BM_UFlat(int iters, int arg) {
|
|
1356
1258
|
// Pick file to process based on "arg"
|
1357
1259
|
CHECK_GE(arg, 0);
|
1358
1260
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1359
|
-
string contents =
|
1360
|
-
|
1261
|
+
std::string contents =
|
1262
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1361
1263
|
|
1362
|
-
string zcontents;
|
1264
|
+
std::string zcontents;
|
1363
1265
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1364
1266
|
char* dst = new char[contents.size()];
|
1365
1267
|
|
@@ -1382,10 +1284,10 @@ static void BM_UValidate(int iters, int arg) {
|
|
1382
1284
|
// Pick file to process based on "arg"
|
1383
1285
|
CHECK_GE(arg, 0);
|
1384
1286
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1385
|
-
string contents =
|
1386
|
-
|
1287
|
+
std::string contents =
|
1288
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1387
1289
|
|
1388
|
-
string zcontents;
|
1290
|
+
std::string zcontents;
|
1389
1291
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1390
1292
|
|
1391
1293
|
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
@@ -1405,10 +1307,10 @@ static void BM_UIOVec(int iters, int arg) {
|
|
1405
1307
|
// Pick file to process based on "arg"
|
1406
1308
|
CHECK_GE(arg, 0);
|
1407
1309
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1408
|
-
string contents =
|
1409
|
-
|
1310
|
+
std::string contents =
|
1311
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1410
1312
|
|
1411
|
-
string zcontents;
|
1313
|
+
std::string zcontents;
|
1412
1314
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1413
1315
|
|
1414
1316
|
// Uncompress into an iovec containing ten entries.
|
@@ -1451,10 +1353,10 @@ static void BM_UFlatSink(int iters, int arg) {
|
|
1451
1353
|
// Pick file to process based on "arg"
|
1452
1354
|
CHECK_GE(arg, 0);
|
1453
1355
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1454
|
-
string contents =
|
1455
|
-
|
1356
|
+
std::string contents =
|
1357
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1456
1358
|
|
1457
|
-
string zcontents;
|
1359
|
+
std::string zcontents;
|
1458
1360
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1459
1361
|
char* dst = new char[contents.size()];
|
1460
1362
|
|
@@ -1469,7 +1371,7 @@ static void BM_UFlatSink(int iters, int arg) {
|
|
1469
1371
|
}
|
1470
1372
|
StopBenchmarkTiming();
|
1471
1373
|
|
1472
|
-
string s(dst, contents.size());
|
1374
|
+
std::string s(dst, contents.size());
|
1473
1375
|
CHECK_EQ(contents, s);
|
1474
1376
|
|
1475
1377
|
delete[] dst;
|
@@ -1483,8 +1385,8 @@ static void BM_ZFlat(int iters, int arg) {
|
|
1483
1385
|
// Pick file to process based on "arg"
|
1484
1386
|
CHECK_GE(arg, 0);
|
1485
1387
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1486
|
-
string contents =
|
1487
|
-
|
1388
|
+
std::string contents =
|
1389
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1488
1390
|
|
1489
1391
|
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
1490
1392
|
|
@@ -1499,16 +1401,89 @@ static void BM_ZFlat(int iters, int arg) {
|
|
1499
1401
|
StopBenchmarkTiming();
|
1500
1402
|
const double compression_ratio =
|
1501
1403
|
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
1502
|
-
SetBenchmarkLabel(
|
1503
|
-
|
1504
|
-
VLOG(0) <<
|
1505
|
-
|
1404
|
+
SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
|
1405
|
+
100.0 * compression_ratio));
|
1406
|
+
VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
|
1407
|
+
files[arg].label, static_cast<int>(contents.size()),
|
1408
|
+
static_cast<int>(zsize));
|
1506
1409
|
delete[] dst;
|
1507
1410
|
}
|
1508
1411
|
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1509
1412
|
|
1510
|
-
|
1413
|
+
static void BM_ZFlatAll(int iters, int arg) {
|
1414
|
+
StopBenchmarkTiming();
|
1415
|
+
|
1416
|
+
CHECK_EQ(arg, 0);
|
1417
|
+
const int num_files = ARRAYSIZE(files);
|
1418
|
+
|
1419
|
+
std::vector<std::string> contents(num_files);
|
1420
|
+
std::vector<char*> dst(num_files);
|
1421
|
+
|
1422
|
+
int64 total_contents_size = 0;
|
1423
|
+
for (int i = 0; i < num_files; ++i) {
|
1424
|
+
contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
|
1425
|
+
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
|
1426
|
+
total_contents_size += contents[i].size();
|
1427
|
+
}
|
1428
|
+
|
1429
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1430
|
+
StartBenchmarkTiming();
|
1431
|
+
|
1432
|
+
size_t zsize = 0;
|
1433
|
+
while (iters-- > 0) {
|
1434
|
+
for (int i = 0; i < num_files; ++i) {
|
1435
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1436
|
+
&zsize);
|
1437
|
+
}
|
1438
|
+
}
|
1439
|
+
StopBenchmarkTiming();
|
1511
1440
|
|
1441
|
+
for (int i = 0; i < num_files; ++i) {
|
1442
|
+
delete[] dst[i];
|
1443
|
+
}
|
1444
|
+
SetBenchmarkLabel(StrFormat("%d files", num_files));
|
1445
|
+
}
|
1446
|
+
BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
|
1447
|
+
|
1448
|
+
static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
|
1449
|
+
StopBenchmarkTiming();
|
1450
|
+
|
1451
|
+
CHECK_EQ(arg, 0);
|
1452
|
+
CHECK_GT(ARRAYSIZE(files), 0);
|
1453
|
+
const std::string base_content =
|
1454
|
+
ReadTestDataFile(files[0].filename, files[0].size_limit);
|
1455
|
+
|
1456
|
+
std::vector<std::string> contents;
|
1457
|
+
std::vector<char*> dst;
|
1458
|
+
int64 total_contents_size = 0;
|
1459
|
+
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
|
1460
|
+
++table_bits) {
|
1461
|
+
std::string content = base_content;
|
1462
|
+
content.resize(1 << table_bits);
|
1463
|
+
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
|
1464
|
+
total_contents_size += content.size();
|
1465
|
+
contents.push_back(std::move(content));
|
1466
|
+
}
|
1467
|
+
|
1468
|
+
size_t zsize = 0;
|
1469
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1470
|
+
StartBenchmarkTiming();
|
1471
|
+
while (iters-- > 0) {
|
1472
|
+
for (int i = 0; i < contents.size(); ++i) {
|
1473
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1474
|
+
&zsize);
|
1475
|
+
}
|
1476
|
+
}
|
1477
|
+
StopBenchmarkTiming();
|
1478
|
+
|
1479
|
+
for (int i = 0; i < dst.size(); ++i) {
|
1480
|
+
delete[] dst[i];
|
1481
|
+
}
|
1482
|
+
SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
|
1483
|
+
}
|
1484
|
+
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
|
1485
|
+
|
1486
|
+
} // namespace snappy
|
1512
1487
|
|
1513
1488
|
int main(int argc, char** argv) {
|
1514
1489
|
InitGoogle(argv[0], &argc, &argv, true);
|
@@ -1517,11 +1492,11 @@ int main(int argc, char** argv) {
|
|
1517
1492
|
if (argc >= 2) {
|
1518
1493
|
for (int arg = 1; arg < argc; arg++) {
|
1519
1494
|
if (FLAGS_write_compressed) {
|
1520
|
-
CompressFile(argv[arg]);
|
1495
|
+
snappy::CompressFile(argv[arg]);
|
1521
1496
|
} else if (FLAGS_write_uncompressed) {
|
1522
|
-
UncompressFile(argv[arg]);
|
1497
|
+
snappy::UncompressFile(argv[arg]);
|
1523
1498
|
} else {
|
1524
|
-
MeasureFile(argv[arg]);
|
1499
|
+
snappy::MeasureFile(argv[arg]);
|
1525
1500
|
}
|
1526
1501
|
}
|
1527
1502
|
return 0;
|