snappy 0.0.17 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.dockerignore +2 -0
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/.gitignore +2 -1
- data/.gitmodules +1 -1
- data/Dockerfile +13 -0
- data/Gemfile +4 -0
- data/README.md +29 -5
- data/Rakefile +32 -29
- data/ext/api.c +6 -1
- data/ext/extconf.rb +23 -16
- data/lib/snappy/hadoop/reader.rb +62 -0
- data/lib/snappy/hadoop/writer.rb +51 -0
- data/lib/snappy/hadoop.rb +22 -0
- data/lib/snappy/reader.rb +14 -10
- data/lib/snappy/shim.rb +1 -1
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy.rb +5 -4
- data/snappy.gemspec +13 -13
- data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
- data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
- data/test/snappy_hadoop_test.rb +26 -0
- data/test/snappy_reader_test.rb +148 -0
- data/test/snappy_test.rb +95 -0
- data/test/snappy_writer_test.rb +55 -0
- data/test/test_helper.rb +7 -0
- data/test.sh +3 -0
- data/vendor/snappy/CMakeLists.txt +297 -0
- data/vendor/snappy/CONTRIBUTING.md +26 -0
- data/vendor/snappy/NEWS +40 -0
- data/vendor/snappy/{README → README.md} +27 -18
- data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/vendor/snappy/cmake/config.h.in +62 -0
- data/vendor/snappy/docs/README.md +72 -0
- data/vendor/snappy/snappy-internal.h +22 -18
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +116 -38
- data/vendor/snappy/snappy-stubs-public.h.in +20 -46
- data/vendor/snappy/snappy-test.cc +26 -22
- data/vendor/snappy/snappy-test.h +24 -98
- data/vendor/snappy/snappy.cc +380 -183
- data/vendor/snappy/snappy.h +14 -10
- data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
- data/vendor/snappy/snappy_unittest.cc +236 -261
- metadata +37 -92
- data/.travis.yml +0 -26
- data/smoke.sh +0 -8
- data/test/test-snappy-reader.rb +0 -129
- data/test/test-snappy-writer.rb +0 -55
- data/test/test-snappy.rb +0 -58
- data/vendor/snappy/ChangeLog +0 -2468
- data/vendor/snappy/INSTALL +0 -370
- data/vendor/snappy/Makefile +0 -982
- data/vendor/snappy/Makefile.am +0 -26
- data/vendor/snappy/Makefile.in +0 -982
- data/vendor/snappy/aclocal.m4 +0 -9738
- data/vendor/snappy/autogen.sh +0 -12
- data/vendor/snappy/autom4te.cache/output.0 +0 -18856
- data/vendor/snappy/autom4te.cache/output.1 +0 -18852
- data/vendor/snappy/autom4te.cache/requests +0 -297
- data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
- data/vendor/snappy/autom4te.cache/traces.1 +0 -714
- data/vendor/snappy/config.guess +0 -1530
- data/vendor/snappy/config.h +0 -135
- data/vendor/snappy/config.h.in +0 -134
- data/vendor/snappy/config.log +0 -1640
- data/vendor/snappy/config.status +0 -2318
- data/vendor/snappy/config.sub +0 -1773
- data/vendor/snappy/configure +0 -18852
- data/vendor/snappy/configure.ac +0 -134
- data/vendor/snappy/depcomp +0 -688
- data/vendor/snappy/install-sh +0 -527
- data/vendor/snappy/libtool +0 -10246
- data/vendor/snappy/ltmain.sh +0 -9661
- data/vendor/snappy/m4/gtest.m4 +0 -74
- data/vendor/snappy/m4/libtool.m4 +0 -8001
- data/vendor/snappy/m4/ltoptions.m4 +0 -384
- data/vendor/snappy/m4/ltsugar.m4 +0 -123
- data/vendor/snappy/m4/ltversion.m4 +0 -23
- data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
- data/vendor/snappy/missing +0 -331
- data/vendor/snappy/snappy-stubs-public.h +0 -100
- data/vendor/snappy/snappy.pc +0 -10
- data/vendor/snappy/snappy.pc.in +0 -10
- data/vendor/snappy/stamp-h1 +0 -1
@@ -29,9 +29,10 @@
|
|
29
29
|
#include <math.h>
|
30
30
|
#include <stdlib.h>
|
31
31
|
|
32
|
-
|
33
32
|
#include <algorithm>
|
33
|
+
#include <random>
|
34
34
|
#include <string>
|
35
|
+
#include <utility>
|
35
36
|
#include <vector>
|
36
37
|
|
37
38
|
#include "snappy.h"
|
@@ -50,13 +51,6 @@ DEFINE_bool(zlib, false,
|
|
50
51
|
"Run zlib compression (http://www.zlib.net)");
|
51
52
|
DEFINE_bool(lzo, false,
|
52
53
|
"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
|
53
|
-
DEFINE_bool(quicklz, false,
|
54
|
-
"Run quickLZ compression (http://www.quicklz.com/)");
|
55
|
-
DEFINE_bool(liblzf, false,
|
56
|
-
"Run libLZF compression "
|
57
|
-
"(http://www.goof.com/pcg/marc/liblzf.html)");
|
58
|
-
DEFINE_bool(fastlz, false,
|
59
|
-
"Run FastLZ compression (http://www.fastlz.org/");
|
60
54
|
DEFINE_bool(snappy, true, "Run snappy compression");
|
61
55
|
|
62
56
|
DEFINE_bool(write_compressed, false,
|
@@ -69,8 +63,7 @@ DEFINE_bool(snappy_dump_decompression_table, false,
|
|
69
63
|
|
70
64
|
namespace snappy {
|
71
65
|
|
72
|
-
|
73
|
-
#ifdef HAVE_FUNC_MMAP
|
66
|
+
#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
|
74
67
|
|
75
68
|
// To test against code that reads beyond its input, this class copies a
|
76
69
|
// string to a newly allocated group of pages, the last of which
|
@@ -80,8 +73,8 @@ namespace snappy {
|
|
80
73
|
// be able to read previously allocated memory while doing heap allocations.
|
81
74
|
class DataEndingAtUnreadablePage {
|
82
75
|
public:
|
83
|
-
explicit DataEndingAtUnreadablePage(const string& s) {
|
84
|
-
const size_t page_size =
|
76
|
+
explicit DataEndingAtUnreadablePage(const std::string& s) {
|
77
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
85
78
|
const size_t size = s.size();
|
86
79
|
// Round up space for string to a multiple of page_size.
|
87
80
|
size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
|
@@ -99,8 +92,9 @@ class DataEndingAtUnreadablePage {
|
|
99
92
|
}
|
100
93
|
|
101
94
|
~DataEndingAtUnreadablePage() {
|
95
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
102
96
|
// Undo the mprotect.
|
103
|
-
CHECK_EQ(0, mprotect(protected_page_,
|
97
|
+
CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
|
104
98
|
CHECK_EQ(0, munmap(mem_, alloc_size_));
|
105
99
|
}
|
106
100
|
|
@@ -115,19 +109,19 @@ class DataEndingAtUnreadablePage {
|
|
115
109
|
size_t size_;
|
116
110
|
};
|
117
111
|
|
118
|
-
#else // HAVE_FUNC_MMAP
|
112
|
+
#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
|
119
113
|
|
120
114
|
// Fallback for systems without mmap.
|
121
|
-
|
115
|
+
using DataEndingAtUnreadablePage = std::string;
|
122
116
|
|
123
117
|
#endif
|
124
118
|
|
125
119
|
enum CompressorType {
|
126
|
-
ZLIB, LZO,
|
120
|
+
ZLIB, LZO, SNAPPY
|
127
121
|
};
|
128
122
|
|
129
123
|
const char* names[] = {
|
130
|
-
"ZLIB", "LZO", "
|
124
|
+
"ZLIB", "LZO", "SNAPPY"
|
131
125
|
};
|
132
126
|
|
133
127
|
static size_t MinimumRequiredOutputSpace(size_t input_size,
|
@@ -143,21 +137,6 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
|
|
143
137
|
return input_size + input_size/64 + 16 + 3;
|
144
138
|
#endif // LZO_VERSION
|
145
139
|
|
146
|
-
#ifdef LZF_VERSION
|
147
|
-
case LIBLZF:
|
148
|
-
return input_size;
|
149
|
-
#endif // LZF_VERSION
|
150
|
-
|
151
|
-
#ifdef QLZ_VERSION_MAJOR
|
152
|
-
case QUICKLZ:
|
153
|
-
return input_size + 36000; // 36000 is used for scratch.
|
154
|
-
#endif // QLZ_VERSION_MAJOR
|
155
|
-
|
156
|
-
#ifdef FASTLZ_VERSION
|
157
|
-
case FASTLZ:
|
158
|
-
return max(static_cast<int>(ceil(input_size * 1.05)), 66);
|
159
|
-
#endif // FASTLZ_VERSION
|
160
|
-
|
161
140
|
case SNAPPY:
|
162
141
|
return snappy::MaxCompressedLength(input_size);
|
163
142
|
|
@@ -175,7 +154,7 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
|
|
175
154
|
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
|
176
155
|
// number of bytes, and may contain junk bytes at the end after return.
|
177
156
|
static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
178
|
-
string* compressed, bool compressed_is_preallocated) {
|
157
|
+
std::string* compressed, bool compressed_is_preallocated) {
|
179
158
|
if (!compressed_is_preallocated) {
|
180
159
|
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
|
181
160
|
}
|
@@ -217,58 +196,6 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
217
196
|
}
|
218
197
|
#endif // LZO_VERSION
|
219
198
|
|
220
|
-
#ifdef LZF_VERSION
|
221
|
-
case LIBLZF: {
|
222
|
-
int destlen = lzf_compress(input,
|
223
|
-
input_size,
|
224
|
-
string_as_array(compressed),
|
225
|
-
input_size);
|
226
|
-
if (destlen == 0) {
|
227
|
-
// lzf *can* cause lots of blowup when compressing, so they
|
228
|
-
// recommend to limit outsize to insize, and just not compress
|
229
|
-
// if it's bigger. Ideally, we'd just swap input and output.
|
230
|
-
compressed->assign(input, input_size);
|
231
|
-
destlen = input_size;
|
232
|
-
}
|
233
|
-
if (!compressed_is_preallocated) {
|
234
|
-
compressed->resize(destlen);
|
235
|
-
}
|
236
|
-
break;
|
237
|
-
}
|
238
|
-
#endif // LZF_VERSION
|
239
|
-
|
240
|
-
#ifdef QLZ_VERSION_MAJOR
|
241
|
-
case QUICKLZ: {
|
242
|
-
qlz_state_compress *state_compress = new qlz_state_compress;
|
243
|
-
int destlen = qlz_compress(input,
|
244
|
-
string_as_array(compressed),
|
245
|
-
input_size,
|
246
|
-
state_compress);
|
247
|
-
delete state_compress;
|
248
|
-
CHECK_NE(0, destlen);
|
249
|
-
if (!compressed_is_preallocated) {
|
250
|
-
compressed->resize(destlen);
|
251
|
-
}
|
252
|
-
break;
|
253
|
-
}
|
254
|
-
#endif // QLZ_VERSION_MAJOR
|
255
|
-
|
256
|
-
#ifdef FASTLZ_VERSION
|
257
|
-
case FASTLZ: {
|
258
|
-
// Use level 1 compression since we mostly care about speed.
|
259
|
-
int destlen = fastlz_compress_level(
|
260
|
-
1,
|
261
|
-
input,
|
262
|
-
input_size,
|
263
|
-
string_as_array(compressed));
|
264
|
-
if (!compressed_is_preallocated) {
|
265
|
-
compressed->resize(destlen);
|
266
|
-
}
|
267
|
-
CHECK_NE(destlen, 0);
|
268
|
-
break;
|
269
|
-
}
|
270
|
-
#endif // FASTLZ_VERSION
|
271
|
-
|
272
199
|
case SNAPPY: {
|
273
200
|
size_t destlen;
|
274
201
|
snappy::RawCompress(input, input_size,
|
@@ -288,8 +215,8 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
288
215
|
return true;
|
289
216
|
}
|
290
217
|
|
291
|
-
static bool Uncompress(const string& compressed, CompressorType comp,
|
292
|
-
int size, string* output) {
|
218
|
+
static bool Uncompress(const std::string& compressed, CompressorType comp,
|
219
|
+
int size, std::string* output) {
|
293
220
|
switch (comp) {
|
294
221
|
#ifdef ZLIB_VERSION
|
295
222
|
case ZLIB: {
|
@@ -323,49 +250,6 @@ static bool Uncompress(const string& compressed, CompressorType comp,
|
|
323
250
|
}
|
324
251
|
#endif // LZO_VERSION
|
325
252
|
|
326
|
-
#ifdef LZF_VERSION
|
327
|
-
case LIBLZF: {
|
328
|
-
output->resize(size);
|
329
|
-
int destlen = lzf_decompress(compressed.data(),
|
330
|
-
compressed.size(),
|
331
|
-
string_as_array(output),
|
332
|
-
output->size());
|
333
|
-
if (destlen == 0) {
|
334
|
-
// This error probably means we had decided not to compress,
|
335
|
-
// and thus have stored input in output directly.
|
336
|
-
output->assign(compressed.data(), compressed.size());
|
337
|
-
destlen = compressed.size();
|
338
|
-
}
|
339
|
-
CHECK_EQ(destlen, size);
|
340
|
-
break;
|
341
|
-
}
|
342
|
-
#endif // LZF_VERSION
|
343
|
-
|
344
|
-
#ifdef QLZ_VERSION_MAJOR
|
345
|
-
case QUICKLZ: {
|
346
|
-
output->resize(size);
|
347
|
-
qlz_state_decompress *state_decompress = new qlz_state_decompress;
|
348
|
-
int destlen = qlz_decompress(compressed.data(),
|
349
|
-
string_as_array(output),
|
350
|
-
state_decompress);
|
351
|
-
delete state_decompress;
|
352
|
-
CHECK_EQ(destlen, size);
|
353
|
-
break;
|
354
|
-
}
|
355
|
-
#endif // QLZ_VERSION_MAJOR
|
356
|
-
|
357
|
-
#ifdef FASTLZ_VERSION
|
358
|
-
case FASTLZ: {
|
359
|
-
output->resize(size);
|
360
|
-
int destlen = fastlz_decompress(compressed.data(),
|
361
|
-
compressed.length(),
|
362
|
-
string_as_array(output),
|
363
|
-
size);
|
364
|
-
CHECK_EQ(destlen, size);
|
365
|
-
break;
|
366
|
-
}
|
367
|
-
#endif // FASTLZ_VERSION
|
368
|
-
|
369
253
|
case SNAPPY: {
|
370
254
|
snappy::RawUncompress(compressed.data(), compressed.size(),
|
371
255
|
string_as_array(output));
|
@@ -395,11 +279,11 @@ static void Measure(const char* data,
|
|
395
279
|
int num_blocks = (length + block_size - 1) / block_size;
|
396
280
|
std::vector<const char*> input(num_blocks);
|
397
281
|
std::vector<size_t> input_length(num_blocks);
|
398
|
-
std::vector<string> compressed(num_blocks);
|
399
|
-
std::vector<string> output(num_blocks);
|
282
|
+
std::vector<std::string> compressed(num_blocks);
|
283
|
+
std::vector<std::string> output(num_blocks);
|
400
284
|
for (int b = 0; b < num_blocks; b++) {
|
401
285
|
int input_start = b * block_size;
|
402
|
-
int input_limit = min<int>((b+1)*block_size, length);
|
286
|
+
int input_limit = std::min<int>((b+1)*block_size, length);
|
403
287
|
input[b] = data+input_start;
|
404
288
|
input_length[b] = input_limit-input_start;
|
405
289
|
|
@@ -454,29 +338,28 @@ static void Measure(const char* data,
|
|
454
338
|
}
|
455
339
|
}
|
456
340
|
|
457
|
-
sort(ctime, ctime + kRuns);
|
458
|
-
sort(utime, utime + kRuns);
|
341
|
+
std::sort(ctime, ctime + kRuns);
|
342
|
+
std::sort(utime, utime + kRuns);
|
459
343
|
const int med = kRuns/2;
|
460
344
|
|
461
345
|
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
|
462
346
|
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
|
463
|
-
string x = names[comp];
|
347
|
+
std::string x = names[comp];
|
464
348
|
x += ":";
|
465
|
-
string urate = (uncomp_rate >= 0)
|
466
|
-
|
467
|
-
: string("?");
|
349
|
+
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
|
350
|
+
: std::string("?");
|
468
351
|
printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
|
469
352
|
"comp %5.1f MB/s uncomp %5s MB/s\n",
|
470
353
|
x.c_str(),
|
471
354
|
block_size/(1<<20),
|
472
355
|
static_cast<int>(length), static_cast<uint32>(compressed_size),
|
473
|
-
(compressed_size * 100.0) / max<int>(1, length),
|
356
|
+
(compressed_size * 100.0) / std::max<int>(1, length),
|
474
357
|
comp_rate,
|
475
358
|
urate.c_str());
|
476
359
|
}
|
477
360
|
|
478
|
-
static int VerifyString(const string& input) {
|
479
|
-
string compressed;
|
361
|
+
static int VerifyString(const std::string& input) {
|
362
|
+
std::string compressed;
|
480
363
|
DataEndingAtUnreadablePage i(input);
|
481
364
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
482
365
|
CHECK_EQ(written, compressed.size());
|
@@ -484,15 +367,15 @@ static int VerifyString(const string& input) {
|
|
484
367
|
snappy::MaxCompressedLength(input.size()));
|
485
368
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
486
369
|
|
487
|
-
string uncompressed;
|
370
|
+
std::string uncompressed;
|
488
371
|
DataEndingAtUnreadablePage c(compressed);
|
489
372
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
490
373
|
CHECK_EQ(uncompressed, input);
|
491
374
|
return uncompressed.size();
|
492
375
|
}
|
493
376
|
|
494
|
-
static void VerifyStringSink(const string& input) {
|
495
|
-
string compressed;
|
377
|
+
static void VerifyStringSink(const std::string& input) {
|
378
|
+
std::string compressed;
|
496
379
|
DataEndingAtUnreadablePage i(input);
|
497
380
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
498
381
|
CHECK_EQ(written, compressed.size());
|
@@ -500,7 +383,7 @@ static void VerifyStringSink(const string& input) {
|
|
500
383
|
snappy::MaxCompressedLength(input.size()));
|
501
384
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
502
385
|
|
503
|
-
string uncompressed;
|
386
|
+
std::string uncompressed;
|
504
387
|
uncompressed.resize(input.size());
|
505
388
|
snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
|
506
389
|
DataEndingAtUnreadablePage c(compressed);
|
@@ -509,8 +392,8 @@ static void VerifyStringSink(const string& input) {
|
|
509
392
|
CHECK_EQ(uncompressed, input);
|
510
393
|
}
|
511
394
|
|
512
|
-
static void VerifyIOVec(const string& input) {
|
513
|
-
string compressed;
|
395
|
+
static void VerifyIOVec(const std::string& input) {
|
396
|
+
std::string compressed;
|
514
397
|
DataEndingAtUnreadablePage i(input);
|
515
398
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
516
399
|
CHECK_EQ(written, compressed.size());
|
@@ -521,23 +404,28 @@ static void VerifyIOVec(const string& input) {
|
|
521
404
|
// Try uncompressing into an iovec containing a random number of entries
|
522
405
|
// ranging from 1 to 10.
|
523
406
|
char* buf = new char[input.size()];
|
524
|
-
|
525
|
-
size_t
|
407
|
+
std::minstd_rand0 rng(input.size());
|
408
|
+
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
|
409
|
+
size_t num = uniform_1_to_10(rng);
|
526
410
|
if (input.size() < num) {
|
527
411
|
num = input.size();
|
528
412
|
}
|
529
413
|
struct iovec* iov = new iovec[num];
|
530
414
|
int used_so_far = 0;
|
415
|
+
std::bernoulli_distribution one_in_five(1.0 / 5);
|
531
416
|
for (size_t i = 0; i < num; ++i) {
|
417
|
+
assert(used_so_far < input.size());
|
532
418
|
iov[i].iov_base = buf + used_so_far;
|
533
419
|
if (i == num - 1) {
|
534
420
|
iov[i].iov_len = input.size() - used_so_far;
|
535
421
|
} else {
|
536
422
|
// Randomly choose to insert a 0 byte entry.
|
537
|
-
if (
|
423
|
+
if (one_in_five(rng)) {
|
538
424
|
iov[i].iov_len = 0;
|
539
425
|
} else {
|
540
|
-
|
426
|
+
std::uniform_int_distribution<size_t> uniform_not_used_so_far(
|
427
|
+
0, input.size() - used_so_far - 1);
|
428
|
+
iov[i].iov_len = uniform_not_used_so_far(rng);
|
541
429
|
}
|
542
430
|
}
|
543
431
|
used_so_far += iov[i].iov_len;
|
@@ -551,22 +439,22 @@ static void VerifyIOVec(const string& input) {
|
|
551
439
|
|
552
440
|
// Test that data compressed by a compressor that does not
|
553
441
|
// obey block sizes is uncompressed properly.
|
554
|
-
static void VerifyNonBlockedCompression(const string& input) {
|
442
|
+
static void VerifyNonBlockedCompression(const std::string& input) {
|
555
443
|
if (input.length() > snappy::kBlockSize) {
|
556
444
|
// We cannot test larger blocks than the maximum block size, obviously.
|
557
445
|
return;
|
558
446
|
}
|
559
447
|
|
560
|
-
string prefix;
|
448
|
+
std::string prefix;
|
561
449
|
Varint::Append32(&prefix, input.size());
|
562
450
|
|
563
451
|
// Setup compression table
|
564
|
-
snappy::internal::WorkingMemory wmem;
|
452
|
+
snappy::internal::WorkingMemory wmem(input.size());
|
565
453
|
int table_size;
|
566
454
|
uint16* table = wmem.GetHashTable(input.size(), &table_size);
|
567
455
|
|
568
456
|
// Compress entire input in one shot
|
569
|
-
string compressed;
|
457
|
+
std::string compressed;
|
570
458
|
compressed += prefix;
|
571
459
|
compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
|
572
460
|
char* dest = string_as_array(&compressed) + prefix.size();
|
@@ -574,13 +462,13 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
574
462
|
dest, table, table_size);
|
575
463
|
compressed.resize(end - compressed.data());
|
576
464
|
|
577
|
-
// Uncompress into string
|
578
|
-
string uncomp_str;
|
465
|
+
// Uncompress into std::string
|
466
|
+
std::string uncomp_str;
|
579
467
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
|
580
468
|
CHECK_EQ(uncomp_str, input);
|
581
469
|
|
582
470
|
// Uncompress using source/sink
|
583
|
-
string uncomp_str2;
|
471
|
+
std::string uncomp_str2;
|
584
472
|
uncomp_str2.resize(input.size());
|
585
473
|
snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
|
586
474
|
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
@@ -592,28 +480,28 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
592
480
|
static const int kNumBlocks = 10;
|
593
481
|
struct iovec vec[kNumBlocks];
|
594
482
|
const int block_size = 1 + input.size() / kNumBlocks;
|
595
|
-
string iovec_data(block_size * kNumBlocks, 'x');
|
483
|
+
std::string iovec_data(block_size * kNumBlocks, 'x');
|
596
484
|
for (int i = 0; i < kNumBlocks; i++) {
|
597
485
|
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
598
486
|
vec[i].iov_len = block_size;
|
599
487
|
}
|
600
488
|
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
|
601
489
|
vec, kNumBlocks));
|
602
|
-
CHECK_EQ(string(iovec_data.data(), input.size()), input);
|
490
|
+
CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
|
603
491
|
}
|
604
492
|
}
|
605
493
|
|
606
494
|
// Expand the input so that it is at least K times as big as block size
|
607
|
-
static string Expand(const string& input) {
|
495
|
+
static std::string Expand(const std::string& input) {
|
608
496
|
static const int K = 3;
|
609
|
-
string data = input;
|
497
|
+
std::string data = input;
|
610
498
|
while (data.size() < K * snappy::kBlockSize) {
|
611
499
|
data += input;
|
612
500
|
}
|
613
501
|
return data;
|
614
502
|
}
|
615
503
|
|
616
|
-
static int Verify(const string& input) {
|
504
|
+
static int Verify(const std::string& input) {
|
617
505
|
VLOG(1) << "Verifying input of size " << input.size();
|
618
506
|
|
619
507
|
// Compress using string based routines
|
@@ -625,7 +513,7 @@ static int Verify(const string& input) {
|
|
625
513
|
VerifyNonBlockedCompression(input);
|
626
514
|
VerifyIOVec(input);
|
627
515
|
if (!input.empty()) {
|
628
|
-
const string expanded = Expand(input);
|
516
|
+
const std::string expanded = Expand(input);
|
629
517
|
VerifyNonBlockedCompression(expanded);
|
630
518
|
VerifyIOVec(input);
|
631
519
|
}
|
@@ -633,21 +521,20 @@ static int Verify(const string& input) {
|
|
633
521
|
return result;
|
634
522
|
}
|
635
523
|
|
636
|
-
|
637
|
-
static bool IsValidCompressedBuffer(const string& c) {
|
524
|
+
static bool IsValidCompressedBuffer(const std::string& c) {
|
638
525
|
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
639
526
|
}
|
640
|
-
static bool Uncompress(const string& c, string* u) {
|
527
|
+
static bool Uncompress(const std::string& c, std::string* u) {
|
641
528
|
return snappy::Uncompress(c.data(), c.size(), u);
|
642
529
|
}
|
643
530
|
|
644
531
|
// This test checks to ensure that snappy doesn't coredump if it gets
|
645
532
|
// corrupted data.
|
646
533
|
TEST(CorruptedTest, VerifyCorrupted) {
|
647
|
-
string source = "making sure we don't crash with corrupted input";
|
534
|
+
std::string source = "making sure we don't crash with corrupted input";
|
648
535
|
VLOG(1) << source;
|
649
|
-
string dest;
|
650
|
-
string uncmp;
|
536
|
+
std::string dest;
|
537
|
+
std::string uncmp;
|
651
538
|
snappy::Compress(source.data(), source.size(), &dest);
|
652
539
|
|
653
540
|
// Mess around with the data. It's hard to simulate all possible
|
@@ -694,9 +581,9 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
694
581
|
|
695
582
|
// try reading stuff in from a bad file.
|
696
583
|
for (int i = 1; i <= 3; ++i) {
|
697
|
-
string data =
|
698
|
-
|
699
|
-
string uncmp;
|
584
|
+
std::string data =
|
585
|
+
ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
|
586
|
+
std::string uncmp;
|
700
587
|
// check that we don't return a crazy length
|
701
588
|
size_t ulen;
|
702
589
|
CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
|
@@ -714,7 +601,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
714
601
|
// These mirror the compression code in snappy.cc, but are copied
|
715
602
|
// here so that we can bypass some limitations in the how snappy.cc
|
716
603
|
// invokes these routines.
|
717
|
-
static void AppendLiteral(string* dst, const string& literal) {
|
604
|
+
static void AppendLiteral(std::string* dst, const std::string& literal) {
|
718
605
|
if (literal.empty()) return;
|
719
606
|
int n = literal.size() - 1;
|
720
607
|
if (n < 60) {
|
@@ -729,12 +616,12 @@ static void AppendLiteral(string* dst, const string& literal) {
|
|
729
616
|
n >>= 8;
|
730
617
|
}
|
731
618
|
dst->push_back(0 | ((59+count) << 2));
|
732
|
-
*dst += string(number, count);
|
619
|
+
*dst += std::string(number, count);
|
733
620
|
}
|
734
621
|
*dst += literal;
|
735
622
|
}
|
736
623
|
|
737
|
-
static void AppendCopy(string* dst, int offset, int length) {
|
624
|
+
static void AppendCopy(std::string* dst, int offset, int length) {
|
738
625
|
while (length > 0) {
|
739
626
|
// Figure out how much to copy in one shot
|
740
627
|
int to_copy;
|
@@ -771,51 +658,67 @@ TEST(Snappy, SimpleTests) {
|
|
771
658
|
Verify("ab");
|
772
659
|
Verify("abc");
|
773
660
|
|
774
|
-
Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
|
775
|
-
Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
|
776
|
-
Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
|
777
|
-
Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
778
|
-
Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
661
|
+
Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
|
662
|
+
Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
|
663
|
+
Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
|
664
|
+
Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
665
|
+
Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
779
666
|
}
|
780
667
|
|
781
668
|
// Verify max blowup (lots of four-byte copies)
|
782
669
|
TEST(Snappy, MaxBlowup) {
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
input.
|
788
|
-
|
789
|
-
for (int i =
|
790
|
-
|
791
|
-
|
792
|
-
input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
|
670
|
+
std::mt19937 rng;
|
671
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
672
|
+
std::string input;
|
673
|
+
for (int i = 0; i < 80000; ++i)
|
674
|
+
input.push_back(static_cast<char>(uniform_byte(rng)));
|
675
|
+
|
676
|
+
for (int i = 0; i < 80000; i += 4) {
|
677
|
+
std::string four_bytes(input.end() - i - 4, input.end() - i);
|
678
|
+
input.append(four_bytes);
|
793
679
|
}
|
794
680
|
Verify(input);
|
795
681
|
}
|
796
682
|
|
797
683
|
TEST(Snappy, RandomData) {
|
798
|
-
|
799
|
-
|
800
|
-
|
684
|
+
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
685
|
+
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
|
686
|
+
std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
|
687
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
688
|
+
std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
|
689
|
+
std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
|
690
|
+
std::bernoulli_distribution one_in_ten(1.0 / 10);
|
691
|
+
|
692
|
+
constexpr int num_ops = 20000;
|
801
693
|
for (int i = 0; i < num_ops; i++) {
|
802
694
|
if ((i % 1000) == 0) {
|
803
695
|
VLOG(0) << "Random op " << i << " of " << num_ops;
|
804
696
|
}
|
805
697
|
|
806
|
-
string x;
|
807
|
-
size_t len =
|
698
|
+
std::string x;
|
699
|
+
size_t len = uniform_4k(rng);
|
808
700
|
if (i < 100) {
|
809
|
-
len = 65536 +
|
701
|
+
len = 65536 + uniform_64k(rng);
|
810
702
|
}
|
811
703
|
while (x.size() < len) {
|
812
704
|
int run_len = 1;
|
813
|
-
if (
|
814
|
-
|
705
|
+
if (one_in_ten(rng)) {
|
706
|
+
int skewed_bits = uniform_0_to_8(rng);
|
707
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
|
708
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
709
|
+
(1 << skewed_bits) - 1);
|
710
|
+
run_len = skewed_low(rng);
|
711
|
+
}
|
712
|
+
char c = static_cast<char>(uniform_byte(rng));
|
713
|
+
if (i >= 100) {
|
714
|
+
int skewed_bits = uniform_0_to_3(rng);
|
715
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
|
716
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
717
|
+
(1 << skewed_bits) - 1);
|
718
|
+
c = static_cast<char>(skewed_low(rng));
|
815
719
|
}
|
816
|
-
char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
|
817
720
|
while (run_len-- > 0 && x.size() < len) {
|
818
|
-
x
|
721
|
+
x.push_back(c);
|
819
722
|
}
|
820
723
|
}
|
821
724
|
|
@@ -829,19 +732,19 @@ TEST(Snappy, FourByteOffset) {
|
|
829
732
|
// copy manually.
|
830
733
|
|
831
734
|
// The two fragments that make up the input string.
|
832
|
-
string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
833
|
-
string fragment2 = "some other string";
|
735
|
+
std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
736
|
+
std::string fragment2 = "some other string";
|
834
737
|
|
835
738
|
// How many times each fragment is emitted.
|
836
739
|
const int n1 = 2;
|
837
740
|
const int n2 = 100000 / fragment2.size();
|
838
741
|
const int length = n1 * fragment1.size() + n2 * fragment2.size();
|
839
742
|
|
840
|
-
string compressed;
|
743
|
+
std::string compressed;
|
841
744
|
Varint::Append32(&compressed, length);
|
842
745
|
|
843
746
|
AppendLiteral(&compressed, fragment1);
|
844
|
-
string src = fragment1;
|
747
|
+
std::string src = fragment1;
|
845
748
|
for (int i = 0; i < n2; i++) {
|
846
749
|
AppendLiteral(&compressed, fragment2);
|
847
750
|
src += fragment2;
|
@@ -850,7 +753,7 @@ TEST(Snappy, FourByteOffset) {
|
|
850
753
|
src += fragment1;
|
851
754
|
CHECK_EQ(length, src.size());
|
852
755
|
|
853
|
-
string uncompressed;
|
756
|
+
std::string uncompressed;
|
854
757
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
855
758
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
|
856
759
|
&uncompressed));
|
@@ -872,7 +775,7 @@ TEST(Snappy, IOVecEdgeCases) {
|
|
872
775
|
iov[i].iov_len = kLengths[i];
|
873
776
|
}
|
874
777
|
|
875
|
-
string compressed;
|
778
|
+
std::string compressed;
|
876
779
|
Varint::Append32(&compressed, 22);
|
877
780
|
|
878
781
|
// A literal whose output crosses three blocks.
|
@@ -933,7 +836,7 @@ TEST(Snappy, IOVecLiteralOverflow) {
|
|
933
836
|
iov[i].iov_len = kLengths[i];
|
934
837
|
}
|
935
838
|
|
936
|
-
string compressed;
|
839
|
+
std::string compressed;
|
937
840
|
Varint::Append32(&compressed, 8);
|
938
841
|
|
939
842
|
AppendLiteral(&compressed, "12345678");
|
@@ -955,7 +858,7 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
955
858
|
iov[i].iov_len = kLengths[i];
|
956
859
|
}
|
957
860
|
|
958
|
-
string compressed;
|
861
|
+
std::string compressed;
|
959
862
|
Varint::Append32(&compressed, 8);
|
960
863
|
|
961
864
|
AppendLiteral(&compressed, "123");
|
@@ -969,7 +872,7 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
969
872
|
}
|
970
873
|
}
|
971
874
|
|
972
|
-
static bool CheckUncompressedLength(const string& compressed,
|
875
|
+
static bool CheckUncompressedLength(const std::string& compressed,
|
973
876
|
size_t* ulength) {
|
974
877
|
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
975
878
|
compressed.size(),
|
@@ -983,7 +886,7 @@ static bool CheckUncompressedLength(const string& compressed,
|
|
983
886
|
}
|
984
887
|
|
985
888
|
TEST(SnappyCorruption, TruncatedVarint) {
|
986
|
-
string compressed, uncompressed;
|
889
|
+
std::string compressed, uncompressed;
|
987
890
|
size_t ulength;
|
988
891
|
compressed.push_back('\xf0');
|
989
892
|
CHECK(!CheckUncompressedLength(compressed, &ulength));
|
@@ -993,7 +896,7 @@ TEST(SnappyCorruption, TruncatedVarint) {
|
|
993
896
|
}
|
994
897
|
|
995
898
|
TEST(SnappyCorruption, UnterminatedVarint) {
|
996
|
-
string compressed, uncompressed;
|
899
|
+
std::string compressed, uncompressed;
|
997
900
|
size_t ulength;
|
998
901
|
compressed.push_back('\x80');
|
999
902
|
compressed.push_back('\x80');
|
@@ -1008,7 +911,7 @@ TEST(SnappyCorruption, UnterminatedVarint) {
|
|
1008
911
|
}
|
1009
912
|
|
1010
913
|
TEST(SnappyCorruption, OverflowingVarint) {
|
1011
|
-
string compressed, uncompressed;
|
914
|
+
std::string compressed, uncompressed;
|
1012
915
|
size_t ulength;
|
1013
916
|
compressed.push_back('\xfb');
|
1014
917
|
compressed.push_back('\xff');
|
@@ -1025,14 +928,14 @@ TEST(Snappy, ReadPastEndOfBuffer) {
|
|
1025
928
|
// Check that we do not read past end of input
|
1026
929
|
|
1027
930
|
// Make a compressed string that ends with a single-byte literal
|
1028
|
-
string compressed;
|
931
|
+
std::string compressed;
|
1029
932
|
Varint::Append32(&compressed, 1);
|
1030
933
|
AppendLiteral(&compressed, "x");
|
1031
934
|
|
1032
|
-
string uncompressed;
|
935
|
+
std::string uncompressed;
|
1033
936
|
DataEndingAtUnreadablePage c(compressed);
|
1034
937
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
1035
|
-
CHECK_EQ(uncompressed, string("x"));
|
938
|
+
CHECK_EQ(uncompressed, std::string("x"));
|
1036
939
|
}
|
1037
940
|
|
1038
941
|
// Check for an infinite loop caused by a copy with offset==0
|
@@ -1153,17 +1056,20 @@ TEST(Snappy, FindMatchLength) {
|
|
1153
1056
|
}
|
1154
1057
|
|
1155
1058
|
TEST(Snappy, FindMatchLengthRandom) {
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1059
|
+
constexpr int kNumTrials = 10000;
|
1060
|
+
constexpr int kTypicalLength = 10;
|
1061
|
+
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
1062
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
1063
|
+
std::bernoulli_distribution one_in_two(1.0 / 2);
|
1064
|
+
std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
|
1159
1065
|
|
1160
1066
|
for (int i = 0; i < kNumTrials; i++) {
|
1161
|
-
string s, t;
|
1162
|
-
char a =
|
1163
|
-
char b =
|
1164
|
-
while (!
|
1165
|
-
s.push_back(
|
1166
|
-
t.push_back(
|
1067
|
+
std::string s, t;
|
1068
|
+
char a = static_cast<char>(uniform_byte(rng));
|
1069
|
+
char b = static_cast<char>(uniform_byte(rng));
|
1070
|
+
while (!one_in_typical_length(rng)) {
|
1071
|
+
s.push_back(one_in_two(rng) ? a : b);
|
1072
|
+
t.push_back(one_in_two(rng) ? a : b);
|
1167
1073
|
}
|
1168
1074
|
DataEndingAtUnreadablePage u(s);
|
1169
1075
|
DataEndingAtUnreadablePage v(t);
|
@@ -1197,7 +1103,6 @@ TEST(Snappy, VerifyCharTable) {
|
|
1197
1103
|
using snappy::internal::COPY_2_BYTE_OFFSET;
|
1198
1104
|
using snappy::internal::COPY_4_BYTE_OFFSET;
|
1199
1105
|
using snappy::internal::char_table;
|
1200
|
-
using snappy::internal::wordmask;
|
1201
1106
|
|
1202
1107
|
uint16 dst[256];
|
1203
1108
|
|
@@ -1274,49 +1179,46 @@ TEST(Snappy, VerifyCharTable) {
|
|
1274
1179
|
}
|
1275
1180
|
|
1276
1181
|
static void CompressFile(const char* fname) {
|
1277
|
-
string fullinput;
|
1182
|
+
std::string fullinput;
|
1278
1183
|
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1279
1184
|
|
1280
|
-
string compressed;
|
1185
|
+
std::string compressed;
|
1281
1186
|
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
|
1282
1187
|
|
1283
|
-
CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
|
1188
|
+
CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
|
1284
1189
|
file::Defaults()));
|
1285
1190
|
}
|
1286
1191
|
|
1287
1192
|
static void UncompressFile(const char* fname) {
|
1288
|
-
string fullinput;
|
1193
|
+
std::string fullinput;
|
1289
1194
|
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1290
1195
|
|
1291
1196
|
size_t uncompLength;
|
1292
1197
|
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
|
1293
1198
|
|
1294
|
-
string uncompressed;
|
1199
|
+
std::string uncompressed;
|
1295
1200
|
uncompressed.resize(uncompLength);
|
1296
1201
|
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
|
1297
1202
|
|
1298
|
-
CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
|
1203
|
+
CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
|
1299
1204
|
file::Defaults()));
|
1300
1205
|
}
|
1301
1206
|
|
1302
1207
|
static void MeasureFile(const char* fname) {
|
1303
|
-
string fullinput;
|
1208
|
+
std::string fullinput;
|
1304
1209
|
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1305
1210
|
printf("%-40s :\n", fname);
|
1306
1211
|
|
1307
1212
|
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
|
1308
1213
|
int end_len = fullinput.size();
|
1309
1214
|
if (FLAGS_end_len >= 0) {
|
1310
|
-
end_len = min<int>(fullinput.size(), FLAGS_end_len);
|
1215
|
+
end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
|
1311
1216
|
}
|
1312
1217
|
for (int len = start_len; len <= end_len; len++) {
|
1313
1218
|
const char* const input = fullinput.data();
|
1314
1219
|
int repeats = (FLAGS_bytes + len) / (len + 1);
|
1315
1220
|
if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
|
1316
1221
|
if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
|
1317
|
-
if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
|
1318
|
-
if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
|
1319
|
-
if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
|
1320
1222
|
if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
|
1321
1223
|
|
1322
1224
|
// For block-size based measurements
|
@@ -1356,10 +1258,10 @@ static void BM_UFlat(int iters, int arg) {
|
|
1356
1258
|
// Pick file to process based on "arg"
|
1357
1259
|
CHECK_GE(arg, 0);
|
1358
1260
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1359
|
-
string contents =
|
1360
|
-
|
1261
|
+
std::string contents =
|
1262
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1361
1263
|
|
1362
|
-
string zcontents;
|
1264
|
+
std::string zcontents;
|
1363
1265
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1364
1266
|
char* dst = new char[contents.size()];
|
1365
1267
|
|
@@ -1382,10 +1284,10 @@ static void BM_UValidate(int iters, int arg) {
|
|
1382
1284
|
// Pick file to process based on "arg"
|
1383
1285
|
CHECK_GE(arg, 0);
|
1384
1286
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1385
|
-
string contents =
|
1386
|
-
|
1287
|
+
std::string contents =
|
1288
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1387
1289
|
|
1388
|
-
string zcontents;
|
1290
|
+
std::string zcontents;
|
1389
1291
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1390
1292
|
|
1391
1293
|
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
@@ -1405,10 +1307,10 @@ static void BM_UIOVec(int iters, int arg) {
|
|
1405
1307
|
// Pick file to process based on "arg"
|
1406
1308
|
CHECK_GE(arg, 0);
|
1407
1309
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1408
|
-
string contents =
|
1409
|
-
|
1310
|
+
std::string contents =
|
1311
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1410
1312
|
|
1411
|
-
string zcontents;
|
1313
|
+
std::string zcontents;
|
1412
1314
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1413
1315
|
|
1414
1316
|
// Uncompress into an iovec containing ten entries.
|
@@ -1451,10 +1353,10 @@ static void BM_UFlatSink(int iters, int arg) {
|
|
1451
1353
|
// Pick file to process based on "arg"
|
1452
1354
|
CHECK_GE(arg, 0);
|
1453
1355
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1454
|
-
string contents =
|
1455
|
-
|
1356
|
+
std::string contents =
|
1357
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1456
1358
|
|
1457
|
-
string zcontents;
|
1359
|
+
std::string zcontents;
|
1458
1360
|
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1459
1361
|
char* dst = new char[contents.size()];
|
1460
1362
|
|
@@ -1469,7 +1371,7 @@ static void BM_UFlatSink(int iters, int arg) {
|
|
1469
1371
|
}
|
1470
1372
|
StopBenchmarkTiming();
|
1471
1373
|
|
1472
|
-
string s(dst, contents.size());
|
1374
|
+
std::string s(dst, contents.size());
|
1473
1375
|
CHECK_EQ(contents, s);
|
1474
1376
|
|
1475
1377
|
delete[] dst;
|
@@ -1483,8 +1385,8 @@ static void BM_ZFlat(int iters, int arg) {
|
|
1483
1385
|
// Pick file to process based on "arg"
|
1484
1386
|
CHECK_GE(arg, 0);
|
1485
1387
|
CHECK_LT(arg, ARRAYSIZE(files));
|
1486
|
-
string contents =
|
1487
|
-
|
1388
|
+
std::string contents =
|
1389
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1488
1390
|
|
1489
1391
|
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
1490
1392
|
|
@@ -1499,16 +1401,89 @@ static void BM_ZFlat(int iters, int arg) {
|
|
1499
1401
|
StopBenchmarkTiming();
|
1500
1402
|
const double compression_ratio =
|
1501
1403
|
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
1502
|
-
SetBenchmarkLabel(
|
1503
|
-
|
1504
|
-
VLOG(0) <<
|
1505
|
-
|
1404
|
+
SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
|
1405
|
+
100.0 * compression_ratio));
|
1406
|
+
VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
|
1407
|
+
files[arg].label, static_cast<int>(contents.size()),
|
1408
|
+
static_cast<int>(zsize));
|
1506
1409
|
delete[] dst;
|
1507
1410
|
}
|
1508
1411
|
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1509
1412
|
|
1510
|
-
|
1413
|
+
static void BM_ZFlatAll(int iters, int arg) {
|
1414
|
+
StopBenchmarkTiming();
|
1415
|
+
|
1416
|
+
CHECK_EQ(arg, 0);
|
1417
|
+
const int num_files = ARRAYSIZE(files);
|
1418
|
+
|
1419
|
+
std::vector<std::string> contents(num_files);
|
1420
|
+
std::vector<char*> dst(num_files);
|
1421
|
+
|
1422
|
+
int64 total_contents_size = 0;
|
1423
|
+
for (int i = 0; i < num_files; ++i) {
|
1424
|
+
contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
|
1425
|
+
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
|
1426
|
+
total_contents_size += contents[i].size();
|
1427
|
+
}
|
1428
|
+
|
1429
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1430
|
+
StartBenchmarkTiming();
|
1431
|
+
|
1432
|
+
size_t zsize = 0;
|
1433
|
+
while (iters-- > 0) {
|
1434
|
+
for (int i = 0; i < num_files; ++i) {
|
1435
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1436
|
+
&zsize);
|
1437
|
+
}
|
1438
|
+
}
|
1439
|
+
StopBenchmarkTiming();
|
1511
1440
|
|
1441
|
+
for (int i = 0; i < num_files; ++i) {
|
1442
|
+
delete[] dst[i];
|
1443
|
+
}
|
1444
|
+
SetBenchmarkLabel(StrFormat("%d files", num_files));
|
1445
|
+
}
|
1446
|
+
BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
|
1447
|
+
|
1448
|
+
static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
|
1449
|
+
StopBenchmarkTiming();
|
1450
|
+
|
1451
|
+
CHECK_EQ(arg, 0);
|
1452
|
+
CHECK_GT(ARRAYSIZE(files), 0);
|
1453
|
+
const std::string base_content =
|
1454
|
+
ReadTestDataFile(files[0].filename, files[0].size_limit);
|
1455
|
+
|
1456
|
+
std::vector<std::string> contents;
|
1457
|
+
std::vector<char*> dst;
|
1458
|
+
int64 total_contents_size = 0;
|
1459
|
+
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
|
1460
|
+
++table_bits) {
|
1461
|
+
std::string content = base_content;
|
1462
|
+
content.resize(1 << table_bits);
|
1463
|
+
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
|
1464
|
+
total_contents_size += content.size();
|
1465
|
+
contents.push_back(std::move(content));
|
1466
|
+
}
|
1467
|
+
|
1468
|
+
size_t zsize = 0;
|
1469
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1470
|
+
StartBenchmarkTiming();
|
1471
|
+
while (iters-- > 0) {
|
1472
|
+
for (int i = 0; i < contents.size(); ++i) {
|
1473
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1474
|
+
&zsize);
|
1475
|
+
}
|
1476
|
+
}
|
1477
|
+
StopBenchmarkTiming();
|
1478
|
+
|
1479
|
+
for (int i = 0; i < dst.size(); ++i) {
|
1480
|
+
delete[] dst[i];
|
1481
|
+
}
|
1482
|
+
SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
|
1483
|
+
}
|
1484
|
+
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
|
1485
|
+
|
1486
|
+
} // namespace snappy
|
1512
1487
|
|
1513
1488
|
int main(int argc, char** argv) {
|
1514
1489
|
InitGoogle(argv[0], &argc, &argv, true);
|
@@ -1517,11 +1492,11 @@ int main(int argc, char** argv) {
|
|
1517
1492
|
if (argc >= 2) {
|
1518
1493
|
for (int arg = 1; arg < argc; arg++) {
|
1519
1494
|
if (FLAGS_write_compressed) {
|
1520
|
-
CompressFile(argv[arg]);
|
1495
|
+
snappy::CompressFile(argv[arg]);
|
1521
1496
|
} else if (FLAGS_write_uncompressed) {
|
1522
|
-
UncompressFile(argv[arg]);
|
1497
|
+
snappy::UncompressFile(argv[arg]);
|
1523
1498
|
} else {
|
1524
|
-
MeasureFile(argv[arg]);
|
1499
|
+
snappy::MeasureFile(argv[arg]);
|
1525
1500
|
}
|
1526
1501
|
}
|
1527
1502
|
return 0;
|