snappy 0.0.17 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.dockerignore +2 -0
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/.gitignore +2 -1
- data/.gitmodules +1 -1
- data/Dockerfile +13 -0
- data/Gemfile +4 -0
- data/README.md +45 -5
- data/Rakefile +32 -29
- data/ext/api.c +6 -1
- data/ext/extconf.rb +31 -22
- data/lib/snappy/hadoop/reader.rb +62 -0
- data/lib/snappy/hadoop/writer.rb +51 -0
- data/lib/snappy/hadoop.rb +22 -0
- data/lib/snappy/reader.rb +14 -10
- data/lib/snappy/shim.rb +1 -1
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy.rb +5 -4
- data/snappy.gemspec +14 -13
- data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
- data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
- data/test/snappy_hadoop_test.rb +26 -0
- data/test/snappy_reader_test.rb +148 -0
- data/test/snappy_test.rb +95 -0
- data/test/snappy_writer_test.rb +55 -0
- data/test/test_helper.rb +7 -0
- data/test.sh +3 -0
- data/vendor/snappy/CMakeLists.txt +420 -0
- data/vendor/snappy/CONTRIBUTING.md +31 -0
- data/vendor/snappy/NEWS +52 -0
- data/vendor/snappy/{README → README.md} +75 -49
- data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/vendor/snappy/cmake/config.h.in +66 -0
- data/vendor/snappy/docs/README.md +72 -0
- data/vendor/snappy/snappy-internal.h +200 -32
- data/vendor/snappy/snappy-sinksource.cc +26 -9
- data/vendor/snappy/snappy-sinksource.h +11 -11
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +299 -302
- data/vendor/snappy/snappy-stubs-public.h.in +10 -47
- data/vendor/snappy/snappy-test.cc +94 -200
- data/vendor/snappy/snappy-test.h +101 -358
- data/vendor/snappy/snappy.cc +1437 -474
- data/vendor/snappy/snappy.h +31 -12
- data/vendor/snappy/snappy_benchmark.cc +378 -0
- data/vendor/snappy/snappy_compress_fuzzer.cc +60 -0
- data/vendor/snappy/snappy_test_data.cc +57 -0
- data/vendor/snappy/snappy_test_data.h +68 -0
- data/vendor/snappy/snappy_test_tool.cc +471 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +58 -0
- data/vendor/snappy/snappy_unittest.cc +271 -792
- metadata +42 -92
- data/.travis.yml +0 -26
- data/smoke.sh +0 -8
- data/test/test-snappy-reader.rb +0 -129
- data/test/test-snappy-writer.rb +0 -55
- data/test/test-snappy.rb +0 -58
- data/vendor/snappy/ChangeLog +0 -2468
- data/vendor/snappy/INSTALL +0 -370
- data/vendor/snappy/Makefile +0 -982
- data/vendor/snappy/Makefile.am +0 -26
- data/vendor/snappy/Makefile.in +0 -982
- data/vendor/snappy/aclocal.m4 +0 -9738
- data/vendor/snappy/autogen.sh +0 -12
- data/vendor/snappy/autom4te.cache/output.0 +0 -18856
- data/vendor/snappy/autom4te.cache/output.1 +0 -18852
- data/vendor/snappy/autom4te.cache/requests +0 -297
- data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
- data/vendor/snappy/autom4te.cache/traces.1 +0 -714
- data/vendor/snappy/config.guess +0 -1530
- data/vendor/snappy/config.h +0 -135
- data/vendor/snappy/config.h.in +0 -134
- data/vendor/snappy/config.log +0 -1640
- data/vendor/snappy/config.status +0 -2318
- data/vendor/snappy/config.sub +0 -1773
- data/vendor/snappy/configure +0 -18852
- data/vendor/snappy/configure.ac +0 -134
- data/vendor/snappy/depcomp +0 -688
- data/vendor/snappy/install-sh +0 -527
- data/vendor/snappy/libtool +0 -10246
- data/vendor/snappy/ltmain.sh +0 -9661
- data/vendor/snappy/m4/gtest.m4 +0 -74
- data/vendor/snappy/m4/libtool.m4 +0 -8001
- data/vendor/snappy/m4/ltoptions.m4 +0 -384
- data/vendor/snappy/m4/ltsugar.m4 +0 -123
- data/vendor/snappy/m4/ltversion.m4 +0 -23
- data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
- data/vendor/snappy/missing +0 -331
- data/vendor/snappy/snappy-stubs-public.h +0 -100
- data/vendor/snappy/snappy.pc +0 -10
- data/vendor/snappy/snappy.pc.in +0 -10
- data/vendor/snappy/stamp-h1 +0 -1
@@ -26,51 +26,31 @@
|
|
26
26
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
27
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
28
|
|
29
|
-
#include <math.h>
|
30
|
-
#include <stdlib.h>
|
31
|
-
|
32
|
-
|
33
29
|
#include <algorithm>
|
30
|
+
#include <cmath>
|
31
|
+
#include <cstdlib>
|
32
|
+
#include <random>
|
34
33
|
#include <string>
|
34
|
+
#include <utility>
|
35
35
|
#include <vector>
|
36
36
|
|
37
|
-
#include "snappy.h"
|
38
|
-
#include "snappy-internal.h"
|
39
37
|
#include "snappy-test.h"
|
38
|
+
|
39
|
+
#include "gtest/gtest.h"
|
40
|
+
|
41
|
+
#include "snappy-internal.h"
|
40
42
|
#include "snappy-sinksource.h"
|
43
|
+
#include "snappy.h"
|
44
|
+
#include "snappy_test_data.h"
|
41
45
|
|
42
|
-
|
43
|
-
"Starting prefix size for testing (-1: just full file contents)");
|
44
|
-
DEFINE_int32(end_len, -1,
|
45
|
-
"Starting prefix size for testing (-1: just full file contents)");
|
46
|
-
DEFINE_int32(bytes, 10485760,
|
47
|
-
"How many bytes to compress/uncompress per file for timing");
|
48
|
-
|
49
|
-
DEFINE_bool(zlib, false,
|
50
|
-
"Run zlib compression (http://www.zlib.net)");
|
51
|
-
DEFINE_bool(lzo, false,
|
52
|
-
"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
|
53
|
-
DEFINE_bool(quicklz, false,
|
54
|
-
"Run quickLZ compression (http://www.quicklz.com/)");
|
55
|
-
DEFINE_bool(liblzf, false,
|
56
|
-
"Run libLZF compression "
|
57
|
-
"(http://www.goof.com/pcg/marc/liblzf.html)");
|
58
|
-
DEFINE_bool(fastlz, false,
|
59
|
-
"Run FastLZ compression (http://www.fastlz.org/");
|
60
|
-
DEFINE_bool(snappy, true, "Run snappy compression");
|
61
|
-
|
62
|
-
DEFINE_bool(write_compressed, false,
|
63
|
-
"Write compressed versions of each file to <file>.comp");
|
64
|
-
DEFINE_bool(write_uncompressed, false,
|
65
|
-
"Write uncompressed versions of each file to <file>.uncomp");
|
66
|
-
|
67
|
-
DEFINE_bool(snappy_dump_decompression_table, false,
|
46
|
+
SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
|
68
47
|
"If true, we print the decompression table during tests.");
|
69
48
|
|
70
49
|
namespace snappy {
|
71
50
|
|
51
|
+
namespace {
|
72
52
|
|
73
|
-
#
|
53
|
+
#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
|
74
54
|
|
75
55
|
// To test against code that reads beyond its input, this class copies a
|
76
56
|
// string to a newly allocated group of pages, the last of which
|
@@ -80,8 +60,8 @@ namespace snappy {
|
|
80
60
|
// be able to read previously allocated memory while doing heap allocations.
|
81
61
|
class DataEndingAtUnreadablePage {
|
82
62
|
public:
|
83
|
-
explicit DataEndingAtUnreadablePage(const string& s) {
|
84
|
-
const size_t page_size =
|
63
|
+
explicit DataEndingAtUnreadablePage(const std::string& s) {
|
64
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
85
65
|
const size_t size = s.size();
|
86
66
|
// Round up space for string to a multiple of page_size.
|
87
67
|
size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
|
@@ -91,7 +71,7 @@ class DataEndingAtUnreadablePage {
|
|
91
71
|
CHECK_NE(MAP_FAILED, mem_);
|
92
72
|
protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
|
93
73
|
char* dst = protected_page_ - size;
|
94
|
-
memcpy(dst, s.data(), size);
|
74
|
+
std::memcpy(dst, s.data(), size);
|
95
75
|
data_ = dst;
|
96
76
|
size_ = size;
|
97
77
|
// Make guard page unreadable.
|
@@ -99,8 +79,9 @@ class DataEndingAtUnreadablePage {
|
|
99
79
|
}
|
100
80
|
|
101
81
|
~DataEndingAtUnreadablePage() {
|
82
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
102
83
|
// Undo the mprotect.
|
103
|
-
CHECK_EQ(0, mprotect(protected_page_,
|
84
|
+
CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
|
104
85
|
CHECK_EQ(0, munmap(mem_, alloc_size_));
|
105
86
|
}
|
106
87
|
|
@@ -115,368 +96,15 @@ class DataEndingAtUnreadablePage {
|
|
115
96
|
size_t size_;
|
116
97
|
};
|
117
98
|
|
118
|
-
#else // HAVE_FUNC_MMAP
|
99
|
+
#else // HAVE_FUNC_MMAP) && HAVE_FUNC_SYSCONF
|
119
100
|
|
120
101
|
// Fallback for systems without mmap.
|
121
|
-
|
102
|
+
using DataEndingAtUnreadablePage = std::string;
|
122
103
|
|
123
104
|
#endif
|
124
105
|
|
125
|
-
|
126
|
-
|
127
|
-
};
|
128
|
-
|
129
|
-
const char* names[] = {
|
130
|
-
"ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY"
|
131
|
-
};
|
132
|
-
|
133
|
-
static size_t MinimumRequiredOutputSpace(size_t input_size,
|
134
|
-
CompressorType comp) {
|
135
|
-
switch (comp) {
|
136
|
-
#ifdef ZLIB_VERSION
|
137
|
-
case ZLIB:
|
138
|
-
return ZLib::MinCompressbufSize(input_size);
|
139
|
-
#endif // ZLIB_VERSION
|
140
|
-
|
141
|
-
#ifdef LZO_VERSION
|
142
|
-
case LZO:
|
143
|
-
return input_size + input_size/64 + 16 + 3;
|
144
|
-
#endif // LZO_VERSION
|
145
|
-
|
146
|
-
#ifdef LZF_VERSION
|
147
|
-
case LIBLZF:
|
148
|
-
return input_size;
|
149
|
-
#endif // LZF_VERSION
|
150
|
-
|
151
|
-
#ifdef QLZ_VERSION_MAJOR
|
152
|
-
case QUICKLZ:
|
153
|
-
return input_size + 36000; // 36000 is used for scratch.
|
154
|
-
#endif // QLZ_VERSION_MAJOR
|
155
|
-
|
156
|
-
#ifdef FASTLZ_VERSION
|
157
|
-
case FASTLZ:
|
158
|
-
return max(static_cast<int>(ceil(input_size * 1.05)), 66);
|
159
|
-
#endif // FASTLZ_VERSION
|
160
|
-
|
161
|
-
case SNAPPY:
|
162
|
-
return snappy::MaxCompressedLength(input_size);
|
163
|
-
|
164
|
-
default:
|
165
|
-
LOG(FATAL) << "Unknown compression type number " << comp;
|
166
|
-
return 0;
|
167
|
-
}
|
168
|
-
}
|
169
|
-
|
170
|
-
// Returns true if we successfully compressed, false otherwise.
|
171
|
-
//
|
172
|
-
// If compressed_is_preallocated is set, do not resize the compressed buffer.
|
173
|
-
// This is typically what you want for a benchmark, in order to not spend
|
174
|
-
// time in the memory allocator. If you do set this flag, however,
|
175
|
-
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
|
176
|
-
// number of bytes, and may contain junk bytes at the end after return.
|
177
|
-
static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
178
|
-
string* compressed, bool compressed_is_preallocated) {
|
179
|
-
if (!compressed_is_preallocated) {
|
180
|
-
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
|
181
|
-
}
|
182
|
-
|
183
|
-
switch (comp) {
|
184
|
-
#ifdef ZLIB_VERSION
|
185
|
-
case ZLIB: {
|
186
|
-
ZLib zlib;
|
187
|
-
uLongf destlen = compressed->size();
|
188
|
-
int ret = zlib.Compress(
|
189
|
-
reinterpret_cast<Bytef*>(string_as_array(compressed)),
|
190
|
-
&destlen,
|
191
|
-
reinterpret_cast<const Bytef*>(input),
|
192
|
-
input_size);
|
193
|
-
CHECK_EQ(Z_OK, ret);
|
194
|
-
if (!compressed_is_preallocated) {
|
195
|
-
compressed->resize(destlen);
|
196
|
-
}
|
197
|
-
return true;
|
198
|
-
}
|
199
|
-
#endif // ZLIB_VERSION
|
200
|
-
|
201
|
-
#ifdef LZO_VERSION
|
202
|
-
case LZO: {
|
203
|
-
unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
|
204
|
-
lzo_uint destlen;
|
205
|
-
int ret = lzo1x_1_15_compress(
|
206
|
-
reinterpret_cast<const uint8*>(input),
|
207
|
-
input_size,
|
208
|
-
reinterpret_cast<uint8*>(string_as_array(compressed)),
|
209
|
-
&destlen,
|
210
|
-
mem);
|
211
|
-
CHECK_EQ(LZO_E_OK, ret);
|
212
|
-
delete[] mem;
|
213
|
-
if (!compressed_is_preallocated) {
|
214
|
-
compressed->resize(destlen);
|
215
|
-
}
|
216
|
-
break;
|
217
|
-
}
|
218
|
-
#endif // LZO_VERSION
|
219
|
-
|
220
|
-
#ifdef LZF_VERSION
|
221
|
-
case LIBLZF: {
|
222
|
-
int destlen = lzf_compress(input,
|
223
|
-
input_size,
|
224
|
-
string_as_array(compressed),
|
225
|
-
input_size);
|
226
|
-
if (destlen == 0) {
|
227
|
-
// lzf *can* cause lots of blowup when compressing, so they
|
228
|
-
// recommend to limit outsize to insize, and just not compress
|
229
|
-
// if it's bigger. Ideally, we'd just swap input and output.
|
230
|
-
compressed->assign(input, input_size);
|
231
|
-
destlen = input_size;
|
232
|
-
}
|
233
|
-
if (!compressed_is_preallocated) {
|
234
|
-
compressed->resize(destlen);
|
235
|
-
}
|
236
|
-
break;
|
237
|
-
}
|
238
|
-
#endif // LZF_VERSION
|
239
|
-
|
240
|
-
#ifdef QLZ_VERSION_MAJOR
|
241
|
-
case QUICKLZ: {
|
242
|
-
qlz_state_compress *state_compress = new qlz_state_compress;
|
243
|
-
int destlen = qlz_compress(input,
|
244
|
-
string_as_array(compressed),
|
245
|
-
input_size,
|
246
|
-
state_compress);
|
247
|
-
delete state_compress;
|
248
|
-
CHECK_NE(0, destlen);
|
249
|
-
if (!compressed_is_preallocated) {
|
250
|
-
compressed->resize(destlen);
|
251
|
-
}
|
252
|
-
break;
|
253
|
-
}
|
254
|
-
#endif // QLZ_VERSION_MAJOR
|
255
|
-
|
256
|
-
#ifdef FASTLZ_VERSION
|
257
|
-
case FASTLZ: {
|
258
|
-
// Use level 1 compression since we mostly care about speed.
|
259
|
-
int destlen = fastlz_compress_level(
|
260
|
-
1,
|
261
|
-
input,
|
262
|
-
input_size,
|
263
|
-
string_as_array(compressed));
|
264
|
-
if (!compressed_is_preallocated) {
|
265
|
-
compressed->resize(destlen);
|
266
|
-
}
|
267
|
-
CHECK_NE(destlen, 0);
|
268
|
-
break;
|
269
|
-
}
|
270
|
-
#endif // FASTLZ_VERSION
|
271
|
-
|
272
|
-
case SNAPPY: {
|
273
|
-
size_t destlen;
|
274
|
-
snappy::RawCompress(input, input_size,
|
275
|
-
string_as_array(compressed),
|
276
|
-
&destlen);
|
277
|
-
CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
|
278
|
-
if (!compressed_is_preallocated) {
|
279
|
-
compressed->resize(destlen);
|
280
|
-
}
|
281
|
-
break;
|
282
|
-
}
|
283
|
-
|
284
|
-
default: {
|
285
|
-
return false; // the asked-for library wasn't compiled in
|
286
|
-
}
|
287
|
-
}
|
288
|
-
return true;
|
289
|
-
}
|
290
|
-
|
291
|
-
static bool Uncompress(const string& compressed, CompressorType comp,
|
292
|
-
int size, string* output) {
|
293
|
-
switch (comp) {
|
294
|
-
#ifdef ZLIB_VERSION
|
295
|
-
case ZLIB: {
|
296
|
-
output->resize(size);
|
297
|
-
ZLib zlib;
|
298
|
-
uLongf destlen = output->size();
|
299
|
-
int ret = zlib.Uncompress(
|
300
|
-
reinterpret_cast<Bytef*>(string_as_array(output)),
|
301
|
-
&destlen,
|
302
|
-
reinterpret_cast<const Bytef*>(compressed.data()),
|
303
|
-
compressed.size());
|
304
|
-
CHECK_EQ(Z_OK, ret);
|
305
|
-
CHECK_EQ(static_cast<uLongf>(size), destlen);
|
306
|
-
break;
|
307
|
-
}
|
308
|
-
#endif // ZLIB_VERSION
|
309
|
-
|
310
|
-
#ifdef LZO_VERSION
|
311
|
-
case LZO: {
|
312
|
-
output->resize(size);
|
313
|
-
lzo_uint destlen;
|
314
|
-
int ret = lzo1x_decompress(
|
315
|
-
reinterpret_cast<const uint8*>(compressed.data()),
|
316
|
-
compressed.size(),
|
317
|
-
reinterpret_cast<uint8*>(string_as_array(output)),
|
318
|
-
&destlen,
|
319
|
-
NULL);
|
320
|
-
CHECK_EQ(LZO_E_OK, ret);
|
321
|
-
CHECK_EQ(static_cast<lzo_uint>(size), destlen);
|
322
|
-
break;
|
323
|
-
}
|
324
|
-
#endif // LZO_VERSION
|
325
|
-
|
326
|
-
#ifdef LZF_VERSION
|
327
|
-
case LIBLZF: {
|
328
|
-
output->resize(size);
|
329
|
-
int destlen = lzf_decompress(compressed.data(),
|
330
|
-
compressed.size(),
|
331
|
-
string_as_array(output),
|
332
|
-
output->size());
|
333
|
-
if (destlen == 0) {
|
334
|
-
// This error probably means we had decided not to compress,
|
335
|
-
// and thus have stored input in output directly.
|
336
|
-
output->assign(compressed.data(), compressed.size());
|
337
|
-
destlen = compressed.size();
|
338
|
-
}
|
339
|
-
CHECK_EQ(destlen, size);
|
340
|
-
break;
|
341
|
-
}
|
342
|
-
#endif // LZF_VERSION
|
343
|
-
|
344
|
-
#ifdef QLZ_VERSION_MAJOR
|
345
|
-
case QUICKLZ: {
|
346
|
-
output->resize(size);
|
347
|
-
qlz_state_decompress *state_decompress = new qlz_state_decompress;
|
348
|
-
int destlen = qlz_decompress(compressed.data(),
|
349
|
-
string_as_array(output),
|
350
|
-
state_decompress);
|
351
|
-
delete state_decompress;
|
352
|
-
CHECK_EQ(destlen, size);
|
353
|
-
break;
|
354
|
-
}
|
355
|
-
#endif // QLZ_VERSION_MAJOR
|
356
|
-
|
357
|
-
#ifdef FASTLZ_VERSION
|
358
|
-
case FASTLZ: {
|
359
|
-
output->resize(size);
|
360
|
-
int destlen = fastlz_decompress(compressed.data(),
|
361
|
-
compressed.length(),
|
362
|
-
string_as_array(output),
|
363
|
-
size);
|
364
|
-
CHECK_EQ(destlen, size);
|
365
|
-
break;
|
366
|
-
}
|
367
|
-
#endif // FASTLZ_VERSION
|
368
|
-
|
369
|
-
case SNAPPY: {
|
370
|
-
snappy::RawUncompress(compressed.data(), compressed.size(),
|
371
|
-
string_as_array(output));
|
372
|
-
break;
|
373
|
-
}
|
374
|
-
|
375
|
-
default: {
|
376
|
-
return false; // the asked-for library wasn't compiled in
|
377
|
-
}
|
378
|
-
}
|
379
|
-
return true;
|
380
|
-
}
|
381
|
-
|
382
|
-
static void Measure(const char* data,
|
383
|
-
size_t length,
|
384
|
-
CompressorType comp,
|
385
|
-
int repeats,
|
386
|
-
int block_size) {
|
387
|
-
// Run tests a few time and pick median running times
|
388
|
-
static const int kRuns = 5;
|
389
|
-
double ctime[kRuns];
|
390
|
-
double utime[kRuns];
|
391
|
-
int compressed_size = 0;
|
392
|
-
|
393
|
-
{
|
394
|
-
// Chop the input into blocks
|
395
|
-
int num_blocks = (length + block_size - 1) / block_size;
|
396
|
-
std::vector<const char*> input(num_blocks);
|
397
|
-
std::vector<size_t> input_length(num_blocks);
|
398
|
-
std::vector<string> compressed(num_blocks);
|
399
|
-
std::vector<string> output(num_blocks);
|
400
|
-
for (int b = 0; b < num_blocks; b++) {
|
401
|
-
int input_start = b * block_size;
|
402
|
-
int input_limit = min<int>((b+1)*block_size, length);
|
403
|
-
input[b] = data+input_start;
|
404
|
-
input_length[b] = input_limit-input_start;
|
405
|
-
|
406
|
-
// Pre-grow the output buffer so we don't measure string append time.
|
407
|
-
compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
|
408
|
-
}
|
409
|
-
|
410
|
-
// First, try one trial compression to make sure the code is compiled in
|
411
|
-
if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
|
412
|
-
LOG(WARNING) << "Skipping " << names[comp] << ": "
|
413
|
-
<< "library not compiled in";
|
414
|
-
return;
|
415
|
-
}
|
416
|
-
|
417
|
-
for (int run = 0; run < kRuns; run++) {
|
418
|
-
CycleTimer ctimer, utimer;
|
419
|
-
|
420
|
-
for (int b = 0; b < num_blocks; b++) {
|
421
|
-
// Pre-grow the output buffer so we don't measure string append time.
|
422
|
-
compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
|
423
|
-
}
|
424
|
-
|
425
|
-
ctimer.Start();
|
426
|
-
for (int b = 0; b < num_blocks; b++)
|
427
|
-
for (int i = 0; i < repeats; i++)
|
428
|
-
Compress(input[b], input_length[b], comp, &compressed[b], true);
|
429
|
-
ctimer.Stop();
|
430
|
-
|
431
|
-
// Compress once more, with resizing, so we don't leave junk
|
432
|
-
// at the end that will confuse the decompressor.
|
433
|
-
for (int b = 0; b < num_blocks; b++) {
|
434
|
-
Compress(input[b], input_length[b], comp, &compressed[b], false);
|
435
|
-
}
|
436
|
-
|
437
|
-
for (int b = 0; b < num_blocks; b++) {
|
438
|
-
output[b].resize(input_length[b]);
|
439
|
-
}
|
440
|
-
|
441
|
-
utimer.Start();
|
442
|
-
for (int i = 0; i < repeats; i++)
|
443
|
-
for (int b = 0; b < num_blocks; b++)
|
444
|
-
Uncompress(compressed[b], comp, input_length[b], &output[b]);
|
445
|
-
utimer.Stop();
|
446
|
-
|
447
|
-
ctime[run] = ctimer.Get();
|
448
|
-
utime[run] = utimer.Get();
|
449
|
-
}
|
450
|
-
|
451
|
-
compressed_size = 0;
|
452
|
-
for (size_t i = 0; i < compressed.size(); i++) {
|
453
|
-
compressed_size += compressed[i].size();
|
454
|
-
}
|
455
|
-
}
|
456
|
-
|
457
|
-
sort(ctime, ctime + kRuns);
|
458
|
-
sort(utime, utime + kRuns);
|
459
|
-
const int med = kRuns/2;
|
460
|
-
|
461
|
-
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
|
462
|
-
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
|
463
|
-
string x = names[comp];
|
464
|
-
x += ":";
|
465
|
-
string urate = (uncomp_rate >= 0)
|
466
|
-
? StringPrintf("%.1f", uncomp_rate)
|
467
|
-
: string("?");
|
468
|
-
printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
|
469
|
-
"comp %5.1f MB/s uncomp %5s MB/s\n",
|
470
|
-
x.c_str(),
|
471
|
-
block_size/(1<<20),
|
472
|
-
static_cast<int>(length), static_cast<uint32>(compressed_size),
|
473
|
-
(compressed_size * 100.0) / max<int>(1, length),
|
474
|
-
comp_rate,
|
475
|
-
urate.c_str());
|
476
|
-
}
|
477
|
-
|
478
|
-
static int VerifyString(const string& input) {
|
479
|
-
string compressed;
|
106
|
+
int VerifyString(const std::string& input) {
|
107
|
+
std::string compressed;
|
480
108
|
DataEndingAtUnreadablePage i(input);
|
481
109
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
482
110
|
CHECK_EQ(written, compressed.size());
|
@@ -484,15 +112,15 @@ static int VerifyString(const string& input) {
|
|
484
112
|
snappy::MaxCompressedLength(input.size()));
|
485
113
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
486
114
|
|
487
|
-
string uncompressed;
|
115
|
+
std::string uncompressed;
|
488
116
|
DataEndingAtUnreadablePage c(compressed);
|
489
117
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
490
118
|
CHECK_EQ(uncompressed, input);
|
491
119
|
return uncompressed.size();
|
492
120
|
}
|
493
121
|
|
494
|
-
|
495
|
-
string compressed;
|
122
|
+
void VerifyStringSink(const std::string& input) {
|
123
|
+
std::string compressed;
|
496
124
|
DataEndingAtUnreadablePage i(input);
|
497
125
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
498
126
|
CHECK_EQ(written, compressed.size());
|
@@ -500,7 +128,7 @@ static void VerifyStringSink(const string& input) {
|
|
500
128
|
snappy::MaxCompressedLength(input.size()));
|
501
129
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
502
130
|
|
503
|
-
string uncompressed;
|
131
|
+
std::string uncompressed;
|
504
132
|
uncompressed.resize(input.size());
|
505
133
|
snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
|
506
134
|
DataEndingAtUnreadablePage c(compressed);
|
@@ -509,41 +137,67 @@ static void VerifyStringSink(const string& input) {
|
|
509
137
|
CHECK_EQ(uncompressed, input);
|
510
138
|
}
|
511
139
|
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
CHECK_EQ(written, compressed.size());
|
517
|
-
CHECK_LE(compressed.size(),
|
518
|
-
snappy::MaxCompressedLength(input.size()));
|
519
|
-
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
520
|
-
|
521
|
-
// Try uncompressing into an iovec containing a random number of entries
|
522
|
-
// ranging from 1 to 10.
|
523
|
-
char* buf = new char[input.size()];
|
524
|
-
ACMRandom rnd(input.size());
|
525
|
-
size_t num = rnd.Next() % 10 + 1;
|
140
|
+
struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) {
|
141
|
+
std::minstd_rand0 rng(input.size());
|
142
|
+
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
|
143
|
+
num = uniform_1_to_10(rng);
|
526
144
|
if (input.size() < num) {
|
527
145
|
num = input.size();
|
528
146
|
}
|
529
147
|
struct iovec* iov = new iovec[num];
|
530
|
-
|
148
|
+
size_t used_so_far = 0;
|
149
|
+
std::bernoulli_distribution one_in_five(1.0 / 5);
|
531
150
|
for (size_t i = 0; i < num; ++i) {
|
151
|
+
assert(used_so_far < input.size());
|
532
152
|
iov[i].iov_base = buf + used_so_far;
|
533
153
|
if (i == num - 1) {
|
534
154
|
iov[i].iov_len = input.size() - used_so_far;
|
535
155
|
} else {
|
536
156
|
// Randomly choose to insert a 0 byte entry.
|
537
|
-
if (
|
157
|
+
if (one_in_five(rng)) {
|
538
158
|
iov[i].iov_len = 0;
|
539
159
|
} else {
|
540
|
-
|
160
|
+
std::uniform_int_distribution<size_t> uniform_not_used_so_far(
|
161
|
+
0, input.size() - used_so_far - 1);
|
162
|
+
iov[i].iov_len = uniform_not_used_so_far(rng);
|
541
163
|
}
|
542
164
|
}
|
543
165
|
used_so_far += iov[i].iov_len;
|
544
166
|
}
|
545
|
-
|
546
|
-
|
167
|
+
return iov;
|
168
|
+
}
|
169
|
+
|
170
|
+
int VerifyIOVecSource(const std::string& input) {
|
171
|
+
std::string compressed;
|
172
|
+
std::string copy = input;
|
173
|
+
char* buf = const_cast<char*>(copy.data());
|
174
|
+
size_t num = 0;
|
175
|
+
struct iovec* iov = GetIOVec(input, buf, num);
|
176
|
+
const size_t written = snappy::CompressFromIOVec(iov, num, &compressed);
|
177
|
+
CHECK_EQ(written, compressed.size());
|
178
|
+
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
|
179
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
180
|
+
|
181
|
+
std::string uncompressed;
|
182
|
+
DataEndingAtUnreadablePage c(compressed);
|
183
|
+
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
184
|
+
CHECK_EQ(uncompressed, input);
|
185
|
+
delete[] iov;
|
186
|
+
return uncompressed.size();
|
187
|
+
}
|
188
|
+
|
189
|
+
void VerifyIOVecSink(const std::string& input) {
|
190
|
+
std::string compressed;
|
191
|
+
DataEndingAtUnreadablePage i(input);
|
192
|
+
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
193
|
+
CHECK_EQ(written, compressed.size());
|
194
|
+
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
|
195
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
196
|
+
char* buf = new char[input.size()];
|
197
|
+
size_t num = 0;
|
198
|
+
struct iovec* iov = GetIOVec(input, buf, num);
|
199
|
+
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov,
|
200
|
+
num));
|
547
201
|
CHECK(!memcmp(buf, input.data(), input.size()));
|
548
202
|
delete[] iov;
|
549
203
|
delete[] buf;
|
@@ -551,22 +205,22 @@ static void VerifyIOVec(const string& input) {
|
|
551
205
|
|
552
206
|
// Test that data compressed by a compressor that does not
|
553
207
|
// obey block sizes is uncompressed properly.
|
554
|
-
|
208
|
+
void VerifyNonBlockedCompression(const std::string& input) {
|
555
209
|
if (input.length() > snappy::kBlockSize) {
|
556
210
|
// We cannot test larger blocks than the maximum block size, obviously.
|
557
211
|
return;
|
558
212
|
}
|
559
213
|
|
560
|
-
string prefix;
|
214
|
+
std::string prefix;
|
561
215
|
Varint::Append32(&prefix, input.size());
|
562
216
|
|
563
217
|
// Setup compression table
|
564
|
-
snappy::internal::WorkingMemory wmem;
|
218
|
+
snappy::internal::WorkingMemory wmem(input.size());
|
565
219
|
int table_size;
|
566
|
-
|
220
|
+
uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
|
567
221
|
|
568
222
|
// Compress entire input in one shot
|
569
|
-
string compressed;
|
223
|
+
std::string compressed;
|
570
224
|
compressed += prefix;
|
571
225
|
compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
|
572
226
|
char* dest = string_as_array(&compressed) + prefix.size();
|
@@ -574,13 +228,13 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
574
228
|
dest, table, table_size);
|
575
229
|
compressed.resize(end - compressed.data());
|
576
230
|
|
577
|
-
// Uncompress into string
|
578
|
-
string uncomp_str;
|
231
|
+
// Uncompress into std::string
|
232
|
+
std::string uncomp_str;
|
579
233
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
|
580
234
|
CHECK_EQ(uncomp_str, input);
|
581
235
|
|
582
236
|
// Uncompress using source/sink
|
583
|
-
string uncomp_str2;
|
237
|
+
std::string uncomp_str2;
|
584
238
|
uncomp_str2.resize(input.size());
|
585
239
|
snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
|
586
240
|
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
@@ -592,62 +246,64 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
592
246
|
static const int kNumBlocks = 10;
|
593
247
|
struct iovec vec[kNumBlocks];
|
594
248
|
const int block_size = 1 + input.size() / kNumBlocks;
|
595
|
-
string iovec_data(block_size * kNumBlocks, 'x');
|
596
|
-
for (int i = 0; i < kNumBlocks; i
|
249
|
+
std::string iovec_data(block_size * kNumBlocks, 'x');
|
250
|
+
for (int i = 0; i < kNumBlocks; ++i) {
|
597
251
|
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
598
252
|
vec[i].iov_len = block_size;
|
599
253
|
}
|
600
254
|
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
|
601
255
|
vec, kNumBlocks));
|
602
|
-
CHECK_EQ(string(iovec_data.data(), input.size()), input);
|
256
|
+
CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
|
603
257
|
}
|
604
258
|
}
|
605
259
|
|
606
260
|
// Expand the input so that it is at least K times as big as block size
|
607
|
-
|
261
|
+
std::string Expand(const std::string& input) {
|
608
262
|
static const int K = 3;
|
609
|
-
string data = input;
|
263
|
+
std::string data = input;
|
610
264
|
while (data.size() < K * snappy::kBlockSize) {
|
611
265
|
data += input;
|
612
266
|
}
|
613
267
|
return data;
|
614
268
|
}
|
615
269
|
|
616
|
-
|
270
|
+
int Verify(const std::string& input) {
|
617
271
|
VLOG(1) << "Verifying input of size " << input.size();
|
618
272
|
|
619
273
|
// Compress using string based routines
|
620
274
|
const int result = VerifyString(input);
|
621
275
|
|
276
|
+
// Compress using `iovec`-based routines.
|
277
|
+
CHECK_EQ(VerifyIOVecSource(input), result);
|
278
|
+
|
622
279
|
// Verify using sink based routines
|
623
280
|
VerifyStringSink(input);
|
624
281
|
|
625
282
|
VerifyNonBlockedCompression(input);
|
626
|
-
|
283
|
+
VerifyIOVecSink(input);
|
627
284
|
if (!input.empty()) {
|
628
|
-
const string expanded = Expand(input);
|
285
|
+
const std::string expanded = Expand(input);
|
629
286
|
VerifyNonBlockedCompression(expanded);
|
630
|
-
|
287
|
+
VerifyIOVecSink(input);
|
631
288
|
}
|
632
289
|
|
633
290
|
return result;
|
634
291
|
}
|
635
292
|
|
636
|
-
|
637
|
-
static bool IsValidCompressedBuffer(const string& c) {
|
293
|
+
bool IsValidCompressedBuffer(const std::string& c) {
|
638
294
|
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
639
295
|
}
|
640
|
-
|
296
|
+
bool Uncompress(const std::string& c, std::string* u) {
|
641
297
|
return snappy::Uncompress(c.data(), c.size(), u);
|
642
298
|
}
|
643
299
|
|
644
300
|
// This test checks to ensure that snappy doesn't coredump if it gets
|
645
301
|
// corrupted data.
|
646
302
|
TEST(CorruptedTest, VerifyCorrupted) {
|
647
|
-
string source = "making sure we don't crash with corrupted input";
|
303
|
+
std::string source = "making sure we don't crash with corrupted input";
|
648
304
|
VLOG(1) << source;
|
649
|
-
string dest;
|
650
|
-
string uncmp;
|
305
|
+
std::string dest;
|
306
|
+
std::string uncmp;
|
651
307
|
snappy::Compress(source.data(), source.size(), &dest);
|
652
308
|
|
653
309
|
// Mess around with the data. It's hard to simulate all possible
|
@@ -662,8 +318,8 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
662
318
|
// This is testing for a security bug - a buffer that decompresses to 100k
|
663
319
|
// but we lie in the snappy header and only reserve 0 bytes of memory :)
|
664
320
|
source.resize(100000);
|
665
|
-
for (
|
666
|
-
|
321
|
+
for (char& source_char : source) {
|
322
|
+
source_char = 'A';
|
667
323
|
}
|
668
324
|
snappy::Compress(source.data(), source.size(), &dest);
|
669
325
|
dest[0] = dest[1] = dest[2] = dest[3] = 0;
|
@@ -694,14 +350,14 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
694
350
|
|
695
351
|
// try reading stuff in from a bad file.
|
696
352
|
for (int i = 1; i <= 3; ++i) {
|
697
|
-
string data =
|
698
|
-
|
699
|
-
string uncmp;
|
353
|
+
std::string data =
|
354
|
+
ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
|
355
|
+
std::string uncmp;
|
700
356
|
// check that we don't return a crazy length
|
701
357
|
size_t ulen;
|
702
358
|
CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
|
703
359
|
|| (ulen < (1<<20)));
|
704
|
-
|
360
|
+
uint32_t ulen2;
|
705
361
|
snappy::ByteArraySource source(data.data(), data.size());
|
706
362
|
CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
|
707
363
|
(ulen2 < (1<<20)));
|
@@ -714,7 +370,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
714
370
|
// These mirror the compression code in snappy.cc, but are copied
|
715
371
|
// here so that we can bypass some limitations in the how snappy.cc
|
716
372
|
// invokes these routines.
|
717
|
-
|
373
|
+
void AppendLiteral(std::string* dst, const std::string& literal) {
|
718
374
|
if (literal.empty()) return;
|
719
375
|
int n = literal.size() - 1;
|
720
376
|
if (n < 60) {
|
@@ -729,12 +385,12 @@ static void AppendLiteral(string* dst, const string& literal) {
|
|
729
385
|
n >>= 8;
|
730
386
|
}
|
731
387
|
dst->push_back(0 | ((59+count) << 2));
|
732
|
-
*dst += string(number, count);
|
388
|
+
*dst += std::string(number, count);
|
733
389
|
}
|
734
390
|
*dst += literal;
|
735
391
|
}
|
736
392
|
|
737
|
-
|
393
|
+
void AppendCopy(std::string* dst, int offset, int length) {
|
738
394
|
while (length > 0) {
|
739
395
|
// Figure out how much to copy in one shot
|
740
396
|
int to_copy;
|
@@ -771,51 +427,102 @@ TEST(Snappy, SimpleTests) {
|
|
771
427
|
Verify("ab");
|
772
428
|
Verify("abc");
|
773
429
|
|
774
|
-
Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
|
775
|
-
Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
|
776
|
-
Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
|
777
|
-
Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
778
|
-
Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
|
430
|
+
Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
|
431
|
+
Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
|
432
|
+
Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
|
433
|
+
Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
434
|
+
Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
435
|
+
}
|
436
|
+
|
437
|
+
// Regression test for cr/345340892.
|
438
|
+
TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
|
439
|
+
Verify("abcabcabcabcabcabcab");
|
440
|
+
Verify("abcabcabcabcabcabcab0123456789ABCDEF");
|
441
|
+
|
442
|
+
Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
|
443
|
+
Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
|
444
|
+
}
|
445
|
+
|
446
|
+
// Regression test for cr/345340892.
|
447
|
+
TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
|
448
|
+
std::mt19937 rng;
|
449
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
450
|
+
for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
|
451
|
+
for (int length = 1; length <= 64; ++length) {
|
452
|
+
for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
|
453
|
+
const int size = pattern_size + length + extra_bytes_after_pattern;
|
454
|
+
std::string input;
|
455
|
+
input.resize(size);
|
456
|
+
for (int i = 0; i < pattern_size; ++i) {
|
457
|
+
input[i] = 'a' + i;
|
458
|
+
}
|
459
|
+
for (int i = 0; i < length; ++i) {
|
460
|
+
input[pattern_size + i] = input[i];
|
461
|
+
}
|
462
|
+
for (int i = 0; i < extra_bytes_after_pattern; ++i) {
|
463
|
+
input[pattern_size + length + i] =
|
464
|
+
static_cast<char>(uniform_byte(rng));
|
465
|
+
}
|
466
|
+
Verify(input);
|
467
|
+
}
|
468
|
+
}
|
469
|
+
}
|
779
470
|
}
|
780
471
|
|
781
472
|
// Verify max blowup (lots of four-byte copies)
|
782
473
|
TEST(Snappy, MaxBlowup) {
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
input.
|
788
|
-
|
789
|
-
for (int i =
|
790
|
-
|
791
|
-
|
792
|
-
input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
|
474
|
+
std::mt19937 rng;
|
475
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
476
|
+
std::string input;
|
477
|
+
for (int i = 0; i < 80000; ++i)
|
478
|
+
input.push_back(static_cast<char>(uniform_byte(rng)));
|
479
|
+
|
480
|
+
for (int i = 0; i < 80000; i += 4) {
|
481
|
+
std::string four_bytes(input.end() - i - 4, input.end() - i);
|
482
|
+
input.append(four_bytes);
|
793
483
|
}
|
794
484
|
Verify(input);
|
795
485
|
}
|
796
486
|
|
797
487
|
TEST(Snappy, RandomData) {
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
488
|
+
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
|
489
|
+
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
|
490
|
+
std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
|
491
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
492
|
+
std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
|
493
|
+
std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
|
494
|
+
std::bernoulli_distribution one_in_ten(1.0 / 10);
|
495
|
+
|
496
|
+
constexpr int num_ops = 20000;
|
497
|
+
for (int i = 0; i < num_ops; ++i) {
|
802
498
|
if ((i % 1000) == 0) {
|
803
499
|
VLOG(0) << "Random op " << i << " of " << num_ops;
|
804
500
|
}
|
805
501
|
|
806
|
-
string x;
|
807
|
-
size_t len =
|
502
|
+
std::string x;
|
503
|
+
size_t len = uniform_4k(rng);
|
808
504
|
if (i < 100) {
|
809
|
-
len = 65536 +
|
505
|
+
len = 65536 + uniform_64k(rng);
|
810
506
|
}
|
811
507
|
while (x.size() < len) {
|
812
508
|
int run_len = 1;
|
813
|
-
if (
|
814
|
-
|
509
|
+
if (one_in_ten(rng)) {
|
510
|
+
int skewed_bits = uniform_0_to_8(rng);
|
511
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
|
512
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
513
|
+
(1 << skewed_bits) - 1);
|
514
|
+
run_len = skewed_low(rng);
|
515
|
+
}
|
516
|
+
char c = static_cast<char>(uniform_byte(rng));
|
517
|
+
if (i >= 100) {
|
518
|
+
int skewed_bits = uniform_0_to_3(rng);
|
519
|
+
// int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
|
520
|
+
std::uniform_int_distribution<int> skewed_low(0,
|
521
|
+
(1 << skewed_bits) - 1);
|
522
|
+
c = static_cast<char>(skewed_low(rng));
|
815
523
|
}
|
816
|
-
char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
|
817
524
|
while (run_len-- > 0 && x.size() < len) {
|
818
|
-
x
|
525
|
+
x.push_back(c);
|
819
526
|
}
|
820
527
|
}
|
821
528
|
|
@@ -829,20 +536,20 @@ TEST(Snappy, FourByteOffset) {
|
|
829
536
|
// copy manually.
|
830
537
|
|
831
538
|
// The two fragments that make up the input string.
|
832
|
-
string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
833
|
-
string fragment2 = "some other string";
|
539
|
+
std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
|
540
|
+
std::string fragment2 = "some other string";
|
834
541
|
|
835
542
|
// How many times each fragment is emitted.
|
836
543
|
const int n1 = 2;
|
837
544
|
const int n2 = 100000 / fragment2.size();
|
838
|
-
const
|
545
|
+
const size_t length = n1 * fragment1.size() + n2 * fragment2.size();
|
839
546
|
|
840
|
-
string compressed;
|
547
|
+
std::string compressed;
|
841
548
|
Varint::Append32(&compressed, length);
|
842
549
|
|
843
550
|
AppendLiteral(&compressed, fragment1);
|
844
|
-
string src = fragment1;
|
845
|
-
for (int i = 0; i < n2; i
|
551
|
+
std::string src = fragment1;
|
552
|
+
for (int i = 0; i < n2; ++i) {
|
846
553
|
AppendLiteral(&compressed, fragment2);
|
847
554
|
src += fragment2;
|
848
555
|
}
|
@@ -850,14 +557,34 @@ TEST(Snappy, FourByteOffset) {
|
|
850
557
|
src += fragment1;
|
851
558
|
CHECK_EQ(length, src.size());
|
852
559
|
|
853
|
-
string uncompressed;
|
560
|
+
std::string uncompressed;
|
854
561
|
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
855
562
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
|
856
563
|
&uncompressed));
|
857
564
|
CHECK_EQ(uncompressed, src);
|
858
565
|
}
|
859
566
|
|
860
|
-
TEST(Snappy,
|
567
|
+
TEST(Snappy, IOVecSourceEdgeCases) {
|
568
|
+
// Validate that empty leading, trailing, and in-between iovecs are handled:
|
569
|
+
// [] [] ['a'] [] ['b'] [].
|
570
|
+
std::string data = "ab";
|
571
|
+
char* buf = const_cast<char*>(data.data());
|
572
|
+
size_t used_so_far = 0;
|
573
|
+
static const int kLengths[] = {0, 0, 1, 0, 1, 0};
|
574
|
+
struct iovec iov[ARRAYSIZE(kLengths)];
|
575
|
+
for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
|
576
|
+
iov[i].iov_base = buf + used_so_far;
|
577
|
+
iov[i].iov_len = kLengths[i];
|
578
|
+
used_so_far += kLengths[i];
|
579
|
+
}
|
580
|
+
std::string compressed;
|
581
|
+
snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed);
|
582
|
+
std::string uncompressed;
|
583
|
+
snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);
|
584
|
+
CHECK_EQ(data, uncompressed);
|
585
|
+
}
|
586
|
+
|
587
|
+
TEST(Snappy, IOVecSinkEdgeCases) {
|
861
588
|
// Test some tricky edge cases in the iovec output that are not necessarily
|
862
589
|
// exercised by random tests.
|
863
590
|
|
@@ -872,7 +599,7 @@ TEST(Snappy, IOVecEdgeCases) {
|
|
872
599
|
iov[i].iov_len = kLengths[i];
|
873
600
|
}
|
874
601
|
|
875
|
-
string compressed;
|
602
|
+
std::string compressed;
|
876
603
|
Varint::Append32(&compressed, 22);
|
877
604
|
|
878
605
|
// A literal whose output crosses three blocks.
|
@@ -933,7 +660,7 @@ TEST(Snappy, IOVecLiteralOverflow) {
|
|
933
660
|
iov[i].iov_len = kLengths[i];
|
934
661
|
}
|
935
662
|
|
936
|
-
string compressed;
|
663
|
+
std::string compressed;
|
937
664
|
Varint::Append32(&compressed, 8);
|
938
665
|
|
939
666
|
AppendLiteral(&compressed, "12345678");
|
@@ -955,7 +682,7 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
955
682
|
iov[i].iov_len = kLengths[i];
|
956
683
|
}
|
957
684
|
|
958
|
-
string compressed;
|
685
|
+
std::string compressed;
|
959
686
|
Varint::Append32(&compressed, 8);
|
960
687
|
|
961
688
|
AppendLiteral(&compressed, "123");
|
@@ -969,21 +696,20 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
969
696
|
}
|
970
697
|
}
|
971
698
|
|
972
|
-
|
973
|
-
size_t* ulength) {
|
699
|
+
bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
|
974
700
|
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
975
701
|
compressed.size(),
|
976
702
|
ulength);
|
977
703
|
|
978
704
|
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
979
|
-
|
705
|
+
uint32_t length;
|
980
706
|
const bool result2 = snappy::GetUncompressedLength(&source, &length);
|
981
707
|
CHECK_EQ(result1, result2);
|
982
708
|
return result1;
|
983
709
|
}
|
984
710
|
|
985
711
|
TEST(SnappyCorruption, TruncatedVarint) {
|
986
|
-
string compressed, uncompressed;
|
712
|
+
std::string compressed, uncompressed;
|
987
713
|
size_t ulength;
|
988
714
|
compressed.push_back('\xf0');
|
989
715
|
CHECK(!CheckUncompressedLength(compressed, &ulength));
|
@@ -993,7 +719,7 @@ TEST(SnappyCorruption, TruncatedVarint) {
|
|
993
719
|
}
|
994
720
|
|
995
721
|
TEST(SnappyCorruption, UnterminatedVarint) {
|
996
|
-
string compressed, uncompressed;
|
722
|
+
std::string compressed, uncompressed;
|
997
723
|
size_t ulength;
|
998
724
|
compressed.push_back('\x80');
|
999
725
|
compressed.push_back('\x80');
|
@@ -1008,7 +734,7 @@ TEST(SnappyCorruption, UnterminatedVarint) {
|
|
1008
734
|
}
|
1009
735
|
|
1010
736
|
TEST(SnappyCorruption, OverflowingVarint) {
|
1011
|
-
string compressed, uncompressed;
|
737
|
+
std::string compressed, uncompressed;
|
1012
738
|
size_t ulength;
|
1013
739
|
compressed.push_back('\xfb');
|
1014
740
|
compressed.push_back('\xff');
|
@@ -1025,14 +751,14 @@ TEST(Snappy, ReadPastEndOfBuffer) {
|
|
1025
751
|
// Check that we do not read past end of input
|
1026
752
|
|
1027
753
|
// Make a compressed string that ends with a single-byte literal
|
1028
|
-
string compressed;
|
754
|
+
std::string compressed;
|
1029
755
|
Varint::Append32(&compressed, 1);
|
1030
756
|
AppendLiteral(&compressed, "x");
|
1031
757
|
|
1032
|
-
string uncompressed;
|
758
|
+
std::string uncompressed;
|
1033
759
|
DataEndingAtUnreadablePage c(compressed);
|
1034
760
|
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
1035
|
-
CHECK_EQ(uncompressed, string("x"));
|
761
|
+
CHECK_EQ(uncompressed, std::string("x"));
|
1036
762
|
}
|
1037
763
|
|
1038
764
|
// Check for an infinite loop caused by a copy with offset==0
|
@@ -1051,17 +777,14 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
|
|
1051
777
|
EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
|
1052
778
|
}
|
1053
779
|
|
1054
|
-
namespace {
|
1055
|
-
|
1056
780
|
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
|
781
|
+
uint64_t data;
|
1057
782
|
std::pair<size_t, bool> p =
|
1058
|
-
snappy::internal::FindMatchLength(s1, s2, s2 + length);
|
783
|
+
snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
|
1059
784
|
CHECK_EQ(p.first < 8, p.second);
|
1060
785
|
return p.first;
|
1061
786
|
}
|
1062
787
|
|
1063
|
-
} // namespace
|
1064
|
-
|
1065
788
|
TEST(Snappy, FindMatchLength) {
|
1066
789
|
// Exercise all different code paths through the function.
|
1067
790
|
// 64-bit version:
|
@@ -1153,35 +876,37 @@ TEST(Snappy, FindMatchLength) {
|
|
1153
876
|
}
|
1154
877
|
|
1155
878
|
TEST(Snappy, FindMatchLengthRandom) {
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
879
|
+
constexpr int kNumTrials = 10000;
|
880
|
+
constexpr int kTypicalLength = 10;
|
881
|
+
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
|
882
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
883
|
+
std::bernoulli_distribution one_in_two(1.0 / 2);
|
884
|
+
std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
|
885
|
+
|
886
|
+
for (int i = 0; i < kNumTrials; ++i) {
|
887
|
+
std::string s, t;
|
888
|
+
char a = static_cast<char>(uniform_byte(rng));
|
889
|
+
char b = static_cast<char>(uniform_byte(rng));
|
890
|
+
while (!one_in_typical_length(rng)) {
|
891
|
+
s.push_back(one_in_two(rng) ? a : b);
|
892
|
+
t.push_back(one_in_two(rng) ? a : b);
|
1167
893
|
}
|
1168
894
|
DataEndingAtUnreadablePage u(s);
|
1169
895
|
DataEndingAtUnreadablePage v(t);
|
1170
|
-
|
896
|
+
size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
|
1171
897
|
if (matched == t.size()) {
|
1172
898
|
EXPECT_EQ(s, t);
|
1173
899
|
} else {
|
1174
900
|
EXPECT_NE(s[matched], t[matched]);
|
1175
|
-
for (
|
901
|
+
for (size_t j = 0; j < matched; ++j) {
|
1176
902
|
EXPECT_EQ(s[j], t[j]);
|
1177
903
|
}
|
1178
904
|
}
|
1179
905
|
}
|
1180
906
|
}
|
1181
907
|
|
1182
|
-
|
1183
|
-
|
1184
|
-
unsigned int copy_offset) {
|
908
|
+
uint16_t MakeEntry(unsigned int extra, unsigned int len,
|
909
|
+
unsigned int copy_offset) {
|
1185
910
|
// Check that all of the fields fit within the allocated space
|
1186
911
|
assert(extra == (extra & 0x7)); // At most 3 bits
|
1187
912
|
assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
|
@@ -1197,335 +922,89 @@ TEST(Snappy, VerifyCharTable) {
|
|
1197
922
|
using snappy::internal::COPY_2_BYTE_OFFSET;
|
1198
923
|
using snappy::internal::COPY_4_BYTE_OFFSET;
|
1199
924
|
using snappy::internal::char_table;
|
1200
|
-
using snappy::internal::wordmask;
|
1201
925
|
|
1202
|
-
|
926
|
+
uint16_t dst[256];
|
1203
927
|
|
1204
928
|
// Place invalid entries in all places to detect missing initialization
|
1205
929
|
int assigned = 0;
|
1206
|
-
for (int i = 0; i < 256; i
|
930
|
+
for (int i = 0; i < 256; ++i) {
|
1207
931
|
dst[i] = 0xffff;
|
1208
932
|
}
|
1209
933
|
|
1210
934
|
// Small LITERAL entries. We store (len-1) in the top 6 bits.
|
1211
|
-
for (
|
1212
|
-
dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
|
935
|
+
for (uint8_t len = 1; len <= 60; ++len) {
|
936
|
+
dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
|
1213
937
|
assigned++;
|
1214
938
|
}
|
1215
939
|
|
1216
940
|
// Large LITERAL entries. We use 60..63 in the high 6 bits to
|
1217
941
|
// encode the number of bytes of length info that follow the opcode.
|
1218
|
-
for (
|
942
|
+
for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
|
1219
943
|
// We set the length field in the lookup table to 1 because extra
|
1220
944
|
// bytes encode len-1.
|
1221
|
-
dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
945
|
+
dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
1222
946
|
assigned++;
|
1223
947
|
}
|
1224
948
|
|
1225
949
|
// COPY_1_BYTE_OFFSET.
|
1226
950
|
//
|
1227
951
|
// The tag byte in the compressed data stores len-4 in 3 bits, and
|
1228
|
-
// offset/256 in
|
952
|
+
// offset/256 in 3 bits. offset%256 is stored in the next byte.
|
1229
953
|
//
|
1230
954
|
// This format is used for length in range [4..11] and offset in
|
1231
955
|
// range [0..2047]
|
1232
|
-
for (
|
1233
|
-
for (
|
1234
|
-
|
1235
|
-
|
956
|
+
for (uint8_t len = 4; len < 12; ++len) {
|
957
|
+
for (uint16_t offset = 0; offset < 2048; offset += 256) {
|
958
|
+
uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
|
959
|
+
dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
|
960
|
+
MakeEntry(1, len, offset_high);
|
1236
961
|
assigned++;
|
1237
962
|
}
|
1238
963
|
}
|
1239
964
|
|
1240
965
|
// COPY_2_BYTE_OFFSET.
|
1241
966
|
// Tag contains len-1 in top 6 bits, and offset in next two bytes.
|
1242
|
-
for (
|
1243
|
-
dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
|
967
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
968
|
+
dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
|
1244
969
|
assigned++;
|
1245
970
|
}
|
1246
971
|
|
1247
972
|
// COPY_4_BYTE_OFFSET.
|
1248
973
|
// Tag contents len-1 in top 6 bits, and offset in next four bytes.
|
1249
|
-
for (
|
1250
|
-
dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
|
974
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
975
|
+
dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
|
1251
976
|
assigned++;
|
1252
977
|
}
|
1253
978
|
|
1254
979
|
// Check that each entry was initialized exactly once.
|
1255
980
|
EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
|
1256
|
-
for (int i = 0; i < 256; i
|
981
|
+
for (int i = 0; i < 256; ++i) {
|
1257
982
|
EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
|
1258
983
|
}
|
1259
984
|
|
1260
|
-
if (FLAGS_snappy_dump_decompression_table) {
|
1261
|
-
printf("static const
|
1262
|
-
for (int i = 0; i < 256; i
|
1263
|
-
printf("0x%04x%s",
|
1264
|
-
|
1265
|
-
|
985
|
+
if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
|
986
|
+
std::printf("static const uint16_t char_table[256] = {\n ");
|
987
|
+
for (int i = 0; i < 256; ++i) {
|
988
|
+
std::printf("0x%04x%s",
|
989
|
+
dst[i],
|
990
|
+
((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
|
1266
991
|
}
|
1267
|
-
printf("};\n");
|
992
|
+
std::printf("};\n");
|
1268
993
|
}
|
1269
994
|
|
1270
995
|
// Check that computed table matched recorded table.
|
1271
|
-
for (int i = 0; i < 256; i
|
996
|
+
for (int i = 0; i < 256; ++i) {
|
1272
997
|
EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
|
1273
998
|
}
|
1274
999
|
}
|
1275
1000
|
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
string compressed;
|
1281
|
-
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
|
1282
|
-
|
1283
|
-
CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
|
1284
|
-
file::Defaults()));
|
1285
|
-
}
|
1286
|
-
|
1287
|
-
static void UncompressFile(const char* fname) {
|
1288
|
-
string fullinput;
|
1289
|
-
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1290
|
-
|
1291
|
-
size_t uncompLength;
|
1292
|
-
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
|
1293
|
-
|
1294
|
-
string uncompressed;
|
1295
|
-
uncompressed.resize(uncompLength);
|
1296
|
-
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
|
1297
|
-
|
1298
|
-
CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
|
1299
|
-
file::Defaults()));
|
1300
|
-
}
|
1301
|
-
|
1302
|
-
static void MeasureFile(const char* fname) {
|
1303
|
-
string fullinput;
|
1304
|
-
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1305
|
-
printf("%-40s :\n", fname);
|
1306
|
-
|
1307
|
-
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
|
1308
|
-
int end_len = fullinput.size();
|
1309
|
-
if (FLAGS_end_len >= 0) {
|
1310
|
-
end_len = min<int>(fullinput.size(), FLAGS_end_len);
|
1311
|
-
}
|
1312
|
-
for (int len = start_len; len <= end_len; len++) {
|
1313
|
-
const char* const input = fullinput.data();
|
1314
|
-
int repeats = (FLAGS_bytes + len) / (len + 1);
|
1315
|
-
if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
|
1316
|
-
if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
|
1317
|
-
if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
|
1318
|
-
if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
|
1319
|
-
if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
|
1320
|
-
if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
|
1321
|
-
|
1322
|
-
// For block-size based measurements
|
1323
|
-
if (0 && FLAGS_snappy) {
|
1324
|
-
Measure(input, len, SNAPPY, repeats, 8<<10);
|
1325
|
-
Measure(input, len, SNAPPY, repeats, 16<<10);
|
1326
|
-
Measure(input, len, SNAPPY, repeats, 32<<10);
|
1327
|
-
Measure(input, len, SNAPPY, repeats, 64<<10);
|
1328
|
-
Measure(input, len, SNAPPY, repeats, 256<<10);
|
1329
|
-
Measure(input, len, SNAPPY, repeats, 1024<<10);
|
1330
|
-
}
|
1331
|
-
}
|
1332
|
-
}
|
1333
|
-
|
1334
|
-
static struct {
|
1335
|
-
const char* label;
|
1336
|
-
const char* filename;
|
1337
|
-
size_t size_limit;
|
1338
|
-
} files[] = {
|
1339
|
-
{ "html", "html", 0 },
|
1340
|
-
{ "urls", "urls.10K", 0 },
|
1341
|
-
{ "jpg", "fireworks.jpeg", 0 },
|
1342
|
-
{ "jpg_200", "fireworks.jpeg", 200 },
|
1343
|
-
{ "pdf", "paper-100k.pdf", 0 },
|
1344
|
-
{ "html4", "html_x_4", 0 },
|
1345
|
-
{ "txt1", "alice29.txt", 0 },
|
1346
|
-
{ "txt2", "asyoulik.txt", 0 },
|
1347
|
-
{ "txt3", "lcet10.txt", 0 },
|
1348
|
-
{ "txt4", "plrabn12.txt", 0 },
|
1349
|
-
{ "pb", "geo.protodata", 0 },
|
1350
|
-
{ "gaviota", "kppkn.gtb", 0 },
|
1351
|
-
};
|
1352
|
-
|
1353
|
-
static void BM_UFlat(int iters, int arg) {
|
1354
|
-
StopBenchmarkTiming();
|
1355
|
-
|
1356
|
-
// Pick file to process based on "arg"
|
1357
|
-
CHECK_GE(arg, 0);
|
1358
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1359
|
-
string contents = ReadTestDataFile(files[arg].filename,
|
1360
|
-
files[arg].size_limit);
|
1361
|
-
|
1362
|
-
string zcontents;
|
1363
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1364
|
-
char* dst = new char[contents.size()];
|
1365
|
-
|
1366
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1367
|
-
static_cast<int64>(contents.size()));
|
1368
|
-
SetBenchmarkLabel(files[arg].label);
|
1369
|
-
StartBenchmarkTiming();
|
1370
|
-
while (iters-- > 0) {
|
1371
|
-
CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
|
1372
|
-
}
|
1373
|
-
StopBenchmarkTiming();
|
1374
|
-
|
1375
|
-
delete[] dst;
|
1376
|
-
}
|
1377
|
-
BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1378
|
-
|
1379
|
-
static void BM_UValidate(int iters, int arg) {
|
1380
|
-
StopBenchmarkTiming();
|
1381
|
-
|
1382
|
-
// Pick file to process based on "arg"
|
1383
|
-
CHECK_GE(arg, 0);
|
1384
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1385
|
-
string contents = ReadTestDataFile(files[arg].filename,
|
1386
|
-
files[arg].size_limit);
|
1387
|
-
|
1388
|
-
string zcontents;
|
1389
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1390
|
-
|
1391
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1392
|
-
static_cast<int64>(contents.size()));
|
1393
|
-
SetBenchmarkLabel(files[arg].label);
|
1394
|
-
StartBenchmarkTiming();
|
1395
|
-
while (iters-- > 0) {
|
1396
|
-
CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
|
1397
|
-
}
|
1398
|
-
StopBenchmarkTiming();
|
1399
|
-
}
|
1400
|
-
BENCHMARK(BM_UValidate)->DenseRange(0, 4);
|
1401
|
-
|
1402
|
-
static void BM_UIOVec(int iters, int arg) {
|
1403
|
-
StopBenchmarkTiming();
|
1404
|
-
|
1405
|
-
// Pick file to process based on "arg"
|
1406
|
-
CHECK_GE(arg, 0);
|
1407
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1408
|
-
string contents = ReadTestDataFile(files[arg].filename,
|
1409
|
-
files[arg].size_limit);
|
1410
|
-
|
1411
|
-
string zcontents;
|
1412
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1413
|
-
|
1414
|
-
// Uncompress into an iovec containing ten entries.
|
1415
|
-
const int kNumEntries = 10;
|
1416
|
-
struct iovec iov[kNumEntries];
|
1417
|
-
char *dst = new char[contents.size()];
|
1418
|
-
int used_so_far = 0;
|
1419
|
-
for (int i = 0; i < kNumEntries; ++i) {
|
1420
|
-
iov[i].iov_base = dst + used_so_far;
|
1421
|
-
if (used_so_far == contents.size()) {
|
1422
|
-
iov[i].iov_len = 0;
|
1423
|
-
continue;
|
1424
|
-
}
|
1425
|
-
|
1426
|
-
if (i == kNumEntries - 1) {
|
1427
|
-
iov[i].iov_len = contents.size() - used_so_far;
|
1428
|
-
} else {
|
1429
|
-
iov[i].iov_len = contents.size() / kNumEntries;
|
1430
|
-
}
|
1431
|
-
used_so_far += iov[i].iov_len;
|
1432
|
-
}
|
1433
|
-
|
1434
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1435
|
-
static_cast<int64>(contents.size()));
|
1436
|
-
SetBenchmarkLabel(files[arg].label);
|
1437
|
-
StartBenchmarkTiming();
|
1438
|
-
while (iters-- > 0) {
|
1439
|
-
CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
|
1440
|
-
kNumEntries));
|
1441
|
-
}
|
1442
|
-
StopBenchmarkTiming();
|
1443
|
-
|
1444
|
-
delete[] dst;
|
1445
|
-
}
|
1446
|
-
BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
|
1447
|
-
|
1448
|
-
static void BM_UFlatSink(int iters, int arg) {
|
1449
|
-
StopBenchmarkTiming();
|
1450
|
-
|
1451
|
-
// Pick file to process based on "arg"
|
1452
|
-
CHECK_GE(arg, 0);
|
1453
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1454
|
-
string contents = ReadTestDataFile(files[arg].filename,
|
1455
|
-
files[arg].size_limit);
|
1456
|
-
|
1457
|
-
string zcontents;
|
1458
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1459
|
-
char* dst = new char[contents.size()];
|
1460
|
-
|
1461
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1462
|
-
static_cast<int64>(contents.size()));
|
1463
|
-
SetBenchmarkLabel(files[arg].label);
|
1464
|
-
StartBenchmarkTiming();
|
1465
|
-
while (iters-- > 0) {
|
1466
|
-
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
|
1467
|
-
snappy::UncheckedByteArraySink sink(dst);
|
1468
|
-
CHECK(snappy::Uncompress(&source, &sink));
|
1001
|
+
TEST(Snappy, TestBenchmarkFiles) {
|
1002
|
+
for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
|
1003
|
+
Verify(ReadTestDataFile(kTestDataFiles[i].filename,
|
1004
|
+
kTestDataFiles[i].size_limit));
|
1469
1005
|
}
|
1470
|
-
StopBenchmarkTiming();
|
1471
|
-
|
1472
|
-
string s(dst, contents.size());
|
1473
|
-
CHECK_EQ(contents, s);
|
1474
|
-
|
1475
|
-
delete[] dst;
|
1476
1006
|
}
|
1477
1007
|
|
1478
|
-
|
1479
|
-
|
1480
|
-
static void BM_ZFlat(int iters, int arg) {
|
1481
|
-
StopBenchmarkTiming();
|
1482
|
-
|
1483
|
-
// Pick file to process based on "arg"
|
1484
|
-
CHECK_GE(arg, 0);
|
1485
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1486
|
-
string contents = ReadTestDataFile(files[arg].filename,
|
1487
|
-
files[arg].size_limit);
|
1488
|
-
|
1489
|
-
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
1490
|
-
|
1491
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1492
|
-
static_cast<int64>(contents.size()));
|
1493
|
-
StartBenchmarkTiming();
|
1494
|
-
|
1495
|
-
size_t zsize = 0;
|
1496
|
-
while (iters-- > 0) {
|
1497
|
-
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
|
1498
|
-
}
|
1499
|
-
StopBenchmarkTiming();
|
1500
|
-
const double compression_ratio =
|
1501
|
-
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
1502
|
-
SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
|
1503
|
-
files[arg].label, 100.0 * compression_ratio));
|
1504
|
-
VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
|
1505
|
-
files[arg].label, contents.size(), zsize);
|
1506
|
-
delete[] dst;
|
1507
|
-
}
|
1508
|
-
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1008
|
+
} // namespace
|
1509
1009
|
|
1510
1010
|
} // namespace snappy
|
1511
|
-
|
1512
|
-
|
1513
|
-
int main(int argc, char** argv) {
|
1514
|
-
InitGoogle(argv[0], &argc, &argv, true);
|
1515
|
-
RunSpecifiedBenchmarks();
|
1516
|
-
|
1517
|
-
if (argc >= 2) {
|
1518
|
-
for (int arg = 1; arg < argc; arg++) {
|
1519
|
-
if (FLAGS_write_compressed) {
|
1520
|
-
CompressFile(argv[arg]);
|
1521
|
-
} else if (FLAGS_write_uncompressed) {
|
1522
|
-
UncompressFile(argv[arg]);
|
1523
|
-
} else {
|
1524
|
-
MeasureFile(argv[arg]);
|
1525
|
-
}
|
1526
|
-
}
|
1527
|
-
return 0;
|
1528
|
-
}
|
1529
|
-
|
1530
|
-
return RUN_ALL_TESTS();
|
1531
|
-
}
|