snappy 0.3.0-java → 0.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +2 -2
- data/README.md +17 -1
- data/ext/extconf.rb +13 -11
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy_ext.jar +0 -0
- data/snappy.gemspec +1 -0
- data/vendor/snappy/CMakeLists.txt +150 -27
- data/vendor/snappy/CONTRIBUTING.md +9 -4
- data/vendor/snappy/NEWS +12 -0
- data/vendor/snappy/README.md +52 -35
- data/vendor/snappy/cmake/config.h.in +28 -24
- data/vendor/snappy/snappy-internal.h +189 -25
- data/vendor/snappy/snappy-sinksource.cc +26 -9
- data/vendor/snappy/snappy-sinksource.h +11 -11
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +227 -308
- data/vendor/snappy/snappy-stubs-public.h.in +0 -11
- data/vendor/snappy/snappy-test.cc +88 -198
- data/vendor/snappy/snappy-test.h +102 -285
- data/vendor/snappy/snappy.cc +1176 -410
- data/vendor/snappy/snappy.h +19 -4
- data/vendor/snappy/snappy_benchmark.cc +378 -0
- data/vendor/snappy/snappy_compress_fuzzer.cc +3 -2
- data/vendor/snappy/snappy_test_data.cc +57 -0
- data/vendor/snappy/snappy_test_data.h +68 -0
- data/vendor/snappy/snappy_test_tool.cc +471 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +3 -2
- data/vendor/snappy/snappy_unittest.cc +170 -666
- metadata +8 -4
@@ -26,44 +26,31 @@
|
|
26
26
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
27
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
28
|
|
29
|
-
#include <math.h>
|
30
|
-
#include <stdlib.h>
|
31
|
-
|
32
29
|
#include <algorithm>
|
30
|
+
#include <cmath>
|
31
|
+
#include <cstdlib>
|
33
32
|
#include <random>
|
34
33
|
#include <string>
|
35
34
|
#include <utility>
|
36
35
|
#include <vector>
|
37
36
|
|
38
|
-
#include "snappy.h"
|
39
|
-
#include "snappy-internal.h"
|
40
37
|
#include "snappy-test.h"
|
38
|
+
|
39
|
+
#include "gtest/gtest.h"
|
40
|
+
|
41
|
+
#include "snappy-internal.h"
|
41
42
|
#include "snappy-sinksource.h"
|
43
|
+
#include "snappy.h"
|
44
|
+
#include "snappy_test_data.h"
|
42
45
|
|
43
|
-
|
44
|
-
"Starting prefix size for testing (-1: just full file contents)");
|
45
|
-
DEFINE_int32(end_len, -1,
|
46
|
-
"Starting prefix size for testing (-1: just full file contents)");
|
47
|
-
DEFINE_int32(bytes, 10485760,
|
48
|
-
"How many bytes to compress/uncompress per file for timing");
|
49
|
-
|
50
|
-
DEFINE_bool(zlib, false,
|
51
|
-
"Run zlib compression (http://www.zlib.net)");
|
52
|
-
DEFINE_bool(lzo, false,
|
53
|
-
"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
|
54
|
-
DEFINE_bool(snappy, true, "Run snappy compression");
|
55
|
-
|
56
|
-
DEFINE_bool(write_compressed, false,
|
57
|
-
"Write compressed versions of each file to <file>.comp");
|
58
|
-
DEFINE_bool(write_uncompressed, false,
|
59
|
-
"Write uncompressed versions of each file to <file>.uncomp");
|
60
|
-
|
61
|
-
DEFINE_bool(snappy_dump_decompression_table, false,
|
46
|
+
SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
|
62
47
|
"If true, we print the decompression table during tests.");
|
63
48
|
|
64
49
|
namespace snappy {
|
65
50
|
|
66
|
-
|
51
|
+
namespace {
|
52
|
+
|
53
|
+
#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
|
67
54
|
|
68
55
|
// To test against code that reads beyond its input, this class copies a
|
69
56
|
// string to a newly allocated group of pages, the last of which
|
@@ -84,7 +71,7 @@ class DataEndingAtUnreadablePage {
|
|
84
71
|
CHECK_NE(MAP_FAILED, mem_);
|
85
72
|
protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
|
86
73
|
char* dst = protected_page_ - size;
|
87
|
-
memcpy(dst, s.data(), size);
|
74
|
+
std::memcpy(dst, s.data(), size);
|
88
75
|
data_ = dst;
|
89
76
|
size_ = size;
|
90
77
|
// Make guard page unreadable.
|
@@ -109,256 +96,14 @@ class DataEndingAtUnreadablePage {
|
|
109
96
|
size_t size_;
|
110
97
|
};
|
111
98
|
|
112
|
-
#else //
|
99
|
+
#else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
|
113
100
|
|
114
101
|
// Fallback for systems without mmap.
|
115
102
|
using DataEndingAtUnreadablePage = std::string;
|
116
103
|
|
117
104
|
#endif
|
118
105
|
|
119
|
-
|
120
|
-
ZLIB, LZO, SNAPPY
|
121
|
-
};
|
122
|
-
|
123
|
-
const char* names[] = {
|
124
|
-
"ZLIB", "LZO", "SNAPPY"
|
125
|
-
};
|
126
|
-
|
127
|
-
static size_t MinimumRequiredOutputSpace(size_t input_size,
|
128
|
-
CompressorType comp) {
|
129
|
-
switch (comp) {
|
130
|
-
#ifdef ZLIB_VERSION
|
131
|
-
case ZLIB:
|
132
|
-
return ZLib::MinCompressbufSize(input_size);
|
133
|
-
#endif // ZLIB_VERSION
|
134
|
-
|
135
|
-
#ifdef LZO_VERSION
|
136
|
-
case LZO:
|
137
|
-
return input_size + input_size/64 + 16 + 3;
|
138
|
-
#endif // LZO_VERSION
|
139
|
-
|
140
|
-
case SNAPPY:
|
141
|
-
return snappy::MaxCompressedLength(input_size);
|
142
|
-
|
143
|
-
default:
|
144
|
-
LOG(FATAL) << "Unknown compression type number " << comp;
|
145
|
-
return 0;
|
146
|
-
}
|
147
|
-
}
|
148
|
-
|
149
|
-
// Returns true if we successfully compressed, false otherwise.
|
150
|
-
//
|
151
|
-
// If compressed_is_preallocated is set, do not resize the compressed buffer.
|
152
|
-
// This is typically what you want for a benchmark, in order to not spend
|
153
|
-
// time in the memory allocator. If you do set this flag, however,
|
154
|
-
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
|
155
|
-
// number of bytes, and may contain junk bytes at the end after return.
|
156
|
-
static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
157
|
-
std::string* compressed, bool compressed_is_preallocated) {
|
158
|
-
if (!compressed_is_preallocated) {
|
159
|
-
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
|
160
|
-
}
|
161
|
-
|
162
|
-
switch (comp) {
|
163
|
-
#ifdef ZLIB_VERSION
|
164
|
-
case ZLIB: {
|
165
|
-
ZLib zlib;
|
166
|
-
uLongf destlen = compressed->size();
|
167
|
-
int ret = zlib.Compress(
|
168
|
-
reinterpret_cast<Bytef*>(string_as_array(compressed)),
|
169
|
-
&destlen,
|
170
|
-
reinterpret_cast<const Bytef*>(input),
|
171
|
-
input_size);
|
172
|
-
CHECK_EQ(Z_OK, ret);
|
173
|
-
if (!compressed_is_preallocated) {
|
174
|
-
compressed->resize(destlen);
|
175
|
-
}
|
176
|
-
return true;
|
177
|
-
}
|
178
|
-
#endif // ZLIB_VERSION
|
179
|
-
|
180
|
-
#ifdef LZO_VERSION
|
181
|
-
case LZO: {
|
182
|
-
unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
|
183
|
-
lzo_uint destlen;
|
184
|
-
int ret = lzo1x_1_15_compress(
|
185
|
-
reinterpret_cast<const uint8*>(input),
|
186
|
-
input_size,
|
187
|
-
reinterpret_cast<uint8*>(string_as_array(compressed)),
|
188
|
-
&destlen,
|
189
|
-
mem);
|
190
|
-
CHECK_EQ(LZO_E_OK, ret);
|
191
|
-
delete[] mem;
|
192
|
-
if (!compressed_is_preallocated) {
|
193
|
-
compressed->resize(destlen);
|
194
|
-
}
|
195
|
-
break;
|
196
|
-
}
|
197
|
-
#endif // LZO_VERSION
|
198
|
-
|
199
|
-
case SNAPPY: {
|
200
|
-
size_t destlen;
|
201
|
-
snappy::RawCompress(input, input_size,
|
202
|
-
string_as_array(compressed),
|
203
|
-
&destlen);
|
204
|
-
CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
|
205
|
-
if (!compressed_is_preallocated) {
|
206
|
-
compressed->resize(destlen);
|
207
|
-
}
|
208
|
-
break;
|
209
|
-
}
|
210
|
-
|
211
|
-
default: {
|
212
|
-
return false; // the asked-for library wasn't compiled in
|
213
|
-
}
|
214
|
-
}
|
215
|
-
return true;
|
216
|
-
}
|
217
|
-
|
218
|
-
static bool Uncompress(const std::string& compressed, CompressorType comp,
|
219
|
-
int size, std::string* output) {
|
220
|
-
switch (comp) {
|
221
|
-
#ifdef ZLIB_VERSION
|
222
|
-
case ZLIB: {
|
223
|
-
output->resize(size);
|
224
|
-
ZLib zlib;
|
225
|
-
uLongf destlen = output->size();
|
226
|
-
int ret = zlib.Uncompress(
|
227
|
-
reinterpret_cast<Bytef*>(string_as_array(output)),
|
228
|
-
&destlen,
|
229
|
-
reinterpret_cast<const Bytef*>(compressed.data()),
|
230
|
-
compressed.size());
|
231
|
-
CHECK_EQ(Z_OK, ret);
|
232
|
-
CHECK_EQ(static_cast<uLongf>(size), destlen);
|
233
|
-
break;
|
234
|
-
}
|
235
|
-
#endif // ZLIB_VERSION
|
236
|
-
|
237
|
-
#ifdef LZO_VERSION
|
238
|
-
case LZO: {
|
239
|
-
output->resize(size);
|
240
|
-
lzo_uint destlen;
|
241
|
-
int ret = lzo1x_decompress(
|
242
|
-
reinterpret_cast<const uint8*>(compressed.data()),
|
243
|
-
compressed.size(),
|
244
|
-
reinterpret_cast<uint8*>(string_as_array(output)),
|
245
|
-
&destlen,
|
246
|
-
NULL);
|
247
|
-
CHECK_EQ(LZO_E_OK, ret);
|
248
|
-
CHECK_EQ(static_cast<lzo_uint>(size), destlen);
|
249
|
-
break;
|
250
|
-
}
|
251
|
-
#endif // LZO_VERSION
|
252
|
-
|
253
|
-
case SNAPPY: {
|
254
|
-
snappy::RawUncompress(compressed.data(), compressed.size(),
|
255
|
-
string_as_array(output));
|
256
|
-
break;
|
257
|
-
}
|
258
|
-
|
259
|
-
default: {
|
260
|
-
return false; // the asked-for library wasn't compiled in
|
261
|
-
}
|
262
|
-
}
|
263
|
-
return true;
|
264
|
-
}
|
265
|
-
|
266
|
-
static void Measure(const char* data,
|
267
|
-
size_t length,
|
268
|
-
CompressorType comp,
|
269
|
-
int repeats,
|
270
|
-
int block_size) {
|
271
|
-
// Run tests a few time and pick median running times
|
272
|
-
static const int kRuns = 5;
|
273
|
-
double ctime[kRuns];
|
274
|
-
double utime[kRuns];
|
275
|
-
int compressed_size = 0;
|
276
|
-
|
277
|
-
{
|
278
|
-
// Chop the input into blocks
|
279
|
-
int num_blocks = (length + block_size - 1) / block_size;
|
280
|
-
std::vector<const char*> input(num_blocks);
|
281
|
-
std::vector<size_t> input_length(num_blocks);
|
282
|
-
std::vector<std::string> compressed(num_blocks);
|
283
|
-
std::vector<std::string> output(num_blocks);
|
284
|
-
for (int b = 0; b < num_blocks; b++) {
|
285
|
-
int input_start = b * block_size;
|
286
|
-
int input_limit = std::min<int>((b+1)*block_size, length);
|
287
|
-
input[b] = data+input_start;
|
288
|
-
input_length[b] = input_limit-input_start;
|
289
|
-
|
290
|
-
// Pre-grow the output buffer so we don't measure string append time.
|
291
|
-
compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
|
292
|
-
}
|
293
|
-
|
294
|
-
// First, try one trial compression to make sure the code is compiled in
|
295
|
-
if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
|
296
|
-
LOG(WARNING) << "Skipping " << names[comp] << ": "
|
297
|
-
<< "library not compiled in";
|
298
|
-
return;
|
299
|
-
}
|
300
|
-
|
301
|
-
for (int run = 0; run < kRuns; run++) {
|
302
|
-
CycleTimer ctimer, utimer;
|
303
|
-
|
304
|
-
for (int b = 0; b < num_blocks; b++) {
|
305
|
-
// Pre-grow the output buffer so we don't measure string append time.
|
306
|
-
compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
|
307
|
-
}
|
308
|
-
|
309
|
-
ctimer.Start();
|
310
|
-
for (int b = 0; b < num_blocks; b++)
|
311
|
-
for (int i = 0; i < repeats; i++)
|
312
|
-
Compress(input[b], input_length[b], comp, &compressed[b], true);
|
313
|
-
ctimer.Stop();
|
314
|
-
|
315
|
-
// Compress once more, with resizing, so we don't leave junk
|
316
|
-
// at the end that will confuse the decompressor.
|
317
|
-
for (int b = 0; b < num_blocks; b++) {
|
318
|
-
Compress(input[b], input_length[b], comp, &compressed[b], false);
|
319
|
-
}
|
320
|
-
|
321
|
-
for (int b = 0; b < num_blocks; b++) {
|
322
|
-
output[b].resize(input_length[b]);
|
323
|
-
}
|
324
|
-
|
325
|
-
utimer.Start();
|
326
|
-
for (int i = 0; i < repeats; i++)
|
327
|
-
for (int b = 0; b < num_blocks; b++)
|
328
|
-
Uncompress(compressed[b], comp, input_length[b], &output[b]);
|
329
|
-
utimer.Stop();
|
330
|
-
|
331
|
-
ctime[run] = ctimer.Get();
|
332
|
-
utime[run] = utimer.Get();
|
333
|
-
}
|
334
|
-
|
335
|
-
compressed_size = 0;
|
336
|
-
for (size_t i = 0; i < compressed.size(); i++) {
|
337
|
-
compressed_size += compressed[i].size();
|
338
|
-
}
|
339
|
-
}
|
340
|
-
|
341
|
-
std::sort(ctime, ctime + kRuns);
|
342
|
-
std::sort(utime, utime + kRuns);
|
343
|
-
const int med = kRuns/2;
|
344
|
-
|
345
|
-
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
|
346
|
-
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
|
347
|
-
std::string x = names[comp];
|
348
|
-
x += ":";
|
349
|
-
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
|
350
|
-
: std::string("?");
|
351
|
-
printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
|
352
|
-
"comp %5.1f MB/s uncomp %5s MB/s\n",
|
353
|
-
x.c_str(),
|
354
|
-
block_size/(1<<20),
|
355
|
-
static_cast<int>(length), static_cast<uint32>(compressed_size),
|
356
|
-
(compressed_size * 100.0) / std::max<int>(1, length),
|
357
|
-
comp_rate,
|
358
|
-
urate.c_str());
|
359
|
-
}
|
360
|
-
|
361
|
-
static int VerifyString(const std::string& input) {
|
106
|
+
int VerifyString(const std::string& input) {
|
362
107
|
std::string compressed;
|
363
108
|
DataEndingAtUnreadablePage i(input);
|
364
109
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
@@ -374,7 +119,7 @@ static int VerifyString(const std::string& input) {
|
|
374
119
|
return uncompressed.size();
|
375
120
|
}
|
376
121
|
|
377
|
-
|
122
|
+
void VerifyStringSink(const std::string& input) {
|
378
123
|
std::string compressed;
|
379
124
|
DataEndingAtUnreadablePage i(input);
|
380
125
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
@@ -392,26 +137,15 @@ static void VerifyStringSink(const std::string& input) {
|
|
392
137
|
CHECK_EQ(uncompressed, input);
|
393
138
|
}
|
394
139
|
|
395
|
-
|
396
|
-
std::string compressed;
|
397
|
-
DataEndingAtUnreadablePage i(input);
|
398
|
-
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
399
|
-
CHECK_EQ(written, compressed.size());
|
400
|
-
CHECK_LE(compressed.size(),
|
401
|
-
snappy::MaxCompressedLength(input.size()));
|
402
|
-
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
403
|
-
|
404
|
-
// Try uncompressing into an iovec containing a random number of entries
|
405
|
-
// ranging from 1 to 10.
|
406
|
-
char* buf = new char[input.size()];
|
140
|
+
struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) {
|
407
141
|
std::minstd_rand0 rng(input.size());
|
408
142
|
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
|
409
|
-
|
143
|
+
num = uniform_1_to_10(rng);
|
410
144
|
if (input.size() < num) {
|
411
145
|
num = input.size();
|
412
146
|
}
|
413
147
|
struct iovec* iov = new iovec[num];
|
414
|
-
|
148
|
+
size_t used_so_far = 0;
|
415
149
|
std::bernoulli_distribution one_in_five(1.0 / 5);
|
416
150
|
for (size_t i = 0; i < num; ++i) {
|
417
151
|
assert(used_so_far < input.size());
|
@@ -430,8 +164,40 @@ static void VerifyIOVec(const std::string& input) {
|
|
430
164
|
}
|
431
165
|
used_so_far += iov[i].iov_len;
|
432
166
|
}
|
433
|
-
|
434
|
-
|
167
|
+
return iov;
|
168
|
+
}
|
169
|
+
|
170
|
+
int VerifyIOVecSource(const std::string& input) {
|
171
|
+
std::string compressed;
|
172
|
+
std::string copy = input;
|
173
|
+
char* buf = const_cast<char*>(copy.data());
|
174
|
+
size_t num = 0;
|
175
|
+
struct iovec* iov = GetIOVec(input, buf, num);
|
176
|
+
const size_t written = snappy::CompressFromIOVec(iov, num, &compressed);
|
177
|
+
CHECK_EQ(written, compressed.size());
|
178
|
+
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
|
179
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
180
|
+
|
181
|
+
std::string uncompressed;
|
182
|
+
DataEndingAtUnreadablePage c(compressed);
|
183
|
+
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
184
|
+
CHECK_EQ(uncompressed, input);
|
185
|
+
delete[] iov;
|
186
|
+
return uncompressed.size();
|
187
|
+
}
|
188
|
+
|
189
|
+
void VerifyIOVecSink(const std::string& input) {
|
190
|
+
std::string compressed;
|
191
|
+
DataEndingAtUnreadablePage i(input);
|
192
|
+
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
193
|
+
CHECK_EQ(written, compressed.size());
|
194
|
+
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
|
195
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
196
|
+
char* buf = new char[input.size()];
|
197
|
+
size_t num = 0;
|
198
|
+
struct iovec* iov = GetIOVec(input, buf, num);
|
199
|
+
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov,
|
200
|
+
num));
|
435
201
|
CHECK(!memcmp(buf, input.data(), input.size()));
|
436
202
|
delete[] iov;
|
437
203
|
delete[] buf;
|
@@ -439,7 +205,7 @@ static void VerifyIOVec(const std::string& input) {
|
|
439
205
|
|
440
206
|
// Test that data compressed by a compressor that does not
|
441
207
|
// obey block sizes is uncompressed properly.
|
442
|
-
|
208
|
+
void VerifyNonBlockedCompression(const std::string& input) {
|
443
209
|
if (input.length() > snappy::kBlockSize) {
|
444
210
|
// We cannot test larger blocks than the maximum block size, obviously.
|
445
211
|
return;
|
@@ -451,7 +217,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
|
|
451
217
|
// Setup compression table
|
452
218
|
snappy::internal::WorkingMemory wmem(input.size());
|
453
219
|
int table_size;
|
454
|
-
|
220
|
+
uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
|
455
221
|
|
456
222
|
// Compress entire input in one shot
|
457
223
|
std::string compressed;
|
@@ -481,7 +247,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
|
|
481
247
|
struct iovec vec[kNumBlocks];
|
482
248
|
const int block_size = 1 + input.size() / kNumBlocks;
|
483
249
|
std::string iovec_data(block_size * kNumBlocks, 'x');
|
484
|
-
for (int i = 0; i < kNumBlocks; i
|
250
|
+
for (int i = 0; i < kNumBlocks; ++i) {
|
485
251
|
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
486
252
|
vec[i].iov_len = block_size;
|
487
253
|
}
|
@@ -492,7 +258,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
|
|
492
258
|
}
|
493
259
|
|
494
260
|
// Expand the input so that it is at least K times as big as block size
|
495
|
-
|
261
|
+
std::string Expand(const std::string& input) {
|
496
262
|
static const int K = 3;
|
497
263
|
std::string data = input;
|
498
264
|
while (data.size() < K * snappy::kBlockSize) {
|
@@ -501,30 +267,33 @@ static std::string Expand(const std::string& input) {
|
|
501
267
|
return data;
|
502
268
|
}
|
503
269
|
|
504
|
-
|
270
|
+
int Verify(const std::string& input) {
|
505
271
|
VLOG(1) << "Verifying input of size " << input.size();
|
506
272
|
|
507
273
|
// Compress using string based routines
|
508
274
|
const int result = VerifyString(input);
|
509
275
|
|
276
|
+
// Compress using `iovec`-based routines.
|
277
|
+
CHECK_EQ(VerifyIOVecSource(input), result);
|
278
|
+
|
510
279
|
// Verify using sink based routines
|
511
280
|
VerifyStringSink(input);
|
512
281
|
|
513
282
|
VerifyNonBlockedCompression(input);
|
514
|
-
|
283
|
+
VerifyIOVecSink(input);
|
515
284
|
if (!input.empty()) {
|
516
285
|
const std::string expanded = Expand(input);
|
517
286
|
VerifyNonBlockedCompression(expanded);
|
518
|
-
|
287
|
+
VerifyIOVecSink(input);
|
519
288
|
}
|
520
289
|
|
521
290
|
return result;
|
522
291
|
}
|
523
292
|
|
524
|
-
|
293
|
+
bool IsValidCompressedBuffer(const std::string& c) {
|
525
294
|
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
526
295
|
}
|
527
|
-
|
296
|
+
bool Uncompress(const std::string& c, std::string* u) {
|
528
297
|
return snappy::Uncompress(c.data(), c.size(), u);
|
529
298
|
}
|
530
299
|
|
@@ -549,8 +318,8 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
549
318
|
// This is testing for a security bug - a buffer that decompresses to 100k
|
550
319
|
// but we lie in the snappy header and only reserve 0 bytes of memory :)
|
551
320
|
source.resize(100000);
|
552
|
-
for (
|
553
|
-
|
321
|
+
for (char& source_char : source) {
|
322
|
+
source_char = 'A';
|
554
323
|
}
|
555
324
|
snappy::Compress(source.data(), source.size(), &dest);
|
556
325
|
dest[0] = dest[1] = dest[2] = dest[3] = 0;
|
@@ -588,7 +357,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
588
357
|
size_t ulen;
|
589
358
|
CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
|
590
359
|
|| (ulen < (1<<20)));
|
591
|
-
|
360
|
+
uint32_t ulen2;
|
592
361
|
snappy::ByteArraySource source(data.data(), data.size());
|
593
362
|
CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
|
594
363
|
(ulen2 < (1<<20)));
|
@@ -601,7 +370,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
601
370
|
// These mirror the compression code in snappy.cc, but are copied
|
602
371
|
// here so that we can bypass some limitations in how snappy.cc
|
603
372
|
// invokes these routines.
|
604
|
-
|
373
|
+
void AppendLiteral(std::string* dst, const std::string& literal) {
|
605
374
|
if (literal.empty()) return;
|
606
375
|
int n = literal.size() - 1;
|
607
376
|
if (n < 60) {
|
@@ -621,7 +390,7 @@ static void AppendLiteral(std::string* dst, const std::string& literal) {
|
|
621
390
|
*dst += literal;
|
622
391
|
}
|
623
392
|
|
624
|
-
|
393
|
+
void AppendCopy(std::string* dst, int offset, int length) {
|
625
394
|
while (length > 0) {
|
626
395
|
// Figure out how much to copy in one shot
|
627
396
|
int to_copy;
|
@@ -665,6 +434,41 @@ TEST(Snappy, SimpleTests) {
|
|
665
434
|
Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
666
435
|
}
|
667
436
|
|
437
|
+
// Regression test for cr/345340892.
|
438
|
+
TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
|
439
|
+
Verify("abcabcabcabcabcabcab");
|
440
|
+
Verify("abcabcabcabcabcabcab0123456789ABCDEF");
|
441
|
+
|
442
|
+
Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
|
443
|
+
Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
|
444
|
+
}
|
445
|
+
|
446
|
+
// Regression test for cr/345340892.
|
447
|
+
TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
|
448
|
+
std::mt19937 rng;
|
449
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
450
|
+
for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
|
451
|
+
for (int length = 1; length <= 64; ++length) {
|
452
|
+
for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
|
453
|
+
const int size = pattern_size + length + extra_bytes_after_pattern;
|
454
|
+
std::string input;
|
455
|
+
input.resize(size);
|
456
|
+
for (int i = 0; i < pattern_size; ++i) {
|
457
|
+
input[i] = 'a' + i;
|
458
|
+
}
|
459
|
+
for (int i = 0; i < length; ++i) {
|
460
|
+
input[pattern_size + i] = input[i];
|
461
|
+
}
|
462
|
+
for (int i = 0; i < extra_bytes_after_pattern; ++i) {
|
463
|
+
input[pattern_size + length + i] =
|
464
|
+
static_cast<char>(uniform_byte(rng));
|
465
|
+
}
|
466
|
+
Verify(input);
|
467
|
+
}
|
468
|
+
}
|
469
|
+
}
|
470
|
+
}
|
471
|
+
|
668
472
|
// Verify max blowup (lots of four-byte copies)
|
669
473
|
TEST(Snappy, MaxBlowup) {
|
670
474
|
std::mt19937 rng;
|
@@ -681,7 +485,7 @@ TEST(Snappy, MaxBlowup) {
|
|
681
485
|
}
|
682
486
|
|
683
487
|
TEST(Snappy, RandomData) {
|
684
|
-
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
488
|
+
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
|
685
489
|
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
|
686
490
|
std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
|
687
491
|
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
@@ -690,7 +494,7 @@ TEST(Snappy, RandomData) {
|
|
690
494
|
std::bernoulli_distribution one_in_ten(1.0 / 10);
|
691
495
|
|
692
496
|
constexpr int num_ops = 20000;
|
693
|
-
for (int i = 0; i < num_ops; i
|
497
|
+
for (int i = 0; i < num_ops; ++i) {
|
694
498
|
if ((i % 1000) == 0) {
|
695
499
|
VLOG(0) << "Random op " << i << " of " << num_ops;
|
696
500
|
}
|
@@ -738,14 +542,14 @@ TEST(Snappy, FourByteOffset) {
|
|
738
542
|
// How many times each fragment is emitted.
|
739
543
|
const int n1 = 2;
|
740
544
|
const int n2 = 100000 / fragment2.size();
|
741
|
-
const
|
545
|
+
const size_t length = n1 * fragment1.size() + n2 * fragment2.size();
|
742
546
|
|
743
547
|
std::string compressed;
|
744
548
|
Varint::Append32(&compressed, length);
|
745
549
|
|
746
550
|
AppendLiteral(&compressed, fragment1);
|
747
551
|
std::string src = fragment1;
|
748
|
-
for (int i = 0; i < n2; i
|
552
|
+
for (int i = 0; i < n2; ++i) {
|
749
553
|
AppendLiteral(&compressed, fragment2);
|
750
554
|
src += fragment2;
|
751
555
|
}
|
@@ -760,7 +564,27 @@ TEST(Snappy, FourByteOffset) {
|
|
760
564
|
CHECK_EQ(uncompressed, src);
|
761
565
|
}
|
762
566
|
|
763
|
-
TEST(Snappy,
|
567
|
+
TEST(Snappy, IOVecSourceEdgeCases) {
|
568
|
+
// Validate that empty leading, trailing, and in-between iovecs are handled:
|
569
|
+
// [] [] ['a'] [] ['b'] [].
|
570
|
+
std::string data = "ab";
|
571
|
+
char* buf = const_cast<char*>(data.data());
|
572
|
+
size_t used_so_far = 0;
|
573
|
+
static const int kLengths[] = {0, 0, 1, 0, 1, 0};
|
574
|
+
struct iovec iov[ARRAYSIZE(kLengths)];
|
575
|
+
for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
|
576
|
+
iov[i].iov_base = buf + used_so_far;
|
577
|
+
iov[i].iov_len = kLengths[i];
|
578
|
+
used_so_far += kLengths[i];
|
579
|
+
}
|
580
|
+
std::string compressed;
|
581
|
+
snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed);
|
582
|
+
std::string uncompressed;
|
583
|
+
snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);
|
584
|
+
CHECK_EQ(data, uncompressed);
|
585
|
+
}
|
586
|
+
|
587
|
+
TEST(Snappy, IOVecSinkEdgeCases) {
|
764
588
|
// Test some tricky edge cases in the iovec output that are not necessarily
|
765
589
|
// exercised by random tests.
|
766
590
|
|
@@ -872,14 +696,13 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
872
696
|
}
|
873
697
|
}
|
874
698
|
|
875
|
-
|
876
|
-
size_t* ulength) {
|
699
|
+
bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
|
877
700
|
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
878
701
|
compressed.size(),
|
879
702
|
ulength);
|
880
703
|
|
881
704
|
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
882
|
-
|
705
|
+
uint32_t length;
|
883
706
|
const bool result2 = snappy::GetUncompressedLength(&source, &length);
|
884
707
|
CHECK_EQ(result1, result2);
|
885
708
|
return result1;
|
@@ -954,17 +777,14 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
|
|
954
777
|
EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
|
955
778
|
}
|
956
779
|
|
957
|
-
namespace {
|
958
|
-
|
959
780
|
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
|
781
|
+
uint64_t data;
|
960
782
|
std::pair<size_t, bool> p =
|
961
|
-
snappy::internal::FindMatchLength(s1, s2, s2 + length);
|
783
|
+
snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
|
962
784
|
CHECK_EQ(p.first < 8, p.second);
|
963
785
|
return p.first;
|
964
786
|
}
|
965
787
|
|
966
|
-
} // namespace
|
967
|
-
|
968
788
|
TEST(Snappy, FindMatchLength) {
|
969
789
|
// Exercise all different code paths through the function.
|
970
790
|
// 64-bit version:
|
@@ -1058,12 +878,12 @@ TEST(Snappy, FindMatchLength) {
|
|
1058
878
|
TEST(Snappy, FindMatchLengthRandom) {
|
1059
879
|
constexpr int kNumTrials = 10000;
|
1060
880
|
constexpr int kTypicalLength = 10;
|
1061
|
-
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
881
|
+
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
|
1062
882
|
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
1063
883
|
std::bernoulli_distribution one_in_two(1.0 / 2);
|
1064
884
|
std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
|
1065
885
|
|
1066
|
-
for (int i = 0; i < kNumTrials; i
|
886
|
+
for (int i = 0; i < kNumTrials; ++i) {
|
1067
887
|
std::string s, t;
|
1068
888
|
char a = static_cast<char>(uniform_byte(rng));
|
1069
889
|
char b = static_cast<char>(uniform_byte(rng));
|
@@ -1073,21 +893,20 @@ TEST(Snappy, FindMatchLengthRandom) {
|
|
1073
893
|
}
|
1074
894
|
DataEndingAtUnreadablePage u(s);
|
1075
895
|
DataEndingAtUnreadablePage v(t);
|
1076
|
-
|
896
|
+
size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
|
1077
897
|
if (matched == t.size()) {
|
1078
898
|
EXPECT_EQ(s, t);
|
1079
899
|
} else {
|
1080
900
|
EXPECT_NE(s[matched], t[matched]);
|
1081
|
-
for (
|
901
|
+
for (size_t j = 0; j < matched; ++j) {
|
1082
902
|
EXPECT_EQ(s[j], t[j]);
|
1083
903
|
}
|
1084
904
|
}
|
1085
905
|
}
|
1086
906
|
}
|
1087
907
|
|
1088
|
-
|
1089
|
-
|
1090
|
-
unsigned int copy_offset) {
|
908
|
+
uint16_t MakeEntry(unsigned int extra, unsigned int len,
|
909
|
+
unsigned int copy_offset) {
|
1091
910
|
// Check that all of the fields fit within the allocated space
|
1092
911
|
assert(extra == (extra & 0x7)); // At most 3 bits
|
1093
912
|
assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
|
@@ -1104,403 +923,88 @@ TEST(Snappy, VerifyCharTable) {
|
|
1104
923
|
using snappy::internal::COPY_4_BYTE_OFFSET;
|
1105
924
|
using snappy::internal::char_table;
|
1106
925
|
|
1107
|
-
|
926
|
+
uint16_t dst[256];
|
1108
927
|
|
1109
928
|
// Place invalid entries in all places to detect missing initialization
|
1110
929
|
int assigned = 0;
|
1111
|
-
for (int i = 0; i < 256; i
|
930
|
+
for (int i = 0; i < 256; ++i) {
|
1112
931
|
dst[i] = 0xffff;
|
1113
932
|
}
|
1114
933
|
|
1115
934
|
// Small LITERAL entries. We store (len-1) in the top 6 bits.
|
1116
|
-
for (
|
1117
|
-
dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
|
935
|
+
for (uint8_t len = 1; len <= 60; ++len) {
|
936
|
+
dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
|
1118
937
|
assigned++;
|
1119
938
|
}
|
1120
939
|
|
1121
940
|
// Large LITERAL entries. We use 60..63 in the high 6 bits to
|
1122
941
|
// encode the number of bytes of length info that follow the opcode.
|
1123
|
-
for (
|
942
|
+
for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
|
1124
943
|
// We set the length field in the lookup table to 1 because extra
|
1125
944
|
// bytes encode len-1.
|
1126
|
-
dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
945
|
+
dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
1127
946
|
assigned++;
|
1128
947
|
}
|
1129
948
|
|
1130
949
|
// COPY_1_BYTE_OFFSET.
|
1131
950
|
//
|
1132
951
|
// The tag byte in the compressed data stores len-4 in 3 bits, and
|
1133
|
-
// offset/256 in
|
952
|
+
// offset/256 in 3 bits. offset%256 is stored in the next byte.
|
1134
953
|
//
|
1135
954
|
// This format is used for length in range [4..11] and offset in
|
1136
955
|
// range [0..2047]
|
1137
|
-
for (
|
1138
|
-
for (
|
1139
|
-
|
1140
|
-
|
956
|
+
for (uint8_t len = 4; len < 12; ++len) {
|
957
|
+
for (uint16_t offset = 0; offset < 2048; offset += 256) {
|
958
|
+
uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
|
959
|
+
dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
|
960
|
+
MakeEntry(1, len, offset_high);
|
1141
961
|
assigned++;
|
1142
962
|
}
|
1143
963
|
}
|
1144
964
|
|
1145
965
|
// COPY_2_BYTE_OFFSET.
|
1146
966
|
// Tag contains len-1 in top 6 bits, and offset in next two bytes.
|
1147
|
-
for (
|
1148
|
-
dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
|
967
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
968
|
+
dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
|
1149
969
|
assigned++;
|
1150
970
|
}
|
1151
971
|
|
1152
972
|
// COPY_4_BYTE_OFFSET.
|
1153
973
|
// Tag contains len-1 in top 6 bits, and offset in next four bytes.
|
1154
|
-
for (
|
1155
|
-
dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
|
974
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
975
|
+
dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
|
1156
976
|
assigned++;
|
1157
977
|
}
|
1158
978
|
|
1159
979
|
// Check that each entry was initialized exactly once.
|
1160
980
|
EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
|
1161
|
-
for (int i = 0; i < 256; i
|
981
|
+
for (int i = 0; i < 256; ++i) {
|
1162
982
|
EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
|
1163
983
|
}
|
1164
984
|
|
1165
|
-
if (FLAGS_snappy_dump_decompression_table) {
|
1166
|
-
printf("static const
|
1167
|
-
for (int i = 0; i < 256; i
|
1168
|
-
printf("0x%04x%s",
|
1169
|
-
|
1170
|
-
|
985
|
+
if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
|
986
|
+
std::printf("static const uint16_t char_table[256] = {\n ");
|
987
|
+
for (int i = 0; i < 256; ++i) {
|
988
|
+
std::printf("0x%04x%s",
|
989
|
+
dst[i],
|
990
|
+
((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
|
1171
991
|
}
|
1172
|
-
printf("};\n");
|
992
|
+
std::printf("};\n");
|
1173
993
|
}
|
1174
994
|
|
1175
995
|
// Check that computed table matched recorded table.
|
1176
|
-
for (int i = 0; i < 256; i
|
996
|
+
for (int i = 0; i < 256; ++i) {
|
1177
997
|
EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
|
1178
998
|
}
|
1179
999
|
}
|
1180
1000
|
|
1181
|
-
|
1182
|
-
|
1183
|
-
|
1184
|
-
|
1185
|
-
std::string compressed;
|
1186
|
-
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
|
1187
|
-
|
1188
|
-
CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
|
1189
|
-
file::Defaults()));
|
1190
|
-
}
|
1191
|
-
|
1192
|
-
static void UncompressFile(const char* fname) {
|
1193
|
-
std::string fullinput;
|
1194
|
-
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1195
|
-
|
1196
|
-
size_t uncompLength;
|
1197
|
-
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
|
1198
|
-
|
1199
|
-
std::string uncompressed;
|
1200
|
-
uncompressed.resize(uncompLength);
|
1201
|
-
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
|
1202
|
-
|
1203
|
-
CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
|
1204
|
-
file::Defaults()));
|
1205
|
-
}
|
1206
|
-
|
1207
|
-
static void MeasureFile(const char* fname) {
|
1208
|
-
std::string fullinput;
|
1209
|
-
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1210
|
-
printf("%-40s :\n", fname);
|
1211
|
-
|
1212
|
-
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
|
1213
|
-
int end_len = fullinput.size();
|
1214
|
-
if (FLAGS_end_len >= 0) {
|
1215
|
-
end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
|
1216
|
-
}
|
1217
|
-
for (int len = start_len; len <= end_len; len++) {
|
1218
|
-
const char* const input = fullinput.data();
|
1219
|
-
int repeats = (FLAGS_bytes + len) / (len + 1);
|
1220
|
-
if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
|
1221
|
-
if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
|
1222
|
-
if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
|
1223
|
-
|
1224
|
-
// For block-size based measurements
|
1225
|
-
if (0 && FLAGS_snappy) {
|
1226
|
-
Measure(input, len, SNAPPY, repeats, 8<<10);
|
1227
|
-
Measure(input, len, SNAPPY, repeats, 16<<10);
|
1228
|
-
Measure(input, len, SNAPPY, repeats, 32<<10);
|
1229
|
-
Measure(input, len, SNAPPY, repeats, 64<<10);
|
1230
|
-
Measure(input, len, SNAPPY, repeats, 256<<10);
|
1231
|
-
Measure(input, len, SNAPPY, repeats, 1024<<10);
|
1232
|
-
}
|
1001
|
+
TEST(Snappy, TestBenchmarkFiles) {
|
1002
|
+
for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
|
1003
|
+
Verify(ReadTestDataFile(kTestDataFiles[i].filename,
|
1004
|
+
kTestDataFiles[i].size_limit));
|
1233
1005
|
}
|
1234
1006
|
}
|
1235
1007
|
|
1236
|
-
|
1237
|
-
const char* label;
|
1238
|
-
const char* filename;
|
1239
|
-
size_t size_limit;
|
1240
|
-
} files[] = {
|
1241
|
-
{ "html", "html", 0 },
|
1242
|
-
{ "urls", "urls.10K", 0 },
|
1243
|
-
{ "jpg", "fireworks.jpeg", 0 },
|
1244
|
-
{ "jpg_200", "fireworks.jpeg", 200 },
|
1245
|
-
{ "pdf", "paper-100k.pdf", 0 },
|
1246
|
-
{ "html4", "html_x_4", 0 },
|
1247
|
-
{ "txt1", "alice29.txt", 0 },
|
1248
|
-
{ "txt2", "asyoulik.txt", 0 },
|
1249
|
-
{ "txt3", "lcet10.txt", 0 },
|
1250
|
-
{ "txt4", "plrabn12.txt", 0 },
|
1251
|
-
{ "pb", "geo.protodata", 0 },
|
1252
|
-
{ "gaviota", "kppkn.gtb", 0 },
|
1253
|
-
};
|
1254
|
-
|
1255
|
-
static void BM_UFlat(int iters, int arg) {
|
1256
|
-
StopBenchmarkTiming();
|
1257
|
-
|
1258
|
-
// Pick file to process based on "arg"
|
1259
|
-
CHECK_GE(arg, 0);
|
1260
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1261
|
-
std::string contents =
|
1262
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1263
|
-
|
1264
|
-
std::string zcontents;
|
1265
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1266
|
-
char* dst = new char[contents.size()];
|
1267
|
-
|
1268
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1269
|
-
static_cast<int64>(contents.size()));
|
1270
|
-
SetBenchmarkLabel(files[arg].label);
|
1271
|
-
StartBenchmarkTiming();
|
1272
|
-
while (iters-- > 0) {
|
1273
|
-
CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
|
1274
|
-
}
|
1275
|
-
StopBenchmarkTiming();
|
1276
|
-
|
1277
|
-
delete[] dst;
|
1278
|
-
}
|
1279
|
-
BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1280
|
-
|
1281
|
-
static void BM_UValidate(int iters, int arg) {
|
1282
|
-
StopBenchmarkTiming();
|
1283
|
-
|
1284
|
-
// Pick file to process based on "arg"
|
1285
|
-
CHECK_GE(arg, 0);
|
1286
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1287
|
-
std::string contents =
|
1288
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1289
|
-
|
1290
|
-
std::string zcontents;
|
1291
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1292
|
-
|
1293
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1294
|
-
static_cast<int64>(contents.size()));
|
1295
|
-
SetBenchmarkLabel(files[arg].label);
|
1296
|
-
StartBenchmarkTiming();
|
1297
|
-
while (iters-- > 0) {
|
1298
|
-
CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
|
1299
|
-
}
|
1300
|
-
StopBenchmarkTiming();
|
1301
|
-
}
|
1302
|
-
BENCHMARK(BM_UValidate)->DenseRange(0, 4);
|
1303
|
-
|
1304
|
-
static void BM_UIOVec(int iters, int arg) {
|
1305
|
-
StopBenchmarkTiming();
|
1306
|
-
|
1307
|
-
// Pick file to process based on "arg"
|
1308
|
-
CHECK_GE(arg, 0);
|
1309
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1310
|
-
std::string contents =
|
1311
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1312
|
-
|
1313
|
-
std::string zcontents;
|
1314
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1315
|
-
|
1316
|
-
// Uncompress into an iovec containing ten entries.
|
1317
|
-
const int kNumEntries = 10;
|
1318
|
-
struct iovec iov[kNumEntries];
|
1319
|
-
char *dst = new char[contents.size()];
|
1320
|
-
int used_so_far = 0;
|
1321
|
-
for (int i = 0; i < kNumEntries; ++i) {
|
1322
|
-
iov[i].iov_base = dst + used_so_far;
|
1323
|
-
if (used_so_far == contents.size()) {
|
1324
|
-
iov[i].iov_len = 0;
|
1325
|
-
continue;
|
1326
|
-
}
|
1327
|
-
|
1328
|
-
if (i == kNumEntries - 1) {
|
1329
|
-
iov[i].iov_len = contents.size() - used_so_far;
|
1330
|
-
} else {
|
1331
|
-
iov[i].iov_len = contents.size() / kNumEntries;
|
1332
|
-
}
|
1333
|
-
used_so_far += iov[i].iov_len;
|
1334
|
-
}
|
1335
|
-
|
1336
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1337
|
-
static_cast<int64>(contents.size()));
|
1338
|
-
SetBenchmarkLabel(files[arg].label);
|
1339
|
-
StartBenchmarkTiming();
|
1340
|
-
while (iters-- > 0) {
|
1341
|
-
CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
|
1342
|
-
kNumEntries));
|
1343
|
-
}
|
1344
|
-
StopBenchmarkTiming();
|
1345
|
-
|
1346
|
-
delete[] dst;
|
1347
|
-
}
|
1348
|
-
BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
|
1349
|
-
|
1350
|
-
static void BM_UFlatSink(int iters, int arg) {
|
1351
|
-
StopBenchmarkTiming();
|
1352
|
-
|
1353
|
-
// Pick file to process based on "arg"
|
1354
|
-
CHECK_GE(arg, 0);
|
1355
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1356
|
-
std::string contents =
|
1357
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1358
|
-
|
1359
|
-
std::string zcontents;
|
1360
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1361
|
-
char* dst = new char[contents.size()];
|
1362
|
-
|
1363
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1364
|
-
static_cast<int64>(contents.size()));
|
1365
|
-
SetBenchmarkLabel(files[arg].label);
|
1366
|
-
StartBenchmarkTiming();
|
1367
|
-
while (iters-- > 0) {
|
1368
|
-
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
|
1369
|
-
snappy::UncheckedByteArraySink sink(dst);
|
1370
|
-
CHECK(snappy::Uncompress(&source, &sink));
|
1371
|
-
}
|
1372
|
-
StopBenchmarkTiming();
|
1373
|
-
|
1374
|
-
std::string s(dst, contents.size());
|
1375
|
-
CHECK_EQ(contents, s);
|
1376
|
-
|
1377
|
-
delete[] dst;
|
1378
|
-
}
|
1379
|
-
|
1380
|
-
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1381
|
-
|
1382
|
-
static void BM_ZFlat(int iters, int arg) {
|
1383
|
-
StopBenchmarkTiming();
|
1384
|
-
|
1385
|
-
// Pick file to process based on "arg"
|
1386
|
-
CHECK_GE(arg, 0);
|
1387
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
1388
|
-
std::string contents =
|
1389
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1390
|
-
|
1391
|
-
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
1392
|
-
|
1393
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1394
|
-
static_cast<int64>(contents.size()));
|
1395
|
-
StartBenchmarkTiming();
|
1396
|
-
|
1397
|
-
size_t zsize = 0;
|
1398
|
-
while (iters-- > 0) {
|
1399
|
-
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
|
1400
|
-
}
|
1401
|
-
StopBenchmarkTiming();
|
1402
|
-
const double compression_ratio =
|
1403
|
-
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
1404
|
-
SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
|
1405
|
-
100.0 * compression_ratio));
|
1406
|
-
VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
|
1407
|
-
files[arg].label, static_cast<int>(contents.size()),
|
1408
|
-
static_cast<int>(zsize));
|
1409
|
-
delete[] dst;
|
1410
|
-
}
|
1411
|
-
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1412
|
-
|
1413
|
-
static void BM_ZFlatAll(int iters, int arg) {
|
1414
|
-
StopBenchmarkTiming();
|
1415
|
-
|
1416
|
-
CHECK_EQ(arg, 0);
|
1417
|
-
const int num_files = ARRAYSIZE(files);
|
1418
|
-
|
1419
|
-
std::vector<std::string> contents(num_files);
|
1420
|
-
std::vector<char*> dst(num_files);
|
1421
|
-
|
1422
|
-
int64 total_contents_size = 0;
|
1423
|
-
for (int i = 0; i < num_files; ++i) {
|
1424
|
-
contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
|
1425
|
-
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
|
1426
|
-
total_contents_size += contents[i].size();
|
1427
|
-
}
|
1428
|
-
|
1429
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1430
|
-
StartBenchmarkTiming();
|
1431
|
-
|
1432
|
-
size_t zsize = 0;
|
1433
|
-
while (iters-- > 0) {
|
1434
|
-
for (int i = 0; i < num_files; ++i) {
|
1435
|
-
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1436
|
-
&zsize);
|
1437
|
-
}
|
1438
|
-
}
|
1439
|
-
StopBenchmarkTiming();
|
1440
|
-
|
1441
|
-
for (int i = 0; i < num_files; ++i) {
|
1442
|
-
delete[] dst[i];
|
1443
|
-
}
|
1444
|
-
SetBenchmarkLabel(StrFormat("%d files", num_files));
|
1445
|
-
}
|
1446
|
-
BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
|
1447
|
-
|
1448
|
-
static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
|
1449
|
-
StopBenchmarkTiming();
|
1450
|
-
|
1451
|
-
CHECK_EQ(arg, 0);
|
1452
|
-
CHECK_GT(ARRAYSIZE(files), 0);
|
1453
|
-
const std::string base_content =
|
1454
|
-
ReadTestDataFile(files[0].filename, files[0].size_limit);
|
1455
|
-
|
1456
|
-
std::vector<std::string> contents;
|
1457
|
-
std::vector<char*> dst;
|
1458
|
-
int64 total_contents_size = 0;
|
1459
|
-
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
|
1460
|
-
++table_bits) {
|
1461
|
-
std::string content = base_content;
|
1462
|
-
content.resize(1 << table_bits);
|
1463
|
-
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
|
1464
|
-
total_contents_size += content.size();
|
1465
|
-
contents.push_back(std::move(content));
|
1466
|
-
}
|
1467
|
-
|
1468
|
-
size_t zsize = 0;
|
1469
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
1470
|
-
StartBenchmarkTiming();
|
1471
|
-
while (iters-- > 0) {
|
1472
|
-
for (int i = 0; i < contents.size(); ++i) {
|
1473
|
-
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1474
|
-
&zsize);
|
1475
|
-
}
|
1476
|
-
}
|
1477
|
-
StopBenchmarkTiming();
|
1478
|
-
|
1479
|
-
for (int i = 0; i < dst.size(); ++i) {
|
1480
|
-
delete[] dst[i];
|
1481
|
-
}
|
1482
|
-
SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
|
1483
|
-
}
|
1484
|
-
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
|
1008
|
+
} // namespace
|
1485
1009
|
|
1486
1010
|
} // namespace snappy
|
1487
|
-
|
1488
|
-
int main(int argc, char** argv) {
|
1489
|
-
InitGoogle(argv[0], &argc, &argv, true);
|
1490
|
-
RunSpecifiedBenchmarks();
|
1491
|
-
|
1492
|
-
if (argc >= 2) {
|
1493
|
-
for (int arg = 1; arg < argc; arg++) {
|
1494
|
-
if (FLAGS_write_compressed) {
|
1495
|
-
snappy::CompressFile(argv[arg]);
|
1496
|
-
} else if (FLAGS_write_uncompressed) {
|
1497
|
-
snappy::UncompressFile(argv[arg]);
|
1498
|
-
} else {
|
1499
|
-
snappy::MeasureFile(argv[arg]);
|
1500
|
-
}
|
1501
|
-
}
|
1502
|
-
return 0;
|
1503
|
-
}
|
1504
|
-
|
1505
|
-
return RUN_ALL_TESTS();
|
1506
|
-
}
|