snappy 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +2 -2
- data/.github/workflows/publish.yml +7 -13
- data/Dockerfile +1 -1
- data/Gemfile +1 -0
- data/README.md +20 -1
- data/Rakefile +1 -1
- data/ext/extconf.rb +13 -11
- data/lib/snappy/shim.rb +3 -23
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy/writer.rb +1 -1
- data/snappy.gemspec +1 -0
- data/test/snappy_test.rb +29 -4
- data/vendor/snappy/BUILD.bazel +211 -0
- data/vendor/snappy/CMakeLists.txt +176 -31
- data/vendor/snappy/CONTRIBUTING.md +9 -4
- data/vendor/snappy/MODULE.bazel +23 -0
- data/vendor/snappy/NEWS +27 -0
- data/vendor/snappy/README.md +52 -35
- data/vendor/snappy/WORKSPACE +27 -0
- data/vendor/snappy/WORKSPACE.bzlmod +0 -0
- data/vendor/snappy/cmake/config.h.in +30 -23
- data/vendor/snappy/snappy-internal.h +218 -25
- data/vendor/snappy/snappy-sinksource.cc +26 -9
- data/vendor/snappy/snappy-sinksource.h +11 -11
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +231 -306
- data/vendor/snappy/snappy-stubs-public.h.in +0 -11
- data/vendor/snappy/snappy-test.cc +88 -198
- data/vendor/snappy/snappy-test.h +102 -285
- data/vendor/snappy/snappy.cc +1412 -425
- data/vendor/snappy/snappy.h +60 -10
- data/vendor/snappy/snappy_benchmark.cc +398 -0
- data/vendor/snappy/snappy_compress_fuzzer.cc +21 -16
- data/vendor/snappy/snappy_test_data.cc +57 -0
- data/vendor/snappy/snappy_test_data.h +68 -0
- data/vendor/snappy/snappy_test_tool.cc +471 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +3 -2
- data/vendor/snappy/snappy_unittest.cc +183 -666
- metadata +12 -6
|
@@ -26,44 +26,32 @@
|
|
|
26
26
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
27
27
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
28
28
|
|
|
29
|
-
#include <math.h>
|
|
30
|
-
#include <stdlib.h>
|
|
31
|
-
|
|
32
29
|
#include <algorithm>
|
|
30
|
+
#include <cinttypes>
|
|
31
|
+
#include <cmath>
|
|
32
|
+
#include <cstdlib>
|
|
33
33
|
#include <random>
|
|
34
34
|
#include <string>
|
|
35
35
|
#include <utility>
|
|
36
36
|
#include <vector>
|
|
37
37
|
|
|
38
|
-
#include "snappy.h"
|
|
39
|
-
#include "snappy-internal.h"
|
|
40
38
|
#include "snappy-test.h"
|
|
39
|
+
|
|
40
|
+
#include "gtest/gtest.h"
|
|
41
|
+
|
|
42
|
+
#include "snappy-internal.h"
|
|
41
43
|
#include "snappy-sinksource.h"
|
|
44
|
+
#include "snappy.h"
|
|
45
|
+
#include "snappy_test_data.h"
|
|
42
46
|
|
|
43
|
-
|
|
44
|
-
"Starting prefix size for testing (-1: just full file contents)");
|
|
45
|
-
DEFINE_int32(end_len, -1,
|
|
46
|
-
"Starting prefix size for testing (-1: just full file contents)");
|
|
47
|
-
DEFINE_int32(bytes, 10485760,
|
|
48
|
-
"How many bytes to compress/uncompress per file for timing");
|
|
49
|
-
|
|
50
|
-
DEFINE_bool(zlib, false,
|
|
51
|
-
"Run zlib compression (http://www.zlib.net)");
|
|
52
|
-
DEFINE_bool(lzo, false,
|
|
53
|
-
"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
|
|
54
|
-
DEFINE_bool(snappy, true, "Run snappy compression");
|
|
55
|
-
|
|
56
|
-
DEFINE_bool(write_compressed, false,
|
|
57
|
-
"Write compressed versions of each file to <file>.comp");
|
|
58
|
-
DEFINE_bool(write_uncompressed, false,
|
|
59
|
-
"Write uncompressed versions of each file to <file>.uncomp");
|
|
60
|
-
|
|
61
|
-
DEFINE_bool(snappy_dump_decompression_table, false,
|
|
47
|
+
SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
|
|
62
48
|
"If true, we print the decompression table during tests.");
|
|
63
49
|
|
|
64
50
|
namespace snappy {
|
|
65
51
|
|
|
66
|
-
|
|
52
|
+
namespace {
|
|
53
|
+
|
|
54
|
+
#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
|
|
67
55
|
|
|
68
56
|
// To test against code that reads beyond its input, this class copies a
|
|
69
57
|
// string to a newly allocated group of pages, the last of which
|
|
@@ -84,7 +72,7 @@ class DataEndingAtUnreadablePage {
|
|
|
84
72
|
CHECK_NE(MAP_FAILED, mem_);
|
|
85
73
|
protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
|
|
86
74
|
char* dst = protected_page_ - size;
|
|
87
|
-
memcpy(dst, s.data(), size);
|
|
75
|
+
std::memcpy(dst, s.data(), size);
|
|
88
76
|
data_ = dst;
|
|
89
77
|
size_ = size;
|
|
90
78
|
// Make guard page unreadable.
|
|
@@ -109,256 +97,14 @@ class DataEndingAtUnreadablePage {
|
|
|
109
97
|
size_t size_;
|
|
110
98
|
};
|
|
111
99
|
|
|
112
|
-
#else //
|
|
100
|
+
#else // HAVE_FUNC_MMAP) && HAVE_FUNC_SYSCONF
|
|
113
101
|
|
|
114
102
|
// Fallback for systems without mmap.
|
|
115
103
|
using DataEndingAtUnreadablePage = std::string;
|
|
116
104
|
|
|
117
105
|
#endif
|
|
118
106
|
|
|
119
|
-
|
|
120
|
-
ZLIB, LZO, SNAPPY
|
|
121
|
-
};
|
|
122
|
-
|
|
123
|
-
const char* names[] = {
|
|
124
|
-
"ZLIB", "LZO", "SNAPPY"
|
|
125
|
-
};
|
|
126
|
-
|
|
127
|
-
static size_t MinimumRequiredOutputSpace(size_t input_size,
|
|
128
|
-
CompressorType comp) {
|
|
129
|
-
switch (comp) {
|
|
130
|
-
#ifdef ZLIB_VERSION
|
|
131
|
-
case ZLIB:
|
|
132
|
-
return ZLib::MinCompressbufSize(input_size);
|
|
133
|
-
#endif // ZLIB_VERSION
|
|
134
|
-
|
|
135
|
-
#ifdef LZO_VERSION
|
|
136
|
-
case LZO:
|
|
137
|
-
return input_size + input_size/64 + 16 + 3;
|
|
138
|
-
#endif // LZO_VERSION
|
|
139
|
-
|
|
140
|
-
case SNAPPY:
|
|
141
|
-
return snappy::MaxCompressedLength(input_size);
|
|
142
|
-
|
|
143
|
-
default:
|
|
144
|
-
LOG(FATAL) << "Unknown compression type number " << comp;
|
|
145
|
-
return 0;
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
// Returns true if we successfully compressed, false otherwise.
|
|
150
|
-
//
|
|
151
|
-
// If compressed_is_preallocated is set, do not resize the compressed buffer.
|
|
152
|
-
// This is typically what you want for a benchmark, in order to not spend
|
|
153
|
-
// time in the memory allocator. If you do set this flag, however,
|
|
154
|
-
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
|
|
155
|
-
// number of bytes, and may contain junk bytes at the end after return.
|
|
156
|
-
static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
157
|
-
std::string* compressed, bool compressed_is_preallocated) {
|
|
158
|
-
if (!compressed_is_preallocated) {
|
|
159
|
-
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
switch (comp) {
|
|
163
|
-
#ifdef ZLIB_VERSION
|
|
164
|
-
case ZLIB: {
|
|
165
|
-
ZLib zlib;
|
|
166
|
-
uLongf destlen = compressed->size();
|
|
167
|
-
int ret = zlib.Compress(
|
|
168
|
-
reinterpret_cast<Bytef*>(string_as_array(compressed)),
|
|
169
|
-
&destlen,
|
|
170
|
-
reinterpret_cast<const Bytef*>(input),
|
|
171
|
-
input_size);
|
|
172
|
-
CHECK_EQ(Z_OK, ret);
|
|
173
|
-
if (!compressed_is_preallocated) {
|
|
174
|
-
compressed->resize(destlen);
|
|
175
|
-
}
|
|
176
|
-
return true;
|
|
177
|
-
}
|
|
178
|
-
#endif // ZLIB_VERSION
|
|
179
|
-
|
|
180
|
-
#ifdef LZO_VERSION
|
|
181
|
-
case LZO: {
|
|
182
|
-
unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
|
|
183
|
-
lzo_uint destlen;
|
|
184
|
-
int ret = lzo1x_1_15_compress(
|
|
185
|
-
reinterpret_cast<const uint8*>(input),
|
|
186
|
-
input_size,
|
|
187
|
-
reinterpret_cast<uint8*>(string_as_array(compressed)),
|
|
188
|
-
&destlen,
|
|
189
|
-
mem);
|
|
190
|
-
CHECK_EQ(LZO_E_OK, ret);
|
|
191
|
-
delete[] mem;
|
|
192
|
-
if (!compressed_is_preallocated) {
|
|
193
|
-
compressed->resize(destlen);
|
|
194
|
-
}
|
|
195
|
-
break;
|
|
196
|
-
}
|
|
197
|
-
#endif // LZO_VERSION
|
|
198
|
-
|
|
199
|
-
case SNAPPY: {
|
|
200
|
-
size_t destlen;
|
|
201
|
-
snappy::RawCompress(input, input_size,
|
|
202
|
-
string_as_array(compressed),
|
|
203
|
-
&destlen);
|
|
204
|
-
CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
|
|
205
|
-
if (!compressed_is_preallocated) {
|
|
206
|
-
compressed->resize(destlen);
|
|
207
|
-
}
|
|
208
|
-
break;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
default: {
|
|
212
|
-
return false; // the asked-for library wasn't compiled in
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
return true;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
static bool Uncompress(const std::string& compressed, CompressorType comp,
|
|
219
|
-
int size, std::string* output) {
|
|
220
|
-
switch (comp) {
|
|
221
|
-
#ifdef ZLIB_VERSION
|
|
222
|
-
case ZLIB: {
|
|
223
|
-
output->resize(size);
|
|
224
|
-
ZLib zlib;
|
|
225
|
-
uLongf destlen = output->size();
|
|
226
|
-
int ret = zlib.Uncompress(
|
|
227
|
-
reinterpret_cast<Bytef*>(string_as_array(output)),
|
|
228
|
-
&destlen,
|
|
229
|
-
reinterpret_cast<const Bytef*>(compressed.data()),
|
|
230
|
-
compressed.size());
|
|
231
|
-
CHECK_EQ(Z_OK, ret);
|
|
232
|
-
CHECK_EQ(static_cast<uLongf>(size), destlen);
|
|
233
|
-
break;
|
|
234
|
-
}
|
|
235
|
-
#endif // ZLIB_VERSION
|
|
236
|
-
|
|
237
|
-
#ifdef LZO_VERSION
|
|
238
|
-
case LZO: {
|
|
239
|
-
output->resize(size);
|
|
240
|
-
lzo_uint destlen;
|
|
241
|
-
int ret = lzo1x_decompress(
|
|
242
|
-
reinterpret_cast<const uint8*>(compressed.data()),
|
|
243
|
-
compressed.size(),
|
|
244
|
-
reinterpret_cast<uint8*>(string_as_array(output)),
|
|
245
|
-
&destlen,
|
|
246
|
-
NULL);
|
|
247
|
-
CHECK_EQ(LZO_E_OK, ret);
|
|
248
|
-
CHECK_EQ(static_cast<lzo_uint>(size), destlen);
|
|
249
|
-
break;
|
|
250
|
-
}
|
|
251
|
-
#endif // LZO_VERSION
|
|
252
|
-
|
|
253
|
-
case SNAPPY: {
|
|
254
|
-
snappy::RawUncompress(compressed.data(), compressed.size(),
|
|
255
|
-
string_as_array(output));
|
|
256
|
-
break;
|
|
257
|
-
}
|
|
258
|
-
|
|
259
|
-
default: {
|
|
260
|
-
return false; // the asked-for library wasn't compiled in
|
|
261
|
-
}
|
|
262
|
-
}
|
|
263
|
-
return true;
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
static void Measure(const char* data,
|
|
267
|
-
size_t length,
|
|
268
|
-
CompressorType comp,
|
|
269
|
-
int repeats,
|
|
270
|
-
int block_size) {
|
|
271
|
-
// Run tests a few time and pick median running times
|
|
272
|
-
static const int kRuns = 5;
|
|
273
|
-
double ctime[kRuns];
|
|
274
|
-
double utime[kRuns];
|
|
275
|
-
int compressed_size = 0;
|
|
276
|
-
|
|
277
|
-
{
|
|
278
|
-
// Chop the input into blocks
|
|
279
|
-
int num_blocks = (length + block_size - 1) / block_size;
|
|
280
|
-
std::vector<const char*> input(num_blocks);
|
|
281
|
-
std::vector<size_t> input_length(num_blocks);
|
|
282
|
-
std::vector<std::string> compressed(num_blocks);
|
|
283
|
-
std::vector<std::string> output(num_blocks);
|
|
284
|
-
for (int b = 0; b < num_blocks; b++) {
|
|
285
|
-
int input_start = b * block_size;
|
|
286
|
-
int input_limit = std::min<int>((b+1)*block_size, length);
|
|
287
|
-
input[b] = data+input_start;
|
|
288
|
-
input_length[b] = input_limit-input_start;
|
|
289
|
-
|
|
290
|
-
// Pre-grow the output buffer so we don't measure string append time.
|
|
291
|
-
compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
// First, try one trial compression to make sure the code is compiled in
|
|
295
|
-
if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
|
|
296
|
-
LOG(WARNING) << "Skipping " << names[comp] << ": "
|
|
297
|
-
<< "library not compiled in";
|
|
298
|
-
return;
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
for (int run = 0; run < kRuns; run++) {
|
|
302
|
-
CycleTimer ctimer, utimer;
|
|
303
|
-
|
|
304
|
-
for (int b = 0; b < num_blocks; b++) {
|
|
305
|
-
// Pre-grow the output buffer so we don't measure string append time.
|
|
306
|
-
compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
ctimer.Start();
|
|
310
|
-
for (int b = 0; b < num_blocks; b++)
|
|
311
|
-
for (int i = 0; i < repeats; i++)
|
|
312
|
-
Compress(input[b], input_length[b], comp, &compressed[b], true);
|
|
313
|
-
ctimer.Stop();
|
|
314
|
-
|
|
315
|
-
// Compress once more, with resizing, so we don't leave junk
|
|
316
|
-
// at the end that will confuse the decompressor.
|
|
317
|
-
for (int b = 0; b < num_blocks; b++) {
|
|
318
|
-
Compress(input[b], input_length[b], comp, &compressed[b], false);
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
for (int b = 0; b < num_blocks; b++) {
|
|
322
|
-
output[b].resize(input_length[b]);
|
|
323
|
-
}
|
|
324
|
-
|
|
325
|
-
utimer.Start();
|
|
326
|
-
for (int i = 0; i < repeats; i++)
|
|
327
|
-
for (int b = 0; b < num_blocks; b++)
|
|
328
|
-
Uncompress(compressed[b], comp, input_length[b], &output[b]);
|
|
329
|
-
utimer.Stop();
|
|
330
|
-
|
|
331
|
-
ctime[run] = ctimer.Get();
|
|
332
|
-
utime[run] = utimer.Get();
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
compressed_size = 0;
|
|
336
|
-
for (size_t i = 0; i < compressed.size(); i++) {
|
|
337
|
-
compressed_size += compressed[i].size();
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
|
|
341
|
-
std::sort(ctime, ctime + kRuns);
|
|
342
|
-
std::sort(utime, utime + kRuns);
|
|
343
|
-
const int med = kRuns/2;
|
|
344
|
-
|
|
345
|
-
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
|
|
346
|
-
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
|
|
347
|
-
std::string x = names[comp];
|
|
348
|
-
x += ":";
|
|
349
|
-
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
|
|
350
|
-
: std::string("?");
|
|
351
|
-
printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
|
|
352
|
-
"comp %5.1f MB/s uncomp %5s MB/s\n",
|
|
353
|
-
x.c_str(),
|
|
354
|
-
block_size/(1<<20),
|
|
355
|
-
static_cast<int>(length), static_cast<uint32>(compressed_size),
|
|
356
|
-
(compressed_size * 100.0) / std::max<int>(1, length),
|
|
357
|
-
comp_rate,
|
|
358
|
-
urate.c_str());
|
|
359
|
-
}
|
|
360
|
-
|
|
361
|
-
static int VerifyString(const std::string& input) {
|
|
107
|
+
int VerifyString(const std::string& input) {
|
|
362
108
|
std::string compressed;
|
|
363
109
|
DataEndingAtUnreadablePage i(input);
|
|
364
110
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
|
@@ -374,7 +120,7 @@ static int VerifyString(const std::string& input) {
|
|
|
374
120
|
return uncompressed.size();
|
|
375
121
|
}
|
|
376
122
|
|
|
377
|
-
|
|
123
|
+
void VerifyStringSink(const std::string& input) {
|
|
378
124
|
std::string compressed;
|
|
379
125
|
DataEndingAtUnreadablePage i(input);
|
|
380
126
|
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
|
@@ -392,26 +138,15 @@ static void VerifyStringSink(const std::string& input) {
|
|
|
392
138
|
CHECK_EQ(uncompressed, input);
|
|
393
139
|
}
|
|
394
140
|
|
|
395
|
-
|
|
396
|
-
std::string compressed;
|
|
397
|
-
DataEndingAtUnreadablePage i(input);
|
|
398
|
-
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
|
399
|
-
CHECK_EQ(written, compressed.size());
|
|
400
|
-
CHECK_LE(compressed.size(),
|
|
401
|
-
snappy::MaxCompressedLength(input.size()));
|
|
402
|
-
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
|
403
|
-
|
|
404
|
-
// Try uncompressing into an iovec containing a random number of entries
|
|
405
|
-
// ranging from 1 to 10.
|
|
406
|
-
char* buf = new char[input.size()];
|
|
141
|
+
struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) {
|
|
407
142
|
std::minstd_rand0 rng(input.size());
|
|
408
143
|
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
|
|
409
|
-
|
|
144
|
+
num = uniform_1_to_10(rng);
|
|
410
145
|
if (input.size() < num) {
|
|
411
146
|
num = input.size();
|
|
412
147
|
}
|
|
413
148
|
struct iovec* iov = new iovec[num];
|
|
414
|
-
|
|
149
|
+
size_t used_so_far = 0;
|
|
415
150
|
std::bernoulli_distribution one_in_five(1.0 / 5);
|
|
416
151
|
for (size_t i = 0; i < num; ++i) {
|
|
417
152
|
assert(used_so_far < input.size());
|
|
@@ -430,8 +165,40 @@ static void VerifyIOVec(const std::string& input) {
|
|
|
430
165
|
}
|
|
431
166
|
used_so_far += iov[i].iov_len;
|
|
432
167
|
}
|
|
433
|
-
|
|
434
|
-
|
|
168
|
+
return iov;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
int VerifyIOVecSource(const std::string& input) {
|
|
172
|
+
std::string compressed;
|
|
173
|
+
std::string copy = input;
|
|
174
|
+
char* buf = const_cast<char*>(copy.data());
|
|
175
|
+
size_t num = 0;
|
|
176
|
+
struct iovec* iov = GetIOVec(input, buf, num);
|
|
177
|
+
const size_t written = snappy::CompressFromIOVec(iov, num, &compressed);
|
|
178
|
+
CHECK_EQ(written, compressed.size());
|
|
179
|
+
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
|
|
180
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
|
181
|
+
|
|
182
|
+
std::string uncompressed;
|
|
183
|
+
DataEndingAtUnreadablePage c(compressed);
|
|
184
|
+
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
|
185
|
+
CHECK_EQ(uncompressed, input);
|
|
186
|
+
delete[] iov;
|
|
187
|
+
return uncompressed.size();
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
void VerifyIOVecSink(const std::string& input) {
|
|
191
|
+
std::string compressed;
|
|
192
|
+
DataEndingAtUnreadablePage i(input);
|
|
193
|
+
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
|
194
|
+
CHECK_EQ(written, compressed.size());
|
|
195
|
+
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
|
|
196
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
|
197
|
+
char* buf = new char[input.size()];
|
|
198
|
+
size_t num = 0;
|
|
199
|
+
struct iovec* iov = GetIOVec(input, buf, num);
|
|
200
|
+
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov,
|
|
201
|
+
num));
|
|
435
202
|
CHECK(!memcmp(buf, input.data(), input.size()));
|
|
436
203
|
delete[] iov;
|
|
437
204
|
delete[] buf;
|
|
@@ -439,7 +206,7 @@ static void VerifyIOVec(const std::string& input) {
|
|
|
439
206
|
|
|
440
207
|
// Test that data compressed by a compressor that does not
|
|
441
208
|
// obey block sizes is uncompressed properly.
|
|
442
|
-
|
|
209
|
+
void VerifyNonBlockedCompression(const std::string& input) {
|
|
443
210
|
if (input.length() > snappy::kBlockSize) {
|
|
444
211
|
// We cannot test larger blocks than the maximum block size, obviously.
|
|
445
212
|
return;
|
|
@@ -451,7 +218,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
|
|
|
451
218
|
// Setup compression table
|
|
452
219
|
snappy::internal::WorkingMemory wmem(input.size());
|
|
453
220
|
int table_size;
|
|
454
|
-
|
|
221
|
+
uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
|
|
455
222
|
|
|
456
223
|
// Compress entire input in one shot
|
|
457
224
|
std::string compressed;
|
|
@@ -481,7 +248,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
|
|
|
481
248
|
struct iovec vec[kNumBlocks];
|
|
482
249
|
const int block_size = 1 + input.size() / kNumBlocks;
|
|
483
250
|
std::string iovec_data(block_size * kNumBlocks, 'x');
|
|
484
|
-
for (int i = 0; i < kNumBlocks; i
|
|
251
|
+
for (int i = 0; i < kNumBlocks; ++i) {
|
|
485
252
|
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
|
486
253
|
vec[i].iov_len = block_size;
|
|
487
254
|
}
|
|
@@ -492,7 +259,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
|
|
|
492
259
|
}
|
|
493
260
|
|
|
494
261
|
// Expand the input so that it is at least K times as big as block size
|
|
495
|
-
|
|
262
|
+
std::string Expand(const std::string& input) {
|
|
496
263
|
static const int K = 3;
|
|
497
264
|
std::string data = input;
|
|
498
265
|
while (data.size() < K * snappy::kBlockSize) {
|
|
@@ -501,30 +268,33 @@ static std::string Expand(const std::string& input) {
|
|
|
501
268
|
return data;
|
|
502
269
|
}
|
|
503
270
|
|
|
504
|
-
|
|
271
|
+
int Verify(const std::string& input) {
|
|
505
272
|
VLOG(1) << "Verifying input of size " << input.size();
|
|
506
273
|
|
|
507
274
|
// Compress using string based routines
|
|
508
275
|
const int result = VerifyString(input);
|
|
509
276
|
|
|
277
|
+
// Compress using `iovec`-based routines.
|
|
278
|
+
CHECK_EQ(VerifyIOVecSource(input), result);
|
|
279
|
+
|
|
510
280
|
// Verify using sink based routines
|
|
511
281
|
VerifyStringSink(input);
|
|
512
282
|
|
|
513
283
|
VerifyNonBlockedCompression(input);
|
|
514
|
-
|
|
284
|
+
VerifyIOVecSink(input);
|
|
515
285
|
if (!input.empty()) {
|
|
516
286
|
const std::string expanded = Expand(input);
|
|
517
287
|
VerifyNonBlockedCompression(expanded);
|
|
518
|
-
|
|
288
|
+
VerifyIOVecSink(input);
|
|
519
289
|
}
|
|
520
290
|
|
|
521
291
|
return result;
|
|
522
292
|
}
|
|
523
293
|
|
|
524
|
-
|
|
294
|
+
bool IsValidCompressedBuffer(const std::string& c) {
|
|
525
295
|
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
|
526
296
|
}
|
|
527
|
-
|
|
297
|
+
bool Uncompress(const std::string& c, std::string* u) {
|
|
528
298
|
return snappy::Uncompress(c.data(), c.size(), u);
|
|
529
299
|
}
|
|
530
300
|
|
|
@@ -549,8 +319,8 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
|
549
319
|
// This is testing for a security bug - a buffer that decompresses to 100k
|
|
550
320
|
// but we lie in the snappy header and only reserve 0 bytes of memory :)
|
|
551
321
|
source.resize(100000);
|
|
552
|
-
for (
|
|
553
|
-
|
|
322
|
+
for (char& source_char : source) {
|
|
323
|
+
source_char = 'A';
|
|
554
324
|
}
|
|
555
325
|
snappy::Compress(source.data(), source.size(), &dest);
|
|
556
326
|
dest[0] = dest[1] = dest[2] = dest[3] = 0;
|
|
@@ -588,7 +358,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
|
588
358
|
size_t ulen;
|
|
589
359
|
CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
|
|
590
360
|
|| (ulen < (1<<20)));
|
|
591
|
-
|
|
361
|
+
uint32_t ulen2;
|
|
592
362
|
snappy::ByteArraySource source(data.data(), data.size());
|
|
593
363
|
CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
|
|
594
364
|
(ulen2 < (1<<20)));
|
|
@@ -601,7 +371,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
|
|
|
601
371
|
// These mirror the compression code in snappy.cc, but are copied
|
|
602
372
|
// here so that we can bypass some limitations in the how snappy.cc
|
|
603
373
|
// invokes these routines.
|
|
604
|
-
|
|
374
|
+
void AppendLiteral(std::string* dst, const std::string& literal) {
|
|
605
375
|
if (literal.empty()) return;
|
|
606
376
|
int n = literal.size() - 1;
|
|
607
377
|
if (n < 60) {
|
|
@@ -621,7 +391,7 @@ static void AppendLiteral(std::string* dst, const std::string& literal) {
|
|
|
621
391
|
*dst += literal;
|
|
622
392
|
}
|
|
623
393
|
|
|
624
|
-
|
|
394
|
+
void AppendCopy(std::string* dst, int offset, int length) {
|
|
625
395
|
while (length > 0) {
|
|
626
396
|
// Figure out how much to copy in one shot
|
|
627
397
|
int to_copy;
|
|
@@ -665,6 +435,41 @@ TEST(Snappy, SimpleTests) {
|
|
|
665
435
|
Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
|
|
666
436
|
}
|
|
667
437
|
|
|
438
|
+
// Regression test for cr/345340892.
|
|
439
|
+
TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
|
|
440
|
+
Verify("abcabcabcabcabcabcab");
|
|
441
|
+
Verify("abcabcabcabcabcabcab0123456789ABCDEF");
|
|
442
|
+
|
|
443
|
+
Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
|
|
444
|
+
Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
// Regression test for cr/345340892.
|
|
448
|
+
TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
|
|
449
|
+
std::mt19937 rng;
|
|
450
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
|
451
|
+
for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
|
|
452
|
+
for (int length = 1; length <= 64; ++length) {
|
|
453
|
+
for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
|
|
454
|
+
const int size = pattern_size + length + extra_bytes_after_pattern;
|
|
455
|
+
std::string input;
|
|
456
|
+
input.resize(size);
|
|
457
|
+
for (int i = 0; i < pattern_size; ++i) {
|
|
458
|
+
input[i] = 'a' + i;
|
|
459
|
+
}
|
|
460
|
+
for (int i = 0; i < length; ++i) {
|
|
461
|
+
input[pattern_size + i] = input[i];
|
|
462
|
+
}
|
|
463
|
+
for (int i = 0; i < extra_bytes_after_pattern; ++i) {
|
|
464
|
+
input[pattern_size + length + i] =
|
|
465
|
+
static_cast<char>(uniform_byte(rng));
|
|
466
|
+
}
|
|
467
|
+
Verify(input);
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
|
|
668
473
|
// Verify max blowup (lots of four-byte copies)
|
|
669
474
|
TEST(Snappy, MaxBlowup) {
|
|
670
475
|
std::mt19937 rng;
|
|
@@ -680,8 +485,20 @@ TEST(Snappy, MaxBlowup) {
|
|
|
680
485
|
Verify(input);
|
|
681
486
|
}
|
|
682
487
|
|
|
488
|
+
// Issue #201, when output is more than 4GB, we had a data corruption bug.
|
|
489
|
+
// We cannot run this test always because of CI constraints.
|
|
490
|
+
TEST(Snappy, DISABLED_MoreThan4GB) {
|
|
491
|
+
std::mt19937 rng;
|
|
492
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
|
493
|
+
std::string input;
|
|
494
|
+
input.resize((1ull << 32) - 1);
|
|
495
|
+
for (uint64_t i = 0; i < ((1ull << 32) - 1); ++i)
|
|
496
|
+
input[i] = static_cast<char>(uniform_byte(rng));
|
|
497
|
+
Verify(input);
|
|
498
|
+
}
|
|
499
|
+
|
|
683
500
|
TEST(Snappy, RandomData) {
|
|
684
|
-
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
|
501
|
+
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
|
|
685
502
|
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
|
|
686
503
|
std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
|
|
687
504
|
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
|
@@ -690,7 +507,7 @@ TEST(Snappy, RandomData) {
|
|
|
690
507
|
std::bernoulli_distribution one_in_ten(1.0 / 10);
|
|
691
508
|
|
|
692
509
|
constexpr int num_ops = 20000;
|
|
693
|
-
for (int i = 0; i < num_ops; i
|
|
510
|
+
for (int i = 0; i < num_ops; ++i) {
|
|
694
511
|
if ((i % 1000) == 0) {
|
|
695
512
|
VLOG(0) << "Random op " << i << " of " << num_ops;
|
|
696
513
|
}
|
|
@@ -738,14 +555,14 @@ TEST(Snappy, FourByteOffset) {
|
|
|
738
555
|
// How many times each fragment is emitted.
|
|
739
556
|
const int n1 = 2;
|
|
740
557
|
const int n2 = 100000 / fragment2.size();
|
|
741
|
-
const
|
|
558
|
+
const size_t length = n1 * fragment1.size() + n2 * fragment2.size();
|
|
742
559
|
|
|
743
560
|
std::string compressed;
|
|
744
561
|
Varint::Append32(&compressed, length);
|
|
745
562
|
|
|
746
563
|
AppendLiteral(&compressed, fragment1);
|
|
747
564
|
std::string src = fragment1;
|
|
748
|
-
for (int i = 0; i < n2; i
|
|
565
|
+
for (int i = 0; i < n2; ++i) {
|
|
749
566
|
AppendLiteral(&compressed, fragment2);
|
|
750
567
|
src += fragment2;
|
|
751
568
|
}
|
|
@@ -760,7 +577,27 @@ TEST(Snappy, FourByteOffset) {
|
|
|
760
577
|
CHECK_EQ(uncompressed, src);
|
|
761
578
|
}
|
|
762
579
|
|
|
763
|
-
TEST(Snappy,
|
|
580
|
+
TEST(Snappy, IOVecSourceEdgeCases) {
|
|
581
|
+
// Validate that empty leading, trailing, and in-between iovecs are handled:
|
|
582
|
+
// [] [] ['a'] [] ['b'] [].
|
|
583
|
+
std::string data = "ab";
|
|
584
|
+
char* buf = const_cast<char*>(data.data());
|
|
585
|
+
size_t used_so_far = 0;
|
|
586
|
+
static const int kLengths[] = {0, 0, 1, 0, 1, 0};
|
|
587
|
+
struct iovec iov[ARRAYSIZE(kLengths)];
|
|
588
|
+
for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
|
|
589
|
+
iov[i].iov_base = buf + used_so_far;
|
|
590
|
+
iov[i].iov_len = kLengths[i];
|
|
591
|
+
used_so_far += kLengths[i];
|
|
592
|
+
}
|
|
593
|
+
std::string compressed;
|
|
594
|
+
snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed);
|
|
595
|
+
std::string uncompressed;
|
|
596
|
+
snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);
|
|
597
|
+
CHECK_EQ(data, uncompressed);
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
TEST(Snappy, IOVecSinkEdgeCases) {
|
|
764
601
|
// Test some tricky edge cases in the iovec output that are not necessarily
|
|
765
602
|
// exercised by random tests.
|
|
766
603
|
|
|
@@ -872,14 +709,13 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
|
872
709
|
}
|
|
873
710
|
}
|
|
874
711
|
|
|
875
|
-
|
|
876
|
-
size_t* ulength) {
|
|
712
|
+
bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
|
|
877
713
|
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
|
878
714
|
compressed.size(),
|
|
879
715
|
ulength);
|
|
880
716
|
|
|
881
717
|
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
|
882
|
-
|
|
718
|
+
uint32_t length;
|
|
883
719
|
const bool result2 = snappy::GetUncompressedLength(&source, &length);
|
|
884
720
|
CHECK_EQ(result1, result2);
|
|
885
721
|
return result1;
|
|
@@ -954,17 +790,14 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
|
|
|
954
790
|
EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
|
|
955
791
|
}
|
|
956
792
|
|
|
957
|
-
namespace {
|
|
958
|
-
|
|
959
793
|
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
|
|
794
|
+
uint64_t data;
|
|
960
795
|
std::pair<size_t, bool> p =
|
|
961
|
-
snappy::internal::FindMatchLength(s1, s2, s2 + length);
|
|
796
|
+
snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
|
|
962
797
|
CHECK_EQ(p.first < 8, p.second);
|
|
963
798
|
return p.first;
|
|
964
799
|
}
|
|
965
800
|
|
|
966
|
-
} // namespace
|
|
967
|
-
|
|
968
801
|
TEST(Snappy, FindMatchLength) {
|
|
969
802
|
// Exercise all different code paths through the function.
|
|
970
803
|
// 64-bit version:
|
|
@@ -1058,12 +891,12 @@ TEST(Snappy, FindMatchLength) {
|
|
|
1058
891
|
TEST(Snappy, FindMatchLengthRandom) {
|
|
1059
892
|
constexpr int kNumTrials = 10000;
|
|
1060
893
|
constexpr int kTypicalLength = 10;
|
|
1061
|
-
std::minstd_rand0 rng(FLAGS_test_random_seed);
|
|
894
|
+
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
|
|
1062
895
|
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
|
1063
896
|
std::bernoulli_distribution one_in_two(1.0 / 2);
|
|
1064
897
|
std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
|
|
1065
898
|
|
|
1066
|
-
for (int i = 0; i < kNumTrials; i
|
|
899
|
+
for (int i = 0; i < kNumTrials; ++i) {
|
|
1067
900
|
std::string s, t;
|
|
1068
901
|
char a = static_cast<char>(uniform_byte(rng));
|
|
1069
902
|
char b = static_cast<char>(uniform_byte(rng));
|
|
@@ -1073,21 +906,20 @@ TEST(Snappy, FindMatchLengthRandom) {
|
|
|
1073
906
|
}
|
|
1074
907
|
DataEndingAtUnreadablePage u(s);
|
|
1075
908
|
DataEndingAtUnreadablePage v(t);
|
|
1076
|
-
|
|
909
|
+
size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
|
|
1077
910
|
if (matched == t.size()) {
|
|
1078
911
|
EXPECT_EQ(s, t);
|
|
1079
912
|
} else {
|
|
1080
913
|
EXPECT_NE(s[matched], t[matched]);
|
|
1081
|
-
for (
|
|
914
|
+
for (size_t j = 0; j < matched; ++j) {
|
|
1082
915
|
EXPECT_EQ(s[j], t[j]);
|
|
1083
916
|
}
|
|
1084
917
|
}
|
|
1085
918
|
}
|
|
1086
919
|
}
|
|
1087
920
|
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
unsigned int copy_offset) {
|
|
921
|
+
uint16_t MakeEntry(unsigned int extra, unsigned int len,
|
|
922
|
+
unsigned int copy_offset) {
|
|
1091
923
|
// Check that all of the fields fit within the allocated space
|
|
1092
924
|
assert(extra == (extra & 0x7)); // At most 3 bits
|
|
1093
925
|
assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
|
|
@@ -1104,403 +936,88 @@ TEST(Snappy, VerifyCharTable) {
|
|
|
1104
936
|
using snappy::internal::COPY_4_BYTE_OFFSET;
|
|
1105
937
|
using snappy::internal::char_table;
|
|
1106
938
|
|
|
1107
|
-
|
|
939
|
+
uint16_t dst[256];
|
|
1108
940
|
|
|
1109
941
|
// Place invalid entries in all places to detect missing initialization
|
|
1110
942
|
int assigned = 0;
|
|
1111
|
-
for (int i = 0; i < 256; i
|
|
943
|
+
for (int i = 0; i < 256; ++i) {
|
|
1112
944
|
dst[i] = 0xffff;
|
|
1113
945
|
}
|
|
1114
946
|
|
|
1115
947
|
// Small LITERAL entries. We store (len-1) in the top 6 bits.
|
|
1116
|
-
for (
|
|
1117
|
-
dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
|
|
948
|
+
for (uint8_t len = 1; len <= 60; ++len) {
|
|
949
|
+
dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
|
|
1118
950
|
assigned++;
|
|
1119
951
|
}
|
|
1120
952
|
|
|
1121
953
|
// Large LITERAL entries. We use 60..63 in the high 6 bits to
|
|
1122
954
|
// encode the number of bytes of length info that follow the opcode.
|
|
1123
|
-
for (
|
|
955
|
+
for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
|
|
1124
956
|
// We set the length field in the lookup table to 1 because extra
|
|
1125
957
|
// bytes encode len-1.
|
|
1126
|
-
dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
|
958
|
+
dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
|
1127
959
|
assigned++;
|
|
1128
960
|
}
|
|
1129
961
|
|
|
1130
962
|
// COPY_1_BYTE_OFFSET.
|
|
1131
963
|
//
|
|
1132
964
|
// The tag byte in the compressed data stores len-4 in 3 bits, and
|
|
1133
|
-
// offset/256 in
|
|
965
|
+
// offset/256 in 3 bits. offset%256 is stored in the next byte.
|
|
1134
966
|
//
|
|
1135
967
|
// This format is used for length in range [4..11] and offset in
|
|
1136
968
|
// range [0..2047]
|
|
1137
|
-
for (
|
|
1138
|
-
for (
|
|
1139
|
-
|
|
1140
|
-
|
|
969
|
+
for (uint8_t len = 4; len < 12; ++len) {
|
|
970
|
+
for (uint16_t offset = 0; offset < 2048; offset += 256) {
|
|
971
|
+
uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
|
|
972
|
+
dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
|
|
973
|
+
MakeEntry(1, len, offset_high);
|
|
1141
974
|
assigned++;
|
|
1142
975
|
}
|
|
1143
976
|
}
|
|
1144
977
|
|
|
1145
978
|
// COPY_2_BYTE_OFFSET.
|
|
1146
979
|
// Tag contains len-1 in top 6 bits, and offset in next two bytes.
|
|
1147
|
-
for (
|
|
1148
|
-
dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
|
|
980
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
|
981
|
+
dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
|
|
1149
982
|
assigned++;
|
|
1150
983
|
}
|
|
1151
984
|
|
|
1152
985
|
// COPY_4_BYTE_OFFSET.
|
|
1153
986
|
// Tag contents len-1 in top 6 bits, and offset in next four bytes.
|
|
1154
|
-
for (
|
|
1155
|
-
dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
|
|
987
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
|
988
|
+
dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
|
|
1156
989
|
assigned++;
|
|
1157
990
|
}
|
|
1158
991
|
|
|
1159
992
|
// Check that each entry was initialized exactly once.
|
|
1160
993
|
EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
|
|
1161
|
-
for (int i = 0; i < 256; i
|
|
994
|
+
for (int i = 0; i < 256; ++i) {
|
|
1162
995
|
EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
|
|
1163
996
|
}
|
|
1164
997
|
|
|
1165
|
-
if (FLAGS_snappy_dump_decompression_table) {
|
|
1166
|
-
printf("static const
|
|
1167
|
-
for (int i = 0; i < 256; i
|
|
1168
|
-
printf("0x%04x%s",
|
|
1169
|
-
|
|
1170
|
-
|
|
998
|
+
if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
|
|
999
|
+
std::printf("static const uint16_t char_table[256] = {\n ");
|
|
1000
|
+
for (int i = 0; i < 256; ++i) {
|
|
1001
|
+
std::printf("0x%04x%s",
|
|
1002
|
+
dst[i],
|
|
1003
|
+
((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
|
|
1171
1004
|
}
|
|
1172
|
-
printf("};\n");
|
|
1005
|
+
std::printf("};\n");
|
|
1173
1006
|
}
|
|
1174
1007
|
|
|
1175
1008
|
// Check that computed table matched recorded table.
|
|
1176
|
-
for (int i = 0; i < 256; i
|
|
1009
|
+
for (int i = 0; i < 256; ++i) {
|
|
1177
1010
|
EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
|
|
1178
1011
|
}
|
|
1179
1012
|
}
|
|
1180
1013
|
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
std::string compressed;
|
|
1186
|
-
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
|
|
1187
|
-
|
|
1188
|
-
CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
|
|
1189
|
-
file::Defaults()));
|
|
1190
|
-
}
|
|
1191
|
-
|
|
1192
|
-
static void UncompressFile(const char* fname) {
|
|
1193
|
-
std::string fullinput;
|
|
1194
|
-
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
|
1195
|
-
|
|
1196
|
-
size_t uncompLength;
|
|
1197
|
-
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
|
|
1198
|
-
|
|
1199
|
-
std::string uncompressed;
|
|
1200
|
-
uncompressed.resize(uncompLength);
|
|
1201
|
-
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
|
|
1202
|
-
|
|
1203
|
-
CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
|
|
1204
|
-
file::Defaults()));
|
|
1205
|
-
}
|
|
1206
|
-
|
|
1207
|
-
static void MeasureFile(const char* fname) {
|
|
1208
|
-
std::string fullinput;
|
|
1209
|
-
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
|
1210
|
-
printf("%-40s :\n", fname);
|
|
1211
|
-
|
|
1212
|
-
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
|
|
1213
|
-
int end_len = fullinput.size();
|
|
1214
|
-
if (FLAGS_end_len >= 0) {
|
|
1215
|
-
end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
|
|
1216
|
-
}
|
|
1217
|
-
for (int len = start_len; len <= end_len; len++) {
|
|
1218
|
-
const char* const input = fullinput.data();
|
|
1219
|
-
int repeats = (FLAGS_bytes + len) / (len + 1);
|
|
1220
|
-
if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
|
|
1221
|
-
if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
|
|
1222
|
-
if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
|
|
1223
|
-
|
|
1224
|
-
// For block-size based measurements
|
|
1225
|
-
if (0 && FLAGS_snappy) {
|
|
1226
|
-
Measure(input, len, SNAPPY, repeats, 8<<10);
|
|
1227
|
-
Measure(input, len, SNAPPY, repeats, 16<<10);
|
|
1228
|
-
Measure(input, len, SNAPPY, repeats, 32<<10);
|
|
1229
|
-
Measure(input, len, SNAPPY, repeats, 64<<10);
|
|
1230
|
-
Measure(input, len, SNAPPY, repeats, 256<<10);
|
|
1231
|
-
Measure(input, len, SNAPPY, repeats, 1024<<10);
|
|
1232
|
-
}
|
|
1233
|
-
}
|
|
1234
|
-
}
|
|
1235
|
-
|
|
1236
|
-
static struct {
|
|
1237
|
-
const char* label;
|
|
1238
|
-
const char* filename;
|
|
1239
|
-
size_t size_limit;
|
|
1240
|
-
} files[] = {
|
|
1241
|
-
{ "html", "html", 0 },
|
|
1242
|
-
{ "urls", "urls.10K", 0 },
|
|
1243
|
-
{ "jpg", "fireworks.jpeg", 0 },
|
|
1244
|
-
{ "jpg_200", "fireworks.jpeg", 200 },
|
|
1245
|
-
{ "pdf", "paper-100k.pdf", 0 },
|
|
1246
|
-
{ "html4", "html_x_4", 0 },
|
|
1247
|
-
{ "txt1", "alice29.txt", 0 },
|
|
1248
|
-
{ "txt2", "asyoulik.txt", 0 },
|
|
1249
|
-
{ "txt3", "lcet10.txt", 0 },
|
|
1250
|
-
{ "txt4", "plrabn12.txt", 0 },
|
|
1251
|
-
{ "pb", "geo.protodata", 0 },
|
|
1252
|
-
{ "gaviota", "kppkn.gtb", 0 },
|
|
1253
|
-
};
|
|
1254
|
-
|
|
1255
|
-
static void BM_UFlat(int iters, int arg) {
|
|
1256
|
-
StopBenchmarkTiming();
|
|
1257
|
-
|
|
1258
|
-
// Pick file to process based on "arg"
|
|
1259
|
-
CHECK_GE(arg, 0);
|
|
1260
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
|
1261
|
-
std::string contents =
|
|
1262
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
|
1263
|
-
|
|
1264
|
-
std::string zcontents;
|
|
1265
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
|
1266
|
-
char* dst = new char[contents.size()];
|
|
1267
|
-
|
|
1268
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
|
1269
|
-
static_cast<int64>(contents.size()));
|
|
1270
|
-
SetBenchmarkLabel(files[arg].label);
|
|
1271
|
-
StartBenchmarkTiming();
|
|
1272
|
-
while (iters-- > 0) {
|
|
1273
|
-
CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
|
|
1274
|
-
}
|
|
1275
|
-
StopBenchmarkTiming();
|
|
1276
|
-
|
|
1277
|
-
delete[] dst;
|
|
1278
|
-
}
|
|
1279
|
-
BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
|
1280
|
-
|
|
1281
|
-
static void BM_UValidate(int iters, int arg) {
|
|
1282
|
-
StopBenchmarkTiming();
|
|
1283
|
-
|
|
1284
|
-
// Pick file to process based on "arg"
|
|
1285
|
-
CHECK_GE(arg, 0);
|
|
1286
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
|
1287
|
-
std::string contents =
|
|
1288
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
|
1289
|
-
|
|
1290
|
-
std::string zcontents;
|
|
1291
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
|
1292
|
-
|
|
1293
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
|
1294
|
-
static_cast<int64>(contents.size()));
|
|
1295
|
-
SetBenchmarkLabel(files[arg].label);
|
|
1296
|
-
StartBenchmarkTiming();
|
|
1297
|
-
while (iters-- > 0) {
|
|
1298
|
-
CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
|
|
1299
|
-
}
|
|
1300
|
-
StopBenchmarkTiming();
|
|
1301
|
-
}
|
|
1302
|
-
BENCHMARK(BM_UValidate)->DenseRange(0, 4);
|
|
1303
|
-
|
|
1304
|
-
static void BM_UIOVec(int iters, int arg) {
|
|
1305
|
-
StopBenchmarkTiming();
|
|
1306
|
-
|
|
1307
|
-
// Pick file to process based on "arg"
|
|
1308
|
-
CHECK_GE(arg, 0);
|
|
1309
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
|
1310
|
-
std::string contents =
|
|
1311
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
|
1312
|
-
|
|
1313
|
-
std::string zcontents;
|
|
1314
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
|
1315
|
-
|
|
1316
|
-
// Uncompress into an iovec containing ten entries.
|
|
1317
|
-
const int kNumEntries = 10;
|
|
1318
|
-
struct iovec iov[kNumEntries];
|
|
1319
|
-
char *dst = new char[contents.size()];
|
|
1320
|
-
int used_so_far = 0;
|
|
1321
|
-
for (int i = 0; i < kNumEntries; ++i) {
|
|
1322
|
-
iov[i].iov_base = dst + used_so_far;
|
|
1323
|
-
if (used_so_far == contents.size()) {
|
|
1324
|
-
iov[i].iov_len = 0;
|
|
1325
|
-
continue;
|
|
1326
|
-
}
|
|
1327
|
-
|
|
1328
|
-
if (i == kNumEntries - 1) {
|
|
1329
|
-
iov[i].iov_len = contents.size() - used_so_far;
|
|
1330
|
-
} else {
|
|
1331
|
-
iov[i].iov_len = contents.size() / kNumEntries;
|
|
1332
|
-
}
|
|
1333
|
-
used_so_far += iov[i].iov_len;
|
|
1334
|
-
}
|
|
1335
|
-
|
|
1336
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
|
1337
|
-
static_cast<int64>(contents.size()));
|
|
1338
|
-
SetBenchmarkLabel(files[arg].label);
|
|
1339
|
-
StartBenchmarkTiming();
|
|
1340
|
-
while (iters-- > 0) {
|
|
1341
|
-
CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
|
|
1342
|
-
kNumEntries));
|
|
1343
|
-
}
|
|
1344
|
-
StopBenchmarkTiming();
|
|
1345
|
-
|
|
1346
|
-
delete[] dst;
|
|
1347
|
-
}
|
|
1348
|
-
BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
|
|
1349
|
-
|
|
1350
|
-
static void BM_UFlatSink(int iters, int arg) {
|
|
1351
|
-
StopBenchmarkTiming();
|
|
1352
|
-
|
|
1353
|
-
// Pick file to process based on "arg"
|
|
1354
|
-
CHECK_GE(arg, 0);
|
|
1355
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
|
1356
|
-
std::string contents =
|
|
1357
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
|
1358
|
-
|
|
1359
|
-
std::string zcontents;
|
|
1360
|
-
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
|
1361
|
-
char* dst = new char[contents.size()];
|
|
1362
|
-
|
|
1363
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
|
1364
|
-
static_cast<int64>(contents.size()));
|
|
1365
|
-
SetBenchmarkLabel(files[arg].label);
|
|
1366
|
-
StartBenchmarkTiming();
|
|
1367
|
-
while (iters-- > 0) {
|
|
1368
|
-
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
|
|
1369
|
-
snappy::UncheckedByteArraySink sink(dst);
|
|
1370
|
-
CHECK(snappy::Uncompress(&source, &sink));
|
|
1371
|
-
}
|
|
1372
|
-
StopBenchmarkTiming();
|
|
1373
|
-
|
|
1374
|
-
std::string s(dst, contents.size());
|
|
1375
|
-
CHECK_EQ(contents, s);
|
|
1376
|
-
|
|
1377
|
-
delete[] dst;
|
|
1378
|
-
}
|
|
1379
|
-
|
|
1380
|
-
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
|
|
1381
|
-
|
|
1382
|
-
static void BM_ZFlat(int iters, int arg) {
|
|
1383
|
-
StopBenchmarkTiming();
|
|
1384
|
-
|
|
1385
|
-
// Pick file to process based on "arg"
|
|
1386
|
-
CHECK_GE(arg, 0);
|
|
1387
|
-
CHECK_LT(arg, ARRAYSIZE(files));
|
|
1388
|
-
std::string contents =
|
|
1389
|
-
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
|
1390
|
-
|
|
1391
|
-
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
|
1392
|
-
|
|
1393
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
|
1394
|
-
static_cast<int64>(contents.size()));
|
|
1395
|
-
StartBenchmarkTiming();
|
|
1396
|
-
|
|
1397
|
-
size_t zsize = 0;
|
|
1398
|
-
while (iters-- > 0) {
|
|
1399
|
-
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
|
|
1400
|
-
}
|
|
1401
|
-
StopBenchmarkTiming();
|
|
1402
|
-
const double compression_ratio =
|
|
1403
|
-
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
|
1404
|
-
SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
|
|
1405
|
-
100.0 * compression_ratio));
|
|
1406
|
-
VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
|
|
1407
|
-
files[arg].label, static_cast<int>(contents.size()),
|
|
1408
|
-
static_cast<int>(zsize));
|
|
1409
|
-
delete[] dst;
|
|
1410
|
-
}
|
|
1411
|
-
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
|
1412
|
-
|
|
1413
|
-
static void BM_ZFlatAll(int iters, int arg) {
|
|
1414
|
-
StopBenchmarkTiming();
|
|
1415
|
-
|
|
1416
|
-
CHECK_EQ(arg, 0);
|
|
1417
|
-
const int num_files = ARRAYSIZE(files);
|
|
1418
|
-
|
|
1419
|
-
std::vector<std::string> contents(num_files);
|
|
1420
|
-
std::vector<char*> dst(num_files);
|
|
1421
|
-
|
|
1422
|
-
int64 total_contents_size = 0;
|
|
1423
|
-
for (int i = 0; i < num_files; ++i) {
|
|
1424
|
-
contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
|
|
1425
|
-
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
|
|
1426
|
-
total_contents_size += contents[i].size();
|
|
1427
|
-
}
|
|
1428
|
-
|
|
1429
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
|
1430
|
-
StartBenchmarkTiming();
|
|
1431
|
-
|
|
1432
|
-
size_t zsize = 0;
|
|
1433
|
-
while (iters-- > 0) {
|
|
1434
|
-
for (int i = 0; i < num_files; ++i) {
|
|
1435
|
-
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
|
1436
|
-
&zsize);
|
|
1437
|
-
}
|
|
1438
|
-
}
|
|
1439
|
-
StopBenchmarkTiming();
|
|
1440
|
-
|
|
1441
|
-
for (int i = 0; i < num_files; ++i) {
|
|
1442
|
-
delete[] dst[i];
|
|
1014
|
+
TEST(Snappy, TestBenchmarkFiles) {
|
|
1015
|
+
for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
|
|
1016
|
+
Verify(ReadTestDataFile(kTestDataFiles[i].filename,
|
|
1017
|
+
kTestDataFiles[i].size_limit));
|
|
1443
1018
|
}
|
|
1444
|
-
SetBenchmarkLabel(StrFormat("%d files", num_files));
|
|
1445
1019
|
}
|
|
1446
|
-
BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
|
|
1447
|
-
|
|
1448
|
-
static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
|
|
1449
|
-
StopBenchmarkTiming();
|
|
1450
|
-
|
|
1451
|
-
CHECK_EQ(arg, 0);
|
|
1452
|
-
CHECK_GT(ARRAYSIZE(files), 0);
|
|
1453
|
-
const std::string base_content =
|
|
1454
|
-
ReadTestDataFile(files[0].filename, files[0].size_limit);
|
|
1455
|
-
|
|
1456
|
-
std::vector<std::string> contents;
|
|
1457
|
-
std::vector<char*> dst;
|
|
1458
|
-
int64 total_contents_size = 0;
|
|
1459
|
-
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
|
|
1460
|
-
++table_bits) {
|
|
1461
|
-
std::string content = base_content;
|
|
1462
|
-
content.resize(1 << table_bits);
|
|
1463
|
-
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
|
|
1464
|
-
total_contents_size += content.size();
|
|
1465
|
-
contents.push_back(std::move(content));
|
|
1466
|
-
}
|
|
1467
|
-
|
|
1468
|
-
size_t zsize = 0;
|
|
1469
|
-
SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
|
|
1470
|
-
StartBenchmarkTiming();
|
|
1471
|
-
while (iters-- > 0) {
|
|
1472
|
-
for (int i = 0; i < contents.size(); ++i) {
|
|
1473
|
-
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
|
1474
|
-
&zsize);
|
|
1475
|
-
}
|
|
1476
|
-
}
|
|
1477
|
-
StopBenchmarkTiming();
|
|
1478
1020
|
|
|
1479
|
-
|
|
1480
|
-
delete[] dst[i];
|
|
1481
|
-
}
|
|
1482
|
-
SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
|
|
1483
|
-
}
|
|
1484
|
-
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
|
|
1021
|
+
} // namespace
|
|
1485
1022
|
|
|
1486
1023
|
} // namespace snappy
|
|
1487
|
-
|
|
1488
|
-
int main(int argc, char** argv) {
|
|
1489
|
-
InitGoogle(argv[0], &argc, &argv, true);
|
|
1490
|
-
RunSpecifiedBenchmarks();
|
|
1491
|
-
|
|
1492
|
-
if (argc >= 2) {
|
|
1493
|
-
for (int arg = 1; arg < argc; arg++) {
|
|
1494
|
-
if (FLAGS_write_compressed) {
|
|
1495
|
-
snappy::CompressFile(argv[arg]);
|
|
1496
|
-
} else if (FLAGS_write_uncompressed) {
|
|
1497
|
-
snappy::UncompressFile(argv[arg]);
|
|
1498
|
-
} else {
|
|
1499
|
-
snappy::MeasureFile(argv[arg]);
|
|
1500
|
-
}
|
|
1501
|
-
}
|
|
1502
|
-
return 0;
|
|
1503
|
-
}
|
|
1504
|
-
|
|
1505
|
-
return RUN_ALL_TESTS();
|
|
1506
|
-
}
|