snappy 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +2 -2
  3. data/.github/workflows/publish.yml +7 -13
  4. data/Dockerfile +1 -1
  5. data/Gemfile +1 -0
  6. data/README.md +20 -1
  7. data/Rakefile +1 -1
  8. data/ext/extconf.rb +13 -11
  9. data/lib/snappy/shim.rb +3 -23
  10. data/lib/snappy/version.rb +1 -1
  11. data/lib/snappy/writer.rb +1 -1
  12. data/snappy.gemspec +1 -0
  13. data/test/snappy_test.rb +29 -4
  14. data/vendor/snappy/BUILD.bazel +211 -0
  15. data/vendor/snappy/CMakeLists.txt +176 -31
  16. data/vendor/snappy/CONTRIBUTING.md +9 -4
  17. data/vendor/snappy/MODULE.bazel +23 -0
  18. data/vendor/snappy/NEWS +27 -0
  19. data/vendor/snappy/README.md +52 -35
  20. data/vendor/snappy/WORKSPACE +27 -0
  21. data/vendor/snappy/WORKSPACE.bzlmod +0 -0
  22. data/vendor/snappy/cmake/config.h.in +30 -23
  23. data/vendor/snappy/snappy-internal.h +218 -25
  24. data/vendor/snappy/snappy-sinksource.cc +26 -9
  25. data/vendor/snappy/snappy-sinksource.h +11 -11
  26. data/vendor/snappy/snappy-stubs-internal.cc +1 -1
  27. data/vendor/snappy/snappy-stubs-internal.h +231 -306
  28. data/vendor/snappy/snappy-stubs-public.h.in +0 -11
  29. data/vendor/snappy/snappy-test.cc +88 -198
  30. data/vendor/snappy/snappy-test.h +102 -285
  31. data/vendor/snappy/snappy.cc +1412 -425
  32. data/vendor/snappy/snappy.h +60 -10
  33. data/vendor/snappy/snappy_benchmark.cc +398 -0
  34. data/vendor/snappy/snappy_compress_fuzzer.cc +21 -16
  35. data/vendor/snappy/snappy_test_data.cc +57 -0
  36. data/vendor/snappy/snappy_test_data.h +68 -0
  37. data/vendor/snappy/snappy_test_tool.cc +471 -0
  38. data/vendor/snappy/snappy_uncompress_fuzzer.cc +3 -2
  39. data/vendor/snappy/snappy_unittest.cc +183 -666
  40. metadata +12 -6
@@ -26,44 +26,32 @@
26
26
  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
27
  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
28
 
29
- #include <math.h>
30
- #include <stdlib.h>
31
-
32
29
  #include <algorithm>
30
+ #include <cinttypes>
31
+ #include <cmath>
32
+ #include <cstdlib>
33
33
  #include <random>
34
34
  #include <string>
35
35
  #include <utility>
36
36
  #include <vector>
37
37
 
38
- #include "snappy.h"
39
- #include "snappy-internal.h"
40
38
  #include "snappy-test.h"
39
+
40
+ #include "gtest/gtest.h"
41
+
42
+ #include "snappy-internal.h"
41
43
  #include "snappy-sinksource.h"
44
+ #include "snappy.h"
45
+ #include "snappy_test_data.h"
42
46
 
43
- DEFINE_int32(start_len, -1,
44
- "Starting prefix size for testing (-1: just full file contents)");
45
- DEFINE_int32(end_len, -1,
46
- "Starting prefix size for testing (-1: just full file contents)");
47
- DEFINE_int32(bytes, 10485760,
48
- "How many bytes to compress/uncompress per file for timing");
49
-
50
- DEFINE_bool(zlib, false,
51
- "Run zlib compression (http://www.zlib.net)");
52
- DEFINE_bool(lzo, false,
53
- "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
54
- DEFINE_bool(snappy, true, "Run snappy compression");
55
-
56
- DEFINE_bool(write_compressed, false,
57
- "Write compressed versions of each file to <file>.comp");
58
- DEFINE_bool(write_uncompressed, false,
59
- "Write uncompressed versions of each file to <file>.uncomp");
60
-
61
- DEFINE_bool(snappy_dump_decompression_table, false,
47
+ SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
62
48
  "If true, we print the decompression table during tests.");
63
49
 
64
50
  namespace snappy {
65
51
 
66
- #if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
52
+ namespace {
53
+
54
+ #if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
67
55
 
68
56
  // To test against code that reads beyond its input, this class copies a
69
57
  // string to a newly allocated group of pages, the last of which
@@ -84,7 +72,7 @@ class DataEndingAtUnreadablePage {
84
72
  CHECK_NE(MAP_FAILED, mem_);
85
73
  protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
86
74
  char* dst = protected_page_ - size;
87
- memcpy(dst, s.data(), size);
75
+ std::memcpy(dst, s.data(), size);
88
76
  data_ = dst;
89
77
  size_ = size;
90
78
  // Make guard page unreadable.
@@ -109,256 +97,14 @@ class DataEndingAtUnreadablePage {
109
97
  size_t size_;
110
98
  };
111
99
 
112
- #else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
100
+ #else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
113
101
 
114
102
  // Fallback for systems without mmap.
115
103
  using DataEndingAtUnreadablePage = std::string;
116
104
 
117
105
  #endif
118
106
 
119
- enum CompressorType {
120
- ZLIB, LZO, SNAPPY
121
- };
122
-
123
- const char* names[] = {
124
- "ZLIB", "LZO", "SNAPPY"
125
- };
126
-
127
- static size_t MinimumRequiredOutputSpace(size_t input_size,
128
- CompressorType comp) {
129
- switch (comp) {
130
- #ifdef ZLIB_VERSION
131
- case ZLIB:
132
- return ZLib::MinCompressbufSize(input_size);
133
- #endif // ZLIB_VERSION
134
-
135
- #ifdef LZO_VERSION
136
- case LZO:
137
- return input_size + input_size/64 + 16 + 3;
138
- #endif // LZO_VERSION
139
-
140
- case SNAPPY:
141
- return snappy::MaxCompressedLength(input_size);
142
-
143
- default:
144
- LOG(FATAL) << "Unknown compression type number " << comp;
145
- return 0;
146
- }
147
- }
148
-
149
- // Returns true if we successfully compressed, false otherwise.
150
- //
151
- // If compressed_is_preallocated is set, do not resize the compressed buffer.
152
- // This is typically what you want for a benchmark, in order to not spend
153
- // time in the memory allocator. If you do set this flag, however,
154
- // "compressed" must be preinitialized to at least MinCompressbufSize(comp)
155
- // number of bytes, and may contain junk bytes at the end after return.
156
- static bool Compress(const char* input, size_t input_size, CompressorType comp,
157
- std::string* compressed, bool compressed_is_preallocated) {
158
- if (!compressed_is_preallocated) {
159
- compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
160
- }
161
-
162
- switch (comp) {
163
- #ifdef ZLIB_VERSION
164
- case ZLIB: {
165
- ZLib zlib;
166
- uLongf destlen = compressed->size();
167
- int ret = zlib.Compress(
168
- reinterpret_cast<Bytef*>(string_as_array(compressed)),
169
- &destlen,
170
- reinterpret_cast<const Bytef*>(input),
171
- input_size);
172
- CHECK_EQ(Z_OK, ret);
173
- if (!compressed_is_preallocated) {
174
- compressed->resize(destlen);
175
- }
176
- return true;
177
- }
178
- #endif // ZLIB_VERSION
179
-
180
- #ifdef LZO_VERSION
181
- case LZO: {
182
- unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
183
- lzo_uint destlen;
184
- int ret = lzo1x_1_15_compress(
185
- reinterpret_cast<const uint8*>(input),
186
- input_size,
187
- reinterpret_cast<uint8*>(string_as_array(compressed)),
188
- &destlen,
189
- mem);
190
- CHECK_EQ(LZO_E_OK, ret);
191
- delete[] mem;
192
- if (!compressed_is_preallocated) {
193
- compressed->resize(destlen);
194
- }
195
- break;
196
- }
197
- #endif // LZO_VERSION
198
-
199
- case SNAPPY: {
200
- size_t destlen;
201
- snappy::RawCompress(input, input_size,
202
- string_as_array(compressed),
203
- &destlen);
204
- CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
205
- if (!compressed_is_preallocated) {
206
- compressed->resize(destlen);
207
- }
208
- break;
209
- }
210
-
211
- default: {
212
- return false; // the asked-for library wasn't compiled in
213
- }
214
- }
215
- return true;
216
- }
217
-
218
- static bool Uncompress(const std::string& compressed, CompressorType comp,
219
- int size, std::string* output) {
220
- switch (comp) {
221
- #ifdef ZLIB_VERSION
222
- case ZLIB: {
223
- output->resize(size);
224
- ZLib zlib;
225
- uLongf destlen = output->size();
226
- int ret = zlib.Uncompress(
227
- reinterpret_cast<Bytef*>(string_as_array(output)),
228
- &destlen,
229
- reinterpret_cast<const Bytef*>(compressed.data()),
230
- compressed.size());
231
- CHECK_EQ(Z_OK, ret);
232
- CHECK_EQ(static_cast<uLongf>(size), destlen);
233
- break;
234
- }
235
- #endif // ZLIB_VERSION
236
-
237
- #ifdef LZO_VERSION
238
- case LZO: {
239
- output->resize(size);
240
- lzo_uint destlen;
241
- int ret = lzo1x_decompress(
242
- reinterpret_cast<const uint8*>(compressed.data()),
243
- compressed.size(),
244
- reinterpret_cast<uint8*>(string_as_array(output)),
245
- &destlen,
246
- NULL);
247
- CHECK_EQ(LZO_E_OK, ret);
248
- CHECK_EQ(static_cast<lzo_uint>(size), destlen);
249
- break;
250
- }
251
- #endif // LZO_VERSION
252
-
253
- case SNAPPY: {
254
- snappy::RawUncompress(compressed.data(), compressed.size(),
255
- string_as_array(output));
256
- break;
257
- }
258
-
259
- default: {
260
- return false; // the asked-for library wasn't compiled in
261
- }
262
- }
263
- return true;
264
- }
265
-
266
- static void Measure(const char* data,
267
- size_t length,
268
- CompressorType comp,
269
- int repeats,
270
- int block_size) {
271
- // Run tests a few time and pick median running times
272
- static const int kRuns = 5;
273
- double ctime[kRuns];
274
- double utime[kRuns];
275
- int compressed_size = 0;
276
-
277
- {
278
- // Chop the input into blocks
279
- int num_blocks = (length + block_size - 1) / block_size;
280
- std::vector<const char*> input(num_blocks);
281
- std::vector<size_t> input_length(num_blocks);
282
- std::vector<std::string> compressed(num_blocks);
283
- std::vector<std::string> output(num_blocks);
284
- for (int b = 0; b < num_blocks; b++) {
285
- int input_start = b * block_size;
286
- int input_limit = std::min<int>((b+1)*block_size, length);
287
- input[b] = data+input_start;
288
- input_length[b] = input_limit-input_start;
289
-
290
- // Pre-grow the output buffer so we don't measure string append time.
291
- compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
292
- }
293
-
294
- // First, try one trial compression to make sure the code is compiled in
295
- if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
296
- LOG(WARNING) << "Skipping " << names[comp] << ": "
297
- << "library not compiled in";
298
- return;
299
- }
300
-
301
- for (int run = 0; run < kRuns; run++) {
302
- CycleTimer ctimer, utimer;
303
-
304
- for (int b = 0; b < num_blocks; b++) {
305
- // Pre-grow the output buffer so we don't measure string append time.
306
- compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
307
- }
308
-
309
- ctimer.Start();
310
- for (int b = 0; b < num_blocks; b++)
311
- for (int i = 0; i < repeats; i++)
312
- Compress(input[b], input_length[b], comp, &compressed[b], true);
313
- ctimer.Stop();
314
-
315
- // Compress once more, with resizing, so we don't leave junk
316
- // at the end that will confuse the decompressor.
317
- for (int b = 0; b < num_blocks; b++) {
318
- Compress(input[b], input_length[b], comp, &compressed[b], false);
319
- }
320
-
321
- for (int b = 0; b < num_blocks; b++) {
322
- output[b].resize(input_length[b]);
323
- }
324
-
325
- utimer.Start();
326
- for (int i = 0; i < repeats; i++)
327
- for (int b = 0; b < num_blocks; b++)
328
- Uncompress(compressed[b], comp, input_length[b], &output[b]);
329
- utimer.Stop();
330
-
331
- ctime[run] = ctimer.Get();
332
- utime[run] = utimer.Get();
333
- }
334
-
335
- compressed_size = 0;
336
- for (size_t i = 0; i < compressed.size(); i++) {
337
- compressed_size += compressed[i].size();
338
- }
339
- }
340
-
341
- std::sort(ctime, ctime + kRuns);
342
- std::sort(utime, utime + kRuns);
343
- const int med = kRuns/2;
344
-
345
- float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
346
- float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
347
- std::string x = names[comp];
348
- x += ":";
349
- std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
350
- : std::string("?");
351
- printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
352
- "comp %5.1f MB/s uncomp %5s MB/s\n",
353
- x.c_str(),
354
- block_size/(1<<20),
355
- static_cast<int>(length), static_cast<uint32>(compressed_size),
356
- (compressed_size * 100.0) / std::max<int>(1, length),
357
- comp_rate,
358
- urate.c_str());
359
- }
360
-
361
- static int VerifyString(const std::string& input) {
107
+ int VerifyString(const std::string& input) {
362
108
  std::string compressed;
363
109
  DataEndingAtUnreadablePage i(input);
364
110
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
@@ -374,7 +120,7 @@ static int VerifyString(const std::string& input) {
374
120
  return uncompressed.size();
375
121
  }
376
122
 
377
- static void VerifyStringSink(const std::string& input) {
123
+ void VerifyStringSink(const std::string& input) {
378
124
  std::string compressed;
379
125
  DataEndingAtUnreadablePage i(input);
380
126
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
@@ -392,26 +138,15 @@ static void VerifyStringSink(const std::string& input) {
392
138
  CHECK_EQ(uncompressed, input);
393
139
  }
394
140
 
395
- static void VerifyIOVec(const std::string& input) {
396
- std::string compressed;
397
- DataEndingAtUnreadablePage i(input);
398
- const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
399
- CHECK_EQ(written, compressed.size());
400
- CHECK_LE(compressed.size(),
401
- snappy::MaxCompressedLength(input.size()));
402
- CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
403
-
404
- // Try uncompressing into an iovec containing a random number of entries
405
- // ranging from 1 to 10.
406
- char* buf = new char[input.size()];
141
+ struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) {
407
142
  std::minstd_rand0 rng(input.size());
408
143
  std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
409
- size_t num = uniform_1_to_10(rng);
144
+ num = uniform_1_to_10(rng);
410
145
  if (input.size() < num) {
411
146
  num = input.size();
412
147
  }
413
148
  struct iovec* iov = new iovec[num];
414
- int used_so_far = 0;
149
+ size_t used_so_far = 0;
415
150
  std::bernoulli_distribution one_in_five(1.0 / 5);
416
151
  for (size_t i = 0; i < num; ++i) {
417
152
  assert(used_so_far < input.size());
@@ -430,8 +165,40 @@ static void VerifyIOVec(const std::string& input) {
430
165
  }
431
166
  used_so_far += iov[i].iov_len;
432
167
  }
433
- CHECK(snappy::RawUncompressToIOVec(
434
- compressed.data(), compressed.size(), iov, num));
168
+ return iov;
169
+ }
170
+
171
+ int VerifyIOVecSource(const std::string& input) {
172
+ std::string compressed;
173
+ std::string copy = input;
174
+ char* buf = const_cast<char*>(copy.data());
175
+ size_t num = 0;
176
+ struct iovec* iov = GetIOVec(input, buf, num);
177
+ const size_t written = snappy::CompressFromIOVec(iov, num, &compressed);
178
+ CHECK_EQ(written, compressed.size());
179
+ CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
180
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
181
+
182
+ std::string uncompressed;
183
+ DataEndingAtUnreadablePage c(compressed);
184
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
185
+ CHECK_EQ(uncompressed, input);
186
+ delete[] iov;
187
+ return uncompressed.size();
188
+ }
189
+
190
+ void VerifyIOVecSink(const std::string& input) {
191
+ std::string compressed;
192
+ DataEndingAtUnreadablePage i(input);
193
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
194
+ CHECK_EQ(written, compressed.size());
195
+ CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
196
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
197
+ char* buf = new char[input.size()];
198
+ size_t num = 0;
199
+ struct iovec* iov = GetIOVec(input, buf, num);
200
+ CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov,
201
+ num));
435
202
  CHECK(!memcmp(buf, input.data(), input.size()));
436
203
  delete[] iov;
437
204
  delete[] buf;
@@ -439,7 +206,7 @@ static void VerifyIOVec(const std::string& input) {
439
206
 
440
207
  // Test that data compressed by a compressor that does not
441
208
  // obey block sizes is uncompressed properly.
442
- static void VerifyNonBlockedCompression(const std::string& input) {
209
+ void VerifyNonBlockedCompression(const std::string& input) {
443
210
  if (input.length() > snappy::kBlockSize) {
444
211
  // We cannot test larger blocks than the maximum block size, obviously.
445
212
  return;
@@ -451,7 +218,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
451
218
  // Setup compression table
452
219
  snappy::internal::WorkingMemory wmem(input.size());
453
220
  int table_size;
454
- uint16* table = wmem.GetHashTable(input.size(), &table_size);
221
+ uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
455
222
 
456
223
  // Compress entire input in one shot
457
224
  std::string compressed;
@@ -481,7 +248,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
481
248
  struct iovec vec[kNumBlocks];
482
249
  const int block_size = 1 + input.size() / kNumBlocks;
483
250
  std::string iovec_data(block_size * kNumBlocks, 'x');
484
- for (int i = 0; i < kNumBlocks; i++) {
251
+ for (int i = 0; i < kNumBlocks; ++i) {
485
252
  vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
486
253
  vec[i].iov_len = block_size;
487
254
  }
@@ -492,7 +259,7 @@ static void VerifyNonBlockedCompression(const std::string& input) {
492
259
  }
493
260
 
494
261
  // Expand the input so that it is at least K times as big as block size
495
- static std::string Expand(const std::string& input) {
262
+ std::string Expand(const std::string& input) {
496
263
  static const int K = 3;
497
264
  std::string data = input;
498
265
  while (data.size() < K * snappy::kBlockSize) {
@@ -501,30 +268,33 @@ static std::string Expand(const std::string& input) {
501
268
  return data;
502
269
  }
503
270
 
504
- static int Verify(const std::string& input) {
271
+ int Verify(const std::string& input) {
505
272
  VLOG(1) << "Verifying input of size " << input.size();
506
273
 
507
274
  // Compress using string based routines
508
275
  const int result = VerifyString(input);
509
276
 
277
+ // Compress using `iovec`-based routines.
278
+ CHECK_EQ(VerifyIOVecSource(input), result);
279
+
510
280
  // Verify using sink based routines
511
281
  VerifyStringSink(input);
512
282
 
513
283
  VerifyNonBlockedCompression(input);
514
- VerifyIOVec(input);
284
+ VerifyIOVecSink(input);
515
285
  if (!input.empty()) {
516
286
  const std::string expanded = Expand(input);
517
287
  VerifyNonBlockedCompression(expanded);
518
- VerifyIOVec(input);
288
+ VerifyIOVecSink(input);
519
289
  }
520
290
 
521
291
  return result;
522
292
  }
523
293
 
524
- static bool IsValidCompressedBuffer(const std::string& c) {
294
+ bool IsValidCompressedBuffer(const std::string& c) {
525
295
  return snappy::IsValidCompressedBuffer(c.data(), c.size());
526
296
  }
527
- static bool Uncompress(const std::string& c, std::string* u) {
297
+ bool Uncompress(const std::string& c, std::string* u) {
528
298
  return snappy::Uncompress(c.data(), c.size(), u);
529
299
  }
530
300
 
@@ -549,8 +319,8 @@ TEST(CorruptedTest, VerifyCorrupted) {
549
319
  // This is testing for a security bug - a buffer that decompresses to 100k
550
320
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
551
321
  source.resize(100000);
552
- for (size_t i = 0; i < source.length(); ++i) {
553
- source[i] = 'A';
322
+ for (char& source_char : source) {
323
+ source_char = 'A';
554
324
  }
555
325
  snappy::Compress(source.data(), source.size(), &dest);
556
326
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
@@ -588,7 +358,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
588
358
  size_t ulen;
589
359
  CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
590
360
  || (ulen < (1<<20)));
591
- uint32 ulen2;
361
+ uint32_t ulen2;
592
362
  snappy::ByteArraySource source(data.data(), data.size());
593
363
  CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
594
364
  (ulen2 < (1<<20)));
@@ -601,7 +371,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
601
371
  // These mirror the compression code in snappy.cc, but are copied
602
372
  // here so that we can bypass some limitations in how snappy.cc
603
373
  // invokes these routines.
604
- static void AppendLiteral(std::string* dst, const std::string& literal) {
374
+ void AppendLiteral(std::string* dst, const std::string& literal) {
605
375
  if (literal.empty()) return;
606
376
  int n = literal.size() - 1;
607
377
  if (n < 60) {
@@ -621,7 +391,7 @@ static void AppendLiteral(std::string* dst, const std::string& literal) {
621
391
  *dst += literal;
622
392
  }
623
393
 
624
- static void AppendCopy(std::string* dst, int offset, int length) {
394
+ void AppendCopy(std::string* dst, int offset, int length) {
625
395
  while (length > 0) {
626
396
  // Figure out how much to copy in one shot
627
397
  int to_copy;
@@ -665,6 +435,41 @@ TEST(Snappy, SimpleTests) {
665
435
  Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
666
436
  }
667
437
 
438
+ // Regression test for cr/345340892.
439
+ TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
440
+ Verify("abcabcabcabcabcabcab");
441
+ Verify("abcabcabcabcabcabcab0123456789ABCDEF");
442
+
443
+ Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
444
+ Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
445
+ }
446
+
447
+ // Regression test for cr/345340892.
448
+ TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
449
+ std::mt19937 rng;
450
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
451
+ for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
452
+ for (int length = 1; length <= 64; ++length) {
453
+ for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
454
+ const int size = pattern_size + length + extra_bytes_after_pattern;
455
+ std::string input;
456
+ input.resize(size);
457
+ for (int i = 0; i < pattern_size; ++i) {
458
+ input[i] = 'a' + i;
459
+ }
460
+ for (int i = 0; i < length; ++i) {
461
+ input[pattern_size + i] = input[i];
462
+ }
463
+ for (int i = 0; i < extra_bytes_after_pattern; ++i) {
464
+ input[pattern_size + length + i] =
465
+ static_cast<char>(uniform_byte(rng));
466
+ }
467
+ Verify(input);
468
+ }
469
+ }
470
+ }
471
+ }
472
+
668
473
  // Verify max blowup (lots of four-byte copies)
669
474
  TEST(Snappy, MaxBlowup) {
670
475
  std::mt19937 rng;
@@ -680,8 +485,20 @@ TEST(Snappy, MaxBlowup) {
680
485
  Verify(input);
681
486
  }
682
487
 
488
+ // Issue #201, when output is more than 4GB, we had a data corruption bug.
489
+ // We cannot run this test always because of CI constraints.
490
+ TEST(Snappy, DISABLED_MoreThan4GB) {
491
+ std::mt19937 rng;
492
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
493
+ std::string input;
494
+ input.resize((1ull << 32) - 1);
495
+ for (uint64_t i = 0; i < ((1ull << 32) - 1); ++i)
496
+ input[i] = static_cast<char>(uniform_byte(rng));
497
+ Verify(input);
498
+ }
499
+
683
500
  TEST(Snappy, RandomData) {
684
- std::minstd_rand0 rng(FLAGS_test_random_seed);
501
+ std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
685
502
  std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
686
503
  std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
687
504
  std::uniform_int_distribution<int> uniform_byte(0, 255);
@@ -690,7 +507,7 @@ TEST(Snappy, RandomData) {
690
507
  std::bernoulli_distribution one_in_ten(1.0 / 10);
691
508
 
692
509
  constexpr int num_ops = 20000;
693
- for (int i = 0; i < num_ops; i++) {
510
+ for (int i = 0; i < num_ops; ++i) {
694
511
  if ((i % 1000) == 0) {
695
512
  VLOG(0) << "Random op " << i << " of " << num_ops;
696
513
  }
@@ -738,14 +555,14 @@ TEST(Snappy, FourByteOffset) {
738
555
  // How many times each fragment is emitted.
739
556
  const int n1 = 2;
740
557
  const int n2 = 100000 / fragment2.size();
741
- const int length = n1 * fragment1.size() + n2 * fragment2.size();
558
+ const size_t length = n1 * fragment1.size() + n2 * fragment2.size();
742
559
 
743
560
  std::string compressed;
744
561
  Varint::Append32(&compressed, length);
745
562
 
746
563
  AppendLiteral(&compressed, fragment1);
747
564
  std::string src = fragment1;
748
- for (int i = 0; i < n2; i++) {
565
+ for (int i = 0; i < n2; ++i) {
749
566
  AppendLiteral(&compressed, fragment2);
750
567
  src += fragment2;
751
568
  }
@@ -760,7 +577,27 @@ TEST(Snappy, FourByteOffset) {
760
577
  CHECK_EQ(uncompressed, src);
761
578
  }
762
579
 
763
- TEST(Snappy, IOVecEdgeCases) {
580
+ TEST(Snappy, IOVecSourceEdgeCases) {
581
+ // Validate that empty leading, trailing, and in-between iovecs are handled:
582
+ // [] [] ['a'] [] ['b'] [].
583
+ std::string data = "ab";
584
+ char* buf = const_cast<char*>(data.data());
585
+ size_t used_so_far = 0;
586
+ static const int kLengths[] = {0, 0, 1, 0, 1, 0};
587
+ struct iovec iov[ARRAYSIZE(kLengths)];
588
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
589
+ iov[i].iov_base = buf + used_so_far;
590
+ iov[i].iov_len = kLengths[i];
591
+ used_so_far += kLengths[i];
592
+ }
593
+ std::string compressed;
594
+ snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed);
595
+ std::string uncompressed;
596
+ snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);
597
+ CHECK_EQ(data, uncompressed);
598
+ }
599
+
600
+ TEST(Snappy, IOVecSinkEdgeCases) {
764
601
  // Test some tricky edge cases in the iovec output that are not necessarily
765
602
  // exercised by random tests.
766
603
 
@@ -872,14 +709,13 @@ TEST(Snappy, IOVecCopyOverflow) {
872
709
  }
873
710
  }
874
711
 
875
- static bool CheckUncompressedLength(const std::string& compressed,
876
- size_t* ulength) {
712
+ bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
877
713
  const bool result1 = snappy::GetUncompressedLength(compressed.data(),
878
714
  compressed.size(),
879
715
  ulength);
880
716
 
881
717
  snappy::ByteArraySource source(compressed.data(), compressed.size());
882
- uint32 length;
718
+ uint32_t length;
883
719
  const bool result2 = snappy::GetUncompressedLength(&source, &length);
884
720
  CHECK_EQ(result1, result2);
885
721
  return result1;
@@ -954,17 +790,14 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
954
790
  EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
955
791
  }
956
792
 
957
- namespace {
958
-
959
793
  int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
794
+ uint64_t data;
960
795
  std::pair<size_t, bool> p =
961
- snappy::internal::FindMatchLength(s1, s2, s2 + length);
796
+ snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
962
797
  CHECK_EQ(p.first < 8, p.second);
963
798
  return p.first;
964
799
  }
965
800
 
966
- } // namespace
967
-
968
801
  TEST(Snappy, FindMatchLength) {
969
802
  // Exercise all different code paths through the function.
970
803
  // 64-bit version:
@@ -1058,12 +891,12 @@ TEST(Snappy, FindMatchLength) {
1058
891
  TEST(Snappy, FindMatchLengthRandom) {
1059
892
  constexpr int kNumTrials = 10000;
1060
893
  constexpr int kTypicalLength = 10;
1061
- std::minstd_rand0 rng(FLAGS_test_random_seed);
894
+ std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
1062
895
  std::uniform_int_distribution<int> uniform_byte(0, 255);
1063
896
  std::bernoulli_distribution one_in_two(1.0 / 2);
1064
897
  std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
1065
898
 
1066
- for (int i = 0; i < kNumTrials; i++) {
899
+ for (int i = 0; i < kNumTrials; ++i) {
1067
900
  std::string s, t;
1068
901
  char a = static_cast<char>(uniform_byte(rng));
1069
902
  char b = static_cast<char>(uniform_byte(rng));
@@ -1073,21 +906,20 @@ TEST(Snappy, FindMatchLengthRandom) {
1073
906
  }
1074
907
  DataEndingAtUnreadablePage u(s);
1075
908
  DataEndingAtUnreadablePage v(t);
1076
- int matched = TestFindMatchLength(u.data(), v.data(), t.size());
909
+ size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
1077
910
  if (matched == t.size()) {
1078
911
  EXPECT_EQ(s, t);
1079
912
  } else {
1080
913
  EXPECT_NE(s[matched], t[matched]);
1081
- for (int j = 0; j < matched; j++) {
914
+ for (size_t j = 0; j < matched; ++j) {
1082
915
  EXPECT_EQ(s[j], t[j]);
1083
916
  }
1084
917
  }
1085
918
  }
1086
919
  }
1087
920
 
1088
- static uint16 MakeEntry(unsigned int extra,
1089
- unsigned int len,
1090
- unsigned int copy_offset) {
921
+ uint16_t MakeEntry(unsigned int extra, unsigned int len,
922
+ unsigned int copy_offset) {
1091
923
  // Check that all of the fields fit within the allocated space
1092
924
  assert(extra == (extra & 0x7)); // At most 3 bits
1093
925
  assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
@@ -1104,403 +936,88 @@ TEST(Snappy, VerifyCharTable) {
1104
936
  using snappy::internal::COPY_4_BYTE_OFFSET;
1105
937
  using snappy::internal::char_table;
1106
938
 
1107
- uint16 dst[256];
939
+ uint16_t dst[256];
1108
940
 
1109
941
  // Place invalid entries in all places to detect missing initialization
1110
942
  int assigned = 0;
1111
- for (int i = 0; i < 256; i++) {
943
+ for (int i = 0; i < 256; ++i) {
1112
944
  dst[i] = 0xffff;
1113
945
  }
1114
946
 
1115
947
  // Small LITERAL entries. We store (len-1) in the top 6 bits.
1116
- for (unsigned int len = 1; len <= 60; len++) {
1117
- dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
948
+ for (uint8_t len = 1; len <= 60; ++len) {
949
+ dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
1118
950
  assigned++;
1119
951
  }
1120
952
 
1121
953
  // Large LITERAL entries. We use 60..63 in the high 6 bits to
1122
954
  // encode the number of bytes of length info that follow the opcode.
1123
- for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
955
+ for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
1124
956
  // We set the length field in the lookup table to 1 because extra
1125
957
  // bytes encode len-1.
1126
- dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
958
+ dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
1127
959
  assigned++;
1128
960
  }
1129
961
 
1130
962
  // COPY_1_BYTE_OFFSET.
1131
963
  //
1132
964
  // The tag byte in the compressed data stores len-4 in 3 bits, and
1133
- // offset/256 in 5 bits. offset%256 is stored in the next byte.
965
+ // offset/256 in 3 bits. offset%256 is stored in the next byte.
1134
966
  //
1135
967
  // This format is used for length in range [4..11] and offset in
1136
968
  // range [0..2047]
1137
- for (unsigned int len = 4; len < 12; len++) {
1138
- for (unsigned int offset = 0; offset < 2048; offset += 256) {
1139
- dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
1140
- MakeEntry(1, len, offset>>8);
969
+ for (uint8_t len = 4; len < 12; ++len) {
970
+ for (uint16_t offset = 0; offset < 2048; offset += 256) {
971
+ uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
972
+ dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
973
+ MakeEntry(1, len, offset_high);
1141
974
  assigned++;
1142
975
  }
1143
976
  }
1144
977
 
1145
978
  // COPY_2_BYTE_OFFSET.
1146
979
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
1147
- for (unsigned int len = 1; len <= 64; len++) {
1148
- dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
980
+ for (uint8_t len = 1; len <= 64; ++len) {
981
+ dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
1149
982
  assigned++;
1150
983
  }
1151
984
 
1152
985
  // COPY_4_BYTE_OFFSET.
1153
986
  // Tag contents len-1 in top 6 bits, and offset in next four bytes.
1154
- for (unsigned int len = 1; len <= 64; len++) {
1155
- dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
987
+ for (uint8_t len = 1; len <= 64; ++len) {
988
+ dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
1156
989
  assigned++;
1157
990
  }
1158
991
 
1159
992
  // Check that each entry was initialized exactly once.
1160
993
  EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
1161
- for (int i = 0; i < 256; i++) {
994
+ for (int i = 0; i < 256; ++i) {
1162
995
  EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
1163
996
  }
1164
997
 
1165
- if (FLAGS_snappy_dump_decompression_table) {
1166
- printf("static const uint16 char_table[256] = {\n ");
1167
- for (int i = 0; i < 256; i++) {
1168
- printf("0x%04x%s",
1169
- dst[i],
1170
- ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
998
+ if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
999
+ std::printf("static const uint16_t char_table[256] = {\n ");
1000
+ for (int i = 0; i < 256; ++i) {
1001
+ std::printf("0x%04x%s",
1002
+ dst[i],
1003
+ ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
1171
1004
  }
1172
- printf("};\n");
1005
+ std::printf("};\n");
1173
1006
  }
1174
1007
 
1175
1008
  // Check that computed table matched recorded table.
1176
- for (int i = 0; i < 256; i++) {
1009
+ for (int i = 0; i < 256; ++i) {
1177
1010
  EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
1178
1011
  }
1179
1012
  }
1180
1013
 
1181
- static void CompressFile(const char* fname) {
1182
- std::string fullinput;
1183
- CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1184
-
1185
- std::string compressed;
1186
- Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1187
-
1188
- CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
1189
- file::Defaults()));
1190
- }
1191
-
1192
- static void UncompressFile(const char* fname) {
1193
- std::string fullinput;
1194
- CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1195
-
1196
- size_t uncompLength;
1197
- CHECK(CheckUncompressedLength(fullinput, &uncompLength));
1198
-
1199
- std::string uncompressed;
1200
- uncompressed.resize(uncompLength);
1201
- CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1202
-
1203
- CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
1204
- file::Defaults()));
1205
- }
1206
-
1207
- static void MeasureFile(const char* fname) {
1208
- std::string fullinput;
1209
- CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1210
- printf("%-40s :\n", fname);
1211
-
1212
- int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
1213
- int end_len = fullinput.size();
1214
- if (FLAGS_end_len >= 0) {
1215
- end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
1216
- }
1217
- for (int len = start_len; len <= end_len; len++) {
1218
- const char* const input = fullinput.data();
1219
- int repeats = (FLAGS_bytes + len) / (len + 1);
1220
- if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
1221
- if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
1222
- if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
1223
-
1224
- // For block-size based measurements
1225
- if (0 && FLAGS_snappy) {
1226
- Measure(input, len, SNAPPY, repeats, 8<<10);
1227
- Measure(input, len, SNAPPY, repeats, 16<<10);
1228
- Measure(input, len, SNAPPY, repeats, 32<<10);
1229
- Measure(input, len, SNAPPY, repeats, 64<<10);
1230
- Measure(input, len, SNAPPY, repeats, 256<<10);
1231
- Measure(input, len, SNAPPY, repeats, 1024<<10);
1232
- }
1233
- }
1234
- }
1235
-
1236
- static struct {
1237
- const char* label;
1238
- const char* filename;
1239
- size_t size_limit;
1240
- } files[] = {
1241
- { "html", "html", 0 },
1242
- { "urls", "urls.10K", 0 },
1243
- { "jpg", "fireworks.jpeg", 0 },
1244
- { "jpg_200", "fireworks.jpeg", 200 },
1245
- { "pdf", "paper-100k.pdf", 0 },
1246
- { "html4", "html_x_4", 0 },
1247
- { "txt1", "alice29.txt", 0 },
1248
- { "txt2", "asyoulik.txt", 0 },
1249
- { "txt3", "lcet10.txt", 0 },
1250
- { "txt4", "plrabn12.txt", 0 },
1251
- { "pb", "geo.protodata", 0 },
1252
- { "gaviota", "kppkn.gtb", 0 },
1253
- };
1254
-
1255
- static void BM_UFlat(int iters, int arg) {
1256
- StopBenchmarkTiming();
1257
-
1258
- // Pick file to process based on "arg"
1259
- CHECK_GE(arg, 0);
1260
- CHECK_LT(arg, ARRAYSIZE(files));
1261
- std::string contents =
1262
- ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1263
-
1264
- std::string zcontents;
1265
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1266
- char* dst = new char[contents.size()];
1267
-
1268
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1269
- static_cast<int64>(contents.size()));
1270
- SetBenchmarkLabel(files[arg].label);
1271
- StartBenchmarkTiming();
1272
- while (iters-- > 0) {
1273
- CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
1274
- }
1275
- StopBenchmarkTiming();
1276
-
1277
- delete[] dst;
1278
- }
1279
- BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1280
-
1281
- static void BM_UValidate(int iters, int arg) {
1282
- StopBenchmarkTiming();
1283
-
1284
- // Pick file to process based on "arg"
1285
- CHECK_GE(arg, 0);
1286
- CHECK_LT(arg, ARRAYSIZE(files));
1287
- std::string contents =
1288
- ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1289
-
1290
- std::string zcontents;
1291
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1292
-
1293
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1294
- static_cast<int64>(contents.size()));
1295
- SetBenchmarkLabel(files[arg].label);
1296
- StartBenchmarkTiming();
1297
- while (iters-- > 0) {
1298
- CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
1299
- }
1300
- StopBenchmarkTiming();
1301
- }
1302
- BENCHMARK(BM_UValidate)->DenseRange(0, 4);
1303
-
1304
- static void BM_UIOVec(int iters, int arg) {
1305
- StopBenchmarkTiming();
1306
-
1307
- // Pick file to process based on "arg"
1308
- CHECK_GE(arg, 0);
1309
- CHECK_LT(arg, ARRAYSIZE(files));
1310
- std::string contents =
1311
- ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1312
-
1313
- std::string zcontents;
1314
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1315
-
1316
- // Uncompress into an iovec containing ten entries.
1317
- const int kNumEntries = 10;
1318
- struct iovec iov[kNumEntries];
1319
- char *dst = new char[contents.size()];
1320
- int used_so_far = 0;
1321
- for (int i = 0; i < kNumEntries; ++i) {
1322
- iov[i].iov_base = dst + used_so_far;
1323
- if (used_so_far == contents.size()) {
1324
- iov[i].iov_len = 0;
1325
- continue;
1326
- }
1327
-
1328
- if (i == kNumEntries - 1) {
1329
- iov[i].iov_len = contents.size() - used_so_far;
1330
- } else {
1331
- iov[i].iov_len = contents.size() / kNumEntries;
1332
- }
1333
- used_so_far += iov[i].iov_len;
1334
- }
1335
-
1336
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1337
- static_cast<int64>(contents.size()));
1338
- SetBenchmarkLabel(files[arg].label);
1339
- StartBenchmarkTiming();
1340
- while (iters-- > 0) {
1341
- CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
1342
- kNumEntries));
1343
- }
1344
- StopBenchmarkTiming();
1345
-
1346
- delete[] dst;
1347
- }
1348
- BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1349
-
1350
- static void BM_UFlatSink(int iters, int arg) {
1351
- StopBenchmarkTiming();
1352
-
1353
- // Pick file to process based on "arg"
1354
- CHECK_GE(arg, 0);
1355
- CHECK_LT(arg, ARRAYSIZE(files));
1356
- std::string contents =
1357
- ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1358
-
1359
- std::string zcontents;
1360
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1361
- char* dst = new char[contents.size()];
1362
-
1363
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1364
- static_cast<int64>(contents.size()));
1365
- SetBenchmarkLabel(files[arg].label);
1366
- StartBenchmarkTiming();
1367
- while (iters-- > 0) {
1368
- snappy::ByteArraySource source(zcontents.data(), zcontents.size());
1369
- snappy::UncheckedByteArraySink sink(dst);
1370
- CHECK(snappy::Uncompress(&source, &sink));
1371
- }
1372
- StopBenchmarkTiming();
1373
-
1374
- std::string s(dst, contents.size());
1375
- CHECK_EQ(contents, s);
1376
-
1377
- delete[] dst;
1378
- }
1379
-
1380
- BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
1381
-
1382
- static void BM_ZFlat(int iters, int arg) {
1383
- StopBenchmarkTiming();
1384
-
1385
- // Pick file to process based on "arg"
1386
- CHECK_GE(arg, 0);
1387
- CHECK_LT(arg, ARRAYSIZE(files));
1388
- std::string contents =
1389
- ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1390
-
1391
- char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1392
-
1393
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1394
- static_cast<int64>(contents.size()));
1395
- StartBenchmarkTiming();
1396
-
1397
- size_t zsize = 0;
1398
- while (iters-- > 0) {
1399
- snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
1400
- }
1401
- StopBenchmarkTiming();
1402
- const double compression_ratio =
1403
- static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1404
- SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
1405
- 100.0 * compression_ratio));
1406
- VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
1407
- files[arg].label, static_cast<int>(contents.size()),
1408
- static_cast<int>(zsize));
1409
- delete[] dst;
1410
- }
1411
- BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1412
-
1413
- static void BM_ZFlatAll(int iters, int arg) {
1414
- StopBenchmarkTiming();
1415
-
1416
- CHECK_EQ(arg, 0);
1417
- const int num_files = ARRAYSIZE(files);
1418
-
1419
- std::vector<std::string> contents(num_files);
1420
- std::vector<char*> dst(num_files);
1421
-
1422
- int64 total_contents_size = 0;
1423
- for (int i = 0; i < num_files; ++i) {
1424
- contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
1425
- dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
1426
- total_contents_size += contents[i].size();
1427
- }
1428
-
1429
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
1430
- StartBenchmarkTiming();
1431
-
1432
- size_t zsize = 0;
1433
- while (iters-- > 0) {
1434
- for (int i = 0; i < num_files; ++i) {
1435
- snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
1436
- &zsize);
1437
- }
1438
- }
1439
- StopBenchmarkTiming();
1440
-
1441
- for (int i = 0; i < num_files; ++i) {
1442
- delete[] dst[i];
1014
+ TEST(Snappy, TestBenchmarkFiles) {
1015
+ for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
1016
+ Verify(ReadTestDataFile(kTestDataFiles[i].filename,
1017
+ kTestDataFiles[i].size_limit));
1443
1018
  }
1444
- SetBenchmarkLabel(StrFormat("%d files", num_files));
1445
1019
  }
1446
- BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
1447
-
1448
- static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
1449
- StopBenchmarkTiming();
1450
-
1451
- CHECK_EQ(arg, 0);
1452
- CHECK_GT(ARRAYSIZE(files), 0);
1453
- const std::string base_content =
1454
- ReadTestDataFile(files[0].filename, files[0].size_limit);
1455
-
1456
- std::vector<std::string> contents;
1457
- std::vector<char*> dst;
1458
- int64 total_contents_size = 0;
1459
- for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
1460
- ++table_bits) {
1461
- std::string content = base_content;
1462
- content.resize(1 << table_bits);
1463
- dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
1464
- total_contents_size += content.size();
1465
- contents.push_back(std::move(content));
1466
- }
1467
-
1468
- size_t zsize = 0;
1469
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
1470
- StartBenchmarkTiming();
1471
- while (iters-- > 0) {
1472
- for (int i = 0; i < contents.size(); ++i) {
1473
- snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
1474
- &zsize);
1475
- }
1476
- }
1477
- StopBenchmarkTiming();
1478
1020
 
1479
- for (int i = 0; i < dst.size(); ++i) {
1480
- delete[] dst[i];
1481
- }
1482
- SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
1483
- }
1484
- BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
1021
+ } // namespace
1485
1022
 
1486
1023
  } // namespace snappy
1487
-
1488
- int main(int argc, char** argv) {
1489
- InitGoogle(argv[0], &argc, &argv, true);
1490
- RunSpecifiedBenchmarks();
1491
-
1492
- if (argc >= 2) {
1493
- for (int arg = 1; arg < argc; arg++) {
1494
- if (FLAGS_write_compressed) {
1495
- snappy::CompressFile(argv[arg]);
1496
- } else if (FLAGS_write_uncompressed) {
1497
- snappy::UncompressFile(argv[arg]);
1498
- } else {
1499
- snappy::MeasureFile(argv[arg]);
1500
- }
1501
- }
1502
- return 0;
1503
- }
1504
-
1505
- return RUN_ALL_TESTS();
1506
- }