snappy 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/main.yml +34 -0
  3. data/.github/workflows/publish.yml +34 -0
  4. data/Gemfile +3 -4
  5. data/Rakefile +32 -30
  6. data/ext/api.c +6 -1
  7. data/lib/snappy.rb +5 -5
  8. data/lib/snappy/hadoop/reader.rb +6 -2
  9. data/lib/snappy/reader.rb +11 -7
  10. data/lib/snappy/shim.rb +1 -1
  11. data/lib/snappy/version.rb +1 -1
  12. data/snappy.gemspec +13 -9
  13. data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
  14. data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
  15. data/test/snappy_hadoop_test.rb +26 -0
  16. data/test/snappy_reader_test.rb +148 -0
  17. data/test/snappy_test.rb +95 -0
  18. data/test/snappy_writer_test.rb +55 -0
  19. data/test/test_helper.rb +7 -0
  20. data/vendor/snappy/CMakeLists.txt +177 -54
  21. data/vendor/snappy/NEWS +8 -0
  22. data/vendor/snappy/README.md +19 -20
  23. data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
  24. data/vendor/snappy/cmake/config.h.in +6 -6
  25. data/vendor/snappy/docs/README.md +72 -0
  26. data/vendor/snappy/snappy-internal.h +12 -5
  27. data/vendor/snappy/snappy-stubs-internal.cc +1 -1
  28. data/vendor/snappy/snappy-stubs-internal.h +60 -15
  29. data/vendor/snappy/snappy-stubs-public.h.in +16 -36
  30. data/vendor/snappy/snappy-test.cc +16 -15
  31. data/vendor/snappy/snappy-test.h +12 -60
  32. data/vendor/snappy/snappy.cc +333 -187
  33. data/vendor/snappy/snappy.h +14 -10
  34. data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
  35. data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
  36. data/vendor/snappy/snappy_unittest.cc +220 -124
  37. metadata +26 -20
  38. data/.travis.yml +0 -31
  39. data/smoke.sh +0 -8
  40. data/test/hadoop/test-snappy-hadoop-reader.rb +0 -103
  41. data/test/hadoop/test-snappy-hadoop-writer.rb +0 -48
  42. data/test/test-snappy-hadoop.rb +0 -22
  43. data/test/test-snappy-reader.rb +0 -129
  44. data/test/test-snappy-writer.rb +0 -55
  45. data/test/test-snappy.rb +0 -58
  46. data/vendor/snappy/cmake/SnappyConfig.cmake +0 -1
@@ -39,7 +39,7 @@
39
39
  #ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
40
40
  #define THIRD_PARTY_SNAPPY_SNAPPY_H__
41
41
 
42
- #include <stddef.h>
42
+ #include <cstddef>
43
43
  #include <string>
44
44
 
45
45
  #include "snappy-stubs-public.h"
@@ -69,11 +69,12 @@ namespace snappy {
69
69
  // Higher-level string based routines (should be sufficient for most users)
70
70
  // ------------------------------------------------------------------------
71
71
 
72
- // Sets "*output" to the compressed version of "input[0,input_length-1]".
73
- // Original contents of *output are lost.
72
+ // Sets "*compressed" to the compressed version of "input[0,input_length-1]".
73
+ // Original contents of *compressed are lost.
74
74
  //
75
- // REQUIRES: "input[]" is not an alias of "*output".
76
- size_t Compress(const char* input, size_t input_length, string* output);
75
+ // REQUIRES: "input[]" is not an alias of "*compressed".
76
+ size_t Compress(const char* input, size_t input_length,
77
+ std::string* compressed);
77
78
 
78
79
  // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
79
80
  // Original contents of "*uncompressed" are lost.
@@ -82,7 +83,7 @@ namespace snappy {
82
83
  //
83
84
  // returns false if the message is corrupted and could not be decompressed
84
85
  bool Uncompress(const char* compressed, size_t compressed_length,
85
- string* uncompressed);
86
+ std::string* uncompressed);
86
87
 
87
88
  // Decompresses "compressed" to "*uncompressed".
88
89
  //
@@ -193,11 +194,14 @@ namespace snappy {
193
194
  // Note that there might be older data around that is compressed with larger
194
195
  // block sizes, so the decompression code should not rely on the
195
196
  // non-existence of long backreferences.
196
- static const int kBlockLog = 16;
197
- static const size_t kBlockSize = 1 << kBlockLog;
197
+ static constexpr int kBlockLog = 16;
198
+ static constexpr size_t kBlockSize = 1 << kBlockLog;
198
199
 
199
- static const int kMaxHashTableBits = 14;
200
- static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
200
+ static constexpr int kMinHashTableBits = 8;
201
+ static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
202
+
203
+ static constexpr int kMaxHashTableBits = 14;
204
+ static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
201
205
  } // end namespace snappy
202
206
 
203
207
  #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
@@ -0,0 +1,59 @@
1
+ // Copyright 2019 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // libFuzzer harness for fuzzing snappy compression code.
30
+
31
+ #include <cassert>
32
+ #include <cstddef>
33
+ #include <cstdint>
34
+ #include <string>
35
+
36
+ #include "snappy.h"
37
+
38
+ // Entry point for LibFuzzer.
39
+ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
40
+ std::string input(reinterpret_cast<const char*>(data), size);
41
+
42
+ std::string compressed;
43
+ size_t compressed_size =
44
+ snappy::Compress(input.data(), input.size(), &compressed);
45
+
46
+ (void)compressed_size; // Variable only used in debug builds.
47
+ assert(compressed_size == compressed.size());
48
+ assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
49
+ assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
50
+
51
+ std::string uncompressed_after_compress;
52
+ bool uncompress_succeeded = snappy::Uncompress(
53
+ compressed.data(), compressed.size(), &uncompressed_after_compress);
54
+
55
+ (void)uncompress_succeeded; // Variable only used in debug builds.
56
+ assert(uncompress_succeeded);
57
+ assert(input == uncompressed_after_compress);
58
+ return 0;
59
+ }
@@ -0,0 +1,57 @@
1
+ // Copyright 2019 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // libFuzzer harness for fuzzing snappy's decompression code.
30
+
31
+ #include <cassert>
32
+ #include <cstddef>
33
+ #include <cstdint>
34
+ #include <string>
35
+
36
+ #include "snappy.h"
37
+
38
+ // Entry point for LibFuzzer.
39
+ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
40
+ std::string input(reinterpret_cast<const char*>(data), size);
41
+
42
+ // Avoid self-crafted decompression bombs.
43
+ size_t uncompressed_size;
44
+ constexpr size_t kMaxUncompressedSize = 1 << 20;
45
+ bool get_uncompressed_length_succeeded = snappy::GetUncompressedLength(
46
+ input.data(), input.size(), &uncompressed_size);
47
+ if (!get_uncompressed_length_succeeded ||
48
+ (uncompressed_size > kMaxUncompressedSize)) {
49
+ return 0;
50
+ }
51
+
52
+ std::string uncompressed;
53
+ // The return value of snappy::Uncompress() is ignored because decompression
54
+ // will fail on invalid inputs.
55
+ snappy::Uncompress(input.data(), input.size(), &uncompressed);
56
+ return 0;
57
+ }
@@ -29,8 +29,8 @@
29
29
  #include <math.h>
30
30
  #include <stdlib.h>
31
31
 
32
-
33
32
  #include <algorithm>
33
+ #include <random>
34
34
  #include <string>
35
35
  #include <utility>
36
36
  #include <vector>
@@ -73,7 +73,7 @@ namespace snappy {
73
73
  // be able to read previously allocated memory while doing heap allocations.
74
74
  class DataEndingAtUnreadablePage {
75
75
  public:
76
- explicit DataEndingAtUnreadablePage(const string& s) {
76
+ explicit DataEndingAtUnreadablePage(const std::string& s) {
77
77
  const size_t page_size = sysconf(_SC_PAGESIZE);
78
78
  const size_t size = s.size();
79
79
  // Round up space for string to a multiple of page_size.
@@ -112,7 +112,7 @@ class DataEndingAtUnreadablePage {
112
112
  #else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
113
113
 
114
114
  // Fallback for systems without mmap.
115
- typedef string DataEndingAtUnreadablePage;
115
+ using DataEndingAtUnreadablePage = std::string;
116
116
 
117
117
  #endif
118
118
 
@@ -154,7 +154,7 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
154
154
  // "compressed" must be preinitialized to at least MinimumRequiredOutputSpace(input_size, comp)
155
155
  // number of bytes, and may contain junk bytes at the end after return.
156
156
  static bool Compress(const char* input, size_t input_size, CompressorType comp,
157
- string* compressed, bool compressed_is_preallocated) {
157
+ std::string* compressed, bool compressed_is_preallocated) {
158
158
  if (!compressed_is_preallocated) {
159
159
  compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
160
160
  }
@@ -215,8 +215,8 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
215
215
  return true;
216
216
  }
217
217
 
218
- static bool Uncompress(const string& compressed, CompressorType comp,
219
- int size, string* output) {
218
+ static bool Uncompress(const std::string& compressed, CompressorType comp,
219
+ int size, std::string* output) {
220
220
  switch (comp) {
221
221
  #ifdef ZLIB_VERSION
222
222
  case ZLIB: {
@@ -279,8 +279,8 @@ static void Measure(const char* data,
279
279
  int num_blocks = (length + block_size - 1) / block_size;
280
280
  std::vector<const char*> input(num_blocks);
281
281
  std::vector<size_t> input_length(num_blocks);
282
- std::vector<string> compressed(num_blocks);
283
- std::vector<string> output(num_blocks);
282
+ std::vector<std::string> compressed(num_blocks);
283
+ std::vector<std::string> output(num_blocks);
284
284
  for (int b = 0; b < num_blocks; b++) {
285
285
  int input_start = b * block_size;
286
286
  int input_limit = std::min<int>((b+1)*block_size, length);
@@ -344,11 +344,10 @@ static void Measure(const char* data,
344
344
 
345
345
  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
346
346
  float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
347
- string x = names[comp];
347
+ std::string x = names[comp];
348
348
  x += ":";
349
- string urate = (uncomp_rate >= 0)
350
- ? StringPrintf("%.1f", uncomp_rate)
351
- : string("?");
349
+ std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
350
+ : std::string("?");
352
351
  printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
353
352
  "comp %5.1f MB/s uncomp %5s MB/s\n",
354
353
  x.c_str(),
@@ -359,8 +358,8 @@ static void Measure(const char* data,
359
358
  urate.c_str());
360
359
  }
361
360
 
362
- static int VerifyString(const string& input) {
363
- string compressed;
361
+ static int VerifyString(const std::string& input) {
362
+ std::string compressed;
364
363
  DataEndingAtUnreadablePage i(input);
365
364
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
366
365
  CHECK_EQ(written, compressed.size());
@@ -368,15 +367,15 @@ static int VerifyString(const string& input) {
368
367
  snappy::MaxCompressedLength(input.size()));
369
368
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
370
369
 
371
- string uncompressed;
370
+ std::string uncompressed;
372
371
  DataEndingAtUnreadablePage c(compressed);
373
372
  CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
374
373
  CHECK_EQ(uncompressed, input);
375
374
  return uncompressed.size();
376
375
  }
377
376
 
378
- static void VerifyStringSink(const string& input) {
379
- string compressed;
377
+ static void VerifyStringSink(const std::string& input) {
378
+ std::string compressed;
380
379
  DataEndingAtUnreadablePage i(input);
381
380
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
382
381
  CHECK_EQ(written, compressed.size());
@@ -384,7 +383,7 @@ static void VerifyStringSink(const string& input) {
384
383
  snappy::MaxCompressedLength(input.size()));
385
384
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
386
385
 
387
- string uncompressed;
386
+ std::string uncompressed;
388
387
  uncompressed.resize(input.size());
389
388
  snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
390
389
  DataEndingAtUnreadablePage c(compressed);
@@ -393,8 +392,8 @@ static void VerifyStringSink(const string& input) {
393
392
  CHECK_EQ(uncompressed, input);
394
393
  }
395
394
 
396
- static void VerifyIOVec(const string& input) {
397
- string compressed;
395
+ static void VerifyIOVec(const std::string& input) {
396
+ std::string compressed;
398
397
  DataEndingAtUnreadablePage i(input);
399
398
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
400
399
  CHECK_EQ(written, compressed.size());
@@ -405,23 +404,28 @@ static void VerifyIOVec(const string& input) {
405
404
  // Try uncompressing into an iovec containing a random number of entries
406
405
  // ranging from 1 to 10.
407
406
  char* buf = new char[input.size()];
408
- ACMRandom rnd(input.size());
409
- size_t num = rnd.Next() % 10 + 1;
407
+ std::minstd_rand0 rng(input.size());
408
+ std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
409
+ size_t num = uniform_1_to_10(rng);
410
410
  if (input.size() < num) {
411
411
  num = input.size();
412
412
  }
413
413
  struct iovec* iov = new iovec[num];
414
414
  int used_so_far = 0;
415
+ std::bernoulli_distribution one_in_five(1.0 / 5);
415
416
  for (size_t i = 0; i < num; ++i) {
417
+ assert(used_so_far < input.size());
416
418
  iov[i].iov_base = buf + used_so_far;
417
419
  if (i == num - 1) {
418
420
  iov[i].iov_len = input.size() - used_so_far;
419
421
  } else {
420
422
  // Randomly choose to insert a 0 byte entry.
421
- if (rnd.OneIn(5)) {
423
+ if (one_in_five(rng)) {
422
424
  iov[i].iov_len = 0;
423
425
  } else {
424
- iov[i].iov_len = rnd.Uniform(input.size());
426
+ std::uniform_int_distribution<size_t> uniform_not_used_so_far(
427
+ 0, input.size() - used_so_far - 1);
428
+ iov[i].iov_len = uniform_not_used_so_far(rng);
425
429
  }
426
430
  }
427
431
  used_so_far += iov[i].iov_len;
@@ -435,22 +439,22 @@ static void VerifyIOVec(const string& input) {
435
439
 
436
440
  // Test that data compressed by a compressor that does not
437
441
  // obey block sizes is uncompressed properly.
438
- static void VerifyNonBlockedCompression(const string& input) {
442
+ static void VerifyNonBlockedCompression(const std::string& input) {
439
443
  if (input.length() > snappy::kBlockSize) {
440
444
  // We cannot test larger blocks than the maximum block size, obviously.
441
445
  return;
442
446
  }
443
447
 
444
- string prefix;
448
+ std::string prefix;
445
449
  Varint::Append32(&prefix, input.size());
446
450
 
447
451
  // Setup compression table
448
- snappy::internal::WorkingMemory wmem;
452
+ snappy::internal::WorkingMemory wmem(input.size());
449
453
  int table_size;
450
454
  uint16* table = wmem.GetHashTable(input.size(), &table_size);
451
455
 
452
456
  // Compress entire input in one shot
453
- string compressed;
457
+ std::string compressed;
454
458
  compressed += prefix;
455
459
  compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
456
460
  char* dest = string_as_array(&compressed) + prefix.size();
@@ -458,13 +462,13 @@ static void VerifyNonBlockedCompression(const string& input) {
458
462
  dest, table, table_size);
459
463
  compressed.resize(end - compressed.data());
460
464
 
461
- // Uncompress into string
462
- string uncomp_str;
465
+ // Uncompress into std::string
466
+ std::string uncomp_str;
463
467
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
464
468
  CHECK_EQ(uncomp_str, input);
465
469
 
466
470
  // Uncompress using source/sink
467
- string uncomp_str2;
471
+ std::string uncomp_str2;
468
472
  uncomp_str2.resize(input.size());
469
473
  snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
470
474
  snappy::ByteArraySource source(compressed.data(), compressed.size());
@@ -476,28 +480,28 @@ static void VerifyNonBlockedCompression(const string& input) {
476
480
  static const int kNumBlocks = 10;
477
481
  struct iovec vec[kNumBlocks];
478
482
  const int block_size = 1 + input.size() / kNumBlocks;
479
- string iovec_data(block_size * kNumBlocks, 'x');
483
+ std::string iovec_data(block_size * kNumBlocks, 'x');
480
484
  for (int i = 0; i < kNumBlocks; i++) {
481
485
  vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
482
486
  vec[i].iov_len = block_size;
483
487
  }
484
488
  CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
485
489
  vec, kNumBlocks));
486
- CHECK_EQ(string(iovec_data.data(), input.size()), input);
490
+ CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
487
491
  }
488
492
  }
489
493
 
490
494
  // Expand the input so that it is at least K times as big as block size
491
- static string Expand(const string& input) {
495
+ static std::string Expand(const std::string& input) {
492
496
  static const int K = 3;
493
- string data = input;
497
+ std::string data = input;
494
498
  while (data.size() < K * snappy::kBlockSize) {
495
499
  data += input;
496
500
  }
497
501
  return data;
498
502
  }
499
503
 
500
- static int Verify(const string& input) {
504
+ static int Verify(const std::string& input) {
501
505
  VLOG(1) << "Verifying input of size " << input.size();
502
506
 
503
507
  // Compress using string based routines
@@ -509,7 +513,7 @@ static int Verify(const string& input) {
509
513
  VerifyNonBlockedCompression(input);
510
514
  VerifyIOVec(input);
511
515
  if (!input.empty()) {
512
- const string expanded = Expand(input);
516
+ const std::string expanded = Expand(input);
513
517
  VerifyNonBlockedCompression(expanded);
514
518
  VerifyIOVec(input);
515
519
  }
@@ -517,21 +521,20 @@ static int Verify(const string& input) {
517
521
  return result;
518
522
  }
519
523
 
520
-
521
- static bool IsValidCompressedBuffer(const string& c) {
524
+ static bool IsValidCompressedBuffer(const std::string& c) {
522
525
  return snappy::IsValidCompressedBuffer(c.data(), c.size());
523
526
  }
524
- static bool Uncompress(const string& c, string* u) {
527
+ static bool Uncompress(const std::string& c, std::string* u) {
525
528
  return snappy::Uncompress(c.data(), c.size(), u);
526
529
  }
527
530
 
528
531
  // This test checks to ensure that snappy doesn't coredump if it gets
529
532
  // corrupted data.
530
533
  TEST(CorruptedTest, VerifyCorrupted) {
531
- string source = "making sure we don't crash with corrupted input";
534
+ std::string source = "making sure we don't crash with corrupted input";
532
535
  VLOG(1) << source;
533
- string dest;
534
- string uncmp;
536
+ std::string dest;
537
+ std::string uncmp;
535
538
  snappy::Compress(source.data(), source.size(), &dest);
536
539
 
537
540
  // Mess around with the data. It's hard to simulate all possible
@@ -578,9 +581,9 @@ TEST(CorruptedTest, VerifyCorrupted) {
578
581
 
579
582
  // try reading stuff in from a bad file.
580
583
  for (int i = 1; i <= 3; ++i) {
581
- string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(),
582
- 0);
583
- string uncmp;
584
+ std::string data =
585
+ ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
586
+ std::string uncmp;
584
587
  // check that we don't return a crazy length
585
588
  size_t ulen;
586
589
  CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
@@ -598,7 +601,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
598
601
  // These mirror the compression code in snappy.cc, but are copied
599
602
  here so that we can bypass some limitations in how snappy.cc
600
603
  // invokes these routines.
601
- static void AppendLiteral(string* dst, const string& literal) {
604
+ static void AppendLiteral(std::string* dst, const std::string& literal) {
602
605
  if (literal.empty()) return;
603
606
  int n = literal.size() - 1;
604
607
  if (n < 60) {
@@ -613,12 +616,12 @@ static void AppendLiteral(string* dst, const string& literal) {
613
616
  n >>= 8;
614
617
  }
615
618
  dst->push_back(0 | ((59+count) << 2));
616
- *dst += string(number, count);
619
+ *dst += std::string(number, count);
617
620
  }
618
621
  *dst += literal;
619
622
  }
620
623
 
621
- static void AppendCopy(string* dst, int offset, int length) {
624
+ static void AppendCopy(std::string* dst, int offset, int length) {
622
625
  while (length > 0) {
623
626
  // Figure out how much to copy in one shot
624
627
  int to_copy;
@@ -655,51 +658,67 @@ TEST(Snappy, SimpleTests) {
655
658
  Verify("ab");
656
659
  Verify("abc");
657
660
 
658
- Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
659
- Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
660
- Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
661
- Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
662
- Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
661
+ Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
662
+ Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
663
+ Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
664
+ Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
665
+ Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
663
666
  }
664
667
 
665
668
  // Verify max blowup (lots of four-byte copies)
666
669
  TEST(Snappy, MaxBlowup) {
667
- string input;
668
- for (int i = 0; i < 20000; i++) {
669
- ACMRandom rnd(i);
670
- uint32 bytes = static_cast<uint32>(rnd.Next());
671
- input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
672
- }
673
- for (int i = 19999; i >= 0; i--) {
674
- ACMRandom rnd(i);
675
- uint32 bytes = static_cast<uint32>(rnd.Next());
676
- input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
670
+ std::mt19937 rng;
671
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
672
+ std::string input;
673
+ for (int i = 0; i < 80000; ++i)
674
+ input.push_back(static_cast<char>(uniform_byte(rng)));
675
+
676
+ for (int i = 0; i < 80000; i += 4) {
677
+ std::string four_bytes(input.end() - i - 4, input.end() - i);
678
+ input.append(four_bytes);
677
679
  }
678
680
  Verify(input);
679
681
  }
680
682
 
681
683
  TEST(Snappy, RandomData) {
682
- ACMRandom rnd(FLAGS_test_random_seed);
683
-
684
- const int num_ops = 20000;
684
+ std::minstd_rand0 rng(FLAGS_test_random_seed);
685
+ std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
686
+ std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
687
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
688
+ std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
689
+ std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
690
+ std::bernoulli_distribution one_in_ten(1.0 / 10);
691
+
692
+ constexpr int num_ops = 20000;
685
693
  for (int i = 0; i < num_ops; i++) {
686
694
  if ((i % 1000) == 0) {
687
695
  VLOG(0) << "Random op " << i << " of " << num_ops;
688
696
  }
689
697
 
690
- string x;
691
- size_t len = rnd.Uniform(4096);
698
+ std::string x;
699
+ size_t len = uniform_4k(rng);
692
700
  if (i < 100) {
693
- len = 65536 + rnd.Uniform(65536);
701
+ len = 65536 + uniform_64k(rng);
694
702
  }
695
703
  while (x.size() < len) {
696
704
  int run_len = 1;
697
- if (rnd.OneIn(10)) {
698
- run_len = rnd.Skewed(8);
705
+ if (one_in_ten(rng)) {
706
+ int skewed_bits = uniform_0_to_8(rng);
707
+ // int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
708
+ std::uniform_int_distribution<int> skewed_low(0,
709
+ (1 << skewed_bits) - 1);
710
+ run_len = skewed_low(rng);
711
+ }
712
+ char c = static_cast<char>(uniform_byte(rng));
713
+ if (i >= 100) {
714
+ int skewed_bits = uniform_0_to_3(rng);
715
+ // int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
716
+ std::uniform_int_distribution<int> skewed_low(0,
717
+ (1 << skewed_bits) - 1);
718
+ c = static_cast<char>(skewed_low(rng));
699
719
  }
700
- char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
701
720
  while (run_len-- > 0 && x.size() < len) {
702
- x += c;
721
+ x.push_back(c);
703
722
  }
704
723
  }
705
724
 
@@ -713,19 +732,19 @@ TEST(Snappy, FourByteOffset) {
713
732
  // copy manually.
714
733
 
715
734
  // The two fragments that make up the input string.
716
- string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
717
- string fragment2 = "some other string";
735
+ std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
736
+ std::string fragment2 = "some other string";
718
737
 
719
738
  // How many times each fragment is emitted.
720
739
  const int n1 = 2;
721
740
  const int n2 = 100000 / fragment2.size();
722
741
  const int length = n1 * fragment1.size() + n2 * fragment2.size();
723
742
 
724
- string compressed;
743
+ std::string compressed;
725
744
  Varint::Append32(&compressed, length);
726
745
 
727
746
  AppendLiteral(&compressed, fragment1);
728
- string src = fragment1;
747
+ std::string src = fragment1;
729
748
  for (int i = 0; i < n2; i++) {
730
749
  AppendLiteral(&compressed, fragment2);
731
750
  src += fragment2;
@@ -734,7 +753,7 @@ TEST(Snappy, FourByteOffset) {
734
753
  src += fragment1;
735
754
  CHECK_EQ(length, src.size());
736
755
 
737
- string uncompressed;
756
+ std::string uncompressed;
738
757
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
739
758
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
740
759
  &uncompressed));
@@ -756,7 +775,7 @@ TEST(Snappy, IOVecEdgeCases) {
756
775
  iov[i].iov_len = kLengths[i];
757
776
  }
758
777
 
759
- string compressed;
778
+ std::string compressed;
760
779
  Varint::Append32(&compressed, 22);
761
780
 
762
781
  // A literal whose output crosses three blocks.
@@ -817,7 +836,7 @@ TEST(Snappy, IOVecLiteralOverflow) {
817
836
  iov[i].iov_len = kLengths[i];
818
837
  }
819
838
 
820
- string compressed;
839
+ std::string compressed;
821
840
  Varint::Append32(&compressed, 8);
822
841
 
823
842
  AppendLiteral(&compressed, "12345678");
@@ -839,7 +858,7 @@ TEST(Snappy, IOVecCopyOverflow) {
839
858
  iov[i].iov_len = kLengths[i];
840
859
  }
841
860
 
842
- string compressed;
861
+ std::string compressed;
843
862
  Varint::Append32(&compressed, 8);
844
863
 
845
864
  AppendLiteral(&compressed, "123");
@@ -853,7 +872,7 @@ TEST(Snappy, IOVecCopyOverflow) {
853
872
  }
854
873
  }
855
874
 
856
- static bool CheckUncompressedLength(const string& compressed,
875
+ static bool CheckUncompressedLength(const std::string& compressed,
857
876
  size_t* ulength) {
858
877
  const bool result1 = snappy::GetUncompressedLength(compressed.data(),
859
878
  compressed.size(),
@@ -867,7 +886,7 @@ static bool CheckUncompressedLength(const string& compressed,
867
886
  }
868
887
 
869
888
  TEST(SnappyCorruption, TruncatedVarint) {
870
- string compressed, uncompressed;
889
+ std::string compressed, uncompressed;
871
890
  size_t ulength;
872
891
  compressed.push_back('\xf0');
873
892
  CHECK(!CheckUncompressedLength(compressed, &ulength));
@@ -877,7 +896,7 @@ TEST(SnappyCorruption, TruncatedVarint) {
877
896
  }
878
897
 
879
898
  TEST(SnappyCorruption, UnterminatedVarint) {
880
- string compressed, uncompressed;
899
+ std::string compressed, uncompressed;
881
900
  size_t ulength;
882
901
  compressed.push_back('\x80');
883
902
  compressed.push_back('\x80');
@@ -892,7 +911,7 @@ TEST(SnappyCorruption, UnterminatedVarint) {
892
911
  }
893
912
 
894
913
  TEST(SnappyCorruption, OverflowingVarint) {
895
- string compressed, uncompressed;
914
+ std::string compressed, uncompressed;
896
915
  size_t ulength;
897
916
  compressed.push_back('\xfb');
898
917
  compressed.push_back('\xff');
@@ -909,14 +928,14 @@ TEST(Snappy, ReadPastEndOfBuffer) {
909
928
  // Check that we do not read past end of input
910
929
 
911
930
  // Make a compressed string that ends with a single-byte literal
912
- string compressed;
931
+ std::string compressed;
913
932
  Varint::Append32(&compressed, 1);
914
933
  AppendLiteral(&compressed, "x");
915
934
 
916
- string uncompressed;
935
+ std::string uncompressed;
917
936
  DataEndingAtUnreadablePage c(compressed);
918
937
  CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
919
- CHECK_EQ(uncompressed, string("x"));
938
+ CHECK_EQ(uncompressed, std::string("x"));
920
939
  }
921
940
 
922
941
  // Check for an infinite loop caused by a copy with offset==0
@@ -1037,17 +1056,20 @@ TEST(Snappy, FindMatchLength) {
1037
1056
  }
1038
1057
 
1039
1058
  TEST(Snappy, FindMatchLengthRandom) {
1040
- const int kNumTrials = 10000;
1041
- const int kTypicalLength = 10;
1042
- ACMRandom rnd(FLAGS_test_random_seed);
1059
+ constexpr int kNumTrials = 10000;
1060
+ constexpr int kTypicalLength = 10;
1061
+ std::minstd_rand0 rng(FLAGS_test_random_seed);
1062
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
1063
+ std::bernoulli_distribution one_in_two(1.0 / 2);
1064
+ std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
1043
1065
 
1044
1066
  for (int i = 0; i < kNumTrials; i++) {
1045
- string s, t;
1046
- char a = rnd.Rand8();
1047
- char b = rnd.Rand8();
1048
- while (!rnd.OneIn(kTypicalLength)) {
1049
- s.push_back(rnd.OneIn(2) ? a : b);
1050
- t.push_back(rnd.OneIn(2) ? a : b);
1067
+ std::string s, t;
1068
+ char a = static_cast<char>(uniform_byte(rng));
1069
+ char b = static_cast<char>(uniform_byte(rng));
1070
+ while (!one_in_typical_length(rng)) {
1071
+ s.push_back(one_in_two(rng) ? a : b);
1072
+ t.push_back(one_in_two(rng) ? a : b);
1051
1073
  }
1052
1074
  DataEndingAtUnreadablePage u(s);
1053
1075
  DataEndingAtUnreadablePage v(t);
@@ -1157,33 +1179,33 @@ TEST(Snappy, VerifyCharTable) {
1157
1179
  }
1158
1180
 
1159
1181
  static void CompressFile(const char* fname) {
1160
- string fullinput;
1182
+ std::string fullinput;
1161
1183
  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1162
1184
 
1163
- string compressed;
1185
+ std::string compressed;
1164
1186
  Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1165
1187
 
1166
- CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
1188
+ CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
1167
1189
  file::Defaults()));
1168
1190
  }
1169
1191
 
1170
1192
  static void UncompressFile(const char* fname) {
1171
- string fullinput;
1193
+ std::string fullinput;
1172
1194
  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1173
1195
 
1174
1196
  size_t uncompLength;
1175
1197
  CHECK(CheckUncompressedLength(fullinput, &uncompLength));
1176
1198
 
1177
- string uncompressed;
1199
+ std::string uncompressed;
1178
1200
  uncompressed.resize(uncompLength);
1179
1201
  CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1180
1202
 
1181
- CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
1203
+ CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
1182
1204
  file::Defaults()));
1183
1205
  }
1184
1206
 
1185
1207
  static void MeasureFile(const char* fname) {
1186
- string fullinput;
1208
+ std::string fullinput;
1187
1209
  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1188
1210
  printf("%-40s :\n", fname);
1189
1211
 
@@ -1236,10 +1258,10 @@ static void BM_UFlat(int iters, int arg) {
1236
1258
  // Pick file to process based on "arg"
1237
1259
  CHECK_GE(arg, 0);
1238
1260
  CHECK_LT(arg, ARRAYSIZE(files));
1239
- string contents = ReadTestDataFile(files[arg].filename,
1240
- files[arg].size_limit);
1261
+ std::string contents =
1262
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1241
1263
 
1242
- string zcontents;
1264
+ std::string zcontents;
1243
1265
  snappy::Compress(contents.data(), contents.size(), &zcontents);
1244
1266
  char* dst = new char[contents.size()];
1245
1267
 
@@ -1262,10 +1284,10 @@ static void BM_UValidate(int iters, int arg) {
1262
1284
  // Pick file to process based on "arg"
1263
1285
  CHECK_GE(arg, 0);
1264
1286
  CHECK_LT(arg, ARRAYSIZE(files));
1265
- string contents = ReadTestDataFile(files[arg].filename,
1266
- files[arg].size_limit);
1287
+ std::string contents =
1288
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1267
1289
 
1268
- string zcontents;
1290
+ std::string zcontents;
1269
1291
  snappy::Compress(contents.data(), contents.size(), &zcontents);
1270
1292
 
1271
1293
  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
@@ -1285,10 +1307,10 @@ static void BM_UIOVec(int iters, int arg) {
1285
1307
  // Pick file to process based on "arg"
1286
1308
  CHECK_GE(arg, 0);
1287
1309
  CHECK_LT(arg, ARRAYSIZE(files));
1288
- string contents = ReadTestDataFile(files[arg].filename,
1289
- files[arg].size_limit);
1310
+ std::string contents =
1311
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1290
1312
 
1291
- string zcontents;
1313
+ std::string zcontents;
1292
1314
  snappy::Compress(contents.data(), contents.size(), &zcontents);
1293
1315
 
1294
1316
  // Uncompress into an iovec containing ten entries.
@@ -1331,10 +1353,10 @@ static void BM_UFlatSink(int iters, int arg) {
1331
1353
  // Pick file to process based on "arg"
1332
1354
  CHECK_GE(arg, 0);
1333
1355
  CHECK_LT(arg, ARRAYSIZE(files));
1334
- string contents = ReadTestDataFile(files[arg].filename,
1335
- files[arg].size_limit);
1356
+ std::string contents =
1357
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1336
1358
 
1337
- string zcontents;
1359
+ std::string zcontents;
1338
1360
  snappy::Compress(contents.data(), contents.size(), &zcontents);
1339
1361
  char* dst = new char[contents.size()];
1340
1362
 
@@ -1349,7 +1371,7 @@ static void BM_UFlatSink(int iters, int arg) {
1349
1371
  }
1350
1372
  StopBenchmarkTiming();
1351
1373
 
1352
- string s(dst, contents.size());
1374
+ std::string s(dst, contents.size());
1353
1375
  CHECK_EQ(contents, s);
1354
1376
 
1355
1377
  delete[] dst;
@@ -1363,8 +1385,8 @@ static void BM_ZFlat(int iters, int arg) {
1363
1385
  // Pick file to process based on "arg"
1364
1386
  CHECK_GE(arg, 0);
1365
1387
  CHECK_LT(arg, ARRAYSIZE(files));
1366
- string contents = ReadTestDataFile(files[arg].filename,
1367
- files[arg].size_limit);
1388
+ std::string contents =
1389
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1368
1390
 
1369
1391
  char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1370
1392
 
@@ -1379,14 +1401,88 @@ static void BM_ZFlat(int iters, int arg) {
1379
1401
  StopBenchmarkTiming();
1380
1402
  const double compression_ratio =
1381
1403
  static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1382
- SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
1383
- files[arg].label, 100.0 * compression_ratio));
1384
- VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
1385
- files[arg].label, contents.size(), zsize);
1404
+ SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
1405
+ 100.0 * compression_ratio));
1406
+ VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
1407
+ files[arg].label, static_cast<int>(contents.size()),
1408
+ static_cast<int>(zsize));
1386
1409
  delete[] dst;
1387
1410
  }
1388
1411
  BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1389
1412
 
1413
+ static void BM_ZFlatAll(int iters, int arg) {
1414
+ StopBenchmarkTiming();
1415
+
1416
+ CHECK_EQ(arg, 0);
1417
+ const int num_files = ARRAYSIZE(files);
1418
+
1419
+ std::vector<std::string> contents(num_files);
1420
+ std::vector<char*> dst(num_files);
1421
+
1422
+ int64 total_contents_size = 0;
1423
+ for (int i = 0; i < num_files; ++i) {
1424
+ contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
1425
+ dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
1426
+ total_contents_size += contents[i].size();
1427
+ }
1428
+
1429
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
1430
+ StartBenchmarkTiming();
1431
+
1432
+ size_t zsize = 0;
1433
+ while (iters-- > 0) {
1434
+ for (int i = 0; i < num_files; ++i) {
1435
+ snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
1436
+ &zsize);
1437
+ }
1438
+ }
1439
+ StopBenchmarkTiming();
1440
+
1441
+ for (int i = 0; i < num_files; ++i) {
1442
+ delete[] dst[i];
1443
+ }
1444
+ SetBenchmarkLabel(StrFormat("%d files", num_files));
1445
+ }
1446
+ BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
1447
+
1448
+ static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
1449
+ StopBenchmarkTiming();
1450
+
1451
+ CHECK_EQ(arg, 0);
1452
+ CHECK_GT(ARRAYSIZE(files), 0);
1453
+ const std::string base_content =
1454
+ ReadTestDataFile(files[0].filename, files[0].size_limit);
1455
+
1456
+ std::vector<std::string> contents;
1457
+ std::vector<char*> dst;
1458
+ int64 total_contents_size = 0;
1459
+ for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
1460
+ ++table_bits) {
1461
+ std::string content = base_content;
1462
+ content.resize(1 << table_bits);
1463
+ dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
1464
+ total_contents_size += content.size();
1465
+ contents.push_back(std::move(content));
1466
+ }
1467
+
1468
+ size_t zsize = 0;
1469
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) * total_contents_size);
1470
+ StartBenchmarkTiming();
1471
+ while (iters-- > 0) {
1472
+ for (int i = 0; i < contents.size(); ++i) {
1473
+ snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
1474
+ &zsize);
1475
+ }
1476
+ }
1477
+ StopBenchmarkTiming();
1478
+
1479
+ for (int i = 0; i < dst.size(); ++i) {
1480
+ delete[] dst[i];
1481
+ }
1482
+ SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
1483
+ }
1484
+ BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
1485
+
1390
1486
  } // namespace snappy
1391
1487
 
1392
1488
  int main(int argc, char** argv) {