snappy 0.0.17 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +5 -5
  2. data/.dockerignore +2 -0
  3. data/.github/workflows/main.yml +34 -0
  4. data/.github/workflows/publish.yml +34 -0
  5. data/.gitignore +2 -1
  6. data/.gitmodules +1 -1
  7. data/Dockerfile +13 -0
  8. data/Gemfile +4 -0
  9. data/README.md +45 -5
  10. data/Rakefile +32 -29
  11. data/ext/api.c +6 -1
  12. data/ext/extconf.rb +31 -22
  13. data/lib/snappy/hadoop/reader.rb +62 -0
  14. data/lib/snappy/hadoop/writer.rb +51 -0
  15. data/lib/snappy/hadoop.rb +22 -0
  16. data/lib/snappy/reader.rb +14 -10
  17. data/lib/snappy/shim.rb +1 -1
  18. data/lib/snappy/version.rb +1 -1
  19. data/lib/snappy.rb +5 -4
  20. data/snappy.gemspec +14 -13
  21. data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
  22. data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
  23. data/test/snappy_hadoop_test.rb +26 -0
  24. data/test/snappy_reader_test.rb +148 -0
  25. data/test/snappy_test.rb +95 -0
  26. data/test/snappy_writer_test.rb +55 -0
  27. data/test/test_helper.rb +7 -0
  28. data/test.sh +3 -0
  29. data/vendor/snappy/CMakeLists.txt +420 -0
  30. data/vendor/snappy/CONTRIBUTING.md +31 -0
  31. data/vendor/snappy/NEWS +52 -0
  32. data/vendor/snappy/{README → README.md} +75 -49
  33. data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
  34. data/vendor/snappy/cmake/config.h.in +66 -0
  35. data/vendor/snappy/docs/README.md +72 -0
  36. data/vendor/snappy/snappy-internal.h +200 -32
  37. data/vendor/snappy/snappy-sinksource.cc +26 -9
  38. data/vendor/snappy/snappy-sinksource.h +11 -11
  39. data/vendor/snappy/snappy-stubs-internal.cc +1 -1
  40. data/vendor/snappy/snappy-stubs-internal.h +299 -302
  41. data/vendor/snappy/snappy-stubs-public.h.in +10 -47
  42. data/vendor/snappy/snappy-test.cc +94 -200
  43. data/vendor/snappy/snappy-test.h +101 -358
  44. data/vendor/snappy/snappy.cc +1437 -474
  45. data/vendor/snappy/snappy.h +31 -12
  46. data/vendor/snappy/snappy_benchmark.cc +378 -0
  47. data/vendor/snappy/snappy_compress_fuzzer.cc +60 -0
  48. data/vendor/snappy/snappy_test_data.cc +57 -0
  49. data/vendor/snappy/snappy_test_data.h +68 -0
  50. data/vendor/snappy/snappy_test_tool.cc +471 -0
  51. data/vendor/snappy/snappy_uncompress_fuzzer.cc +58 -0
  52. data/vendor/snappy/snappy_unittest.cc +271 -792
  53. metadata +42 -92
  54. data/.travis.yml +0 -26
  55. data/smoke.sh +0 -8
  56. data/test/test-snappy-reader.rb +0 -129
  57. data/test/test-snappy-writer.rb +0 -55
  58. data/test/test-snappy.rb +0 -58
  59. data/vendor/snappy/ChangeLog +0 -2468
  60. data/vendor/snappy/INSTALL +0 -370
  61. data/vendor/snappy/Makefile +0 -982
  62. data/vendor/snappy/Makefile.am +0 -26
  63. data/vendor/snappy/Makefile.in +0 -982
  64. data/vendor/snappy/aclocal.m4 +0 -9738
  65. data/vendor/snappy/autogen.sh +0 -12
  66. data/vendor/snappy/autom4te.cache/output.0 +0 -18856
  67. data/vendor/snappy/autom4te.cache/output.1 +0 -18852
  68. data/vendor/snappy/autom4te.cache/requests +0 -297
  69. data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
  70. data/vendor/snappy/autom4te.cache/traces.1 +0 -714
  71. data/vendor/snappy/config.guess +0 -1530
  72. data/vendor/snappy/config.h +0 -135
  73. data/vendor/snappy/config.h.in +0 -134
  74. data/vendor/snappy/config.log +0 -1640
  75. data/vendor/snappy/config.status +0 -2318
  76. data/vendor/snappy/config.sub +0 -1773
  77. data/vendor/snappy/configure +0 -18852
  78. data/vendor/snappy/configure.ac +0 -134
  79. data/vendor/snappy/depcomp +0 -688
  80. data/vendor/snappy/install-sh +0 -527
  81. data/vendor/snappy/libtool +0 -10246
  82. data/vendor/snappy/ltmain.sh +0 -9661
  83. data/vendor/snappy/m4/gtest.m4 +0 -74
  84. data/vendor/snappy/m4/libtool.m4 +0 -8001
  85. data/vendor/snappy/m4/ltoptions.m4 +0 -384
  86. data/vendor/snappy/m4/ltsugar.m4 +0 -123
  87. data/vendor/snappy/m4/ltversion.m4 +0 -23
  88. data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
  89. data/vendor/snappy/missing +0 -331
  90. data/vendor/snappy/snappy-stubs-public.h +0 -100
  91. data/vendor/snappy/snappy.pc +0 -10
  92. data/vendor/snappy/snappy.pc.in +0 -10
  93. data/vendor/snappy/stamp-h1 +0 -1
@@ -26,51 +26,31 @@
26
26
  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
27
  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
28
 
29
- #include <math.h>
30
- #include <stdlib.h>
31
-
32
-
33
29
  #include <algorithm>
30
+ #include <cmath>
31
+ #include <cstdlib>
32
+ #include <random>
34
33
  #include <string>
34
+ #include <utility>
35
35
  #include <vector>
36
36
 
37
- #include "snappy.h"
38
- #include "snappy-internal.h"
39
37
  #include "snappy-test.h"
38
+
39
+ #include "gtest/gtest.h"
40
+
41
+ #include "snappy-internal.h"
40
42
  #include "snappy-sinksource.h"
43
+ #include "snappy.h"
44
+ #include "snappy_test_data.h"
41
45
 
42
- DEFINE_int32(start_len, -1,
43
- "Starting prefix size for testing (-1: just full file contents)");
44
- DEFINE_int32(end_len, -1,
45
- "Starting prefix size for testing (-1: just full file contents)");
46
- DEFINE_int32(bytes, 10485760,
47
- "How many bytes to compress/uncompress per file for timing");
48
-
49
- DEFINE_bool(zlib, false,
50
- "Run zlib compression (http://www.zlib.net)");
51
- DEFINE_bool(lzo, false,
52
- "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
53
- DEFINE_bool(quicklz, false,
54
- "Run quickLZ compression (http://www.quicklz.com/)");
55
- DEFINE_bool(liblzf, false,
56
- "Run libLZF compression "
57
- "(http://www.goof.com/pcg/marc/liblzf.html)");
58
- DEFINE_bool(fastlz, false,
59
- "Run FastLZ compression (http://www.fastlz.org/");
60
- DEFINE_bool(snappy, true, "Run snappy compression");
61
-
62
- DEFINE_bool(write_compressed, false,
63
- "Write compressed versions of each file to <file>.comp");
64
- DEFINE_bool(write_uncompressed, false,
65
- "Write uncompressed versions of each file to <file>.uncomp");
66
-
67
- DEFINE_bool(snappy_dump_decompression_table, false,
46
+ SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
68
47
  "If true, we print the decompression table during tests.");
69
48
 
70
49
  namespace snappy {
71
50
 
51
+ namespace {
72
52
 
73
- #ifdef HAVE_FUNC_MMAP
53
+ #if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
74
54
 
75
55
  // To test against code that reads beyond its input, this class copies a
76
56
  // string to a newly allocated group of pages, the last of which
@@ -80,8 +60,8 @@ namespace snappy {
80
60
  // be able to read previously allocated memory while doing heap allocations.
81
61
  class DataEndingAtUnreadablePage {
82
62
  public:
83
- explicit DataEndingAtUnreadablePage(const string& s) {
84
- const size_t page_size = getpagesize();
63
+ explicit DataEndingAtUnreadablePage(const std::string& s) {
64
+ const size_t page_size = sysconf(_SC_PAGESIZE);
85
65
  const size_t size = s.size();
86
66
  // Round up space for string to a multiple of page_size.
87
67
  size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
@@ -91,7 +71,7 @@ class DataEndingAtUnreadablePage {
91
71
  CHECK_NE(MAP_FAILED, mem_);
92
72
  protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
93
73
  char* dst = protected_page_ - size;
94
- memcpy(dst, s.data(), size);
74
+ std::memcpy(dst, s.data(), size);
95
75
  data_ = dst;
96
76
  size_ = size;
97
77
  // Make guard page unreadable.
@@ -99,8 +79,9 @@ class DataEndingAtUnreadablePage {
99
79
  }
100
80
 
101
81
  ~DataEndingAtUnreadablePage() {
82
+ const size_t page_size = sysconf(_SC_PAGESIZE);
102
83
  // Undo the mprotect.
103
- CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
84
+ CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
104
85
  CHECK_EQ(0, munmap(mem_, alloc_size_));
105
86
  }
106
87
 
@@ -115,368 +96,15 @@ class DataEndingAtUnreadablePage {
115
96
  size_t size_;
116
97
  };
117
98
 
118
- #else // HAVE_FUNC_MMAP
99
+ #else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
119
100
 
120
101
  // Fallback for systems without mmap.
121
- typedef string DataEndingAtUnreadablePage;
102
+ using DataEndingAtUnreadablePage = std::string;
122
103
 
123
104
  #endif
124
105
 
125
- enum CompressorType {
126
- ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY
127
- };
128
-
129
- const char* names[] = {
130
- "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY"
131
- };
132
-
133
- static size_t MinimumRequiredOutputSpace(size_t input_size,
134
- CompressorType comp) {
135
- switch (comp) {
136
- #ifdef ZLIB_VERSION
137
- case ZLIB:
138
- return ZLib::MinCompressbufSize(input_size);
139
- #endif // ZLIB_VERSION
140
-
141
- #ifdef LZO_VERSION
142
- case LZO:
143
- return input_size + input_size/64 + 16 + 3;
144
- #endif // LZO_VERSION
145
-
146
- #ifdef LZF_VERSION
147
- case LIBLZF:
148
- return input_size;
149
- #endif // LZF_VERSION
150
-
151
- #ifdef QLZ_VERSION_MAJOR
152
- case QUICKLZ:
153
- return input_size + 36000; // 36000 is used for scratch.
154
- #endif // QLZ_VERSION_MAJOR
155
-
156
- #ifdef FASTLZ_VERSION
157
- case FASTLZ:
158
- return max(static_cast<int>(ceil(input_size * 1.05)), 66);
159
- #endif // FASTLZ_VERSION
160
-
161
- case SNAPPY:
162
- return snappy::MaxCompressedLength(input_size);
163
-
164
- default:
165
- LOG(FATAL) << "Unknown compression type number " << comp;
166
- return 0;
167
- }
168
- }
169
-
170
- // Returns true if we successfully compressed, false otherwise.
171
- //
172
- // If compressed_is_preallocated is set, do not resize the compressed buffer.
173
- // This is typically what you want for a benchmark, in order to not spend
174
- // time in the memory allocator. If you do set this flag, however,
175
- // "compressed" must be preinitialized to at least MinCompressbufSize(comp)
176
- // number of bytes, and may contain junk bytes at the end after return.
177
- static bool Compress(const char* input, size_t input_size, CompressorType comp,
178
- string* compressed, bool compressed_is_preallocated) {
179
- if (!compressed_is_preallocated) {
180
- compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
181
- }
182
-
183
- switch (comp) {
184
- #ifdef ZLIB_VERSION
185
- case ZLIB: {
186
- ZLib zlib;
187
- uLongf destlen = compressed->size();
188
- int ret = zlib.Compress(
189
- reinterpret_cast<Bytef*>(string_as_array(compressed)),
190
- &destlen,
191
- reinterpret_cast<const Bytef*>(input),
192
- input_size);
193
- CHECK_EQ(Z_OK, ret);
194
- if (!compressed_is_preallocated) {
195
- compressed->resize(destlen);
196
- }
197
- return true;
198
- }
199
- #endif // ZLIB_VERSION
200
-
201
- #ifdef LZO_VERSION
202
- case LZO: {
203
- unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
204
- lzo_uint destlen;
205
- int ret = lzo1x_1_15_compress(
206
- reinterpret_cast<const uint8*>(input),
207
- input_size,
208
- reinterpret_cast<uint8*>(string_as_array(compressed)),
209
- &destlen,
210
- mem);
211
- CHECK_EQ(LZO_E_OK, ret);
212
- delete[] mem;
213
- if (!compressed_is_preallocated) {
214
- compressed->resize(destlen);
215
- }
216
- break;
217
- }
218
- #endif // LZO_VERSION
219
-
220
- #ifdef LZF_VERSION
221
- case LIBLZF: {
222
- int destlen = lzf_compress(input,
223
- input_size,
224
- string_as_array(compressed),
225
- input_size);
226
- if (destlen == 0) {
227
- // lzf *can* cause lots of blowup when compressing, so they
228
- // recommend to limit outsize to insize, and just not compress
229
- // if it's bigger. Ideally, we'd just swap input and output.
230
- compressed->assign(input, input_size);
231
- destlen = input_size;
232
- }
233
- if (!compressed_is_preallocated) {
234
- compressed->resize(destlen);
235
- }
236
- break;
237
- }
238
- #endif // LZF_VERSION
239
-
240
- #ifdef QLZ_VERSION_MAJOR
241
- case QUICKLZ: {
242
- qlz_state_compress *state_compress = new qlz_state_compress;
243
- int destlen = qlz_compress(input,
244
- string_as_array(compressed),
245
- input_size,
246
- state_compress);
247
- delete state_compress;
248
- CHECK_NE(0, destlen);
249
- if (!compressed_is_preallocated) {
250
- compressed->resize(destlen);
251
- }
252
- break;
253
- }
254
- #endif // QLZ_VERSION_MAJOR
255
-
256
- #ifdef FASTLZ_VERSION
257
- case FASTLZ: {
258
- // Use level 1 compression since we mostly care about speed.
259
- int destlen = fastlz_compress_level(
260
- 1,
261
- input,
262
- input_size,
263
- string_as_array(compressed));
264
- if (!compressed_is_preallocated) {
265
- compressed->resize(destlen);
266
- }
267
- CHECK_NE(destlen, 0);
268
- break;
269
- }
270
- #endif // FASTLZ_VERSION
271
-
272
- case SNAPPY: {
273
- size_t destlen;
274
- snappy::RawCompress(input, input_size,
275
- string_as_array(compressed),
276
- &destlen);
277
- CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
278
- if (!compressed_is_preallocated) {
279
- compressed->resize(destlen);
280
- }
281
- break;
282
- }
283
-
284
- default: {
285
- return false; // the asked-for library wasn't compiled in
286
- }
287
- }
288
- return true;
289
- }
290
-
291
- static bool Uncompress(const string& compressed, CompressorType comp,
292
- int size, string* output) {
293
- switch (comp) {
294
- #ifdef ZLIB_VERSION
295
- case ZLIB: {
296
- output->resize(size);
297
- ZLib zlib;
298
- uLongf destlen = output->size();
299
- int ret = zlib.Uncompress(
300
- reinterpret_cast<Bytef*>(string_as_array(output)),
301
- &destlen,
302
- reinterpret_cast<const Bytef*>(compressed.data()),
303
- compressed.size());
304
- CHECK_EQ(Z_OK, ret);
305
- CHECK_EQ(static_cast<uLongf>(size), destlen);
306
- break;
307
- }
308
- #endif // ZLIB_VERSION
309
-
310
- #ifdef LZO_VERSION
311
- case LZO: {
312
- output->resize(size);
313
- lzo_uint destlen;
314
- int ret = lzo1x_decompress(
315
- reinterpret_cast<const uint8*>(compressed.data()),
316
- compressed.size(),
317
- reinterpret_cast<uint8*>(string_as_array(output)),
318
- &destlen,
319
- NULL);
320
- CHECK_EQ(LZO_E_OK, ret);
321
- CHECK_EQ(static_cast<lzo_uint>(size), destlen);
322
- break;
323
- }
324
- #endif // LZO_VERSION
325
-
326
- #ifdef LZF_VERSION
327
- case LIBLZF: {
328
- output->resize(size);
329
- int destlen = lzf_decompress(compressed.data(),
330
- compressed.size(),
331
- string_as_array(output),
332
- output->size());
333
- if (destlen == 0) {
334
- // This error probably means we had decided not to compress,
335
- // and thus have stored input in output directly.
336
- output->assign(compressed.data(), compressed.size());
337
- destlen = compressed.size();
338
- }
339
- CHECK_EQ(destlen, size);
340
- break;
341
- }
342
- #endif // LZF_VERSION
343
-
344
- #ifdef QLZ_VERSION_MAJOR
345
- case QUICKLZ: {
346
- output->resize(size);
347
- qlz_state_decompress *state_decompress = new qlz_state_decompress;
348
- int destlen = qlz_decompress(compressed.data(),
349
- string_as_array(output),
350
- state_decompress);
351
- delete state_decompress;
352
- CHECK_EQ(destlen, size);
353
- break;
354
- }
355
- #endif // QLZ_VERSION_MAJOR
356
-
357
- #ifdef FASTLZ_VERSION
358
- case FASTLZ: {
359
- output->resize(size);
360
- int destlen = fastlz_decompress(compressed.data(),
361
- compressed.length(),
362
- string_as_array(output),
363
- size);
364
- CHECK_EQ(destlen, size);
365
- break;
366
- }
367
- #endif // FASTLZ_VERSION
368
-
369
- case SNAPPY: {
370
- snappy::RawUncompress(compressed.data(), compressed.size(),
371
- string_as_array(output));
372
- break;
373
- }
374
-
375
- default: {
376
- return false; // the asked-for library wasn't compiled in
377
- }
378
- }
379
- return true;
380
- }
381
-
382
- static void Measure(const char* data,
383
- size_t length,
384
- CompressorType comp,
385
- int repeats,
386
- int block_size) {
387
- // Run tests a few time and pick median running times
388
- static const int kRuns = 5;
389
- double ctime[kRuns];
390
- double utime[kRuns];
391
- int compressed_size = 0;
392
-
393
- {
394
- // Chop the input into blocks
395
- int num_blocks = (length + block_size - 1) / block_size;
396
- std::vector<const char*> input(num_blocks);
397
- std::vector<size_t> input_length(num_blocks);
398
- std::vector<string> compressed(num_blocks);
399
- std::vector<string> output(num_blocks);
400
- for (int b = 0; b < num_blocks; b++) {
401
- int input_start = b * block_size;
402
- int input_limit = min<int>((b+1)*block_size, length);
403
- input[b] = data+input_start;
404
- input_length[b] = input_limit-input_start;
405
-
406
- // Pre-grow the output buffer so we don't measure string append time.
407
- compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
408
- }
409
-
410
- // First, try one trial compression to make sure the code is compiled in
411
- if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
412
- LOG(WARNING) << "Skipping " << names[comp] << ": "
413
- << "library not compiled in";
414
- return;
415
- }
416
-
417
- for (int run = 0; run < kRuns; run++) {
418
- CycleTimer ctimer, utimer;
419
-
420
- for (int b = 0; b < num_blocks; b++) {
421
- // Pre-grow the output buffer so we don't measure string append time.
422
- compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
423
- }
424
-
425
- ctimer.Start();
426
- for (int b = 0; b < num_blocks; b++)
427
- for (int i = 0; i < repeats; i++)
428
- Compress(input[b], input_length[b], comp, &compressed[b], true);
429
- ctimer.Stop();
430
-
431
- // Compress once more, with resizing, so we don't leave junk
432
- // at the end that will confuse the decompressor.
433
- for (int b = 0; b < num_blocks; b++) {
434
- Compress(input[b], input_length[b], comp, &compressed[b], false);
435
- }
436
-
437
- for (int b = 0; b < num_blocks; b++) {
438
- output[b].resize(input_length[b]);
439
- }
440
-
441
- utimer.Start();
442
- for (int i = 0; i < repeats; i++)
443
- for (int b = 0; b < num_blocks; b++)
444
- Uncompress(compressed[b], comp, input_length[b], &output[b]);
445
- utimer.Stop();
446
-
447
- ctime[run] = ctimer.Get();
448
- utime[run] = utimer.Get();
449
- }
450
-
451
- compressed_size = 0;
452
- for (size_t i = 0; i < compressed.size(); i++) {
453
- compressed_size += compressed[i].size();
454
- }
455
- }
456
-
457
- sort(ctime, ctime + kRuns);
458
- sort(utime, utime + kRuns);
459
- const int med = kRuns/2;
460
-
461
- float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
462
- float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
463
- string x = names[comp];
464
- x += ":";
465
- string urate = (uncomp_rate >= 0)
466
- ? StringPrintf("%.1f", uncomp_rate)
467
- : string("?");
468
- printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
469
- "comp %5.1f MB/s uncomp %5s MB/s\n",
470
- x.c_str(),
471
- block_size/(1<<20),
472
- static_cast<int>(length), static_cast<uint32>(compressed_size),
473
- (compressed_size * 100.0) / max<int>(1, length),
474
- comp_rate,
475
- urate.c_str());
476
- }
477
-
478
- static int VerifyString(const string& input) {
479
- string compressed;
106
+ int VerifyString(const std::string& input) {
107
+ std::string compressed;
480
108
  DataEndingAtUnreadablePage i(input);
481
109
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
482
110
  CHECK_EQ(written, compressed.size());
@@ -484,15 +112,15 @@ static int VerifyString(const string& input) {
484
112
  snappy::MaxCompressedLength(input.size()));
485
113
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
486
114
 
487
- string uncompressed;
115
+ std::string uncompressed;
488
116
  DataEndingAtUnreadablePage c(compressed);
489
117
  CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
490
118
  CHECK_EQ(uncompressed, input);
491
119
  return uncompressed.size();
492
120
  }
493
121
 
494
- static void VerifyStringSink(const string& input) {
495
- string compressed;
122
+ void VerifyStringSink(const std::string& input) {
123
+ std::string compressed;
496
124
  DataEndingAtUnreadablePage i(input);
497
125
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
498
126
  CHECK_EQ(written, compressed.size());
@@ -500,7 +128,7 @@ static void VerifyStringSink(const string& input) {
500
128
  snappy::MaxCompressedLength(input.size()));
501
129
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
502
130
 
503
- string uncompressed;
131
+ std::string uncompressed;
504
132
  uncompressed.resize(input.size());
505
133
  snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
506
134
  DataEndingAtUnreadablePage c(compressed);
@@ -509,41 +137,67 @@ static void VerifyStringSink(const string& input) {
509
137
  CHECK_EQ(uncompressed, input);
510
138
  }
511
139
 
512
- static void VerifyIOVec(const string& input) {
513
- string compressed;
514
- DataEndingAtUnreadablePage i(input);
515
- const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
516
- CHECK_EQ(written, compressed.size());
517
- CHECK_LE(compressed.size(),
518
- snappy::MaxCompressedLength(input.size()));
519
- CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
520
-
521
- // Try uncompressing into an iovec containing a random number of entries
522
- // ranging from 1 to 10.
523
- char* buf = new char[input.size()];
524
- ACMRandom rnd(input.size());
525
- size_t num = rnd.Next() % 10 + 1;
140
+ struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) {
141
+ std::minstd_rand0 rng(input.size());
142
+ std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
143
+ num = uniform_1_to_10(rng);
526
144
  if (input.size() < num) {
527
145
  num = input.size();
528
146
  }
529
147
  struct iovec* iov = new iovec[num];
530
- int used_so_far = 0;
148
+ size_t used_so_far = 0;
149
+ std::bernoulli_distribution one_in_five(1.0 / 5);
531
150
  for (size_t i = 0; i < num; ++i) {
151
+ assert(used_so_far < input.size());
532
152
  iov[i].iov_base = buf + used_so_far;
533
153
  if (i == num - 1) {
534
154
  iov[i].iov_len = input.size() - used_so_far;
535
155
  } else {
536
156
  // Randomly choose to insert a 0 byte entry.
537
- if (rnd.OneIn(5)) {
157
+ if (one_in_five(rng)) {
538
158
  iov[i].iov_len = 0;
539
159
  } else {
540
- iov[i].iov_len = rnd.Uniform(input.size());
160
+ std::uniform_int_distribution<size_t> uniform_not_used_so_far(
161
+ 0, input.size() - used_so_far - 1);
162
+ iov[i].iov_len = uniform_not_used_so_far(rng);
541
163
  }
542
164
  }
543
165
  used_so_far += iov[i].iov_len;
544
166
  }
545
- CHECK(snappy::RawUncompressToIOVec(
546
- compressed.data(), compressed.size(), iov, num));
167
+ return iov;
168
+ }
169
+
170
+ int VerifyIOVecSource(const std::string& input) {
171
+ std::string compressed;
172
+ std::string copy = input;
173
+ char* buf = const_cast<char*>(copy.data());
174
+ size_t num = 0;
175
+ struct iovec* iov = GetIOVec(input, buf, num);
176
+ const size_t written = snappy::CompressFromIOVec(iov, num, &compressed);
177
+ CHECK_EQ(written, compressed.size());
178
+ CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
179
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
180
+
181
+ std::string uncompressed;
182
+ DataEndingAtUnreadablePage c(compressed);
183
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
184
+ CHECK_EQ(uncompressed, input);
185
+ delete[] iov;
186
+ return uncompressed.size();
187
+ }
188
+
189
+ void VerifyIOVecSink(const std::string& input) {
190
+ std::string compressed;
191
+ DataEndingAtUnreadablePage i(input);
192
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
193
+ CHECK_EQ(written, compressed.size());
194
+ CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
195
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
196
+ char* buf = new char[input.size()];
197
+ size_t num = 0;
198
+ struct iovec* iov = GetIOVec(input, buf, num);
199
+ CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov,
200
+ num));
547
201
  CHECK(!memcmp(buf, input.data(), input.size()));
548
202
  delete[] iov;
549
203
  delete[] buf;
@@ -551,22 +205,22 @@ static void VerifyIOVec(const string& input) {
551
205
 
552
206
  // Test that data compressed by a compressor that does not
553
207
  // obey block sizes is uncompressed properly.
554
- static void VerifyNonBlockedCompression(const string& input) {
208
+ void VerifyNonBlockedCompression(const std::string& input) {
555
209
  if (input.length() > snappy::kBlockSize) {
556
210
  // We cannot test larger blocks than the maximum block size, obviously.
557
211
  return;
558
212
  }
559
213
 
560
- string prefix;
214
+ std::string prefix;
561
215
  Varint::Append32(&prefix, input.size());
562
216
 
563
217
  // Setup compression table
564
- snappy::internal::WorkingMemory wmem;
218
+ snappy::internal::WorkingMemory wmem(input.size());
565
219
  int table_size;
566
- uint16* table = wmem.GetHashTable(input.size(), &table_size);
220
+ uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
567
221
 
568
222
  // Compress entire input in one shot
569
- string compressed;
223
+ std::string compressed;
570
224
  compressed += prefix;
571
225
  compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
572
226
  char* dest = string_as_array(&compressed) + prefix.size();
@@ -574,13 +228,13 @@ static void VerifyNonBlockedCompression(const string& input) {
574
228
  dest, table, table_size);
575
229
  compressed.resize(end - compressed.data());
576
230
 
577
- // Uncompress into string
578
- string uncomp_str;
231
+ // Uncompress into std::string
232
+ std::string uncomp_str;
579
233
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
580
234
  CHECK_EQ(uncomp_str, input);
581
235
 
582
236
  // Uncompress using source/sink
583
- string uncomp_str2;
237
+ std::string uncomp_str2;
584
238
  uncomp_str2.resize(input.size());
585
239
  snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
586
240
  snappy::ByteArraySource source(compressed.data(), compressed.size());
@@ -592,62 +246,64 @@ static void VerifyNonBlockedCompression(const string& input) {
592
246
  static const int kNumBlocks = 10;
593
247
  struct iovec vec[kNumBlocks];
594
248
  const int block_size = 1 + input.size() / kNumBlocks;
595
- string iovec_data(block_size * kNumBlocks, 'x');
596
- for (int i = 0; i < kNumBlocks; i++) {
249
+ std::string iovec_data(block_size * kNumBlocks, 'x');
250
+ for (int i = 0; i < kNumBlocks; ++i) {
597
251
  vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
598
252
  vec[i].iov_len = block_size;
599
253
  }
600
254
  CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
601
255
  vec, kNumBlocks));
602
- CHECK_EQ(string(iovec_data.data(), input.size()), input);
256
+ CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
603
257
  }
604
258
  }
605
259
 
606
260
  // Expand the input so that it is at least K times as big as block size
607
- static string Expand(const string& input) {
261
+ std::string Expand(const std::string& input) {
608
262
  static const int K = 3;
609
- string data = input;
263
+ std::string data = input;
610
264
  while (data.size() < K * snappy::kBlockSize) {
611
265
  data += input;
612
266
  }
613
267
  return data;
614
268
  }
615
269
 
616
- static int Verify(const string& input) {
270
+ int Verify(const std::string& input) {
617
271
  VLOG(1) << "Verifying input of size " << input.size();
618
272
 
619
273
  // Compress using string based routines
620
274
  const int result = VerifyString(input);
621
275
 
276
+ // Compress using `iovec`-based routines.
277
+ CHECK_EQ(VerifyIOVecSource(input), result);
278
+
622
279
  // Verify using sink based routines
623
280
  VerifyStringSink(input);
624
281
 
625
282
  VerifyNonBlockedCompression(input);
626
- VerifyIOVec(input);
283
+ VerifyIOVecSink(input);
627
284
  if (!input.empty()) {
628
- const string expanded = Expand(input);
285
+ const std::string expanded = Expand(input);
629
286
  VerifyNonBlockedCompression(expanded);
630
- VerifyIOVec(input);
287
+ VerifyIOVecSink(input);
631
288
  }
632
289
 
633
290
  return result;
634
291
  }
635
292
 
636
-
637
- static bool IsValidCompressedBuffer(const string& c) {
293
+ bool IsValidCompressedBuffer(const std::string& c) {
638
294
  return snappy::IsValidCompressedBuffer(c.data(), c.size());
639
295
  }
640
- static bool Uncompress(const string& c, string* u) {
296
+ bool Uncompress(const std::string& c, std::string* u) {
641
297
  return snappy::Uncompress(c.data(), c.size(), u);
642
298
  }
643
299
 
644
300
  // This test checks to ensure that snappy doesn't coredump if it gets
645
301
  // corrupted data.
646
302
  TEST(CorruptedTest, VerifyCorrupted) {
647
- string source = "making sure we don't crash with corrupted input";
303
+ std::string source = "making sure we don't crash with corrupted input";
648
304
  VLOG(1) << source;
649
- string dest;
650
- string uncmp;
305
+ std::string dest;
306
+ std::string uncmp;
651
307
  snappy::Compress(source.data(), source.size(), &dest);
652
308
 
653
309
  // Mess around with the data. It's hard to simulate all possible
@@ -662,8 +318,8 @@ TEST(CorruptedTest, VerifyCorrupted) {
662
318
  // This is testing for a security bug - a buffer that decompresses to 100k
663
319
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
664
320
  source.resize(100000);
665
- for (size_t i = 0; i < source.length(); ++i) {
666
- source[i] = 'A';
321
+ for (char& source_char : source) {
322
+ source_char = 'A';
667
323
  }
668
324
  snappy::Compress(source.data(), source.size(), &dest);
669
325
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
@@ -694,14 +350,14 @@ TEST(CorruptedTest, VerifyCorrupted) {
694
350
 
695
351
  // try reading stuff in from a bad file.
696
352
  for (int i = 1; i <= 3; ++i) {
697
- string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(),
698
- 0);
699
- string uncmp;
353
+ std::string data =
354
+ ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
355
+ std::string uncmp;
700
356
  // check that we don't return a crazy length
701
357
  size_t ulen;
702
358
  CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
703
359
  || (ulen < (1<<20)));
704
- uint32 ulen2;
360
+ uint32_t ulen2;
705
361
  snappy::ByteArraySource source(data.data(), data.size());
706
362
  CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
707
363
  (ulen2 < (1<<20)));
@@ -714,7 +370,7 @@ TEST(CorruptedTest, VerifyCorrupted) {
714
370
  // These mirror the compression code in snappy.cc, but are copied
715
371
  // here so that we can bypass some limitations in the how snappy.cc
716
372
  // invokes these routines.
717
- static void AppendLiteral(string* dst, const string& literal) {
373
+ void AppendLiteral(std::string* dst, const std::string& literal) {
718
374
  if (literal.empty()) return;
719
375
  int n = literal.size() - 1;
720
376
  if (n < 60) {
@@ -729,12 +385,12 @@ static void AppendLiteral(string* dst, const string& literal) {
729
385
  n >>= 8;
730
386
  }
731
387
  dst->push_back(0 | ((59+count) << 2));
732
- *dst += string(number, count);
388
+ *dst += std::string(number, count);
733
389
  }
734
390
  *dst += literal;
735
391
  }
736
392
 
737
- static void AppendCopy(string* dst, int offset, int length) {
393
+ void AppendCopy(std::string* dst, int offset, int length) {
738
394
  while (length > 0) {
739
395
  // Figure out how much to copy in one shot
740
396
  int to_copy;
@@ -771,51 +427,102 @@ TEST(Snappy, SimpleTests) {
771
427
  Verify("ab");
772
428
  Verify("abc");
773
429
 
774
- Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
775
- Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
776
- Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
777
- Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
778
- Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
430
+ Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
431
+ Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
432
+ Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
433
+ Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
434
+ Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
435
+ }
436
+
437
+ // Regression test for cr/345340892.
438
+ TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
439
+ Verify("abcabcabcabcabcabcab");
440
+ Verify("abcabcabcabcabcabcab0123456789ABCDEF");
441
+
442
+ Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
443
+ Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
444
+ }
445
+
446
+ // Regression test for cr/345340892.
447
+ TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
448
+ std::mt19937 rng;
449
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
450
+ for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
451
+ for (int length = 1; length <= 64; ++length) {
452
+ for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
453
+ const int size = pattern_size + length + extra_bytes_after_pattern;
454
+ std::string input;
455
+ input.resize(size);
456
+ for (int i = 0; i < pattern_size; ++i) {
457
+ input[i] = 'a' + i;
458
+ }
459
+ for (int i = 0; i < length; ++i) {
460
+ input[pattern_size + i] = input[i];
461
+ }
462
+ for (int i = 0; i < extra_bytes_after_pattern; ++i) {
463
+ input[pattern_size + length + i] =
464
+ static_cast<char>(uniform_byte(rng));
465
+ }
466
+ Verify(input);
467
+ }
468
+ }
469
+ }
779
470
  }
780
471
 
781
472
  // Verify max blowup (lots of four-byte copies)
782
473
  TEST(Snappy, MaxBlowup) {
783
- string input;
784
- for (int i = 0; i < 20000; i++) {
785
- ACMRandom rnd(i);
786
- uint32 bytes = static_cast<uint32>(rnd.Next());
787
- input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
788
- }
789
- for (int i = 19999; i >= 0; i--) {
790
- ACMRandom rnd(i);
791
- uint32 bytes = static_cast<uint32>(rnd.Next());
792
- input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
474
+ std::mt19937 rng;
475
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
476
+ std::string input;
477
+ for (int i = 0; i < 80000; ++i)
478
+ input.push_back(static_cast<char>(uniform_byte(rng)));
479
+
480
+ for (int i = 0; i < 80000; i += 4) {
481
+ std::string four_bytes(input.end() - i - 4, input.end() - i);
482
+ input.append(four_bytes);
793
483
  }
794
484
  Verify(input);
795
485
  }
796
486
 
797
487
  TEST(Snappy, RandomData) {
798
- ACMRandom rnd(FLAGS_test_random_seed);
799
-
800
- const int num_ops = 20000;
801
- for (int i = 0; i < num_ops; i++) {
488
+ std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
489
+ std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
490
+ std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
491
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
492
+ std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
493
+ std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
494
+ std::bernoulli_distribution one_in_ten(1.0 / 10);
495
+
496
+ constexpr int num_ops = 20000;
497
+ for (int i = 0; i < num_ops; ++i) {
802
498
  if ((i % 1000) == 0) {
803
499
  VLOG(0) << "Random op " << i << " of " << num_ops;
804
500
  }
805
501
 
806
- string x;
807
- size_t len = rnd.Uniform(4096);
502
+ std::string x;
503
+ size_t len = uniform_4k(rng);
808
504
  if (i < 100) {
809
- len = 65536 + rnd.Uniform(65536);
505
+ len = 65536 + uniform_64k(rng);
810
506
  }
811
507
  while (x.size() < len) {
812
508
  int run_len = 1;
813
- if (rnd.OneIn(10)) {
814
- run_len = rnd.Skewed(8);
509
+ if (one_in_ten(rng)) {
510
+ int skewed_bits = uniform_0_to_8(rng);
511
+ // int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
512
+ std::uniform_int_distribution<int> skewed_low(0,
513
+ (1 << skewed_bits) - 1);
514
+ run_len = skewed_low(rng);
515
+ }
516
+ char c = static_cast<char>(uniform_byte(rng));
517
+ if (i >= 100) {
518
+ int skewed_bits = uniform_0_to_3(rng);
519
+ // int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
520
+ std::uniform_int_distribution<int> skewed_low(0,
521
+ (1 << skewed_bits) - 1);
522
+ c = static_cast<char>(skewed_low(rng));
815
523
  }
816
- char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
817
524
  while (run_len-- > 0 && x.size() < len) {
818
- x += c;
525
+ x.push_back(c);
819
526
  }
820
527
  }
821
528
 
@@ -829,20 +536,20 @@ TEST(Snappy, FourByteOffset) {
829
536
  // copy manually.
830
537
 
831
538
  // The two fragments that make up the input string.
832
- string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
833
- string fragment2 = "some other string";
539
+ std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
540
+ std::string fragment2 = "some other string";
834
541
 
835
542
  // How many times each fragment is emitted.
836
543
  const int n1 = 2;
837
544
  const int n2 = 100000 / fragment2.size();
838
- const int length = n1 * fragment1.size() + n2 * fragment2.size();
545
+ const size_t length = n1 * fragment1.size() + n2 * fragment2.size();
839
546
 
840
- string compressed;
547
+ std::string compressed;
841
548
  Varint::Append32(&compressed, length);
842
549
 
843
550
  AppendLiteral(&compressed, fragment1);
844
- string src = fragment1;
845
- for (int i = 0; i < n2; i++) {
551
+ std::string src = fragment1;
552
+ for (int i = 0; i < n2; ++i) {
846
553
  AppendLiteral(&compressed, fragment2);
847
554
  src += fragment2;
848
555
  }
@@ -850,14 +557,34 @@ TEST(Snappy, FourByteOffset) {
850
557
  src += fragment1;
851
558
  CHECK_EQ(length, src.size());
852
559
 
853
- string uncompressed;
560
+ std::string uncompressed;
854
561
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
855
562
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
856
563
  &uncompressed));
857
564
  CHECK_EQ(uncompressed, src);
858
565
  }
859
566
 
860
- TEST(Snappy, IOVecEdgeCases) {
567
+ TEST(Snappy, IOVecSourceEdgeCases) {
568
+ // Validate that empty leading, trailing, and in-between iovecs are handled:
569
+ // [] [] ['a'] [] ['b'] [].
570
+ std::string data = "ab";
571
+ char* buf = const_cast<char*>(data.data());
572
+ size_t used_so_far = 0;
573
+ static const int kLengths[] = {0, 0, 1, 0, 1, 0};
574
+ struct iovec iov[ARRAYSIZE(kLengths)];
575
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
576
+ iov[i].iov_base = buf + used_so_far;
577
+ iov[i].iov_len = kLengths[i];
578
+ used_so_far += kLengths[i];
579
+ }
580
+ std::string compressed;
581
+ snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed);
582
+ std::string uncompressed;
583
+ snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);
584
+ CHECK_EQ(data, uncompressed);
585
+ }
586
+
587
+ TEST(Snappy, IOVecSinkEdgeCases) {
861
588
  // Test some tricky edge cases in the iovec output that are not necessarily
862
589
  // exercised by random tests.
863
590
 
@@ -872,7 +599,7 @@ TEST(Snappy, IOVecEdgeCases) {
872
599
  iov[i].iov_len = kLengths[i];
873
600
  }
874
601
 
875
- string compressed;
602
+ std::string compressed;
876
603
  Varint::Append32(&compressed, 22);
877
604
 
878
605
  // A literal whose output crosses three blocks.
@@ -933,7 +660,7 @@ TEST(Snappy, IOVecLiteralOverflow) {
933
660
  iov[i].iov_len = kLengths[i];
934
661
  }
935
662
 
936
- string compressed;
663
+ std::string compressed;
937
664
  Varint::Append32(&compressed, 8);
938
665
 
939
666
  AppendLiteral(&compressed, "12345678");
@@ -955,7 +682,7 @@ TEST(Snappy, IOVecCopyOverflow) {
955
682
  iov[i].iov_len = kLengths[i];
956
683
  }
957
684
 
958
- string compressed;
685
+ std::string compressed;
959
686
  Varint::Append32(&compressed, 8);
960
687
 
961
688
  AppendLiteral(&compressed, "123");
@@ -969,21 +696,20 @@ TEST(Snappy, IOVecCopyOverflow) {
969
696
  }
970
697
  }
971
698
 
972
- static bool CheckUncompressedLength(const string& compressed,
973
- size_t* ulength) {
699
+ bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
974
700
  const bool result1 = snappy::GetUncompressedLength(compressed.data(),
975
701
  compressed.size(),
976
702
  ulength);
977
703
 
978
704
  snappy::ByteArraySource source(compressed.data(), compressed.size());
979
- uint32 length;
705
+ uint32_t length;
980
706
  const bool result2 = snappy::GetUncompressedLength(&source, &length);
981
707
  CHECK_EQ(result1, result2);
982
708
  return result1;
983
709
  }
984
710
 
985
711
  TEST(SnappyCorruption, TruncatedVarint) {
986
- string compressed, uncompressed;
712
+ std::string compressed, uncompressed;
987
713
  size_t ulength;
988
714
  compressed.push_back('\xf0');
989
715
  CHECK(!CheckUncompressedLength(compressed, &ulength));
@@ -993,7 +719,7 @@ TEST(SnappyCorruption, TruncatedVarint) {
993
719
  }
994
720
 
995
721
  TEST(SnappyCorruption, UnterminatedVarint) {
996
- string compressed, uncompressed;
722
+ std::string compressed, uncompressed;
997
723
  size_t ulength;
998
724
  compressed.push_back('\x80');
999
725
  compressed.push_back('\x80');
@@ -1008,7 +734,7 @@ TEST(SnappyCorruption, UnterminatedVarint) {
1008
734
  }
1009
735
 
1010
736
  TEST(SnappyCorruption, OverflowingVarint) {
1011
- string compressed, uncompressed;
737
+ std::string compressed, uncompressed;
1012
738
  size_t ulength;
1013
739
  compressed.push_back('\xfb');
1014
740
  compressed.push_back('\xff');
@@ -1025,14 +751,14 @@ TEST(Snappy, ReadPastEndOfBuffer) {
1025
751
  // Check that we do not read past end of input
1026
752
 
1027
753
  // Make a compressed string that ends with a single-byte literal
1028
- string compressed;
754
+ std::string compressed;
1029
755
  Varint::Append32(&compressed, 1);
1030
756
  AppendLiteral(&compressed, "x");
1031
757
 
1032
- string uncompressed;
758
+ std::string uncompressed;
1033
759
  DataEndingAtUnreadablePage c(compressed);
1034
760
  CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
1035
- CHECK_EQ(uncompressed, string("x"));
761
+ CHECK_EQ(uncompressed, std::string("x"));
1036
762
  }
1037
763
 
1038
764
  // Check for an infinite loop caused by a copy with offset==0
@@ -1051,17 +777,14 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
1051
777
  EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
1052
778
  }
1053
779
 
1054
- namespace {
1055
-
1056
780
  int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
781
+ uint64_t data;
1057
782
  std::pair<size_t, bool> p =
1058
- snappy::internal::FindMatchLength(s1, s2, s2 + length);
783
+ snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
1059
784
  CHECK_EQ(p.first < 8, p.second);
1060
785
  return p.first;
1061
786
  }
1062
787
 
1063
- } // namespace
1064
-
1065
788
  TEST(Snappy, FindMatchLength) {
1066
789
  // Exercise all different code paths through the function.
1067
790
  // 64-bit version:
@@ -1153,35 +876,37 @@ TEST(Snappy, FindMatchLength) {
1153
876
  }
1154
877
 
1155
878
  TEST(Snappy, FindMatchLengthRandom) {
1156
- const int kNumTrials = 10000;
1157
- const int kTypicalLength = 10;
1158
- ACMRandom rnd(FLAGS_test_random_seed);
1159
-
1160
- for (int i = 0; i < kNumTrials; i++) {
1161
- string s, t;
1162
- char a = rnd.Rand8();
1163
- char b = rnd.Rand8();
1164
- while (!rnd.OneIn(kTypicalLength)) {
1165
- s.push_back(rnd.OneIn(2) ? a : b);
1166
- t.push_back(rnd.OneIn(2) ? a : b);
879
+ constexpr int kNumTrials = 10000;
880
+ constexpr int kTypicalLength = 10;
881
+ std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
882
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
883
+ std::bernoulli_distribution one_in_two(1.0 / 2);
884
+ std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
885
+
886
+ for (int i = 0; i < kNumTrials; ++i) {
887
+ std::string s, t;
888
+ char a = static_cast<char>(uniform_byte(rng));
889
+ char b = static_cast<char>(uniform_byte(rng));
890
+ while (!one_in_typical_length(rng)) {
891
+ s.push_back(one_in_two(rng) ? a : b);
892
+ t.push_back(one_in_two(rng) ? a : b);
1167
893
  }
1168
894
  DataEndingAtUnreadablePage u(s);
1169
895
  DataEndingAtUnreadablePage v(t);
1170
- int matched = TestFindMatchLength(u.data(), v.data(), t.size());
896
+ size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
1171
897
  if (matched == t.size()) {
1172
898
  EXPECT_EQ(s, t);
1173
899
  } else {
1174
900
  EXPECT_NE(s[matched], t[matched]);
1175
- for (int j = 0; j < matched; j++) {
901
+ for (size_t j = 0; j < matched; ++j) {
1176
902
  EXPECT_EQ(s[j], t[j]);
1177
903
  }
1178
904
  }
1179
905
  }
1180
906
  }
1181
907
 
1182
- static uint16 MakeEntry(unsigned int extra,
1183
- unsigned int len,
1184
- unsigned int copy_offset) {
908
+ uint16_t MakeEntry(unsigned int extra, unsigned int len,
909
+ unsigned int copy_offset) {
1185
910
  // Check that all of the fields fit within the allocated space
1186
911
  assert(extra == (extra & 0x7)); // At most 3 bits
1187
912
  assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
@@ -1197,335 +922,89 @@ TEST(Snappy, VerifyCharTable) {
1197
922
  using snappy::internal::COPY_2_BYTE_OFFSET;
1198
923
  using snappy::internal::COPY_4_BYTE_OFFSET;
1199
924
  using snappy::internal::char_table;
1200
- using snappy::internal::wordmask;
1201
925
 
1202
- uint16 dst[256];
926
+ uint16_t dst[256];
1203
927
 
1204
928
  // Place invalid entries in all places to detect missing initialization
1205
929
  int assigned = 0;
1206
- for (int i = 0; i < 256; i++) {
930
+ for (int i = 0; i < 256; ++i) {
1207
931
  dst[i] = 0xffff;
1208
932
  }
1209
933
 
1210
934
  // Small LITERAL entries. We store (len-1) in the top 6 bits.
1211
- for (unsigned int len = 1; len <= 60; len++) {
1212
- dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
935
+ for (uint8_t len = 1; len <= 60; ++len) {
936
+ dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
1213
937
  assigned++;
1214
938
  }
1215
939
 
1216
940
  // Large LITERAL entries. We use 60..63 in the high 6 bits to
1217
941
  // encode the number of bytes of length info that follow the opcode.
1218
- for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
942
+ for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
1219
943
  // We set the length field in the lookup table to 1 because extra
1220
944
  // bytes encode len-1.
1221
- dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
945
+ dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
1222
946
  assigned++;
1223
947
  }
1224
948
 
1225
949
  // COPY_1_BYTE_OFFSET.
1226
950
  //
1227
951
  // The tag byte in the compressed data stores len-4 in 3 bits, and
1228
- // offset/256 in 5 bits. offset%256 is stored in the next byte.
952
+ // offset/256 in 3 bits. offset%256 is stored in the next byte.
1229
953
  //
1230
954
  // This format is used for length in range [4..11] and offset in
1231
955
  // range [0..2047]
1232
- for (unsigned int len = 4; len < 12; len++) {
1233
- for (unsigned int offset = 0; offset < 2048; offset += 256) {
1234
- dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
1235
- MakeEntry(1, len, offset>>8);
956
+ for (uint8_t len = 4; len < 12; ++len) {
957
+ for (uint16_t offset = 0; offset < 2048; offset += 256) {
958
+ uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
959
+ dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
960
+ MakeEntry(1, len, offset_high);
1236
961
  assigned++;
1237
962
  }
1238
963
  }
1239
964
 
1240
965
  // COPY_2_BYTE_OFFSET.
1241
966
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
1242
- for (unsigned int len = 1; len <= 64; len++) {
1243
- dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
967
+ for (uint8_t len = 1; len <= 64; ++len) {
968
+ dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
1244
969
  assigned++;
1245
970
  }
1246
971
 
1247
972
  // COPY_4_BYTE_OFFSET.
1248
973
  // Tag contents len-1 in top 6 bits, and offset in next four bytes.
1249
- for (unsigned int len = 1; len <= 64; len++) {
1250
- dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
974
+ for (uint8_t len = 1; len <= 64; ++len) {
975
+ dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
1251
976
  assigned++;
1252
977
  }
1253
978
 
1254
979
  // Check that each entry was initialized exactly once.
1255
980
  EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
1256
- for (int i = 0; i < 256; i++) {
981
+ for (int i = 0; i < 256; ++i) {
1257
982
  EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
1258
983
  }
1259
984
 
1260
- if (FLAGS_snappy_dump_decompression_table) {
1261
- printf("static const uint16 char_table[256] = {\n ");
1262
- for (int i = 0; i < 256; i++) {
1263
- printf("0x%04x%s",
1264
- dst[i],
1265
- ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
985
+ if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
986
+ std::printf("static const uint16_t char_table[256] = {\n ");
987
+ for (int i = 0; i < 256; ++i) {
988
+ std::printf("0x%04x%s",
989
+ dst[i],
990
+ ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
1266
991
  }
1267
- printf("};\n");
992
+ std::printf("};\n");
1268
993
  }
1269
994
 
1270
995
  // Check that computed table matched recorded table.
1271
- for (int i = 0; i < 256; i++) {
996
+ for (int i = 0; i < 256; ++i) {
1272
997
  EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
1273
998
  }
1274
999
  }
1275
1000
 
1276
- static void CompressFile(const char* fname) {
1277
- string fullinput;
1278
- CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1279
-
1280
- string compressed;
1281
- Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1282
-
1283
- CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
1284
- file::Defaults()));
1285
- }
1286
-
1287
- static void UncompressFile(const char* fname) {
1288
- string fullinput;
1289
- CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1290
-
1291
- size_t uncompLength;
1292
- CHECK(CheckUncompressedLength(fullinput, &uncompLength));
1293
-
1294
- string uncompressed;
1295
- uncompressed.resize(uncompLength);
1296
- CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1297
-
1298
- CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
1299
- file::Defaults()));
1300
- }
1301
-
1302
- static void MeasureFile(const char* fname) {
1303
- string fullinput;
1304
- CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1305
- printf("%-40s :\n", fname);
1306
-
1307
- int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
1308
- int end_len = fullinput.size();
1309
- if (FLAGS_end_len >= 0) {
1310
- end_len = min<int>(fullinput.size(), FLAGS_end_len);
1311
- }
1312
- for (int len = start_len; len <= end_len; len++) {
1313
- const char* const input = fullinput.data();
1314
- int repeats = (FLAGS_bytes + len) / (len + 1);
1315
- if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
1316
- if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
1317
- if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
1318
- if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
1319
- if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
1320
- if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
1321
-
1322
- // For block-size based measurements
1323
- if (0 && FLAGS_snappy) {
1324
- Measure(input, len, SNAPPY, repeats, 8<<10);
1325
- Measure(input, len, SNAPPY, repeats, 16<<10);
1326
- Measure(input, len, SNAPPY, repeats, 32<<10);
1327
- Measure(input, len, SNAPPY, repeats, 64<<10);
1328
- Measure(input, len, SNAPPY, repeats, 256<<10);
1329
- Measure(input, len, SNAPPY, repeats, 1024<<10);
1330
- }
1331
- }
1332
- }
1333
-
1334
- static struct {
1335
- const char* label;
1336
- const char* filename;
1337
- size_t size_limit;
1338
- } files[] = {
1339
- { "html", "html", 0 },
1340
- { "urls", "urls.10K", 0 },
1341
- { "jpg", "fireworks.jpeg", 0 },
1342
- { "jpg_200", "fireworks.jpeg", 200 },
1343
- { "pdf", "paper-100k.pdf", 0 },
1344
- { "html4", "html_x_4", 0 },
1345
- { "txt1", "alice29.txt", 0 },
1346
- { "txt2", "asyoulik.txt", 0 },
1347
- { "txt3", "lcet10.txt", 0 },
1348
- { "txt4", "plrabn12.txt", 0 },
1349
- { "pb", "geo.protodata", 0 },
1350
- { "gaviota", "kppkn.gtb", 0 },
1351
- };
1352
-
1353
- static void BM_UFlat(int iters, int arg) {
1354
- StopBenchmarkTiming();
1355
-
1356
- // Pick file to process based on "arg"
1357
- CHECK_GE(arg, 0);
1358
- CHECK_LT(arg, ARRAYSIZE(files));
1359
- string contents = ReadTestDataFile(files[arg].filename,
1360
- files[arg].size_limit);
1361
-
1362
- string zcontents;
1363
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1364
- char* dst = new char[contents.size()];
1365
-
1366
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1367
- static_cast<int64>(contents.size()));
1368
- SetBenchmarkLabel(files[arg].label);
1369
- StartBenchmarkTiming();
1370
- while (iters-- > 0) {
1371
- CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
1372
- }
1373
- StopBenchmarkTiming();
1374
-
1375
- delete[] dst;
1376
- }
1377
- BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1378
-
1379
- static void BM_UValidate(int iters, int arg) {
1380
- StopBenchmarkTiming();
1381
-
1382
- // Pick file to process based on "arg"
1383
- CHECK_GE(arg, 0);
1384
- CHECK_LT(arg, ARRAYSIZE(files));
1385
- string contents = ReadTestDataFile(files[arg].filename,
1386
- files[arg].size_limit);
1387
-
1388
- string zcontents;
1389
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1390
-
1391
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1392
- static_cast<int64>(contents.size()));
1393
- SetBenchmarkLabel(files[arg].label);
1394
- StartBenchmarkTiming();
1395
- while (iters-- > 0) {
1396
- CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
1397
- }
1398
- StopBenchmarkTiming();
1399
- }
1400
- BENCHMARK(BM_UValidate)->DenseRange(0, 4);
1401
-
1402
- static void BM_UIOVec(int iters, int arg) {
1403
- StopBenchmarkTiming();
1404
-
1405
- // Pick file to process based on "arg"
1406
- CHECK_GE(arg, 0);
1407
- CHECK_LT(arg, ARRAYSIZE(files));
1408
- string contents = ReadTestDataFile(files[arg].filename,
1409
- files[arg].size_limit);
1410
-
1411
- string zcontents;
1412
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1413
-
1414
- // Uncompress into an iovec containing ten entries.
1415
- const int kNumEntries = 10;
1416
- struct iovec iov[kNumEntries];
1417
- char *dst = new char[contents.size()];
1418
- int used_so_far = 0;
1419
- for (int i = 0; i < kNumEntries; ++i) {
1420
- iov[i].iov_base = dst + used_so_far;
1421
- if (used_so_far == contents.size()) {
1422
- iov[i].iov_len = 0;
1423
- continue;
1424
- }
1425
-
1426
- if (i == kNumEntries - 1) {
1427
- iov[i].iov_len = contents.size() - used_so_far;
1428
- } else {
1429
- iov[i].iov_len = contents.size() / kNumEntries;
1430
- }
1431
- used_so_far += iov[i].iov_len;
1432
- }
1433
-
1434
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1435
- static_cast<int64>(contents.size()));
1436
- SetBenchmarkLabel(files[arg].label);
1437
- StartBenchmarkTiming();
1438
- while (iters-- > 0) {
1439
- CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
1440
- kNumEntries));
1441
- }
1442
- StopBenchmarkTiming();
1443
-
1444
- delete[] dst;
1445
- }
1446
- BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1447
-
1448
- static void BM_UFlatSink(int iters, int arg) {
1449
- StopBenchmarkTiming();
1450
-
1451
- // Pick file to process based on "arg"
1452
- CHECK_GE(arg, 0);
1453
- CHECK_LT(arg, ARRAYSIZE(files));
1454
- string contents = ReadTestDataFile(files[arg].filename,
1455
- files[arg].size_limit);
1456
-
1457
- string zcontents;
1458
- snappy::Compress(contents.data(), contents.size(), &zcontents);
1459
- char* dst = new char[contents.size()];
1460
-
1461
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1462
- static_cast<int64>(contents.size()));
1463
- SetBenchmarkLabel(files[arg].label);
1464
- StartBenchmarkTiming();
1465
- while (iters-- > 0) {
1466
- snappy::ByteArraySource source(zcontents.data(), zcontents.size());
1467
- snappy::UncheckedByteArraySink sink(dst);
1468
- CHECK(snappy::Uncompress(&source, &sink));
1001
+ TEST(Snappy, TestBenchmarkFiles) {
1002
+ for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
1003
+ Verify(ReadTestDataFile(kTestDataFiles[i].filename,
1004
+ kTestDataFiles[i].size_limit));
1469
1005
  }
1470
- StopBenchmarkTiming();
1471
-
1472
- string s(dst, contents.size());
1473
- CHECK_EQ(contents, s);
1474
-
1475
- delete[] dst;
1476
1006
  }
1477
1007
 
1478
- BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
1479
-
1480
- static void BM_ZFlat(int iters, int arg) {
1481
- StopBenchmarkTiming();
1482
-
1483
- // Pick file to process based on "arg"
1484
- CHECK_GE(arg, 0);
1485
- CHECK_LT(arg, ARRAYSIZE(files));
1486
- string contents = ReadTestDataFile(files[arg].filename,
1487
- files[arg].size_limit);
1488
-
1489
- char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1490
-
1491
- SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1492
- static_cast<int64>(contents.size()));
1493
- StartBenchmarkTiming();
1494
-
1495
- size_t zsize = 0;
1496
- while (iters-- > 0) {
1497
- snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
1498
- }
1499
- StopBenchmarkTiming();
1500
- const double compression_ratio =
1501
- static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1502
- SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
1503
- files[arg].label, 100.0 * compression_ratio));
1504
- VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
1505
- files[arg].label, contents.size(), zsize);
1506
- delete[] dst;
1507
- }
1508
- BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1008
+ } // namespace
1509
1009
 
1510
1010
  } // namespace snappy
1511
-
1512
-
1513
- int main(int argc, char** argv) {
1514
- InitGoogle(argv[0], &argc, &argv, true);
1515
- RunSpecifiedBenchmarks();
1516
-
1517
- if (argc >= 2) {
1518
- for (int arg = 1; arg < argc; arg++) {
1519
- if (FLAGS_write_compressed) {
1520
- CompressFile(argv[arg]);
1521
- } else if (FLAGS_write_uncompressed) {
1522
- UncompressFile(argv[arg]);
1523
- } else {
1524
- MeasureFile(argv[arg]);
1525
- }
1526
- }
1527
- return 0;
1528
- }
1529
-
1530
- return RUN_ALL_TESTS();
1531
- }