snappy 0.0.12-java → 0.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +28 -1
  3. data/Gemfile +6 -1
  4. data/README.md +28 -4
  5. data/Rakefile +1 -0
  6. data/ext/extconf.rb +21 -24
  7. data/lib/snappy.rb +3 -1
  8. data/lib/snappy/hadoop.rb +22 -0
  9. data/lib/snappy/hadoop/reader.rb +58 -0
  10. data/lib/snappy/hadoop/writer.rb +51 -0
  11. data/lib/snappy/reader.rb +11 -7
  12. data/lib/snappy/shim.rb +30 -0
  13. data/lib/snappy/version.rb +3 -1
  14. data/lib/snappy/writer.rb +14 -9
  15. data/smoke.sh +8 -0
  16. data/snappy.gemspec +6 -30
  17. data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
  18. data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
  19. data/test/test-snappy-hadoop.rb +22 -0
  20. data/vendor/snappy/CMakeLists.txt +174 -0
  21. data/vendor/snappy/CONTRIBUTING.md +26 -0
  22. data/vendor/snappy/COPYING +1 -1
  23. data/vendor/snappy/NEWS +52 -0
  24. data/vendor/snappy/{README → README.md} +23 -9
  25. data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
  26. data/vendor/snappy/cmake/config.h.in +62 -0
  27. data/vendor/snappy/snappy-c.h +3 -3
  28. data/vendor/snappy/snappy-internal.h +101 -27
  29. data/vendor/snappy/snappy-sinksource.cc +33 -0
  30. data/vendor/snappy/snappy-sinksource.h +51 -6
  31. data/vendor/snappy/snappy-stubs-internal.h +107 -37
  32. data/vendor/snappy/snappy-stubs-public.h.in +16 -20
  33. data/vendor/snappy/snappy-test.cc +15 -9
  34. data/vendor/snappy/snappy-test.h +34 -43
  35. data/vendor/snappy/snappy.cc +529 -320
  36. data/vendor/snappy/snappy.h +23 -4
  37. data/vendor/snappy/snappy_unittest.cc +240 -185
  38. metadata +27 -74
  39. data/vendor/snappy/ChangeLog +0 -1916
  40. data/vendor/snappy/Makefile.am +0 -23
  41. data/vendor/snappy/autogen.sh +0 -7
  42. data/vendor/snappy/configure.ac +0 -133
  43. data/vendor/snappy/m4/gtest.m4 +0 -74
  44. data/vendor/snappy/testdata/alice29.txt +0 -3609
  45. data/vendor/snappy/testdata/asyoulik.txt +0 -4122
  46. data/vendor/snappy/testdata/baddata1.snappy +0 -0
  47. data/vendor/snappy/testdata/baddata2.snappy +0 -0
  48. data/vendor/snappy/testdata/baddata3.snappy +0 -0
  49. data/vendor/snappy/testdata/fireworks.jpeg +0 -0
  50. data/vendor/snappy/testdata/geo.protodata +0 -0
  51. data/vendor/snappy/testdata/html +0 -1
  52. data/vendor/snappy/testdata/html_x_4 +0 -1
  53. data/vendor/snappy/testdata/kppkn.gtb +0 -0
  54. data/vendor/snappy/testdata/lcet10.txt +0 -7519
  55. data/vendor/snappy/testdata/paper-100k.pdf +2 -600
  56. data/vendor/snappy/testdata/plrabn12.txt +0 -10699
  57. data/vendor/snappy/testdata/urls.10K +0 -10000
@@ -36,8 +36,8 @@
36
36
  // using BMDiff and then compressing the output of BMDiff with
37
37
  // Snappy.
38
38
 
39
- #ifndef UTIL_SNAPPY_SNAPPY_H__
40
- #define UTIL_SNAPPY_SNAPPY_H__
39
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
40
+ #define THIRD_PARTY_SNAPPY_SNAPPY_H__
41
41
 
42
42
  #include <stddef.h>
43
43
  #include <string>
@@ -84,6 +84,18 @@ namespace snappy {
84
84
  bool Uncompress(const char* compressed, size_t compressed_length,
85
85
  string* uncompressed);
86
86
 
87
+ // Decompresses "compressed" to "*uncompressed".
88
+ //
89
+ // returns false if the message is corrupted and could not be decompressed
90
+ bool Uncompress(Source* compressed, Sink* uncompressed);
91
+
92
+ // This routine uncompresses as much of the "compressed" as possible
93
+ // into sink. It returns the number of valid bytes added to sink
94
+ // (extra invalid bytes may have been added due to errors; the caller
95
+ // should ignore those). The emitted data typically has length
96
+ // GetUncompressedLength(), but may be shorter if an error is
97
+ // encountered.
98
+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed);
87
99
 
88
100
  // ------------------------------------------------------------------------
89
101
  // Lower-level character array based routines. May be useful for
@@ -164,6 +176,14 @@ namespace snappy {
164
176
  bool IsValidCompressedBuffer(const char* compressed,
165
177
  size_t compressed_length);
166
178
 
179
+ // Returns true iff the contents of "compressed" can be uncompressed
180
+ // successfully. Does not return the uncompressed data. Takes
181
+ // time proportional to *compressed length, but is usually at least
182
+ // a factor of four faster than actual decompression.
183
+ // On success, consumes all of *compressed. On failure, consumes an
184
+ // unspecified prefix of *compressed.
185
+ bool IsValidCompressed(Source* compressed);
186
+
167
187
  // The size of a compression block. Note that many parts of the compression
168
188
  // code assumes that kBlockSize <= 65536; in particular, the hash table
169
189
  // can only store 16-bit offsets, and EmitCopy() also assumes the offset
@@ -180,5 +200,4 @@ namespace snappy {
180
200
  static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
181
201
  } // end namespace snappy
182
202
 
183
-
184
- #endif // UTIL_SNAPPY_SNAPPY_H__
203
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
@@ -32,6 +32,7 @@
32
32
 
33
33
  #include <algorithm>
34
34
  #include <string>
35
+ #include <utility>
35
36
  #include <vector>
36
37
 
37
38
  #include "snappy.h"
@@ -50,25 +51,19 @@ DEFINE_bool(zlib, false,
50
51
  "Run zlib compression (http://www.zlib.net)");
51
52
  DEFINE_bool(lzo, false,
52
53
  "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
53
- DEFINE_bool(quicklz, false,
54
- "Run quickLZ compression (http://www.quicklz.com/)");
55
- DEFINE_bool(liblzf, false,
56
- "Run libLZF compression "
57
- "(http://www.goof.com/pcg/marc/liblzf.html)");
58
- DEFINE_bool(fastlz, false,
59
- "Run FastLZ compression (http://www.fastlz.org/");
60
54
  DEFINE_bool(snappy, true, "Run snappy compression");
61
55
 
62
-
63
56
  DEFINE_bool(write_compressed, false,
64
57
  "Write compressed versions of each file to <file>.comp");
65
58
  DEFINE_bool(write_uncompressed, false,
66
59
  "Write uncompressed versions of each file to <file>.uncomp");
67
60
 
68
- namespace snappy {
61
+ DEFINE_bool(snappy_dump_decompression_table, false,
62
+ "If true, we print the decompression table during tests.");
69
63
 
64
+ namespace snappy {
70
65
 
71
- #ifdef HAVE_FUNC_MMAP
66
+ #if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
72
67
 
73
68
  // To test against code that reads beyond its input, this class copies a
74
69
  // string to a newly allocated group of pages, the last of which
@@ -79,7 +74,7 @@ namespace snappy {
79
74
  class DataEndingAtUnreadablePage {
80
75
  public:
81
76
  explicit DataEndingAtUnreadablePage(const string& s) {
82
- const size_t page_size = getpagesize();
77
+ const size_t page_size = sysconf(_SC_PAGESIZE);
83
78
  const size_t size = s.size();
84
79
  // Round up space for string to a multiple of page_size.
85
80
  size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
@@ -97,8 +92,9 @@ class DataEndingAtUnreadablePage {
97
92
  }
98
93
 
99
94
  ~DataEndingAtUnreadablePage() {
95
+ const size_t page_size = sysconf(_SC_PAGESIZE);
100
96
  // Undo the mprotect.
101
- CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
97
+ CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
102
98
  CHECK_EQ(0, munmap(mem_, alloc_size_));
103
99
  }
104
100
 
@@ -113,7 +109,7 @@ class DataEndingAtUnreadablePage {
113
109
  size_t size_;
114
110
  };
115
111
 
116
- #else // HAVE_FUNC_MMAP
112
+ #else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
117
113
 
118
114
  // Fallback for systems without mmap.
119
115
  typedef string DataEndingAtUnreadablePage;
@@ -121,11 +117,11 @@ typedef string DataEndingAtUnreadablePage;
121
117
  #endif
122
118
 
123
119
  enum CompressorType {
124
- ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY
120
+ ZLIB, LZO, SNAPPY
125
121
  };
126
122
 
127
123
  const char* names[] = {
128
- "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY"
124
+ "ZLIB", "LZO", "SNAPPY"
129
125
  };
130
126
 
131
127
  static size_t MinimumRequiredOutputSpace(size_t input_size,
@@ -141,26 +137,12 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
141
137
  return input_size + input_size/64 + 16 + 3;
142
138
  #endif // LZO_VERSION
143
139
 
144
- #ifdef LZF_VERSION
145
- case LIBLZF:
146
- return input_size;
147
- #endif // LZF_VERSION
148
-
149
- #ifdef QLZ_VERSION_MAJOR
150
- case QUICKLZ:
151
- return input_size + 36000; // 36000 is used for scratch.
152
- #endif // QLZ_VERSION_MAJOR
153
-
154
- #ifdef FASTLZ_VERSION
155
- case FASTLZ:
156
- return max(static_cast<int>(ceil(input_size * 1.05)), 66);
157
- #endif // FASTLZ_VERSION
158
-
159
140
  case SNAPPY:
160
141
  return snappy::MaxCompressedLength(input_size);
161
142
 
162
143
  default:
163
144
  LOG(FATAL) << "Unknown compression type number " << comp;
145
+ return 0;
164
146
  }
165
147
  }
166
148
 
@@ -214,58 +196,6 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
214
196
  }
215
197
  #endif // LZO_VERSION
216
198
 
217
- #ifdef LZF_VERSION
218
- case LIBLZF: {
219
- int destlen = lzf_compress(input,
220
- input_size,
221
- string_as_array(compressed),
222
- input_size);
223
- if (destlen == 0) {
224
- // lzf *can* cause lots of blowup when compressing, so they
225
- // recommend to limit outsize to insize, and just not compress
226
- // if it's bigger. Ideally, we'd just swap input and output.
227
- compressed->assign(input, input_size);
228
- destlen = input_size;
229
- }
230
- if (!compressed_is_preallocated) {
231
- compressed->resize(destlen);
232
- }
233
- break;
234
- }
235
- #endif // LZF_VERSION
236
-
237
- #ifdef QLZ_VERSION_MAJOR
238
- case QUICKLZ: {
239
- qlz_state_compress *state_compress = new qlz_state_compress;
240
- int destlen = qlz_compress(input,
241
- string_as_array(compressed),
242
- input_size,
243
- state_compress);
244
- delete state_compress;
245
- CHECK_NE(0, destlen);
246
- if (!compressed_is_preallocated) {
247
- compressed->resize(destlen);
248
- }
249
- break;
250
- }
251
- #endif // QLZ_VERSION_MAJOR
252
-
253
- #ifdef FASTLZ_VERSION
254
- case FASTLZ: {
255
- // Use level 1 compression since we mostly care about speed.
256
- int destlen = fastlz_compress_level(
257
- 1,
258
- input,
259
- input_size,
260
- string_as_array(compressed));
261
- if (!compressed_is_preallocated) {
262
- compressed->resize(destlen);
263
- }
264
- CHECK_NE(destlen, 0);
265
- break;
266
- }
267
- #endif // FASTLZ_VERSION
268
-
269
199
  case SNAPPY: {
270
200
  size_t destlen;
271
201
  snappy::RawCompress(input, input_size,
@@ -278,7 +208,6 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
278
208
  break;
279
209
  }
280
210
 
281
-
282
211
  default: {
283
212
  return false; // the asked-for library wasn't compiled in
284
213
  }
@@ -321,56 +250,12 @@ static bool Uncompress(const string& compressed, CompressorType comp,
321
250
  }
322
251
  #endif // LZO_VERSION
323
252
 
324
- #ifdef LZF_VERSION
325
- case LIBLZF: {
326
- output->resize(size);
327
- int destlen = lzf_decompress(compressed.data(),
328
- compressed.size(),
329
- string_as_array(output),
330
- output->size());
331
- if (destlen == 0) {
332
- // This error probably means we had decided not to compress,
333
- // and thus have stored input in output directly.
334
- output->assign(compressed.data(), compressed.size());
335
- destlen = compressed.size();
336
- }
337
- CHECK_EQ(destlen, size);
338
- break;
339
- }
340
- #endif // LZF_VERSION
341
-
342
- #ifdef QLZ_VERSION_MAJOR
343
- case QUICKLZ: {
344
- output->resize(size);
345
- qlz_state_decompress *state_decompress = new qlz_state_decompress;
346
- int destlen = qlz_decompress(compressed.data(),
347
- string_as_array(output),
348
- state_decompress);
349
- delete state_decompress;
350
- CHECK_EQ(destlen, size);
351
- break;
352
- }
353
- #endif // QLZ_VERSION_MAJOR
354
-
355
- #ifdef FASTLZ_VERSION
356
- case FASTLZ: {
357
- output->resize(size);
358
- int destlen = fastlz_decompress(compressed.data(),
359
- compressed.length(),
360
- string_as_array(output),
361
- size);
362
- CHECK_EQ(destlen, size);
363
- break;
364
- }
365
- #endif // FASTLZ_VERSION
366
-
367
253
  case SNAPPY: {
368
254
  snappy::RawUncompress(compressed.data(), compressed.size(),
369
255
  string_as_array(output));
370
256
  break;
371
257
  }
372
258
 
373
-
374
259
  default: {
375
260
  return false; // the asked-for library wasn't compiled in
376
261
  }
@@ -392,13 +277,13 @@ static void Measure(const char* data,
392
277
  {
393
278
  // Chop the input into blocks
394
279
  int num_blocks = (length + block_size - 1) / block_size;
395
- vector<const char*> input(num_blocks);
396
- vector<size_t> input_length(num_blocks);
397
- vector<string> compressed(num_blocks);
398
- vector<string> output(num_blocks);
280
+ std::vector<const char*> input(num_blocks);
281
+ std::vector<size_t> input_length(num_blocks);
282
+ std::vector<string> compressed(num_blocks);
283
+ std::vector<string> output(num_blocks);
399
284
  for (int b = 0; b < num_blocks; b++) {
400
285
  int input_start = b * block_size;
401
- int input_limit = min<int>((b+1)*block_size, length);
286
+ int input_limit = std::min<int>((b+1)*block_size, length);
402
287
  input[b] = data+input_start;
403
288
  input_length[b] = input_limit-input_start;
404
289
 
@@ -448,13 +333,13 @@ static void Measure(const char* data,
448
333
  }
449
334
 
450
335
  compressed_size = 0;
451
- for (int i = 0; i < compressed.size(); i++) {
336
+ for (size_t i = 0; i < compressed.size(); i++) {
452
337
  compressed_size += compressed[i].size();
453
338
  }
454
339
  }
455
340
 
456
- sort(ctime, ctime + kRuns);
457
- sort(utime, utime + kRuns);
341
+ std::sort(ctime, ctime + kRuns);
342
+ std::sort(utime, utime + kRuns);
458
343
  const int med = kRuns/2;
459
344
 
460
345
  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
@@ -469,12 +354,11 @@ static void Measure(const char* data,
469
354
  x.c_str(),
470
355
  block_size/(1<<20),
471
356
  static_cast<int>(length), static_cast<uint32>(compressed_size),
472
- (compressed_size * 100.0) / max<int>(1, length),
357
+ (compressed_size * 100.0) / std::max<int>(1, length),
473
358
  comp_rate,
474
359
  urate.c_str());
475
360
  }
476
361
 
477
-
478
362
  static int VerifyString(const string& input) {
479
363
  string compressed;
480
364
  DataEndingAtUnreadablePage i(input);
@@ -491,6 +375,23 @@ static int VerifyString(const string& input) {
491
375
  return uncompressed.size();
492
376
  }
493
377
 
378
+ static void VerifyStringSink(const string& input) {
379
+ string compressed;
380
+ DataEndingAtUnreadablePage i(input);
381
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
382
+ CHECK_EQ(written, compressed.size());
383
+ CHECK_LE(compressed.size(),
384
+ snappy::MaxCompressedLength(input.size()));
385
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
386
+
387
+ string uncompressed;
388
+ uncompressed.resize(input.size());
389
+ snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
390
+ DataEndingAtUnreadablePage c(compressed);
391
+ snappy::ByteArraySource source(c.data(), c.size());
392
+ CHECK(snappy::Uncompress(&source, &sink));
393
+ CHECK_EQ(uncompressed, input);
394
+ }
494
395
 
495
396
  static void VerifyIOVec(const string& input) {
496
397
  string compressed;
@@ -505,13 +406,13 @@ static void VerifyIOVec(const string& input) {
505
406
  // ranging from 1 to 10.
506
407
  char* buf = new char[input.size()];
507
408
  ACMRandom rnd(input.size());
508
- int num = rnd.Next() % 10 + 1;
409
+ size_t num = rnd.Next() % 10 + 1;
509
410
  if (input.size() < num) {
510
411
  num = input.size();
511
412
  }
512
413
  struct iovec* iov = new iovec[num];
513
414
  int used_so_far = 0;
514
- for (int i = 0; i < num; ++i) {
415
+ for (size_t i = 0; i < num; ++i) {
515
416
  iov[i].iov_base = buf + used_so_far;
516
417
  if (i == num - 1) {
517
418
  iov[i].iov_len = input.size() - used_so_far;
@@ -562,6 +463,28 @@ static void VerifyNonBlockedCompression(const string& input) {
562
463
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
563
464
  CHECK_EQ(uncomp_str, input);
564
465
 
466
+ // Uncompress using source/sink
467
+ string uncomp_str2;
468
+ uncomp_str2.resize(input.size());
469
+ snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
470
+ snappy::ByteArraySource source(compressed.data(), compressed.size());
471
+ CHECK(snappy::Uncompress(&source, &sink));
472
+ CHECK_EQ(uncomp_str2, input);
473
+
474
+ // Uncompress into iovec
475
+ {
476
+ static const int kNumBlocks = 10;
477
+ struct iovec vec[kNumBlocks];
478
+ const int block_size = 1 + input.size() / kNumBlocks;
479
+ string iovec_data(block_size * kNumBlocks, 'x');
480
+ for (int i = 0; i < kNumBlocks; i++) {
481
+ vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
482
+ vec[i].iov_len = block_size;
483
+ }
484
+ CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
485
+ vec, kNumBlocks));
486
+ CHECK_EQ(string(iovec_data.data(), input.size()), input);
487
+ }
565
488
  }
566
489
 
567
490
  // Expand the input so that it is at least K times as big as block size
@@ -580,6 +503,8 @@ static int Verify(const string& input) {
580
503
  // Compress using string based routines
581
504
  const int result = VerifyString(input);
582
505
 
506
+ // Verify using sink based routines
507
+ VerifyStringSink(input);
583
508
 
584
509
  VerifyNonBlockedCompression(input);
585
510
  VerifyIOVec(input);
@@ -589,12 +514,9 @@ static int Verify(const string& input) {
589
514
  VerifyIOVec(input);
590
515
  }
591
516
 
592
-
593
517
  return result;
594
518
  }
595
519
 
596
- // This test checks to ensure that snappy doesn't coredump if it gets
597
- // corrupted data.
598
520
 
599
521
  static bool IsValidCompressedBuffer(const string& c) {
600
522
  return snappy::IsValidCompressedBuffer(c.data(), c.size());
@@ -603,11 +525,13 @@ static bool Uncompress(const string& c, string* u) {
603
525
  return snappy::Uncompress(c.data(), c.size(), u);
604
526
  }
605
527
 
606
- TYPED_TEST(CorruptedTest, VerifyCorrupted) {
528
+ // This test checks to ensure that snappy doesn't coredump if it gets
529
+ // corrupted data.
530
+ TEST(CorruptedTest, VerifyCorrupted) {
607
531
  string source = "making sure we don't crash with corrupted input";
608
532
  VLOG(1) << source;
609
533
  string dest;
610
- TypeParam uncmp;
534
+ string uncmp;
611
535
  snappy::Compress(source.data(), source.size(), &dest);
612
536
 
613
537
  // Mess around with the data. It's hard to simulate all possible
@@ -616,19 +540,19 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
616
540
  dest[1]--;
617
541
  dest[3]++;
618
542
  // this really ought to fail.
619
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
620
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
543
+ CHECK(!IsValidCompressedBuffer(dest));
544
+ CHECK(!Uncompress(dest, &uncmp));
621
545
 
622
546
  // This is testing for a security bug - a buffer that decompresses to 100k
623
547
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
624
548
  source.resize(100000);
625
- for (int i = 0; i < source.length(); ++i) {
549
+ for (size_t i = 0; i < source.length(); ++i) {
626
550
  source[i] = 'A';
627
551
  }
628
552
  snappy::Compress(source.data(), source.size(), &dest);
629
553
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
630
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
631
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
554
+ CHECK(!IsValidCompressedBuffer(dest));
555
+ CHECK(!Uncompress(dest, &uncmp));
632
556
 
633
557
  if (sizeof(void *) == 4) {
634
558
  // Another security check; check a crazy big length can't DoS us with an
@@ -637,20 +561,20 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
637
561
  // where 3 GB might be an acceptable allocation size, Uncompress()
638
562
  // attempts to decompress, and sometimes causes the test to run out of
639
563
  // memory.
640
- dest[0] = dest[1] = dest[2] = dest[3] = 0xff;
564
+ dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
641
565
  // This decodes to a really large size, i.e., about 3 GB.
642
566
  dest[4] = 'k';
643
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
644
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
567
+ CHECK(!IsValidCompressedBuffer(dest));
568
+ CHECK(!Uncompress(dest, &uncmp));
645
569
  } else {
646
570
  LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
647
571
  }
648
572
 
649
573
  // This decodes to about 2 MB; much smaller, but should still fail.
650
- dest[0] = dest[1] = dest[2] = 0xff;
574
+ dest[0] = dest[1] = dest[2] = '\xff';
651
575
  dest[3] = 0x00;
652
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
653
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
576
+ CHECK(!IsValidCompressedBuffer(dest));
577
+ CHECK(!Uncompress(dest, &uncmp));
654
578
 
655
579
  // try reading stuff in from a bad file.
656
580
  for (int i = 1; i <= 3; ++i) {
@@ -665,8 +589,8 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
665
589
  snappy::ByteArraySource source(data.data(), data.size());
666
590
  CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
667
591
  (ulen2 < (1<<20)));
668
- CHECK(!IsValidCompressedBuffer(TypeParam(data)));
669
- CHECK(!Uncompress(TypeParam(data), &uncmp));
592
+ CHECK(!IsValidCompressedBuffer(data));
593
+ CHECK(!Uncompress(data, &uncmp));
670
594
  }
671
595
  }
672
596
 
@@ -764,7 +688,7 @@ TEST(Snappy, RandomData) {
764
688
  }
765
689
 
766
690
  string x;
767
- int len = rnd.Uniform(4096);
691
+ size_t len = rnd.Uniform(4096);
768
692
  if (i < 100) {
769
693
  len = 65536 + rnd.Uniform(65536);
770
694
  }
@@ -929,7 +853,6 @@ TEST(Snappy, IOVecCopyOverflow) {
929
853
  }
930
854
  }
931
855
 
932
-
933
856
  static bool CheckUncompressedLength(const string& compressed,
934
857
  size_t* ulength) {
935
858
  const bool result1 = snappy::GetUncompressedLength(compressed.data(),
@@ -956,11 +879,11 @@ TEST(SnappyCorruption, TruncatedVarint) {
956
879
  TEST(SnappyCorruption, UnterminatedVarint) {
957
880
  string compressed, uncompressed;
958
881
  size_t ulength;
959
- compressed.push_back(128);
960
- compressed.push_back(128);
961
- compressed.push_back(128);
962
- compressed.push_back(128);
963
- compressed.push_back(128);
882
+ compressed.push_back('\x80');
883
+ compressed.push_back('\x80');
884
+ compressed.push_back('\x80');
885
+ compressed.push_back('\x80');
886
+ compressed.push_back('\x80');
964
887
  compressed.push_back(10);
965
888
  CHECK(!CheckUncompressedLength(compressed, &ulength));
966
889
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
@@ -968,6 +891,20 @@ TEST(SnappyCorruption, UnterminatedVarint) {
968
891
  &uncompressed));
969
892
  }
970
893
 
894
+ TEST(SnappyCorruption, OverflowingVarint) {
895
+ string compressed, uncompressed;
896
+ size_t ulength;
897
+ compressed.push_back('\xfb');
898
+ compressed.push_back('\xff');
899
+ compressed.push_back('\xff');
900
+ compressed.push_back('\xff');
901
+ compressed.push_back('\x7f');
902
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
903
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
904
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
905
+ &uncompressed));
906
+ }
907
+
971
908
  TEST(Snappy, ReadPastEndOfBuffer) {
972
909
  // Check that we do not read past end of input
973
910
 
@@ -998,11 +935,13 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
998
935
  EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
999
936
  }
1000
937
 
1001
-
1002
938
  namespace {
1003
939
 
1004
940
  int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
1005
- return snappy::internal::FindMatchLength(s1, s2, s2 + length);
941
+ std::pair<size_t, bool> p =
942
+ snappy::internal::FindMatchLength(s1, s2, s2 + length);
943
+ CHECK_EQ(p.first < 8, p.second);
944
+ return p.first;
1006
945
  }
1007
946
 
1008
947
  } // namespace
@@ -1112,8 +1051,7 @@ TEST(Snappy, FindMatchLengthRandom) {
1112
1051
  }
1113
1052
  DataEndingAtUnreadablePage u(s);
1114
1053
  DataEndingAtUnreadablePage v(t);
1115
- int matched = snappy::internal::FindMatchLength(
1116
- u.data(), v.data(), v.data() + t.size());
1054
+ int matched = TestFindMatchLength(u.data(), v.data(), t.size());
1117
1055
  if (matched == t.size()) {
1118
1056
  EXPECT_EQ(s, t);
1119
1057
  } else {
@@ -1125,21 +1063,113 @@ TEST(Snappy, FindMatchLengthRandom) {
1125
1063
  }
1126
1064
  }
1127
1065
 
1066
+ static uint16 MakeEntry(unsigned int extra,
1067
+ unsigned int len,
1068
+ unsigned int copy_offset) {
1069
+ // Check that all of the fields fit within the allocated space
1070
+ assert(extra == (extra & 0x7)); // At most 3 bits
1071
+ assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
1072
+ assert(len == (len & 0x7f)); // At most 7 bits
1073
+ return len | (copy_offset << 8) | (extra << 11);
1074
+ }
1075
+
1076
+ // Check that the decompression table is correct, and optionally print out
1077
+ // the computed one.
1078
+ TEST(Snappy, VerifyCharTable) {
1079
+ using snappy::internal::LITERAL;
1080
+ using snappy::internal::COPY_1_BYTE_OFFSET;
1081
+ using snappy::internal::COPY_2_BYTE_OFFSET;
1082
+ using snappy::internal::COPY_4_BYTE_OFFSET;
1083
+ using snappy::internal::char_table;
1084
+
1085
+ uint16 dst[256];
1086
+
1087
+ // Place invalid entries in all places to detect missing initialization
1088
+ int assigned = 0;
1089
+ for (int i = 0; i < 256; i++) {
1090
+ dst[i] = 0xffff;
1091
+ }
1092
+
1093
+ // Small LITERAL entries. We store (len-1) in the top 6 bits.
1094
+ for (unsigned int len = 1; len <= 60; len++) {
1095
+ dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
1096
+ assigned++;
1097
+ }
1098
+
1099
+ // Large LITERAL entries. We use 60..63 in the high 6 bits to
1100
+ // encode the number of bytes of length info that follow the opcode.
1101
+ for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
1102
+ // We set the length field in the lookup table to 1 because extra
1103
+ // bytes encode len-1.
1104
+ dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
1105
+ assigned++;
1106
+ }
1107
+
1108
+ // COPY_1_BYTE_OFFSET.
1109
+ //
1110
+ // The tag byte in the compressed data stores len-4 in 3 bits, and
1111
+ // offset/256 in 5 bits. offset%256 is stored in the next byte.
1112
+ //
1113
+ // This format is used for length in range [4..11] and offset in
1114
+ // range [0..2047]
1115
+ for (unsigned int len = 4; len < 12; len++) {
1116
+ for (unsigned int offset = 0; offset < 2048; offset += 256) {
1117
+ dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
1118
+ MakeEntry(1, len, offset>>8);
1119
+ assigned++;
1120
+ }
1121
+ }
1122
+
1123
+ // COPY_2_BYTE_OFFSET.
1124
+ // Tag contains len-1 in top 6 bits, and offset in next two bytes.
1125
+ for (unsigned int len = 1; len <= 64; len++) {
1126
+ dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
1127
+ assigned++;
1128
+ }
1129
+
1130
+ // COPY_4_BYTE_OFFSET.
1131
+ // Tag contents len-1 in top 6 bits, and offset in next four bytes.
1132
+ for (unsigned int len = 1; len <= 64; len++) {
1133
+ dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
1134
+ assigned++;
1135
+ }
1136
+
1137
+ // Check that each entry was initialized exactly once.
1138
+ EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
1139
+ for (int i = 0; i < 256; i++) {
1140
+ EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
1141
+ }
1142
+
1143
+ if (FLAGS_snappy_dump_decompression_table) {
1144
+ printf("static const uint16 char_table[256] = {\n ");
1145
+ for (int i = 0; i < 256; i++) {
1146
+ printf("0x%04x%s",
1147
+ dst[i],
1148
+ ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
1149
+ }
1150
+ printf("};\n");
1151
+ }
1152
+
1153
+ // Check that computed table matched recorded table.
1154
+ for (int i = 0; i < 256; i++) {
1155
+ EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
1156
+ }
1157
+ }
1128
1158
 
1129
1159
  static void CompressFile(const char* fname) {
1130
1160
  string fullinput;
1131
- file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1161
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1132
1162
 
1133
1163
  string compressed;
1134
1164
  Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1135
1165
 
1136
- file::SetContents(string(fname).append(".comp"), compressed, file::Defaults())
1137
- .CheckSuccess();
1166
+ CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
1167
+ file::Defaults()));
1138
1168
  }
1139
1169
 
1140
1170
  static void UncompressFile(const char* fname) {
1141
1171
  string fullinput;
1142
- file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1172
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1143
1173
 
1144
1174
  size_t uncompLength;
1145
1175
  CHECK(CheckUncompressedLength(fullinput, &uncompLength));
@@ -1148,28 +1178,25 @@ static void UncompressFile(const char* fname) {
1148
1178
  uncompressed.resize(uncompLength);
1149
1179
  CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1150
1180
 
1151
- file::SetContents(string(fname).append(".uncomp"), uncompressed,
1152
- file::Defaults()).CheckSuccess();
1181
+ CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
1182
+ file::Defaults()));
1153
1183
  }
1154
1184
 
1155
1185
  static void MeasureFile(const char* fname) {
1156
1186
  string fullinput;
1157
- file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1187
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1158
1188
  printf("%-40s :\n", fname);
1159
1189
 
1160
1190
  int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
1161
1191
  int end_len = fullinput.size();
1162
1192
  if (FLAGS_end_len >= 0) {
1163
- end_len = min<int>(fullinput.size(), FLAGS_end_len);
1193
+ end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
1164
1194
  }
1165
1195
  for (int len = start_len; len <= end_len; len++) {
1166
1196
  const char* const input = fullinput.data();
1167
1197
  int repeats = (FLAGS_bytes + len) / (len + 1);
1168
1198
  if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
1169
1199
  if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
1170
- if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
1171
- if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
1172
- if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
1173
1200
  if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
1174
1201
 
1175
1202
  // For block-size based measurements
@@ -1298,6 +1325,37 @@ static void BM_UIOVec(int iters, int arg) {
1298
1325
  }
1299
1326
  BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1300
1327
 
1328
+ static void BM_UFlatSink(int iters, int arg) {
1329
+ StopBenchmarkTiming();
1330
+
1331
+ // Pick file to process based on "arg"
1332
+ CHECK_GE(arg, 0);
1333
+ CHECK_LT(arg, ARRAYSIZE(files));
1334
+ string contents = ReadTestDataFile(files[arg].filename,
1335
+ files[arg].size_limit);
1336
+
1337
+ string zcontents;
1338
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1339
+ char* dst = new char[contents.size()];
1340
+
1341
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1342
+ static_cast<int64>(contents.size()));
1343
+ SetBenchmarkLabel(files[arg].label);
1344
+ StartBenchmarkTiming();
1345
+ while (iters-- > 0) {
1346
+ snappy::ByteArraySource source(zcontents.data(), zcontents.size());
1347
+ snappy::UncheckedByteArraySink sink(dst);
1348
+ CHECK(snappy::Uncompress(&source, &sink));
1349
+ }
1350
+ StopBenchmarkTiming();
1351
+
1352
+ string s(dst, contents.size());
1353
+ CHECK_EQ(contents, s);
1354
+
1355
+ delete[] dst;
1356
+ }
1357
+
1358
+ BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
1301
1359
 
1302
1360
  static void BM_ZFlat(int iters, int arg) {
1303
1361
  StopBenchmarkTiming();
@@ -1329,23 +1387,20 @@ static void BM_ZFlat(int iters, int arg) {
1329
1387
  }
1330
1388
  BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1331
1389
 
1332
-
1333
1390
  } // namespace snappy
1334
1391
 
1335
-
1336
1392
  int main(int argc, char** argv) {
1337
1393
  InitGoogle(argv[0], &argc, &argv, true);
1338
1394
  RunSpecifiedBenchmarks();
1339
1395
 
1340
-
1341
1396
  if (argc >= 2) {
1342
1397
  for (int arg = 1; arg < argc; arg++) {
1343
1398
  if (FLAGS_write_compressed) {
1344
- CompressFile(argv[arg]);
1399
+ snappy::CompressFile(argv[arg]);
1345
1400
  } else if (FLAGS_write_uncompressed) {
1346
- UncompressFile(argv[arg]);
1401
+ snappy::UncompressFile(argv[arg]);
1347
1402
  } else {
1348
- MeasureFile(argv[arg]);
1403
+ snappy::MeasureFile(argv[arg]);
1349
1404
  }
1350
1405
  }
1351
1406
  return 0;