snappy 0.0.12-java → 0.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +28 -1
  3. data/Gemfile +6 -1
  4. data/README.md +28 -4
  5. data/Rakefile +1 -0
  6. data/ext/extconf.rb +21 -24
  7. data/lib/snappy.rb +3 -1
  8. data/lib/snappy/hadoop.rb +22 -0
  9. data/lib/snappy/hadoop/reader.rb +58 -0
  10. data/lib/snappy/hadoop/writer.rb +51 -0
  11. data/lib/snappy/reader.rb +11 -7
  12. data/lib/snappy/shim.rb +30 -0
  13. data/lib/snappy/version.rb +3 -1
  14. data/lib/snappy/writer.rb +14 -9
  15. data/smoke.sh +8 -0
  16. data/snappy.gemspec +6 -30
  17. data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
  18. data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
  19. data/test/test-snappy-hadoop.rb +22 -0
  20. data/vendor/snappy/CMakeLists.txt +174 -0
  21. data/vendor/snappy/CONTRIBUTING.md +26 -0
  22. data/vendor/snappy/COPYING +1 -1
  23. data/vendor/snappy/NEWS +52 -0
  24. data/vendor/snappy/{README → README.md} +23 -9
  25. data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
  26. data/vendor/snappy/cmake/config.h.in +62 -0
  27. data/vendor/snappy/snappy-c.h +3 -3
  28. data/vendor/snappy/snappy-internal.h +101 -27
  29. data/vendor/snappy/snappy-sinksource.cc +33 -0
  30. data/vendor/snappy/snappy-sinksource.h +51 -6
  31. data/vendor/snappy/snappy-stubs-internal.h +107 -37
  32. data/vendor/snappy/snappy-stubs-public.h.in +16 -20
  33. data/vendor/snappy/snappy-test.cc +15 -9
  34. data/vendor/snappy/snappy-test.h +34 -43
  35. data/vendor/snappy/snappy.cc +529 -320
  36. data/vendor/snappy/snappy.h +23 -4
  37. data/vendor/snappy/snappy_unittest.cc +240 -185
  38. metadata +27 -74
  39. data/vendor/snappy/ChangeLog +0 -1916
  40. data/vendor/snappy/Makefile.am +0 -23
  41. data/vendor/snappy/autogen.sh +0 -7
  42. data/vendor/snappy/configure.ac +0 -133
  43. data/vendor/snappy/m4/gtest.m4 +0 -74
  44. data/vendor/snappy/testdata/alice29.txt +0 -3609
  45. data/vendor/snappy/testdata/asyoulik.txt +0 -4122
  46. data/vendor/snappy/testdata/baddata1.snappy +0 -0
  47. data/vendor/snappy/testdata/baddata2.snappy +0 -0
  48. data/vendor/snappy/testdata/baddata3.snappy +0 -0
  49. data/vendor/snappy/testdata/fireworks.jpeg +0 -0
  50. data/vendor/snappy/testdata/geo.protodata +0 -0
  51. data/vendor/snappy/testdata/html +0 -1
  52. data/vendor/snappy/testdata/html_x_4 +0 -1
  53. data/vendor/snappy/testdata/kppkn.gtb +0 -0
  54. data/vendor/snappy/testdata/lcet10.txt +0 -7519
  55. data/vendor/snappy/testdata/paper-100k.pdf +2 -600
  56. data/vendor/snappy/testdata/plrabn12.txt +0 -10699
  57. data/vendor/snappy/testdata/urls.10K +0 -10000
@@ -36,8 +36,8 @@
36
36
  // using BMDiff and then compressing the output of BMDiff with
37
37
  // Snappy.
38
38
 
39
- #ifndef UTIL_SNAPPY_SNAPPY_H__
40
- #define UTIL_SNAPPY_SNAPPY_H__
39
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
40
+ #define THIRD_PARTY_SNAPPY_SNAPPY_H__
41
41
 
42
42
  #include <stddef.h>
43
43
  #include <string>
@@ -84,6 +84,18 @@ namespace snappy {
84
84
  bool Uncompress(const char* compressed, size_t compressed_length,
85
85
  string* uncompressed);
86
86
 
87
+ // Decompresses "compressed" to "*uncompressed".
88
+ //
89
+ // returns false if the message is corrupted and could not be decompressed
90
+ bool Uncompress(Source* compressed, Sink* uncompressed);
91
+
92
+ // This routine uncompresses as much of the "compressed" as possible
93
+ // into sink. It returns the number of valid bytes added to sink
94
+ // (extra invalid bytes may have been added due to errors; the caller
95
+ // should ignore those). The emitted data typically has length
96
+ // GetUncompressedLength(), but may be shorter if an error is
97
+ // encountered.
98
+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed);
87
99
 
88
100
  // ------------------------------------------------------------------------
89
101
  // Lower-level character array based routines. May be useful for
@@ -164,6 +176,14 @@ namespace snappy {
164
176
  bool IsValidCompressedBuffer(const char* compressed,
165
177
  size_t compressed_length);
166
178
 
179
+ // Returns true iff the contents of "compressed" can be uncompressed
180
+ // successfully. Does not return the uncompressed data. Takes
181
+ // time proportional to *compressed length, but is usually at least
182
+ // a factor of four faster than actual decompression.
183
+ // On success, consumes all of *compressed. On failure, consumes an
184
+ // unspecified prefix of *compressed.
185
+ bool IsValidCompressed(Source* compressed);
186
+
167
187
  // The size of a compression block. Note that many parts of the compression
168
188
  // code assume that kBlockSize <= 65536; in particular, the hash table
169
189
  // can only store 16-bit offsets, and EmitCopy() also assumes the offset
@@ -180,5 +200,4 @@ namespace snappy {
180
200
  static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
181
201
  } // end namespace snappy
182
202
 
183
-
184
- #endif // UTIL_SNAPPY_SNAPPY_H__
203
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
@@ -32,6 +32,7 @@
32
32
 
33
33
  #include <algorithm>
34
34
  #include <string>
35
+ #include <utility>
35
36
  #include <vector>
36
37
 
37
38
  #include "snappy.h"
@@ -50,25 +51,19 @@ DEFINE_bool(zlib, false,
50
51
  "Run zlib compression (http://www.zlib.net)");
51
52
  DEFINE_bool(lzo, false,
52
53
  "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
53
- DEFINE_bool(quicklz, false,
54
- "Run quickLZ compression (http://www.quicklz.com/)");
55
- DEFINE_bool(liblzf, false,
56
- "Run libLZF compression "
57
- "(http://www.goof.com/pcg/marc/liblzf.html)");
58
- DEFINE_bool(fastlz, false,
59
- "Run FastLZ compression (http://www.fastlz.org/");
60
54
  DEFINE_bool(snappy, true, "Run snappy compression");
61
55
 
62
-
63
56
  DEFINE_bool(write_compressed, false,
64
57
  "Write compressed versions of each file to <file>.comp");
65
58
  DEFINE_bool(write_uncompressed, false,
66
59
  "Write uncompressed versions of each file to <file>.uncomp");
67
60
 
68
- namespace snappy {
61
+ DEFINE_bool(snappy_dump_decompression_table, false,
62
+ "If true, we print the decompression table during tests.");
69
63
 
64
+ namespace snappy {
70
65
 
71
- #ifdef HAVE_FUNC_MMAP
66
+ #if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
72
67
 
73
68
  // To test against code that reads beyond its input, this class copies a
74
69
  // string to a newly allocated group of pages, the last of which
@@ -79,7 +74,7 @@ namespace snappy {
79
74
  class DataEndingAtUnreadablePage {
80
75
  public:
81
76
  explicit DataEndingAtUnreadablePage(const string& s) {
82
- const size_t page_size = getpagesize();
77
+ const size_t page_size = sysconf(_SC_PAGESIZE);
83
78
  const size_t size = s.size();
84
79
  // Round up space for string to a multiple of page_size.
85
80
  size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
@@ -97,8 +92,9 @@ class DataEndingAtUnreadablePage {
97
92
  }
98
93
 
99
94
  ~DataEndingAtUnreadablePage() {
95
+ const size_t page_size = sysconf(_SC_PAGESIZE);
100
96
  // Undo the mprotect.
101
- CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
97
+ CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
102
98
  CHECK_EQ(0, munmap(mem_, alloc_size_));
103
99
  }
104
100
 
@@ -113,7 +109,7 @@ class DataEndingAtUnreadablePage {
113
109
  size_t size_;
114
110
  };
115
111
 
116
- #else // HAVE_FUNC_MMAP
112
+ #else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
117
113
 
118
114
  // Fallback for systems without mmap.
119
115
  typedef string DataEndingAtUnreadablePage;
@@ -121,11 +117,11 @@ typedef string DataEndingAtUnreadablePage;
121
117
  #endif
122
118
 
123
119
  enum CompressorType {
124
- ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY
120
+ ZLIB, LZO, SNAPPY
125
121
  };
126
122
 
127
123
  const char* names[] = {
128
- "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY"
124
+ "ZLIB", "LZO", "SNAPPY"
129
125
  };
130
126
 
131
127
  static size_t MinimumRequiredOutputSpace(size_t input_size,
@@ -141,26 +137,12 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
141
137
  return input_size + input_size/64 + 16 + 3;
142
138
  #endif // LZO_VERSION
143
139
 
144
- #ifdef LZF_VERSION
145
- case LIBLZF:
146
- return input_size;
147
- #endif // LZF_VERSION
148
-
149
- #ifdef QLZ_VERSION_MAJOR
150
- case QUICKLZ:
151
- return input_size + 36000; // 36000 is used for scratch.
152
- #endif // QLZ_VERSION_MAJOR
153
-
154
- #ifdef FASTLZ_VERSION
155
- case FASTLZ:
156
- return max(static_cast<int>(ceil(input_size * 1.05)), 66);
157
- #endif // FASTLZ_VERSION
158
-
159
140
  case SNAPPY:
160
141
  return snappy::MaxCompressedLength(input_size);
161
142
 
162
143
  default:
163
144
  LOG(FATAL) << "Unknown compression type number " << comp;
145
+ return 0;
164
146
  }
165
147
  }
166
148
 
@@ -214,58 +196,6 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
214
196
  }
215
197
  #endif // LZO_VERSION
216
198
 
217
- #ifdef LZF_VERSION
218
- case LIBLZF: {
219
- int destlen = lzf_compress(input,
220
- input_size,
221
- string_as_array(compressed),
222
- input_size);
223
- if (destlen == 0) {
224
- // lzf *can* cause lots of blowup when compressing, so they
225
- // recommend to limit outsize to insize, and just not compress
226
- // if it's bigger. Ideally, we'd just swap input and output.
227
- compressed->assign(input, input_size);
228
- destlen = input_size;
229
- }
230
- if (!compressed_is_preallocated) {
231
- compressed->resize(destlen);
232
- }
233
- break;
234
- }
235
- #endif // LZF_VERSION
236
-
237
- #ifdef QLZ_VERSION_MAJOR
238
- case QUICKLZ: {
239
- qlz_state_compress *state_compress = new qlz_state_compress;
240
- int destlen = qlz_compress(input,
241
- string_as_array(compressed),
242
- input_size,
243
- state_compress);
244
- delete state_compress;
245
- CHECK_NE(0, destlen);
246
- if (!compressed_is_preallocated) {
247
- compressed->resize(destlen);
248
- }
249
- break;
250
- }
251
- #endif // QLZ_VERSION_MAJOR
252
-
253
- #ifdef FASTLZ_VERSION
254
- case FASTLZ: {
255
- // Use level 1 compression since we mostly care about speed.
256
- int destlen = fastlz_compress_level(
257
- 1,
258
- input,
259
- input_size,
260
- string_as_array(compressed));
261
- if (!compressed_is_preallocated) {
262
- compressed->resize(destlen);
263
- }
264
- CHECK_NE(destlen, 0);
265
- break;
266
- }
267
- #endif // FASTLZ_VERSION
268
-
269
199
  case SNAPPY: {
270
200
  size_t destlen;
271
201
  snappy::RawCompress(input, input_size,
@@ -278,7 +208,6 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
278
208
  break;
279
209
  }
280
210
 
281
-
282
211
  default: {
283
212
  return false; // the asked-for library wasn't compiled in
284
213
  }
@@ -321,56 +250,12 @@ static bool Uncompress(const string& compressed, CompressorType comp,
321
250
  }
322
251
  #endif // LZO_VERSION
323
252
 
324
- #ifdef LZF_VERSION
325
- case LIBLZF: {
326
- output->resize(size);
327
- int destlen = lzf_decompress(compressed.data(),
328
- compressed.size(),
329
- string_as_array(output),
330
- output->size());
331
- if (destlen == 0) {
332
- // This error probably means we had decided not to compress,
333
- // and thus have stored input in output directly.
334
- output->assign(compressed.data(), compressed.size());
335
- destlen = compressed.size();
336
- }
337
- CHECK_EQ(destlen, size);
338
- break;
339
- }
340
- #endif // LZF_VERSION
341
-
342
- #ifdef QLZ_VERSION_MAJOR
343
- case QUICKLZ: {
344
- output->resize(size);
345
- qlz_state_decompress *state_decompress = new qlz_state_decompress;
346
- int destlen = qlz_decompress(compressed.data(),
347
- string_as_array(output),
348
- state_decompress);
349
- delete state_decompress;
350
- CHECK_EQ(destlen, size);
351
- break;
352
- }
353
- #endif // QLZ_VERSION_MAJOR
354
-
355
- #ifdef FASTLZ_VERSION
356
- case FASTLZ: {
357
- output->resize(size);
358
- int destlen = fastlz_decompress(compressed.data(),
359
- compressed.length(),
360
- string_as_array(output),
361
- size);
362
- CHECK_EQ(destlen, size);
363
- break;
364
- }
365
- #endif // FASTLZ_VERSION
366
-
367
253
  case SNAPPY: {
368
254
  snappy::RawUncompress(compressed.data(), compressed.size(),
369
255
  string_as_array(output));
370
256
  break;
371
257
  }
372
258
 
373
-
374
259
  default: {
375
260
  return false; // the asked-for library wasn't compiled in
376
261
  }
@@ -392,13 +277,13 @@ static void Measure(const char* data,
392
277
  {
393
278
  // Chop the input into blocks
394
279
  int num_blocks = (length + block_size - 1) / block_size;
395
- vector<const char*> input(num_blocks);
396
- vector<size_t> input_length(num_blocks);
397
- vector<string> compressed(num_blocks);
398
- vector<string> output(num_blocks);
280
+ std::vector<const char*> input(num_blocks);
281
+ std::vector<size_t> input_length(num_blocks);
282
+ std::vector<string> compressed(num_blocks);
283
+ std::vector<string> output(num_blocks);
399
284
  for (int b = 0; b < num_blocks; b++) {
400
285
  int input_start = b * block_size;
401
- int input_limit = min<int>((b+1)*block_size, length);
286
+ int input_limit = std::min<int>((b+1)*block_size, length);
402
287
  input[b] = data+input_start;
403
288
  input_length[b] = input_limit-input_start;
404
289
 
@@ -448,13 +333,13 @@ static void Measure(const char* data,
448
333
  }
449
334
 
450
335
  compressed_size = 0;
451
- for (int i = 0; i < compressed.size(); i++) {
336
+ for (size_t i = 0; i < compressed.size(); i++) {
452
337
  compressed_size += compressed[i].size();
453
338
  }
454
339
  }
455
340
 
456
- sort(ctime, ctime + kRuns);
457
- sort(utime, utime + kRuns);
341
+ std::sort(ctime, ctime + kRuns);
342
+ std::sort(utime, utime + kRuns);
458
343
  const int med = kRuns/2;
459
344
 
460
345
  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
@@ -469,12 +354,11 @@ static void Measure(const char* data,
469
354
  x.c_str(),
470
355
  block_size/(1<<20),
471
356
  static_cast<int>(length), static_cast<uint32>(compressed_size),
472
- (compressed_size * 100.0) / max<int>(1, length),
357
+ (compressed_size * 100.0) / std::max<int>(1, length),
473
358
  comp_rate,
474
359
  urate.c_str());
475
360
  }
476
361
 
477
-
478
362
  static int VerifyString(const string& input) {
479
363
  string compressed;
480
364
  DataEndingAtUnreadablePage i(input);
@@ -491,6 +375,23 @@ static int VerifyString(const string& input) {
491
375
  return uncompressed.size();
492
376
  }
493
377
 
378
+ static void VerifyStringSink(const string& input) {
379
+ string compressed;
380
+ DataEndingAtUnreadablePage i(input);
381
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
382
+ CHECK_EQ(written, compressed.size());
383
+ CHECK_LE(compressed.size(),
384
+ snappy::MaxCompressedLength(input.size()));
385
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
386
+
387
+ string uncompressed;
388
+ uncompressed.resize(input.size());
389
+ snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
390
+ DataEndingAtUnreadablePage c(compressed);
391
+ snappy::ByteArraySource source(c.data(), c.size());
392
+ CHECK(snappy::Uncompress(&source, &sink));
393
+ CHECK_EQ(uncompressed, input);
394
+ }
494
395
 
495
396
  static void VerifyIOVec(const string& input) {
496
397
  string compressed;
@@ -505,13 +406,13 @@ static void VerifyIOVec(const string& input) {
505
406
  // ranging from 1 to 10.
506
407
  char* buf = new char[input.size()];
507
408
  ACMRandom rnd(input.size());
508
- int num = rnd.Next() % 10 + 1;
409
+ size_t num = rnd.Next() % 10 + 1;
509
410
  if (input.size() < num) {
510
411
  num = input.size();
511
412
  }
512
413
  struct iovec* iov = new iovec[num];
513
414
  int used_so_far = 0;
514
- for (int i = 0; i < num; ++i) {
415
+ for (size_t i = 0; i < num; ++i) {
515
416
  iov[i].iov_base = buf + used_so_far;
516
417
  if (i == num - 1) {
517
418
  iov[i].iov_len = input.size() - used_so_far;
@@ -562,6 +463,28 @@ static void VerifyNonBlockedCompression(const string& input) {
562
463
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
563
464
  CHECK_EQ(uncomp_str, input);
564
465
 
466
+ // Uncompress using source/sink
467
+ string uncomp_str2;
468
+ uncomp_str2.resize(input.size());
469
+ snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
470
+ snappy::ByteArraySource source(compressed.data(), compressed.size());
471
+ CHECK(snappy::Uncompress(&source, &sink));
472
+ CHECK_EQ(uncomp_str2, input);
473
+
474
+ // Uncompress into iovec
475
+ {
476
+ static const int kNumBlocks = 10;
477
+ struct iovec vec[kNumBlocks];
478
+ const int block_size = 1 + input.size() / kNumBlocks;
479
+ string iovec_data(block_size * kNumBlocks, 'x');
480
+ for (int i = 0; i < kNumBlocks; i++) {
481
+ vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
482
+ vec[i].iov_len = block_size;
483
+ }
484
+ CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
485
+ vec, kNumBlocks));
486
+ CHECK_EQ(string(iovec_data.data(), input.size()), input);
487
+ }
565
488
  }
566
489
 
567
490
  // Expand the input so that it is at least K times as big as block size
@@ -580,6 +503,8 @@ static int Verify(const string& input) {
580
503
  // Compress using string based routines
581
504
  const int result = VerifyString(input);
582
505
 
506
+ // Verify using sink based routines
507
+ VerifyStringSink(input);
583
508
 
584
509
  VerifyNonBlockedCompression(input);
585
510
  VerifyIOVec(input);
@@ -589,12 +514,9 @@ static int Verify(const string& input) {
589
514
  VerifyIOVec(input);
590
515
  }
591
516
 
592
-
593
517
  return result;
594
518
  }
595
519
 
596
- // This test checks to ensure that snappy doesn't coredump if it gets
597
- // corrupted data.
598
520
 
599
521
  static bool IsValidCompressedBuffer(const string& c) {
600
522
  return snappy::IsValidCompressedBuffer(c.data(), c.size());
@@ -603,11 +525,13 @@ static bool Uncompress(const string& c, string* u) {
603
525
  return snappy::Uncompress(c.data(), c.size(), u);
604
526
  }
605
527
 
606
- TYPED_TEST(CorruptedTest, VerifyCorrupted) {
528
+ // This test checks to ensure that snappy doesn't coredump if it gets
529
+ // corrupted data.
530
+ TEST(CorruptedTest, VerifyCorrupted) {
607
531
  string source = "making sure we don't crash with corrupted input";
608
532
  VLOG(1) << source;
609
533
  string dest;
610
- TypeParam uncmp;
534
+ string uncmp;
611
535
  snappy::Compress(source.data(), source.size(), &dest);
612
536
 
613
537
  // Mess around with the data. It's hard to simulate all possible
@@ -616,19 +540,19 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
616
540
  dest[1]--;
617
541
  dest[3]++;
618
542
  // this really ought to fail.
619
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
620
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
543
+ CHECK(!IsValidCompressedBuffer(dest));
544
+ CHECK(!Uncompress(dest, &uncmp));
621
545
 
622
546
  // This is testing for a security bug - a buffer that decompresses to 100k
623
547
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
624
548
  source.resize(100000);
625
- for (int i = 0; i < source.length(); ++i) {
549
+ for (size_t i = 0; i < source.length(); ++i) {
626
550
  source[i] = 'A';
627
551
  }
628
552
  snappy::Compress(source.data(), source.size(), &dest);
629
553
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
630
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
631
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
554
+ CHECK(!IsValidCompressedBuffer(dest));
555
+ CHECK(!Uncompress(dest, &uncmp));
632
556
 
633
557
  if (sizeof(void *) == 4) {
634
558
  // Another security check; check a crazy big length can't DoS us with an
@@ -637,20 +561,20 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
637
561
  // where 3 GB might be an acceptable allocation size, Uncompress()
638
562
  // attempts to decompress, and sometimes causes the test to run out of
639
563
  // memory.
640
- dest[0] = dest[1] = dest[2] = dest[3] = 0xff;
564
+ dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
641
565
  // This decodes to a really large size, i.e., about 3 GB.
642
566
  dest[4] = 'k';
643
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
644
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
567
+ CHECK(!IsValidCompressedBuffer(dest));
568
+ CHECK(!Uncompress(dest, &uncmp));
645
569
  } else {
646
570
  LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
647
571
  }
648
572
 
649
573
  // This decodes to about 2 MB; much smaller, but should still fail.
650
- dest[0] = dest[1] = dest[2] = 0xff;
574
+ dest[0] = dest[1] = dest[2] = '\xff';
651
575
  dest[3] = 0x00;
652
- CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
653
- CHECK(!Uncompress(TypeParam(dest), &uncmp));
576
+ CHECK(!IsValidCompressedBuffer(dest));
577
+ CHECK(!Uncompress(dest, &uncmp));
654
578
 
655
579
  // try reading stuff in from a bad file.
656
580
  for (int i = 1; i <= 3; ++i) {
@@ -665,8 +589,8 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
665
589
  snappy::ByteArraySource source(data.data(), data.size());
666
590
  CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
667
591
  (ulen2 < (1<<20)));
668
- CHECK(!IsValidCompressedBuffer(TypeParam(data)));
669
- CHECK(!Uncompress(TypeParam(data), &uncmp));
592
+ CHECK(!IsValidCompressedBuffer(data));
593
+ CHECK(!Uncompress(data, &uncmp));
670
594
  }
671
595
  }
672
596
 
@@ -764,7 +688,7 @@ TEST(Snappy, RandomData) {
764
688
  }
765
689
 
766
690
  string x;
767
- int len = rnd.Uniform(4096);
691
+ size_t len = rnd.Uniform(4096);
768
692
  if (i < 100) {
769
693
  len = 65536 + rnd.Uniform(65536);
770
694
  }
@@ -929,7 +853,6 @@ TEST(Snappy, IOVecCopyOverflow) {
929
853
  }
930
854
  }
931
855
 
932
-
933
856
  static bool CheckUncompressedLength(const string& compressed,
934
857
  size_t* ulength) {
935
858
  const bool result1 = snappy::GetUncompressedLength(compressed.data(),
@@ -956,11 +879,11 @@ TEST(SnappyCorruption, TruncatedVarint) {
956
879
  TEST(SnappyCorruption, UnterminatedVarint) {
957
880
  string compressed, uncompressed;
958
881
  size_t ulength;
959
- compressed.push_back(128);
960
- compressed.push_back(128);
961
- compressed.push_back(128);
962
- compressed.push_back(128);
963
- compressed.push_back(128);
882
+ compressed.push_back('\x80');
883
+ compressed.push_back('\x80');
884
+ compressed.push_back('\x80');
885
+ compressed.push_back('\x80');
886
+ compressed.push_back('\x80');
964
887
  compressed.push_back(10);
965
888
  CHECK(!CheckUncompressedLength(compressed, &ulength));
966
889
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
@@ -968,6 +891,20 @@ TEST(SnappyCorruption, UnterminatedVarint) {
968
891
  &uncompressed));
969
892
  }
970
893
 
894
+ TEST(SnappyCorruption, OverflowingVarint) {
895
+ string compressed, uncompressed;
896
+ size_t ulength;
897
+ compressed.push_back('\xfb');
898
+ compressed.push_back('\xff');
899
+ compressed.push_back('\xff');
900
+ compressed.push_back('\xff');
901
+ compressed.push_back('\x7f');
902
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
903
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
904
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
905
+ &uncompressed));
906
+ }
907
+
971
908
  TEST(Snappy, ReadPastEndOfBuffer) {
972
909
  // Check that we do not read past end of input
973
910
 
@@ -998,11 +935,13 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
998
935
  EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
999
936
  }
1000
937
 
1001
-
1002
938
  namespace {
1003
939
 
1004
940
  int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
1005
- return snappy::internal::FindMatchLength(s1, s2, s2 + length);
941
+ std::pair<size_t, bool> p =
942
+ snappy::internal::FindMatchLength(s1, s2, s2 + length);
943
+ CHECK_EQ(p.first < 8, p.second);
944
+ return p.first;
1006
945
  }
1007
946
 
1008
947
  } // namespace
@@ -1112,8 +1051,7 @@ TEST(Snappy, FindMatchLengthRandom) {
1112
1051
  }
1113
1052
  DataEndingAtUnreadablePage u(s);
1114
1053
  DataEndingAtUnreadablePage v(t);
1115
- int matched = snappy::internal::FindMatchLength(
1116
- u.data(), v.data(), v.data() + t.size());
1054
+ int matched = TestFindMatchLength(u.data(), v.data(), t.size());
1117
1055
  if (matched == t.size()) {
1118
1056
  EXPECT_EQ(s, t);
1119
1057
  } else {
@@ -1125,21 +1063,113 @@ TEST(Snappy, FindMatchLengthRandom) {
1125
1063
  }
1126
1064
  }
1127
1065
 
1066
+ static uint16 MakeEntry(unsigned int extra,
1067
+ unsigned int len,
1068
+ unsigned int copy_offset) {
1069
+ // Check that all of the fields fit within the allocated space
1070
+ assert(extra == (extra & 0x7)); // At most 3 bits
1071
+ assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
1072
+ assert(len == (len & 0x7f)); // At most 7 bits
1073
+ return len | (copy_offset << 8) | (extra << 11);
1074
+ }
1075
+
1076
+ // Check that the decompression table is correct, and optionally print out
1077
+ // the computed one.
1078
+ TEST(Snappy, VerifyCharTable) {
1079
+ using snappy::internal::LITERAL;
1080
+ using snappy::internal::COPY_1_BYTE_OFFSET;
1081
+ using snappy::internal::COPY_2_BYTE_OFFSET;
1082
+ using snappy::internal::COPY_4_BYTE_OFFSET;
1083
+ using snappy::internal::char_table;
1084
+
1085
+ uint16 dst[256];
1086
+
1087
+ // Place invalid entries in all places to detect missing initialization
1088
+ int assigned = 0;
1089
+ for (int i = 0; i < 256; i++) {
1090
+ dst[i] = 0xffff;
1091
+ }
1092
+
1093
+ // Small LITERAL entries. We store (len-1) in the top 6 bits.
1094
+ for (unsigned int len = 1; len <= 60; len++) {
1095
+ dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
1096
+ assigned++;
1097
+ }
1098
+
1099
+ // Large LITERAL entries. We use 60..63 in the high 6 bits to
1100
+ // encode the number of bytes of length info that follow the opcode.
1101
+ for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
1102
+ // We set the length field in the lookup table to 1 because extra
1103
+ // bytes encode len-1.
1104
+ dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
1105
+ assigned++;
1106
+ }
1107
+
1108
+ // COPY_1_BYTE_OFFSET.
1109
+ //
1110
+ // The tag byte in the compressed data stores len-4 in 3 bits, and
1111
+ // offset/256 in 5 bits. offset%256 is stored in the next byte.
1112
+ //
1113
+ // This format is used for length in range [4..11] and offset in
1114
+ // range [0..2047]
1115
+ for (unsigned int len = 4; len < 12; len++) {
1116
+ for (unsigned int offset = 0; offset < 2048; offset += 256) {
1117
+ dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
1118
+ MakeEntry(1, len, offset>>8);
1119
+ assigned++;
1120
+ }
1121
+ }
1122
+
1123
+ // COPY_2_BYTE_OFFSET.
1124
+ // Tag contains len-1 in top 6 bits, and offset in next two bytes.
1125
+ for (unsigned int len = 1; len <= 64; len++) {
1126
+ dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
1127
+ assigned++;
1128
+ }
1129
+
1130
+ // COPY_4_BYTE_OFFSET.
1131
+ // Tag contains len-1 in top 6 bits, and offset in next four bytes.
1132
+ for (unsigned int len = 1; len <= 64; len++) {
1133
+ dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
1134
+ assigned++;
1135
+ }
1136
+
1137
+ // Check that each entry was initialized exactly once.
1138
+ EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
1139
+ for (int i = 0; i < 256; i++) {
1140
+ EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
1141
+ }
1142
+
1143
+ if (FLAGS_snappy_dump_decompression_table) {
1144
+ printf("static const uint16 char_table[256] = {\n ");
1145
+ for (int i = 0; i < 256; i++) {
1146
+ printf("0x%04x%s",
1147
+ dst[i],
1148
+ ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
1149
+ }
1150
+ printf("};\n");
1151
+ }
1152
+
1153
+ // Check that computed table matched recorded table.
1154
+ for (int i = 0; i < 256; i++) {
1155
+ EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
1156
+ }
1157
+ }
1128
1158
 
1129
1159
  static void CompressFile(const char* fname) {
1130
1160
  string fullinput;
1131
- file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1161
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1132
1162
 
1133
1163
  string compressed;
1134
1164
  Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1135
1165
 
1136
- file::SetContents(string(fname).append(".comp"), compressed, file::Defaults())
1137
- .CheckSuccess();
1166
+ CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
1167
+ file::Defaults()));
1138
1168
  }
1139
1169
 
1140
1170
  static void UncompressFile(const char* fname) {
1141
1171
  string fullinput;
1142
- file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1172
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1143
1173
 
1144
1174
  size_t uncompLength;
1145
1175
  CHECK(CheckUncompressedLength(fullinput, &uncompLength));
@@ -1148,28 +1178,25 @@ static void UncompressFile(const char* fname) {
1148
1178
  uncompressed.resize(uncompLength);
1149
1179
  CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1150
1180
 
1151
- file::SetContents(string(fname).append(".uncomp"), uncompressed,
1152
- file::Defaults()).CheckSuccess();
1181
+ CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
1182
+ file::Defaults()));
1153
1183
  }
1154
1184
 
1155
1185
  static void MeasureFile(const char* fname) {
1156
1186
  string fullinput;
1157
- file::GetContents(fname, &fullinput, file::Defaults()).CheckSuccess();
1187
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1158
1188
  printf("%-40s :\n", fname);
1159
1189
 
1160
1190
  int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
1161
1191
  int end_len = fullinput.size();
1162
1192
  if (FLAGS_end_len >= 0) {
1163
- end_len = min<int>(fullinput.size(), FLAGS_end_len);
1193
+ end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
1164
1194
  }
1165
1195
  for (int len = start_len; len <= end_len; len++) {
1166
1196
  const char* const input = fullinput.data();
1167
1197
  int repeats = (FLAGS_bytes + len) / (len + 1);
1168
1198
  if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
1169
1199
  if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
1170
- if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
1171
- if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
1172
- if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
1173
1200
  if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
1174
1201
 
1175
1202
  // For block-size based measurements
@@ -1298,6 +1325,37 @@ static void BM_UIOVec(int iters, int arg) {
1298
1325
  }
1299
1326
  BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1300
1327
 
1328
+ static void BM_UFlatSink(int iters, int arg) {
1329
+ StopBenchmarkTiming();
1330
+
1331
+ // Pick file to process based on "arg"
1332
+ CHECK_GE(arg, 0);
1333
+ CHECK_LT(arg, ARRAYSIZE(files));
1334
+ string contents = ReadTestDataFile(files[arg].filename,
1335
+ files[arg].size_limit);
1336
+
1337
+ string zcontents;
1338
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1339
+ char* dst = new char[contents.size()];
1340
+
1341
+ SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1342
+ static_cast<int64>(contents.size()));
1343
+ SetBenchmarkLabel(files[arg].label);
1344
+ StartBenchmarkTiming();
1345
+ while (iters-- > 0) {
1346
+ snappy::ByteArraySource source(zcontents.data(), zcontents.size());
1347
+ snappy::UncheckedByteArraySink sink(dst);
1348
+ CHECK(snappy::Uncompress(&source, &sink));
1349
+ }
1350
+ StopBenchmarkTiming();
1351
+
1352
+ string s(dst, contents.size());
1353
+ CHECK_EQ(contents, s);
1354
+
1355
+ delete[] dst;
1356
+ }
1357
+
1358
+ BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
1301
1359
 
1302
1360
  static void BM_ZFlat(int iters, int arg) {
1303
1361
  StopBenchmarkTiming();
@@ -1329,23 +1387,20 @@ static void BM_ZFlat(int iters, int arg) {
1329
1387
  }
1330
1388
  BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1331
1389
 
1332
-
1333
1390
  } // namespace snappy
1334
1391
 
1335
-
1336
1392
  int main(int argc, char** argv) {
1337
1393
  InitGoogle(argv[0], &argc, &argv, true);
1338
1394
  RunSpecifiedBenchmarks();
1339
1395
 
1340
-
1341
1396
  if (argc >= 2) {
1342
1397
  for (int arg = 1; arg < argc; arg++) {
1343
1398
  if (FLAGS_write_compressed) {
1344
- CompressFile(argv[arg]);
1399
+ snappy::CompressFile(argv[arg]);
1345
1400
  } else if (FLAGS_write_uncompressed) {
1346
- UncompressFile(argv[arg]);
1401
+ snappy::UncompressFile(argv[arg]);
1347
1402
  } else {
1348
- MeasureFile(argv[arg]);
1403
+ snappy::MeasureFile(argv[arg]);
1349
1404
  }
1350
1405
  }
1351
1406
  return 0;