snappy 0.0.15 → 0.0.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +23 -1
- data/ext/extconf.rb +1 -9
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/AUTHORS +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/COPYING +1 -1
- data/home/travis/build/miyucy/snappy/vendor/snappy/ChangeLog +2468 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/Makefile.am +3 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/NEWS +20 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/README +10 -6
- data/home/travis/build/miyucy/snappy/vendor/snappy/autogen.sh +12 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/configure.ac +4 -3
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/format_description.txt +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/framing_format.txt +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/m4/gtest.m4 +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-c.cc +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-c.h +3 -3
- data/home/travis/build/miyucy/snappy/vendor/snappy/snappy-internal.h +227 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-sinksource.cc +33 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-sinksource.h +51 -6
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-stubs-internal.cc +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-stubs-internal.h +44 -7
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-stubs-public.h.in +5 -3
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-test.cc +5 -2
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy-test.h +22 -5
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy.cc +474 -316
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy.h +23 -4
- data/home/travis/build/miyucy/snappy/vendor/snappy/snappy.pc.in +10 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/snappy_unittest.cc +225 -49
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/alice29.txt +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/asyoulik.txt +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/baddata1.snappy +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/baddata2.snappy +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/baddata3.snappy +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/fireworks.jpeg +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/geo.protodata +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/html +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/html_x_4 +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/kppkn.gtb +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/lcet10.txt +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/paper-100k.pdf +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/plrabn12.txt +0 -0
- data/{vendor → home/travis/build/miyucy/snappy/vendor}/snappy/testdata/urls.10K +0 -0
- data/lib/snappy.rb +2 -1
- data/lib/snappy/reader.rb +7 -3
- data/lib/snappy/shim.rb +30 -0
- data/lib/snappy/version.rb +3 -1
- data/lib/snappy/writer.rb +8 -9
- data/smoke.sh +8 -0
- metadata +44 -41
- data/vendor/snappy/ChangeLog +0 -1916
- data/vendor/snappy/autogen.sh +0 -7
- data/vendor/snappy/snappy-internal.h +0 -150
@@ -36,8 +36,8 @@
|
|
36
36
|
// using BMDiff and then compressing the output of BMDiff with
|
37
37
|
// Snappy.
|
38
38
|
|
39
|
-
#ifndef
|
40
|
-
#define
|
39
|
+
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
|
40
|
+
#define THIRD_PARTY_SNAPPY_SNAPPY_H__
|
41
41
|
|
42
42
|
#include <stddef.h>
|
43
43
|
#include <string>
|
@@ -84,6 +84,18 @@ namespace snappy {
|
|
84
84
|
bool Uncompress(const char* compressed, size_t compressed_length,
|
85
85
|
string* uncompressed);
|
86
86
|
|
87
|
+
// Decompresses "compressed" to "*uncompressed".
|
88
|
+
//
|
89
|
+
// returns false if the message is corrupted and could not be decompressed
|
90
|
+
bool Uncompress(Source* compressed, Sink* uncompressed);
|
91
|
+
|
92
|
+
// This routine uncompresses as much of the "compressed" as possible
|
93
|
+
// into sink. It returns the number of valid bytes added to sink
|
94
|
+
// (extra invalid bytes may have been added due to errors; the caller
|
95
|
+
// should ignore those). The emitted data typically has length
|
96
|
+
// GetUncompressedLength(), but may be shorter if an error is
|
97
|
+
// encountered.
|
98
|
+
size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed);
|
87
99
|
|
88
100
|
// ------------------------------------------------------------------------
|
89
101
|
// Lower-level character array based routines. May be useful for
|
@@ -164,6 +176,14 @@ namespace snappy {
|
|
164
176
|
bool IsValidCompressedBuffer(const char* compressed,
|
165
177
|
size_t compressed_length);
|
166
178
|
|
179
|
+
// Returns true iff the contents of "compressed" can be uncompressed
|
180
|
+
// successfully. Does not return the uncompressed data. Takes
|
181
|
+
// time proportional to *compressed length, but is usually at least
|
182
|
+
// a factor of four faster than actual decompression.
|
183
|
+
// On success, consumes all of *compressed. On failure, consumes an
|
184
|
+
// unspecified prefix of *compressed.
|
185
|
+
bool IsValidCompressed(Source* compressed);
|
186
|
+
|
167
187
|
// The size of a compression block. Note that many parts of the compression
|
168
188
|
// code assumes that kBlockSize <= 65536; in particular, the hash table
|
169
189
|
// can only store 16-bit offsets, and EmitCopy() also assumes the offset
|
@@ -180,5 +200,4 @@ namespace snappy {
|
|
180
200
|
static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
|
181
201
|
} // end namespace snappy
|
182
202
|
|
183
|
-
|
184
|
-
#endif // UTIL_SNAPPY_SNAPPY_H__
|
203
|
+
#endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
|
@@ -59,12 +59,14 @@ DEFINE_bool(fastlz, false,
|
|
59
59
|
"Run FastLZ compression (http://www.fastlz.org/");
|
60
60
|
DEFINE_bool(snappy, true, "Run snappy compression");
|
61
61
|
|
62
|
-
|
63
62
|
DEFINE_bool(write_compressed, false,
|
64
63
|
"Write compressed versions of each file to <file>.comp");
|
65
64
|
DEFINE_bool(write_uncompressed, false,
|
66
65
|
"Write uncompressed versions of each file to <file>.uncomp");
|
67
66
|
|
67
|
+
DEFINE_bool(snappy_dump_decompression_table, false,
|
68
|
+
"If true, we print the decompression table during tests.");
|
69
|
+
|
68
70
|
namespace snappy {
|
69
71
|
|
70
72
|
|
@@ -161,6 +163,7 @@ static size_t MinimumRequiredOutputSpace(size_t input_size,
|
|
161
163
|
|
162
164
|
default:
|
163
165
|
LOG(FATAL) << "Unknown compression type number " << comp;
|
166
|
+
return 0;
|
164
167
|
}
|
165
168
|
}
|
166
169
|
|
@@ -278,7 +281,6 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
|
278
281
|
break;
|
279
282
|
}
|
280
283
|
|
281
|
-
|
282
284
|
default: {
|
283
285
|
return false; // the asked-for library wasn't compiled in
|
284
286
|
}
|
@@ -370,7 +372,6 @@ static bool Uncompress(const string& compressed, CompressorType comp,
|
|
370
372
|
break;
|
371
373
|
}
|
372
374
|
|
373
|
-
|
374
375
|
default: {
|
375
376
|
return false; // the asked-for library wasn't compiled in
|
376
377
|
}
|
@@ -392,10 +393,10 @@ static void Measure(const char* data,
|
|
392
393
|
{
|
393
394
|
// Chop the input into blocks
|
394
395
|
int num_blocks = (length + block_size - 1) / block_size;
|
395
|
-
vector<const char*> input(num_blocks);
|
396
|
-
vector<size_t> input_length(num_blocks);
|
397
|
-
vector<string> compressed(num_blocks);
|
398
|
-
vector<string> output(num_blocks);
|
396
|
+
std::vector<const char*> input(num_blocks);
|
397
|
+
std::vector<size_t> input_length(num_blocks);
|
398
|
+
std::vector<string> compressed(num_blocks);
|
399
|
+
std::vector<string> output(num_blocks);
|
399
400
|
for (int b = 0; b < num_blocks; b++) {
|
400
401
|
int input_start = b * block_size;
|
401
402
|
int input_limit = min<int>((b+1)*block_size, length);
|
@@ -448,7 +449,7 @@ static void Measure(const char* data,
|
|
448
449
|
}
|
449
450
|
|
450
451
|
compressed_size = 0;
|
451
|
-
for (
|
452
|
+
for (size_t i = 0; i < compressed.size(); i++) {
|
452
453
|
compressed_size += compressed[i].size();
|
453
454
|
}
|
454
455
|
}
|
@@ -474,7 +475,6 @@ static void Measure(const char* data,
|
|
474
475
|
urate.c_str());
|
475
476
|
}
|
476
477
|
|
477
|
-
|
478
478
|
static int VerifyString(const string& input) {
|
479
479
|
string compressed;
|
480
480
|
DataEndingAtUnreadablePage i(input);
|
@@ -491,6 +491,23 @@ static int VerifyString(const string& input) {
|
|
491
491
|
return uncompressed.size();
|
492
492
|
}
|
493
493
|
|
494
|
+
static void VerifyStringSink(const string& input) {
|
495
|
+
string compressed;
|
496
|
+
DataEndingAtUnreadablePage i(input);
|
497
|
+
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
498
|
+
CHECK_EQ(written, compressed.size());
|
499
|
+
CHECK_LE(compressed.size(),
|
500
|
+
snappy::MaxCompressedLength(input.size()));
|
501
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
502
|
+
|
503
|
+
string uncompressed;
|
504
|
+
uncompressed.resize(input.size());
|
505
|
+
snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
|
506
|
+
DataEndingAtUnreadablePage c(compressed);
|
507
|
+
snappy::ByteArraySource source(c.data(), c.size());
|
508
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
509
|
+
CHECK_EQ(uncompressed, input);
|
510
|
+
}
|
494
511
|
|
495
512
|
static void VerifyIOVec(const string& input) {
|
496
513
|
string compressed;
|
@@ -505,13 +522,13 @@ static void VerifyIOVec(const string& input) {
|
|
505
522
|
// ranging from 1 to 10.
|
506
523
|
char* buf = new char[input.size()];
|
507
524
|
ACMRandom rnd(input.size());
|
508
|
-
|
525
|
+
size_t num = rnd.Next() % 10 + 1;
|
509
526
|
if (input.size() < num) {
|
510
527
|
num = input.size();
|
511
528
|
}
|
512
529
|
struct iovec* iov = new iovec[num];
|
513
530
|
int used_so_far = 0;
|
514
|
-
for (
|
531
|
+
for (size_t i = 0; i < num; ++i) {
|
515
532
|
iov[i].iov_base = buf + used_so_far;
|
516
533
|
if (i == num - 1) {
|
517
534
|
iov[i].iov_len = input.size() - used_so_far;
|
@@ -562,6 +579,28 @@ static void VerifyNonBlockedCompression(const string& input) {
|
|
562
579
|
CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
|
563
580
|
CHECK_EQ(uncomp_str, input);
|
564
581
|
|
582
|
+
// Uncompress using source/sink
|
583
|
+
string uncomp_str2;
|
584
|
+
uncomp_str2.resize(input.size());
|
585
|
+
snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
|
586
|
+
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
587
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
588
|
+
CHECK_EQ(uncomp_str2, input);
|
589
|
+
|
590
|
+
// Uncompress into iovec
|
591
|
+
{
|
592
|
+
static const int kNumBlocks = 10;
|
593
|
+
struct iovec vec[kNumBlocks];
|
594
|
+
const int block_size = 1 + input.size() / kNumBlocks;
|
595
|
+
string iovec_data(block_size * kNumBlocks, 'x');
|
596
|
+
for (int i = 0; i < kNumBlocks; i++) {
|
597
|
+
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
598
|
+
vec[i].iov_len = block_size;
|
599
|
+
}
|
600
|
+
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
|
601
|
+
vec, kNumBlocks));
|
602
|
+
CHECK_EQ(string(iovec_data.data(), input.size()), input);
|
603
|
+
}
|
565
604
|
}
|
566
605
|
|
567
606
|
// Expand the input so that it is at least K times as big as block size
|
@@ -580,6 +619,8 @@ static int Verify(const string& input) {
|
|
580
619
|
// Compress using string based routines
|
581
620
|
const int result = VerifyString(input);
|
582
621
|
|
622
|
+
// Verify using sink based routines
|
623
|
+
VerifyStringSink(input);
|
583
624
|
|
584
625
|
VerifyNonBlockedCompression(input);
|
585
626
|
VerifyIOVec(input);
|
@@ -589,12 +630,9 @@ static int Verify(const string& input) {
|
|
589
630
|
VerifyIOVec(input);
|
590
631
|
}
|
591
632
|
|
592
|
-
|
593
633
|
return result;
|
594
634
|
}
|
595
635
|
|
596
|
-
// This test checks to ensure that snappy doesn't coredump if it gets
|
597
|
-
// corrupted data.
|
598
636
|
|
599
637
|
static bool IsValidCompressedBuffer(const string& c) {
|
600
638
|
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
@@ -603,11 +641,13 @@ static bool Uncompress(const string& c, string* u) {
|
|
603
641
|
return snappy::Uncompress(c.data(), c.size(), u);
|
604
642
|
}
|
605
643
|
|
606
|
-
|
644
|
+
// This test checks to ensure that snappy doesn't coredump if it gets
|
645
|
+
// corrupted data.
|
646
|
+
TEST(CorruptedTest, VerifyCorrupted) {
|
607
647
|
string source = "making sure we don't crash with corrupted input";
|
608
648
|
VLOG(1) << source;
|
609
649
|
string dest;
|
610
|
-
|
650
|
+
string uncmp;
|
611
651
|
snappy::Compress(source.data(), source.size(), &dest);
|
612
652
|
|
613
653
|
// Mess around with the data. It's hard to simulate all possible
|
@@ -616,19 +656,19 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
|
|
616
656
|
dest[1]--;
|
617
657
|
dest[3]++;
|
618
658
|
// this really ought to fail.
|
619
|
-
CHECK(!IsValidCompressedBuffer(
|
620
|
-
CHECK(!Uncompress(
|
659
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
660
|
+
CHECK(!Uncompress(dest, &uncmp));
|
621
661
|
|
622
662
|
// This is testing for a security bug - a buffer that decompresses to 100k
|
623
663
|
// but we lie in the snappy header and only reserve 0 bytes of memory :)
|
624
664
|
source.resize(100000);
|
625
|
-
for (
|
665
|
+
for (size_t i = 0; i < source.length(); ++i) {
|
626
666
|
source[i] = 'A';
|
627
667
|
}
|
628
668
|
snappy::Compress(source.data(), source.size(), &dest);
|
629
669
|
dest[0] = dest[1] = dest[2] = dest[3] = 0;
|
630
|
-
CHECK(!IsValidCompressedBuffer(
|
631
|
-
CHECK(!Uncompress(
|
670
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
671
|
+
CHECK(!Uncompress(dest, &uncmp));
|
632
672
|
|
633
673
|
if (sizeof(void *) == 4) {
|
634
674
|
// Another security check; check a crazy big length can't DoS us with an
|
@@ -637,20 +677,20 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
|
|
637
677
|
// where 3 GB might be an acceptable allocation size, Uncompress()
|
638
678
|
// attempts to decompress, and sometimes causes the test to run out of
|
639
679
|
// memory.
|
640
|
-
dest[0] = dest[1] = dest[2] = dest[3] =
|
680
|
+
dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
|
641
681
|
// This decodes to a really large size, i.e., about 3 GB.
|
642
682
|
dest[4] = 'k';
|
643
|
-
CHECK(!IsValidCompressedBuffer(
|
644
|
-
CHECK(!Uncompress(
|
683
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
684
|
+
CHECK(!Uncompress(dest, &uncmp));
|
645
685
|
} else {
|
646
686
|
LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
|
647
687
|
}
|
648
688
|
|
649
689
|
// This decodes to about 2 MB; much smaller, but should still fail.
|
650
|
-
dest[0] = dest[1] = dest[2] =
|
690
|
+
dest[0] = dest[1] = dest[2] = '\xff';
|
651
691
|
dest[3] = 0x00;
|
652
|
-
CHECK(!IsValidCompressedBuffer(
|
653
|
-
CHECK(!Uncompress(
|
692
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
693
|
+
CHECK(!Uncompress(dest, &uncmp));
|
654
694
|
|
655
695
|
// try reading stuff in from a bad file.
|
656
696
|
for (int i = 1; i <= 3; ++i) {
|
@@ -665,8 +705,8 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
|
|
665
705
|
snappy::ByteArraySource source(data.data(), data.size());
|
666
706
|
CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
|
667
707
|
(ulen2 < (1<<20)));
|
668
|
-
CHECK(!IsValidCompressedBuffer(
|
669
|
-
CHECK(!Uncompress(
|
708
|
+
CHECK(!IsValidCompressedBuffer(data));
|
709
|
+
CHECK(!Uncompress(data, &uncmp));
|
670
710
|
}
|
671
711
|
}
|
672
712
|
|
@@ -764,7 +804,7 @@ TEST(Snappy, RandomData) {
|
|
764
804
|
}
|
765
805
|
|
766
806
|
string x;
|
767
|
-
|
807
|
+
size_t len = rnd.Uniform(4096);
|
768
808
|
if (i < 100) {
|
769
809
|
len = 65536 + rnd.Uniform(65536);
|
770
810
|
}
|
@@ -929,7 +969,6 @@ TEST(Snappy, IOVecCopyOverflow) {
|
|
929
969
|
}
|
930
970
|
}
|
931
971
|
|
932
|
-
|
933
972
|
static bool CheckUncompressedLength(const string& compressed,
|
934
973
|
size_t* ulength) {
|
935
974
|
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
@@ -956,11 +995,11 @@ TEST(SnappyCorruption, TruncatedVarint) {
|
|
956
995
|
TEST(SnappyCorruption, UnterminatedVarint) {
|
957
996
|
string compressed, uncompressed;
|
958
997
|
size_t ulength;
|
959
|
-
compressed.push_back(
|
960
|
-
compressed.push_back(
|
961
|
-
compressed.push_back(
|
962
|
-
compressed.push_back(
|
963
|
-
compressed.push_back(
|
998
|
+
compressed.push_back('\x80');
|
999
|
+
compressed.push_back('\x80');
|
1000
|
+
compressed.push_back('\x80');
|
1001
|
+
compressed.push_back('\x80');
|
1002
|
+
compressed.push_back('\x80');
|
964
1003
|
compressed.push_back(10);
|
965
1004
|
CHECK(!CheckUncompressedLength(compressed, &ulength));
|
966
1005
|
CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
@@ -968,6 +1007,20 @@ TEST(SnappyCorruption, UnterminatedVarint) {
|
|
968
1007
|
&uncompressed));
|
969
1008
|
}
|
970
1009
|
|
1010
|
+
TEST(SnappyCorruption, OverflowingVarint) {
|
1011
|
+
string compressed, uncompressed;
|
1012
|
+
size_t ulength;
|
1013
|
+
compressed.push_back('\xfb');
|
1014
|
+
compressed.push_back('\xff');
|
1015
|
+
compressed.push_back('\xff');
|
1016
|
+
compressed.push_back('\xff');
|
1017
|
+
compressed.push_back('\x7f');
|
1018
|
+
CHECK(!CheckUncompressedLength(compressed, &ulength));
|
1019
|
+
CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
1020
|
+
CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
|
1021
|
+
&uncompressed));
|
1022
|
+
}
|
1023
|
+
|
971
1024
|
TEST(Snappy, ReadPastEndOfBuffer) {
|
972
1025
|
// Check that we do not read past end of input
|
973
1026
|
|
@@ -998,11 +1051,13 @@ TEST(Snappy, ZeroOffsetCopyValidation) {
|
|
998
1051
|
EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
|
999
1052
|
}
|
1000
1053
|
|
1001
|
-
|
1002
1054
|
namespace {
|
1003
1055
|
|
1004
1056
|
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
|
1005
|
-
|
1057
|
+
std::pair<size_t, bool> p =
|
1058
|
+
snappy::internal::FindMatchLength(s1, s2, s2 + length);
|
1059
|
+
CHECK_EQ(p.first < 8, p.second);
|
1060
|
+
return p.first;
|
1006
1061
|
}
|
1007
1062
|
|
1008
1063
|
} // namespace
|
@@ -1112,8 +1167,7 @@ TEST(Snappy, FindMatchLengthRandom) {
|
|
1112
1167
|
}
|
1113
1168
|
DataEndingAtUnreadablePage u(s);
|
1114
1169
|
DataEndingAtUnreadablePage v(t);
|
1115
|
-
int matched =
|
1116
|
-
u.data(), v.data(), v.data() + t.size());
|
1170
|
+
int matched = TestFindMatchLength(u.data(), v.data(), t.size());
|
1117
1171
|
if (matched == t.size()) {
|
1118
1172
|
EXPECT_EQ(s, t);
|
1119
1173
|
} else {
|
@@ -1125,21 +1179,114 @@ TEST(Snappy, FindMatchLengthRandom) {
|
|
1125
1179
|
}
|
1126
1180
|
}
|
1127
1181
|
|
1182
|
+
static uint16 MakeEntry(unsigned int extra,
|
1183
|
+
unsigned int len,
|
1184
|
+
unsigned int copy_offset) {
|
1185
|
+
// Check that all of the fields fit within the allocated space
|
1186
|
+
assert(extra == (extra & 0x7)); // At most 3 bits
|
1187
|
+
assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
|
1188
|
+
assert(len == (len & 0x7f)); // At most 7 bits
|
1189
|
+
return len | (copy_offset << 8) | (extra << 11);
|
1190
|
+
}
|
1191
|
+
|
1192
|
+
// Check that the decompression table is correct, and optionally print out
|
1193
|
+
// the computed one.
|
1194
|
+
TEST(Snappy, VerifyCharTable) {
|
1195
|
+
using snappy::internal::LITERAL;
|
1196
|
+
using snappy::internal::COPY_1_BYTE_OFFSET;
|
1197
|
+
using snappy::internal::COPY_2_BYTE_OFFSET;
|
1198
|
+
using snappy::internal::COPY_4_BYTE_OFFSET;
|
1199
|
+
using snappy::internal::char_table;
|
1200
|
+
using snappy::internal::wordmask;
|
1201
|
+
|
1202
|
+
uint16 dst[256];
|
1203
|
+
|
1204
|
+
// Place invalid entries in all places to detect missing initialization
|
1205
|
+
int assigned = 0;
|
1206
|
+
for (int i = 0; i < 256; i++) {
|
1207
|
+
dst[i] = 0xffff;
|
1208
|
+
}
|
1209
|
+
|
1210
|
+
// Small LITERAL entries. We store (len-1) in the top 6 bits.
|
1211
|
+
for (unsigned int len = 1; len <= 60; len++) {
|
1212
|
+
dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
|
1213
|
+
assigned++;
|
1214
|
+
}
|
1215
|
+
|
1216
|
+
// Large LITERAL entries. We use 60..63 in the high 6 bits to
|
1217
|
+
// encode the number of bytes of length info that follow the opcode.
|
1218
|
+
for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
|
1219
|
+
// We set the length field in the lookup table to 1 because extra
|
1220
|
+
// bytes encode len-1.
|
1221
|
+
dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
1222
|
+
assigned++;
|
1223
|
+
}
|
1224
|
+
|
1225
|
+
// COPY_1_BYTE_OFFSET.
|
1226
|
+
//
|
1227
|
+
// The tag byte in the compressed data stores len-4 in 3 bits, and
|
1228
|
+
// offset/256 in 3 bits. offset%256 is stored in the next byte.
|
1229
|
+
//
|
1230
|
+
// This format is used for length in range [4..11] and offset in
|
1231
|
+
// range [0..2047]
|
1232
|
+
for (unsigned int len = 4; len < 12; len++) {
|
1233
|
+
for (unsigned int offset = 0; offset < 2048; offset += 256) {
|
1234
|
+
dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
|
1235
|
+
MakeEntry(1, len, offset>>8);
|
1236
|
+
assigned++;
|
1237
|
+
}
|
1238
|
+
}
|
1239
|
+
|
1240
|
+
// COPY_2_BYTE_OFFSET.
|
1241
|
+
// Tag contains len-1 in top 6 bits, and offset in next two bytes.
|
1242
|
+
for (unsigned int len = 1; len <= 64; len++) {
|
1243
|
+
dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
|
1244
|
+
assigned++;
|
1245
|
+
}
|
1246
|
+
|
1247
|
+
// COPY_4_BYTE_OFFSET.
|
1248
|
+
// Tag contains len-1 in top 6 bits, and offset in next four bytes.
|
1249
|
+
for (unsigned int len = 1; len <= 64; len++) {
|
1250
|
+
dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
|
1251
|
+
assigned++;
|
1252
|
+
}
|
1253
|
+
|
1254
|
+
// Check that each entry was initialized exactly once.
|
1255
|
+
EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
|
1256
|
+
for (int i = 0; i < 256; i++) {
|
1257
|
+
EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
|
1258
|
+
}
|
1259
|
+
|
1260
|
+
if (FLAGS_snappy_dump_decompression_table) {
|
1261
|
+
printf("static const uint16 char_table[256] = {\n ");
|
1262
|
+
for (int i = 0; i < 256; i++) {
|
1263
|
+
printf("0x%04x%s",
|
1264
|
+
dst[i],
|
1265
|
+
((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
|
1266
|
+
}
|
1267
|
+
printf("};\n");
|
1268
|
+
}
|
1269
|
+
|
1270
|
+
// Check that computed table matched recorded table.
|
1271
|
+
for (int i = 0; i < 256; i++) {
|
1272
|
+
EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
|
1273
|
+
}
|
1274
|
+
}
|
1128
1275
|
|
1129
1276
|
static void CompressFile(const char* fname) {
|
1130
1277
|
string fullinput;
|
1131
|
-
file::GetContents(fname, &fullinput, file::Defaults())
|
1278
|
+
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1132
1279
|
|
1133
1280
|
string compressed;
|
1134
1281
|
Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
|
1135
1282
|
|
1136
|
-
file::SetContents(string(fname).append(".comp"), compressed,
|
1137
|
-
|
1283
|
+
CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
|
1284
|
+
file::Defaults()));
|
1138
1285
|
}
|
1139
1286
|
|
1140
1287
|
static void UncompressFile(const char* fname) {
|
1141
1288
|
string fullinput;
|
1142
|
-
file::GetContents(fname, &fullinput, file::Defaults())
|
1289
|
+
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1143
1290
|
|
1144
1291
|
size_t uncompLength;
|
1145
1292
|
CHECK(CheckUncompressedLength(fullinput, &uncompLength));
|
@@ -1148,13 +1295,13 @@ static void UncompressFile(const char* fname) {
|
|
1148
1295
|
uncompressed.resize(uncompLength);
|
1149
1296
|
CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
|
1150
1297
|
|
1151
|
-
file::SetContents(string(fname).append(".uncomp"), uncompressed,
|
1152
|
-
|
1298
|
+
CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
|
1299
|
+
file::Defaults()));
|
1153
1300
|
}
|
1154
1301
|
|
1155
1302
|
static void MeasureFile(const char* fname) {
|
1156
1303
|
string fullinput;
|
1157
|
-
file::GetContents(fname, &fullinput, file::Defaults())
|
1304
|
+
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1158
1305
|
printf("%-40s :\n", fname);
|
1159
1306
|
|
1160
1307
|
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
|
@@ -1298,6 +1445,37 @@ static void BM_UIOVec(int iters, int arg) {
|
|
1298
1445
|
}
|
1299
1446
|
BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
|
1300
1447
|
|
1448
|
+
static void BM_UFlatSink(int iters, int arg) {
|
1449
|
+
StopBenchmarkTiming();
|
1450
|
+
|
1451
|
+
// Pick file to process based on "arg"
|
1452
|
+
CHECK_GE(arg, 0);
|
1453
|
+
CHECK_LT(arg, ARRAYSIZE(files));
|
1454
|
+
string contents = ReadTestDataFile(files[arg].filename,
|
1455
|
+
files[arg].size_limit);
|
1456
|
+
|
1457
|
+
string zcontents;
|
1458
|
+
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1459
|
+
char* dst = new char[contents.size()];
|
1460
|
+
|
1461
|
+
SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
|
1462
|
+
static_cast<int64>(contents.size()));
|
1463
|
+
SetBenchmarkLabel(files[arg].label);
|
1464
|
+
StartBenchmarkTiming();
|
1465
|
+
while (iters-- > 0) {
|
1466
|
+
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
|
1467
|
+
snappy::UncheckedByteArraySink sink(dst);
|
1468
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
1469
|
+
}
|
1470
|
+
StopBenchmarkTiming();
|
1471
|
+
|
1472
|
+
string s(dst, contents.size());
|
1473
|
+
CHECK_EQ(contents, s);
|
1474
|
+
|
1475
|
+
delete[] dst;
|
1476
|
+
}
|
1477
|
+
|
1478
|
+
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1301
1479
|
|
1302
1480
|
static void BM_ZFlat(int iters, int arg) {
|
1303
1481
|
StopBenchmarkTiming();
|
@@ -1329,7 +1507,6 @@ static void BM_ZFlat(int iters, int arg) {
|
|
1329
1507
|
}
|
1330
1508
|
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1331
1509
|
|
1332
|
-
|
1333
1510
|
} // namespace snappy
|
1334
1511
|
|
1335
1512
|
|
@@ -1337,7 +1514,6 @@ int main(int argc, char** argv) {
|
|
1337
1514
|
InitGoogle(argv[0], &argc, &argv, true);
|
1338
1515
|
RunSpecifiedBenchmarks();
|
1339
1516
|
|
1340
|
-
|
1341
1517
|
if (argc >= 2) {
|
1342
1518
|
for (int arg = 1; arg < argc; arg++) {
|
1343
1519
|
if (FLAGS_write_compressed) {
|