couchbase 3.0.0.alpha.1-universal-darwin-19 → 3.0.0.alpha.2-universal-darwin-19
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/tests-6.0.3.yml +49 -0
- data/.github/workflows/tests.yml +47 -0
- data/.gitmodules +3 -0
- data/.idea/dictionaries/gem_terms.xml +5 -0
- data/.idea/inspectionProfiles/Project_Default.xml +1 -0
- data/.idea/vcs.xml +1 -0
- data/Gemfile +1 -0
- data/README.md +55 -2
- data/Rakefile +18 -0
- data/bin/init-cluster +62 -0
- data/bin/setup +1 -0
- data/couchbase.gemspec +3 -2
- data/examples/crud.rb +1 -2
- data/examples/managing_buckets.rb +47 -0
- data/examples/managing_collections.rb +58 -0
- data/examples/managing_query_indexes.rb +63 -0
- data/examples/query.rb +3 -2
- data/examples/query_with_consistency.rb +76 -0
- data/examples/subdocument.rb +23 -1
- data/ext/.clang-format +1 -1
- data/ext/.idea/dictionaries/couchbase_terms.xml +2 -0
- data/ext/.idea/vcs.xml +1 -0
- data/ext/CMakeLists.txt +30 -12
- data/ext/build_version.hxx.in +26 -0
- data/ext/couchbase/bucket.hxx +69 -8
- data/ext/couchbase/cluster.hxx +70 -54
- data/ext/couchbase/collections_manifest.hxx +3 -3
- data/ext/couchbase/configuration.hxx +14 -0
- data/ext/couchbase/couchbase.cxx +2044 -383
- data/ext/couchbase/{operations/document_id.hxx → document_id.hxx} +5 -4
- data/ext/couchbase/io/http_message.hxx +5 -1
- data/ext/couchbase/io/http_parser.hxx +2 -1
- data/ext/couchbase/io/http_session.hxx +6 -3
- data/ext/couchbase/io/{binary_message.hxx → mcbp_message.hxx} +15 -12
- data/ext/couchbase/io/mcbp_parser.hxx +99 -0
- data/ext/couchbase/io/{key_value_session.hxx → mcbp_session.hxx} +200 -95
- data/ext/couchbase/io/session_manager.hxx +37 -22
- data/ext/couchbase/mutation_token.hxx +2 -1
- data/ext/couchbase/operations.hxx +38 -8
- data/ext/couchbase/operations/bucket_create.hxx +138 -0
- data/ext/couchbase/operations/bucket_drop.hxx +65 -0
- data/ext/couchbase/operations/bucket_flush.hxx +65 -0
- data/ext/couchbase/operations/bucket_get.hxx +69 -0
- data/ext/couchbase/operations/bucket_get_all.hxx +62 -0
- data/ext/couchbase/operations/bucket_settings.hxx +111 -0
- data/ext/couchbase/operations/bucket_update.hxx +115 -0
- data/ext/couchbase/operations/cluster_developer_preview_enable.hxx +60 -0
- data/ext/couchbase/operations/collection_create.hxx +86 -0
- data/ext/couchbase/operations/collection_drop.hxx +82 -0
- data/ext/couchbase/operations/command.hxx +10 -10
- data/ext/couchbase/operations/document_decrement.hxx +80 -0
- data/ext/couchbase/operations/document_exists.hxx +80 -0
- data/ext/couchbase/operations/{get.hxx → document_get.hxx} +4 -2
- data/ext/couchbase/operations/document_get_and_lock.hxx +64 -0
- data/ext/couchbase/operations/document_get_and_touch.hxx +64 -0
- data/ext/couchbase/operations/document_increment.hxx +80 -0
- data/ext/couchbase/operations/document_insert.hxx +74 -0
- data/ext/couchbase/operations/{lookup_in.hxx → document_lookup_in.hxx} +2 -2
- data/ext/couchbase/operations/{mutate_in.hxx → document_mutate_in.hxx} +11 -2
- data/ext/couchbase/operations/{query.hxx → document_query.hxx} +101 -6
- data/ext/couchbase/operations/document_remove.hxx +67 -0
- data/ext/couchbase/operations/document_replace.hxx +76 -0
- data/ext/couchbase/operations/{upsert.hxx → document_touch.hxx} +14 -14
- data/ext/couchbase/operations/{remove.hxx → document_unlock.hxx} +12 -10
- data/ext/couchbase/operations/document_upsert.hxx +74 -0
- data/ext/couchbase/operations/query_index_build_deferred.hxx +85 -0
- data/ext/couchbase/operations/query_index_create.hxx +134 -0
- data/ext/couchbase/operations/query_index_drop.hxx +108 -0
- data/ext/couchbase/operations/query_index_get_all.hxx +106 -0
- data/ext/couchbase/operations/scope_create.hxx +81 -0
- data/ext/couchbase/operations/scope_drop.hxx +79 -0
- data/ext/couchbase/operations/scope_get_all.hxx +72 -0
- data/ext/couchbase/protocol/client_opcode.hxx +35 -0
- data/ext/couchbase/protocol/client_request.hxx +56 -9
- data/ext/couchbase/protocol/client_response.hxx +52 -15
- data/ext/couchbase/protocol/cmd_cluster_map_change_notification.hxx +81 -0
- data/ext/couchbase/protocol/cmd_decrement.hxx +187 -0
- data/ext/couchbase/protocol/cmd_exists.hxx +171 -0
- data/ext/couchbase/protocol/cmd_get.hxx +31 -8
- data/ext/couchbase/protocol/cmd_get_and_lock.hxx +142 -0
- data/ext/couchbase/protocol/cmd_get_and_touch.hxx +142 -0
- data/ext/couchbase/protocol/cmd_get_cluster_config.hxx +16 -3
- data/ext/couchbase/protocol/cmd_get_collections_manifest.hxx +16 -3
- data/ext/couchbase/protocol/cmd_get_error_map.hxx +16 -3
- data/ext/couchbase/protocol/cmd_hello.hxx +24 -8
- data/ext/couchbase/protocol/cmd_increment.hxx +187 -0
- data/ext/couchbase/protocol/cmd_info.hxx +1 -0
- data/ext/couchbase/protocol/cmd_insert.hxx +172 -0
- data/ext/couchbase/protocol/cmd_lookup_in.hxx +28 -13
- data/ext/couchbase/protocol/cmd_mutate_in.hxx +65 -13
- data/ext/couchbase/protocol/cmd_remove.hxx +59 -4
- data/ext/couchbase/protocol/cmd_replace.hxx +172 -0
- data/ext/couchbase/protocol/cmd_sasl_auth.hxx +15 -3
- data/ext/couchbase/protocol/cmd_sasl_list_mechs.hxx +15 -3
- data/ext/couchbase/protocol/cmd_sasl_step.hxx +15 -3
- data/ext/couchbase/protocol/cmd_select_bucket.hxx +14 -2
- data/ext/couchbase/protocol/cmd_touch.hxx +102 -0
- data/ext/couchbase/protocol/cmd_unlock.hxx +95 -0
- data/ext/couchbase/protocol/cmd_upsert.hxx +50 -14
- data/ext/couchbase/protocol/durability_level.hxx +67 -0
- data/ext/couchbase/protocol/frame_info_id.hxx +187 -0
- data/ext/couchbase/protocol/hello_feature.hxx +137 -0
- data/ext/couchbase/protocol/server_opcode.hxx +57 -0
- data/ext/couchbase/protocol/server_request.hxx +122 -0
- data/ext/couchbase/protocol/unsigned_leb128.h +15 -15
- data/ext/couchbase/utils/byteswap.hxx +1 -2
- data/ext/couchbase/utils/url_codec.hxx +225 -0
- data/ext/couchbase/version.hxx +3 -1
- data/ext/extconf.rb +4 -1
- data/ext/test/main.cxx +37 -113
- data/ext/third_party/snappy/.appveyor.yml +36 -0
- data/ext/third_party/snappy/.gitignore +8 -0
- data/ext/third_party/snappy/.travis.yml +98 -0
- data/ext/third_party/snappy/AUTHORS +1 -0
- data/ext/third_party/snappy/CMakeLists.txt +345 -0
- data/ext/third_party/snappy/CONTRIBUTING.md +26 -0
- data/ext/third_party/snappy/COPYING +54 -0
- data/ext/third_party/snappy/NEWS +188 -0
- data/ext/third_party/snappy/README.md +148 -0
- data/ext/third_party/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/ext/third_party/snappy/cmake/config.h.in +59 -0
- data/ext/third_party/snappy/docs/README.md +72 -0
- data/ext/third_party/snappy/format_description.txt +110 -0
- data/ext/third_party/snappy/framing_format.txt +135 -0
- data/ext/third_party/snappy/snappy-c.cc +90 -0
- data/ext/third_party/snappy/snappy-c.h +138 -0
- data/ext/third_party/snappy/snappy-internal.h +315 -0
- data/ext/third_party/snappy/snappy-sinksource.cc +121 -0
- data/ext/third_party/snappy/snappy-sinksource.h +182 -0
- data/ext/third_party/snappy/snappy-stubs-internal.cc +42 -0
- data/ext/third_party/snappy/snappy-stubs-internal.h +493 -0
- data/ext/third_party/snappy/snappy-stubs-public.h.in +63 -0
- data/ext/third_party/snappy/snappy-test.cc +613 -0
- data/ext/third_party/snappy/snappy-test.h +526 -0
- data/ext/third_party/snappy/snappy.cc +1770 -0
- data/ext/third_party/snappy/snappy.h +209 -0
- data/ext/third_party/snappy/snappy_compress_fuzzer.cc +60 -0
- data/ext/third_party/snappy/snappy_uncompress_fuzzer.cc +58 -0
- data/ext/third_party/snappy/snappy_unittest.cc +1512 -0
- data/ext/third_party/snappy/testdata/alice29.txt +3609 -0
- data/ext/third_party/snappy/testdata/asyoulik.txt +4122 -0
- data/ext/third_party/snappy/testdata/baddata1.snappy +0 -0
- data/ext/third_party/snappy/testdata/baddata2.snappy +0 -0
- data/ext/third_party/snappy/testdata/baddata3.snappy +0 -0
- data/ext/third_party/snappy/testdata/fireworks.jpeg +0 -0
- data/ext/third_party/snappy/testdata/geo.protodata +0 -0
- data/ext/third_party/snappy/testdata/html +1 -0
- data/ext/third_party/snappy/testdata/html_x_4 +1 -0
- data/ext/third_party/snappy/testdata/kppkn.gtb +0 -0
- data/ext/third_party/snappy/testdata/lcet10.txt +7519 -0
- data/ext/third_party/snappy/testdata/paper-100k.pdf +600 -2
- data/ext/third_party/snappy/testdata/plrabn12.txt +10699 -0
- data/ext/third_party/snappy/testdata/urls.10K +10000 -0
- data/lib/couchbase/binary_collection.rb +33 -76
- data/lib/couchbase/binary_collection_options.rb +94 -0
- data/lib/couchbase/bucket.rb +9 -3
- data/lib/couchbase/cluster.rb +161 -23
- data/lib/couchbase/collection.rb +108 -191
- data/lib/couchbase/collection_options.rb +430 -0
- data/lib/couchbase/errors.rb +136 -134
- data/lib/couchbase/json_transcoder.rb +32 -0
- data/lib/couchbase/management/analytics_index_manager.rb +185 -9
- data/lib/couchbase/management/bucket_manager.rb +84 -33
- data/lib/couchbase/management/collection_manager.rb +166 -1
- data/lib/couchbase/management/query_index_manager.rb +261 -0
- data/lib/couchbase/management/search_index_manager.rb +291 -0
- data/lib/couchbase/management/user_manager.rb +12 -10
- data/lib/couchbase/management/view_index_manager.rb +151 -1
- data/lib/couchbase/mutation_state.rb +11 -1
- data/lib/couchbase/scope.rb +4 -4
- data/lib/couchbase/version.rb +1 -1
- metadata +113 -18
- data/.travis.yml +0 -7
- data/ext/couchbase/io/binary_parser.hxx +0 -64
- data/lib/couchbase/results.rb +0 -307
@@ -0,0 +1,209 @@
|
|
1
|
+
// Copyright 2005 and onwards Google Inc.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are
|
5
|
+
// met:
|
6
|
+
//
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
11
|
+
// in the documentation and/or other materials provided with the
|
12
|
+
// distribution.
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
15
|
+
// this software without specific prior written permission.
|
16
|
+
//
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
//
|
29
|
+
// A light-weight compression algorithm. It is designed for speed of
|
30
|
+
// compression and decompression, rather than for the utmost in space
|
31
|
+
// savings.
|
32
|
+
//
|
33
|
+
// For getting better compression ratios when you are compressing data
|
34
|
+
// with long repeated sequences or compressing data that is similar to
|
35
|
+
// other data, while still compressing fast, you might look at first
|
36
|
+
// using BMDiff and then compressing the output of BMDiff with
|
37
|
+
// Snappy.
|
38
|
+
|
39
|
+
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
|
40
|
+
#define THIRD_PARTY_SNAPPY_SNAPPY_H__
|
41
|
+
|
42
|
+
#include <stddef.h>
|
43
|
+
#include <stdint.h>
|
44
|
+
|
45
|
+
#include <string>
|
46
|
+
|
47
|
+
#include "snappy-stubs-public.h"
|
48
|
+
|
49
|
+
namespace snappy {
|
50
|
+
class Source;
|
51
|
+
class Sink;
|
52
|
+
|
53
|
+
// ------------------------------------------------------------------------
|
54
|
+
// Generic compression/decompression routines.
|
55
|
+
// ------------------------------------------------------------------------
|
56
|
+
|
57
|
+
// Compress the bytes read from "*source" and append to "*sink". Return the
|
58
|
+
// number of bytes written.
|
59
|
+
size_t Compress(Source* source, Sink* sink);
|
60
|
+
|
61
|
+
// Find the uncompressed length of the given stream, as given by the header.
|
62
|
+
// Note that the true length could deviate from this; the stream could e.g.
|
63
|
+
// be truncated.
|
64
|
+
//
|
65
|
+
// Also note that this leaves "*source" in a state that is unsuitable for
|
66
|
+
// further operations, such as RawUncompress(). You will need to rewind
|
67
|
+
// or recreate the source yourself before attempting any further calls.
|
68
|
+
bool GetUncompressedLength(Source* source, uint32_t* result);
|
69
|
+
|
70
|
+
// ------------------------------------------------------------------------
|
71
|
+
// Higher-level string based routines (should be sufficient for most users)
|
72
|
+
// ------------------------------------------------------------------------
|
73
|
+
|
74
|
+
// Sets "*compressed" to the compressed version of "input[0,input_length-1]".
|
75
|
+
// Original contents of *compressed are lost.
|
76
|
+
//
|
77
|
+
// REQUIRES: "input[]" is not an alias of "*compressed".
|
78
|
+
size_t Compress(const char* input, size_t input_length,
|
79
|
+
std::string* compressed);
|
80
|
+
|
81
|
+
// Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
|
82
|
+
// Original contents of "*uncompressed" are lost.
|
83
|
+
//
|
84
|
+
// REQUIRES: "compressed[]" is not an alias of "*uncompressed".
|
85
|
+
//
|
86
|
+
// returns false if the message is corrupted and could not be decompressed
|
87
|
+
bool Uncompress(const char* compressed, size_t compressed_length,
|
88
|
+
std::string* uncompressed);
|
89
|
+
|
90
|
+
// Decompresses "compressed" to "*uncompressed".
|
91
|
+
//
|
92
|
+
// returns false if the message is corrupted and could not be decompressed
|
93
|
+
bool Uncompress(Source* compressed, Sink* uncompressed);
|
94
|
+
|
95
|
+
// This routine uncompresses as much of the "compressed" as possible
|
96
|
+
// into sink. It returns the number of valid bytes added to sink
|
97
|
+
// (extra invalid bytes may have been added due to errors; the caller
|
98
|
+
// should ignore those). The emitted data typically has length
|
99
|
+
// GetUncompressedLength(), but may be shorter if an error is
|
100
|
+
// encountered.
|
101
|
+
size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed);
|
102
|
+
|
103
|
+
// ------------------------------------------------------------------------
|
104
|
+
// Lower-level character array based routines. May be useful for
|
105
|
+
// efficiency reasons in certain circumstances.
|
106
|
+
// ------------------------------------------------------------------------
|
107
|
+
|
108
|
+
// REQUIRES: "compressed" must point to an area of memory that is at
|
109
|
+
// least "MaxCompressedLength(input_length)" bytes in length.
|
110
|
+
//
|
111
|
+
// Takes the data stored in "input[0..input_length-1]" and stores
|
112
|
+
// it in the array pointed to by "compressed".
|
113
|
+
//
|
114
|
+
// "*compressed_length" is set to the length of the compressed output.
|
115
|
+
//
|
116
|
+
// Example:
|
117
|
+
// char* output = new char[snappy::MaxCompressedLength(input_length)];
|
118
|
+
// size_t output_length;
|
119
|
+
// RawCompress(input, input_length, output, &output_length);
|
120
|
+
// ... Process(output, output_length) ...
|
121
|
+
// delete [] output;
|
122
|
+
void RawCompress(const char* input,
|
123
|
+
size_t input_length,
|
124
|
+
char* compressed,
|
125
|
+
size_t* compressed_length);
|
126
|
+
|
127
|
+
// Given data in "compressed[0..compressed_length-1]" generated by
|
128
|
+
// calling the snappy::Compress routine, this routine
|
129
|
+
// stores the uncompressed data to
|
130
|
+
// uncompressed[0..GetUncompressedLength(compressed)-1]
|
131
|
+
// returns false if the message is corrupted and could not be decompressed
|
132
|
+
bool RawUncompress(const char* compressed, size_t compressed_length,
|
133
|
+
char* uncompressed);
|
134
|
+
|
135
|
+
// Given data from the byte source 'compressed' generated by calling
|
136
|
+
// the snappy::Compress routine, this routine stores the uncompressed
|
137
|
+
// data to
|
138
|
+
// uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
|
139
|
+
// returns false if the message is corrupted and could not be decompressed
|
140
|
+
bool RawUncompress(Source* compressed, char* uncompressed);
|
141
|
+
|
142
|
+
// Given data in "compressed[0..compressed_length-1]" generated by
|
143
|
+
// calling the snappy::Compress routine, this routine
|
144
|
+
// stores the uncompressed data to the iovec "iov". The number of physical
|
145
|
+
// buffers in "iov" is given by iov_cnt and their cumulative size
|
146
|
+
// must be at least GetUncompressedLength(compressed). The individual buffers
|
147
|
+
// in "iov" must not overlap with each other.
|
148
|
+
//
|
149
|
+
// returns false if the message is corrupted and could not be decompressed
|
150
|
+
bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
|
151
|
+
const struct iovec* iov, size_t iov_cnt);
|
152
|
+
|
153
|
+
// Given data from the byte source 'compressed' generated by calling
|
154
|
+
// the snappy::Compress routine, this routine stores the uncompressed
|
155
|
+
// data to the iovec "iov". The number of physical
|
156
|
+
// buffers in "iov" is given by iov_cnt and their cumulative size
|
157
|
+
// must be at least GetUncompressedLength(compressed). The individual buffers
|
158
|
+
// in "iov" must not overlap with each other.
|
159
|
+
//
|
160
|
+
// returns false if the message is corrupted and could not be decompressed
|
161
|
+
bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
|
162
|
+
size_t iov_cnt);
|
163
|
+
|
164
|
+
// Returns the maximal size of the compressed representation of
|
165
|
+
// input data that is "source_bytes" bytes in length.
|
166
|
+
size_t MaxCompressedLength(size_t source_bytes);
|
167
|
+
|
168
|
+
// REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
|
169
|
+
// Returns true and stores the length of the uncompressed data in
|
170
|
+
// *result normally. Returns false on parsing error.
|
171
|
+
// This operation takes O(1) time.
|
172
|
+
bool GetUncompressedLength(const char* compressed, size_t compressed_length,
|
173
|
+
size_t* result);
|
174
|
+
|
175
|
+
// Returns true iff the contents of "compressed[]" can be uncompressed
|
176
|
+
// successfully. Does not return the uncompressed data. Takes
|
177
|
+
// time proportional to compressed_length, but is usually at least
|
178
|
+
// a factor of four faster than actual decompression.
|
179
|
+
bool IsValidCompressedBuffer(const char* compressed,
|
180
|
+
size_t compressed_length);
|
181
|
+
|
182
|
+
// Returns true iff the contents of "compressed" can be uncompressed
|
183
|
+
// successfully. Does not return the uncompressed data. Takes
|
184
|
+
// time proportional to *compressed length, but is usually at least
|
185
|
+
// a factor of four faster than actual decompression.
|
186
|
+
// On success, consumes all of *compressed. On failure, consumes an
|
187
|
+
// unspecified prefix of *compressed.
|
188
|
+
bool IsValidCompressed(Source* compressed);
|
189
|
+
|
190
|
+
// The size of a compression block. Note that many parts of the compression
|
191
|
+
// code assume that kBlockSize <= 65536; in particular, the hash table
|
192
|
+
// can only store 16-bit offsets, and EmitCopy() also assumes the offset
|
193
|
+
// is 65535 bytes or less. Note also that if you change this, it will
|
194
|
+
// affect the framing format (see framing_format.txt).
|
195
|
+
//
|
196
|
+
// Note that there might be older data around that is compressed with larger
|
197
|
+
// block sizes, so the decompression code should not rely on the
|
198
|
+
// non-existence of long backreferences.
|
199
|
+
static constexpr int kBlockLog = 16;
|
200
|
+
static constexpr size_t kBlockSize = 1 << kBlockLog;
|
201
|
+
|
202
|
+
static constexpr int kMinHashTableBits = 8;
|
203
|
+
static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
|
204
|
+
|
205
|
+
static constexpr int kMaxHashTableBits = 14;
|
206
|
+
static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
|
207
|
+
} // end namespace snappy
|
208
|
+
|
209
|
+
#endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
|
@@ -0,0 +1,60 @@
|
|
1
|
+
// Copyright 2019 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are
|
5
|
+
// met:
|
6
|
+
//
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
11
|
+
// in the documentation and/or other materials provided with the
|
12
|
+
// distribution.
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
15
|
+
// this software without specific prior written permission.
|
16
|
+
//
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
//
|
29
|
+
// libFuzzer harness for fuzzing snappy compression code.
|
30
|
+
|
31
|
+
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
33
|
+
|
34
|
+
#include <cassert>
|
35
|
+
#include <string>
|
36
|
+
|
37
|
+
#include "snappy.h"
|
38
|
+
|
39
|
+
// Entry point for LibFuzzer.
|
40
|
+
// libFuzzer entry point for the compression path.
//
// Compresses the fuzzer-supplied bytes, then decompresses the result, and
// uses assert() to check that:
//   * the size returned by Compress() equals the size of the output string,
//   * the output does not exceed MaxCompressedLength() for the input size,
//   * the output passes IsValidCompressedBuffer(),
//   * decompression succeeds and reproduces the input exactly.
//
// Always returns 0. The checks are only active when assert() is enabled
// (i.e. NDEBUG is not defined).
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  const std::string original(reinterpret_cast<const char*>(data), size);

  std::string packed;
  const size_t reported_size =
      snappy::Compress(original.data(), original.size(), &packed);

  // Only read inside assert(); the cast silences unused-variable warnings
  // when assertions are compiled out.
  (void)reported_size;
  assert(reported_size == packed.size());
  assert(packed.size() <= snappy::MaxCompressedLength(original.size()));
  assert(snappy::IsValidCompressedBuffer(packed.data(), packed.size()));

  std::string round_tripped;
  const bool decoded_ok =
      snappy::Uncompress(packed.data(), packed.size(), &round_tripped);

  // Same as above: only read inside assert().
  (void)decoded_ok;
  assert(decoded_ok);
  assert(original == round_tripped);
  return 0;
}
|
@@ -0,0 +1,58 @@
|
|
1
|
+
// Copyright 2019 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are
|
5
|
+
// met:
|
6
|
+
//
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
11
|
+
// in the documentation and/or other materials provided with the
|
12
|
+
// distribution.
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
15
|
+
// this software without specific prior written permission.
|
16
|
+
//
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
//
|
29
|
+
// libFuzzer harness for fuzzing snappy's decompression code.
|
30
|
+
|
31
|
+
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
33
|
+
|
34
|
+
#include <cassert>
|
35
|
+
#include <string>
|
36
|
+
|
37
|
+
#include "snappy.h"
|
38
|
+
|
39
|
+
// Entry point for LibFuzzer.
|
40
|
+
// libFuzzer entry point for the decompression path.
//
// Treats the fuzzer-supplied bytes as a compressed stream and hands them to
// snappy::Uncompress(). Inputs whose header cannot be parsed, or whose
// declared uncompressed length exceeds 1 MiB, are skipped so the fuzzer
// does not spend time on self-crafted decompression bombs.
//
// Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  const std::string stream(reinterpret_cast<const char*>(data), size);

  constexpr size_t kMaxUncompressedSize = 1 << 20;
  size_t claimed_size;  // Written by GetUncompressedLength(); read only after it reports success.
  const bool header_ok = snappy::GetUncompressedLength(
      stream.data(), stream.size(), &claimed_size);
  if (!header_ok) {
    return 0;
  }
  if (claimed_size > kMaxUncompressedSize) {
    return 0;
  }

  // The result of Uncompress() is deliberately discarded: failure is the
  // expected outcome for invalid inputs.
  std::string decoded;
  snappy::Uncompress(stream.data(), stream.size(), &decoded);
  return 0;
}
|
@@ -0,0 +1,1512 @@
|
|
1
|
+
// Copyright 2005 and onwards Google Inc.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are
|
5
|
+
// met:
|
6
|
+
//
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
11
|
+
// in the documentation and/or other materials provided with the
|
12
|
+
// distribution.
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
15
|
+
// this software without specific prior written permission.
|
16
|
+
//
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
#include <cmath>
|
30
|
+
#include <cstdlib>
|
31
|
+
|
32
|
+
#include <algorithm>
|
33
|
+
#include <random>
|
34
|
+
#include <string>
|
35
|
+
#include <utility>
|
36
|
+
#include <vector>
|
37
|
+
|
38
|
+
#include "snappy.h"
|
39
|
+
#include "snappy-internal.h"
|
40
|
+
#include "snappy-test.h"
|
41
|
+
#include "snappy-sinksource.h"
|
42
|
+
|
43
|
+
// Command-line flags for this test/benchmark binary. The DEFINE_* macros
// are not defined in this file (presumably they come from snappy-test.h or
// gflags -- confirm against the build).

// Prefix-length range used when testing a file's contents.
DEFINE_int32(start_len, -1,
             "Starting prefix size for testing (-1: just full file contents)");
// Help text previously duplicated start_len's description verbatim.
DEFINE_int32(end_len, -1,
             "Ending prefix size for testing (-1: just full file contents)");
DEFINE_int32(bytes, 10485760,
             "How many bytes to compress/uncompress per file for timing");

// Which compressors to run.
DEFINE_bool(zlib, false,
            "Run zlib compression (http://www.zlib.net)");
DEFINE_bool(lzo, false,
            "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
DEFINE_bool(snappy, true, "Run snappy compression");

// Optional on-disk dumps of the intermediate results.
DEFINE_bool(write_compressed, false,
            "Write compressed versions of each file to <file>.comp");
DEFINE_bool(write_uncompressed, false,
            "Write uncompressed versions of each file to <file>.uncomp");

DEFINE_bool(snappy_dump_decompression_table, false,
            "If true, we print the decompression table during tests.");
|
63
|
+
|
64
|
+
namespace snappy {
|
65
|
+
|
66
|
+
#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
|
67
|
+
|
68
|
+
// To test against code that reads beyond its input, this class copies a
|
69
|
+
// string to a newly allocated group of pages, the last of which
|
70
|
+
// is made unreadable via mprotect. Note that we need to allocate the
|
71
|
+
// memory with mmap(), as POSIX allows mprotect() only on memory allocated
|
72
|
+
// with mmap(), and some malloc/posix_memalign implementations expect to
|
73
|
+
// be able to read previously allocated memory while doing heap allocations.
|
74
|
+
class DataEndingAtUnreadablePage {
|
75
|
+
public:
|
76
|
+
explicit DataEndingAtUnreadablePage(const std::string& s) {
|
77
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
78
|
+
const size_t size = s.size();
|
79
|
+
// Round up space for string to a multiple of page_size.
|
80
|
+
size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
|
81
|
+
alloc_size_ = space_for_string + page_size;
|
82
|
+
mem_ = mmap(NULL, alloc_size_,
|
83
|
+
PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
84
|
+
CHECK_NE(MAP_FAILED, mem_);
|
85
|
+
protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
|
86
|
+
char* dst = protected_page_ - size;
|
87
|
+
std::memcpy(dst, s.data(), size);
|
88
|
+
data_ = dst;
|
89
|
+
size_ = size;
|
90
|
+
// Make guard page unreadable.
|
91
|
+
CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
|
92
|
+
}
|
93
|
+
|
94
|
+
~DataEndingAtUnreadablePage() {
|
95
|
+
const size_t page_size = sysconf(_SC_PAGESIZE);
|
96
|
+
// Undo the mprotect.
|
97
|
+
CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
|
98
|
+
CHECK_EQ(0, munmap(mem_, alloc_size_));
|
99
|
+
}
|
100
|
+
|
101
|
+
const char* data() const { return data_; }
|
102
|
+
size_t size() const { return size_; }
|
103
|
+
|
104
|
+
private:
|
105
|
+
size_t alloc_size_;
|
106
|
+
void* mem_;
|
107
|
+
char* protected_page_;
|
108
|
+
const char* data_;
|
109
|
+
size_t size_;
|
110
|
+
};
|
111
|
+
|
112
|
+
#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
|
113
|
+
|
114
|
+
// Fallback for systems without mmap() or sysconf(): the data is held in a
// plain std::string, so reads past the end are not trapped.
|
115
|
+
using DataEndingAtUnreadablePage = std::string;
|
116
|
+
|
117
|
+
#endif
|
118
|
+
|
119
|
+
// Compression back ends the benchmark helpers can dispatch to. The numeric
// values double as indices into `names` below, so the two lists must stay in
// the same order.
enum CompressorType {
  ZLIB = 0,
  LZO = 1,
  SNAPPY = 2
};

// Printable label for each CompressorType, indexed by the enumerator value.
const char* names[] = {
  "ZLIB",
  "LZO",
  "SNAPPY"
};
|
126
|
+
|
127
|
+
static size_t MinimumRequiredOutputSpace(size_t input_size,
|
128
|
+
CompressorType comp) {
|
129
|
+
switch (comp) {
|
130
|
+
#ifdef ZLIB_VERSION
|
131
|
+
case ZLIB:
|
132
|
+
return ZLib::MinCompressbufSize(input_size);
|
133
|
+
#endif // ZLIB_VERSION
|
134
|
+
|
135
|
+
#ifdef LZO_VERSION
|
136
|
+
case LZO:
|
137
|
+
return input_size + input_size/64 + 16 + 3;
|
138
|
+
#endif // LZO_VERSION
|
139
|
+
|
140
|
+
case SNAPPY:
|
141
|
+
return snappy::MaxCompressedLength(input_size);
|
142
|
+
|
143
|
+
default:
|
144
|
+
LOG(FATAL) << "Unknown compression type number " << comp;
|
145
|
+
return 0;
|
146
|
+
}
|
147
|
+
}
|
148
|
+
|
149
|
+
// Returns true if we successfully compressed, false otherwise.
|
150
|
+
//
|
151
|
+
// If compressed_is_preallocated is set, do not resize the compressed buffer.
|
152
|
+
// This is typically what you want for a benchmark, in order to not spend
|
153
|
+
// time in the memory allocator. If you do set this flag, however,
|
154
|
+
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
|
155
|
+
// number of bytes, and may contain junk bytes at the end after return.
|
156
|
+
static bool Compress(const char* input, size_t input_size, CompressorType comp,
|
157
|
+
std::string* compressed, bool compressed_is_preallocated) {
|
158
|
+
if (!compressed_is_preallocated) {
|
159
|
+
compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
|
160
|
+
}
|
161
|
+
|
162
|
+
switch (comp) {
|
163
|
+
#ifdef ZLIB_VERSION
|
164
|
+
case ZLIB: {
|
165
|
+
ZLib zlib;
|
166
|
+
uLongf destlen = compressed->size();
|
167
|
+
int ret = zlib.Compress(
|
168
|
+
reinterpret_cast<Bytef*>(string_as_array(compressed)),
|
169
|
+
&destlen,
|
170
|
+
reinterpret_cast<const Bytef*>(input),
|
171
|
+
input_size);
|
172
|
+
CHECK_EQ(Z_OK, ret);
|
173
|
+
if (!compressed_is_preallocated) {
|
174
|
+
compressed->resize(destlen);
|
175
|
+
}
|
176
|
+
return true;
|
177
|
+
}
|
178
|
+
#endif // ZLIB_VERSION
|
179
|
+
|
180
|
+
#ifdef LZO_VERSION
|
181
|
+
case LZO: {
|
182
|
+
unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
|
183
|
+
lzo_uint destlen;
|
184
|
+
int ret = lzo1x_1_15_compress(
|
185
|
+
reinterpret_cast<const uint8_t*>(input),
|
186
|
+
input_size,
|
187
|
+
reinterpret_cast<uint8_t*>(string_as_array(compressed)),
|
188
|
+
&destlen,
|
189
|
+
mem);
|
190
|
+
CHECK_EQ(LZO_E_OK, ret);
|
191
|
+
delete[] mem;
|
192
|
+
if (!compressed_is_preallocated) {
|
193
|
+
compressed->resize(destlen);
|
194
|
+
}
|
195
|
+
break;
|
196
|
+
}
|
197
|
+
#endif // LZO_VERSION
|
198
|
+
|
199
|
+
case SNAPPY: {
|
200
|
+
size_t destlen;
|
201
|
+
snappy::RawCompress(input, input_size,
|
202
|
+
string_as_array(compressed),
|
203
|
+
&destlen);
|
204
|
+
CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
|
205
|
+
if (!compressed_is_preallocated) {
|
206
|
+
compressed->resize(destlen);
|
207
|
+
}
|
208
|
+
break;
|
209
|
+
}
|
210
|
+
|
211
|
+
default: {
|
212
|
+
return false; // the asked-for library wasn't compiled in
|
213
|
+
}
|
214
|
+
}
|
215
|
+
return true;
|
216
|
+
}
|
217
|
+
|
218
|
+
static bool Uncompress(const std::string& compressed, CompressorType comp,
|
219
|
+
int size, std::string* output) {
|
220
|
+
switch (comp) {
|
221
|
+
#ifdef ZLIB_VERSION
|
222
|
+
case ZLIB: {
|
223
|
+
output->resize(size);
|
224
|
+
ZLib zlib;
|
225
|
+
uLongf destlen = output->size();
|
226
|
+
int ret = zlib.Uncompress(
|
227
|
+
reinterpret_cast<Bytef*>(string_as_array(output)),
|
228
|
+
&destlen,
|
229
|
+
reinterpret_cast<const Bytef*>(compressed.data()),
|
230
|
+
compressed.size());
|
231
|
+
CHECK_EQ(Z_OK, ret);
|
232
|
+
CHECK_EQ(static_cast<uLongf>(size), destlen);
|
233
|
+
break;
|
234
|
+
}
|
235
|
+
#endif // ZLIB_VERSION
|
236
|
+
|
237
|
+
#ifdef LZO_VERSION
|
238
|
+
case LZO: {
|
239
|
+
output->resize(size);
|
240
|
+
lzo_uint destlen;
|
241
|
+
int ret = lzo1x_decompress(
|
242
|
+
reinterpret_cast<const uint8_t*>(compressed.data()),
|
243
|
+
compressed.size(),
|
244
|
+
reinterpret_cast<uint8_t*>(string_as_array(output)),
|
245
|
+
&destlen,
|
246
|
+
NULL);
|
247
|
+
CHECK_EQ(LZO_E_OK, ret);
|
248
|
+
CHECK_EQ(static_cast<lzo_uint>(size), destlen);
|
249
|
+
break;
|
250
|
+
}
|
251
|
+
#endif // LZO_VERSION
|
252
|
+
|
253
|
+
case SNAPPY: {
|
254
|
+
snappy::RawUncompress(compressed.data(), compressed.size(),
|
255
|
+
string_as_array(output));
|
256
|
+
break;
|
257
|
+
}
|
258
|
+
|
259
|
+
default: {
|
260
|
+
return false; // the asked-for library wasn't compiled in
|
261
|
+
}
|
262
|
+
}
|
263
|
+
return true;
|
264
|
+
}
|
265
|
+
|
266
|
+
static void Measure(const char* data,
|
267
|
+
size_t length,
|
268
|
+
CompressorType comp,
|
269
|
+
int repeats,
|
270
|
+
int block_size) {
|
271
|
+
// Run tests a few time and pick median running times
|
272
|
+
static const int kRuns = 5;
|
273
|
+
double ctime[kRuns];
|
274
|
+
double utime[kRuns];
|
275
|
+
int compressed_size = 0;
|
276
|
+
|
277
|
+
{
|
278
|
+
// Chop the input into blocks
|
279
|
+
int num_blocks = (length + block_size - 1) / block_size;
|
280
|
+
std::vector<const char*> input(num_blocks);
|
281
|
+
std::vector<size_t> input_length(num_blocks);
|
282
|
+
std::vector<std::string> compressed(num_blocks);
|
283
|
+
std::vector<std::string> output(num_blocks);
|
284
|
+
for (int b = 0; b < num_blocks; ++b) {
|
285
|
+
int input_start = b * block_size;
|
286
|
+
int input_limit = std::min<int>((b+1)*block_size, length);
|
287
|
+
input[b] = data+input_start;
|
288
|
+
input_length[b] = input_limit-input_start;
|
289
|
+
}
|
290
|
+
|
291
|
+
// Pre-grow the output buffers so we don't measure string append time.
|
292
|
+
for (std::string& compressed_block : compressed) {
|
293
|
+
compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
|
294
|
+
}
|
295
|
+
|
296
|
+
// First, try one trial compression to make sure the code is compiled in
|
297
|
+
if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
|
298
|
+
LOG(WARNING) << "Skipping " << names[comp] << ": "
|
299
|
+
<< "library not compiled in";
|
300
|
+
return;
|
301
|
+
}
|
302
|
+
|
303
|
+
for (int run = 0; run < kRuns; ++run) {
|
304
|
+
CycleTimer ctimer, utimer;
|
305
|
+
|
306
|
+
// Pre-grow the output buffers so we don't measure string append time.
|
307
|
+
for (std::string& compressed_block : compressed) {
|
308
|
+
compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
|
309
|
+
}
|
310
|
+
|
311
|
+
ctimer.Start();
|
312
|
+
for (int b = 0; b < num_blocks; ++b) {
|
313
|
+
for (int i = 0; i < repeats; ++i)
|
314
|
+
Compress(input[b], input_length[b], comp, &compressed[b], true);
|
315
|
+
}
|
316
|
+
ctimer.Stop();
|
317
|
+
|
318
|
+
// Compress once more, with resizing, so we don't leave junk
|
319
|
+
// at the end that will confuse the decompressor.
|
320
|
+
for (int b = 0; b < num_blocks; ++b) {
|
321
|
+
Compress(input[b], input_length[b], comp, &compressed[b], false);
|
322
|
+
}
|
323
|
+
|
324
|
+
for (int b = 0; b < num_blocks; ++b) {
|
325
|
+
output[b].resize(input_length[b]);
|
326
|
+
}
|
327
|
+
|
328
|
+
utimer.Start();
|
329
|
+
for (int i = 0; i < repeats; ++i) {
|
330
|
+
for (int b = 0; b < num_blocks; ++b)
|
331
|
+
Uncompress(compressed[b], comp, input_length[b], &output[b]);
|
332
|
+
}
|
333
|
+
utimer.Stop();
|
334
|
+
|
335
|
+
ctime[run] = ctimer.Get();
|
336
|
+
utime[run] = utimer.Get();
|
337
|
+
}
|
338
|
+
|
339
|
+
compressed_size = 0;
|
340
|
+
for (const std::string& compressed_item : compressed) {
|
341
|
+
compressed_size += compressed_item.size();
|
342
|
+
}
|
343
|
+
}
|
344
|
+
|
345
|
+
std::sort(ctime, ctime + kRuns);
|
346
|
+
std::sort(utime, utime + kRuns);
|
347
|
+
const int med = kRuns/2;
|
348
|
+
|
349
|
+
float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
|
350
|
+
float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
|
351
|
+
std::string x = names[comp];
|
352
|
+
x += ":";
|
353
|
+
std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
|
354
|
+
: std::string("?");
|
355
|
+
std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
|
356
|
+
"comp %5.1f MB/s uncomp %5s MB/s\n",
|
357
|
+
x.c_str(),
|
358
|
+
block_size/(1<<20),
|
359
|
+
static_cast<int>(length), static_cast<uint32_t>(compressed_size),
|
360
|
+
(compressed_size * 100.0) / std::max<int>(1, length),
|
361
|
+
comp_rate,
|
362
|
+
urate.c_str());
|
363
|
+
}
|
364
|
+
|
365
|
+
static int VerifyString(const std::string& input) {
|
366
|
+
std::string compressed;
|
367
|
+
DataEndingAtUnreadablePage i(input);
|
368
|
+
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
369
|
+
CHECK_EQ(written, compressed.size());
|
370
|
+
CHECK_LE(compressed.size(),
|
371
|
+
snappy::MaxCompressedLength(input.size()));
|
372
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
373
|
+
|
374
|
+
std::string uncompressed;
|
375
|
+
DataEndingAtUnreadablePage c(compressed);
|
376
|
+
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
|
377
|
+
CHECK_EQ(uncompressed, input);
|
378
|
+
return uncompressed.size();
|
379
|
+
}
|
380
|
+
|
381
|
+
static void VerifyStringSink(const std::string& input) {
|
382
|
+
std::string compressed;
|
383
|
+
DataEndingAtUnreadablePage i(input);
|
384
|
+
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
385
|
+
CHECK_EQ(written, compressed.size());
|
386
|
+
CHECK_LE(compressed.size(),
|
387
|
+
snappy::MaxCompressedLength(input.size()));
|
388
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
389
|
+
|
390
|
+
std::string uncompressed;
|
391
|
+
uncompressed.resize(input.size());
|
392
|
+
snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
|
393
|
+
DataEndingAtUnreadablePage c(compressed);
|
394
|
+
snappy::ByteArraySource source(c.data(), c.size());
|
395
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
396
|
+
CHECK_EQ(uncompressed, input);
|
397
|
+
}
|
398
|
+
|
399
|
+
static void VerifyIOVec(const std::string& input) {
|
400
|
+
std::string compressed;
|
401
|
+
DataEndingAtUnreadablePage i(input);
|
402
|
+
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
|
403
|
+
CHECK_EQ(written, compressed.size());
|
404
|
+
CHECK_LE(compressed.size(),
|
405
|
+
snappy::MaxCompressedLength(input.size()));
|
406
|
+
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
|
407
|
+
|
408
|
+
// Try uncompressing into an iovec containing a random number of entries
|
409
|
+
// ranging from 1 to 10.
|
410
|
+
char* buf = new char[input.size()];
|
411
|
+
std::minstd_rand0 rng(input.size());
|
412
|
+
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
|
413
|
+
size_t num = uniform_1_to_10(rng);
|
414
|
+
if (input.size() < num) {
|
415
|
+
num = input.size();
|
416
|
+
}
|
417
|
+
struct iovec* iov = new iovec[num];
|
418
|
+
size_t used_so_far = 0;
|
419
|
+
std::bernoulli_distribution one_in_five(1.0 / 5);
|
420
|
+
for (size_t i = 0; i < num; ++i) {
|
421
|
+
assert(used_so_far < input.size());
|
422
|
+
iov[i].iov_base = buf + used_so_far;
|
423
|
+
if (i == num - 1) {
|
424
|
+
iov[i].iov_len = input.size() - used_so_far;
|
425
|
+
} else {
|
426
|
+
// Randomly choose to insert a 0 byte entry.
|
427
|
+
if (one_in_five(rng)) {
|
428
|
+
iov[i].iov_len = 0;
|
429
|
+
} else {
|
430
|
+
std::uniform_int_distribution<size_t> uniform_not_used_so_far(
|
431
|
+
0, input.size() - used_so_far - 1);
|
432
|
+
iov[i].iov_len = uniform_not_used_so_far(rng);
|
433
|
+
}
|
434
|
+
}
|
435
|
+
used_so_far += iov[i].iov_len;
|
436
|
+
}
|
437
|
+
CHECK(snappy::RawUncompressToIOVec(
|
438
|
+
compressed.data(), compressed.size(), iov, num));
|
439
|
+
CHECK(!memcmp(buf, input.data(), input.size()));
|
440
|
+
delete[] iov;
|
441
|
+
delete[] buf;
|
442
|
+
}
|
443
|
+
|
444
|
+
// Test that data compressed by a compressor that does not
|
445
|
+
// obey block sizes is uncompressed properly.
|
446
|
+
static void VerifyNonBlockedCompression(const std::string& input) {
|
447
|
+
if (input.length() > snappy::kBlockSize) {
|
448
|
+
// We cannot test larger blocks than the maximum block size, obviously.
|
449
|
+
return;
|
450
|
+
}
|
451
|
+
|
452
|
+
std::string prefix;
|
453
|
+
Varint::Append32(&prefix, input.size());
|
454
|
+
|
455
|
+
// Setup compression table
|
456
|
+
snappy::internal::WorkingMemory wmem(input.size());
|
457
|
+
int table_size;
|
458
|
+
uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
|
459
|
+
|
460
|
+
// Compress entire input in one shot
|
461
|
+
std::string compressed;
|
462
|
+
compressed += prefix;
|
463
|
+
compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
|
464
|
+
char* dest = string_as_array(&compressed) + prefix.size();
|
465
|
+
char* end = snappy::internal::CompressFragment(input.data(), input.size(),
|
466
|
+
dest, table, table_size);
|
467
|
+
compressed.resize(end - compressed.data());
|
468
|
+
|
469
|
+
// Uncompress into std::string
|
470
|
+
std::string uncomp_str;
|
471
|
+
CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
|
472
|
+
CHECK_EQ(uncomp_str, input);
|
473
|
+
|
474
|
+
// Uncompress using source/sink
|
475
|
+
std::string uncomp_str2;
|
476
|
+
uncomp_str2.resize(input.size());
|
477
|
+
snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
|
478
|
+
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
479
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
480
|
+
CHECK_EQ(uncomp_str2, input);
|
481
|
+
|
482
|
+
// Uncompress into iovec
|
483
|
+
{
|
484
|
+
static const int kNumBlocks = 10;
|
485
|
+
struct iovec vec[kNumBlocks];
|
486
|
+
const int block_size = 1 + input.size() / kNumBlocks;
|
487
|
+
std::string iovec_data(block_size * kNumBlocks, 'x');
|
488
|
+
for (int i = 0; i < kNumBlocks; ++i) {
|
489
|
+
vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
|
490
|
+
vec[i].iov_len = block_size;
|
491
|
+
}
|
492
|
+
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
|
493
|
+
vec, kNumBlocks));
|
494
|
+
CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
|
495
|
+
}
|
496
|
+
}
|
497
|
+
|
498
|
+
// Expand the input so that it is at least K times as big as block size
|
499
|
+
static std::string Expand(const std::string& input) {
|
500
|
+
static const int K = 3;
|
501
|
+
std::string data = input;
|
502
|
+
while (data.size() < K * snappy::kBlockSize) {
|
503
|
+
data += input;
|
504
|
+
}
|
505
|
+
return data;
|
506
|
+
}
|
507
|
+
|
508
|
+
static int Verify(const std::string& input) {
|
509
|
+
VLOG(1) << "Verifying input of size " << input.size();
|
510
|
+
|
511
|
+
// Compress using string based routines
|
512
|
+
const int result = VerifyString(input);
|
513
|
+
|
514
|
+
// Verify using sink based routines
|
515
|
+
VerifyStringSink(input);
|
516
|
+
|
517
|
+
VerifyNonBlockedCompression(input);
|
518
|
+
VerifyIOVec(input);
|
519
|
+
if (!input.empty()) {
|
520
|
+
const std::string expanded = Expand(input);
|
521
|
+
VerifyNonBlockedCompression(expanded);
|
522
|
+
VerifyIOVec(input);
|
523
|
+
}
|
524
|
+
|
525
|
+
return result;
|
526
|
+
}
|
527
|
+
|
528
|
+
static bool IsValidCompressedBuffer(const std::string& c) {
|
529
|
+
return snappy::IsValidCompressedBuffer(c.data(), c.size());
|
530
|
+
}
|
531
|
+
static bool Uncompress(const std::string& c, std::string* u) {
|
532
|
+
return snappy::Uncompress(c.data(), c.size(), u);
|
533
|
+
}
|
534
|
+
|
535
|
+
// This test checks to ensure that snappy doesn't coredump if it gets
|
536
|
+
// corrupted data.
|
537
|
+
TEST(CorruptedTest, VerifyCorrupted) {
|
538
|
+
std::string source = "making sure we don't crash with corrupted input";
|
539
|
+
VLOG(1) << source;
|
540
|
+
std::string dest;
|
541
|
+
std::string uncmp;
|
542
|
+
snappy::Compress(source.data(), source.size(), &dest);
|
543
|
+
|
544
|
+
// Mess around with the data. It's hard to simulate all possible
|
545
|
+
// corruptions; this is just one example ...
|
546
|
+
CHECK_GT(dest.size(), 3);
|
547
|
+
dest[1]--;
|
548
|
+
dest[3]++;
|
549
|
+
// this really ought to fail.
|
550
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
551
|
+
CHECK(!Uncompress(dest, &uncmp));
|
552
|
+
|
553
|
+
// This is testing for a security bug - a buffer that decompresses to 100k
|
554
|
+
// but we lie in the snappy header and only reserve 0 bytes of memory :)
|
555
|
+
source.resize(100000);
|
556
|
+
for (char& source_char : source) {
|
557
|
+
source_char = 'A';
|
558
|
+
}
|
559
|
+
snappy::Compress(source.data(), source.size(), &dest);
|
560
|
+
dest[0] = dest[1] = dest[2] = dest[3] = 0;
|
561
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
562
|
+
CHECK(!Uncompress(dest, &uncmp));
|
563
|
+
|
564
|
+
if (sizeof(void *) == 4) {
|
565
|
+
// Another security check; check a crazy big length can't DoS us with an
|
566
|
+
// over-allocation.
|
567
|
+
// Currently this is done only for 32-bit builds. On 64-bit builds,
|
568
|
+
// where 3 GB might be an acceptable allocation size, Uncompress()
|
569
|
+
// attempts to decompress, and sometimes causes the test to run out of
|
570
|
+
// memory.
|
571
|
+
dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
|
572
|
+
// This decodes to a really large size, i.e., about 3 GB.
|
573
|
+
dest[4] = 'k';
|
574
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
575
|
+
CHECK(!Uncompress(dest, &uncmp));
|
576
|
+
} else {
|
577
|
+
LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
|
578
|
+
}
|
579
|
+
|
580
|
+
// This decodes to about 2 MB; much smaller, but should still fail.
|
581
|
+
dest[0] = dest[1] = dest[2] = '\xff';
|
582
|
+
dest[3] = 0x00;
|
583
|
+
CHECK(!IsValidCompressedBuffer(dest));
|
584
|
+
CHECK(!Uncompress(dest, &uncmp));
|
585
|
+
|
586
|
+
// try reading stuff in from a bad file.
|
587
|
+
for (int i = 1; i <= 3; ++i) {
|
588
|
+
std::string data =
|
589
|
+
ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
|
590
|
+
std::string uncmp;
|
591
|
+
// check that we don't return a crazy length
|
592
|
+
size_t ulen;
|
593
|
+
CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
|
594
|
+
|| (ulen < (1<<20)));
|
595
|
+
uint32_t ulen2;
|
596
|
+
snappy::ByteArraySource source(data.data(), data.size());
|
597
|
+
CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
|
598
|
+
(ulen2 < (1<<20)));
|
599
|
+
CHECK(!IsValidCompressedBuffer(data));
|
600
|
+
CHECK(!Uncompress(data, &uncmp));
|
601
|
+
}
|
602
|
+
}
|
603
|
+
|
604
|
+
// Helper routines to construct arbitrary compressed strings.
|
605
|
+
// These mirror the compression code in snappy.cc, but are copied
|
606
|
+
// here so that we can bypass some limitations in the how snappy.cc
|
607
|
+
// invokes these routines.
|
608
|
+
// Appends a literal element holding `literal` to `*dst`. Empty literals
// append nothing.
//
// The element starts with a tag byte whose low two bits are 0. For literals
// of at most 60 bytes the tag's upper bits hold (length - 1). Longer literals
// store (length - 1) in the following 1..4 bytes, least significant byte
// first, and the tag's upper bits hold 59 + the number of those bytes.
static void AppendLiteral(std::string* dst, const std::string& literal) {
  if (!literal.empty()) {
    int len_minus_one = literal.size() - 1;
    if (len_minus_one >= 60) {
      // Encode in upcoming bytes
      std::string length_bytes;
      for (; len_minus_one > 0; len_minus_one >>= 8) {
        length_bytes.push_back(len_minus_one & 0xff);
      }
      const int byte_count = static_cast<int>(length_bytes.size());
      dst->push_back(0 | ((59+byte_count) << 2));
      dst->append(length_bytes);
    } else {
      // Fit length in tag byte
      dst->push_back(0 | (len_minus_one << 2));
    }
    dst->append(literal);
  }
}
|
627
|
+
|
628
|
+
// Appends one or more copy elements to `*dst` that together copy `length`
// bytes from `offset` bytes back in the output. A non-positive `length`
// appends nothing.
//
// Each element covers at most 64 bytes. When between 65 and 67 bytes remain,
// 60 are emitted first so that the final element is at least 4 bytes long.
// Three encodings are used, selected per element:
//   * tag type 1, one offset byte: 4..11 bytes and offset < 2048;
//   * tag type 2, two offset bytes (little endian): offset < 65536;
//   * tag type 3, four offset bytes (little endian): anything else.
static void AppendCopy(std::string* dst, int offset, int length) {
  for (; length > 0; ) {
    // Figure out how much to copy in one shot
    const int chunk = (length >= 68) ? 64 : ((length > 64) ? 60 : length);
    length -= chunk;

    const bool short_form = (chunk >= 4) && (chunk < 12) && (offset < 2048);
    if (short_form) {
      assert(chunk-4 < 8);            // Must fit in 3 bits
      dst->push_back(1 | ((chunk-4) << 2) | ((offset >> 8) << 5));
      dst->push_back(offset & 0xff);
      continue;
    }

    if (offset < 65536) {
      dst->push_back(2 | ((chunk-1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back(offset >> 8);
      continue;
    }

    dst->push_back(3 | ((chunk-1) << 2));
    for (int shift = 0; shift <= 24; shift += 8) {
      dst->push_back((offset >> shift) & 0xff);
    }
  }
}
|
658
|
+
|
659
|
+
// Round-trips a handful of small hand-written inputs: the empty string, very
// short strings, and strings with a run of 'b' of varying length between
// repeated "a..." sections.
TEST(Snappy, SimpleTests) {
  const std::string kInputs[] = {
    "",
    "a",
    "ab",
    "abc",
    "aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc",
    "aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc",
    "aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc",
    "aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc",
    "abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc",
  };
  for (const std::string& sample : kInputs) {
    Verify(sample);
  }
}
|
671
|
+
|
672
|
+
// Verify max blowup (lots of four-byte copies)
|
673
|
+
TEST(Snappy, MaxBlowup) {
|
674
|
+
std::mt19937 rng;
|
675
|
+
std::uniform_int_distribution<int> uniform_byte(0, 255);
|
676
|
+
std::string input;
|
677
|
+
for (int i = 0; i < 80000; ++i)
|
678
|
+
input.push_back(static_cast<char>(uniform_byte(rng)));
|
679
|
+
|
680
|
+
for (int i = 0; i < 80000; i += 4) {
|
681
|
+
std::string four_bytes(input.end() - i - 4, input.end() - i);
|
682
|
+
input.append(four_bytes);
|
683
|
+
}
|
684
|
+
Verify(input);
|
685
|
+
}
|
686
|
+
|
687
|
+
// Round-trips 20000 pseudo-random strings built from runs of a repeated
// byte. The generator is seeded from FLAGS_test_random_seed.
//
// The first 100 strings are 64 KiB to 128 KiB long and use arbitrary byte
// values; the rest are shorter than 4 KiB and draw their bytes from a small
// skewed range (values below 8).
TEST(Snappy, RandomData) {
  std::minstd_rand0 rng(FLAGS_test_random_seed);
  std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
  std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
  std::uniform_int_distribution<int> uniform_byte(0, 255);
  std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
  std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
  std::bernoulli_distribution one_in_ten(1.0 / 10);

  constexpr int num_ops = 20000;
  for (int op = 0; op < num_ops; ++op) {
    if ((op % 1000) == 0) {
      VLOG(0) << "Random op " << op << " of " << num_ops;
    }

    const bool large_case = op < 100;
    std::string x;
    // The small length is always drawn, even when it is replaced below.
    size_t len = uniform_4k(rng);
    if (large_case) {
      len = 65536 + uniform_64k(rng);
    }
    while (x.size() < len) {
      // Most runs are a single byte; one in ten gets a skewed length that
      // may also be zero.
      int run_len = 1;
      if (one_in_ten(rng)) {
        const int run_bits = uniform_0_to_8(rng);
        // int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
        std::uniform_int_distribution<int> run_dist(0, (1 << run_bits) - 1);
        run_len = run_dist(rng);
      }
      // A full-range byte is always drawn; small cases then override it.
      char fill = static_cast<char>(uniform_byte(rng));
      if (!large_case) {
        const int value_bits = uniform_0_to_3(rng);
        // int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
        std::uniform_int_distribution<int> value_dist(0,
                                                      (1 << value_bits) - 1);
        fill = static_cast<char>(value_dist(rng));
      }
      // Append the run, clipped so that the string never exceeds len.
      for (; run_len > 0 && x.size() < len; --run_len) {
        x.push_back(fill);
      }
    }

    Verify(x);
  }
}
|
732
|
+
|
733
|
+
// Decodes a hand-built stream whose final element is a copy with an offset
// that needs the four-byte encoding.
TEST(Snappy, FourByteOffset) {
  // The new compressor cannot generate four-byte offsets since
  // it chops up the input into 32KB pieces.  So we hand-emit the
  // copy manually.

  // The two fragments that make up the input string.
  const std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
  const std::string fragment2 = "some other string";

  // How many times each fragment is emitted.
  const int n1 = 2;
  const int n2 = 100000 / fragment2.size();
  const size_t length = n1 * fragment1.size() + n2 * fragment2.size();

  // `expected` tracks the plain text that `compressed` should decode to.
  std::string compressed;
  std::string expected;
  Varint::Append32(&compressed, length);

  AppendLiteral(&compressed, fragment1);
  expected = fragment1;
  for (int rep = 0; rep < n2; ++rep) {
    AppendLiteral(&compressed, fragment2);
    expected += fragment2;
  }
  // Copy fragment1 again from the very start of the output.
  AppendCopy(&compressed, expected.size(), fragment1.size());
  expected += fragment1;
  CHECK_EQ(length, expected.size());

  std::string uncompressed;
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
                           &uncompressed));
  CHECK_EQ(uncompressed, expected);
}
|
766
|
+
|
767
|
+
// Decodes a hand-built stream into five iovec blocks of 2, 1, 4, 8 and 128
// bytes, chosen so that literals and copies straddle block boundaries.
TEST(Snappy, IOVecEdgeCases) {
  // Test some tricky edge cases in the iovec output that are not necessarily
  // exercised by random tests.

  // Our output blocks look like this initially (the last iovec is bigger
  // than depicted):
  // [  ] [ ] [    ] [        ] [        ]
  static const int kLengths[] = { 2, 1, 4, 8, 128 };

  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int blk = 0; blk < ARRAYSIZE(kLengths); ++blk) {
    const int block_len = kLengths[blk];
    iov[blk].iov_base = new char[block_len];
    iov[blk].iov_len = block_len;
  }

  // 22 bytes of output in total: 6 + 3 + 9 + 4.
  std::string compressed;
  Varint::Append32(&compressed, 22);

  // A literal whose output crosses three blocks.
  // [ab] [c] [123 ] [        ] [        ]
  AppendLiteral(&compressed, "abc123");

  // A copy of "123" whose output crosses two blocks.
  // [ab] [c] [1231] [23      ] [        ]
  AppendCopy(&compressed, 3, 3);

  // A copy where the input is, at first, in the block before the output:
  // [ab] [c] [1231] [231231  ] [        ]
  // Then during the copy, the pointers move such that the input and
  // output pointers are in the same block:
  // [ab] [c] [1231] [23123123] [        ]
  // And then they move again, so that the output pointer is no longer
  // in the same block as the input pointer:
  // [ab] [c] [1231] [23123123] [123     ]
  AppendCopy(&compressed, 6, 9);

  // Finally, a copy where the input is from several blocks back,
  // and it also crosses three blocks ("b", then "c", then "12"):
  // [ab] [c] [1231] [23123123] [123b    ]
  // [ab] [c] [1231] [23123123] [123bc   ]
  // [ab] [c] [1231] [23123123] [123bc12 ]
  AppendCopy(&compressed, 17, 4);

  CHECK(snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
  CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
  CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
  CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
  CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
  CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));

  for (int blk = 0; blk < ARRAYSIZE(kLengths); ++blk) {
    delete[] reinterpret_cast<char *>(iov[blk].iov_base);
  }
}
|
833
|
+
|
834
|
+
// An 8-byte literal must be rejected when the iovec blocks only provide
// 3 + 4 = 7 bytes of room.
TEST(Snappy, IOVecLiteralOverflow) {
  static const int kLengths[] = { 3, 4 };

  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int blk = 0; blk < ARRAYSIZE(kLengths); ++blk) {
    const int block_len = kLengths[blk];
    iov[blk].iov_base = new char[block_len];
    iov[blk].iov_len = block_len;
  }

  std::string compressed;
  Varint::Append32(&compressed, 8);

  AppendLiteral(&compressed, "12345678");

  const bool decoded = snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov));
  CHECK(!decoded);

  for (int blk = 0; blk < ARRAYSIZE(kLengths); ++blk) {
    delete[] reinterpret_cast<char *>(iov[blk].iov_base);
  }
}
|
855
|
+
|
856
|
+
// A 3-byte literal followed by a 5-byte copy yields 8 bytes and must be
// rejected when the iovec blocks only provide 3 + 4 = 7 bytes of room.
TEST(Snappy, IOVecCopyOverflow) {
  static const int kLengths[] = { 3, 4 };

  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int blk = 0; blk < ARRAYSIZE(kLengths); ++blk) {
    const int block_len = kLengths[blk];
    iov[blk].iov_base = new char[block_len];
    iov[blk].iov_len = block_len;
  }

  std::string compressed;
  Varint::Append32(&compressed, 8);

  AppendLiteral(&compressed, "123");
  AppendCopy(&compressed, 3, 5);

  const bool decoded = snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov));
  CHECK(!decoded);

  for (int blk = 0; blk < ARRAYSIZE(kLengths); ++blk) {
    delete[] reinterpret_cast<char *>(iov[blk].iov_base);
  }
}
|
878
|
+
|
879
|
+
static bool CheckUncompressedLength(const std::string& compressed,
|
880
|
+
size_t* ulength) {
|
881
|
+
const bool result1 = snappy::GetUncompressedLength(compressed.data(),
|
882
|
+
compressed.size(),
|
883
|
+
ulength);
|
884
|
+
|
885
|
+
snappy::ByteArraySource source(compressed.data(), compressed.size());
|
886
|
+
uint32_t length;
|
887
|
+
const bool result2 = snappy::GetUncompressedLength(&source, &length);
|
888
|
+
CHECK_EQ(result1, result2);
|
889
|
+
return result1;
|
890
|
+
}
|
891
|
+
|
892
|
+
// A lone length byte with its continuation bit set (0xf0) and nothing after
// it must be rejected by every entry point.
TEST(SnappyCorruption, TruncatedVarint) {
  std::string compressed(1, '\xf0');
  std::string uncompressed;
  size_t ulength;
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  const bool decoded = snappy::Uncompress(compressed.data(), compressed.size(),
                                          &uncompressed);
  CHECK(!decoded);
}
|
901
|
+
|
902
|
+
// A length prefix made of five continuation bytes (0x80) followed by a
// sixth byte must be rejected by every entry point.
TEST(SnappyCorruption, UnterminatedVarint) {
  std::string compressed(5, '\x80');
  compressed.push_back(10);
  std::string uncompressed;
  size_t ulength;
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  const bool decoded = snappy::Uncompress(compressed.data(), compressed.size(),
                                          &uncompressed);
  CHECK(!decoded);
}
|
916
|
+
|
917
|
+
// The five-byte length prefix fb ff ff ff 7f must be rejected by every
// entry point.
TEST(SnappyCorruption, OverflowingVarint) {
  static const char kPrefix[] = { '\xfb', '\xff', '\xff', '\xff', '\x7f' };
  std::string compressed(kPrefix, sizeof(kPrefix));
  std::string uncompressed;
  size_t ulength;
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  const bool decoded = snappy::Uncompress(compressed.data(), compressed.size(),
                                          &uncompressed);
  CHECK(!decoded);
}
|
930
|
+
|
931
|
+
// Check that we do not read past end of input: the compressed bytes are
// handed to snappy through DataEndingAtUnreadablePage.
TEST(Snappy, ReadPastEndOfBuffer) {
  // Make a compressed string that ends with a single-byte literal
  std::string stream;
  Varint::Append32(&stream, 1);
  AppendLiteral(&stream, "x");

  DataEndingAtUnreadablePage guarded(stream);
  std::string uncompressed;
  const bool decoded =
      snappy::Uncompress(guarded.data(), guarded.size(), &uncompressed);
  CHECK(decoded);
  CHECK_EQ(uncompressed, std::string("x"));
}
|
944
|
+
|
945
|
+
// Check for an infinite loop caused by a copy with offset==0
|
946
|
+
TEST(Snappy, ZeroOffsetCopy) {
|
947
|
+
const char* compressed = "\x40\x12\x00\x00";
|
948
|
+
// \x40 Length (must be > kMaxIncrementCopyOverflow)
|
949
|
+
// \x12\x00\x00 Copy with offset==0, length==5
|
950
|
+
char uncompressed[100];
|
951
|
+
EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
|
952
|
+
}
|
953
|
+
|
954
|
+
// The validator must also reject a stream containing a zero-offset copy.
TEST(Snappy, ZeroOffsetCopyValidation) {
  // Byte layout of the 4-byte input:
  //   \x05          Length
  //   \x12\x00\x00  Copy with offset==0, length==5
  static const char kInput[] = "\x05\x12\x00\x00";
  const size_t kInputLength = 4;  // excludes the literal's trailing NUL

  EXPECT_FALSE(snappy::IsValidCompressedBuffer(kInput, kInputLength));
}
|
960
|
+
|
961
|
+
namespace {
|
962
|
+
|
963
|
+
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
|
964
|
+
uint64_t data;
|
965
|
+
std::pair<size_t, bool> p =
|
966
|
+
snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
|
967
|
+
CHECK_EQ(p.first < 8, p.second);
|
968
|
+
return p.first;
|
969
|
+
}
|
970
|
+
|
971
|
+
} // namespace
|
972
|
+
|
973
|
+
// Hand-picked inputs that drive FindMatchLength through each of its code
// paths. The first half targets the 64-bit implementation, the second half
// the 32-bit one. Left-hand inputs shared by a whole group are named once.
TEST(Snappy, FindMatchLength) {
  // ---- 64-bit version ----

  // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));

  // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));

  // Same, but edge cases.
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));

  // Find non-match at once in first loop.
  const char* const kWide16 = "01234567xxxxxxxx";
  EXPECT_EQ(0, TestFindMatchLength(kWide16, "?1234567xxxxxxxx", 16));
  EXPECT_EQ(1, TestFindMatchLength(kWide16, "0?234567xxxxxxxx", 16));
  EXPECT_EQ(4, TestFindMatchLength(kWide16, "01237654xxxxxxxx", 16));
  EXPECT_EQ(7, TestFindMatchLength(kWide16, "0123456?xxxxxxxx", 16));

  // Find non-match in first loop after one block.
  const char* const kWide24 = "abcdefgh01234567xxxxxxxx";
  EXPECT_EQ(8, TestFindMatchLength(kWide24, "abcdefgh?1234567xxxxxxxx", 24));
  EXPECT_EQ(9, TestFindMatchLength(kWide24, "abcdefgh0?234567xxxxxxxx", 24));
  EXPECT_EQ(12, TestFindMatchLength(kWide24, "abcdefgh01237654xxxxxxxx", 24));
  EXPECT_EQ(15, TestFindMatchLength(kWide24, "abcdefgh0123456?xxxxxxxx", 24));

  // ---- 32-bit version ----

  // Short matches.
  const char* const kDigits8 = "01234567";
  EXPECT_EQ(0, TestFindMatchLength(kDigits8, "?1234567", 8));
  EXPECT_EQ(1, TestFindMatchLength(kDigits8, "0?234567", 8));
  EXPECT_EQ(2, TestFindMatchLength(kDigits8, "01?34567", 8));
  EXPECT_EQ(3, TestFindMatchLength(kDigits8, "012?4567", 8));
  EXPECT_EQ(4, TestFindMatchLength(kDigits8, "0123?567", 8));
  EXPECT_EQ(5, TestFindMatchLength(kDigits8, "01234?67", 8));
  EXPECT_EQ(6, TestFindMatchLength(kDigits8, "012345?7", 8));
  EXPECT_EQ(7, TestFindMatchLength(kDigits8, "0123456?", 8));
  EXPECT_EQ(7, TestFindMatchLength(kDigits8, "0123456?", 7));
  EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));

  // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
  // Length 13 on a 12-character literal: the trailing NUL is compared too.
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));

  // Same, but edge cases.
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));

  // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));

  // Find non-match at once in first loop.
  const char* const kNarrow18 = "xxxxxx0123xxxxxxxx";
  EXPECT_EQ(6, TestFindMatchLength(kNarrow18, "xxxxxx?123xxxxxxxx", 18));
  EXPECT_EQ(7, TestFindMatchLength(kNarrow18, "xxxxxx0?23xxxxxxxx", 18));
  EXPECT_EQ(8, TestFindMatchLength(kNarrow18, "xxxxxx0132xxxxxxxx", 18));
  EXPECT_EQ(9, TestFindMatchLength(kNarrow18, "xxxxxx012?xxxxxxxx", 18));

  // Same, but edge cases.
  const char* const kNarrow10 = "xxxxxx0123";
  EXPECT_EQ(6, TestFindMatchLength(kNarrow10, "xxxxxx?123", 10));
  EXPECT_EQ(7, TestFindMatchLength(kNarrow10, "xxxxxx0?23", 10));
  EXPECT_EQ(8, TestFindMatchLength(kNarrow10, "xxxxxx0132", 10));
  EXPECT_EQ(9, TestFindMatchLength(kNarrow10, "xxxxxx012?", 10));

  // Find non-match in first loop after one block.
  const char* const kNarrow16 = "xxxxxxabcd0123xx";
  EXPECT_EQ(10, TestFindMatchLength(kNarrow16, "xxxxxxabcd?123xx", 16));
  EXPECT_EQ(11, TestFindMatchLength(kNarrow16, "xxxxxxabcd0?23xx", 16));
  EXPECT_EQ(12, TestFindMatchLength(kNarrow16, "xxxxxxabcd0132xx", 16));
  EXPECT_EQ(13, TestFindMatchLength(kNarrow16, "xxxxxxabcd012?xx", 16));

  // Same, but edge cases.
  const char* const kNarrow14 = "xxxxxxabcd0123";
  EXPECT_EQ(10, TestFindMatchLength(kNarrow14, "xxxxxxabcd?123", 14));
  EXPECT_EQ(11, TestFindMatchLength(kNarrow14, "xxxxxxabcd0?23", 14));
  EXPECT_EQ(12, TestFindMatchLength(kNarrow14, "xxxxxxabcd0132", 14));
  EXPECT_EQ(13, TestFindMatchLength(kNarrow14, "xxxxxxabcd012?", 14));
}
|
1062
|
+
|
1063
|
+
// Randomized check of FindMatchLength: builds pairs of equal-length strings
// over a two-symbol alphabet and verifies that the reported match length is
// exactly the length of their common prefix.
TEST(Snappy, FindMatchLengthRandom) {
  constexpr int kNumTrials = 10000;
  constexpr int kTypicalLength = 10;
  std::minstd_rand0 rng(FLAGS_test_random_seed);
  std::uniform_int_distribution<int> uniform_byte(0, 255);
  std::bernoulli_distribution one_in_two(1.0 / 2);
  std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);

  for (int trial = 0; trial < kNumTrials; ++trial) {
    // Two random symbols; both strings are drawn from just these two so that
    // long common prefixes are likely.
    const char first_symbol = static_cast<char>(uniform_byte(rng));
    const char second_symbol = static_cast<char>(uniform_byte(rng));

    // Grow both strings in lock-step; each round stops with probability
    // 1 / kTypicalLength.
    std::string lhs;
    std::string rhs;
    for (;;) {
      if (one_in_typical_length(rng)) break;
      lhs.push_back(one_in_two(rng) ? first_symbol : second_symbol);
      rhs.push_back(one_in_two(rng) ? first_symbol : second_symbol);
    }

    DataEndingAtUnreadablePage lhs_page(lhs);
    DataEndingAtUnreadablePage rhs_page(rhs);
    const size_t matched =
        TestFindMatchLength(lhs_page.data(), rhs_page.data(), rhs.size());

    if (matched != rhs.size()) {
      // Stopped early: the byte at the stop position must differ and every
      // byte before it must agree.
      EXPECT_NE(lhs[matched], rhs[matched]);
      for (size_t pos = 0; pos < matched; ++pos) {
        EXPECT_EQ(lhs[pos], rhs[pos]);
      }
    } else {
      // Matched all the way to the limit: the strings must be identical.
      EXPECT_EQ(lhs, rhs);
    }
  }
}
|
1092
|
+
|
1093
|
+
// Packs one decompression-table entry into 16 bits:
//   bits  0..6   len          (at most 7 bits)
//   bits  8..10  copy_offset  (at most 3 bits)
//   bits 11..13  extra        (at most 3 bits)
// Each argument is asserted to fit its field.
static uint16_t MakeEntry(unsigned int extra,
                          unsigned int len,
                          unsigned int copy_offset) {
  // No bits may be set outside each field's width.
  assert((extra & ~0x7u) == 0);        // At most 3 bits
  assert((copy_offset & ~0x7u) == 0);  // At most 3 bits
  assert((len & ~0x7fu) == 0);         // At most 7 bits

  const unsigned int packed = (extra << 11) | (copy_offset << 8) | len;
  return static_cast<uint16_t>(packed);
}
|
1102
|
+
|
1103
|
+
// Check that the decompression table is correct, and optionally print out
|
1104
|
+
// the computed one.
|
1105
|
+
TEST(Snappy, VerifyCharTable) {
|
1106
|
+
using snappy::internal::LITERAL;
|
1107
|
+
using snappy::internal::COPY_1_BYTE_OFFSET;
|
1108
|
+
using snappy::internal::COPY_2_BYTE_OFFSET;
|
1109
|
+
using snappy::internal::COPY_4_BYTE_OFFSET;
|
1110
|
+
using snappy::internal::char_table;
|
1111
|
+
|
1112
|
+
uint16_t dst[256];
|
1113
|
+
|
1114
|
+
// Place invalid entries in all places to detect missing initialization
|
1115
|
+
int assigned = 0;
|
1116
|
+
for (int i = 0; i < 256; ++i) {
|
1117
|
+
dst[i] = 0xffff;
|
1118
|
+
}
|
1119
|
+
|
1120
|
+
// Small LITERAL entries. We store (len-1) in the top 6 bits.
|
1121
|
+
for (uint8_t len = 1; len <= 60; ++len) {
|
1122
|
+
dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
|
1123
|
+
assigned++;
|
1124
|
+
}
|
1125
|
+
|
1126
|
+
// Large LITERAL entries. We use 60..63 in the high 6 bits to
|
1127
|
+
// encode the number of bytes of length info that follow the opcode.
|
1128
|
+
for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
|
1129
|
+
// We set the length field in the lookup table to 1 because extra
|
1130
|
+
// bytes encode len-1.
|
1131
|
+
dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
|
1132
|
+
assigned++;
|
1133
|
+
}
|
1134
|
+
|
1135
|
+
// COPY_1_BYTE_OFFSET.
|
1136
|
+
//
|
1137
|
+
// The tag byte in the compressed data stores len-4 in 3 bits, and
|
1138
|
+
// offset/256 in 5 bits. offset%256 is stored in the next byte.
|
1139
|
+
//
|
1140
|
+
// This format is used for length in range [4..11] and offset in
|
1141
|
+
// range [0..2047]
|
1142
|
+
for (uint8_t len = 4; len < 12; ++len) {
|
1143
|
+
for (uint16_t offset = 0; offset < 2048; offset += 256) {
|
1144
|
+
uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
|
1145
|
+
dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
|
1146
|
+
MakeEntry(1, len, offset_high);
|
1147
|
+
assigned++;
|
1148
|
+
}
|
1149
|
+
}
|
1150
|
+
|
1151
|
+
// COPY_2_BYTE_OFFSET.
|
1152
|
+
// Tag contains len-1 in top 6 bits, and offset in next two bytes.
|
1153
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
1154
|
+
dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
|
1155
|
+
assigned++;
|
1156
|
+
}
|
1157
|
+
|
1158
|
+
// COPY_4_BYTE_OFFSET.
|
1159
|
+
// Tag contents len-1 in top 6 bits, and offset in next four bytes.
|
1160
|
+
for (uint8_t len = 1; len <= 64; ++len) {
|
1161
|
+
dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
|
1162
|
+
assigned++;
|
1163
|
+
}
|
1164
|
+
|
1165
|
+
// Check that each entry was initialized exactly once.
|
1166
|
+
EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
|
1167
|
+
for (int i = 0; i < 256; ++i) {
|
1168
|
+
EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
|
1169
|
+
}
|
1170
|
+
|
1171
|
+
if (FLAGS_snappy_dump_decompression_table) {
|
1172
|
+
std::printf("static const uint16_t char_table[256] = {\n ");
|
1173
|
+
for (int i = 0; i < 256; ++i) {
|
1174
|
+
std::printf("0x%04x%s",
|
1175
|
+
dst[i],
|
1176
|
+
((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
|
1177
|
+
}
|
1178
|
+
std::printf("};\n");
|
1179
|
+
}
|
1180
|
+
|
1181
|
+
// Check that computed table matched recorded table.
|
1182
|
+
for (int i = 0; i < 256; ++i) {
|
1183
|
+
EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
|
1184
|
+
}
|
1185
|
+
}
|
1186
|
+
|
1187
|
+
// Reads the file `fname`, compresses its contents with the SNAPPY codec and
// writes the result to "<fname>.comp". Read and write failures are fatal
// (CHECK_OK).
static void CompressFile(const char* fname) {
  std::string raw;
  CHECK_OK(file::GetContents(fname, &raw, file::Defaults()));

  std::string packed;
  Compress(raw.data(), raw.size(), SNAPPY, &packed, false);

  const std::string output_path = std::string(fname) + ".comp";
  CHECK_OK(file::SetContents(output_path, packed, file::Defaults()));
}
|
1197
|
+
|
1198
|
+
// Reads the compressed file `fname`, decompresses it and writes the result
// to "<fname>.uncomp". I/O failures, an unreadable length header and a
// failed decompression are all fatal.
static void UncompressFile(const char* fname) {
  std::string packed;
  CHECK_OK(file::GetContents(fname, &packed, file::Defaults()));

  // The length header must be readable before decompressing.
  size_t expected_size;
  CHECK(CheckUncompressedLength(packed, &expected_size));

  // Pre-size the output to the advertised length, then decompress into it.
  std::string plain;
  plain.resize(expected_size);
  CHECK(snappy::Uncompress(packed.data(), packed.size(), &plain));

  const std::string output_path = std::string(fname) + ".uncomp";
  CHECK_OK(file::SetContents(output_path, plain, file::Defaults()));
}
|
1212
|
+
|
1213
|
+
static void MeasureFile(const char* fname) {
|
1214
|
+
std::string fullinput;
|
1215
|
+
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
|
1216
|
+
std::printf("%-40s :\n", fname);
|
1217
|
+
|
1218
|
+
int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
|
1219
|
+
int end_len = fullinput.size();
|
1220
|
+
if (FLAGS_end_len >= 0) {
|
1221
|
+
end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
|
1222
|
+
}
|
1223
|
+
for (int len = start_len; len <= end_len; ++len) {
|
1224
|
+
const char* const input = fullinput.data();
|
1225
|
+
int repeats = (FLAGS_bytes + len) / (len + 1);
|
1226
|
+
if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
|
1227
|
+
if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
|
1228
|
+
if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
|
1229
|
+
|
1230
|
+
// For block-size based measurements
|
1231
|
+
if (0 && FLAGS_snappy) {
|
1232
|
+
Measure(input, len, SNAPPY, repeats, 8<<10);
|
1233
|
+
Measure(input, len, SNAPPY, repeats, 16<<10);
|
1234
|
+
Measure(input, len, SNAPPY, repeats, 32<<10);
|
1235
|
+
Measure(input, len, SNAPPY, repeats, 64<<10);
|
1236
|
+
Measure(input, len, SNAPPY, repeats, 256<<10);
|
1237
|
+
Measure(input, len, SNAPPY, repeats, 1024<<10);
|
1238
|
+
}
|
1239
|
+
}
|
1240
|
+
}
|
1241
|
+
|
1242
|
+
// Table of benchmark inputs. The benchmarks below select a row by index.
// Each row is passed to ReadTestDataFile as (filename, size_limit).
static struct {
  const char* label;     // name reported as the benchmark label
  const char* filename;  // test-data file to read
  size_t size_limit;     // passed to ReadTestDataFile; most rows use 0
} files[] = {
  { "html",    "html",           0   },
  { "urls",    "urls.10K",       0   },
  { "jpg",     "fireworks.jpeg", 0   },
  { "jpg_200", "fireworks.jpeg", 200 },
  { "pdf",     "paper-100k.pdf", 0   },
  { "html4",   "html_x_4",       0   },
  { "txt1",    "alice29.txt",    0   },
  { "txt2",    "asyoulik.txt",   0   },
  { "txt3",    "lcet10.txt",     0   },
  { "txt4",    "plrabn12.txt",   0   },
  { "pb",      "geo.protodata",  0   },
  { "gaviota", "kppkn.gtb",      0   },
};
|
1260
|
+
|
1261
|
+
static void BM_UFlat(int iters, int arg) {
|
1262
|
+
StopBenchmarkTiming();
|
1263
|
+
|
1264
|
+
// Pick file to process based on "arg"
|
1265
|
+
CHECK_GE(arg, 0);
|
1266
|
+
CHECK_LT(arg, ARRAYSIZE(files));
|
1267
|
+
std::string contents =
|
1268
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1269
|
+
|
1270
|
+
std::string zcontents;
|
1271
|
+
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1272
|
+
char* dst = new char[contents.size()];
|
1273
|
+
|
1274
|
+
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
|
1275
|
+
static_cast<int64_t>(contents.size()));
|
1276
|
+
SetBenchmarkLabel(files[arg].label);
|
1277
|
+
StartBenchmarkTiming();
|
1278
|
+
while (iters-- > 0) {
|
1279
|
+
CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
|
1280
|
+
}
|
1281
|
+
StopBenchmarkTiming();
|
1282
|
+
|
1283
|
+
delete[] dst;
|
1284
|
+
}
|
1285
|
+
BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1286
|
+
|
1287
|
+
static void BM_UValidate(int iters, int arg) {
|
1288
|
+
StopBenchmarkTiming();
|
1289
|
+
|
1290
|
+
// Pick file to process based on "arg"
|
1291
|
+
CHECK_GE(arg, 0);
|
1292
|
+
CHECK_LT(arg, ARRAYSIZE(files));
|
1293
|
+
std::string contents =
|
1294
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1295
|
+
|
1296
|
+
std::string zcontents;
|
1297
|
+
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1298
|
+
|
1299
|
+
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
|
1300
|
+
static_cast<int64_t>(contents.size()));
|
1301
|
+
SetBenchmarkLabel(files[arg].label);
|
1302
|
+
StartBenchmarkTiming();
|
1303
|
+
while (iters-- > 0) {
|
1304
|
+
CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
|
1305
|
+
}
|
1306
|
+
StopBenchmarkTiming();
|
1307
|
+
}
|
1308
|
+
BENCHMARK(BM_UValidate)->DenseRange(0, 4);
|
1309
|
+
|
1310
|
+
static void BM_UIOVec(int iters, int arg) {
|
1311
|
+
StopBenchmarkTiming();
|
1312
|
+
|
1313
|
+
// Pick file to process based on "arg"
|
1314
|
+
CHECK_GE(arg, 0);
|
1315
|
+
CHECK_LT(arg, ARRAYSIZE(files));
|
1316
|
+
std::string contents =
|
1317
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1318
|
+
|
1319
|
+
std::string zcontents;
|
1320
|
+
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1321
|
+
|
1322
|
+
// Uncompress into an iovec containing ten entries.
|
1323
|
+
const int kNumEntries = 10;
|
1324
|
+
struct iovec iov[kNumEntries];
|
1325
|
+
char *dst = new char[contents.size()];
|
1326
|
+
size_t used_so_far = 0;
|
1327
|
+
for (int i = 0; i < kNumEntries; ++i) {
|
1328
|
+
iov[i].iov_base = dst + used_so_far;
|
1329
|
+
if (used_so_far == contents.size()) {
|
1330
|
+
iov[i].iov_len = 0;
|
1331
|
+
continue;
|
1332
|
+
}
|
1333
|
+
|
1334
|
+
if (i == kNumEntries - 1) {
|
1335
|
+
iov[i].iov_len = contents.size() - used_so_far;
|
1336
|
+
} else {
|
1337
|
+
iov[i].iov_len = contents.size() / kNumEntries;
|
1338
|
+
}
|
1339
|
+
used_so_far += iov[i].iov_len;
|
1340
|
+
}
|
1341
|
+
|
1342
|
+
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
|
1343
|
+
static_cast<int64_t>(contents.size()));
|
1344
|
+
SetBenchmarkLabel(files[arg].label);
|
1345
|
+
StartBenchmarkTiming();
|
1346
|
+
while (iters-- > 0) {
|
1347
|
+
CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
|
1348
|
+
kNumEntries));
|
1349
|
+
}
|
1350
|
+
StopBenchmarkTiming();
|
1351
|
+
|
1352
|
+
delete[] dst;
|
1353
|
+
}
|
1354
|
+
BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
|
1355
|
+
|
1356
|
+
static void BM_UFlatSink(int iters, int arg) {
|
1357
|
+
StopBenchmarkTiming();
|
1358
|
+
|
1359
|
+
// Pick file to process based on "arg"
|
1360
|
+
CHECK_GE(arg, 0);
|
1361
|
+
CHECK_LT(arg, ARRAYSIZE(files));
|
1362
|
+
std::string contents =
|
1363
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1364
|
+
|
1365
|
+
std::string zcontents;
|
1366
|
+
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
1367
|
+
char* dst = new char[contents.size()];
|
1368
|
+
|
1369
|
+
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
|
1370
|
+
static_cast<int64_t>(contents.size()));
|
1371
|
+
SetBenchmarkLabel(files[arg].label);
|
1372
|
+
StartBenchmarkTiming();
|
1373
|
+
while (iters-- > 0) {
|
1374
|
+
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
|
1375
|
+
snappy::UncheckedByteArraySink sink(dst);
|
1376
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
1377
|
+
}
|
1378
|
+
StopBenchmarkTiming();
|
1379
|
+
|
1380
|
+
std::string s(dst, contents.size());
|
1381
|
+
CHECK_EQ(contents, s);
|
1382
|
+
|
1383
|
+
delete[] dst;
|
1384
|
+
}
|
1385
|
+
|
1386
|
+
BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1387
|
+
|
1388
|
+
static void BM_ZFlat(int iters, int arg) {
|
1389
|
+
StopBenchmarkTiming();
|
1390
|
+
|
1391
|
+
// Pick file to process based on "arg"
|
1392
|
+
CHECK_GE(arg, 0);
|
1393
|
+
CHECK_LT(arg, ARRAYSIZE(files));
|
1394
|
+
std::string contents =
|
1395
|
+
ReadTestDataFile(files[arg].filename, files[arg].size_limit);
|
1396
|
+
|
1397
|
+
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
1398
|
+
|
1399
|
+
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
|
1400
|
+
static_cast<int64_t>(contents.size()));
|
1401
|
+
StartBenchmarkTiming();
|
1402
|
+
|
1403
|
+
size_t zsize = 0;
|
1404
|
+
while (iters-- > 0) {
|
1405
|
+
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
|
1406
|
+
}
|
1407
|
+
StopBenchmarkTiming();
|
1408
|
+
const double compression_ratio =
|
1409
|
+
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
1410
|
+
SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
|
1411
|
+
100.0 * compression_ratio));
|
1412
|
+
VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
|
1413
|
+
files[arg].label, static_cast<int>(contents.size()),
|
1414
|
+
static_cast<int>(zsize));
|
1415
|
+
delete[] dst;
|
1416
|
+
}
|
1417
|
+
BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
|
1418
|
+
|
1419
|
+
static void BM_ZFlatAll(int iters, int arg) {
|
1420
|
+
StopBenchmarkTiming();
|
1421
|
+
|
1422
|
+
CHECK_EQ(arg, 0);
|
1423
|
+
const int num_files = ARRAYSIZE(files);
|
1424
|
+
|
1425
|
+
std::vector<std::string> contents(num_files);
|
1426
|
+
std::vector<char*> dst(num_files);
|
1427
|
+
|
1428
|
+
int64_t total_contents_size = 0;
|
1429
|
+
for (int i = 0; i < num_files; ++i) {
|
1430
|
+
contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
|
1431
|
+
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
|
1432
|
+
total_contents_size += contents[i].size();
|
1433
|
+
}
|
1434
|
+
|
1435
|
+
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) * total_contents_size);
|
1436
|
+
StartBenchmarkTiming();
|
1437
|
+
|
1438
|
+
size_t zsize = 0;
|
1439
|
+
while (iters-- > 0) {
|
1440
|
+
for (int i = 0; i < num_files; ++i) {
|
1441
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1442
|
+
&zsize);
|
1443
|
+
}
|
1444
|
+
}
|
1445
|
+
StopBenchmarkTiming();
|
1446
|
+
|
1447
|
+
for (char* dst_item : dst) {
|
1448
|
+
delete[] dst_item;
|
1449
|
+
}
|
1450
|
+
SetBenchmarkLabel(StrFormat("%d files", num_files));
|
1451
|
+
}
|
1452
|
+
BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
|
1453
|
+
|
1454
|
+
static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
|
1455
|
+
StopBenchmarkTiming();
|
1456
|
+
|
1457
|
+
CHECK_EQ(arg, 0);
|
1458
|
+
CHECK_GT(ARRAYSIZE(files), 0);
|
1459
|
+
const std::string base_content =
|
1460
|
+
ReadTestDataFile(files[0].filename, files[0].size_limit);
|
1461
|
+
|
1462
|
+
std::vector<std::string> contents;
|
1463
|
+
std::vector<char*> dst;
|
1464
|
+
int64_t total_contents_size = 0;
|
1465
|
+
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
|
1466
|
+
++table_bits) {
|
1467
|
+
std::string content = base_content;
|
1468
|
+
content.resize(1 << table_bits);
|
1469
|
+
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
|
1470
|
+
total_contents_size += content.size();
|
1471
|
+
contents.push_back(std::move(content));
|
1472
|
+
}
|
1473
|
+
|
1474
|
+
size_t zsize = 0;
|
1475
|
+
SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) * total_contents_size);
|
1476
|
+
StartBenchmarkTiming();
|
1477
|
+
while (iters-- > 0) {
|
1478
|
+
for (size_t i = 0; i < contents.size(); ++i) {
|
1479
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
1480
|
+
&zsize);
|
1481
|
+
}
|
1482
|
+
}
|
1483
|
+
StopBenchmarkTiming();
|
1484
|
+
|
1485
|
+
for (char* dst_item : dst) {
|
1486
|
+
delete[] dst_item;
|
1487
|
+
}
|
1488
|
+
SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
|
1489
|
+
}
|
1490
|
+
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
|
1491
|
+
|
1492
|
+
} // namespace snappy
|
1493
|
+
|
1494
|
+
int main(int argc, char** argv) {
|
1495
|
+
InitGoogle(argv[0], &argc, &argv, true);
|
1496
|
+
RunSpecifiedBenchmarks();
|
1497
|
+
|
1498
|
+
if (argc >= 2) {
|
1499
|
+
for (int arg = 1; arg < argc; ++arg) {
|
1500
|
+
if (FLAGS_write_compressed) {
|
1501
|
+
snappy::CompressFile(argv[arg]);
|
1502
|
+
} else if (FLAGS_write_uncompressed) {
|
1503
|
+
snappy::UncompressFile(argv[arg]);
|
1504
|
+
} else {
|
1505
|
+
snappy::MeasureFile(argv[arg]);
|
1506
|
+
}
|
1507
|
+
}
|
1508
|
+
return 0;
|
1509
|
+
}
|
1510
|
+
|
1511
|
+
return RUN_ALL_TESTS();
|
1512
|
+
}
|