couchbase 3.0.0.alpha.1-universal-darwin-19 → 3.0.0.alpha.2-universal-darwin-19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (176) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/tests-6.0.3.yml +49 -0
  3. data/.github/workflows/tests.yml +47 -0
  4. data/.gitmodules +3 -0
  5. data/.idea/dictionaries/gem_terms.xml +5 -0
  6. data/.idea/inspectionProfiles/Project_Default.xml +1 -0
  7. data/.idea/vcs.xml +1 -0
  8. data/Gemfile +1 -0
  9. data/README.md +55 -2
  10. data/Rakefile +18 -0
  11. data/bin/init-cluster +62 -0
  12. data/bin/setup +1 -0
  13. data/couchbase.gemspec +3 -2
  14. data/examples/crud.rb +1 -2
  15. data/examples/managing_buckets.rb +47 -0
  16. data/examples/managing_collections.rb +58 -0
  17. data/examples/managing_query_indexes.rb +63 -0
  18. data/examples/query.rb +3 -2
  19. data/examples/query_with_consistency.rb +76 -0
  20. data/examples/subdocument.rb +23 -1
  21. data/ext/.clang-format +1 -1
  22. data/ext/.idea/dictionaries/couchbase_terms.xml +2 -0
  23. data/ext/.idea/vcs.xml +1 -0
  24. data/ext/CMakeLists.txt +30 -12
  25. data/ext/build_version.hxx.in +26 -0
  26. data/ext/couchbase/bucket.hxx +69 -8
  27. data/ext/couchbase/cluster.hxx +70 -54
  28. data/ext/couchbase/collections_manifest.hxx +3 -3
  29. data/ext/couchbase/configuration.hxx +14 -0
  30. data/ext/couchbase/couchbase.cxx +2044 -383
  31. data/ext/couchbase/{operations/document_id.hxx → document_id.hxx} +5 -4
  32. data/ext/couchbase/io/http_message.hxx +5 -1
  33. data/ext/couchbase/io/http_parser.hxx +2 -1
  34. data/ext/couchbase/io/http_session.hxx +6 -3
  35. data/ext/couchbase/io/{binary_message.hxx → mcbp_message.hxx} +15 -12
  36. data/ext/couchbase/io/mcbp_parser.hxx +99 -0
  37. data/ext/couchbase/io/{key_value_session.hxx → mcbp_session.hxx} +200 -95
  38. data/ext/couchbase/io/session_manager.hxx +37 -22
  39. data/ext/couchbase/mutation_token.hxx +2 -1
  40. data/ext/couchbase/operations.hxx +38 -8
  41. data/ext/couchbase/operations/bucket_create.hxx +138 -0
  42. data/ext/couchbase/operations/bucket_drop.hxx +65 -0
  43. data/ext/couchbase/operations/bucket_flush.hxx +65 -0
  44. data/ext/couchbase/operations/bucket_get.hxx +69 -0
  45. data/ext/couchbase/operations/bucket_get_all.hxx +62 -0
  46. data/ext/couchbase/operations/bucket_settings.hxx +111 -0
  47. data/ext/couchbase/operations/bucket_update.hxx +115 -0
  48. data/ext/couchbase/operations/cluster_developer_preview_enable.hxx +60 -0
  49. data/ext/couchbase/operations/collection_create.hxx +86 -0
  50. data/ext/couchbase/operations/collection_drop.hxx +82 -0
  51. data/ext/couchbase/operations/command.hxx +10 -10
  52. data/ext/couchbase/operations/document_decrement.hxx +80 -0
  53. data/ext/couchbase/operations/document_exists.hxx +80 -0
  54. data/ext/couchbase/operations/{get.hxx → document_get.hxx} +4 -2
  55. data/ext/couchbase/operations/document_get_and_lock.hxx +64 -0
  56. data/ext/couchbase/operations/document_get_and_touch.hxx +64 -0
  57. data/ext/couchbase/operations/document_increment.hxx +80 -0
  58. data/ext/couchbase/operations/document_insert.hxx +74 -0
  59. data/ext/couchbase/operations/{lookup_in.hxx → document_lookup_in.hxx} +2 -2
  60. data/ext/couchbase/operations/{mutate_in.hxx → document_mutate_in.hxx} +11 -2
  61. data/ext/couchbase/operations/{query.hxx → document_query.hxx} +101 -6
  62. data/ext/couchbase/operations/document_remove.hxx +67 -0
  63. data/ext/couchbase/operations/document_replace.hxx +76 -0
  64. data/ext/couchbase/operations/{upsert.hxx → document_touch.hxx} +14 -14
  65. data/ext/couchbase/operations/{remove.hxx → document_unlock.hxx} +12 -10
  66. data/ext/couchbase/operations/document_upsert.hxx +74 -0
  67. data/ext/couchbase/operations/query_index_build_deferred.hxx +85 -0
  68. data/ext/couchbase/operations/query_index_create.hxx +134 -0
  69. data/ext/couchbase/operations/query_index_drop.hxx +108 -0
  70. data/ext/couchbase/operations/query_index_get_all.hxx +106 -0
  71. data/ext/couchbase/operations/scope_create.hxx +81 -0
  72. data/ext/couchbase/operations/scope_drop.hxx +79 -0
  73. data/ext/couchbase/operations/scope_get_all.hxx +72 -0
  74. data/ext/couchbase/protocol/client_opcode.hxx +35 -0
  75. data/ext/couchbase/protocol/client_request.hxx +56 -9
  76. data/ext/couchbase/protocol/client_response.hxx +52 -15
  77. data/ext/couchbase/protocol/cmd_cluster_map_change_notification.hxx +81 -0
  78. data/ext/couchbase/protocol/cmd_decrement.hxx +187 -0
  79. data/ext/couchbase/protocol/cmd_exists.hxx +171 -0
  80. data/ext/couchbase/protocol/cmd_get.hxx +31 -8
  81. data/ext/couchbase/protocol/cmd_get_and_lock.hxx +142 -0
  82. data/ext/couchbase/protocol/cmd_get_and_touch.hxx +142 -0
  83. data/ext/couchbase/protocol/cmd_get_cluster_config.hxx +16 -3
  84. data/ext/couchbase/protocol/cmd_get_collections_manifest.hxx +16 -3
  85. data/ext/couchbase/protocol/cmd_get_error_map.hxx +16 -3
  86. data/ext/couchbase/protocol/cmd_hello.hxx +24 -8
  87. data/ext/couchbase/protocol/cmd_increment.hxx +187 -0
  88. data/ext/couchbase/protocol/cmd_info.hxx +1 -0
  89. data/ext/couchbase/protocol/cmd_insert.hxx +172 -0
  90. data/ext/couchbase/protocol/cmd_lookup_in.hxx +28 -13
  91. data/ext/couchbase/protocol/cmd_mutate_in.hxx +65 -13
  92. data/ext/couchbase/protocol/cmd_remove.hxx +59 -4
  93. data/ext/couchbase/protocol/cmd_replace.hxx +172 -0
  94. data/ext/couchbase/protocol/cmd_sasl_auth.hxx +15 -3
  95. data/ext/couchbase/protocol/cmd_sasl_list_mechs.hxx +15 -3
  96. data/ext/couchbase/protocol/cmd_sasl_step.hxx +15 -3
  97. data/ext/couchbase/protocol/cmd_select_bucket.hxx +14 -2
  98. data/ext/couchbase/protocol/cmd_touch.hxx +102 -0
  99. data/ext/couchbase/protocol/cmd_unlock.hxx +95 -0
  100. data/ext/couchbase/protocol/cmd_upsert.hxx +50 -14
  101. data/ext/couchbase/protocol/durability_level.hxx +67 -0
  102. data/ext/couchbase/protocol/frame_info_id.hxx +187 -0
  103. data/ext/couchbase/protocol/hello_feature.hxx +137 -0
  104. data/ext/couchbase/protocol/server_opcode.hxx +57 -0
  105. data/ext/couchbase/protocol/server_request.hxx +122 -0
  106. data/ext/couchbase/protocol/unsigned_leb128.h +15 -15
  107. data/ext/couchbase/utils/byteswap.hxx +1 -2
  108. data/ext/couchbase/utils/url_codec.hxx +225 -0
  109. data/ext/couchbase/version.hxx +3 -1
  110. data/ext/extconf.rb +4 -1
  111. data/ext/test/main.cxx +37 -113
  112. data/ext/third_party/snappy/.appveyor.yml +36 -0
  113. data/ext/third_party/snappy/.gitignore +8 -0
  114. data/ext/third_party/snappy/.travis.yml +98 -0
  115. data/ext/third_party/snappy/AUTHORS +1 -0
  116. data/ext/third_party/snappy/CMakeLists.txt +345 -0
  117. data/ext/third_party/snappy/CONTRIBUTING.md +26 -0
  118. data/ext/third_party/snappy/COPYING +54 -0
  119. data/ext/third_party/snappy/NEWS +188 -0
  120. data/ext/third_party/snappy/README.md +148 -0
  121. data/ext/third_party/snappy/cmake/SnappyConfig.cmake.in +33 -0
  122. data/ext/third_party/snappy/cmake/config.h.in +59 -0
  123. data/ext/third_party/snappy/docs/README.md +72 -0
  124. data/ext/third_party/snappy/format_description.txt +110 -0
  125. data/ext/third_party/snappy/framing_format.txt +135 -0
  126. data/ext/third_party/snappy/snappy-c.cc +90 -0
  127. data/ext/third_party/snappy/snappy-c.h +138 -0
  128. data/ext/third_party/snappy/snappy-internal.h +315 -0
  129. data/ext/third_party/snappy/snappy-sinksource.cc +121 -0
  130. data/ext/third_party/snappy/snappy-sinksource.h +182 -0
  131. data/ext/third_party/snappy/snappy-stubs-internal.cc +42 -0
  132. data/ext/third_party/snappy/snappy-stubs-internal.h +493 -0
  133. data/ext/third_party/snappy/snappy-stubs-public.h.in +63 -0
  134. data/ext/third_party/snappy/snappy-test.cc +613 -0
  135. data/ext/third_party/snappy/snappy-test.h +526 -0
  136. data/ext/third_party/snappy/snappy.cc +1770 -0
  137. data/ext/third_party/snappy/snappy.h +209 -0
  138. data/ext/third_party/snappy/snappy_compress_fuzzer.cc +60 -0
  139. data/ext/third_party/snappy/snappy_uncompress_fuzzer.cc +58 -0
  140. data/ext/third_party/snappy/snappy_unittest.cc +1512 -0
  141. data/ext/third_party/snappy/testdata/alice29.txt +3609 -0
  142. data/ext/third_party/snappy/testdata/asyoulik.txt +4122 -0
  143. data/ext/third_party/snappy/testdata/baddata1.snappy +0 -0
  144. data/ext/third_party/snappy/testdata/baddata2.snappy +0 -0
  145. data/ext/third_party/snappy/testdata/baddata3.snappy +0 -0
  146. data/ext/third_party/snappy/testdata/fireworks.jpeg +0 -0
  147. data/ext/third_party/snappy/testdata/geo.protodata +0 -0
  148. data/ext/third_party/snappy/testdata/html +1 -0
  149. data/ext/third_party/snappy/testdata/html_x_4 +1 -0
  150. data/ext/third_party/snappy/testdata/kppkn.gtb +0 -0
  151. data/ext/third_party/snappy/testdata/lcet10.txt +7519 -0
  152. data/ext/third_party/snappy/testdata/paper-100k.pdf +600 -2
  153. data/ext/third_party/snappy/testdata/plrabn12.txt +10699 -0
  154. data/ext/third_party/snappy/testdata/urls.10K +10000 -0
  155. data/lib/couchbase/binary_collection.rb +33 -76
  156. data/lib/couchbase/binary_collection_options.rb +94 -0
  157. data/lib/couchbase/bucket.rb +9 -3
  158. data/lib/couchbase/cluster.rb +161 -23
  159. data/lib/couchbase/collection.rb +108 -191
  160. data/lib/couchbase/collection_options.rb +430 -0
  161. data/lib/couchbase/errors.rb +136 -134
  162. data/lib/couchbase/json_transcoder.rb +32 -0
  163. data/lib/couchbase/management/analytics_index_manager.rb +185 -9
  164. data/lib/couchbase/management/bucket_manager.rb +84 -33
  165. data/lib/couchbase/management/collection_manager.rb +166 -1
  166. data/lib/couchbase/management/query_index_manager.rb +261 -0
  167. data/lib/couchbase/management/search_index_manager.rb +291 -0
  168. data/lib/couchbase/management/user_manager.rb +12 -10
  169. data/lib/couchbase/management/view_index_manager.rb +151 -1
  170. data/lib/couchbase/mutation_state.rb +11 -1
  171. data/lib/couchbase/scope.rb +4 -4
  172. data/lib/couchbase/version.rb +1 -1
  173. metadata +113 -18
  174. data/.travis.yml +0 -7
  175. data/ext/couchbase/io/binary_parser.hxx +0 -64
  176. data/lib/couchbase/results.rb +0 -307
@@ -0,0 +1,209 @@
1
+ // Copyright 2005 and onwards Google Inc.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // A light-weight compression algorithm. It is designed for speed of
30
+ // compression and decompression, rather than for the utmost in space
31
+ // savings.
32
+ //
33
+ // For getting better compression ratios when you are compressing data
34
+ // with long repeated sequences or compressing data that is similar to
35
+ // other data, while still compressing fast, you might look at first
36
+ // using BMDiff and then compressing the output of BMDiff with
37
+ // Snappy.
38
+
39
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__
40
+ #define THIRD_PARTY_SNAPPY_SNAPPY_H__
41
+
42
+ #include <stddef.h>
43
+ #include <stdint.h>
44
+
45
+ #include <string>
46
+
47
+ #include "snappy-stubs-public.h"
48
+
49
+ namespace snappy {
50
+ class Source;
51
+ class Sink;
52
+
53
+ // ------------------------------------------------------------------------
54
+ // Generic compression/decompression routines.
55
+ // ------------------------------------------------------------------------
56
+
57
+ // Compress the bytes read from "*source" and append to "*sink". Return the
58
+ // number of bytes written.
59
+ size_t Compress(Source* source, Sink* sink);
60
+
61
+ // Find the uncompressed length of the given stream, as given by the header.
62
+ // Note that the true length could deviate from this; the stream could e.g.
63
+ // be truncated.
64
+ //
65
+ // Also note that this leaves "*source" in a state that is unsuitable for
66
+ // further operations, such as RawUncompress(). You will need to rewind
67
+ // or recreate the source yourself before attempting any further calls.
68
+ bool GetUncompressedLength(Source* source, uint32_t* result);
69
+
70
+ // ------------------------------------------------------------------------
71
+ // Higher-level string based routines (should be sufficient for most users)
72
+ // ------------------------------------------------------------------------
73
+
74
+ // Sets "*compressed" to the compressed version of "input[0,input_length-1]".
75
+ // Original contents of *compressed are lost.
76
+ //
77
+ // REQUIRES: "input[]" is not an alias of "*compressed".
78
+ size_t Compress(const char* input, size_t input_length,
79
+ std::string* compressed);
80
+
81
+ // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
82
+ // Original contents of "*uncompressed" are lost.
83
+ //
84
+ // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
85
+ //
86
+ // returns false if the message is corrupted and could not be decompressed
87
+ bool Uncompress(const char* compressed, size_t compressed_length,
88
+ std::string* uncompressed);
89
+
90
+ // Decompresses "compressed" to "*uncompressed".
91
+ //
92
+ // returns false if the message is corrupted and could not be decompressed
93
+ bool Uncompress(Source* compressed, Sink* uncompressed);
94
+
95
+ // This routine uncompresses as much of the "compressed" as possible
96
+ // into sink. It returns the number of valid bytes added to sink
97
+ // (extra invalid bytes may have been added due to errors; the caller
98
+ // should ignore those). The emitted data typically has length
99
+ // GetUncompressedLength(), but may be shorter if an error is
100
+ // encountered.
101
+ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed);
102
+
103
+ // ------------------------------------------------------------------------
104
+ // Lower-level character array based routines. May be useful for
105
+ // efficiency reasons in certain circumstances.
106
+ // ------------------------------------------------------------------------
107
+
108
+ // REQUIRES: "compressed" must point to an area of memory that is at
109
+ // least "MaxCompressedLength(input_length)" bytes in length.
110
+ //
111
+ // Takes the data stored in "input[0..input_length]" and stores
112
+ // it in the array pointed to by "compressed".
113
+ //
114
+ // "*compressed_length" is set to the length of the compressed output.
115
+ //
116
+ // Example:
117
+ // char* output = new char[snappy::MaxCompressedLength(input_length)];
118
+ // size_t output_length;
119
+ // RawCompress(input, input_length, output, &output_length);
120
+ // ... Process(output, output_length) ...
121
+ // delete [] output;
122
+ void RawCompress(const char* input,
123
+ size_t input_length,
124
+ char* compressed,
125
+ size_t* compressed_length);
126
+
127
+ // Given data in "compressed[0..compressed_length-1]" generated by
128
+ // calling the Snappy::Compress routine, this routine
129
+ // stores the uncompressed data to
130
+ // uncompressed[0..GetUncompressedLength(compressed)-1]
131
+ // returns false if the message is corrupted and could not be decrypted
132
+ bool RawUncompress(const char* compressed, size_t compressed_length,
133
+ char* uncompressed);
134
+
135
+ // Given data from the byte source 'compressed' generated by calling
136
+ // the Snappy::Compress routine, this routine stores the uncompressed
137
+ // data to
138
+ // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
139
+ // returns false if the message is corrupted and could not be decrypted
140
+ bool RawUncompress(Source* compressed, char* uncompressed);
141
+
142
+ // Given data in "compressed[0..compressed_length-1]" generated by
143
+ // calling the Snappy::Compress routine, this routine
144
+ // stores the uncompressed data to the iovec "iov". The number of physical
145
+ // buffers in "iov" is given by iov_cnt and their cumulative size
146
+ // must be at least GetUncompressedLength(compressed). The individual buffers
147
+ // in "iov" must not overlap with each other.
148
+ //
149
+ // returns false if the message is corrupted and could not be decrypted
150
+ bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
151
+ const struct iovec* iov, size_t iov_cnt);
152
+
153
+ // Given data from the byte source 'compressed' generated by calling
154
+ // the Snappy::Compress routine, this routine stores the uncompressed
155
+ // data to the iovec "iov". The number of physical
156
+ // buffers in "iov" is given by iov_cnt and their cumulative size
157
+ // must be at least GetUncompressedLength(compressed). The individual buffers
158
+ // in "iov" must not overlap with each other.
159
+ //
160
+ // returns false if the message is corrupted and could not be decrypted
161
+ bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
162
+ size_t iov_cnt);
163
+
164
+ // Returns the maximal size of the compressed representation of
165
+ // input data that is "source_bytes" bytes in length;
166
+ size_t MaxCompressedLength(size_t source_bytes);
167
+
168
+ // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
169
+ // Returns true and stores the length of the uncompressed data in
170
+ // *result normally. Returns false on parsing error.
171
+ // This operation takes O(1) time.
172
+ bool GetUncompressedLength(const char* compressed, size_t compressed_length,
173
+ size_t* result);
174
+
175
+ // Returns true iff the contents of "compressed[]" can be uncompressed
176
+ // successfully. Does not return the uncompressed data. Takes
177
+ // time proportional to compressed_length, but is usually at least
178
+ // a factor of four faster than actual decompression.
179
+ bool IsValidCompressedBuffer(const char* compressed,
180
+ size_t compressed_length);
181
+
182
+ // Returns true iff the contents of "compressed" can be uncompressed
183
+ // successfully. Does not return the uncompressed data. Takes
184
+ // time proportional to *compressed length, but is usually at least
185
+ // a factor of four faster than actual decompression.
186
+ // On success, consumes all of *compressed. On failure, consumes an
187
+ // unspecified prefix of *compressed.
188
+ bool IsValidCompressed(Source* compressed);
189
+
190
+ // The size of a compression block. Note that many parts of the compression
191
+ // code assumes that kBlockSize <= 65536; in particular, the hash table
192
+ // can only store 16-bit offsets, and EmitCopy() also assumes the offset
193
+ // is 65535 bytes or less. Note also that if you change this, it will
194
+ // affect the framing format (see framing_format.txt).
195
+ //
196
+ // Note that there might be older data around that is compressed with larger
197
+ // block sizes, so the decompression code should not rely on the
198
+ // non-existence of long backreferences.
199
+ static constexpr int kBlockLog = 16;
200
+ static constexpr size_t kBlockSize = 1 << kBlockLog;
201
+
202
+ static constexpr int kMinHashTableBits = 8;
203
+ static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
204
+
205
+ static constexpr int kMaxHashTableBits = 14;
206
+ static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
207
+ } // end namespace snappy
208
+
209
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
@@ -0,0 +1,60 @@
1
+ // Copyright 2019 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // libFuzzer harness for fuzzing snappy compression code.
30
+
31
+ #include <stddef.h>
32
+ #include <stdint.h>
33
+
34
+ #include <cassert>
35
+ #include <string>
36
+
37
+ #include "snappy.h"
38
+
39
+ // Entry point for LibFuzzer.
40
+ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
41
+ std::string input(reinterpret_cast<const char*>(data), size);
42
+
43
+ std::string compressed;
44
+ size_t compressed_size =
45
+ snappy::Compress(input.data(), input.size(), &compressed);
46
+
47
+ (void)compressed_size; // Variable only used in debug builds.
48
+ assert(compressed_size == compressed.size());
49
+ assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
50
+ assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
51
+
52
+ std::string uncompressed_after_compress;
53
+ bool uncompress_succeeded = snappy::Uncompress(
54
+ compressed.data(), compressed.size(), &uncompressed_after_compress);
55
+
56
+ (void)uncompress_succeeded; // Variable only used in debug builds.
57
+ assert(uncompress_succeeded);
58
+ assert(input == uncompressed_after_compress);
59
+ return 0;
60
+ }
@@ -0,0 +1,58 @@
1
+ // Copyright 2019 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // libFuzzer harness for fuzzing snappy's decompression code.
30
+
31
+ #include <stddef.h>
32
+ #include <stdint.h>
33
+
34
+ #include <cassert>
35
+ #include <string>
36
+
37
+ #include "snappy.h"
38
+
39
+ // Entry point for LibFuzzer.
40
+ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
41
+ std::string input(reinterpret_cast<const char*>(data), size);
42
+
43
+ // Avoid self-crafted decompression bombs.
44
+ size_t uncompressed_size;
45
+ constexpr size_t kMaxUncompressedSize = 1 << 20;
46
+ bool get_uncompressed_length_succeeded = snappy::GetUncompressedLength(
47
+ input.data(), input.size(), &uncompressed_size);
48
+ if (!get_uncompressed_length_succeeded ||
49
+ (uncompressed_size > kMaxUncompressedSize)) {
50
+ return 0;
51
+ }
52
+
53
+ std::string uncompressed;
54
+ // The return value of snappy::Uncompress() is ignored because decompression
55
+ // will fail on invalid inputs.
56
+ snappy::Uncompress(input.data(), input.size(), &uncompressed);
57
+ return 0;
58
+ }
@@ -0,0 +1,1512 @@
1
+ // Copyright 2005 and onwards Google Inc.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ #include <cmath>
30
+ #include <cstdlib>
31
+
32
+ #include <algorithm>
33
+ #include <random>
34
+ #include <string>
35
+ #include <utility>
36
+ #include <vector>
37
+
38
+ #include "snappy.h"
39
+ #include "snappy-internal.h"
40
+ #include "snappy-test.h"
41
+ #include "snappy-sinksource.h"
42
+
43
+ DEFINE_int32(start_len, -1,
44
+ "Starting prefix size for testing (-1: just full file contents)");
45
+ DEFINE_int32(end_len, -1,
46
+ "Starting prefix size for testing (-1: just full file contents)");
47
+ DEFINE_int32(bytes, 10485760,
48
+ "How many bytes to compress/uncompress per file for timing");
49
+
50
+ DEFINE_bool(zlib, false,
51
+ "Run zlib compression (http://www.zlib.net)");
52
+ DEFINE_bool(lzo, false,
53
+ "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
54
+ DEFINE_bool(snappy, true, "Run snappy compression");
55
+
56
+ DEFINE_bool(write_compressed, false,
57
+ "Write compressed versions of each file to <file>.comp");
58
+ DEFINE_bool(write_uncompressed, false,
59
+ "Write uncompressed versions of each file to <file>.uncomp");
60
+
61
+ DEFINE_bool(snappy_dump_decompression_table, false,
62
+ "If true, we print the decompression table during tests.");
63
+
64
+ namespace snappy {
65
+
66
+ #if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
67
+
68
// To test against code that reads beyond its input, this class copies a
// string to a newly allocated group of pages, the last of which
// is made unreadable via mprotect. Note that we need to allocate the
// memory with mmap(), as POSIX allows mprotect() only on memory allocated
// with mmap(), and some malloc/posix_memalign implementations expect to
// be able to read previously allocated memory while doing heap allocations.
class DataEndingAtUnreadablePage {
 public:
  explicit DataEndingAtUnreadablePage(const std::string& s) {
    const size_t page_size = sysconf(_SC_PAGESIZE);
    const size_t size = s.size();
    // Round up space for string to a multiple of page_size.
    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
    // One extra page beyond the string space serves as the guard page.
    alloc_size_ = space_for_string + page_size;
    mem_ = mmap(NULL, alloc_size_,
                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    CHECK_NE(MAP_FAILED, mem_);
    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
    // Place the copy flush against the guard page so that any read past
    // data()+size() faults immediately.
    char* dst = protected_page_ - size;
    std::memcpy(dst, s.data(), size);
    data_ = dst;
    size_ = size;
    // Make guard page unreadable.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
  }

  ~DataEndingAtUnreadablePage() {
    const size_t page_size = sysconf(_SC_PAGESIZE);
    // Undo the mprotect.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
    CHECK_EQ(0, munmap(mem_, alloc_size_));
  }

  // Start of the copied bytes; reading past data()+size() hits the
  // PROT_NONE page.
  const char* data() const { return data_; }
  size_t size() const { return size_; }

 private:
  size_t alloc_size_;     // total bytes mapped, including the guard page
  void* mem_;             // base address returned by mmap()
  char* protected_page_;  // first byte of the unreadable guard page
  const char* data_;      // start of the copied string
  size_t size_;           // length of the copied string
};
111
+
112
+ #else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
113
+
114
+ // Fallback for systems without mmap.
115
+ using DataEndingAtUnreadablePage = std::string;
116
+
117
+ #endif
118
+
119
// Compression backends that the benchmark/verification helpers can exercise.
enum CompressorType {
  ZLIB, LZO, SNAPPY
};

// Human-readable labels indexed by CompressorType; must stay in the same
// order as the enum above.
const char* names[] = {
  "ZLIB", "LZO", "SNAPPY"
};
126
+
127
// Returns the worst-case compressed-output buffer size for `input_size`
// bytes with the given backend. Backends not compiled in fall through to
// the fatal default.
static size_t MinimumRequiredOutputSpace(size_t input_size,
                                         CompressorType comp) {
  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB:
      return ZLib::MinCompressbufSize(input_size);
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO:
      // Worst-case expansion bound for LZO1X.
      return input_size + input_size/64 + 16 + 3;
#endif  // LZO_VERSION

    case SNAPPY:
      return snappy::MaxCompressedLength(input_size);

    default:
      LOG(FATAL) << "Unknown compression type number " << comp;
      return 0;
  }
}
148
+
149
// Returns true if we successfully compressed, false otherwise.
//
// If compressed_is_preallocated is set, do not resize the compressed buffer.
// This is typically what you want for a benchmark, in order to not spend
// time in the memory allocator. If you do set this flag, however,
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
// number of bytes, and may contain junk bytes at the end after return.
static bool Compress(const char* input, size_t input_size, CompressorType comp,
                     std::string* compressed, bool compressed_is_preallocated) {
  if (!compressed_is_preallocated) {
    compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
  }

  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB: {
      ZLib zlib;
      uLongf destlen = compressed->size();
      int ret = zlib.Compress(
          reinterpret_cast<Bytef*>(string_as_array(compressed)),
          &destlen,
          reinterpret_cast<const Bytef*>(input),
          input_size);
      CHECK_EQ(Z_OK, ret);
      if (!compressed_is_preallocated) {
        // Trim to the actual compressed length.
        compressed->resize(destlen);
      }
      return true;
    }
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO: {
      // LZO requires a caller-supplied scratch buffer for its work memory.
      unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
      lzo_uint destlen;
      int ret = lzo1x_1_15_compress(
          reinterpret_cast<const uint8_t*>(input),
          input_size,
          reinterpret_cast<uint8_t*>(string_as_array(compressed)),
          &destlen,
          mem);
      CHECK_EQ(LZO_E_OK, ret);
      delete[] mem;
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }
#endif  // LZO_VERSION

    case SNAPPY: {
      size_t destlen;
      snappy::RawCompress(input, input_size,
                          string_as_array(compressed),
                          &destlen);
      CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }

    default: {
      return false;  // the asked-for library wasn't compiled in
    }
  }
  return true;
}
217
+
218
+ static bool Uncompress(const std::string& compressed, CompressorType comp,
219
+ int size, std::string* output) {
220
+ switch (comp) {
221
+ #ifdef ZLIB_VERSION
222
+ case ZLIB: {
223
+ output->resize(size);
224
+ ZLib zlib;
225
+ uLongf destlen = output->size();
226
+ int ret = zlib.Uncompress(
227
+ reinterpret_cast<Bytef*>(string_as_array(output)),
228
+ &destlen,
229
+ reinterpret_cast<const Bytef*>(compressed.data()),
230
+ compressed.size());
231
+ CHECK_EQ(Z_OK, ret);
232
+ CHECK_EQ(static_cast<uLongf>(size), destlen);
233
+ break;
234
+ }
235
+ #endif // ZLIB_VERSION
236
+
237
+ #ifdef LZO_VERSION
238
+ case LZO: {
239
+ output->resize(size);
240
+ lzo_uint destlen;
241
+ int ret = lzo1x_decompress(
242
+ reinterpret_cast<const uint8_t*>(compressed.data()),
243
+ compressed.size(),
244
+ reinterpret_cast<uint8_t*>(string_as_array(output)),
245
+ &destlen,
246
+ NULL);
247
+ CHECK_EQ(LZO_E_OK, ret);
248
+ CHECK_EQ(static_cast<lzo_uint>(size), destlen);
249
+ break;
250
+ }
251
+ #endif // LZO_VERSION
252
+
253
+ case SNAPPY: {
254
+ snappy::RawUncompress(compressed.data(), compressed.size(),
255
+ string_as_array(output));
256
+ break;
257
+ }
258
+
259
+ default: {
260
+ return false; // the asked-for library wasn't compiled in
261
+ }
262
+ }
263
+ return true;
264
+ }
265
+
266
// Benchmarks one backend over `data` of `length` bytes, chopped into
// block_size-sized blocks. Each of kRuns runs compresses and uncompresses
// every block `repeats` times; the median run's throughput (MB/s) and the
// overall compression ratio are printed to stdout.
static void Measure(const char* data,
                    size_t length,
                    CompressorType comp,
                    int repeats,
                    int block_size) {
  // Run tests a few time and pick median running times
  static const int kRuns = 5;
  double ctime[kRuns];
  double utime[kRuns];
  int compressed_size = 0;

  {
    // Chop the input into blocks
    int num_blocks = (length + block_size - 1) / block_size;
    std::vector<const char*> input(num_blocks);
    std::vector<size_t> input_length(num_blocks);
    std::vector<std::string> compressed(num_blocks);
    std::vector<std::string> output(num_blocks);
    for (int b = 0; b < num_blocks; ++b) {
      int input_start = b * block_size;
      int input_limit = std::min<int>((b+1)*block_size, length);
      input[b] = data+input_start;
      input_length[b] = input_limit-input_start;
    }

    // Pre-grow the output buffers so we don't measure string append time.
    for (std::string& compressed_block : compressed) {
      compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
    }

    // First, try one trial compression to make sure the code is compiled in
    if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
      LOG(WARNING) << "Skipping " << names[comp] << ": "
                   << "library not compiled in";
      return;
    }

    for (int run = 0; run < kRuns; ++run) {
      CycleTimer ctimer, utimer;

      // Pre-grow the output buffers so we don't measure string append time.
      for (std::string& compressed_block : compressed) {
        compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
      }

      // Timed compression phase: preallocated buffers, no resizing.
      ctimer.Start();
      for (int b = 0; b < num_blocks; ++b) {
        for (int i = 0; i < repeats; ++i)
          Compress(input[b], input_length[b], comp, &compressed[b], true);
      }
      ctimer.Stop();

      // Compress once more, with resizing, so we don't leave junk
      // at the end that will confuse the decompressor.
      for (int b = 0; b < num_blocks; ++b) {
        Compress(input[b], input_length[b], comp, &compressed[b], false);
      }

      for (int b = 0; b < num_blocks; ++b) {
        output[b].resize(input_length[b]);
      }

      // Timed decompression phase.
      utimer.Start();
      for (int i = 0; i < repeats; ++i) {
        for (int b = 0; b < num_blocks; ++b)
          Uncompress(compressed[b], comp, input_length[b], &output[b]);
      }
      utimer.Stop();

      ctime[run] = ctimer.Get();
      utime[run] = utimer.Get();
    }

    // Total compressed size, for the ratio column of the report.
    compressed_size = 0;
    for (const std::string& compressed_item : compressed) {
      compressed_size += compressed_item.size();
    }
  }

  // Report the median of the kRuns timings.
  std::sort(ctime, ctime + kRuns);
  std::sort(utime, utime + kRuns);
  const int med = kRuns/2;

  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
  float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
  std::string x = names[comp];
  x += ":";
  std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
                                         : std::string("?");
  std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
              "comp %5.1f MB/s uncomp %5s MB/s\n",
              x.c_str(),
              block_size/(1<<20),
              static_cast<int>(length), static_cast<uint32_t>(compressed_size),
              (compressed_size * 100.0) / std::max<int>(1, length),
              comp_rate,
              urate.c_str());
}
364
+
365
+ static int VerifyString(const std::string& input) {
366
+ std::string compressed;
367
+ DataEndingAtUnreadablePage i(input);
368
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
369
+ CHECK_EQ(written, compressed.size());
370
+ CHECK_LE(compressed.size(),
371
+ snappy::MaxCompressedLength(input.size()));
372
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
373
+
374
+ std::string uncompressed;
375
+ DataEndingAtUnreadablePage c(compressed);
376
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
377
+ CHECK_EQ(uncompressed, input);
378
+ return uncompressed.size();
379
+ }
380
+
381
// Round-trips `input` through the Source/Sink decompression API, with both
// the compression input and the compressed bytes guarded by an unreadable
// page to catch out-of-bounds reads.
static void VerifyStringSink(const std::string& input) {
  std::string compressed;
  DataEndingAtUnreadablePage i(input);
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
  CHECK_EQ(written, compressed.size());
  CHECK_LE(compressed.size(),
           snappy::MaxCompressedLength(input.size()));
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));

  // Decompress through ByteArraySource/UncheckedByteArraySink into a
  // pre-sized buffer; the unchecked sink writes straight into it.
  std::string uncompressed;
  uncompressed.resize(input.size());
  snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
  DataEndingAtUnreadablePage c(compressed);
  snappy::ByteArraySource source(c.data(), c.size());
  CHECK(snappy::Uncompress(&source, &sink));
  CHECK_EQ(uncompressed, input);
}
398
+
399
// Round-trips `input` and decompresses into an iovec array whose layout is
// pseudo-random but deterministic (the RNG is seeded with input.size()):
// 1 to 10 entries, some possibly zero-length, the last absorbing the rest.
static void VerifyIOVec(const std::string& input) {
  std::string compressed;
  DataEndingAtUnreadablePage i(input);
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
  CHECK_EQ(written, compressed.size());
  CHECK_LE(compressed.size(),
           snappy::MaxCompressedLength(input.size()));
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));

  // Try uncompressing into an iovec containing a random number of entries
  // ranging from 1 to 10.
  char* buf = new char[input.size()];
  std::minstd_rand0 rng(input.size());
  std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
  size_t num = uniform_1_to_10(rng);
  if (input.size() < num) {
    num = input.size();
  }
  struct iovec* iov = new iovec[num];
  size_t used_so_far = 0;
  std::bernoulli_distribution one_in_five(1.0 / 5);
  for (size_t i = 0; i < num; ++i) {
    assert(used_so_far < input.size());
    iov[i].iov_base = buf + used_so_far;
    if (i == num - 1) {
      // The final entry takes whatever bytes remain.
      iov[i].iov_len = input.size() - used_so_far;
    } else {
      // Randomly choose to insert a 0 byte entry.
      if (one_in_five(rng)) {
        iov[i].iov_len = 0;
      } else {
        std::uniform_int_distribution<size_t> uniform_not_used_so_far(
            0, input.size() - used_so_far - 1);
        iov[i].iov_len = uniform_not_used_so_far(rng);
      }
    }
    used_so_far += iov[i].iov_len;
  }
  CHECK(snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, num));
  CHECK(!memcmp(buf, input.data(), input.size()));
  delete[] iov;
  delete[] buf;
}
443
+
444
// Test that data compressed by a compressor that does not
// obey block sizes is uncompressed properly.
//
// Uses the internal CompressFragment() directly to produce a single-shot
// compression of the whole input (bypassing the normal per-block chopping),
// then checks all three decompression surfaces: string, Source/Sink, iovec.
static void VerifyNonBlockedCompression(const std::string& input) {
  if (input.length() > snappy::kBlockSize) {
    // We cannot test larger blocks than the maximum block size, obviously.
    return;
  }

  // The stream starts with the uncompressed length as a varint.
  std::string prefix;
  Varint::Append32(&prefix, input.size());

  // Setup compression table
  snappy::internal::WorkingMemory wmem(input.size());
  int table_size;
  uint16_t* table = wmem.GetHashTable(input.size(), &table_size);

  // Compress entire input in one shot
  std::string compressed;
  compressed += prefix;
  compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
  char* dest = string_as_array(&compressed) + prefix.size();
  char* end = snappy::internal::CompressFragment(input.data(), input.size(),
                                                 dest, table, table_size);
  compressed.resize(end - compressed.data());

  // Uncompress into std::string
  std::string uncomp_str;
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
  CHECK_EQ(uncomp_str, input);

  // Uncompress using source/sink
  std::string uncomp_str2;
  uncomp_str2.resize(input.size());
  snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
  snappy::ByteArraySource source(compressed.data(), compressed.size());
  CHECK(snappy::Uncompress(&source, &sink));
  CHECK_EQ(uncomp_str2, input);

  // Uncompress into iovec
  {
    static const int kNumBlocks = 10;
    struct iovec vec[kNumBlocks];
    const int block_size = 1 + input.size() / kNumBlocks;
    // iovec_data is oversized and padded with 'x'; only the first
    // input.size() bytes are compared afterwards.
    std::string iovec_data(block_size * kNumBlocks, 'x');
    for (int i = 0; i < kNumBlocks; ++i) {
      vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
      vec[i].iov_len = block_size;
    }
    CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
                                       vec, kNumBlocks));
    CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
  }
}
497
+
498
+ // Expand the input so that it is at least K times as big as block size
499
+ static std::string Expand(const std::string& input) {
500
+ static const int K = 3;
501
+ std::string data = input;
502
+ while (data.size() < K * snappy::kBlockSize) {
503
+ data += input;
504
+ }
505
+ return data;
506
+ }
507
+
508
+ static int Verify(const std::string& input) {
509
+ VLOG(1) << "Verifying input of size " << input.size();
510
+
511
+ // Compress using string based routines
512
+ const int result = VerifyString(input);
513
+
514
+ // Verify using sink based routines
515
+ VerifyStringSink(input);
516
+
517
+ VerifyNonBlockedCompression(input);
518
+ VerifyIOVec(input);
519
+ if (!input.empty()) {
520
+ const std::string expanded = Expand(input);
521
+ VerifyNonBlockedCompression(expanded);
522
+ VerifyIOVec(input);
523
+ }
524
+
525
+ return result;
526
+ }
527
+
528
+ static bool IsValidCompressedBuffer(const std::string& c) {
529
+ return snappy::IsValidCompressedBuffer(c.data(), c.size());
530
+ }
531
+ static bool Uncompress(const std::string& c, std::string* u) {
532
+ return snappy::Uncompress(c.data(), c.size(), u);
533
+ }
534
+
535
// This test checks to ensure that snappy doesn't coredump if it gets
// corrupted data.
TEST(CorruptedTest, VerifyCorrupted) {
  std::string source = "making sure we don't crash with corrupted input";
  VLOG(1) << source;
  std::string dest;
  std::string uncmp;
  snappy::Compress(source.data(), source.size(), &dest);

  // Mess around with the data. It's hard to simulate all possible
  // corruptions; this is just one example ...
  CHECK_GT(dest.size(), 3);
  dest[1]--;
  dest[3]++;
  // this really ought to fail.
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // This is testing for a security bug - a buffer that decompresses to 100k
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
  source.resize(100000);
  for (char& source_char : source) {
    source_char = 'A';
  }
  snappy::Compress(source.data(), source.size(), &dest);
  // Zero out the length header; the stream now lies about its size.
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  if (sizeof(void *) == 4) {
    // Another security check; check a crazy big length can't DoS us with an
    // over-allocation.
    // Currently this is done only for 32-bit builds. On 64-bit builds,
    // where 3 GB might be an acceptable allocation size, Uncompress()
    // attempts to decompress, and sometimes causes the test to run out of
    // memory.
    dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
    // This decodes to a really large size, i.e., about 3 GB.
    dest[4] = 'k';
    CHECK(!IsValidCompressedBuffer(dest));
    CHECK(!Uncompress(dest, &uncmp));
  } else {
    LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
  }

  // This decodes to about 2 MB; much smaller, but should still fail.
  dest[0] = dest[1] = dest[2] = '\xff';
  dest[3] = 0x00;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // try reading stuff in from a bad file.
  for (int i = 1; i <= 3; ++i) {
    std::string data =
        ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
    // Shadows the outer `uncmp` deliberately: each file gets a fresh buffer.
    std::string uncmp;
    // check that we don't return a crazy length
    size_t ulen;
    CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
          || (ulen < (1<<20)));
    uint32_t ulen2;
    snappy::ByteArraySource source(data.data(), data.size());
    CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
          (ulen2 < (1<<20)));
    CHECK(!IsValidCompressedBuffer(data));
    CHECK(!Uncompress(data, &uncmp));
  }
}
603
+
604
+ // Helper routines to construct arbitrary compressed strings.
605
+ // These mirror the compression code in snappy.cc, but are copied
606
+ // here so that we can bypass some limitations in the how snappy.cc
607
+ // invokes these routines.
608
// Appends a snappy literal element to *dst: the tag byte(s) encoding
// (length - 1), followed by the raw literal bytes. No-op on an empty
// literal.
static void AppendLiteral(std::string* dst, const std::string& literal) {
  if (literal.empty()) return;
  int len_minus_one = literal.size() - 1;
  if (len_minus_one < 60) {
    // Short form: length fits directly in the tag byte.
    dst->push_back(len_minus_one << 2);
  } else {
    // Long form: length stored little-endian in 1-4 bytes after the tag.
    char encoded[4];
    int num_bytes = 0;
    for (int rest = len_minus_one; rest > 0; rest >>= 8) {
      encoded[num_bytes++] = rest & 0xff;
    }
    dst->push_back((59 + num_bytes) << 2);
    dst->append(encoded, num_bytes);
  }
  dst->append(literal);
}
627
+
628
// Appends snappy copy elements covering `length` bytes at back-distance
// `offset`, splitting into chunks exactly as the compressor would.
static void AppendCopy(std::string* dst, int offset, int length) {
  while (length > 0) {
    // Chunk size: 64 while plenty remains; 60 when 65-67 remain (so the
    // final chunk stays >= 4); otherwise whatever is left.
    int chunk;
    if (length >= 68) {
      chunk = 64;
    } else if (length > 64) {
      chunk = 60;
    } else {
      chunk = length;
    }
    length -= chunk;

    if (chunk >= 4 && chunk < 12 && offset < 2048) {
      // 2-byte copy: 3-bit (length - 4), 11-bit offset.
      assert(chunk - 4 < 8);  // Must fit in 3 bits
      dst->push_back(1 | ((chunk - 4) << 2) | ((offset >> 8) << 5));
      dst->push_back(offset & 0xff);
    } else if (offset < 65536) {
      // 3-byte copy: 6-bit (length - 1), 16-bit little-endian offset.
      dst->push_back(2 | ((chunk - 1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back(offset >> 8);
    } else {
      // 5-byte copy: 6-bit (length - 1), 32-bit little-endian offset.
      dst->push_back(3 | ((chunk - 1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back((offset >> 8) & 0xff);
      dst->push_back((offset >> 16) & 0xff);
      dst->push_back((offset >> 24) & 0xff);
    }
  }
}
658
+
659
+ TEST(Snappy, SimpleTests) {
660
+ Verify("");
661
+ Verify("a");
662
+ Verify("ab");
663
+ Verify("abc");
664
+
665
+ Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
666
+ Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
667
+ Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
668
+ Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
669
+ Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
670
+ }
671
+
672
// Verify max blowup (lots of four-byte copies)
TEST(Snappy, MaxBlowup) {
  // 80000 bytes of (deterministically seeded) random data — essentially
  // incompressible.
  std::mt19937 rng;
  std::uniform_int_distribution<int> uniform_byte(0, 255);
  std::string input;
  for (int i = 0; i < 80000; ++i)
    input.push_back(static_cast<char>(uniform_byte(rng)));

  // Append every 4-byte slice of the random data, walking backwards from
  // its end, forcing the compressor into a long stream of short copies.
  for (int i = 0; i < 80000; i += 4) {
    std::string four_bytes(input.end() - i - 4, input.end() - i);
    input.append(four_bytes);
  }
  Verify(input);
}
686
+
687
// Round-trips 20000 pseudo-random strings built with skewed run lengths
// and byte values. The RNG is seeded from FLAGS_test_random_seed, so the
// sequence is reproducible; the exact order of distribution draws below is
// part of that reproducibility.
TEST(Snappy, RandomData) {
  std::minstd_rand0 rng(FLAGS_test_random_seed);
  std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
  std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
  std::uniform_int_distribution<int> uniform_byte(0, 255);
  std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
  std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
  std::bernoulli_distribution one_in_ten(1.0 / 10);

  constexpr int num_ops = 20000;
  for (int i = 0; i < num_ops; ++i) {
    if ((i % 1000) == 0) {
      VLOG(0) << "Random op " << i << " of " << num_ops;
    }

    std::string x;
    size_t len = uniform_4k(rng);
    // The first 100 iterations use large (>64 KB) inputs.
    if (i < 100) {
      len = 65536 + uniform_64k(rng);
    }
    while (x.size() < len) {
      int run_len = 1;
      if (one_in_ten(rng)) {
        int skewed_bits = uniform_0_to_8(rng);
        // int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
        std::uniform_int_distribution<int> skewed_low(0,
                                                      (1 << skewed_bits) - 1);
        run_len = skewed_low(rng);
      }
      char c = static_cast<char>(uniform_byte(rng));
      // After the large-input iterations, bias bytes toward a tiny alphabet
      // so the data is compressible.
      if (i >= 100) {
        int skewed_bits = uniform_0_to_3(rng);
        // int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
        std::uniform_int_distribution<int> skewed_low(0,
                                                      (1 << skewed_bits) - 1);
        c = static_cast<char>(skewed_low(rng));
      }
      while (run_len-- > 0 && x.size() < len) {
        x.push_back(c);
      }
    }

    Verify(x);
  }
}
732
+
733
TEST(Snappy, FourByteOffset) {
  // The new compressor cannot generate four-byte offsets since
  // it chops up the input into 32KB pieces. So we hand-emit the
  // copy manually.

  // The two fragments that make up the input string.
  std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
  std::string fragment2 = "some other string";

  // How many times each fragment is emitted.
  const int n1 = 2;
  const int n2 = 100000 / fragment2.size();
  const size_t length = n1 * fragment1.size() + n2 * fragment2.size();

  // Hand-build the compressed stream: length varint, then literals, then a
  // copy whose offset reaches all the way back to fragment1.
  std::string compressed;
  Varint::Append32(&compressed, length);

  AppendLiteral(&compressed, fragment1);
  std::string src = fragment1;
  for (int i = 0; i < n2; ++i) {
    AppendLiteral(&compressed, fragment2);
    src += fragment2;
  }
  // Offset == src.size() (> 64 KB), forcing the five-byte copy encoding.
  AppendCopy(&compressed, src.size(), fragment1.size());
  src += fragment1;
  CHECK_EQ(length, src.size());

  std::string uncompressed;
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
                           &uncompressed));
  CHECK_EQ(uncompressed, src);
}
766
+
767
TEST(Snappy, IOVecEdgeCases) {
  // Test some tricky edge cases in the iovec output that are not necessarily
  // exercised by random tests.

  // Our output blocks look like this initially (the last iovec is bigger
  // than depicted):
  // [ ] [ ] [ ] [ ] [ ]
  static const int kLengths[] = { 2, 1, 4, 8, 128 };

  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    iov[i].iov_base = new char[kLengths[i]];
    iov[i].iov_len = kLengths[i];
  }

  // Hand-build a stream whose total uncompressed size is 22 bytes.
  std::string compressed;
  Varint::Append32(&compressed, 22);

  // A literal whose output crosses three blocks.
  // [ab] [c] [123 ] [ ] [ ]
  AppendLiteral(&compressed, "abc123");

  // A copy whose output crosses two blocks (source and destination
  // segments marked).
  // [ab] [c] [1231] [23 ] [ ]
  //           ^--^   --
  AppendCopy(&compressed, 3, 3);

  // A copy where the input is, at first, in the block before the output:
  //
  // [ab] [c] [1231] [231231 ] [ ]
  //           ^---  ^---
  // Then during the copy, the pointers move such that the input and
  // output pointers are in the same block:
  //
  // [ab] [c] [1231] [23123123] [ ]
  //                  ^-    ^-
  // And then they move again, so that the output pointer is no longer
  // in the same block as the input pointer:
  // [ab] [c] [1231] [23123123] [123 ]
  //                  ^--        ^--
  AppendCopy(&compressed, 6, 9);

  // Finally, a copy where the input is from several blocks back,
  // and it also crosses three blocks:
  //
  // [ab] [c] [1231] [23123123] [123b    ]
  //   ^                            ^
  // [ab] [c] [1231] [23123123] [123bc   ]
  //       ^                         ^
  // [ab] [c] [1231] [23123123] [123bc12 ]
  //           ^-                     ^-
  AppendCopy(&compressed, 17, 4);

  // The expected bytes in each block after decompression.
  CHECK(snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
  CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
  CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
  CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
  CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
  CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));

  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    delete[] reinterpret_cast<char *>(iov[i].iov_base);
  }
}
833
+
834
+ TEST(Snappy, IOVecLiteralOverflow) {
835
+ static const int kLengths[] = { 3, 4 };
836
+
837
+ struct iovec iov[ARRAYSIZE(kLengths)];
838
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
839
+ iov[i].iov_base = new char[kLengths[i]];
840
+ iov[i].iov_len = kLengths[i];
841
+ }
842
+
843
+ std::string compressed;
844
+ Varint::Append32(&compressed, 8);
845
+
846
+ AppendLiteral(&compressed, "12345678");
847
+
848
+ CHECK(!snappy::RawUncompressToIOVec(
849
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
850
+
851
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
852
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
853
+ }
854
+ }
855
+
856
+ TEST(Snappy, IOVecCopyOverflow) {
857
+ static const int kLengths[] = { 3, 4 };
858
+
859
+ struct iovec iov[ARRAYSIZE(kLengths)];
860
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
861
+ iov[i].iov_base = new char[kLengths[i]];
862
+ iov[i].iov_len = kLengths[i];
863
+ }
864
+
865
+ std::string compressed;
866
+ Varint::Append32(&compressed, 8);
867
+
868
+ AppendLiteral(&compressed, "123");
869
+ AppendCopy(&compressed, 3, 5);
870
+
871
+ CHECK(!snappy::RawUncompressToIOVec(
872
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
873
+
874
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
875
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
876
+ }
877
+ }
878
+
879
+ static bool CheckUncompressedLength(const std::string& compressed,
880
+ size_t* ulength) {
881
+ const bool result1 = snappy::GetUncompressedLength(compressed.data(),
882
+ compressed.size(),
883
+ ulength);
884
+
885
+ snappy::ByteArraySource source(compressed.data(), compressed.size());
886
+ uint32_t length;
887
+ const bool result2 = snappy::GetUncompressedLength(&source, &length);
888
+ CHECK_EQ(result1, result2);
889
+ return result1;
890
+ }
891
+
892
+ TEST(SnappyCorruption, TruncatedVarint) {
893
+ std::string compressed, uncompressed;
894
+ size_t ulength;
895
+ compressed.push_back('\xf0');
896
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
897
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
898
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
899
+ &uncompressed));
900
+ }
901
+
902
+ TEST(SnappyCorruption, UnterminatedVarint) {
903
+ std::string compressed, uncompressed;
904
+ size_t ulength;
905
+ compressed.push_back('\x80');
906
+ compressed.push_back('\x80');
907
+ compressed.push_back('\x80');
908
+ compressed.push_back('\x80');
909
+ compressed.push_back('\x80');
910
+ compressed.push_back(10);
911
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
912
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
913
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
914
+ &uncompressed));
915
+ }
916
+
917
+ TEST(SnappyCorruption, OverflowingVarint) {
918
+ std::string compressed, uncompressed;
919
+ size_t ulength;
920
+ compressed.push_back('\xfb');
921
+ compressed.push_back('\xff');
922
+ compressed.push_back('\xff');
923
+ compressed.push_back('\xff');
924
+ compressed.push_back('\x7f');
925
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
926
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
927
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
928
+ &uncompressed));
929
+ }
930
+
931
+ TEST(Snappy, ReadPastEndOfBuffer) {
932
+ // Check that we do not read past end of input
933
+
934
+ // Make a compressed string that ends with a single-byte literal
935
+ std::string compressed;
936
+ Varint::Append32(&compressed, 1);
937
+ AppendLiteral(&compressed, "x");
938
+
939
+ std::string uncompressed;
940
+ DataEndingAtUnreadablePage c(compressed);
941
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
942
+ CHECK_EQ(uncompressed, std::string("x"));
943
+ }
944
+
945
+ // Check for an infinite loop caused by a copy with offset==0
946
+ TEST(Snappy, ZeroOffsetCopy) {
947
+ const char* compressed = "\x40\x12\x00\x00";
948
+ // \x40 Length (must be > kMaxIncrementCopyOverflow)
949
+ // \x12\x00\x00 Copy with offset==0, length==5
950
+ char uncompressed[100];
951
+ EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
952
+ }
953
+
954
+ TEST(Snappy, ZeroOffsetCopyValidation) {
955
+ const char* compressed = "\x05\x12\x00\x00";
956
+ // \x05 Length
957
+ // \x12\x00\x00 Copy with offset==0, length==5
958
+ EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
959
+ }
960
+
961
+ namespace {
962
+
963
+ int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
964
+ uint64_t data;
965
+ std::pair<size_t, bool> p =
966
+ snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
967
+ CHECK_EQ(p.first < 8, p.second);
968
+ return p.first;
969
+ }
970
+
971
+ } // namespace
972
+
973
+ TEST(Snappy, FindMatchLength) {
974
+ // Exercise all different code paths through the function.
975
+ // 64-bit version:
976
+
977
+ // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
978
+ EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
979
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
980
+
981
+ // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
982
+ EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
983
+
984
+ // Same, but edge cases.
985
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
986
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
987
+
988
+ // Find non-match at once in first loop.
989
+ EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
990
+ EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
991
+ EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
992
+ EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
993
+
994
+ // Find non-match in first loop after one block.
995
+ EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
996
+ "abcdefgh?1234567xxxxxxxx", 24));
997
+ EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
998
+ "abcdefgh0?234567xxxxxxxx", 24));
999
+ EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1000
+ "abcdefgh01237654xxxxxxxx", 24));
1001
+ EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
1002
+ "abcdefgh0123456?xxxxxxxx", 24));
1003
+
1004
+ // 32-bit version:
1005
+
1006
+ // Short matches.
1007
+ EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
1008
+ EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
1009
+ EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
1010
+ EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
1011
+ EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
1012
+ EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
1013
+ EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
1014
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
1015
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
1016
+ EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
1017
+
1018
+ // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
1019
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
1020
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
1021
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
1022
+
1023
+ // Same, but edge cases.
1024
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
1025
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
1026
+
1027
+ // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
1028
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
1029
+
1030
+ // Find non-match at once in first loop.
1031
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1032
+ "xxxxxx?123xxxxxxxx", 18));
1033
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1034
+ "xxxxxx0?23xxxxxxxx", 18));
1035
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1036
+ "xxxxxx0132xxxxxxxx", 18));
1037
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1038
+ "xxxxxx012?xxxxxxxx", 18));
1039
+
1040
+ // Same, but edge cases.
1041
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
1042
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
1043
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
1044
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
1045
+
1046
+ // Find non-match in first loop after one block.
1047
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
1048
+ "xxxxxxabcd?123xx", 16));
1049
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
1050
+ "xxxxxxabcd0?23xx", 16));
1051
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
1052
+ "xxxxxxabcd0132xx", 16));
1053
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
1054
+ "xxxxxxabcd012?xx", 16));
1055
+
1056
+ // Same, but edge cases.
1057
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
1058
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
1059
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
1060
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
1061
+ }
1062
+
1063
+ TEST(Snappy, FindMatchLengthRandom) {
1064
+ constexpr int kNumTrials = 10000;
1065
+ constexpr int kTypicalLength = 10;
1066
+ std::minstd_rand0 rng(FLAGS_test_random_seed);
1067
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
1068
+ std::bernoulli_distribution one_in_two(1.0 / 2);
1069
+ std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
1070
+
1071
+ for (int i = 0; i < kNumTrials; ++i) {
1072
+ std::string s, t;
1073
+ char a = static_cast<char>(uniform_byte(rng));
1074
+ char b = static_cast<char>(uniform_byte(rng));
1075
+ while (!one_in_typical_length(rng)) {
1076
+ s.push_back(one_in_two(rng) ? a : b);
1077
+ t.push_back(one_in_two(rng) ? a : b);
1078
+ }
1079
+ DataEndingAtUnreadablePage u(s);
1080
+ DataEndingAtUnreadablePage v(t);
1081
+ size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
1082
+ if (matched == t.size()) {
1083
+ EXPECT_EQ(s, t);
1084
+ } else {
1085
+ EXPECT_NE(s[matched], t[matched]);
1086
+ for (size_t j = 0; j < matched; ++j) {
1087
+ EXPECT_EQ(s[j], t[j]);
1088
+ }
1089
+ }
1090
+ }
1091
+ }
1092
+
1093
+ static uint16_t MakeEntry(unsigned int extra,
1094
+ unsigned int len,
1095
+ unsigned int copy_offset) {
1096
+ // Check that all of the fields fit within the allocated space
1097
+ assert(extra == (extra & 0x7)); // At most 3 bits
1098
+ assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
1099
+ assert(len == (len & 0x7f)); // At most 7 bits
1100
+ return len | (copy_offset << 8) | (extra << 11);
1101
+ }
1102
+
1103
+ // Check that the decompression table is correct, and optionally print out
1104
+ // the computed one.
1105
+ TEST(Snappy, VerifyCharTable) {
1106
+ using snappy::internal::LITERAL;
1107
+ using snappy::internal::COPY_1_BYTE_OFFSET;
1108
+ using snappy::internal::COPY_2_BYTE_OFFSET;
1109
+ using snappy::internal::COPY_4_BYTE_OFFSET;
1110
+ using snappy::internal::char_table;
1111
+
1112
+ uint16_t dst[256];
1113
+
1114
+ // Place invalid entries in all places to detect missing initialization
1115
+ int assigned = 0;
1116
+ for (int i = 0; i < 256; ++i) {
1117
+ dst[i] = 0xffff;
1118
+ }
1119
+
1120
+ // Small LITERAL entries. We store (len-1) in the top 6 bits.
1121
+ for (uint8_t len = 1; len <= 60; ++len) {
1122
+ dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
1123
+ assigned++;
1124
+ }
1125
+
1126
+ // Large LITERAL entries. We use 60..63 in the high 6 bits to
1127
+ // encode the number of bytes of length info that follow the opcode.
1128
+ for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
1129
+ // We set the length field in the lookup table to 1 because extra
1130
+ // bytes encode len-1.
1131
+ dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
1132
+ assigned++;
1133
+ }
1134
+
1135
+ // COPY_1_BYTE_OFFSET.
1136
+ //
1137
+ // The tag byte in the compressed data stores len-4 in 3 bits, and
1138
+ // offset/256 in 5 bits. offset%256 is stored in the next byte.
1139
+ //
1140
+ // This format is used for length in range [4..11] and offset in
1141
+ // range [0..2047]
1142
+ for (uint8_t len = 4; len < 12; ++len) {
1143
+ for (uint16_t offset = 0; offset < 2048; offset += 256) {
1144
+ uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
1145
+ dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
1146
+ MakeEntry(1, len, offset_high);
1147
+ assigned++;
1148
+ }
1149
+ }
1150
+
1151
+ // COPY_2_BYTE_OFFSET.
1152
+ // Tag contains len-1 in top 6 bits, and offset in next two bytes.
1153
+ for (uint8_t len = 1; len <= 64; ++len) {
1154
+ dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
1155
+ assigned++;
1156
+ }
1157
+
1158
+ // COPY_4_BYTE_OFFSET.
1159
+ // Tag contents len-1 in top 6 bits, and offset in next four bytes.
1160
+ for (uint8_t len = 1; len <= 64; ++len) {
1161
+ dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
1162
+ assigned++;
1163
+ }
1164
+
1165
+ // Check that each entry was initialized exactly once.
1166
+ EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
1167
+ for (int i = 0; i < 256; ++i) {
1168
+ EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
1169
+ }
1170
+
1171
+ if (FLAGS_snappy_dump_decompression_table) {
1172
+ std::printf("static const uint16_t char_table[256] = {\n ");
1173
+ for (int i = 0; i < 256; ++i) {
1174
+ std::printf("0x%04x%s",
1175
+ dst[i],
1176
+ ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
1177
+ }
1178
+ std::printf("};\n");
1179
+ }
1180
+
1181
+ // Check that computed table matched recorded table.
1182
+ for (int i = 0; i < 256; ++i) {
1183
+ EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
1184
+ }
1185
+ }
1186
+
1187
+ static void CompressFile(const char* fname) {
1188
+ std::string fullinput;
1189
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1190
+
1191
+ std::string compressed;
1192
+ Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1193
+
1194
+ CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
1195
+ file::Defaults()));
1196
+ }
1197
+
1198
+ static void UncompressFile(const char* fname) {
1199
+ std::string fullinput;
1200
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1201
+
1202
+ size_t uncompLength;
1203
+ CHECK(CheckUncompressedLength(fullinput, &uncompLength));
1204
+
1205
+ std::string uncompressed;
1206
+ uncompressed.resize(uncompLength);
1207
+ CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1208
+
1209
+ CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
1210
+ file::Defaults()));
1211
+ }
1212
+
1213
+ static void MeasureFile(const char* fname) {
1214
+ std::string fullinput;
1215
+ CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1216
+ std::printf("%-40s :\n", fname);
1217
+
1218
+ int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
1219
+ int end_len = fullinput.size();
1220
+ if (FLAGS_end_len >= 0) {
1221
+ end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
1222
+ }
1223
+ for (int len = start_len; len <= end_len; ++len) {
1224
+ const char* const input = fullinput.data();
1225
+ int repeats = (FLAGS_bytes + len) / (len + 1);
1226
+ if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
1227
+ if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
1228
+ if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);
1229
+
1230
+ // For block-size based measurements
1231
+ if (0 && FLAGS_snappy) {
1232
+ Measure(input, len, SNAPPY, repeats, 8<<10);
1233
+ Measure(input, len, SNAPPY, repeats, 16<<10);
1234
+ Measure(input, len, SNAPPY, repeats, 32<<10);
1235
+ Measure(input, len, SNAPPY, repeats, 64<<10);
1236
+ Measure(input, len, SNAPPY, repeats, 256<<10);
1237
+ Measure(input, len, SNAPPY, repeats, 1024<<10);
1238
+ }
1239
+ }
1240
+ }
1241
+
1242
+ static struct {
1243
+ const char* label;
1244
+ const char* filename;
1245
+ size_t size_limit;
1246
+ } files[] = {
1247
+ { "html", "html", 0 },
1248
+ { "urls", "urls.10K", 0 },
1249
+ { "jpg", "fireworks.jpeg", 0 },
1250
+ { "jpg_200", "fireworks.jpeg", 200 },
1251
+ { "pdf", "paper-100k.pdf", 0 },
1252
+ { "html4", "html_x_4", 0 },
1253
+ { "txt1", "alice29.txt", 0 },
1254
+ { "txt2", "asyoulik.txt", 0 },
1255
+ { "txt3", "lcet10.txt", 0 },
1256
+ { "txt4", "plrabn12.txt", 0 },
1257
+ { "pb", "geo.protodata", 0 },
1258
+ { "gaviota", "kppkn.gtb", 0 },
1259
+ };
1260
+
1261
+ static void BM_UFlat(int iters, int arg) {
1262
+ StopBenchmarkTiming();
1263
+
1264
+ // Pick file to process based on "arg"
1265
+ CHECK_GE(arg, 0);
1266
+ CHECK_LT(arg, ARRAYSIZE(files));
1267
+ std::string contents =
1268
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1269
+
1270
+ std::string zcontents;
1271
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1272
+ char* dst = new char[contents.size()];
1273
+
1274
+ SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
1275
+ static_cast<int64_t>(contents.size()));
1276
+ SetBenchmarkLabel(files[arg].label);
1277
+ StartBenchmarkTiming();
1278
+ while (iters-- > 0) {
1279
+ CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
1280
+ }
1281
+ StopBenchmarkTiming();
1282
+
1283
+ delete[] dst;
1284
+ }
1285
+ BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1286
+
1287
+ static void BM_UValidate(int iters, int arg) {
1288
+ StopBenchmarkTiming();
1289
+
1290
+ // Pick file to process based on "arg"
1291
+ CHECK_GE(arg, 0);
1292
+ CHECK_LT(arg, ARRAYSIZE(files));
1293
+ std::string contents =
1294
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1295
+
1296
+ std::string zcontents;
1297
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1298
+
1299
+ SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
1300
+ static_cast<int64_t>(contents.size()));
1301
+ SetBenchmarkLabel(files[arg].label);
1302
+ StartBenchmarkTiming();
1303
+ while (iters-- > 0) {
1304
+ CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
1305
+ }
1306
+ StopBenchmarkTiming();
1307
+ }
1308
+ BENCHMARK(BM_UValidate)->DenseRange(0, 4);
1309
+
1310
+ static void BM_UIOVec(int iters, int arg) {
1311
+ StopBenchmarkTiming();
1312
+
1313
+ // Pick file to process based on "arg"
1314
+ CHECK_GE(arg, 0);
1315
+ CHECK_LT(arg, ARRAYSIZE(files));
1316
+ std::string contents =
1317
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1318
+
1319
+ std::string zcontents;
1320
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1321
+
1322
+ // Uncompress into an iovec containing ten entries.
1323
+ const int kNumEntries = 10;
1324
+ struct iovec iov[kNumEntries];
1325
+ char *dst = new char[contents.size()];
1326
+ size_t used_so_far = 0;
1327
+ for (int i = 0; i < kNumEntries; ++i) {
1328
+ iov[i].iov_base = dst + used_so_far;
1329
+ if (used_so_far == contents.size()) {
1330
+ iov[i].iov_len = 0;
1331
+ continue;
1332
+ }
1333
+
1334
+ if (i == kNumEntries - 1) {
1335
+ iov[i].iov_len = contents.size() - used_so_far;
1336
+ } else {
1337
+ iov[i].iov_len = contents.size() / kNumEntries;
1338
+ }
1339
+ used_so_far += iov[i].iov_len;
1340
+ }
1341
+
1342
+ SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
1343
+ static_cast<int64_t>(contents.size()));
1344
+ SetBenchmarkLabel(files[arg].label);
1345
+ StartBenchmarkTiming();
1346
+ while (iters-- > 0) {
1347
+ CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
1348
+ kNumEntries));
1349
+ }
1350
+ StopBenchmarkTiming();
1351
+
1352
+ delete[] dst;
1353
+ }
1354
+ BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1355
+
1356
+ static void BM_UFlatSink(int iters, int arg) {
1357
+ StopBenchmarkTiming();
1358
+
1359
+ // Pick file to process based on "arg"
1360
+ CHECK_GE(arg, 0);
1361
+ CHECK_LT(arg, ARRAYSIZE(files));
1362
+ std::string contents =
1363
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1364
+
1365
+ std::string zcontents;
1366
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
1367
+ char* dst = new char[contents.size()];
1368
+
1369
+ SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
1370
+ static_cast<int64_t>(contents.size()));
1371
+ SetBenchmarkLabel(files[arg].label);
1372
+ StartBenchmarkTiming();
1373
+ while (iters-- > 0) {
1374
+ snappy::ByteArraySource source(zcontents.data(), zcontents.size());
1375
+ snappy::UncheckedByteArraySink sink(dst);
1376
+ CHECK(snappy::Uncompress(&source, &sink));
1377
+ }
1378
+ StopBenchmarkTiming();
1379
+
1380
+ std::string s(dst, contents.size());
1381
+ CHECK_EQ(contents, s);
1382
+
1383
+ delete[] dst;
1384
+ }
1385
+
1386
+ BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
1387
+
1388
+ static void BM_ZFlat(int iters, int arg) {
1389
+ StopBenchmarkTiming();
1390
+
1391
+ // Pick file to process based on "arg"
1392
+ CHECK_GE(arg, 0);
1393
+ CHECK_LT(arg, ARRAYSIZE(files));
1394
+ std::string contents =
1395
+ ReadTestDataFile(files[arg].filename, files[arg].size_limit);
1396
+
1397
+ char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1398
+
1399
+ SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) *
1400
+ static_cast<int64_t>(contents.size()));
1401
+ StartBenchmarkTiming();
1402
+
1403
+ size_t zsize = 0;
1404
+ while (iters-- > 0) {
1405
+ snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
1406
+ }
1407
+ StopBenchmarkTiming();
1408
+ const double compression_ratio =
1409
+ static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1410
+ SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label,
1411
+ 100.0 * compression_ratio));
1412
+ VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes",
1413
+ files[arg].label, static_cast<int>(contents.size()),
1414
+ static_cast<int>(zsize));
1415
+ delete[] dst;
1416
+ }
1417
+ BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1418
+
1419
+ static void BM_ZFlatAll(int iters, int arg) {
1420
+ StopBenchmarkTiming();
1421
+
1422
+ CHECK_EQ(arg, 0);
1423
+ const int num_files = ARRAYSIZE(files);
1424
+
1425
+ std::vector<std::string> contents(num_files);
1426
+ std::vector<char*> dst(num_files);
1427
+
1428
+ int64_t total_contents_size = 0;
1429
+ for (int i = 0; i < num_files; ++i) {
1430
+ contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit);
1431
+ dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
1432
+ total_contents_size += contents[i].size();
1433
+ }
1434
+
1435
+ SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) * total_contents_size);
1436
+ StartBenchmarkTiming();
1437
+
1438
+ size_t zsize = 0;
1439
+ while (iters-- > 0) {
1440
+ for (int i = 0; i < num_files; ++i) {
1441
+ snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
1442
+ &zsize);
1443
+ }
1444
+ }
1445
+ StopBenchmarkTiming();
1446
+
1447
+ for (char* dst_item : dst) {
1448
+ delete[] dst_item;
1449
+ }
1450
+ SetBenchmarkLabel(StrFormat("%d files", num_files));
1451
+ }
1452
+ BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0);
1453
+
1454
+ static void BM_ZFlatIncreasingTableSize(int iters, int arg) {
1455
+ StopBenchmarkTiming();
1456
+
1457
+ CHECK_EQ(arg, 0);
1458
+ CHECK_GT(ARRAYSIZE(files), 0);
1459
+ const std::string base_content =
1460
+ ReadTestDataFile(files[0].filename, files[0].size_limit);
1461
+
1462
+ std::vector<std::string> contents;
1463
+ std::vector<char*> dst;
1464
+ int64_t total_contents_size = 0;
1465
+ for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
1466
+ ++table_bits) {
1467
+ std::string content = base_content;
1468
+ content.resize(1 << table_bits);
1469
+ dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
1470
+ total_contents_size += content.size();
1471
+ contents.push_back(std::move(content));
1472
+ }
1473
+
1474
+ size_t zsize = 0;
1475
+ SetBenchmarkBytesProcessed(static_cast<int64_t>(iters) * total_contents_size);
1476
+ StartBenchmarkTiming();
1477
+ while (iters-- > 0) {
1478
+ for (size_t i = 0; i < contents.size(); ++i) {
1479
+ snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
1480
+ &zsize);
1481
+ }
1482
+ }
1483
+ StopBenchmarkTiming();
1484
+
1485
+ for (char* dst_item : dst) {
1486
+ delete[] dst_item;
1487
+ }
1488
+ SetBenchmarkLabel(StrFormat("%zd tables", contents.size()));
1489
+ }
1490
+ BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0);
1491
+
1492
+ } // namespace snappy
1493
+
1494
+ int main(int argc, char** argv) {
1495
+ InitGoogle(argv[0], &argc, &argv, true);
1496
+ RunSpecifiedBenchmarks();
1497
+
1498
+ if (argc >= 2) {
1499
+ for (int arg = 1; arg < argc; ++arg) {
1500
+ if (FLAGS_write_compressed) {
1501
+ snappy::CompressFile(argv[arg]);
1502
+ } else if (FLAGS_write_uncompressed) {
1503
+ snappy::UncompressFile(argv[arg]);
1504
+ } else {
1505
+ snappy::MeasureFile(argv[arg]);
1506
+ }
1507
+ }
1508
+ return 0;
1509
+ }
1510
+
1511
+ return RUN_ALL_TESTS();
1512
+ }