snappy 0.0.17 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +5 -5
  2. data/.dockerignore +2 -0
  3. data/.github/workflows/main.yml +34 -0
  4. data/.github/workflows/publish.yml +34 -0
  5. data/.gitignore +2 -1
  6. data/.gitmodules +1 -1
  7. data/Dockerfile +13 -0
  8. data/Gemfile +4 -0
  9. data/README.md +45 -5
  10. data/Rakefile +32 -29
  11. data/ext/api.c +6 -1
  12. data/ext/extconf.rb +31 -22
  13. data/lib/snappy/hadoop/reader.rb +62 -0
  14. data/lib/snappy/hadoop/writer.rb +51 -0
  15. data/lib/snappy/hadoop.rb +22 -0
  16. data/lib/snappy/reader.rb +14 -10
  17. data/lib/snappy/shim.rb +1 -1
  18. data/lib/snappy/version.rb +1 -1
  19. data/lib/snappy.rb +5 -4
  20. data/snappy.gemspec +14 -13
  21. data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
  22. data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
  23. data/test/snappy_hadoop_test.rb +26 -0
  24. data/test/snappy_reader_test.rb +148 -0
  25. data/test/snappy_test.rb +95 -0
  26. data/test/snappy_writer_test.rb +55 -0
  27. data/test/test_helper.rb +7 -0
  28. data/test.sh +3 -0
  29. data/vendor/snappy/CMakeLists.txt +420 -0
  30. data/vendor/snappy/CONTRIBUTING.md +31 -0
  31. data/vendor/snappy/NEWS +52 -0
  32. data/vendor/snappy/{README → README.md} +75 -49
  33. data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
  34. data/vendor/snappy/cmake/config.h.in +66 -0
  35. data/vendor/snappy/docs/README.md +72 -0
  36. data/vendor/snappy/snappy-internal.h +200 -32
  37. data/vendor/snappy/snappy-sinksource.cc +26 -9
  38. data/vendor/snappy/snappy-sinksource.h +11 -11
  39. data/vendor/snappy/snappy-stubs-internal.cc +1 -1
  40. data/vendor/snappy/snappy-stubs-internal.h +299 -302
  41. data/vendor/snappy/snappy-stubs-public.h.in +10 -47
  42. data/vendor/snappy/snappy-test.cc +94 -200
  43. data/vendor/snappy/snappy-test.h +101 -358
  44. data/vendor/snappy/snappy.cc +1437 -474
  45. data/vendor/snappy/snappy.h +31 -12
  46. data/vendor/snappy/snappy_benchmark.cc +378 -0
  47. data/vendor/snappy/snappy_compress_fuzzer.cc +60 -0
  48. data/vendor/snappy/snappy_test_data.cc +57 -0
  49. data/vendor/snappy/snappy_test_data.h +68 -0
  50. data/vendor/snappy/snappy_test_tool.cc +471 -0
  51. data/vendor/snappy/snappy_uncompress_fuzzer.cc +58 -0
  52. data/vendor/snappy/snappy_unittest.cc +271 -792
  53. metadata +42 -92
  54. data/.travis.yml +0 -26
  55. data/smoke.sh +0 -8
  56. data/test/test-snappy-reader.rb +0 -129
  57. data/test/test-snappy-writer.rb +0 -55
  58. data/test/test-snappy.rb +0 -58
  59. data/vendor/snappy/ChangeLog +0 -2468
  60. data/vendor/snappy/INSTALL +0 -370
  61. data/vendor/snappy/Makefile +0 -982
  62. data/vendor/snappy/Makefile.am +0 -26
  63. data/vendor/snappy/Makefile.in +0 -982
  64. data/vendor/snappy/aclocal.m4 +0 -9738
  65. data/vendor/snappy/autogen.sh +0 -12
  66. data/vendor/snappy/autom4te.cache/output.0 +0 -18856
  67. data/vendor/snappy/autom4te.cache/output.1 +0 -18852
  68. data/vendor/snappy/autom4te.cache/requests +0 -297
  69. data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
  70. data/vendor/snappy/autom4te.cache/traces.1 +0 -714
  71. data/vendor/snappy/config.guess +0 -1530
  72. data/vendor/snappy/config.h +0 -135
  73. data/vendor/snappy/config.h.in +0 -134
  74. data/vendor/snappy/config.log +0 -1640
  75. data/vendor/snappy/config.status +0 -2318
  76. data/vendor/snappy/config.sub +0 -1773
  77. data/vendor/snappy/configure +0 -18852
  78. data/vendor/snappy/configure.ac +0 -134
  79. data/vendor/snappy/depcomp +0 -688
  80. data/vendor/snappy/install-sh +0 -527
  81. data/vendor/snappy/libtool +0 -10246
  82. data/vendor/snappy/ltmain.sh +0 -9661
  83. data/vendor/snappy/m4/gtest.m4 +0 -74
  84. data/vendor/snappy/m4/libtool.m4 +0 -8001
  85. data/vendor/snappy/m4/ltoptions.m4 +0 -384
  86. data/vendor/snappy/m4/ltsugar.m4 +0 -123
  87. data/vendor/snappy/m4/ltversion.m4 +0 -23
  88. data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
  89. data/vendor/snappy/missing +0 -331
  90. data/vendor/snappy/snappy-stubs-public.h +0 -100
  91. data/vendor/snappy/snappy.pc +0 -10
  92. data/vendor/snappy/snappy.pc.in +0 -10
  93. data/vendor/snappy/stamp-h1 +0 -1
@@ -40,6 +40,8 @@
40
40
  #define THIRD_PARTY_SNAPPY_SNAPPY_H__
41
41
 
42
42
  #include <stddef.h>
43
+ #include <stdint.h>
44
+
43
45
  #include <string>
44
46
 
45
47
  #include "snappy-stubs-public.h"
@@ -63,26 +65,34 @@ namespace snappy {
63
65
  // Also note that this leaves "*source" in a state that is unsuitable for
64
66
  // further operations, such as RawUncompress(). You will need to rewind
65
67
  // or recreate the source yourself before attempting any further calls.
66
- bool GetUncompressedLength(Source* source, uint32* result);
68
+ bool GetUncompressedLength(Source* source, uint32_t* result);
67
69
 
68
70
  // ------------------------------------------------------------------------
69
71
  // Higher-level string based routines (should be sufficient for most users)
70
72
  // ------------------------------------------------------------------------
71
73
 
72
- // Sets "*output" to the compressed version of "input[0,input_length-1]".
73
- // Original contents of *output are lost.
74
+ // Sets "*compressed" to the compressed version of "input[0..input_length-1]".
75
+ // Original contents of *compressed are lost.
74
76
  //
75
- // REQUIRES: "input[]" is not an alias of "*output".
76
- size_t Compress(const char* input, size_t input_length, string* output);
77
-
78
- // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
77
+ // REQUIRES: "input[]" is not an alias of "*compressed".
78
+ size_t Compress(const char* input, size_t input_length,
79
+ std::string* compressed);
80
+
81
+ // Same as `Compress` above but taking an `iovec` array as input. Note that
82
+ // this function preprocesses the inputs to compute the sum of
83
+ // `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use
84
+ // `RawCompressFromIOVec` below.
85
+ size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
86
+ std::string* compressed);
87
+
88
+ // Decompresses "compressed[0..compressed_length-1]" to "*uncompressed".
79
89
  // Original contents of "*uncompressed" are lost.
80
90
  //
81
91
  // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
82
92
  //
83
93
  // returns false if the message is corrupted and could not be decompressed
84
94
  bool Uncompress(const char* compressed, size_t compressed_length,
85
- string* uncompressed);
95
+ std::string* uncompressed);
86
96
 
87
97
  // Decompresses "compressed" to "*uncompressed".
88
98
  //
@@ -121,6 +131,12 @@ namespace snappy {
121
131
  char* compressed,
122
132
  size_t* compressed_length);
123
133
 
134
+ // Same as `RawCompress` above but taking an `iovec` array as input. Note that
135
+ // `uncompressed_length` is the total number of bytes to be read from the
136
+ // elements of `iov` (_not_ the number of elements in `iov`).
137
+ void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
138
+ char* compressed, size_t* compressed_length);
139
+
124
140
  // Given data in "compressed[0..compressed_length-1]" generated by
125
141
  // calling the Snappy::Compress routine, this routine
126
142
  // stores the uncompressed data to
@@ -193,11 +209,14 @@ namespace snappy {
193
209
  // Note that there might be older data around that is compressed with larger
194
210
  // block sizes, so the decompression code should not rely on the
195
211
  // non-existence of long backreferences.
196
- static const int kBlockLog = 16;
197
- static const size_t kBlockSize = 1 << kBlockLog;
212
+ static constexpr int kBlockLog = 16;
213
+ static constexpr size_t kBlockSize = 1 << kBlockLog;
214
+
215
+ static constexpr int kMinHashTableBits = 8;
216
+ static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
198
217
 
199
- static const int kMaxHashTableBits = 14;
200
- static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
218
+ static constexpr int kMaxHashTableBits = 14;
219
+ static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
201
220
  } // end namespace snappy
202
221
 
203
222
  #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__
@@ -0,0 +1,378 @@
1
+ // Copyright 2020 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ #include <cstddef>
30
+ #include <cstdint>
31
+ #include <string>
32
+ #include <vector>
33
+
34
+ #include "snappy-test.h"
35
+
36
+ #include "benchmark/benchmark.h"
37
+
38
+ #include "snappy-internal.h"
39
+ #include "snappy-sinksource.h"
40
+ #include "snappy.h"
41
+ #include "snappy_test_data.h"
42
+
43
+ namespace snappy {
44
+
45
+ namespace {
46
+
47
+ void BM_UFlat(benchmark::State& state) {
48
+ // Pick file to process based on state.range(0).
49
+ int file_index = state.range(0);
50
+
51
+ CHECK_GE(file_index, 0);
52
+ CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
53
+ std::string contents =
54
+ ReadTestDataFile(kTestDataFiles[file_index].filename,
55
+ kTestDataFiles[file_index].size_limit);
56
+
57
+ std::string zcontents;
58
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
59
+ char* dst = new char[contents.size()];
60
+
61
+ for (auto s : state) {
62
+ CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
63
+ benchmark::DoNotOptimize(dst);
64
+ }
65
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
66
+ static_cast<int64_t>(contents.size()));
67
+ state.SetLabel(kTestDataFiles[file_index].label);
68
+
69
+ delete[] dst;
70
+ }
71
+ BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
72
+
73
+ struct SourceFiles {
74
+ SourceFiles() {
75
+ for (int i = 0; i < kFiles; i++) {
76
+ std::string contents = ReadTestDataFile(kTestDataFiles[i].filename,
77
+ kTestDataFiles[i].size_limit);
78
+ max_size = std::max(max_size, contents.size());
79
+ sizes[i] = contents.size();
80
+ snappy::Compress(contents.data(), contents.size(), &zcontents[i]);
81
+ }
82
+ }
83
+ static constexpr int kFiles = ARRAYSIZE(kTestDataFiles);
84
+ std::string zcontents[kFiles];
85
+ size_t sizes[kFiles];
86
+ size_t max_size = 0;
87
+ };
88
+
89
+ void BM_UFlatMedley(benchmark::State& state) {
90
+ static const SourceFiles* const source = new SourceFiles();
91
+
92
+ std::vector<char> dst(source->max_size);
93
+
94
+ for (auto s : state) {
95
+ for (int i = 0; i < SourceFiles::kFiles; i++) {
96
+ CHECK(snappy::RawUncompress(source->zcontents[i].data(),
97
+ source->zcontents[i].size(), dst.data()));
98
+ benchmark::DoNotOptimize(dst);
99
+ }
100
+ }
101
+
102
+ int64_t source_sizes = 0;
103
+ for (int i = 0; i < SourceFiles::kFiles; i++) {
104
+ source_sizes += static_cast<int64_t>(source->sizes[i]);
105
+ }
106
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
107
+ source_sizes);
108
+ }
109
+ BENCHMARK(BM_UFlatMedley);
110
+
111
+ void BM_UValidate(benchmark::State& state) {
112
+ // Pick file to process based on state.range(0).
113
+ int file_index = state.range(0);
114
+
115
+ CHECK_GE(file_index, 0);
116
+ CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
117
+ std::string contents =
118
+ ReadTestDataFile(kTestDataFiles[file_index].filename,
119
+ kTestDataFiles[file_index].size_limit);
120
+
121
+ std::string zcontents;
122
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
123
+
124
+ for (auto s : state) {
125
+ CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
126
+ }
127
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
128
+ static_cast<int64_t>(contents.size()));
129
+ state.SetLabel(kTestDataFiles[file_index].label);
130
+ }
131
+ BENCHMARK(BM_UValidate)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
132
+
133
+ void BM_UValidateMedley(benchmark::State& state) {
134
+ static const SourceFiles* const source = new SourceFiles();
135
+
136
+ for (auto s : state) {
137
+ for (int i = 0; i < SourceFiles::kFiles; i++) {
138
+ CHECK(snappy::IsValidCompressedBuffer(source->zcontents[i].data(),
139
+ source->zcontents[i].size()));
140
+ }
141
+ }
142
+
143
+ int64_t source_sizes = 0;
144
+ for (int i = 0; i < SourceFiles::kFiles; i++) {
145
+ source_sizes += static_cast<int64_t>(source->sizes[i]);
146
+ }
147
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
148
+ source_sizes);
149
+ }
150
+ BENCHMARK(BM_UValidateMedley);
151
+
152
+ void BM_UIOVecSource(benchmark::State& state) {
153
+ // Pick file to process based on state.range(0).
154
+ int file_index = state.range(0);
155
+
156
+ CHECK_GE(file_index, 0);
157
+ CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
158
+ std::string contents =
159
+ ReadTestDataFile(kTestDataFiles[file_index].filename,
160
+ kTestDataFiles[file_index].size_limit);
161
+
162
+ // Create `iovec`s of the `contents`.
163
+ const int kNumEntries = 10;
164
+ struct iovec iov[kNumEntries];
165
+ size_t used_so_far = 0;
166
+ for (int i = 0; i < kNumEntries; ++i) {
167
+ iov[i].iov_base = const_cast<char*>(contents.data()) + used_so_far;
168
+ if (used_so_far == contents.size()) {
169
+ iov[i].iov_len = 0;
170
+ continue;
171
+ }
172
+ if (i == kNumEntries - 1) {
173
+ iov[i].iov_len = contents.size() - used_so_far;
174
+ } else {
175
+ iov[i].iov_len = contents.size() / kNumEntries;
176
+ }
177
+ used_so_far += iov[i].iov_len;
178
+ }
179
+
180
+ char* dst = new char[snappy::MaxCompressedLength(contents.size())];
181
+ size_t zsize = 0;
182
+ for (auto s : state) {
183
+ snappy::RawCompressFromIOVec(iov, contents.size(), dst, &zsize);
184
+ benchmark::DoNotOptimize(iov);
185
+ }
186
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
187
+ static_cast<int64_t>(contents.size()));
188
+ const double compression_ratio =
189
+ static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
190
+ state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label,
191
+ 100.0 * compression_ratio));
192
+ VLOG(0) << StrFormat("compression for %s: %d -> %d bytes",
193
+ kTestDataFiles[file_index].label, contents.size(),
194
+ zsize);
195
+
196
+ delete[] dst;
197
+ }
198
+ BENCHMARK(BM_UIOVecSource)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
199
+
200
+ void BM_UIOVecSink(benchmark::State& state) {
201
+ // Pick file to process based on state.range(0).
202
+ int file_index = state.range(0);
203
+
204
+ CHECK_GE(file_index, 0);
205
+ CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
206
+ std::string contents =
207
+ ReadTestDataFile(kTestDataFiles[file_index].filename,
208
+ kTestDataFiles[file_index].size_limit);
209
+
210
+ std::string zcontents;
211
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
212
+
213
+ // Uncompress into an iovec containing ten entries.
214
+ const int kNumEntries = 10;
215
+ struct iovec iov[kNumEntries];
216
+ char *dst = new char[contents.size()];
217
+ size_t used_so_far = 0;
218
+ for (int i = 0; i < kNumEntries; ++i) {
219
+ iov[i].iov_base = dst + used_so_far;
220
+ if (used_so_far == contents.size()) {
221
+ iov[i].iov_len = 0;
222
+ continue;
223
+ }
224
+
225
+ if (i == kNumEntries - 1) {
226
+ iov[i].iov_len = contents.size() - used_so_far;
227
+ } else {
228
+ iov[i].iov_len = contents.size() / kNumEntries;
229
+ }
230
+ used_so_far += iov[i].iov_len;
231
+ }
232
+
233
+ for (auto s : state) {
234
+ CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
235
+ kNumEntries));
236
+ benchmark::DoNotOptimize(iov);
237
+ }
238
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
239
+ static_cast<int64_t>(contents.size()));
240
+ state.SetLabel(kTestDataFiles[file_index].label);
241
+
242
+ delete[] dst;
243
+ }
244
+ BENCHMARK(BM_UIOVecSink)->DenseRange(0, 4);
245
+
246
+ void BM_UFlatSink(benchmark::State& state) {
247
+ // Pick file to process based on state.range(0).
248
+ int file_index = state.range(0);
249
+
250
+ CHECK_GE(file_index, 0);
251
+ CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
252
+ std::string contents =
253
+ ReadTestDataFile(kTestDataFiles[file_index].filename,
254
+ kTestDataFiles[file_index].size_limit);
255
+
256
+ std::string zcontents;
257
+ snappy::Compress(contents.data(), contents.size(), &zcontents);
258
+ char* dst = new char[contents.size()];
259
+
260
+ for (auto s : state) {
261
+ snappy::ByteArraySource source(zcontents.data(), zcontents.size());
262
+ snappy::UncheckedByteArraySink sink(dst);
263
+ CHECK(snappy::Uncompress(&source, &sink));
264
+ benchmark::DoNotOptimize(sink);
265
+ }
266
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
267
+ static_cast<int64_t>(contents.size()));
268
+ state.SetLabel(kTestDataFiles[file_index].label);
269
+
270
+ std::string s(dst, contents.size());
271
+ CHECK_EQ(contents, s);
272
+
273
+ delete[] dst;
274
+ }
275
+
276
+ BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
277
+
278
+ void BM_ZFlat(benchmark::State& state) {
279
+ // Pick file to process based on state.range(0).
280
+ int file_index = state.range(0);
281
+
282
+ CHECK_GE(file_index, 0);
283
+ CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
284
+ std::string contents =
285
+ ReadTestDataFile(kTestDataFiles[file_index].filename,
286
+ kTestDataFiles[file_index].size_limit);
287
+ char* dst = new char[snappy::MaxCompressedLength(contents.size())];
288
+
289
+ size_t zsize = 0;
290
+ for (auto s : state) {
291
+ snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
292
+ benchmark::DoNotOptimize(dst);
293
+ }
294
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
295
+ static_cast<int64_t>(contents.size()));
296
+ const double compression_ratio =
297
+ static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
298
+ state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label,
299
+ 100.0 * compression_ratio));
300
+ VLOG(0) << StrFormat("compression for %s: %d -> %d bytes",
301
+ kTestDataFiles[file_index].label, contents.size(),
302
+ zsize);
303
+ delete[] dst;
304
+ }
305
+ BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(kTestDataFiles) - 1);
306
+
307
+ void BM_ZFlatAll(benchmark::State& state) {
308
+ const int num_files = ARRAYSIZE(kTestDataFiles);
309
+
310
+ std::vector<std::string> contents(num_files);
311
+ std::vector<char*> dst(num_files);
312
+
313
+ int64_t total_contents_size = 0;
314
+ for (int i = 0; i < num_files; ++i) {
315
+ contents[i] = ReadTestDataFile(kTestDataFiles[i].filename,
316
+ kTestDataFiles[i].size_limit);
317
+ dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
318
+ total_contents_size += contents[i].size();
319
+ }
320
+
321
+ size_t zsize = 0;
322
+ for (auto s : state) {
323
+ for (int i = 0; i < num_files; ++i) {
324
+ snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
325
+ &zsize);
326
+ benchmark::DoNotOptimize(dst);
327
+ }
328
+ }
329
+
330
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
331
+ total_contents_size);
332
+
333
+ for (char* dst_item : dst) {
334
+ delete[] dst_item;
335
+ }
336
+ state.SetLabel(StrFormat("%d kTestDataFiles", num_files));
337
+ }
338
+ BENCHMARK(BM_ZFlatAll);
339
+
340
+ void BM_ZFlatIncreasingTableSize(benchmark::State& state) {
341
+ CHECK_GT(ARRAYSIZE(kTestDataFiles), 0);
342
+ const std::string base_content = ReadTestDataFile(
343
+ kTestDataFiles[0].filename, kTestDataFiles[0].size_limit);
344
+
345
+ std::vector<std::string> contents;
346
+ std::vector<char*> dst;
347
+ int64_t total_contents_size = 0;
348
+ for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
349
+ ++table_bits) {
350
+ std::string content = base_content;
351
+ content.resize(1 << table_bits);
352
+ dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
353
+ total_contents_size += content.size();
354
+ contents.push_back(std::move(content));
355
+ }
356
+
357
+ size_t zsize = 0;
358
+ for (auto s : state) {
359
+ for (size_t i = 0; i < contents.size(); ++i) {
360
+ snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
361
+ &zsize);
362
+ benchmark::DoNotOptimize(dst);
363
+ }
364
+ }
365
+
366
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
367
+ total_contents_size);
368
+
369
+ for (char* dst_item : dst) {
370
+ delete[] dst_item;
371
+ }
372
+ state.SetLabel(StrFormat("%d tables", contents.size()));
373
+ }
374
+ BENCHMARK(BM_ZFlatIncreasingTableSize);
375
+
376
+ } // namespace
377
+
378
+ } // namespace snappy
@@ -0,0 +1,60 @@
1
+ // Copyright 2019 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // libFuzzer harness for fuzzing snappy compression code.
30
+
31
+ #include <stddef.h>
32
+ #include <stdint.h>
33
+
34
+ #include <cassert>
35
+ #include <string>
36
+
37
+ #include "snappy.h"
38
+
39
+ // Entry point for LibFuzzer.
40
+ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
41
+ std::string input(reinterpret_cast<const char*>(data), size);
42
+
43
+ std::string compressed;
44
+ size_t compressed_size =
45
+ snappy::Compress(input.data(), input.size(), &compressed);
46
+
47
+ (void)compressed_size; // Variable only used in debug builds.
48
+ assert(compressed_size == compressed.size());
49
+ assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
50
+ assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
51
+
52
+ std::string uncompressed_after_compress;
53
+ bool uncompress_succeeded = snappy::Uncompress(
54
+ compressed.data(), compressed.size(), &uncompressed_after_compress);
55
+
56
+ (void)uncompress_succeeded; // Variable only used in debug builds.
57
+ assert(uncompress_succeeded);
58
+ assert(input == uncompressed_after_compress);
59
+ return 0;
60
+ }
@@ -0,0 +1,57 @@
1
+ // Copyright 2020 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // Support code for reading test data.
30
+
31
+ #include "snappy_test_data.h"
32
+
33
+ #include <cstddef>
34
+ #include <cstdlib>
35
+ #include <string>
36
+
37
+ #include "snappy-test.h"
38
+
39
+ namespace snappy {
40
+
41
+ std::string ReadTestDataFile(const char* base, size_t size_limit) {
42
+ std::string srcdir;
43
+ const char* srcdir_env = std::getenv("srcdir"); // This is set by Automake.
44
+ if (srcdir_env) {
45
+ srcdir = std::string(srcdir_env) + "/";
46
+ }
47
+
48
+ std::string contents;
49
+ CHECK_OK(file::GetContents(srcdir + "testdata/" + base, &contents,
50
+ file::Defaults()));
51
+ if (size_limit > 0) {
52
+ contents = contents.substr(0, size_limit);
53
+ }
54
+ return contents;
55
+ }
56
+
57
+ } // namespace snappy
@@ -0,0 +1,68 @@
1
+ // Copyright 2020 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // List of test case files.
30
+
31
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
32
+ #define THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
33
+
34
+ #include <cstddef>
35
+ #include <string>
36
+
37
+ namespace snappy {
38
+
39
+ std::string ReadTestDataFile(const char* base, size_t size_limit);
40
+
41
+ // TODO: Replace anonymous namespace with inline variable when we can
42
+ // rely on C++17.
43
+ namespace {
44
+
45
// Table of test data files. Each entry describes one input; the same file may
// appear more than once with different size limits.
constexpr struct {
  const char* label;     // Short name used to label benchmark results.
  const char* filename;  // File name passed to ReadTestDataFile().
  size_t size_limit;     // Byte limit passed to ReadTestDataFile(); 0 = none.
} kTestDataFiles[] = {
    // Web content.
    {"html", "html", 0},
    {"urls", "urls.10K", 0},
    // Binary documents and images.
    {"jpg", "fireworks.jpeg", 0},
    {"jpg_200", "fireworks.jpeg", 200},
    {"pdf", "paper-100k.pdf", 0},
    {"html4", "html_x_4", 0},
    // Plain text.
    {"txt1", "alice29.txt", 0},
    {"txt2", "asyoulik.txt", 0},
    {"txt3", "lcet10.txt", 0},
    {"txt4", "plrabn12.txt", 0},
    // Other binary data.
    {"pb", "geo.protodata", 0},
    {"gaviota", "kppkn.gtb", 0},
};
63
+
64
+ } // namespace
65
+
66
+ } // namespace snappy
67
+
68
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__