snappy 0.0.13 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +28 -1
  3. data/Gemfile +6 -1
  4. data/README.md +28 -4
  5. data/Rakefile +1 -0
  6. data/ext/extconf.rb +21 -24
  7. data/lib/snappy.rb +3 -1
  8. data/lib/snappy/hadoop.rb +22 -0
  9. data/lib/snappy/hadoop/reader.rb +58 -0
  10. data/lib/snappy/hadoop/writer.rb +51 -0
  11. data/lib/snappy/reader.rb +11 -7
  12. data/lib/snappy/shim.rb +30 -0
  13. data/lib/snappy/version.rb +3 -1
  14. data/lib/snappy/writer.rb +8 -9
  15. data/smoke.sh +8 -0
  16. data/snappy.gemspec +6 -30
  17. data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
  18. data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
  19. data/test/test-snappy-hadoop.rb +22 -0
  20. data/vendor/snappy/AUTHORS +1 -0
  21. data/vendor/snappy/CMakeLists.txt +174 -0
  22. data/vendor/snappy/CONTRIBUTING.md +26 -0
  23. data/vendor/snappy/COPYING +54 -0
  24. data/vendor/snappy/NEWS +180 -0
  25. data/vendor/snappy/README.md +149 -0
  26. data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
  27. data/vendor/snappy/cmake/config.h.in +62 -0
  28. data/vendor/snappy/format_description.txt +110 -0
  29. data/vendor/snappy/framing_format.txt +135 -0
  30. data/vendor/snappy/snappy-c.cc +90 -0
  31. data/vendor/snappy/snappy-c.h +138 -0
  32. data/vendor/snappy/snappy-internal.h +224 -0
  33. data/vendor/snappy/snappy-sinksource.cc +104 -0
  34. data/vendor/snappy/snappy-sinksource.h +182 -0
  35. data/vendor/snappy/snappy-stubs-internal.cc +42 -0
  36. data/vendor/snappy/snappy-stubs-internal.h +561 -0
  37. data/vendor/snappy/snappy-stubs-public.h.in +94 -0
  38. data/vendor/snappy/snappy-test.cc +612 -0
  39. data/vendor/snappy/snappy-test.h +573 -0
  40. data/vendor/snappy/snappy.cc +1515 -0
  41. data/vendor/snappy/snappy.h +203 -0
  42. data/vendor/snappy/snappy_unittest.cc +1410 -0
  43. metadata +38 -46
@@ -0,0 +1,94 @@
1
+ // Copyright 2011 Google Inc. All Rights Reserved.
2
+ // Author: sesse@google.com (Steinar H. Gunderson)
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ //
30
+ // Various type stubs for the open-source version of Snappy.
31
+ //
32
+ // This file cannot include config.h, as it is included from snappy.h,
33
+ // which is a public header. Instead, snappy-stubs-public.h is generated
34
+ // from snappy-stubs-public.h.in at configure time.
35
+
36
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
37
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
38
+
39
+ #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H
40
+ #include <stdint.h>
41
+ #endif  // HAVE_STDINT_H
42
+
43
+ #if ${HAVE_STDDEF_H_01} // HAVE_STDDEF_H
44
+ #include <stddef.h>
45
+ #endif // HAVE_STDDEF_H
46
+
47
+ #if ${HAVE_SYS_UIO_H_01} // HAVE_SYS_UIO_H
48
+ #include <sys/uio.h>
49
+ #endif // HAVE_SYS_UIO_H
50
+
51
+ #define SNAPPY_MAJOR ${SNAPPY_MAJOR}
52
+ #define SNAPPY_MINOR ${SNAPPY_MINOR}
53
+ #define SNAPPY_PATCHLEVEL ${SNAPPY_PATCHLEVEL}
54
+ #define SNAPPY_VERSION \
55
+ ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
56
+
57
+ #include <string>
58
+
59
+ namespace snappy {
60
+
61
+ #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H
62
+ typedef int8_t int8;
63
+ typedef uint8_t uint8;
64
+ typedef int16_t int16;
65
+ typedef uint16_t uint16;
66
+ typedef int32_t int32;
67
+ typedef uint32_t uint32;
68
+ typedef int64_t int64;
69
+ typedef uint64_t uint64;
70
+ #else
71
+ typedef signed char int8;
72
+ typedef unsigned char uint8;
73
+ typedef short int16;
74
+ typedef unsigned short uint16;
75
+ typedef int int32;
76
+ typedef unsigned int uint32;
77
+ typedef long long int64;
78
+ typedef unsigned long long uint64;
79
+ #endif // HAVE_STDINT_H
80
+
81
+ typedef std::string string;
82
+
83
+ #if !${HAVE_SYS_UIO_H_01} // !HAVE_SYS_UIO_H
84
+ // Windows does not have an iovec type, yet the concept is universally useful.
85
+ // It is simple to define it ourselves, so we put it inside our own namespace.
86
+ struct iovec {
87
+ void* iov_base;
88
+ size_t iov_len;
89
+ };
90
+ #endif // !HAVE_SYS_UIO_H
91
+
92
+ } // namespace snappy
93
+
94
+ #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
@@ -0,0 +1,612 @@
1
+ // Copyright 2011 Google Inc. All Rights Reserved.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ //
29
+ // Various stubs for the unit tests for the open-source version of Snappy.
30
+
31
+ #ifdef HAVE_CONFIG_H
32
+ #include "config.h"
33
+ #endif
34
+
35
+ #ifdef HAVE_WINDOWS_H
36
+ // Needed to be able to use std::max without workarounds in the source code.
37
+ // https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts
38
+ #define NOMINMAX
39
+ #include <windows.h>
40
+ #endif
41
+
42
+ #include "snappy-test.h"
43
+
44
+ #include <algorithm>
45
+
46
+ DEFINE_bool(run_microbenchmarks, true,
47
+ "Run microbenchmarks before doing anything else.");
48
+
49
+ namespace snappy {
50
+
51
+ string ReadTestDataFile(const string& base, size_t size_limit) {
52
+ string contents;
53
+ const char* srcdir = getenv("srcdir"); // This is set by Automake.
54
+ string prefix;
55
+ if (srcdir) {
56
+ prefix = string(srcdir) + "/";
57
+ }
58
+ file::GetContents(prefix + "testdata/" + base, &contents, file::Defaults()
59
+ ).CheckSuccess();
60
+ if (size_limit > 0) {
61
+ contents = contents.substr(0, size_limit);
62
+ }
63
+ return contents;
64
+ }
65
+
66
+ string ReadTestDataFile(const string& base) {
67
+ return ReadTestDataFile(base, 0);
68
+ }
69
+
70
+ string StringPrintf(const char* format, ...) {
71
+ char buf[4096];
72
+ va_list ap;
73
+ va_start(ap, format);
74
+ vsnprintf(buf, sizeof(buf), format, ap);
75
+ va_end(ap);
76
+ return buf;
77
+ }
78
+
79
+ bool benchmark_running = false;
80
+ int64 benchmark_real_time_us = 0;
81
+ int64 benchmark_cpu_time_us = 0;
82
+ string *benchmark_label = NULL;
83
+ int64 benchmark_bytes_processed = 0;
84
+
85
+ void ResetBenchmarkTiming() {
86
+ benchmark_real_time_us = 0;
87
+ benchmark_cpu_time_us = 0;
88
+ }
89
+
90
+ #ifdef WIN32
91
+ LARGE_INTEGER benchmark_start_real;
92
+ FILETIME benchmark_start_cpu;
93
+ #else // WIN32
94
+ struct timeval benchmark_start_real;
95
+ struct rusage benchmark_start_cpu;
96
+ #endif // WIN32
97
+
98
+ void StartBenchmarkTiming() {
99
+ #ifdef WIN32
100
+ QueryPerformanceCounter(&benchmark_start_real);
101
+ FILETIME dummy;
102
+ CHECK(GetProcessTimes(
103
+ GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_start_cpu));
104
+ #else
105
+ gettimeofday(&benchmark_start_real, NULL);
106
+ if (getrusage(RUSAGE_SELF, &benchmark_start_cpu) == -1) {
107
+ perror("getrusage(RUSAGE_SELF)");
108
+ exit(1);
109
+ }
110
+ #endif
111
+ benchmark_running = true;
112
+ }
113
+
114
+ void StopBenchmarkTiming() {
115
+ if (!benchmark_running) {
116
+ return;
117
+ }
118
+
119
+ #ifdef WIN32
120
+ LARGE_INTEGER benchmark_stop_real;
121
+ LARGE_INTEGER benchmark_frequency;
122
+ QueryPerformanceCounter(&benchmark_stop_real);
123
+ QueryPerformanceFrequency(&benchmark_frequency);
124
+
125
+ double elapsed_real = static_cast<double>(
126
+ benchmark_stop_real.QuadPart - benchmark_start_real.QuadPart) /
127
+ benchmark_frequency.QuadPart;
128
+ benchmark_real_time_us += elapsed_real * 1e6 + 0.5;
129
+
130
+ FILETIME benchmark_stop_cpu, dummy;
131
+ CHECK(GetProcessTimes(
132
+ GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_stop_cpu));
133
+
134
+ ULARGE_INTEGER start_ulargeint;
135
+ start_ulargeint.LowPart = benchmark_start_cpu.dwLowDateTime;
136
+ start_ulargeint.HighPart = benchmark_start_cpu.dwHighDateTime;
137
+
138
+ ULARGE_INTEGER stop_ulargeint;
139
+ stop_ulargeint.LowPart = benchmark_stop_cpu.dwLowDateTime;
140
+ stop_ulargeint.HighPart = benchmark_stop_cpu.dwHighDateTime;
141
+
142
+ benchmark_cpu_time_us +=
143
+ (stop_ulargeint.QuadPart - start_ulargeint.QuadPart + 5) / 10;
144
+ #else // WIN32
145
+ struct timeval benchmark_stop_real;
146
+ gettimeofday(&benchmark_stop_real, NULL);
147
+ benchmark_real_time_us +=
148
+ 1000000 * (benchmark_stop_real.tv_sec - benchmark_start_real.tv_sec);
149
+ benchmark_real_time_us +=
150
+ (benchmark_stop_real.tv_usec - benchmark_start_real.tv_usec);
151
+
152
+ struct rusage benchmark_stop_cpu;
153
+ if (getrusage(RUSAGE_SELF, &benchmark_stop_cpu) == -1) {
154
+ perror("getrusage(RUSAGE_SELF)");
155
+ exit(1);
156
+ }
157
+ benchmark_cpu_time_us += 1000000 * (benchmark_stop_cpu.ru_utime.tv_sec -
158
+ benchmark_start_cpu.ru_utime.tv_sec);
159
+ benchmark_cpu_time_us += (benchmark_stop_cpu.ru_utime.tv_usec -
160
+ benchmark_start_cpu.ru_utime.tv_usec);
161
+ #endif // WIN32
162
+
163
+ benchmark_running = false;
164
+ }
165
+
166
+ void SetBenchmarkLabel(const string& str) {
167
+ if (benchmark_label) {
168
+ delete benchmark_label;
169
+ }
170
+ benchmark_label = new string(str);
171
+ }
172
+
173
+ void SetBenchmarkBytesProcessed(int64 bytes) {
174
+ benchmark_bytes_processed = bytes;
175
+ }
176
+
177
+ struct BenchmarkRun {
178
+ int64 real_time_us;
179
+ int64 cpu_time_us;
180
+ };
181
+
182
+ struct BenchmarkCompareCPUTime {
183
+ bool operator() (const BenchmarkRun& a, const BenchmarkRun& b) const {
184
+ return a.cpu_time_us < b.cpu_time_us;
185
+ }
186
+ };
187
+
188
+ void Benchmark::Run() {
189
+ for (int test_case_num = start_; test_case_num <= stop_; ++test_case_num) {
190
+ // Run a few iterations first to find out approximately how fast
191
+ // the benchmark is.
192
+ const int kCalibrateIterations = 100;
193
+ ResetBenchmarkTiming();
194
+ StartBenchmarkTiming();
195
+ (*function_)(kCalibrateIterations, test_case_num);
196
+ StopBenchmarkTiming();
197
+
198
+ // Let each test case run for about 200ms, but at least as many
199
+ // as we used to calibrate.
200
+ // Run five times and pick the median.
201
+ const int kNumRuns = 5;
202
+ const int kMedianPos = kNumRuns / 2;
203
+ int num_iterations = 0;
204
+ if (benchmark_real_time_us > 0) {
205
+ num_iterations = 200000 * kCalibrateIterations / benchmark_real_time_us;
206
+ }
207
+ num_iterations = std::max(num_iterations, kCalibrateIterations);
208
+ BenchmarkRun benchmark_runs[kNumRuns];
209
+
210
+ for (int run = 0; run < kNumRuns; ++run) {
211
+ ResetBenchmarkTiming();
212
+ StartBenchmarkTiming();
213
+ (*function_)(num_iterations, test_case_num);
214
+ StopBenchmarkTiming();
215
+
216
+ benchmark_runs[run].real_time_us = benchmark_real_time_us;
217
+ benchmark_runs[run].cpu_time_us = benchmark_cpu_time_us;
218
+ }
219
+
220
+ string heading = StringPrintf("%s/%d", name_.c_str(), test_case_num);
221
+ string human_readable_speed;
222
+
223
+ std::nth_element(benchmark_runs,
224
+ benchmark_runs + kMedianPos,
225
+ benchmark_runs + kNumRuns,
226
+ BenchmarkCompareCPUTime());
227
+ int64 real_time_us = benchmark_runs[kMedianPos].real_time_us;
228
+ int64 cpu_time_us = benchmark_runs[kMedianPos].cpu_time_us;
229
+ if (cpu_time_us <= 0) {
230
+ human_readable_speed = "?";
231
+ } else {
232
+ int64 bytes_per_second =
233
+ benchmark_bytes_processed * 1000000 / cpu_time_us;
234
+ if (bytes_per_second < 1024) {
235
+ human_readable_speed = StringPrintf("%dB/s", bytes_per_second);
236
+ } else if (bytes_per_second < 1024 * 1024) {
237
+ human_readable_speed = StringPrintf(
238
+ "%.1fkB/s", bytes_per_second / 1024.0f);
239
+ } else if (bytes_per_second < 1024 * 1024 * 1024) {
240
+ human_readable_speed = StringPrintf(
241
+ "%.1fMB/s", bytes_per_second / (1024.0f * 1024.0f));
242
+ } else {
243
+ human_readable_speed = StringPrintf(
244
+ "%.1fGB/s", bytes_per_second / (1024.0f * 1024.0f * 1024.0f));
245
+ }
246
+ }
247
+
248
+ fprintf(stderr,
249
+ #ifdef WIN32
250
+ "%-18s %10I64d %10I64d %10d %s %s\n",
251
+ #else
252
+ "%-18s %10lld %10lld %10d %s %s\n",
253
+ #endif
254
+ heading.c_str(),
255
+ static_cast<long long>(real_time_us * 1000 / num_iterations),
256
+ static_cast<long long>(cpu_time_us * 1000 / num_iterations),
257
+ num_iterations,
258
+ human_readable_speed.c_str(),
259
+ benchmark_label->c_str());
260
+ }
261
+ }
262
+
263
+ #ifdef HAVE_LIBZ
264
+
265
+ ZLib::ZLib()
266
+ : comp_init_(false),
267
+ uncomp_init_(false) {
268
+ Reinit();
269
+ }
270
+
271
+ ZLib::~ZLib() {
272
+ if (comp_init_) { deflateEnd(&comp_stream_); }
273
+ if (uncomp_init_) { inflateEnd(&uncomp_stream_); }
274
+ }
275
+
276
+ void ZLib::Reinit() {
277
+ compression_level_ = Z_DEFAULT_COMPRESSION;
278
+ window_bits_ = MAX_WBITS;
279
+ mem_level_ = 8; // DEF_MEM_LEVEL
280
+ if (comp_init_) {
281
+ deflateEnd(&comp_stream_);
282
+ comp_init_ = false;
283
+ }
284
+ if (uncomp_init_) {
285
+ inflateEnd(&uncomp_stream_);
286
+ uncomp_init_ = false;
287
+ }
288
+ first_chunk_ = true;
289
+ }
290
+
291
+ void ZLib::Reset() {
292
+ first_chunk_ = true;
293
+ }
294
+
295
+ // --------- COMPRESS MODE
296
+
297
+ // Initialization method to be called if we hit an error while
298
+ // compressing. On hitting an error, call this method before returning
299
+ // the error.
300
+ void ZLib::CompressErrorInit() {
301
+ deflateEnd(&comp_stream_);
302
+ comp_init_ = false;
303
+ Reset();
304
+ }
305
+
306
+ int ZLib::DeflateInit() {
307
+ return deflateInit2(&comp_stream_,
308
+ compression_level_,
309
+ Z_DEFLATED,
310
+ window_bits_,
311
+ mem_level_,
312
+ Z_DEFAULT_STRATEGY);
313
+ }
314
+
315
+ int ZLib::CompressInit(Bytef *dest, uLongf *destLen,
316
+ const Bytef *source, uLong *sourceLen) {
317
+ int err;
318
+
319
+ comp_stream_.next_in = (Bytef*)source;
320
+ comp_stream_.avail_in = (uInt)*sourceLen;
321
+ if ((uLong)comp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR;
322
+ comp_stream_.next_out = dest;
323
+ comp_stream_.avail_out = (uInt)*destLen;
324
+ if ((uLong)comp_stream_.avail_out != *destLen) return Z_BUF_ERROR;
325
+
326
+ if ( !first_chunk_ ) // only need to set up stream the first time through
327
+ return Z_OK;
328
+
329
+ if (comp_init_) { // we've already initted it
330
+ err = deflateReset(&comp_stream_);
331
+ if (err != Z_OK) {
332
+ LOG(WARNING) << "ERROR: Can't reset compress object; creating a new one";
333
+ deflateEnd(&comp_stream_);
334
+ comp_init_ = false;
335
+ }
336
+ }
337
+ if (!comp_init_) { // first use
338
+ comp_stream_.zalloc = (alloc_func)0;
339
+ comp_stream_.zfree = (free_func)0;
340
+ comp_stream_.opaque = (voidpf)0;
341
+ err = DeflateInit();
342
+ if (err != Z_OK) return err;
343
+ comp_init_ = true;
344
+ }
345
+ return Z_OK;
346
+ }
347
+
348
+ // In a perfect world we'd always have the full buffer to compress
349
+ // when the time came, and we could just call Compress(). Alas, we
350
+ // want to do chunked compression on our webserver. In this
351
+ // application, we compress the header, send it off, then compress the
352
+ // results, send them off, then compress the footer. Thus we need to
353
+ // use the chunked compression features of zlib.
354
+ int ZLib::CompressAtMostOrAll(Bytef *dest, uLongf *destLen,
355
+ const Bytef *source, uLong *sourceLen,
356
+ int flush_mode) { // Z_FULL_FLUSH or Z_FINISH
357
+ int err;
358
+
359
+ if ( (err=CompressInit(dest, destLen, source, sourceLen)) != Z_OK )
360
+ return err;
361
+
362
+ // This is used to figure out how many bytes we wrote *this chunk*
363
+ int compressed_size = comp_stream_.total_out;
364
+
365
+ // Some setup happens only for the first chunk we compress in a run
366
+ if ( first_chunk_ ) {
367
+ first_chunk_ = false;
368
+ }
369
+
370
+ // flush_mode is Z_FINISH for all mode, Z_SYNC_FLUSH for incremental
371
+ // compression.
372
+ err = deflate(&comp_stream_, flush_mode);
373
+
374
+ *sourceLen = comp_stream_.avail_in;
375
+
376
+ if ((err == Z_STREAM_END || err == Z_OK)
377
+ && comp_stream_.avail_in == 0
378
+ && comp_stream_.avail_out != 0 ) {
379
+ // we processed everything ok and the output buffer was large enough.
380
+ ;
381
+ } else if (err == Z_STREAM_END && comp_stream_.avail_in > 0) {
382
+ return Z_BUF_ERROR; // should never happen
383
+ } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
384
+ // an error happened
385
+ CompressErrorInit();
386
+ return err;
387
+ } else if (comp_stream_.avail_out == 0) { // not enough space
388
+ err = Z_BUF_ERROR;
389
+ }
390
+
391
+ assert(err == Z_OK || err == Z_STREAM_END || err == Z_BUF_ERROR);
392
+ if (err == Z_STREAM_END)
393
+ err = Z_OK;
394
+
395
+ // update the crc and other metadata
396
+ compressed_size = comp_stream_.total_out - compressed_size; // delta
397
+ *destLen = compressed_size;
398
+
399
+ return err;
400
+ }
401
+
402
+ int ZLib::CompressChunkOrAll(Bytef *dest, uLongf *destLen,
403
+ const Bytef *source, uLong sourceLen,
404
+ int flush_mode) { // Z_FULL_FLUSH or Z_FINISH
405
+ const int ret =
406
+ CompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode);
407
+ if (ret == Z_BUF_ERROR)
408
+ CompressErrorInit();
409
+ return ret;
410
+ }
411
+
412
+ // This routine only initializes the compression stream once. Thereafter, it
413
+ // just does a deflateReset on the stream, which should be faster.
414
+ int ZLib::Compress(Bytef *dest, uLongf *destLen,
415
+ const Bytef *source, uLong sourceLen) {
416
+ int err;
417
+ if ( (err=CompressChunkOrAll(dest, destLen, source, sourceLen,
418
+ Z_FINISH)) != Z_OK )
419
+ return err;
420
+ Reset(); // reset for next call to Compress
421
+
422
+ return Z_OK;
423
+ }
424
+
425
+
426
+ // --------- UNCOMPRESS MODE
427
+
428
+ int ZLib::InflateInit() {
429
+ return inflateInit2(&uncomp_stream_, MAX_WBITS);
430
+ }
431
+
432
+ // Initialization method to be called if we hit an error while
433
+ // uncompressing. On hitting an error, call this method before
434
+ // returning the error.
435
+ void ZLib::UncompressErrorInit() {
436
+ inflateEnd(&uncomp_stream_);
437
+ uncomp_init_ = false;
438
+ Reset();
439
+ }
440
+
441
+ int ZLib::UncompressInit(Bytef *dest, uLongf *destLen,
442
+ const Bytef *source, uLong *sourceLen) {
443
+ int err;
444
+
445
+ uncomp_stream_.next_in = (Bytef*)source;
446
+ uncomp_stream_.avail_in = (uInt)*sourceLen;
447
+ // Check for source > 64K on 16-bit machine:
448
+ if ((uLong)uncomp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR;
449
+
450
+ uncomp_stream_.next_out = dest;
451
+ uncomp_stream_.avail_out = (uInt)*destLen;
452
+ if ((uLong)uncomp_stream_.avail_out != *destLen) return Z_BUF_ERROR;
453
+
454
+ if ( !first_chunk_ ) // only need to set up stream the first time through
455
+ return Z_OK;
456
+
457
+ if (uncomp_init_) { // we've already initted it
458
+ err = inflateReset(&uncomp_stream_);
459
+ if (err != Z_OK) {
460
+ LOG(WARNING)
461
+ << "ERROR: Can't reset uncompress object; creating a new one";
462
+ UncompressErrorInit();
463
+ }
464
+ }
465
+ if (!uncomp_init_) {
466
+ uncomp_stream_.zalloc = (alloc_func)0;
467
+ uncomp_stream_.zfree = (free_func)0;
468
+ uncomp_stream_.opaque = (voidpf)0;
469
+ err = InflateInit();
470
+ if (err != Z_OK) return err;
471
+ uncomp_init_ = true;
472
+ }
473
+ return Z_OK;
474
+ }
475
+
476
+ // If you compressed your data a chunk at a time, with CompressChunk,
477
+ // you can uncompress it a chunk at a time with UncompressChunk.
478
+ // Only difference bewteen chunked and unchunked uncompression
479
+ // is the flush mode we use: Z_SYNC_FLUSH (chunked) or Z_FINISH (unchunked).
480
+ int ZLib::UncompressAtMostOrAll(Bytef *dest, uLongf *destLen,
481
+ const Bytef *source, uLong *sourceLen,
482
+ int flush_mode) { // Z_SYNC_FLUSH or Z_FINISH
483
+ int err = Z_OK;
484
+
485
+ if ( (err=UncompressInit(dest, destLen, source, sourceLen)) != Z_OK ) {
486
+ LOG(WARNING) << "UncompressInit: Error: " << err << " SourceLen: "
487
+ << *sourceLen;
488
+ return err;
489
+ }
490
+
491
+ // This is used to figure out how many output bytes we wrote *this chunk*:
492
+ const uLong old_total_out = uncomp_stream_.total_out;
493
+
494
+ // This is used to figure out how many input bytes we read *this chunk*:
495
+ const uLong old_total_in = uncomp_stream_.total_in;
496
+
497
+ // Some setup happens only for the first chunk we compress in a run
498
+ if ( first_chunk_ ) {
499
+ first_chunk_ = false; // so we don't do this again
500
+
501
+ // For the first chunk *only* (to avoid infinite troubles), we let
502
+ // there be no actual data to uncompress. This sometimes triggers
503
+ // when the input is only the gzip header, say.
504
+ if ( *sourceLen == 0 ) {
505
+ *destLen = 0;
506
+ return Z_OK;
507
+ }
508
+ }
509
+
510
+ // We'll uncompress as much as we can. If we end OK great, otherwise
511
+ // if we get an error that seems to be the gzip footer, we store the
512
+ // gzip footer and return OK, otherwise we return the error.
513
+
514
+ // flush_mode is Z_SYNC_FLUSH for chunked mode, Z_FINISH for all mode.
515
+ err = inflate(&uncomp_stream_, flush_mode);
516
+
517
+ // Figure out how many bytes of the input zlib slurped up:
518
+ const uLong bytes_read = uncomp_stream_.total_in - old_total_in;
519
+ CHECK_LE(source + bytes_read, source + *sourceLen);
520
+ *sourceLen = uncomp_stream_.avail_in;
521
+
522
+ if ((err == Z_STREAM_END || err == Z_OK) // everything went ok
523
+ && uncomp_stream_.avail_in == 0) { // and we read it all
524
+ ;
525
+ } else if (err == Z_STREAM_END && uncomp_stream_.avail_in > 0) {
526
+ LOG(WARNING)
527
+ << "UncompressChunkOrAll: Received some extra data, bytes total: "
528
+ << uncomp_stream_.avail_in << " bytes: "
529
+ << std::string(reinterpret_cast<const char *>(uncomp_stream_.next_in),
530
+ std::min(int(uncomp_stream_.avail_in), 20));
531
+ UncompressErrorInit();
532
+ return Z_DATA_ERROR; // what's the extra data for?
533
+ } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
534
+ // an error happened
535
+ LOG(WARNING) << "UncompressChunkOrAll: Error: " << err
536
+ << " avail_out: " << uncomp_stream_.avail_out;
537
+ UncompressErrorInit();
538
+ return err;
539
+ } else if (uncomp_stream_.avail_out == 0) {
540
+ err = Z_BUF_ERROR;
541
+ }
542
+
543
+ assert(err == Z_OK || err == Z_BUF_ERROR || err == Z_STREAM_END);
544
+ if (err == Z_STREAM_END)
545
+ err = Z_OK;
546
+
547
+ *destLen = uncomp_stream_.total_out - old_total_out; // size for this call
548
+
549
+ return err;
550
+ }
551
+
552
+ int ZLib::UncompressChunkOrAll(Bytef *dest, uLongf *destLen,
553
+ const Bytef *source, uLong sourceLen,
554
+ int flush_mode) { // Z_SYNC_FLUSH or Z_FINISH
555
+ const int ret =
556
+ UncompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode);
557
+ if (ret == Z_BUF_ERROR)
558
+ UncompressErrorInit();
559
+ return ret;
560
+ }
561
+
562
+ int ZLib::UncompressAtMost(Bytef *dest, uLongf *destLen,
563
+ const Bytef *source, uLong *sourceLen) {
564
+ return UncompressAtMostOrAll(dest, destLen, source, sourceLen, Z_SYNC_FLUSH);
565
+ }
566
+
567
+ // We make sure we've uncompressed everything, that is, the current
568
+ // uncompress stream is at a compressed-buffer-EOF boundary. In gzip
569
+ // mode, we also check the gzip footer to make sure we pass the gzip
570
+ // consistency checks. We RETURN true iff both types of checks pass.
571
+ bool ZLib::UncompressChunkDone() {
572
+ assert(!first_chunk_ && uncomp_init_);
573
+ // Make sure we're at the end-of-compressed-data point. This means
574
+ // if we call inflate with Z_FINISH we won't consume any input or
575
+ // write any output
576
+ Bytef dummyin, dummyout;
577
+ uLongf dummylen = 0;
578
+ if ( UncompressChunkOrAll(&dummyout, &dummylen, &dummyin, 0, Z_FINISH)
579
+ != Z_OK ) {
580
+ return false;
581
+ }
582
+
583
+ // Make sure that when we exit, we can start a new round of chunks later
584
+ Reset();
585
+
586
+ return true;
587
+ }
588
+
589
+ // Uncompresses the source buffer into the destination buffer.
590
+ // The destination buffer must be long enough to hold the entire
591
+ // decompressed contents.
592
+ //
593
+ // We only initialize the uncomp_stream once. Thereafter, we use
594
+ // inflateReset, which should be faster.
595
+ //
596
+ // Returns Z_OK on success, otherwise, it returns a zlib error code.
597
+ int ZLib::Uncompress(Bytef *dest, uLongf *destLen,
598
+ const Bytef *source, uLong sourceLen) {
599
+ int err;
600
+ if ( (err=UncompressChunkOrAll(dest, destLen, source, sourceLen,
601
+ Z_FINISH)) != Z_OK ) {
602
+ Reset();  // let us try to uncompress again
603
+ return err;
604
+ }
605
+ if ( !UncompressChunkDone() ) // calls Reset()
606
+ return Z_DATA_ERROR;
607
+ return Z_OK; // stream_end is ok
608
+ }
609
+
610
+ #endif // HAVE_LIBZ
611
+
612
+ } // namespace snappy