snappy 0.0.17 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.dockerignore +2 -0
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/.gitignore +2 -1
- data/.gitmodules +1 -1
- data/Dockerfile +13 -0
- data/Gemfile +4 -0
- data/README.md +29 -5
- data/Rakefile +32 -29
- data/ext/api.c +6 -1
- data/ext/extconf.rb +23 -16
- data/lib/snappy/hadoop/reader.rb +62 -0
- data/lib/snappy/hadoop/writer.rb +51 -0
- data/lib/snappy/hadoop.rb +22 -0
- data/lib/snappy/reader.rb +14 -10
- data/lib/snappy/shim.rb +1 -1
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy.rb +5 -4
- data/snappy.gemspec +13 -13
- data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
- data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
- data/test/snappy_hadoop_test.rb +26 -0
- data/test/snappy_reader_test.rb +148 -0
- data/test/snappy_test.rb +95 -0
- data/test/snappy_writer_test.rb +55 -0
- data/test/test_helper.rb +7 -0
- data/test.sh +3 -0
- data/vendor/snappy/CMakeLists.txt +297 -0
- data/vendor/snappy/CONTRIBUTING.md +26 -0
- data/vendor/snappy/NEWS +40 -0
- data/vendor/snappy/{README → README.md} +27 -18
- data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/vendor/snappy/cmake/config.h.in +62 -0
- data/vendor/snappy/docs/README.md +72 -0
- data/vendor/snappy/snappy-internal.h +22 -18
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +116 -38
- data/vendor/snappy/snappy-stubs-public.h.in +20 -46
- data/vendor/snappy/snappy-test.cc +26 -22
- data/vendor/snappy/snappy-test.h +24 -98
- data/vendor/snappy/snappy.cc +380 -183
- data/vendor/snappy/snappy.h +14 -10
- data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
- data/vendor/snappy/snappy_unittest.cc +236 -261
- metadata +37 -92
- data/.travis.yml +0 -26
- data/smoke.sh +0 -8
- data/test/test-snappy-reader.rb +0 -129
- data/test/test-snappy-writer.rb +0 -55
- data/test/test-snappy.rb +0 -58
- data/vendor/snappy/ChangeLog +0 -2468
- data/vendor/snappy/INSTALL +0 -370
- data/vendor/snappy/Makefile +0 -982
- data/vendor/snappy/Makefile.am +0 -26
- data/vendor/snappy/Makefile.in +0 -982
- data/vendor/snappy/aclocal.m4 +0 -9738
- data/vendor/snappy/autogen.sh +0 -12
- data/vendor/snappy/autom4te.cache/output.0 +0 -18856
- data/vendor/snappy/autom4te.cache/output.1 +0 -18852
- data/vendor/snappy/autom4te.cache/requests +0 -297
- data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
- data/vendor/snappy/autom4te.cache/traces.1 +0 -714
- data/vendor/snappy/config.guess +0 -1530
- data/vendor/snappy/config.h +0 -135
- data/vendor/snappy/config.h.in +0 -134
- data/vendor/snappy/config.log +0 -1640
- data/vendor/snappy/config.status +0 -2318
- data/vendor/snappy/config.sub +0 -1773
- data/vendor/snappy/configure +0 -18852
- data/vendor/snappy/configure.ac +0 -134
- data/vendor/snappy/depcomp +0 -688
- data/vendor/snappy/install-sh +0 -527
- data/vendor/snappy/libtool +0 -10246
- data/vendor/snappy/ltmain.sh +0 -9661
- data/vendor/snappy/m4/gtest.m4 +0 -74
- data/vendor/snappy/m4/libtool.m4 +0 -8001
- data/vendor/snappy/m4/ltoptions.m4 +0 -384
- data/vendor/snappy/m4/ltsugar.m4 +0 -123
- data/vendor/snappy/m4/ltversion.m4 +0 -23
- data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
- data/vendor/snappy/missing +0 -331
- data/vendor/snappy/snappy-stubs-public.h +0 -100
- data/vendor/snappy/snappy.pc +0 -10
- data/vendor/snappy/snappy.pc.in +0 -10
- data/vendor/snappy/stamp-h1 +0 -1
data/vendor/snappy/snappy.cc
CHANGED
@@ -30,16 +30,50 @@
 #include "snappy-internal.h"
 #include "snappy-sinksource.h"
 
-#if defined(
-
+#if !defined(SNAPPY_HAVE_SSSE3)
+// __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD
+// support between SSE2 and AVX (so SSSE3 instructions require AVX support), and
+// defines __AVX__ when AVX support is available.
+#if defined(__SSSE3__) || defined(__AVX__)
+#define SNAPPY_HAVE_SSSE3 1
+#else
+#define SNAPPY_HAVE_SSSE3 0
+#endif
+#endif  // !defined(SNAPPY_HAVE_SSSE3)
+
+#if !defined(SNAPPY_HAVE_BMI2)
+// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2
+// specifically, but it does define __AVX2__ when AVX2 support is available.
+// Fortunately, AVX2 was introduced in Haswell, just like BMI2.
+//
+// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So,
+// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which
+// case issuing BMI2 instructions results in a compiler error.
+#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
+#define SNAPPY_HAVE_BMI2 1
+#else
+#define SNAPPY_HAVE_BMI2 0
 #endif
+#endif  // !defined(SNAPPY_HAVE_BMI2)
+
+#if SNAPPY_HAVE_SSSE3
+// Please do not replace with <x86intrin.h>. or with headers that assume more
+// advanced SSE versions without checking with all the OWNERS.
+#include <tmmintrin.h>
+#endif
+
+#if SNAPPY_HAVE_BMI2
+// Please do not replace with <x86intrin.h>. or with headers that assume more
+// advanced SSE versions without checking with all the OWNERS.
+#include <immintrin.h>
+#endif
+
 #include <stdio.h>
 
 #include <algorithm>
 #include <string>
 #include <vector>
 
-
 namespace snappy {
 
 using internal::COPY_1_BYTE_OFFSET;
@@ -47,7 +81,6 @@ using internal::COPY_2_BYTE_OFFSET;
 using internal::LITERAL;
 using internal::char_table;
 using internal::kMaximumTagLength;
-using internal::wordmask;
 
 // Any hash function will produce a valid compressed bitstream, but a good
 // hash function reduces the number of collisions and thus yields better
@@ -89,18 +122,18 @@ size_t MaxCompressedLength(size_t source_len) {
 namespace {
 
 void UnalignedCopy64(const void* src, void* dst) {
-
+  char tmp[8];
+  memcpy(tmp, src, 8);
+  memcpy(dst, tmp, 8);
 }
 
 void UnalignedCopy128(const void* src, void* dst) {
-  //
-  // SSE2
-
-
-
-
-  memcpy(dst, src, 16);
-#endif
+  // memcpy gets vectorized when the appropriate compiler options are used.
+  // For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load
+  // and store.
+  char tmp[16];
+  memcpy(tmp, src, 16);
+  memcpy(dst, tmp, 16);
 }
 
 // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used
@@ -115,12 +148,35 @@ void UnalignedCopy128(const void* src, void* dst) {
 // Note that this does not match the semantics of either memcpy() or memmove().
 inline char* IncrementalCopySlow(const char* src, char* op,
                                  char* const op_limit) {
+  // TODO: Remove pragma when LLVM is aware this
+  // function is only called in cold regions and when cold regions don't get
+  // vectorized or unrolled.
+#ifdef __clang__
+#pragma clang loop unroll(disable)
+#endif
   while (op < op_limit) {
     *op++ = *src++;
   }
   return op_limit;
 }
 
+#if SNAPPY_HAVE_SSSE3
+
+// This is a table of shuffle control masks that can be used as the source
+// operand for PSHUFB to permute the contents of the destination XMM register
+// into a repeating byte pattern.
+alignas(16) const char pshufb_fill_patterns[7][16] = {
+  {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+  {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
+  {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
+  {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3},
+  {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0},
+  {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3},
+  {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1},
+};
+
+#endif  // SNAPPY_HAVE_SSSE3
+
 // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
 // IncrementalCopySlow. buf_limit is the address past the end of the writable
 // region of the buffer.
@@ -132,9 +188,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
   // pat = op - src
   // len = limit - op
   assert(src < op);
+  assert(op <= op_limit);
   assert(op_limit <= buf_limit);
   // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
-  // to optimize this function but we have to also handle
+  // to optimize this function but we have to also handle other cases in case
   // the input does not satisfy these conditions.
 
   size_t pattern_size = op - src;
@@ -163,26 +220,56 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
   // copying 2x 8 bytes at a time.
 
   // Handle the uncommon case where pattern is less than 8 bytes.
-  if (
-
-  //
-  //
+  if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
+#if SNAPPY_HAVE_SSSE3
+    // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB
+    // to permute the register's contents in-place into a repeating sequence of
+    // the first "pattern_size" bytes.
+    // For example, suppose:
+    //    src       == "abc"
+    //    op        == op + 3
+    // After _mm_shuffle_epi8(), "pattern" will have five copies of "abc"
+    // followed by one byte of slop: abcabcabcabcabca.
     //
-  //
-  //
-  //
-
-
-
+    // The non-SSE fallback implementation suffers from store-forwarding stalls
+    // because its loads and stores partly overlap. By expanding the pattern
+    // in-place, we avoid the penalty.
+    if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 16)) {
+      const __m128i shuffle_mask = _mm_load_si128(
+          reinterpret_cast<const __m128i*>(pshufb_fill_patterns)
+          + pattern_size - 1);
+      const __m128i pattern = _mm_shuffle_epi8(
+          _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)), shuffle_mask);
+      // Uninitialized bytes are masked out by the shuffle mask.
+      // TODO: remove annotation and macro defs once MSan is fixed.
+      SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(&pattern, sizeof(pattern));
+      pattern_size *= 16 / pattern_size;
+      char* op_end = std::min(op_limit, buf_limit - 15);
+      while (op < op_end) {
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(op), pattern);
+        op += pattern_size;
+      }
+      if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
+    }
+    return IncrementalCopySlow(src, op, op_limit);
+#else  // !SNAPPY_HAVE_SSSE3
+    // If plenty of buffer space remains, expand the pattern to at least 8
+    // bytes. The way the following loop is written, we need 8 bytes of buffer
+    // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10
+    // bytes if pattern_size is 2. Precisely encoding that is probably not
+    // worthwhile; instead, invoke the slow path if we cannot write 11 bytes
+    // (because 11 are required in the worst case).
+    if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) {
       while (pattern_size < 8) {
        UnalignedCopy64(src, op);
        op += pattern_size;
        pattern_size *= 2;
      }
-      if (
+      if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
    } else {
      return IncrementalCopySlow(src, op, op_limit);
    }
+#endif  // SNAPPY_HAVE_SSSE3
  }
  assert(pattern_size >= 8);
 
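The non-SSSE3 branch above grows a short repeating pattern in place by doubling it with UnalignedCopy64 until it is at least 8 bytes wide. A minimal standalone sketch of that doubling step (the helper name is illustrative, plain memcpy stands in for the diff's UnalignedCopy64, and the caller is assumed to provide the extra buffer slop the comment in the hunk requires):

    #include <cstdio>
    #include <cstring>

    // Doubles the `pattern_size`-byte pattern that already sits at
    // [op - pattern_size, op) until it is at least 8 bytes long, like the
    // while loop in the #else branch above. Each 8-byte copy may read a few
    // bytes past the valid pattern, which is why the caller must guarantee
    // writable slop after `op`.
    static void ExpandPattern(char* op, size_t pattern_size) {
      const char* src = op - pattern_size;
      while (pattern_size < 8) {
        char tmp[8];
        memcpy(tmp, src, 8);
        memcpy(op, tmp, 8);
        op += pattern_size;
        pattern_size *= 2;
      }
    }

    int main() {
      char buf[32] = "abc";       // 3-byte pattern at the start of the buffer
      ExpandPattern(buf + 3, 3);  // expands it to a run of repeated copies
      printf("%.12s\n", buf);     // prints: abcabcabcabc
    }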
@@ -190,16 +277,51 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
   // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
   // because expanding the pattern to at least 8 bytes guarantees that
   // op - src >= 8.
-
+  //
+  // Typically, the op_limit is the gating factor so try to simplify the loop
+  // based on that.
+  if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 16)) {
+    // There is at least one, and at most four 16-byte blocks. Writing four
+    // conditionals instead of a loop allows FDO to layout the code with respect
+    // to the actual probabilities of each length.
+    // TODO: Replace with loop with trip count hint.
+    UnalignedCopy64(src, op);
+    UnalignedCopy64(src + 8, op + 8);
+
+    if (op + 16 < op_limit) {
+      UnalignedCopy64(src + 16, op + 16);
+      UnalignedCopy64(src + 24, op + 24);
+    }
+    if (op + 32 < op_limit) {
+      UnalignedCopy64(src + 32, op + 32);
+      UnalignedCopy64(src + 40, op + 40);
+    }
+    if (op + 48 < op_limit) {
+      UnalignedCopy64(src + 48, op + 48);
+      UnalignedCopy64(src + 56, op + 56);
+    }
+    return op_limit;
+  }
+
+  // Fall back to doing as much as we can with the available slop in the
+  // buffer. This code path is relatively cold however so we save code size by
+  // avoiding unrolling and vectorizing.
+  //
+  // TODO: Remove pragma when when cold regions don't get vectorized
+  // or unrolled.
+#ifdef __clang__
+#pragma clang loop unroll(disable)
+#endif
+  for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) {
     UnalignedCopy64(src, op);
     UnalignedCopy64(src + 8, op + 8);
-    src += 16;
-    op += 16;
-    if (PREDICT_TRUE(op >= op_limit)) return op_limit;
   }
+  if (op >= op_limit)
+    return op_limit;
+
   // We only take this branch if we didn't have enough slop and we can do a
   // single 8 byte copy.
-  if (
+  if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
     UnalignedCopy64(src, op);
     src += 8;
     op += 8;
@@ -209,10 +331,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
 
 }  // namespace
 
+template <bool allow_fast_path>
 static inline char* EmitLiteral(char* op,
                                 const char* literal,
-                                int len
-                                bool allow_fast_path) {
+                                int len) {
   // The vast majority of copies are below 16 bytes, for which a
   // call to memcpy is overkill. This fast path can sometimes
   // copy up to 15 bytes too much, but that is okay in the
@@ -237,31 +359,29 @@ static inline char* EmitLiteral(char* op,
     // Fits in tag byte
     *op++ = LITERAL | (n << 2);
   } else {
-
-    char* base = op;
-    int count = 0;
-    op++;
-    while (n > 0) {
-      *op++ = n & 0xff;
-      n >>= 8;
-      count++;
-    }
+    int count = (Bits::Log2Floor(n) >> 3) + 1;
     assert(count >= 1);
     assert(count <= 4);
-    *
+    *op++ = LITERAL | ((59 + count) << 2);
+    // Encode in upcoming bytes.
+    // Write 4 bytes, though we may care about only 1 of them. The output buffer
+    // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds
+    // here and there is a memcpy of size 'len' below.
+    LittleEndian::Store32(op, n);
+    op += count;
   }
   memcpy(op, literal, len);
   return op + len;
 }
 
-
-
+template <bool len_less_than_12>
+static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) {
   assert(len <= 64);
   assert(len >= 4);
   assert(offset < 65536);
   assert(len_less_than_12 == (len < 12));
 
-  if (len_less_than_12 &&
+  if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
     // offset fits in 11 bits.  The 3 highest go in the top of the first byte,
     // and the rest go in the second byte.
     *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
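The rewritten long-literal branch above replaces the old byte-by-byte length loop with a single Bits::Log2Floor to compute how many length bytes are needed, followed by one unconditional 32-bit store. A self-contained sketch of that encoding (Log2Floor and EmitLongLiteralTag are stand-ins written for this example; a little-endian host is assumed for the memcpy):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Stand-in for Bits::Log2Floor from the diff.
    static int Log2Floor(uint32_t n) {
      int result = -1;
      while (n != 0) { ++result; n >>= 1; }
      return result;
    }

    // Emits the tag for a literal of length >= 61: one tag byte holding
    // LITERAL | ((59 + count) << 2), then `count` little-endian bytes of
    // (length - 1), mirroring the new code above.
    static size_t EmitLongLiteralTag(uint8_t* op, uint32_t len) {
      const uint8_t kLiteral = 0x00;        // LITERAL tag type is 0
      uint32_t n = len - 1;                 // literal lengths are stored minus one
      int count = (Log2Floor(n) >> 3) + 1;  // bytes needed to hold n (1..4)
      assert(count >= 1 && count <= 4);
      op[0] = kLiteral | ((59 + count) << 2);
      memcpy(op + 1, &n, 4);                // write 4 bytes, only `count` are used
      return 1 + count;
    }

    int main() {
      uint8_t tag[5];
      size_t used = EmitLongLiteralTag(tag, 1000);  // n = 999 = 0x03e7, count = 2
      printf("used=%zu tag=%02x %02x %02x\n", used, tag[0], tag[1], tag[2]);
      // prints: used=3 tag=f4 e7 03
    }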
@@ -276,29 +396,33 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
   return op;
 }
 
-
-
+template <bool len_less_than_12>
+static inline char* EmitCopy(char* op, size_t offset, size_t len) {
   assert(len_less_than_12 == (len < 12));
   if (len_less_than_12) {
-    return EmitCopyAtMost64(op, offset, len
+    return EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
   } else {
     // A special case for len <= 64 might help, but so far measurements suggest
     // it's in the noise.
 
     // Emit 64 byte copies but make sure to keep at least four bytes reserved.
-    while (
-      op = EmitCopyAtMost64(op, offset, 64
+    while (SNAPPY_PREDICT_FALSE(len >= 68)) {
+      op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 64);
       len -= 64;
     }
 
     // One or two copies will now finish the job.
     if (len > 64) {
-      op = EmitCopyAtMost64(op, offset, 60
+      op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 60);
       len -= 60;
     }
 
     // Emit remainder.
-
+    if (len < 12) {
+      op = EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
+    } else {
+      op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, len);
+    }
     return op;
   }
 }
@@ -314,31 +438,45 @@ bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
   }
 }
 
-namespace
-
-
-
-
-
-
-  size_t htsize = 256;
-  while (htsize < kMaxHashTableSize && htsize < input_size) {
-    htsize <<= 1;
+namespace {
+uint32 CalculateTableSize(uint32 input_size) {
+  static_assert(
+      kMaxHashTableSize >= kMinHashTableSize,
+      "kMaxHashTableSize should be greater or equal to kMinHashTableSize.");
+  if (input_size > kMaxHashTableSize) {
+    return kMaxHashTableSize;
   }
-
-
-  if (htsize <= ARRAYSIZE(small_table_)) {
-    table = small_table_;
-  } else {
-    if (large_table_ == NULL) {
-      large_table_ = new uint16[kMaxHashTableSize];
-    }
-    table = large_table_;
+  if (input_size < kMinHashTableSize) {
+    return kMinHashTableSize;
   }
+  // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1.
+  // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)).
+  return 2u << Bits::Log2Floor(input_size - 1);
+}
+}  // namespace
+
+namespace internal {
+WorkingMemory::WorkingMemory(size_t input_size) {
+  const size_t max_fragment_size = std::min(input_size, kBlockSize);
+  const size_t table_size = CalculateTableSize(max_fragment_size);
+  size_ = table_size * sizeof(*table_) + max_fragment_size +
+          MaxCompressedLength(max_fragment_size);
+  mem_ = std::allocator<char>().allocate(size_);
+  table_ = reinterpret_cast<uint16*>(mem_);
+  input_ = mem_ + table_size * sizeof(*table_);
+  output_ = input_ + max_fragment_size;
+}
+
+WorkingMemory::~WorkingMemory() {
+  std::allocator<char>().deallocate(mem_, size_);
+}
 
+uint16* WorkingMemory::GetHashTable(size_t fragment_size,
+                                    int* table_size) const {
+  const size_t htsize = CalculateTableSize(fragment_size);
+  memset(table_, 0, htsize * sizeof(*table_));
   *table_size = htsize;
-
-  return table;
+  return table_;
 }
 }  // end namespace internal
 
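CalculateTableSize above sizes the hash table to the next power of two of the fragment size, clamped to [kMinHashTableSize, kMaxHashTableSize]. A small worked example of the `2 << Log2Floor(x - 1)` rounding (the helper names here are illustrative, not part of the library):

    #include <cstdint>
    #include <cstdio>

    static int Log2Floor(uint32_t n) {
      int result = -1;
      while (n != 0) { ++result; n >>= 1; }
      return result;
    }

    // For x > 1, 2 << Log2Floor(x - 1) is the smallest power of two >= x,
    // which is exactly the rounding CalculateTableSize performs before the
    // min/max clamping shown in the hunk above.
    static uint32_t RoundUpToPowerOfTwo(uint32_t x) {
      return 2u << Log2Floor(x - 1);
    }

    int main() {
      printf("%u %u %u\n",
             RoundUpToPowerOfTwo(3000),   // 4096
             RoundUpToPowerOfTwo(4096),   // 4096
             RoundUpToPowerOfTwo(4097));  // 8192
    }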
@@ -405,7 +543,7 @@ char* CompressFragment(const char* input,
   // "ip" is the input pointer, and "op" is the output pointer.
   const char* ip = input;
   assert(input_size <= kBlockSize);
-  assert((table_size & (table_size - 1)) == 0);
+  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
   const int shift = 32 - Bits::Log2Floor(table_size);
   assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
   const char* ip_end = input + input_size;
@@ -415,7 +553,7 @@ char* CompressFragment(const char* input,
   const char* next_emit = ip;
 
   const size_t kInputMarginBytes = 15;
-  if (
+  if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
     const char* ip_limit = input + input_size - kInputMarginBytes;
 
     for (uint32 next_hash = Hash(++ip, shift); ; ) {
@@ -456,7 +594,7 @@ char* CompressFragment(const char* input,
         uint32 bytes_between_hash_lookups = skip >> 5;
         skip += bytes_between_hash_lookups;
         next_ip = ip + bytes_between_hash_lookups;
-        if (
+        if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
           goto emit_remainder;
         }
         next_hash = Hash(next_ip, shift);
@@ -465,14 +603,14 @@ char* CompressFragment(const char* input,
         assert(candidate < ip);
 
         table[hash] = ip - base_ip;
-      } while (
-
+      } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
+                                   UNALIGNED_LOAD32(candidate)));
 
       // Step 2: A 4-byte match has been found.  We'll later see if more
       // than 4 bytes match.  But, prior to the match, input
       // bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
       assert(next_emit + 16 <= ip_end);
-      op = EmitLiteral(op, next_emit, ip - next_emit
+      op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit, ip - next_emit);
 
       // Step 3: Call EmitCopy, and then see if another EmitCopy could
       // be our next move.  Repeat until we find no match for the
@@ -495,9 +633,13 @@ char* CompressFragment(const char* input,
         ip += matched;
         size_t offset = base - candidate;
         assert(0 == memcmp(base, candidate, matched));
-
+        if (p.second) {
+          op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched);
+        } else {
+          op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched);
+        }
         next_emit = ip;
-        if (
+        if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
           goto emit_remainder;
         }
         // We are now looking for a 4-byte match again.  We read
@@ -520,13 +662,18 @@ char* CompressFragment(const char* input,
  emit_remainder:
   // Emit the remaining bytes as a literal
   if (next_emit < ip_end) {
-    op = EmitLiteral(op, next_emit,
+    op = EmitLiteral</*allow_fast_path=*/false>(op, next_emit,
+                                                ip_end - next_emit);
   }
 
   return op;
 }
 }  // end namespace internal
 
+// Called back at avery compression call to trace parameters and sizes.
+static inline void Report(const char *algorithm, size_t compressed_size,
+                          size_t uncompressed_size) {}
+
 // Signature of output types needed by decompression code.
 // The decompression code is templatized on a type that obeys this
 // signature so that we do not pay virtual function call overhead in
@@ -567,6 +714,28 @@ char* CompressFragment(const char* input,
 //   bool TryFastAppend(const char* ip, size_t available, size_t length);
 // };
 
+static inline uint32 ExtractLowBytes(uint32 v, int n) {
+  assert(n >= 0);
+  assert(n <= 4);
+#if SNAPPY_HAVE_BMI2
+  return _bzhi_u32(v, 8 * n);
+#else
+  // This needs to be wider than uint32 otherwise `mask << 32` will be
+  // undefined.
+  uint64 mask = 0xffffffff;
+  return v & ~(mask << (8 * n));
+#endif
+}
+
+static inline bool LeftShiftOverflows(uint8 value, uint32 shift) {
+  assert(shift < 32);
+  static const uint8 masks[] = {
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+      0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe};
+  return (value & masks[shift]) != 0;
+}
 
 // Helper class for decompression
 class SnappyDecompressor {
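ExtractLowBytes above keeps only the low `n` bytes of a 32-bit load; with BMI2 it is a single `_bzhi_u32(v, 8 * n)`, otherwise the mask is built from a 64-bit constant so that `mask << 32` stays defined. A portable sketch of the fallback branch (the function name here is illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Keeps the low `n` bytes of `v` and clears the rest, matching the
    // non-BMI2 branch of ExtractLowBytes in the hunk above.
    static uint32_t ExtractLowBytesPortable(uint32_t v, int n) {
      assert(n >= 0 && n <= 4);
      uint64_t mask = 0xffffffff;  // wider than 32 bits so mask << 32 is defined
      return static_cast<uint32_t>(v & ~(mask << (8 * n)));
    }

    int main() {
      // The decompressor uses this to pull a 1-4 byte little-endian length or
      // copy offset out of a full 32-bit load.
      printf("%08x\n", ExtractLowBytesPortable(0xddccbbaau, 2));  // 0000bbaa
      printf("%08x\n", ExtractLowBytesPortable(0xddccbbaau, 4));  // ddccbbaa
    }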
@@ -605,7 +774,7 @@ class SnappyDecompressor {
   }
 
   // Read the uncompressed length stored at the start of the compressed data.
-  // On
+  // On success, stores the length in *result and returns true.
   // On failure, returns false.
   bool ReadUncompressedLength(uint32* result) {
     assert(ip_ == NULL);       // Must not have read anything yet
@@ -620,7 +789,7 @@ class SnappyDecompressor {
       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
       reader_->Skip(1);
       uint32 val = c & 0x7f;
-      if (((val
+      if (LeftShiftOverflows(static_cast<uint8>(val), shift)) return false;
       *result |= val << shift;
       if (c < 128) {
         break;
@@ -633,13 +802,27 @@ class SnappyDecompressor {
   // Process the next item found in the input.
   // Returns true if successful, false on error or end of input.
   template <class Writer>
+#if defined(__GNUC__) && defined(__x86_64__)
+  __attribute__((aligned(32)))
+#endif
   void DecompressAllTags(Writer* writer) {
-
-    //
-    //
-
-
+    // In x86, pad the function body to start 16 bytes later. This function has
+    // a couple of hotspots that are highly sensitive to alignment: we have
+    // observed regressions by more than 20% in some metrics just by moving the
+    // exact same code to a different position in the benchmark binary.
+    //
+    // Putting this code on a 32-byte-aligned boundary + 16 bytes makes us hit
+    // the "lucky" case consistently. Unfortunately, this is a very brittle
+    // workaround, and future differences in code generation may reintroduce
+    // this regression. If you experience a big, difficult to explain, benchmark
+    // performance regression here, first try removing this hack.
+#if defined(__GNUC__) && defined(__x86_64__)
+    // Two 8-byte "NOP DWORD ptr [EAX + EAX*1 + 00000000H]" instructions.
+    asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
+    asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
+#endif
 
+    const char* ip = ip_;
     // We could have put this refill fragment only at the beginning of the loop.
     // However, duplicating it at the end of each branch gives the compiler more
     // scope to optimize the <ip_limit_ - ip> expression based on the local
@@ -667,21 +850,22 @@ class SnappyDecompressor {
       //  txt[1-4]        25%         75%
       //  pb              24%         76%
       //  bin             24%         76%
-      if (
+      if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
         size_t literal_length = (c >> 2) + 1u;
         if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
           assert(literal_length < 61);
           ip += literal_length;
-          // NOTE
+          // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend()
           // will not return true unless there's already at least five spare
           // bytes in addition to the literal.
           continue;
         }
-        if (
+        if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
           // Long literal.
           const size_t literal_length_length = literal_length - 60;
           literal_length =
-              (LittleEndian::Load32(ip)
+              ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) +
+              1;
           ip += literal_length_length;
         }
 
@@ -704,7 +888,8 @@ class SnappyDecompressor {
         MAYBE_REFILL();
       } else {
         const size_t entry = char_table[c];
-        const size_t trailer =
+        const size_t trailer =
+            ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11);
         const size_t length = entry & 0xff;
         ip += entry >> 11;
 
@@ -757,7 +942,7 @@ bool SnappyDecompressor::RefillTag() {
       size_t length;
       const char* src = reader_->Peek(&length);
       if (length == 0) return false;
-      uint32 to_add = min<uint32>(needed - nbuf, length);
+      uint32 to_add = std::min<uint32>(needed - nbuf, length);
       memcpy(scratch_ + nbuf, src, to_add);
       nbuf += to_add;
       reader_->Skip(to_add);
@@ -786,13 +971,18 @@ static bool InternalUncompress(Source* r, Writer* writer) {
   SnappyDecompressor decompressor(r);
   uint32 uncompressed_len = 0;
   if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
-
+
+  return InternalUncompressAllTags(&decompressor, writer, r->Available(),
+                                   uncompressed_len);
 }
 
 template <typename Writer>
 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                       Writer* writer,
+                                      uint32 compressed_len,
                                       uint32 uncompressed_len) {
+  Report("snappy_uncompress", compressed_len, uncompressed_len);
+
   writer->SetExpectedLength(uncompressed_len);
 
   // Process the entire input
@@ -809,21 +999,20 @@ bool GetUncompressedLength(Source* source, uint32* result) {
 size_t Compress(Source* reader, Sink* writer) {
   size_t written = 0;
   size_t N = reader->Available();
+  const size_t uncompressed_size = N;
   char ulength[Varint::kMax32];
   char* p = Varint::Encode32(ulength, N);
   writer->Append(ulength, p-ulength);
   written += (p - ulength);
 
-  internal::WorkingMemory wmem;
-  char* scratch = NULL;
-  char* scratch_output = NULL;
+  internal::WorkingMemory wmem(N);
 
   while (N > 0) {
     // Get next block to compress (without copying if possible)
     size_t fragment_size;
     const char* fragment = reader->Peek(&fragment_size);
     assert(fragment_size != 0);  // premature end of input
-    const size_t num_to_read = min(N, kBlockSize);
+    const size_t num_to_read = std::min(N, kBlockSize);
     size_t bytes_read = fragment_size;
 
     size_t pending_advance = 0;
@@ -832,19 +1021,13 @@ size_t Compress(Source* reader, Sink* writer) {
       pending_advance = num_to_read;
       fragment_size = num_to_read;
     } else {
-
-      if (scratch == NULL) {
-        // If this is the last iteration, we want to allocate N bytes
-        // of space, otherwise the max possible kBlockSize space.
-        // num_to_read contains exactly the correct value
-        scratch = new char[num_to_read];
-      }
+      char* scratch = wmem.GetScratchInput();
       memcpy(scratch, fragment, bytes_read);
       reader->Skip(bytes_read);
 
       while (bytes_read < num_to_read) {
         fragment = reader->Peek(&fragment_size);
-        size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
+        size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
         memcpy(scratch + bytes_read, fragment, n);
         bytes_read += n;
         reader->Skip(n);
@@ -864,16 +1047,13 @@ size_t Compress(Source* reader, Sink* writer) {
 
     // Need a scratch buffer for the output, in case the byte sink doesn't
     // have room for us directly.
-
-
-
-
-
-
-
-    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
-    char* end = internal::CompressFragment(fragment, fragment_size,
-                                           dest, table, table_size);
+
+    // Since we encode kBlockSize regions followed by a region
+    // which is <= kBlockSize in length, a previously allocated
+    // scratch_output[] region is big enough for this iteration.
+    char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
+    char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
+                                           table_size);
     writer->Append(dest, end - dest);
     written += (end - dest);
 
@@ -881,8 +1061,7 @@ size_t Compress(Source* reader, Sink* writer) {
     reader->Skip(pending_advance);
   }
 
-
-  delete[] scratch_output;
+  Report("snappy_compress", written, uncompressed_size);
 
   return written;
 }
@@ -896,14 +1075,22 @@ size_t Compress(Source* reader, Sink* writer) {
 // Writer template argument to SnappyDecompressor::DecompressAllTags().
 class SnappyIOVecWriter {
  private:
+  // output_iov_end_ is set to iov + count and used to determine when
+  // the end of the iovs is reached.
+  const struct iovec* output_iov_end_;
+
+#if !defined(NDEBUG)
   const struct iovec* output_iov_;
-
+#endif  // !defined(NDEBUG)
+
+  // Current iov that is being written into.
+  const struct iovec* curr_iov_;
 
-  //
-
+  // Pointer to current iov's write location.
+  char* curr_iov_output_;
 
-  //
-  size_t
+  // Remaining bytes to write into curr_iov_output.
+  size_t curr_iov_remaining_;
 
   // Total bytes decompressed into output_iov_ so far.
   size_t total_written_;
@@ -911,22 +1098,24 @@ class SnappyIOVecWriter {
   // Maximum number of bytes that will be decompressed into output_iov_.
   size_t output_limit_;
 
-  inline char* GetIOVecPointer(
-    return reinterpret_cast<char*>(
-        offset;
+  static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) {
+    return reinterpret_cast<char*>(iov->iov_base) + offset;
   }
 
  public:
   // Does not take ownership of iov. iov must be valid during the
   // entire lifetime of the SnappyIOVecWriter.
   inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
-      :
-
-
-
+      : output_iov_end_(iov + iov_count),
+#if !defined(NDEBUG)
+        output_iov_(iov),
+#endif  // !defined(NDEBUG)
+        curr_iov_(iov),
+        curr_iov_output_(iov_count ? reinterpret_cast<char*>(iov->iov_base)
+                                   : nullptr),
+        curr_iov_remaining_(iov_count ? iov->iov_len : 0),
         total_written_(0),
-        output_limit_(-1) {
-  }
+        output_limit_(-1) {}
 
   inline void SetExpectedLength(size_t len) {
     output_limit_ = len;
@@ -941,23 +1130,25 @@ class SnappyIOVecWriter {
       return false;
     }
 
+    return AppendNoCheck(ip, len);
+  }
+
+  inline bool AppendNoCheck(const char* ip, size_t len) {
     while (len > 0) {
-
-      if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
+      if (curr_iov_remaining_ == 0) {
         // This iovec is full. Go to the next one.
-        if (
+        if (curr_iov_ + 1 >= output_iov_end_) {
           return false;
         }
-
-
+        ++curr_iov_;
+        curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+        curr_iov_remaining_ = curr_iov_->iov_len;
      }
 
-      const size_t to_write = std::min(
-
-
-
-               to_write);
-      curr_iov_written_ += to_write;
+      const size_t to_write = std::min(len, curr_iov_remaining_);
+      memcpy(curr_iov_output_, ip, to_write);
+      curr_iov_output_ += to_write;
+      curr_iov_remaining_ -= to_write;
      total_written_ += to_write;
      ip += to_write;
      len -= to_write;
@@ -969,11 +1160,11 @@ class SnappyIOVecWriter {
   inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
     const size_t space_left = output_limit_ - total_written_;
     if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
-
+        curr_iov_remaining_ >= 16) {
       // Fast path, used for the majority (about 95%) of invocations.
-
-
-
+      UnalignedCopy128(ip, curr_iov_output_);
+      curr_iov_output_ += len;
+      curr_iov_remaining_ -= len;
       total_written_ += len;
       return true;
     }
@@ -982,7 +1173,9 @@ class SnappyIOVecWriter {
   }
 
   inline bool AppendFromSelf(size_t offset, size_t len) {
-
+    // See SnappyArrayWriter::AppendFromSelf for an explanation of
+    // the "offset - 1u" trick.
+    if (offset - 1u >= total_written_) {
       return false;
     }
     const size_t space_left = output_limit_ - total_written_;
@@ -991,8 +1184,8 @@ class SnappyIOVecWriter {
     }
 
     // Locate the iovec from which we need to start the copy.
-
-    size_t from_iov_offset =
+    const iovec* from_iov = curr_iov_;
+    size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_;
     while (offset > 0) {
       if (from_iov_offset >= offset) {
         from_iov_offset -= offset;
@@ -1000,47 +1193,47 @@ class SnappyIOVecWriter {
       }
 
       offset -= from_iov_offset;
-
-
-
+      --from_iov;
+#if !defined(NDEBUG)
+      assert(from_iov >= output_iov_);
+#endif  // !defined(NDEBUG)
+      from_iov_offset = from_iov->iov_len;
     }
 
     // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
     // the current iovec.
     while (len > 0) {
-      assert(
-      if (
-        const size_t to_copy =
-
-
-        Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
+      assert(from_iov <= curr_iov_);
+      if (from_iov != curr_iov_) {
+        const size_t to_copy =
+            std::min(from_iov->iov_len - from_iov_offset, len);
+        AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy);
         len -= to_copy;
         if (len > 0) {
-          ++
+          ++from_iov;
          from_iov_offset = 0;
        }
      } else {
-
-        size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
-                                  curr_iov_written_,
-                                  len);
+        size_t to_copy = curr_iov_remaining_;
        if (to_copy == 0) {
          // This iovec is full. Go to the next one.
-          if (
+          if (curr_iov_ + 1 >= output_iov_end_) {
            return false;
          }
-          ++
-
+          ++curr_iov_;
+          curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+          curr_iov_remaining_ = curr_iov_->iov_len;
          continue;
        }
        if (to_copy > len) {
          to_copy = len;
        }
-
-
-
-
-
+
+        IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset),
+                        curr_iov_output_, curr_iov_output_ + to_copy,
+                        curr_iov_output_ + curr_iov_remaining_);
+        curr_iov_output_ += to_copy;
+        curr_iov_remaining_ -= to_copy;
        from_iov_offset += to_copy;
        total_written_ += to_copy;
        len -= to_copy;
@@ -1149,7 +1342,7 @@ bool RawUncompress(Source* compressed, char* uncompressed) {
   return InternalUncompress(compressed, &output);
 }
 
-bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
+bool Uncompress(const char* compressed, size_t n, std::string* uncompressed) {
   size_t ulength;
   if (!GetUncompressedLength(compressed, n, &ulength)) {
     return false;
@@ -1217,7 +1410,8 @@ void RawCompress(const char* input,
   *compressed_length = (writer.CurrentDestination() - compressed);
 }
 
-size_t Compress(const char* input, size_t input_length,
+size_t Compress(const char* input, size_t input_length,
+                std::string* compressed) {
   // Pre-grow the buffer to the max length of the compressed output
   STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
 
@@ -1313,7 +1507,8 @@ class SnappyScatteredWriter {
     char* const op_end = op_ptr_ + len;
     // See SnappyArrayWriter::AppendFromSelf for an explanation of
     // the "offset - 1u" trick.
-    if (
+    if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
+                            op_end <= op_limit_)) {
       // Fast path: src and dst in current block.
       op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
       return true;
@@ -1344,7 +1539,7 @@ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
   }
 
   // Make new block
-  size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+  size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
   op_base_ = allocator_.Allocate(bsize);
   op_ptr_ = op_base_;
   op_limit_ = op_base_ + bsize;
@@ -1401,7 +1596,7 @@ class SnappySinkAllocator {
     size_t size_written = 0;
     size_t block_size;
     for (int i = 0; i < blocks_.size(); ++i) {
-      block_size = min<size_t>(blocks_[i].size, size - size_written);
+      block_size = std::min<size_t>(blocks_[i].size, size - size_written);
       dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
                                     &SnappySinkAllocator::Deleter, NULL);
       size_written += block_size;
@@ -1446,19 +1641,21 @@ bool Uncompress(Source* compressed, Sink* uncompressed) {
   char* buf = uncompressed->GetAppendBufferVariable(
       1, uncompressed_len, &c, 1, &allocated_size);
 
+  const size_t compressed_len = compressed->Available();
   // If we can get a flat buffer, then use it, otherwise do block by block
   // uncompression
   if (allocated_size >= uncompressed_len) {
     SnappyArrayWriter writer(buf);
-    bool result = InternalUncompressAllTags(
-
+    bool result = InternalUncompressAllTags(&decompressor, &writer,
+                                            compressed_len, uncompressed_len);
     uncompressed->Append(buf, writer.Produced());
     return result;
   } else {
     SnappySinkAllocator allocator(uncompressed);
     SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
-    return InternalUncompressAllTags(&decompressor, &writer,
+    return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
+                                     uncompressed_len);
   }
 }
 
-}
+}  // namespace snappy
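The hunks above also migrate the public overloads from the old `string` typedef to `std::string` (`Compress(const char*, size_t, std::string*)` and `Uncompress(const char*, size_t, std::string*)`). A minimal round trip through those overloads, assuming the vendored library is built and `snappy.h` is on the include path:

    #include <cassert>
    #include <string>

    #include "snappy.h"

    int main() {
      const std::string input(10000, 'x');

      // Compress into a std::string, then decompress and verify the result.
      std::string compressed;
      snappy::Compress(input.data(), input.size(), &compressed);

      std::string uncompressed;
      const bool ok =
          snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);

      assert(ok && uncompressed == input);
      return (ok && uncompressed == input) ? 0 : 1;
    }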