snappy 0.0.17 → 0.3.0
- checksums.yaml +5 -5
- data/.dockerignore +2 -0
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/.gitignore +2 -1
- data/.gitmodules +1 -1
- data/Dockerfile +13 -0
- data/Gemfile +4 -0
- data/README.md +29 -5
- data/Rakefile +32 -29
- data/ext/api.c +6 -1
- data/ext/extconf.rb +23 -16
- data/lib/snappy/hadoop/reader.rb +62 -0
- data/lib/snappy/hadoop/writer.rb +51 -0
- data/lib/snappy/hadoop.rb +22 -0
- data/lib/snappy/reader.rb +14 -10
- data/lib/snappy/shim.rb +1 -1
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy.rb +5 -4
- data/snappy.gemspec +13 -13
- data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
- data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
- data/test/snappy_hadoop_test.rb +26 -0
- data/test/snappy_reader_test.rb +148 -0
- data/test/snappy_test.rb +95 -0
- data/test/snappy_writer_test.rb +55 -0
- data/test/test_helper.rb +7 -0
- data/test.sh +3 -0
- data/vendor/snappy/CMakeLists.txt +297 -0
- data/vendor/snappy/CONTRIBUTING.md +26 -0
- data/vendor/snappy/NEWS +40 -0
- data/vendor/snappy/{README → README.md} +27 -18
- data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/vendor/snappy/cmake/config.h.in +62 -0
- data/vendor/snappy/docs/README.md +72 -0
- data/vendor/snappy/snappy-internal.h +22 -18
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +116 -38
- data/vendor/snappy/snappy-stubs-public.h.in +20 -46
- data/vendor/snappy/snappy-test.cc +26 -22
- data/vendor/snappy/snappy-test.h +24 -98
- data/vendor/snappy/snappy.cc +380 -183
- data/vendor/snappy/snappy.h +14 -10
- data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
- data/vendor/snappy/snappy_unittest.cc +236 -261
- metadata +37 -92
- data/.travis.yml +0 -26
- data/smoke.sh +0 -8
- data/test/test-snappy-reader.rb +0 -129
- data/test/test-snappy-writer.rb +0 -55
- data/test/test-snappy.rb +0 -58
- data/vendor/snappy/ChangeLog +0 -2468
- data/vendor/snappy/INSTALL +0 -370
- data/vendor/snappy/Makefile +0 -982
- data/vendor/snappy/Makefile.am +0 -26
- data/vendor/snappy/Makefile.in +0 -982
- data/vendor/snappy/aclocal.m4 +0 -9738
- data/vendor/snappy/autogen.sh +0 -12
- data/vendor/snappy/autom4te.cache/output.0 +0 -18856
- data/vendor/snappy/autom4te.cache/output.1 +0 -18852
- data/vendor/snappy/autom4te.cache/requests +0 -297
- data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
- data/vendor/snappy/autom4te.cache/traces.1 +0 -714
- data/vendor/snappy/config.guess +0 -1530
- data/vendor/snappy/config.h +0 -135
- data/vendor/snappy/config.h.in +0 -134
- data/vendor/snappy/config.log +0 -1640
- data/vendor/snappy/config.status +0 -2318
- data/vendor/snappy/config.sub +0 -1773
- data/vendor/snappy/configure +0 -18852
- data/vendor/snappy/configure.ac +0 -134
- data/vendor/snappy/depcomp +0 -688
- data/vendor/snappy/install-sh +0 -527
- data/vendor/snappy/libtool +0 -10246
- data/vendor/snappy/ltmain.sh +0 -9661
- data/vendor/snappy/m4/gtest.m4 +0 -74
- data/vendor/snappy/m4/libtool.m4 +0 -8001
- data/vendor/snappy/m4/ltoptions.m4 +0 -384
- data/vendor/snappy/m4/ltsugar.m4 +0 -123
- data/vendor/snappy/m4/ltversion.m4 +0 -23
- data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
- data/vendor/snappy/missing +0 -331
- data/vendor/snappy/snappy-stubs-public.h +0 -100
- data/vendor/snappy/snappy.pc +0 -10
- data/vendor/snappy/snappy.pc.in +0 -10
- data/vendor/snappy/stamp-h1 +0 -1
data/vendor/snappy/snappy.cc
CHANGED
@@ -30,16 +30,50 @@
 #include "snappy-internal.h"
 #include "snappy-sinksource.h"
 
-#if defined(
-
+#if !defined(SNAPPY_HAVE_SSSE3)
+// __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD
+// support between SSE2 and AVX (so SSSE3 instructions require AVX support), and
+// defines __AVX__ when AVX support is available.
+#if defined(__SSSE3__) || defined(__AVX__)
+#define SNAPPY_HAVE_SSSE3 1
+#else
+#define SNAPPY_HAVE_SSSE3 0
+#endif
+#endif  // !defined(SNAPPY_HAVE_SSSE3)
+
+#if !defined(SNAPPY_HAVE_BMI2)
+// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2
+// specifically, but it does define __AVX2__ when AVX2 support is available.
+// Fortunately, AVX2 was introduced in Haswell, just like BMI2.
+//
+// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So,
+// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which
+// case issuing BMI2 instructions results in a compiler error.
+#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
+#define SNAPPY_HAVE_BMI2 1
+#else
+#define SNAPPY_HAVE_BMI2 0
 #endif
+#endif  // !defined(SNAPPY_HAVE_BMI2)
+
+#if SNAPPY_HAVE_SSSE3
+// Please do not replace with <x86intrin.h>. or with headers that assume more
+// advanced SSE versions without checking with all the OWNERS.
+#include <tmmintrin.h>
+#endif
+
+#if SNAPPY_HAVE_BMI2
+// Please do not replace with <x86intrin.h>. or with headers that assume more
+// advanced SSE versions without checking with all the OWNERS.
+#include <immintrin.h>
+#endif
+
 #include <stdio.h>
 
 #include <algorithm>
 #include <string>
 #include <vector>
 
-
 namespace snappy {
 
 using internal::COPY_1_BYTE_OFFSET;
@@ -47,7 +81,6 @@ using internal::COPY_2_BYTE_OFFSET;
 using internal::LITERAL;
 using internal::char_table;
 using internal::kMaximumTagLength;
-using internal::wordmask;
 
 // Any hash function will produce a valid compressed bitstream, but a good
 // hash function reduces the number of collisions and thus yields better
@@ -89,18 +122,18 @@ size_t MaxCompressedLength(size_t source_len) {
 namespace {
 
 void UnalignedCopy64(const void* src, void* dst) {
-
+  char tmp[8];
+  memcpy(tmp, src, 8);
+  memcpy(dst, tmp, 8);
 }
 
 void UnalignedCopy128(const void* src, void* dst) {
-  //
-  // SSE2
-
-
-
-
-  memcpy(dst, src, 16);
-#endif
+  // memcpy gets vectorized when the appropriate compiler options are used.
+  // For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load
+  // and store.
+  char tmp[16];
+  memcpy(tmp, src, 16);
+  memcpy(dst, tmp, 16);
 }
 
 // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used
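The new UnalignedCopy64/UnalignedCopy128 above express unaligned loads and stores as fixed-size memcpy calls through a local buffer, which compilers lower to single wide moves. A minimal sketch of the same idiom for a 4-byte access; LoadU32/StoreU32 are hypothetical helper names, not part of snappy:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Fixed-size memcpy compiles to one unaligned move on common targets,
// without the undefined behavior of dereferencing a misaligned pointer.
inline uint32_t LoadU32(const void* p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

inline void StoreU32(void* p, uint32_t v) {
  std::memcpy(p, &v, sizeof(v));
}

int main() {
  char buf[16] = {0};
  StoreU32(buf + 3, 0xDEADBEEFu);          // deliberately misaligned offset
  assert(LoadU32(buf + 3) == 0xDEADBEEFu);  // round-trips the value
  return 0;
}
```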
@@ -115,12 +148,35 @@ void UnalignedCopy128(const void* src, void* dst) {
 // Note that this does not match the semantics of either memcpy() or memmove().
 inline char* IncrementalCopySlow(const char* src, char* op,
                                  char* const op_limit) {
+  // TODO: Remove pragma when LLVM is aware this
+  // function is only called in cold regions and when cold regions don't get
+  // vectorized or unrolled.
+#ifdef __clang__
+#pragma clang loop unroll(disable)
+#endif
   while (op < op_limit) {
     *op++ = *src++;
   }
   return op_limit;
 }
 
+#if SNAPPY_HAVE_SSSE3
+
+// This is a table of shuffle control masks that can be used as the source
+// operand for PSHUFB to permute the contents of the destination XMM register
+// into a repeating byte pattern.
+alignas(16) const char pshufb_fill_patterns[7][16] = {
+  {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+  {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
+  {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
+  {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3},
+  {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0},
+  {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3},
+  {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1},
+};
+
+#endif  // SNAPPY_HAVE_SSSE3
+
 // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
 // IncrementalCopySlow. buf_limit is the address past the end of the writable
 // region of the buffer.
@@ -132,9 +188,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
   //   pat = op - src
   //   len = limit - op
   assert(src < op);
+  assert(op <= op_limit);
   assert(op_limit <= buf_limit);
   // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
-  // to optimize this function but we have to also handle
+  // to optimize this function but we have to also handle other cases in case
   // the input does not satisfy these conditions.
 
   size_t pattern_size = op - src;
@@ -163,26 +220,56 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
   // copying 2x 8 bytes at a time.
 
   // Handle the uncommon case where pattern is less than 8 bytes.
-  if (
-
-  //
-  //
+  if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
+#if SNAPPY_HAVE_SSSE3
+    // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB
+    // to permute the register's contents in-place into a repeating sequence of
+    // the first "pattern_size" bytes.
+    // For example, suppose:
+    //    src  == "abc"
+    //    op   == op + 3
+    // After _mm_shuffle_epi8(), "pattern" will have five copies of "abc"
+    // followed by one byte of slop: abcabcabcabcabca.
     //
-  //
-  //
-  //
-
-
-
+    // The non-SSE fallback implementation suffers from store-forwarding stalls
+    // because its loads and stores partly overlap. By expanding the pattern
+    // in-place, we avoid the penalty.
+    if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 16)) {
+      const __m128i shuffle_mask = _mm_load_si128(
+          reinterpret_cast<const __m128i*>(pshufb_fill_patterns)
+          + pattern_size - 1);
+      const __m128i pattern = _mm_shuffle_epi8(
+          _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)), shuffle_mask);
+      // Uninitialized bytes are masked out by the shuffle mask.
+      // TODO: remove annotation and macro defs once MSan is fixed.
+      SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(&pattern, sizeof(pattern));
+      pattern_size *= 16 / pattern_size;
+      char* op_end = std::min(op_limit, buf_limit - 15);
+      while (op < op_end) {
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(op), pattern);
+        op += pattern_size;
+      }
+      if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
+    }
+    return IncrementalCopySlow(src, op, op_limit);
+#else  // !SNAPPY_HAVE_SSSE3
+    // If plenty of buffer space remains, expand the pattern to at least 8
+    // bytes. The way the following loop is written, we need 8 bytes of buffer
+    // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10
+    // bytes if pattern_size is 2. Precisely encoding that is probably not
+    // worthwhile; instead, invoke the slow path if we cannot write 11 bytes
+    // (because 11 are required in the worst case).
+    if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) {
       while (pattern_size < 8) {
         UnalignedCopy64(src, op);
         op += pattern_size;
         pattern_size *= 2;
       }
-      if (
+      if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
     } else {
       return IncrementalCopySlow(src, op, op_limit);
     }
+#endif  // SNAPPY_HAVE_SSSE3
   }
   assert(pattern_size >= 8);
 
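The pshufb_fill_patterns table drives a single PSHUFB that replicates a 1–7 byte pattern across a whole XMM register. A self-contained sketch of that trick, assuming an SSSE3-capable x86 compiler; FillWithPattern and kFill are illustrative names, not snappy APIs:

```cpp
#include <tmmintrin.h>  // SSSE3: _mm_shuffle_epi8
#include <cstdio>

// Expand the first pattern_size (1..7) bytes of src into 16 repeating bytes.
static void FillWithPattern(const char* src, int pattern_size, char out[16]) {
  alignas(16) static const char kFill[7][16] = {
      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
      {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
      {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
      {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3},
      {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0},
      {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3},
      {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1}};
  const __m128i mask =
      _mm_load_si128(reinterpret_cast<const __m128i*>(kFill[pattern_size - 1]));
  // Load 8 bytes of src, then shuffle them in-place into a repeating run.
  const __m128i pattern = _mm_shuffle_epi8(
      _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src)), mask);
  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), pattern);
}

int main() {
  const char src[8] = {'a', 'b', 'c'};  // at least 8 readable bytes
  char out[16];
  FillWithPattern(src, 3, out);
  std::printf("%.16s\n", out);  // prints "abcabcabcabcabca"
  return 0;
}
```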
@@ -190,16 +277,51 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
   // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
   // because expanding the pattern to at least 8 bytes guarantees that
   // op - src >= 8.
-
+  //
+  // Typically, the op_limit is the gating factor so try to simplify the loop
+  // based on that.
+  if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 16)) {
+    // There is at least one, and at most four 16-byte blocks. Writing four
+    // conditionals instead of a loop allows FDO to layout the code with respect
+    // to the actual probabilities of each length.
+    // TODO: Replace with loop with trip count hint.
+    UnalignedCopy64(src, op);
+    UnalignedCopy64(src + 8, op + 8);
+
+    if (op + 16 < op_limit) {
+      UnalignedCopy64(src + 16, op + 16);
+      UnalignedCopy64(src + 24, op + 24);
+    }
+    if (op + 32 < op_limit) {
+      UnalignedCopy64(src + 32, op + 32);
+      UnalignedCopy64(src + 40, op + 40);
+    }
+    if (op + 48 < op_limit) {
+      UnalignedCopy64(src + 48, op + 48);
+      UnalignedCopy64(src + 56, op + 56);
+    }
+    return op_limit;
+  }
+
+  // Fall back to doing as much as we can with the available slop in the
+  // buffer. This code path is relatively cold however so we save code size by
+  // avoiding unrolling and vectorizing.
+  //
+  // TODO: Remove pragma when when cold regions don't get vectorized
+  // or unrolled.
+#ifdef __clang__
+#pragma clang loop unroll(disable)
+#endif
+  for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) {
     UnalignedCopy64(src, op);
     UnalignedCopy64(src + 8, op + 8);
-    src += 16;
-    op += 16;
-    if (PREDICT_TRUE(op >= op_limit)) return op_limit;
   }
+  if (op >= op_limit)
+    return op_limit;
+
   // We only take this branch if we didn't have enough slop and we can do a
   // single 8 byte copy.
-  if (
+  if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
     UnalignedCopy64(src, op);
     src += 8;
     op += 8;
|
|
209
331
|
|
210
332
|
} // namespace
|
211
333
|
|
334
|
+
template <bool allow_fast_path>
|
212
335
|
static inline char* EmitLiteral(char* op,
|
213
336
|
const char* literal,
|
214
|
-
int len
|
215
|
-
bool allow_fast_path) {
|
337
|
+
int len) {
|
216
338
|
// The vast majority of copies are below 16 bytes, for which a
|
217
339
|
// call to memcpy is overkill. This fast path can sometimes
|
218
340
|
// copy up to 15 bytes too much, but that is okay in the
|
@@ -237,31 +359,29 @@ static inline char* EmitLiteral(char* op,
     // Fits in tag byte
     *op++ = LITERAL | (n << 2);
   } else {
-
-    char* base = op;
-    int count = 0;
-    op++;
-    while (n > 0) {
-      *op++ = n & 0xff;
-      n >>= 8;
-      count++;
-    }
+    int count = (Bits::Log2Floor(n) >> 3) + 1;
     assert(count >= 1);
     assert(count <= 4);
-    *
+    *op++ = LITERAL | ((59 + count) << 2);
+    // Encode in upcoming bytes.
+    // Write 4 bytes, though we may care about only 1 of them. The output buffer
+    // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds
+    // here and there is a memcpy of size 'len' below.
+    LittleEndian::Store32(op, n);
+    op += count;
   }
   memcpy(op, literal, len);
   return op + len;
 }
 
-
-
+template <bool len_less_than_12>
+static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) {
   assert(len <= 64);
   assert(len >= 4);
   assert(offset < 65536);
   assert(len_less_than_12 == (len < 12));
 
-  if (len_less_than_12 &&
+  if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
     // offset fits in 11 bits. The 3 highest go in the top of the first byte,
     // and the rest go in the second byte.
     *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
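The rewritten long-literal branch above derives the number of length bytes from the bit width of n instead of looping over it. A small sketch of that count computation; Log2Floor here is a portable stand-in for snappy's Bits::Log2Floor and requires n > 0:

```cpp
#include <cassert>
#include <cstdint>

static int Log2Floor(uint32_t n) {
  int log = 0;
  while (n >>= 1) ++log;  // position of the highest set bit
  return log;
}

// Number of little-endian bytes needed to hold n, as in the new EmitLiteral.
static int LengthByteCount(uint32_t n) {
  return (Log2Floor(n) >> 3) + 1;  // 1..4
}

int main() {
  assert(LengthByteCount(60) == 1);
  assert(LengthByteCount(255) == 1);
  assert(LengthByteCount(256) == 2);
  assert(LengthByteCount(65536) == 3);
  assert(LengthByteCount(0xFFFFFFFFu) == 4);
  return 0;
}
```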
@@ -276,29 +396,33 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
   return op;
 }
 
-
-
+template <bool len_less_than_12>
+static inline char* EmitCopy(char* op, size_t offset, size_t len) {
   assert(len_less_than_12 == (len < 12));
   if (len_less_than_12) {
-    return EmitCopyAtMost64(op, offset, len
+    return EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
   } else {
     // A special case for len <= 64 might help, but so far measurements suggest
     // it's in the noise.
 
     // Emit 64 byte copies but make sure to keep at least four bytes reserved.
-    while (
-      op = EmitCopyAtMost64(op, offset, 64
+    while (SNAPPY_PREDICT_FALSE(len >= 68)) {
+      op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 64);
       len -= 64;
     }
 
     // One or two copies will now finish the job.
     if (len > 64) {
-      op = EmitCopyAtMost64(op, offset, 60
+      op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, 60);
       len -= 60;
     }
 
     // Emit remainder.
-
+    if (len < 12) {
+      op = EmitCopyAtMost64</*len_less_than_12=*/true>(op, offset, len);
+    } else {
+      op = EmitCopyAtMost64</*len_less_than_12=*/false>(op, offset, len);
+    }
     return op;
   }
 }
@@ -314,31 +438,45 @@ bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
   }
 }
 
-namespace
-
-
-
-
-
-
-  size_t htsize = 256;
-  while (htsize < kMaxHashTableSize && htsize < input_size) {
-    htsize <<= 1;
+namespace {
+uint32 CalculateTableSize(uint32 input_size) {
+  static_assert(
+      kMaxHashTableSize >= kMinHashTableSize,
+      "kMaxHashTableSize should be greater or equal to kMinHashTableSize.");
+  if (input_size > kMaxHashTableSize) {
+    return kMaxHashTableSize;
   }
-
-
-  if (htsize <= ARRAYSIZE(small_table_)) {
-    table = small_table_;
-  } else {
-    if (large_table_ == NULL) {
-      large_table_ = new uint16[kMaxHashTableSize];
-    }
-    table = large_table_;
+  if (input_size < kMinHashTableSize) {
+    return kMinHashTableSize;
   }
+  // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1.
+  // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)).
+  return 2u << Bits::Log2Floor(input_size - 1);
+}
+}  // namespace
+
+namespace internal {
+WorkingMemory::WorkingMemory(size_t input_size) {
+  const size_t max_fragment_size = std::min(input_size, kBlockSize);
+  const size_t table_size = CalculateTableSize(max_fragment_size);
+  size_ = table_size * sizeof(*table_) + max_fragment_size +
+          MaxCompressedLength(max_fragment_size);
+  mem_ = std::allocator<char>().allocate(size_);
+  table_ = reinterpret_cast<uint16*>(mem_);
+  input_ = mem_ + table_size * sizeof(*table_);
+  output_ = input_ + max_fragment_size;
+}
+
+WorkingMemory::~WorkingMemory() {
+  std::allocator<char>().deallocate(mem_, size_);
+}
 
+uint16* WorkingMemory::GetHashTable(size_t fragment_size,
+                                    int* table_size) const {
+  const size_t htsize = CalculateTableSize(fragment_size);
+  memset(table_, 0, htsize * sizeof(*table_));
   *table_size = htsize;
-
-  return table;
+  return table_;
 }
 }  // end namespace internal
 
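CalculateTableSize above rounds the fragment size up to a power of two with `2 << Log2Floor(input_size - 1)`. A tiny worked sketch of that identity, again using a portable Log2Floor stand-in (valid for x > 1):

```cpp
#include <cassert>
#include <cstdint>

static int Log2Floor(uint32_t n) {
  int log = 0;
  while (n >>= 1) ++log;
  return log;
}

// Same expression as CalculateTableSize: next power of two >= x.
static uint32_t RoundUpToPowerOfTwo(uint32_t x) {
  return 2u << Log2Floor(x - 1);
}

int main() {
  assert(RoundUpToPowerOfTwo(2) == 2);
  assert(RoundUpToPowerOfTwo(3) == 4);
  assert(RoundUpToPowerOfTwo(256) == 256);
  assert(RoundUpToPowerOfTwo(257) == 512);
  return 0;
}
```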
@@ -405,7 +543,7 @@ char* CompressFragment(const char* input,
   // "ip" is the input pointer, and "op" is the output pointer.
   const char* ip = input;
   assert(input_size <= kBlockSize);
-  assert((table_size & (table_size - 1)) == 0);
+  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
   const int shift = 32 - Bits::Log2Floor(table_size);
   assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
   const char* ip_end = input + input_size;
@@ -415,7 +553,7 @@ char* CompressFragment(const char* input,
   const char* next_emit = ip;
 
   const size_t kInputMarginBytes = 15;
-  if (
+  if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
     const char* ip_limit = input + input_size - kInputMarginBytes;
 
     for (uint32 next_hash = Hash(++ip, shift); ; ) {
@@ -456,7 +594,7 @@ char* CompressFragment(const char* input,
       uint32 bytes_between_hash_lookups = skip >> 5;
       skip += bytes_between_hash_lookups;
       next_ip = ip + bytes_between_hash_lookups;
-      if (
+      if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
         goto emit_remainder;
       }
       next_hash = Hash(next_ip, shift);
@@ -465,14 +603,14 @@ char* CompressFragment(const char* input,
       assert(candidate < ip);
 
       table[hash] = ip - base_ip;
-    } while (
-
+    } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
+                                 UNALIGNED_LOAD32(candidate)));
 
     // Step 2: A 4-byte match has been found. We'll later see if more
     // than 4 bytes match. But, prior to the match, input
     // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
     assert(next_emit + 16 <= ip_end);
-    op = EmitLiteral(op, next_emit, ip - next_emit
+    op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit, ip - next_emit);
 
     // Step 3: Call EmitCopy, and then see if another EmitCopy could
     // be our next move. Repeat until we find no match for the
@@ -495,9 +633,13 @@ char* CompressFragment(const char* input,
       ip += matched;
       size_t offset = base - candidate;
       assert(0 == memcmp(base, candidate, matched));
-
+      if (p.second) {
+        op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched);
+      } else {
+        op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched);
+      }
       next_emit = ip;
-      if (
+      if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
         goto emit_remainder;
       }
       // We are now looking for a 4-byte match again. We read
@@ -520,13 +662,18 @@ char* CompressFragment(const char* input,
  emit_remainder:
   // Emit the remaining bytes as a literal
   if (next_emit < ip_end) {
-    op = EmitLiteral(op, next_emit,
+    op = EmitLiteral</*allow_fast_path=*/false>(op, next_emit,
+                                                ip_end - next_emit);
   }
 
   return op;
 }
 }  // end namespace internal
 
+// Called back at avery compression call to trace parameters and sizes.
+static inline void Report(const char *algorithm, size_t compressed_size,
+                          size_t uncompressed_size) {}
+
 // Signature of output types needed by decompression code.
 // The decompression code is templatized on a type that obeys this
 // signature so that we do not pay virtual function call overhead in
@@ -567,6 +714,28 @@ char* CompressFragment(const char* input,
 //   bool TryFastAppend(const char* ip, size_t available, size_t length);
 // };
 
+static inline uint32 ExtractLowBytes(uint32 v, int n) {
+  assert(n >= 0);
+  assert(n <= 4);
+#if SNAPPY_HAVE_BMI2
+  return _bzhi_u32(v, 8 * n);
+#else
+  // This needs to be wider than uint32 otherwise `mask << 32` will be
+  // undefined.
+  uint64 mask = 0xffffffff;
+  return v & ~(mask << (8 * n));
+#endif
+}
+
+static inline bool LeftShiftOverflows(uint8 value, uint32 shift) {
+  assert(shift < 32);
+  static const uint8 masks[] = {
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+      0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe};
+  return (value & masks[shift]) != 0;
+}
 
 // Helper class for decompression
 class SnappyDecompressor {
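The two helpers added above are small bit tricks: ExtractLowBytes keeps only the low n bytes of a 32-bit load (via BMI2's BZHI when available), and LeftShiftOverflows rejects varint bytes whose shifted value would not fit in 32 bits. A portable sketch of both, using standard fixed-width types rather than snappy's uint32/uint8 typedefs; the shift-based LeftShiftOverflows is an assumed equivalent of the mask-table version:

```cpp
#include <cassert>
#include <cstdint>

static uint32_t ExtractLowBytes(uint32_t v, int n) {
  assert(n >= 0 && n <= 4);
  uint64_t mask = 0xffffffff;      // widened so that "<< 32" stays defined
  return v & ~(mask << (8 * n));
}

static bool LeftShiftOverflows(uint8_t value, uint32_t shift) {
  assert(shift < 32);
  // True when (value << shift) would set bits at or above bit 32.
  return shift > 24 && (value >> (32 - shift)) != 0;
}

int main() {
  assert(ExtractLowBytes(0xAABBCCDDu, 2) == 0xCCDDu);
  assert(ExtractLowBytes(0xAABBCCDDu, 0) == 0);
  assert(!LeftShiftOverflows(0x7f, 24));  // 0x7f << 24 still fits in 32 bits
  assert(LeftShiftOverflows(0x80, 28));   // 0x80 << 28 would overflow
  return 0;
}
```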
@@ -605,7 +774,7 @@ class SnappyDecompressor {
   }
 
   // Read the uncompressed length stored at the start of the compressed data.
-  // On
+  // On success, stores the length in *result and returns true.
   // On failure, returns false.
   bool ReadUncompressedLength(uint32* result) {
     assert(ip_ == NULL);  // Must not have read anything yet
@@ -620,7 +789,7 @@ class SnappyDecompressor {
       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
       reader_->Skip(1);
       uint32 val = c & 0x7f;
-      if (((val
+      if (LeftShiftOverflows(static_cast<uint8>(val), shift)) return false;
       *result |= val << shift;
       if (c < 128) {
         break;
@@ -633,13 +802,27 @@ class SnappyDecompressor {
   // Process the next item found in the input.
   // Returns true if successful, false on error or end of input.
   template <class Writer>
+#if defined(__GNUC__) && defined(__x86_64__)
+  __attribute__((aligned(32)))
+#endif
   void DecompressAllTags(Writer* writer) {
-
-    //
-    //
-
-
+    // In x86, pad the function body to start 16 bytes later. This function has
+    // a couple of hotspots that are highly sensitive to alignment: we have
+    // observed regressions by more than 20% in some metrics just by moving the
+    // exact same code to a different position in the benchmark binary.
+    //
+    // Putting this code on a 32-byte-aligned boundary + 16 bytes makes us hit
+    // the "lucky" case consistently. Unfortunately, this is a very brittle
+    // workaround, and future differences in code generation may reintroduce
+    // this regression. If you experience a big, difficult to explain, benchmark
+    // performance regression here, first try removing this hack.
+#if defined(__GNUC__) && defined(__x86_64__)
+    // Two 8-byte "NOP DWORD ptr [EAX + EAX*1 + 00000000H]" instructions.
+    asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
+    asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
+#endif
 
+    const char* ip = ip_;
     // We could have put this refill fragment only at the beginning of the loop.
     // However, duplicating it at the end of each branch gives the compiler more
     // scope to optimize the <ip_limit_ - ip> expression based on the local
@@ -667,21 +850,22 @@ class SnappyDecompressor {
       //   txt[1-4]   25%   75%
       //   pb         24%   76%
       //   bin        24%   76%
-      if (
+      if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
         size_t literal_length = (c >> 2) + 1u;
         if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
           assert(literal_length < 61);
           ip += literal_length;
-          // NOTE
+          // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend()
           // will not return true unless there's already at least five spare
           // bytes in addition to the literal.
           continue;
         }
-        if (
+        if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
           // Long literal.
           const size_t literal_length_length = literal_length - 60;
           literal_length =
-              (LittleEndian::Load32(ip)
+              ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) +
+              1;
           ip += literal_length_length;
         }
 
@@ -704,7 +888,8 @@ class SnappyDecompressor {
         MAYBE_REFILL();
       } else {
         const size_t entry = char_table[c];
-        const size_t trailer =
+        const size_t trailer =
+            ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11);
         const size_t length = entry & 0xff;
         ip += entry >> 11;
 
@@ -757,7 +942,7 @@ bool SnappyDecompressor::RefillTag() {
     size_t length;
     const char* src = reader_->Peek(&length);
     if (length == 0) return false;
-    uint32 to_add = min<uint32>(needed - nbuf, length);
+    uint32 to_add = std::min<uint32>(needed - nbuf, length);
     memcpy(scratch_ + nbuf, src, to_add);
     nbuf += to_add;
     reader_->Skip(to_add);
@@ -786,13 +971,18 @@ static bool InternalUncompress(Source* r, Writer* writer) {
   SnappyDecompressor decompressor(r);
   uint32 uncompressed_len = 0;
   if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
-
+
+  return InternalUncompressAllTags(&decompressor, writer, r->Available(),
+                                   uncompressed_len);
 }
 
 template <typename Writer>
 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                       Writer* writer,
+                                      uint32 compressed_len,
                                       uint32 uncompressed_len) {
+  Report("snappy_uncompress", compressed_len, uncompressed_len);
+
   writer->SetExpectedLength(uncompressed_len);
 
   // Process the entire input
@@ -809,21 +999,20 @@ bool GetUncompressedLength(Source* source, uint32* result) {
 size_t Compress(Source* reader, Sink* writer) {
   size_t written = 0;
   size_t N = reader->Available();
+  const size_t uncompressed_size = N;
   char ulength[Varint::kMax32];
   char* p = Varint::Encode32(ulength, N);
   writer->Append(ulength, p-ulength);
   written += (p - ulength);
 
-  internal::WorkingMemory wmem;
-  char* scratch = NULL;
-  char* scratch_output = NULL;
+  internal::WorkingMemory wmem(N);
 
   while (N > 0) {
     // Get next block to compress (without copying if possible)
     size_t fragment_size;
     const char* fragment = reader->Peek(&fragment_size);
     assert(fragment_size != 0);  // premature end of input
-    const size_t num_to_read = min(N, kBlockSize);
+    const size_t num_to_read = std::min(N, kBlockSize);
     size_t bytes_read = fragment_size;
 
     size_t pending_advance = 0;
@@ -832,19 +1021,13 @@ size_t Compress(Source* reader, Sink* writer) {
       pending_advance = num_to_read;
       fragment_size = num_to_read;
     } else {
-
-      if (scratch == NULL) {
-        // If this is the last iteration, we want to allocate N bytes
-        // of space, otherwise the max possible kBlockSize space.
-        // num_to_read contains exactly the correct value
-        scratch = new char[num_to_read];
-      }
+      char* scratch = wmem.GetScratchInput();
       memcpy(scratch, fragment, bytes_read);
       reader->Skip(bytes_read);
 
       while (bytes_read < num_to_read) {
         fragment = reader->Peek(&fragment_size);
-        size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
+        size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
         memcpy(scratch + bytes_read, fragment, n);
         bytes_read += n;
         reader->Skip(n);
@@ -864,16 +1047,13 @@ size_t Compress(Source* reader, Sink* writer) {
 
     // Need a scratch buffer for the output, in case the byte sink doesn't
     // have room for us directly.
-
-
-
-
-
-
-
-    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
-    char* end = internal::CompressFragment(fragment, fragment_size,
-                                           dest, table, table_size);
+
+    // Since we encode kBlockSize regions followed by a region
+    // which is <= kBlockSize in length, a previously allocated
+    // scratch_output[] region is big enough for this iteration.
+    char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
+    char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
+                                           table_size);
     writer->Append(dest, end - dest);
     written += (end - dest);
 
@@ -881,8 +1061,7 @@ size_t Compress(Source* reader, Sink* writer) {
     reader->Skip(pending_advance);
   }
 
-
-  delete[] scratch_output;
+  Report("snappy_compress", written, uncompressed_size);
 
   return written;
 }
@@ -896,14 +1075,22 @@ size_t Compress(Source* reader, Sink* writer) {
 // Writer template argument to SnappyDecompressor::DecompressAllTags().
 class SnappyIOVecWriter {
  private:
+  // output_iov_end_ is set to iov + count and used to determine when
+  // the end of the iovs is reached.
+  const struct iovec* output_iov_end_;
+
+#if !defined(NDEBUG)
   const struct iovec* output_iov_;
-
+#endif  // !defined(NDEBUG)
+
+  // Current iov that is being written into.
+  const struct iovec* curr_iov_;
 
-  //
-
+  // Pointer to current iov's write location.
+  char* curr_iov_output_;
 
-  //
-  size_t
+  // Remaining bytes to write into curr_iov_output.
+  size_t curr_iov_remaining_;
 
   // Total bytes decompressed into output_iov_ so far.
   size_t total_written_;
@@ -911,22 +1098,24 @@ class SnappyIOVecWriter {
   // Maximum number of bytes that will be decompressed into output_iov_.
   size_t output_limit_;
 
-  inline char* GetIOVecPointer(
-    return reinterpret_cast<char*>(
-        offset;
+  static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) {
+    return reinterpret_cast<char*>(iov->iov_base) + offset;
   }
 
  public:
   // Does not take ownership of iov. iov must be valid during the
   // entire lifetime of the SnappyIOVecWriter.
   inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
-      :
-
-
-
+      : output_iov_end_(iov + iov_count),
+#if !defined(NDEBUG)
+        output_iov_(iov),
+#endif  // !defined(NDEBUG)
+        curr_iov_(iov),
+        curr_iov_output_(iov_count ? reinterpret_cast<char*>(iov->iov_base)
+                                   : nullptr),
+        curr_iov_remaining_(iov_count ? iov->iov_len : 0),
         total_written_(0),
-        output_limit_(-1) {
-  }
+        output_limit_(-1) {}
 
   inline void SetExpectedLength(size_t len) {
     output_limit_ = len;
@@ -941,23 +1130,25 @@ class SnappyIOVecWriter {
       return false;
     }
 
+    return AppendNoCheck(ip, len);
+  }
+
+  inline bool AppendNoCheck(const char* ip, size_t len) {
     while (len > 0) {
-
-      if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
+      if (curr_iov_remaining_ == 0) {
         // This iovec is full. Go to the next one.
-        if (
+        if (curr_iov_ + 1 >= output_iov_end_) {
          return false;
         }
-
-
+        ++curr_iov_;
+        curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+        curr_iov_remaining_ = curr_iov_->iov_len;
      }
 
-      const size_t to_write = std::min(
-
-
-
-          to_write);
-      curr_iov_written_ += to_write;
+      const size_t to_write = std::min(len, curr_iov_remaining_);
+      memcpy(curr_iov_output_, ip, to_write);
+      curr_iov_output_ += to_write;
+      curr_iov_remaining_ -= to_write;
       total_written_ += to_write;
       ip += to_write;
       len -= to_write;
@@ -969,11 +1160,11 @@ class SnappyIOVecWriter {
   inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
     const size_t space_left = output_limit_ - total_written_;
     if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
-
+        curr_iov_remaining_ >= 16) {
       // Fast path, used for the majority (about 95%) of invocations.
-
-
-
+      UnalignedCopy128(ip, curr_iov_output_);
+      curr_iov_output_ += len;
+      curr_iov_remaining_ -= len;
       total_written_ += len;
       return true;
     }
@@ -982,7 +1173,9 @@ class SnappyIOVecWriter {
   }
 
   inline bool AppendFromSelf(size_t offset, size_t len) {
-
+    // See SnappyArrayWriter::AppendFromSelf for an explanation of
+    // the "offset - 1u" trick.
+    if (offset - 1u >= total_written_) {
       return false;
     }
     const size_t space_left = output_limit_ - total_written_;
@@ -991,8 +1184,8 @@ class SnappyIOVecWriter {
     }
 
     // Locate the iovec from which we need to start the copy.
-
-    size_t from_iov_offset =
+    const iovec* from_iov = curr_iov_;
+    size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_;
     while (offset > 0) {
       if (from_iov_offset >= offset) {
         from_iov_offset -= offset;
@@ -1000,47 +1193,47 @@ class SnappyIOVecWriter {
      }
 
       offset -= from_iov_offset;
-
-
-
+      --from_iov;
+#if !defined(NDEBUG)
+      assert(from_iov >= output_iov_);
+#endif  // !defined(NDEBUG)
+      from_iov_offset = from_iov->iov_len;
     }
 
     // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
     // the current iovec.
     while (len > 0) {
-      assert(
-      if (
-        const size_t to_copy =
-
-
-        Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
+      assert(from_iov <= curr_iov_);
+      if (from_iov != curr_iov_) {
+        const size_t to_copy =
+            std::min(from_iov->iov_len - from_iov_offset, len);
+        AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy);
         len -= to_copy;
         if (len > 0) {
-          ++
+          ++from_iov;
          from_iov_offset = 0;
         }
       } else {
-
-        size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
-                                  curr_iov_written_,
-                                  len);
+        size_t to_copy = curr_iov_remaining_;
        if (to_copy == 0) {
          // This iovec is full. Go to the next one.
-          if (
+          if (curr_iov_ + 1 >= output_iov_end_) {
            return false;
          }
-          ++
-
+          ++curr_iov_;
+          curr_iov_output_ = reinterpret_cast<char*>(curr_iov_->iov_base);
+          curr_iov_remaining_ = curr_iov_->iov_len;
          continue;
        }
        if (to_copy > len) {
          to_copy = len;
        }
-
-
-
-
-
+
+        IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset),
+                        curr_iov_output_, curr_iov_output_ + to_copy,
+                        curr_iov_output_ + curr_iov_remaining_);
+        curr_iov_output_ += to_copy;
+        curr_iov_remaining_ -= to_copy;
        from_iov_offset += to_copy;
        total_written_ += to_copy;
        len -= to_copy;
@@ -1149,7 +1342,7 @@ bool RawUncompress(Source* compressed, char* uncompressed) {
   return InternalUncompress(compressed, &output);
 }
 
-bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
+bool Uncompress(const char* compressed, size_t n, std::string* uncompressed) {
   size_t ulength;
   if (!GetUncompressedLength(compressed, n, &ulength)) {
     return false;
@@ -1217,7 +1410,8 @@ void RawCompress(const char* input,
   *compressed_length = (writer.CurrentDestination() - compressed);
 }
 
-size_t Compress(const char* input, size_t input_length,
+size_t Compress(const char* input, size_t input_length,
+                std::string* compressed) {
   // Pre-grow the buffer to the max length of the compressed output
   STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
 
|
@@ -1313,7 +1507,8 @@ class SnappyScatteredWriter {
|
|
1313
1507
|
char* const op_end = op_ptr_ + len;
|
1314
1508
|
// See SnappyArrayWriter::AppendFromSelf for an explanation of
|
1315
1509
|
// the "offset - 1u" trick.
|
1316
|
-
if (
|
1510
|
+
if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
|
1511
|
+
op_end <= op_limit_)) {
|
1317
1512
|
// Fast path: src and dst in current block.
|
1318
1513
|
op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
|
1319
1514
|
return true;
|
@@ -1344,7 +1539,7 @@ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
   }
 
   // Make new block
-  size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+  size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
   op_base_ = allocator_.Allocate(bsize);
   op_ptr_ = op_base_;
   op_limit_ = op_base_ + bsize;
@@ -1401,7 +1596,7 @@ class SnappySinkAllocator {
     size_t size_written = 0;
     size_t block_size;
     for (int i = 0; i < blocks_.size(); ++i) {
-      block_size = min<size_t>(blocks_[i].size, size - size_written);
+      block_size = std::min<size_t>(blocks_[i].size, size - size_written);
       dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
                                     &SnappySinkAllocator::Deleter, NULL);
       size_written += block_size;
@@ -1446,19 +1641,21 @@ bool Uncompress(Source* compressed, Sink* uncompressed) {
   char* buf = uncompressed->GetAppendBufferVariable(
       1, uncompressed_len, &c, 1, &allocated_size);
 
+  const size_t compressed_len = compressed->Available();
   // If we can get a flat buffer, then use it, otherwise do block by block
   // uncompression
   if (allocated_size >= uncompressed_len) {
     SnappyArrayWriter writer(buf);
-    bool result = InternalUncompressAllTags(
-
+    bool result = InternalUncompressAllTags(&decompressor, &writer,
+                                            compressed_len, uncompressed_len);
     uncompressed->Append(buf, writer.Produced());
     return result;
   } else {
     SnappySinkAllocator allocator(uncompressed);
     SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
-    return InternalUncompressAllTags(&decompressor, &writer,
+    return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
+                                     uncompressed_len);
   }
 }
 
-}
+}  // namespace snappy