snappy 0.0.15-java → 0.0.16-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +23 -1
- data/ext/extconf.rb +1 -9
- data/lib/snappy.rb +2 -1
- data/lib/snappy/reader.rb +7 -3
- data/lib/snappy/shim.rb +30 -0
- data/lib/snappy/version.rb +3 -1
- data/lib/snappy/writer.rb +8 -9
- data/smoke.sh +8 -0
- data/vendor/snappy/COPYING +1 -1
- data/vendor/snappy/ChangeLog +2468 -1916
- data/vendor/snappy/Makefile.am +3 -0
- data/vendor/snappy/NEWS +20 -0
- data/vendor/snappy/README +10 -6
- data/vendor/snappy/autogen.sh +6 -1
- data/vendor/snappy/configure.ac +4 -3
- data/vendor/snappy/snappy-c.h +3 -3
- data/vendor/snappy/snappy-internal.h +98 -21
- data/vendor/snappy/snappy-sinksource.cc +33 -0
- data/vendor/snappy/snappy-sinksource.h +51 -6
- data/vendor/snappy/snappy-stubs-internal.h +44 -7
- data/vendor/snappy/snappy-stubs-public.h.in +5 -3
- data/vendor/snappy/snappy-test.cc +5 -2
- data/vendor/snappy/snappy-test.h +22 -5
- data/vendor/snappy/snappy.cc +474 -316
- data/vendor/snappy/snappy.h +23 -4
- data/vendor/snappy/snappy.pc.in +10 -0
- data/vendor/snappy/snappy_unittest.cc +225 -49
- metadata +6 -3
data/vendor/snappy/snappy-stubs-public.h.in
CHANGED
@@ -33,8 +33,8 @@
 // which is a public header. Instead, snappy-stubs-public.h is generated by
 // from snappy-stubs-public.h.in at configure time.

-#ifndef
-#define
+#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_

 #if @ac_cv_have_stdint_h@
 #include <stdint.h>
@@ -80,9 +80,11 @@ typedef unsigned long long uint64;

 typedef std::string string;

+#ifndef DISALLOW_COPY_AND_ASSIGN
 #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
   TypeName(const TypeName&);               \
   void operator=(const TypeName&)
+#endif

 #if !@ac_cv_have_sys_uio_h@
 // Windows does not have an iovec type, yet the concept is universally useful.
@@ -95,4 +97,4 @@ struct iovec {

 }  // namespace snappy

-#endif  //
+#endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
data/vendor/snappy/snappy-test.cc
CHANGED
@@ -28,13 +28,16 @@
 //
 // Various stubs for the unit tests for the open-source version of Snappy.

-#
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif

 #ifdef HAVE_WINDOWS_H
-#define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #endif

+#include "snappy-test.h"
+
 #include <algorithm>

 DEFINE_bool(run_microbenchmarks, true,
data/vendor/snappy/snappy-test.h
CHANGED
@@ -28,8 +28,8 @@
 //
 // Various stubs for the unit tests for the open-source version of Snappy.

-#ifndef
-#define
+#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
+#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_

 #include <iostream>
 #include <string>
@@ -52,7 +52,6 @@
 #endif

 #ifdef HAVE_WINDOWS_H
-#define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #endif

@@ -132,7 +131,7 @@ namespace File {
 }  // namespace File

 namespace file {
-  int Defaults() { }
+  int Defaults() { return 0; }

   class DummyStatus {
    public:
@@ -158,6 +157,8 @@ namespace file {
     }

     fclose(fp);
+
+    return DummyStatus();
   }

   DummyStatus SetContents(const string& filename,
@@ -176,6 +177,8 @@ namespace file {
     }

     fclose(fp);
+
+    return DummyStatus();
   }
 }  // namespace file

@@ -193,6 +196,7 @@ void Test_Snappy_RandomData();
 void Test_Snappy_FourByteOffset();
 void Test_SnappyCorruption_TruncatedVarint();
 void Test_SnappyCorruption_UnterminatedVarint();
+void Test_SnappyCorruption_OverflowingVarint();
 void Test_Snappy_ReadPastEndOfBuffer();
 void Test_Snappy_FindMatchLength();
 void Test_Snappy_FindMatchLengthRandom();
@@ -497,6 +501,7 @@ static inline int RUN_ALL_TESTS() {
   snappy::Test_Snappy_FourByteOffset();
   snappy::Test_SnappyCorruption_TruncatedVarint();
   snappy::Test_SnappyCorruption_UnterminatedVarint();
+  snappy::Test_SnappyCorruption_OverflowingVarint();
   snappy::Test_Snappy_ReadPastEndOfBuffer();
   snappy::Test_Snappy_FindMatchLength();
   snappy::Test_Snappy_FindMatchLengthRandom();
@@ -544,6 +549,13 @@ class LogMessage {
   PREDICT_TRUE(condition) ? (void)0 : \
   snappy::LogMessageVoidify() & snappy::LogMessageCrash()

+#ifdef _MSC_VER
+// ~LogMessageCrash calls abort() and therefore never exits. This is by design
+// so temporarily disable warning C4722.
+#pragma warning(push)
+#pragma warning(disable:4722)
+#endif
+
 class LogMessageCrash : public LogMessage {
  public:
   LogMessageCrash() { }
@@ -553,6 +565,10 @@ class LogMessageCrash : public LogMessage {
   }
 };

+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
 // This class is used to explicitly ignore values in the conditional
 // logging macros.  This avoids compiler warnings like "value computed
 // is not used" and "statement has no effect".
@@ -572,6 +588,7 @@ class LogMessageVoidify {
 #define CHECK_NE(a, b) CRASH_UNLESS((a) != (b))
 #define CHECK_LT(a, b) CRASH_UNLESS((a) < (b))
 #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
+#define CHECK_OK(cond) (cond).CheckSuccess()

 }  // namespace

@@ -579,4 +596,4 @@ using snappy::CompressFile;
 using snappy::UncompressFile;
 using snappy::MeasureFile;

-#endif  //
+#endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
data/vendor/snappy/snappy.cc
CHANGED
@@ -30,6 +30,9 @@
 #include "snappy-internal.h"
 #include "snappy-sinksource.h"

+#if defined(__x86_64__) || defined(_M_X64)
+#include <emmintrin.h>
+#endif
 #include <stdio.h>

 #include <algorithm>
@@ -39,6 +42,13 @@

 namespace snappy {

+using internal::COPY_1_BYTE_OFFSET;
+using internal::COPY_2_BYTE_OFFSET;
+using internal::LITERAL;
+using internal::char_table;
+using internal::kMaximumTagLength;
+using internal::wordmask;
+
 // Any hash function will produce a valid compressed bitstream, but a good
 // hash function reduces the number of collisions and thus yields better
 // compression for compressible input, and more speed for incompressible
@@ -76,79 +86,125 @@ size_t MaxCompressedLength(size_t source_len) {
   return 32 + source_len + source_len/6;
 }

-
-
-
-
-  COPY_4_BYTE_OFFSET = 3
-};
-static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual offset.
-
-// Copy "len" bytes from "src" to "op", one byte at a time.  Used for
-// handling COPY operations where the input and output regions may
-// overlap.  For example, suppose:
-//    src    == "ab"
-//    op     == src + 2
-//    len    == 20
-// After IncrementalCopy(src, op, len), the result will have
-// eleven copies of "ab"
-//    ababababababababababab
-// Note that this does not match the semantics of either memcpy()
-// or memmove().
-static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
-  assert(len > 0);
-  do {
-    *op++ = *src++;
-  } while (--len > 0);
+namespace {
+
+void UnalignedCopy64(const void* src, void* dst) {
+  memcpy(dst, src, 8);
 }

-
-//
-//
-
-
-
-
-
-
-
-// abxxxxxxxxxxxx
-// [------] src
-// [------] op
-//
-// a single eight-byte copy from <src> to <op> will repeat the pattern once,
-// after which we can move <op> two bytes without moving <src>:
-//
-// ababxxxxxxxxxx
-// [------] src
-// [------] op
-//
-// and repeat the exercise until the two no longer overlap.
-//
-// This allows us to do very well in the special case of one single byte
-// repeated many times, without taking a big hit for more general cases.
-//
-// The worst case of extra writing past the end of the match occurs when
-// op - src == 1 and len == 1; the last copy will read from byte positions
-// [0..7] and write to [4..11], whereas it was only supposed to write to
-// position 1. Thus, ten excess bytes.
+void UnalignedCopy128(const void* src, void* dst) {
+  // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
+  // SSE2 moves for memcpy(dst, src, 16).
+#ifdef __SSE2__
+  __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
+  _mm_storeu_si128(static_cast<__m128i*>(dst), x);
+#else
+  memcpy(dst, src, 16);
+#endif
+}

-
+// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used
+// for handling COPY operations where the input and output regions may overlap.
+// For example, suppose:
+//    src       == "ab"
+//    op        == src + 2
+//    op_limit  == op + 20
+// After IncrementalCopySlow(src, op, op_limit), the result will have eleven
+// copies of "ab"
+//    ababababababababababab
+// Note that this does not match the semantics of either memcpy() or memmove().
+inline char* IncrementalCopySlow(const char* src, char* op,
+                                 char* const op_limit) {
+  while (op < op_limit) {
+    *op++ = *src++;
+  }
+  return op_limit;
+}

-
+// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than
+// IncrementalCopySlow. buf_limit is the address past the end of the writable
+// region of the buffer.
+inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
+                             char* const buf_limit) {
+  // Terminology:
+  //
+  //   slop = buf_limit - op
+  //   pat  = op - src
+  //   len  = limit - op
+  assert(src < op);
+  assert(op_limit <= buf_limit);
+  // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that
+  // to optimize this function but we have to also handle these cases in case
+  // the input does not satisfy these conditions.
+
+  size_t pattern_size = op - src;
+  // The cases are split into different branches to allow the branch predictor,
+  // FDO, and static prediction hints to work better. For each input we list the
+  // ratio of invocations that match each condition.
+  //
+  // input        slop < 16   pat < 8  len > 16
+  // ------------------------------------------
+  // html|html4|cp    0%        1.01%    27.73%
+  // urls             0%        0.88%    14.79%
+  // jpg              0%       64.29%     7.14%
+  // pdf              0%        2.56%    58.06%
+  // txt[1-4]         0%        0.23%     0.97%
+  // pb               0%        0.96%    13.88%
+  // bin              0.01%    22.27%    41.17%
+  //
+  // It is very rare that we don't have enough slop for doing block copies. It
+  // is also rare that we need to expand a pattern. Small patterns are common
+  // for incompressible formats and for those we are plenty fast already.
+  // Lengths are normally not greater than 16 but they vary depending on the
+  // input. In general if we always predict len <= 16 it would be an ok
+  // prediction.
+  //
+  // In order to be fast we want a pattern >= 8 bytes and an unrolled loop
+  // copying 2x 8 bytes at a time.
+
+  // Handle the uncommon case where pattern is less than 8 bytes.
+  if (PREDICT_FALSE(pattern_size < 8)) {
+    // Expand pattern to at least 8 bytes. The worse case scenario in terms of
+    // buffer usage is when the pattern is size 3. ^ is the original position
+    // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
+    //
+    // abc
+    // abcabcxxxxx
+    // abcabcabcabcxxxxx
+    //    ^
+    // The last x is 14 bytes after ^.
+    if (PREDICT_TRUE(op <= buf_limit - 14)) {
+      while (pattern_size < 8) {
+        UnalignedCopy64(src, op);
+        op += pattern_size;
+        pattern_size *= 2;
+      }
+      if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+    } else {
+      return IncrementalCopySlow(src, op, op_limit);
+    }
+  }
+  assert(pattern_size >= 8);

-
-
+  // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single
+  // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe
+  // because expanding the pattern to at least 8 bytes guarantees that
+  // op - src >= 8.
+  while (op <= buf_limit - 16) {
     UnalignedCopy64(src, op);
-
-
+    UnalignedCopy64(src + 8, op + 8);
+    src += 16;
+    op += 16;
+    if (PREDICT_TRUE(op >= op_limit)) return op_limit;
   }
-
+  // We only take this branch if we didn't have enough slop and we can do a
+  // single 8 byte copy.
+  if (PREDICT_FALSE(op <= buf_limit - 8)) {
     UnalignedCopy64(src, op);
     src += 8;
     op += 8;
-    len -= 8;
   }
+  return IncrementalCopySlow(src, op, op_limit);
 }

 }  // namespace
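For context, here is a small standalone sketch (not part of the diff) of the pattern-expansion idea the new IncrementalCopy comments describe: an overlapping copy with op - src == pattern_size is turned into larger block copies by doubling the pattern in place until it is at least 8 bytes. The helper name is illustrative; the sketch uses exact-size memcpy calls so, unlike the real fast path, it needs no slack past op_limit.

#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>
#include <string>

// Illustrative only: expand an overlapping "pattern" copy the way the new
// IncrementalCopy comments describe, then fall back to byte-at-a-time copying
// for whatever remains (IncrementalCopySlow's job in the real code).
static char* ExpandPatternCopy(const char* src, char* op, char* const op_limit) {
  assert(src < op);
  size_t pattern_size = op - src;
  // Double the pattern until it is at least 8 bytes long (or we are done).
  while (pattern_size < 8 && op < op_limit) {
    size_t n = std::min(pattern_size, static_cast<size_t>(op_limit - op));
    std::memcpy(op, src, n);   // safe: the two n-byte regions are adjacent, not overlapping
    op += n;
    pattern_size *= 2;
  }
  // Remaining bytes: plain overlapping byte copy.
  while (op < op_limit) *op++ = *src++;
  return op_limit;
}

int main() {
  std::string buf = "ab";
  buf.resize(22);                                   // "ab" plus 20 bytes to fill
  ExpandPatternCopy(&buf[0], &buf[2], &buf[0] + buf.size());
  std::cout << buf << "\n";                         // prints "ab" repeated 11 times
  return 0;
}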
@@ -157,26 +213,29 @@ static inline char* EmitLiteral(char* op,
                                 const char* literal,
                                 int len,
                                 bool allow_fast_path) {
-
-
+  // The vast majority of copies are below 16 bytes, for which a
+  // call to memcpy is overkill. This fast path can sometimes
+  // copy up to 15 bytes too much, but that is okay in the
+  // main loop, since we have a bit to go on for both sides:
+  //
+  //   - The input will always have kInputMarginBytes = 15 extra
+  //     available bytes, as long as we're in the main loop, and
+  //     if not, allow_fast_path = false.
+  //   - The output will always have 32 spare bytes (see
+  //     MaxCompressedLength).
+  assert(len > 0);      // Zero-length literals are disallowed
+  int n = len - 1;
+  if (allow_fast_path && len <= 16) {
     // Fits in tag byte
     *op++ = LITERAL | (n << 2);

-
-
-
-
-
-//
-
-//     if not, allow_fast_path = false.
-//   - The output will always have 32 spare bytes (see
-//     MaxCompressedLength).
-  if (allow_fast_path && len <= 16) {
-    UnalignedCopy64(literal, op);
-    UnalignedCopy64(literal + 8, op + 8);
-    return op + len;
-  }
+    UnalignedCopy128(literal, op);
+    return op + len;
+  }
+
+  if (n < 60) {
+    // Fits in tag byte
+    *op++ = LITERAL | (n << 2);
   } else {
     // Encode in upcoming bytes
     char* base = op;
@@ -195,42 +254,54 @@ static inline char* EmitLiteral(char* op,
   return op + len;
 }

-static inline char*
+static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
+                                     bool len_less_than_12) {
   assert(len <= 64);
   assert(len >= 4);
   assert(offset < 65536);
+  assert(len_less_than_12 == (len < 12));

-  if (
-
-
-    *op++ = COPY_1_BYTE_OFFSET + ((
+  if (len_less_than_12 && PREDICT_TRUE(offset < 2048)) {
+    // offset fits in 11 bits.  The 3 highest go in the top of the first byte,
+    // and the rest go in the second byte.
+    *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
     *op++ = offset & 0xff;
   } else {
-
-
-
+    // Write 4 bytes, though we only care about 3 of them.  The output buffer
+    // is required to have some slack, so the extra byte won't overrun it.
+    uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8);
+    LittleEndian::Store32(op, u);
+    op += 3;
   }
   return op;
 }

-static inline char* EmitCopy(char* op, size_t offset,
-
-
-
-    len
-  }
+static inline char* EmitCopy(char* op, size_t offset, size_t len,
+                             bool len_less_than_12) {
+  assert(len_less_than_12 == (len < 12));
+  if (len_less_than_12) {
+    return EmitCopyAtMost64(op, offset, len, true);
+  } else {
+    // A special case for len <= 64 might help, but so far measurements suggest
+    // it's in the noise.

-
-
-
-
-
+    // Emit 64 byte copies but make sure to keep at least four bytes reserved.
+    while (PREDICT_FALSE(len >= 68)) {
+      op = EmitCopyAtMost64(op, offset, 64, false);
+      len -= 64;
+    }

-
-
-
-
+    // One or two copies will now finish the job.
+    if (len > 64) {
+      op = EmitCopyAtMost64(op, offset, 60, false);
+      len -= 60;
+    }

+    // Emit remainder.
+    op = EmitCopyAtMost64(op, offset, len, len < 12);
+    return op;
+  }
+}

 bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
   uint32 v = 0;
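As a side note (not part of the diff), the tag layout EmitCopyAtMost64 writes for a 1-byte-offset copy can be checked with a few lines: len-4 goes in bits 2..4 of the tag, the top 3 bits of the 11-bit offset go in bits 5..7, and the low 8 offset bits follow in the next byte. The enum values below restate the element-type codes from the snappy format description.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative only: round-trip one "copy with 1-byte offset" element.
enum { LITERAL = 0, COPY_1_BYTE_OFFSET = 1, COPY_2_BYTE_OFFSET = 2 };

static void EmitCopy1(uint8_t out[2], unsigned len, unsigned offset) {
  assert(len >= 4 && len < 12 && offset < 2048);
  out[0] = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
  out[1] = offset & 0xff;
}

int main() {
  uint8_t tag[2];
  EmitCopy1(tag, /*len=*/7, /*offset=*/500);
  unsigned len = ((tag[0] >> 2) & 0x7) + 4;           // recovers 7
  unsigned offset = ((tag[0] & 0xe0) << 3) | tag[1];  // recovers 500
  std::printf("len=%u offset=%u\n", len, offset);
  return 0;
}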
@@ -364,9 +435,9 @@ char* CompressFragment(const char* input,
       //
       // Heuristic match skipping:  If 32 bytes are scanned with no matches
       // found, start looking only at every other byte. If 32 more bytes are
-      // scanned, look at every third byte, etc.. When a match is
-      // immediately go back to looking at every byte. This is a small
-      // (~5% performance, ~0.1% density) for compressible data due to more
+      // scanned (or skipped), look at every third byte, etc.. When a match is
+      // found, immediately go back to looking at every byte. This is a small
+      // loss (~5% performance, ~0.1% density) for compressible data due to more
       // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
       // win since the compressor quickly "realizes" the data is incompressible
       // and doesn't bother looking for matches everywhere.
@@ -382,7 +453,8 @@ char* CompressFragment(const char* input,
       ip = next_ip;
       uint32 hash = next_hash;
       assert(hash == Hash(ip, shift));
-      uint32 bytes_between_hash_lookups = skip
+      uint32 bytes_between_hash_lookups = skip >> 5;
+      skip += bytes_between_hash_lookups;
       next_ip = ip + bytes_between_hash_lookups;
       if (PREDICT_FALSE(next_ip > ip_limit)) {
         goto emit_remainder;
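The revised skip heuristic is easy to visualize with a short loop (not part of the diff). The sketch assumes skip is seeded with 32, as it is elsewhere in CompressFragment; with that seed the probe step stays at 1 for the first 32 probes, then becomes 2 for 16 probes, then 3, and keeps growing, so incompressible input is abandoned quickly.

#include <cstdint>
#include <cstdio>

// Illustrative only: print the step sizes produced by the new schedule.
int main() {
  uint32_t skip = 32;                       // assumed initial value
  for (int probe = 0; probe < 48; ++probe) {
    uint32_t bytes_between_hash_lookups = skip >> 5;
    skip += bytes_between_hash_lookups;
    std::printf("probe %2d: step %u\n", probe, bytes_between_hash_lookups);
  }
  return 0;
}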
@@ -417,19 +489,21 @@ char* CompressFragment(const char* input,
       // We have a 4-byte match at ip, and no need to emit any
       // "literal bytes" prior to ip.
       const char* base = ip;
-
+      std::pair<size_t, bool> p =
+          FindMatchLength(candidate + 4, ip + 4, ip_end);
+      size_t matched = 4 + p.first;
       ip += matched;
       size_t offset = base - candidate;
       assert(0 == memcmp(base, candidate, matched));
-      op = EmitCopy(op, offset, matched);
-      // We could immediately start working at ip now, but to improve
-      // compression we first update table[Hash(ip - 1, ...)].
-      const char* insert_tail = ip - 1;
+      op = EmitCopy(op, offset, matched, p.second);
       next_emit = ip;
       if (PREDICT_FALSE(ip >= ip_limit)) {
         goto emit_remainder;
       }
-
+      // We are now looking for a 4-byte match again.  We read
+      // table[Hash(ip, shift)] for that.  To improve compression,
+      // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
+      input_bytes = GetEightBytesAt(ip - 1);
       uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
       table[prev_hash] = ip - base_ip - 1;
       uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
@@ -493,162 +567,6 @@ char* CompressFragment(const char* input,
 //   bool TryFastAppend(const char* ip, size_t available, size_t length);
 // };

-// -----------------------------------------------------------------------
-// Lookup table for decompression code.  Generated by ComputeTable() below.
-// -----------------------------------------------------------------------
-
-// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
-static const uint32 wordmask[] = {
-  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
-};
-
-// Data stored per entry in lookup table:
-//      Range   Bits-used       Description
-//      ------------------------------------
-//      1..64   0..7            Literal/copy length encoded in opcode byte
-//      0..7    8..10           Copy offset encoded in opcode byte / 256
-//      0..4    11..13          Extra bytes after opcode
-//
-// We use eight bits for the length even though 7 would have sufficed
-// because of efficiency reasons:
-//      (1) Extracting a byte is faster than a bit-field
-//      (2) It properly aligns copy offset so we do not need a <<8
-static const uint16 char_table[256] = {
-  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
-  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
-  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
-  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
-  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
-  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
-  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
-  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
-  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
-  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
-  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
-  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
-  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
-  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
-  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
-  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
-  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
-  0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
-  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
-  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
-  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
-  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
-  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
-  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
-  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
-  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
-  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
-  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
-  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
-  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
-  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
-  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
-};
-
-// In debug mode, allow optional computation of the table at startup.
-// Also, check that the decompression table is correct.
-#ifndef NDEBUG
-DEFINE_bool(snappy_dump_decompression_table, false,
-            "If true, we print the decompression table at startup.");
-
-static uint16 MakeEntry(unsigned int extra,
-                        unsigned int len,
-                        unsigned int copy_offset) {
-  // Check that all of the fields fit within the allocated space
-  assert(extra == (extra & 0x7));              // At most 3 bits
-  assert(copy_offset == (copy_offset & 0x7));  // At most 3 bits
-  assert(len == (len & 0x7f));                 // At most 7 bits
-  return len | (copy_offset << 8) | (extra << 11);
-}
-
-static void ComputeTable() {
-  uint16 dst[256];
-
-  // Place invalid entries in all places to detect missing initialization
-  int assigned = 0;
-  for (int i = 0; i < 256; i++) {
-    dst[i] = 0xffff;
-  }
-
-  // Small LITERAL entries.  We store (len-1) in the top 6 bits.
-  for (unsigned int len = 1; len <= 60; len++) {
-    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
-    assigned++;
-  }
-
-  // Large LITERAL entries.  We use 60..63 in the high 6 bits to
-  // encode the number of bytes of length info that follow the opcode.
-  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
-    // We set the length field in the lookup table to 1 because extra
-    // bytes encode len-1.
-    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
-    assigned++;
-  }
-
-  // COPY_1_BYTE_OFFSET.
-  //
-  // The tag byte in the compressed data stores len-4 in 3 bits, and
-  // offset/256 in 5 bits.  offset%256 is stored in the next byte.
-  //
-  // This format is used for length in range [4..11] and offset in
-  // range [0..2047]
-  for (unsigned int len = 4; len < 12; len++) {
-    for (unsigned int offset = 0; offset < 2048; offset += 256) {
-      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
-        MakeEntry(1, len, offset>>8);
-      assigned++;
-    }
-  }
-
-  // COPY_2_BYTE_OFFSET.
-  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
-  for (unsigned int len = 1; len <= 64; len++) {
-    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
-    assigned++;
-  }
-
-  // COPY_4_BYTE_OFFSET.
-  // Tag contents len-1 in top 6 bits, and offset in next four bytes.
-  for (unsigned int len = 1; len <= 64; len++) {
-    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
-    assigned++;
-  }
-
-  // Check that each entry was initialized exactly once.
-  if (assigned != 256) {
-    fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
-    abort();
-  }
-  for (int i = 0; i < 256; i++) {
-    if (dst[i] == 0xffff) {
-      fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
-      abort();
-    }
-  }
-
-  if (FLAGS_snappy_dump_decompression_table) {
-    printf("static const uint16 char_table[256] = {\n ");
-    for (int i = 0; i < 256; i++) {
-      printf("0x%04x%s",
-             dst[i],
-             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
-    }
-    printf("};\n");
-  }
-
-  // Check that computed table matched recorded table
-  for (int i = 0; i < 256; i++) {
-    if (dst[i] != char_table[i]) {
-      fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
-              i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
-      abort();
-    }
-  }
-}
-#endif /* !NDEBUG */

 // Helper class for decompression
 class SnappyDecompressor {
@@ -701,7 +619,9 @@ class SnappyDecompressor {
       if (n == 0) return false;
       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
       reader_->Skip(1);
-
+      uint32 val = c & 0x7f;
+      if (((val << shift) >> shift) != val) return false;
+      *result |= val << shift;
       if (c < 128) {
         break;
       }
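The overflow guard added here (and exercised by the new Test_SnappyCorruption_OverflowingVarint test) is worth seeing in isolation. The sketch below (not part of the diff) applies the same check while reading a little-endian base-128 varint: ((val << shift) >> shift) != val detects bits that would be shifted out of the 32-bit result, so a varint encoding more than 32 bits is rejected instead of silently wrapping.

#include <cstdint>
#include <cstdio>

// Illustrative only: parse a 32-bit varint with the same overflow guard.
static bool ParseVarint32(const uint8_t* p, const uint8_t* end, uint32_t* result) {
  uint32_t shift = 0;
  *result = 0;
  while (p < end && shift <= 28) {
    const uint8_t c = *p++;
    uint32_t val = c & 0x7f;
    if (((val << shift) >> shift) != val) return false;  // would overflow 32 bits
    *result |= val << shift;
    if (c < 128) return true;   // high bit clear: last byte
    shift += 7;
  }
  return false;                 // truncated or too long
}

int main() {
  const uint8_t ok[] = {0xfe, 0xff, 0x7f};                 // decodes to 0x1ffffe
  const uint8_t overflow[] = {0xff, 0xff, 0xff, 0xff, 0x7f};
  uint32_t v = 0;
  std::printf("%d %u\n", ParseVarint32(ok, ok + 3, &v), v);
  std::printf("%d\n", ParseVarint32(overflow, overflow + 5, &v));
  return 0;
}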
@@ -715,6 +635,10 @@ class SnappyDecompressor {
   template <class Writer>
   void DecompressAllTags(Writer* writer) {
     const char* ip = ip_;
+    // For position-independent executables, accessing global arrays can be
+    // slow. Move wordmask array onto the stack to mitigate this.
+    uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
+    memcpy(wordmask, internal::wordmask, sizeof(wordmask));

     // We could have put this refill fragment only at the beginning of the loop.
     // However, duplicating it at the end of each branch gives the compiler more
@@ -731,7 +655,19 @@ class SnappyDecompressor {
     for ( ;; ) {
       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

-
+      // Ratio of iterations that have LITERAL vs non-LITERAL for different
+      // inputs.
+      //
+      // input          LITERAL  NON_LITERAL
+      // -----------------------------------
+      // html|html4|cp   23%      77%
+      // urls            36%      64%
+      // jpg             47%      53%
+      // pdf             19%      81%
+      // txt[1-4]        25%      75%
+      // pb              24%      76%
+      // bin             24%      76%
+      if (PREDICT_FALSE((c & 0x3) == LITERAL)) {
         size_t literal_length = (c >> 2) + 1u;
         if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
           assert(literal_length < 61);
@@ -767,15 +703,15 @@ class SnappyDecompressor {
         ip += literal_length;
         MAYBE_REFILL();
       } else {
-        const
-        const
-        const
+        const size_t entry = char_table[c];
+        const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+        const size_t length = entry & 0xff;
         ip += entry >> 11;

         // copy_offset/256 is encoded in bits 8..10.  By just fetching
         // those bits, we get copy_offset (since the bit-field starts at
         // bit 8).
-        const
+        const size_t copy_offset = entry & 0x700;
         if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
           return;
         }
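For readers tracing the new entry/trailer/length expressions: they read the three fields of a char_table entry exactly as laid out in the table comment removed earlier in this diff (bits 0..7 length, bits 8..10 copy_offset/256, bits 11..13 extra bytes). A minimal sketch, not part of the diff:

#include <cstdint>
#include <cstdio>

// Illustrative only: unpack one 16-bit char_table entry.
struct TagInfo {
  unsigned length;        // entry & 0xff
  unsigned copy_offset;   // entry & 0x700 (already scaled by 256)
  unsigned extra_bytes;   // entry >> 11
};

static TagInfo Unpack(uint16_t entry) {
  const unsigned e = entry;
  return TagInfo{e & 0xffu, e & 0x700u, e >> 11};
}

int main() {
  // 0x0804 is char_table[1] in the table removed above: a 1-byte-offset copy
  // tag with length 4, copy_offset/256 == 0 and one extra (offset) byte.
  TagInfo t = Unpack(0x0804);
  std::printf("len=%u copy_offset=%u extra=%u\n",
              t.length, t.copy_offset, t.extra_bytes);
  return 0;
}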
@@ -795,10 +731,8 @@ bool SnappyDecompressor::RefillTag() {
     size_t n;
     ip = reader_->Peek(&n);
     peeked_ = n;
-
-
-      return false;
-    }
+    eof_ = (n == 0);
+    if (eof_) return false;
     ip_limit_ = ip + n;
   }

@@ -863,6 +797,7 @@ static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,

   // Process the entire input
   decompressor->DecompressAllTags(writer);
+  writer->Flush();
   return (decompressor->eof() && writer->CheckLength());
 }

@@ -965,7 +900,7 @@ class SnappyIOVecWriter {
   const size_t output_iov_count_;

   // We are currently writing into output_iov_[curr_iov_index_].
-
+  size_t curr_iov_index_;

   // Bytes written to output_iov_[curr_iov_index_] so far.
   size_t curr_iov_written_;
@@ -976,7 +911,7 @@ class SnappyIOVecWriter {
   // Maximum number of bytes that will be decompressed into output_iov_.
   size_t output_limit_;

-  inline char* GetIOVecPointer(
+  inline char* GetIOVecPointer(size_t index, size_t offset) {
    return reinterpret_cast<char*>(output_iov_[index].iov_base) +
        offset;
   }
@@ -1037,8 +972,7 @@ class SnappyIOVecWriter {
         output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
       // Fast path, used for the majority (about 95%) of invocations.
       char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
-
-      UnalignedCopy64(ip + 8, ptr + 8);
+      UnalignedCopy128(ip, ptr);
       curr_iov_written_ += len;
       total_written_ += len;
       return true;
@@ -1057,7 +991,7 @@ class SnappyIOVecWriter {
     }

     // Locate the iovec from which we need to start the copy.
-
+    size_t from_iov_index = curr_iov_index_;
    size_t from_iov_offset = curr_iov_written_;
    while (offset > 0) {
      if (from_iov_offset >= offset) {
@@ -1066,8 +1000,8 @@ class SnappyIOVecWriter {
      }

      offset -= from_iov_offset;
+      assert(from_iov_index > 0);
      --from_iov_index;
-      assert(from_iov_index >= 0);
      from_iov_offset = output_iov_[from_iov_index].iov_len;
    }

@@ -1102,9 +1036,10 @@ class SnappyIOVecWriter {
      if (to_copy > len) {
        to_copy = len;
      }
-
-
-
+      IncrementalCopySlow(
+          GetIOVecPointer(from_iov_index, from_iov_offset),
+          GetIOVecPointer(curr_iov_index_, curr_iov_written_),
+          GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy);
      curr_iov_written_ += to_copy;
      from_iov_offset += to_copy;
      total_written_ += to_copy;
@@ -1115,6 +1050,7 @@ class SnappyIOVecWriter {
    return true;
   }

+  inline void Flush() {}
 };

 bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
@@ -1145,7 +1081,8 @@ class SnappyArrayWriter {
  public:
   inline explicit SnappyArrayWriter(char* dst)
       : base_(dst),
-        op_(dst)
+        op_(dst),
+        op_limit_(dst) {
   }

   inline void SetExpectedLength(size_t len) {
@@ -1172,8 +1109,7 @@ class SnappyArrayWriter {
     const size_t space_left = op_limit_ - op;
     if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
       // Fast path, used for the majority (about 95%) of invocations.
-
-      UnalignedCopy64(ip + 8, op + 8);
+      UnalignedCopy128(ip, op);
       op_ = op + len;
       return true;
     } else {
@@ -1182,8 +1118,7 @@ class SnappyArrayWriter {
   }

   inline bool AppendFromSelf(size_t offset, size_t len) {
-    char*
-    const size_t space_left = op_limit_ - op;
+    char* const op_end = op_ + len;

     // Check if we try to append from before the start of the buffer.
     // Normally this would just be a check for "produced < offset",
@@ -1192,29 +1127,16 @@ class SnappyArrayWriter {
     // to a very big number. This is convenient, as offset==0 is another
     // invalid case that we also want to catch, so that we do not go
     // into an infinite loop.
-
-
-    if (produced <= offset - 1u) {
-      return false;
-    }
-    if (len <= 16 && offset >= 8 && space_left >= 16) {
-      // Fast path, used for the majority (70-80%) of dynamic invocations.
-      UnalignedCopy64(op - offset, op);
-      UnalignedCopy64(op - offset + 8, op + 8);
-    } else {
-      if (space_left >= len + kMaxIncrementCopyOverflow) {
-        IncrementalCopyFastPath(op - offset, op, len);
-      } else {
-        if (space_left < len) {
-          return false;
-        }
-        IncrementalCopy(op - offset, op, len);
-      }
-    }
+    if (Produced() <= offset - 1u || op_end > op_limit_) return false;
+    op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_);

-    op_ = op + len;
     return true;
   }
+  inline size_t Produced() const {
+    assert(op_ >= base_);
+    return op_ - base_;
+  }
+  inline void Flush() {}
 };

 bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
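The "offset - 1u" trick referenced in the comments above (and kept by the rewritten AppendFromSelf) relies on unsigned wraparound, and a tiny sketch (not part of the diff) makes the behaviour concrete: when offset is 0 the subtraction wraps to SIZE_MAX, so a single comparison rejects both offset == 0 and offsets reaching before the start of the output.

#include <cstddef>
#include <cstdio>

// Illustrative only: the single unsigned comparison used by AppendFromSelf.
static bool RejectCopy(size_t produced, size_t offset) {
  return produced <= offset - 1u;   // true => invalid self-copy
}

int main() {
  std::printf("%d\n", RejectCopy(10, 0));   // 1: offset 0 is always invalid
  std::printf("%d\n", RejectCopy(10, 11));  // 1: would read before the buffer
  std::printf("%d\n", RejectCopy(10, 10));  // 0: copy of everything produced so far
  return 0;
}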
@@ -1241,7 +1163,6 @@ bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
   return RawUncompress(compressed, n, string_as_array(uncompressed));
 }

-
 // A Writer that drops everything on the floor and just does validation
 class SnappyDecompressionValidator {
  private:
@@ -1249,7 +1170,7 @@ class SnappyDecompressionValidator {
   size_t produced_;

  public:
-  inline SnappyDecompressionValidator() : produced_(0) { }
+  inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
   inline void SetExpectedLength(size_t len) {
     expected_ = len;
   }
@@ -1270,6 +1191,7 @@ class SnappyDecompressionValidator {
     produced_ += len;
     return produced_ <= expected_;
   }
+  inline void Flush() {}
 };

 bool IsValidCompressedBuffer(const char* compressed, size_t n) {
@@ -1278,6 +1200,11 @@ bool IsValidCompressedBuffer(const char* compressed, size_t n) {
   return InternalUncompress(&reader, &writer);
 }

+bool IsValidCompressed(Source* compressed) {
+  SnappyDecompressionValidator writer;
+  return InternalUncompress(compressed, &writer);
+}
+
 void RawCompress(const char* input,
                  size_t input_length,
                  char* compressed,
@@ -1292,7 +1219,7 @@ void RawCompress(const char* input,

 size_t Compress(const char* input, size_t input_length, string* compressed) {
   // Pre-grow the buffer to the max length of the compressed output
-  compressed
+  STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));

   size_t compressed_length;
   RawCompress(input, input_length, string_as_array(compressed),
@@ -1301,6 +1228,237 @@ size_t Compress(const char* input, size_t input_length, string* compressed) {
   return compressed_length;
 }

+// -----------------------------------------------------------------------
+// Sink interface
+// -----------------------------------------------------------------------

-
+// A type that decompresses into a Sink. The template parameter
+// Allocator must export one method "char* Allocate(int size);", which
+// allocates a buffer of "size" and appends that to the destination.
+template <typename Allocator>
+class SnappyScatteredWriter {
+  Allocator allocator_;
+
+  // We need random access into the data generated so far.  Therefore
+  // we keep track of all of the generated data as an array of blocks.
+  // All of the blocks except the last have length kBlockSize.
+  std::vector<char*> blocks_;
+  size_t expected_;
+
+  // Total size of all fully generated blocks so far
+  size_t full_size_;
+
+  // Pointer into current output block
+  char* op_base_;       // Base of output block
+  char* op_ptr_;        // Pointer to next unfilled byte in block
+  char* op_limit_;      // Pointer just past block
+
+  inline size_t Size() const {
+    return full_size_ + (op_ptr_ - op_base_);
+  }
+
+  bool SlowAppend(const char* ip, size_t len);
+  bool SlowAppendFromSelf(size_t offset, size_t len);
+
+ public:
+  inline explicit SnappyScatteredWriter(const Allocator& allocator)
+      : allocator_(allocator),
+        full_size_(0),
+        op_base_(NULL),
+        op_ptr_(NULL),
+        op_limit_(NULL) {
+  }
+
+  inline void SetExpectedLength(size_t len) {
+    assert(blocks_.empty());
+    expected_ = len;
+  }
+
+  inline bool CheckLength() const {
+    return Size() == expected_;
+  }
+
+  // Return the number of bytes actually uncompressed so far
+  inline size_t Produced() const {
+    return Size();
+  }
+
+  inline bool Append(const char* ip, size_t len) {
+    size_t avail = op_limit_ - op_ptr_;
+    if (len <= avail) {
+      // Fast path
+      memcpy(op_ptr_, ip, len);
+      op_ptr_ += len;
+      return true;
+    } else {
+      return SlowAppend(ip, len);
+    }
+  }
+
+  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+    char* op = op_ptr_;
+    const int space_left = op_limit_ - op;
+    if (length <= 16 && available >= 16 + kMaximumTagLength &&
+        space_left >= 16) {
+      // Fast path, used for the majority (about 95%) of invocations.
+      UnalignedCopy128(ip, op);
+      op_ptr_ = op + length;
+      return true;
+    } else {
+      return false;
+    }
+  }

+  inline bool AppendFromSelf(size_t offset, size_t len) {
+    char* const op_end = op_ptr_ + len;
+    // See SnappyArrayWriter::AppendFromSelf for an explanation of
+    // the "offset - 1u" trick.
+    if (PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ && op_end <= op_limit_)) {
+      // Fast path: src and dst in current block.
+      op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
+      return true;
+    }
+    return SlowAppendFromSelf(offset, len);
+  }
+
+  // Called at the end of the decompress. We ask the allocator
+  // write all blocks to the sink.
+  inline void Flush() { allocator_.Flush(Produced()); }
+};
+
+template<typename Allocator>
+bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
+  size_t avail = op_limit_ - op_ptr_;
+  while (len > avail) {
+    // Completely fill this block
+    memcpy(op_ptr_, ip, avail);
+    op_ptr_ += avail;
+    assert(op_limit_ - op_ptr_ == 0);
+    full_size_ += (op_ptr_ - op_base_);
+    len -= avail;
+    ip += avail;
+
+    // Bounds check
+    if (full_size_ + len > expected_) {
+      return false;
+    }
+
+    // Make new block
+    size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+    op_base_ = allocator_.Allocate(bsize);
+    op_ptr_ = op_base_;
+    op_limit_ = op_base_ + bsize;
+    blocks_.push_back(op_base_);
+    avail = bsize;
+  }
+
+  memcpy(op_ptr_, ip, len);
+  op_ptr_ += len;
+  return true;
+}
+
+template<typename Allocator>
+bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
+                                                          size_t len) {
+  // Overflow check
+  // See SnappyArrayWriter::AppendFromSelf for an explanation of
+  // the "offset - 1u" trick.
+  const size_t cur = Size();
+  if (offset - 1u >= cur) return false;
+  if (expected_ - cur < len) return false;
+
+  // Currently we shouldn't ever hit this path because Compress() chops the
+  // input into blocks and does not create cross-block copies. However, it is
+  // nice if we do not rely on that, since we can get better compression if we
+  // allow cross-block copies and thus might want to change the compressor in
+  // the future.
+  size_t src = cur - offset;
+  while (len-- > 0) {
+    char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)];
+    Append(&c, 1);
+    src++;
+  }
+  return true;
+}
+
+class SnappySinkAllocator {
+ public:
+  explicit SnappySinkAllocator(Sink* dest): dest_(dest) {}
+  ~SnappySinkAllocator() {}
+
+  char* Allocate(int size) {
+    Datablock block(new char[size], size);
+    blocks_.push_back(block);
+    return block.data;
+  }
+
+  // We flush only at the end, because the writer wants
+  // random access to the blocks and once we hand the
+  // block over to the sink, we can't access it anymore.
+  // Also we don't write more than has been actually written
+  // to the blocks.
+  void Flush(size_t size) {
+    size_t size_written = 0;
+    size_t block_size;
+    for (int i = 0; i < blocks_.size(); ++i) {
+      block_size = min<size_t>(blocks_[i].size, size - size_written);
+      dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
+                                    &SnappySinkAllocator::Deleter, NULL);
+      size_written += block_size;
+    }
+    blocks_.clear();
+  }
+
+ private:
+  struct Datablock {
+    char* data;
+    size_t size;
+    Datablock(char* p, size_t s) : data(p), size(s) {}
+  };
+
+  static void Deleter(void* arg, const char* bytes, size_t size) {
+    delete[] bytes;
+  }
+
+  Sink* dest_;
+  std::vector<Datablock> blocks_;
+
+  // Note: copying this object is allowed
+};
+
+size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
+  SnappySinkAllocator allocator(uncompressed);
+  SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+  InternalUncompress(compressed, &writer);
+  return writer.Produced();
+}
+
+bool Uncompress(Source* compressed, Sink* uncompressed) {
+  // Read the uncompressed length from the front of the compressed input
+  SnappyDecompressor decompressor(compressed);
+  uint32 uncompressed_len = 0;
+  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
+    return false;
+  }
+
+  char c;
+  size_t allocated_size;
+  char* buf = uncompressed->GetAppendBufferVariable(
+      1, uncompressed_len, &c, 1, &allocated_size);
+
+  // If we can get a flat buffer, then use it, otherwise do block by block
+  // uncompression
+  if (allocated_size >= uncompressed_len) {
+    SnappyArrayWriter writer(buf);
+    bool result = InternalUncompressAllTags(
+        &decompressor, &writer, uncompressed_len);
+    uncompressed->Append(buf, writer.Produced());
+    return result;
+  } else {
+    SnappySinkAllocator allocator(uncompressed);
+    SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
+    return InternalUncompressAllTags(&decompressor, &writer, uncompressed_len);
+  }
+}
+
+}  // end namespace snappy
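As a final note (not part of the diff), the block above is what backs the new Source/Sink entry points this release exposes, Uncompress(Source*, Sink*) and UncompressAsMuchAsPossible(Source*, Sink*). A hedged usage sketch, assuming the ByteArraySource and UncheckedByteArraySink helpers declared in snappy-sinksource.h and the existing Compress/GetUncompressedLength API:

#include <cassert>
#include <iostream>
#include <string>

#include "snappy.h"
#include "snappy-sinksource.h"

// Illustrative only: decompress through the Sink interface added in this
// release instead of the raw char*/string overloads.
int main() {
  const std::string original(10000, 'x');
  std::string compressed;
  snappy::Compress(original.data(), original.size(), &compressed);

  size_t uncompressed_len = 0;
  snappy::GetUncompressedLength(compressed.data(), compressed.size(),
                                &uncompressed_len);
  std::string restored(uncompressed_len, '\0');

  snappy::ByteArraySource source(compressed.data(), compressed.size());
  snappy::UncheckedByteArraySink sink(&restored[0]);
  bool ok = snappy::Uncompress(&source, &sink);

  assert(ok && restored == original);
  std::cout << original.size() << " -> " << compressed.size() << " bytes\n";
  return 0;
}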