sereal 0.0.2
- data/ext/sereal/buffer.h +89 -0
- data/ext/sereal/decode.c +238 -0
- data/ext/sereal/decode.h +282 -0
- data/ext/sereal/encode.c +269 -0
- data/ext/sereal/encode.h +1 -0
- data/ext/sereal/extconf.rb +8 -0
- data/ext/sereal/proto.h +73 -0
- data/ext/sereal/sereal.c +12 -0
- data/ext/sereal/sereal.h +73 -0
- data/ext/sereal/snappy/csnappy.h +129 -0
- data/ext/sereal/snappy/csnappy_compress.c +659 -0
- data/ext/sereal/snappy/csnappy_decompress.c +414 -0
- data/ext/sereal/snappy/csnappy_internal.h +147 -0
- data/ext/sereal/snappy/csnappy_internal_userspace.h +301 -0
- metadata +75 -0
data/ext/sereal/snappy/csnappy_compress.c
@@ -0,0 +1,659 @@
/*
Copyright 2011, Google Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
    * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File modified for the Linux Kernel by
Zeev Tarantov <zeev.tarantov@gmail.com>
*/

#include "csnappy_internal.h"
#ifdef __KERNEL__
#include <linux/kernel.h>
#include <linux/module.h>
#endif
#include "csnappy.h"


static inline char*
encode_varint32(char *sptr, uint32_t v)
{
        uint8_t* ptr = (uint8_t *)sptr;
        static const int B = 128;
        if (v < (1<<7)) {
                *(ptr++) = v;
        } else if (v < (1<<14)) {
                *(ptr++) = v | B;
                *(ptr++) = v>>7;
        } else if (v < (1<<21)) {
                *(ptr++) = v | B;
                *(ptr++) = (v>>7) | B;
                *(ptr++) = v>>14;
        } else if (v < (1<<28)) {
                *(ptr++) = v | B;
                *(ptr++) = (v>>7) | B;
                *(ptr++) = (v>>14) | B;
                *(ptr++) = v>>21;
        } else {
                *(ptr++) = v | B;
                *(ptr++) = (v>>7) | B;
                *(ptr++) = (v>>14) | B;
                *(ptr++) = (v>>21) | B;
                *(ptr++) = v>>28;
        }
        return (char *)ptr;
}
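For orientation, encode_varint32 writes the uncompressed-length preamble as a little-endian base-128 varint: seven payload bits per byte, high bit set on every byte except the last. The matching decoder is not part of this file; the sketch below is illustrative only, the function name decode_varint32_example is invented here, and it assumes <stdint.h> types.

#include <stdint.h>

/* Illustrative only: decode the varint32 written by encode_varint32 above.
 * Returns the number of bytes consumed, or 0 if the encoding would need
 * more than 5 bytes (i.e. is not a valid 32-bit varint). */
static int decode_varint32_example(const char *sptr, uint32_t *v)
{
        const uint8_t *ptr = (const uint8_t *)sptr;
        uint32_t result = 0;
        int i;
        for (i = 0; i < 5; i++) {
                uint8_t b = ptr[i];
                result |= (uint32_t)(b & 0x7f) << (7 * i);
                if (!(b & 0x80)) {      /* last byte has the high bit clear */
                        *v = result;
                        return i + 1;
                }
        }
        return 0;
}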

/*
 * *** DO NOT CHANGE THE VALUE OF kBlockSize ***
 *
 * New Compression code chops up the input into blocks of at most
 * the following size. This ensures that back-references in the
 * output never cross kBlockSize block boundaries. This can be
 * helpful in implementing blocked decompression. However the
 * decompression code should not rely on this guarantee since older
 * compression code may not obey it.
 */
#define kBlockLog 15
#define kBlockSize (1 << kBlockLog)


#if defined(__arm__) && !(ARCH_ARM_HAVE_UNALIGNED)

static uint8_t* emit_literal(
        uint8_t *op,
        const uint8_t *src,
        const uint8_t *end)
{
        uint32_t length = end - src;
        uint32_t n = length - 1;
        if (!length)
                return op;
        if (n < 60) {
                /* Fits in tag byte */
                *op++ = LITERAL | (n << 2);
        } else {
                /* Encode in upcoming bytes */
                uint8_t *base = op;
                op++;
                do {
                        *op++ = n & 0xff;
                        n >>= 8;
                } while (n > 0);
                *base = LITERAL | ((59 + (op - base - 1)) << 2);
        }
        memcpy(op, src, length);
        return op + length;
}

static uint8_t* emit_copy(
        uint8_t *op,
        uint32_t offset,
        uint32_t len)
{
        DCHECK_GT(offset, 0);

        /* Emit 64 byte copies but make sure to keep at least four bytes
         * reserved */
        while (unlikely(len >= 68)) {
                *op++ = COPY_2_BYTE_OFFSET | ((64 - 1) << 2);
                *op++ = offset & 255;
                *op++ = offset >> 8;
                len -= 64;
        }

        /* Emit an extra 60 byte copy if have too much data to fit in one
         * copy */
        if (unlikely(len > 64)) {
                *op++ = COPY_2_BYTE_OFFSET | ((60 - 1) << 2);
                *op++ = offset & 255;
                *op++ = offset >> 8;
                len -= 60;
        }

        /* Emit remainder */
        DCHECK_GE(len, 4);
        if ((len < 12) && (offset < 2048)) {
                int len_minus_4 = len - 4;
                *op++ = COPY_1_BYTE_OFFSET |
                        ((len_minus_4) << 2) |
                        ((offset >> 8) << 5);
                *op++ = offset & 0xff;
        } else {
                *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
                *op++ = offset & 255;
                *op++ = offset >> 8;
        }
        return op;
}

static uint32_t find_match_length(
        const uint8_t *s1,
        const uint8_t *s2,
        const uint8_t *s2_end)
{
        const uint8_t * const s2_start = s2;
        while (s2 < s2_end && *s1++ == *s2++) /*nothing*/;
        return s2 - s2_start - 1;
}

static uint32_t hash(uint32_t v)
{
        return v * UINT32_C(0x1e35a7bd);
}

char*
csnappy_compress_fragment(
        const char *input,
        const uint32_t input_size,
        char *dst,
        void *working_memory,
        const int workmem_bytes_power_of_two)
{
        const uint8_t * const src_start = (const uint8_t *)input;
        const uint8_t * const src_end_minus4 = src_start + input_size - 4;
        const uint8_t *src = src_start, *done_upto = src_start, *match;
        uint8_t *op = (uint8_t *)dst;
        uint16_t *wm = (uint16_t *)working_memory;
        int shift = 33 - workmem_bytes_power_of_two;
        uint32_t curr_val, curr_hash, match_val, offset, length;
        if (unlikely(input_size < 4))
                goto the_end;
        memset(wm, 0, 1 << workmem_bytes_power_of_two);
        for (;;) {
                curr_val = (src[1] << 8) | (src[2] << 16) | (src[3] << 24);
                do {
                        src++;
                        if (unlikely(src >= src_end_minus4))
                                goto the_end;
                        curr_val = (curr_val >> 8) | (src[3] << 24);
                        DCHECK_EQ(curr_val, get_unaligned_le32(src));
                        curr_hash = hash(curr_val) >> shift;
                        match = src_start + wm[curr_hash];
                        DCHECK_LT(match, src);
                        wm[curr_hash] = src - src_start;
                        match_val = get_unaligned_le32(match);
                } while (likely(curr_val != match_val));
                offset = src - match;
                length = 4 + find_match_length(
                        match + 4, src + 4, src_end_minus4 + 4);
                DCHECK_EQ(memcmp(src, match, length), 0);
                op = emit_literal(op, done_upto, src);
                op = emit_copy(op, offset, length);
                done_upto = src + length;
                src = done_upto - 1;
        }
the_end:
        op = emit_literal(op, done_upto, src_end_minus4 + 4);
        return (char *)op;
}
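Both emit_literal and emit_copy above build Snappy element tag bytes: the low two bits select the element type and the upper six bits carry a length field. The sketch below only restates that layout for readability; the function name describe_tag_example is invented here, the numeric tag values (LITERAL = 0, COPY_1_BYTE_OFFSET = 1, COPY_2_BYTE_OFFSET = 2) are assumed to match the gem's internal header, and <stdio.h>/<stdint.h> are assumed.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: classify the element that a tag byte opens. */
static void describe_tag_example(uint8_t tag)
{
        uint8_t type = tag & 3;        /* low two bits: element type */
        uint8_t upper = tag >> 2;      /* upper six bits: length field */
        if (type == 0) {               /* literal */
                if (upper < 60)
                        printf("literal, length %u\n", upper + 1);
                else
                        printf("literal, length stored in next %u byte(s)\n",
                               upper - 59);
        } else if (type == 1) {        /* copy with 1-byte offset */
                printf("copy, length %u, offset high bits %u, 1 offset byte follows\n",
                       (upper & 7) + 4, upper >> 3);
        } else {                       /* copy with 2-byte offset */
                printf("copy, length %u, 16-bit little-endian offset follows\n",
                       upper + 1);
        }
}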

#else /* !simple */

/*
 * Any hash function will produce a valid compressed bitstream, but a good
 * hash function reduces the number of collisions and thus yields better
 * compression for compressible input, and more speed for incompressible
 * input. Of course, it doesn't hurt if the hash function is reasonably fast
 * either, as it gets called a lot.
 */
static inline uint32_t HashBytes(uint32_t bytes, int shift)
{
        uint32_t kMul = 0x1e35a7bd;
        return (bytes * kMul) >> shift;
}
static inline uint32_t Hash(const char *p, int shift)
{
        return HashBytes(UNALIGNED_LOAD32(p), shift);
}


/*
 * Return the largest n such that
 *
 *   s1[0,n-1] == s2[0,n-1]
 *   and n <= (s2_limit - s2).
 *
 * Does not read *s2_limit or beyond.
 * Does not read *(s1 + (s2_limit - s2)) or beyond.
 * Requires that s2_limit >= s2.
 *
 * Separate implementation for x86_64, for speed. Uses the fact that
 * x86_64 is little endian.
 */
#if defined(__x86_64__)
static inline int
FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
{
        uint64_t x;
        int matched, matching_bits;
        DCHECK_GE(s2_limit, s2);
        matched = 0;
        /*
         * Find out how long the match is. We loop over the data 64 bits at a
         * time until we find a 64-bit block that doesn't match; then we find
         * the first non-matching bit and use that to calculate the total
         * length of the match.
         */
        while (likely(s2 <= s2_limit - 8)) {
                if (unlikely(UNALIGNED_LOAD64(s1 + matched) ==
                                UNALIGNED_LOAD64(s2))) {
                        s2 += 8;
                        matched += 8;
                } else {
                        /*
                         * On current (mid-2008) Opteron models there is a 3%
                         * more efficient code sequence to find the first
                         * non-matching byte. However, what follows is ~10%
                         * better on Intel Core 2 and newer, and we expect AMD's
                         * bsf instruction to improve.
                         */
                        x = UNALIGNED_LOAD64(s1 + matched) ^
                                UNALIGNED_LOAD64(s2);
                        matching_bits = FindLSBSetNonZero64(x);
                        matched += matching_bits >> 3;
                        return matched;
                }
        }
        while (likely(s2 < s2_limit)) {
                if (likely(s1[matched] == *s2)) {
                        ++s2;
                        ++matched;
                } else {
                        return matched;
                }
        }
        return matched;
}
#else /* !defined(__x86_64__) */
static inline int
FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
{
        /* Implementation based on the x86-64 version, above. */
        int matched = 0;
        DCHECK_GE(s2_limit, s2);

        while (s2 <= s2_limit - 4 &&
                UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
                s2 += 4;
                matched += 4;
        }
#if __BYTE_ORDER == __LITTLE_ENDIAN
        if (s2 <= s2_limit - 4) {
                uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^
                                UNALIGNED_LOAD32(s2);
                int matching_bits = FindLSBSetNonZero(x);
                matched += matching_bits >> 3;
        } else {
                while ((s2 < s2_limit) && (s1[matched] == *s2)) {
                        ++s2;
                        ++matched;
                }
        }
#else
        while ((s2 < s2_limit) && (s1[matched] == *s2)) {
                ++s2;
                ++matched;
        }
#endif
        return matched;
}
#endif /* !defined(__x86_64__) */
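The bit trick used by FindMatchLength is worth spelling out: XOR the two mismatching 64-bit words, find the lowest set bit, and divide its index by 8 to get the number of leading bytes that still matched (valid on little-endian layouts). The standalone sketch below uses the GCC/Clang builtin __builtin_ctzll instead of csnappy's FindLSBSetNonZero64 macro; the function name matching_prefix_bytes_example is invented here, and memcpy stands in for the UNALIGNED_LOAD macros.

#include <stdint.h>
#include <string.h>

/* Illustrative only: count matching leading bytes of two 8-byte blocks on a
 * little-endian machine, mirroring the XOR + lowest-set-bit step above. */
static int matching_prefix_bytes_example(const unsigned char *a,
                                         const unsigned char *b)
{
        uint64_t x, y, diff;
        memcpy(&x, a, 8);               /* unaligned-safe 64-bit loads */
        memcpy(&y, b, 8);
        diff = x ^ y;
        if (diff == 0)
                return 8;               /* the whole block matches */
        return __builtin_ctzll(diff) >> 3;  /* first differing bit -> byte count */
}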


static inline char*
EmitLiteral(char *op, const char *literal, int len, int allow_fast_path)
{
        int n = len - 1;        /* Zero-length literals are disallowed */
        if (n < 60) {
                /* Fits in tag byte */
                *op++ = LITERAL | (n << 2);
                /*
                The vast majority of copies are below 16 bytes, for which a
                call to memcpy is overkill. This fast path can sometimes
                copy up to 15 bytes too much, but that is okay in the
                main loop, since we have a bit to go on for both sides:
                - The input will always have kInputMarginBytes = 15 extra
                  available bytes, as long as we're in the main loop, and
                  if not, allow_fast_path = false.
                - The output will always have 32 spare bytes (see
                  snappy_max_compressed_length).
                */
                if (allow_fast_path && len <= 16) {
                        UnalignedCopy64(literal, op);
                        UnalignedCopy64(literal + 8, op + 8);
                        return op + len;
                }
        } else {
                /* Encode in upcoming bytes */
                char *base = op;
                int count = 0;
                op++;
                while (n > 0) {
                        *op++ = n & 0xff;
                        n >>= 8;
                        count++;
                }
                DCHECK_GE(count, 1);
                DCHECK_LE(count, 4);
                *base = LITERAL | ((59+count) << 2);
        }
        memcpy(op, literal, len);
        return op + len;
}

static inline char*
EmitCopyLessThan64(char *op, int offset, int len)
{
        DCHECK_LE(len, 64);
        DCHECK_GE(len, 4);
        DCHECK_LT(offset, 65536);

        if ((len < 12) && (offset < 2048)) {
                int len_minus_4 = len - 4;
                DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */
                *op++ = COPY_1_BYTE_OFFSET |
                        ((len_minus_4) << 2) |
                        ((offset >> 8) << 5);
                *op++ = offset & 0xff;
        } else {
                *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
                put_unaligned_le16(offset, op);
                op += 2;
        }
        return op;
}

static inline char*
EmitCopy(char *op, int offset, int len)
{
        /* Emit 64 byte copies but make sure to keep at least four bytes
         * reserved */
        while (len >= 68) {
                op = EmitCopyLessThan64(op, offset, 64);
                len -= 64;
        }

        /* Emit an extra 60 byte copy if have too much data to fit in one
         * copy */
        if (len > 64) {
                op = EmitCopyLessThan64(op, offset, 60);
                len -= 60;
        }

        /* Emit remainder */
        op = EmitCopyLessThan64(op, offset, len);
        return op;
}
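The 68/60 thresholds in EmitCopy exist so that the final piece is never shorter than 4 bytes, the minimum a single copy element can encode. The sketch below reproduces only that splitting arithmetic so it can be sanity-checked in isolation; split_copy_length_example is an invented name and <assert.h>/<stdio.h> are assumed.

#include <assert.h>
#include <stdio.h>

/* Illustrative only: mirror EmitCopy's length splitting and show that every
 * piece falls in the 4..64 range a single copy element can encode. */
static void split_copy_length_example(int len)
{
        assert(len >= 4);
        while (len >= 68) {        /* full 64-byte pieces, leaving >= 4 behind */
                printf("piece: 64\n");
                len -= 64;
        }
        if (len > 64) {            /* 65..67 left: emit 60 so the tail is 5..7 */
                printf("piece: 60\n");
                len -= 60;
        }
        printf("piece: %d\n", len);  /* final piece, always 4..64 */
}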


/*
 For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
 equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
 empirically found that overlapping loads such as
  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
 are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.

 We have different versions for 64- and 32-bit; ideally we would avoid the
 two functions and just inline the UNALIGNED_LOAD64 call into
 GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
 enough to avoid loading the value multiple times then. For 64-bit, the load
 is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
 done at GetUint32AtOffset() time.
*/

#if defined(__x86_64__) || (__SIZEOF_SIZE_T__ == 8)

typedef uint64_t EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
        return UNALIGNED_LOAD64(ptr);
}

static inline uint32_t GetUint32AtOffset(uint64_t v, int offset) {
        DCHECK_GE(offset, 0);
        DCHECK_LE(offset, 4);
#ifdef __LITTLE_ENDIAN
        return v >> (8 * offset);
#else
        return v >> (32 - 8 * offset);
#endif
}

#else /* !ARCH_K8 */

typedef const char* EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
        return ptr;
}

static inline uint32_t GetUint32AtOffset(const char* v, int offset) {
        DCHECK_GE(offset, 0);
        DCHECK_LE(offset, 4);
        return UNALIGNED_LOAD32(v + offset);
}

#endif /* !ARCH_K8 */
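The property claimed by the comment above can be checked directly on a little-endian machine: one 64-bit load plus shifts reproduces each of the five overlapping 32-bit loads. The sketch below is illustrative only; check_overlapping_load_example is an invented name, memcpy stands in for the UNALIGNED_LOAD macros, and <assert.h>/<stdint.h>/<string.h> are assumed.

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Illustrative only: verify GetUint32AtOffset(GetEightBytesAt(p), k) ==
 * UNALIGNED_LOAD32(p + k) for 0 <= k <= 4 on a little-endian machine. */
static void check_overlapping_load_example(const char *p)
{
        uint64_t eight;
        int k;
        memcpy(&eight, p, 8);                /* one 64-bit load */
        for (k = 0; k <= 4; k++) {
                uint32_t direct, shifted;
                memcpy(&direct, p + k, 4);   /* overlapping 32-bit load */
                shifted = (uint32_t)(eight >> (8 * k));
                assert(direct == shifted);
        }
}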


#define kInputMarginBytes 15
char*
csnappy_compress_fragment(
        const char *input,
        const uint32_t input_size,
        char *op,
        void *working_memory,
        const int workmem_bytes_power_of_two)
{
        const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip,
                   *candidate, *base;
        uint16_t *table = (uint16_t *)working_memory;
        EightBytesReference input_bytes;
        uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes;
        int shift, matched;

        DCHECK_GE(workmem_bytes_power_of_two, 9);
        DCHECK_LE(workmem_bytes_power_of_two, 15);
        /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t.
         * How many bits of 32bit hash function result are discarded? */
        shift = 33 - workmem_bytes_power_of_two;
        /* "ip" is the input pointer, and "op" is the output pointer. */
        ip = input;
        DCHECK_LE(input_size, kBlockSize);
        ip_end = input + input_size;
        base_ip = ip;
        /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or
           [next_emit, ip_end) after the main loop. */
        next_emit = ip;

        if (unlikely(input_size < kInputMarginBytes))
                goto emit_remainder;

        memset(working_memory, 0, 1 << workmem_bytes_power_of_two);

        ip_limit = input + input_size - kInputMarginBytes;
        next_hash = Hash(++ip, shift);

main_loop:
        DCHECK_LT(next_emit, ip);
        /*
         * The body of this loop calls EmitLiteral once and then EmitCopy one or
         * more times. (The exception is that when we're close to exhausting
         * the input we goto emit_remainder.)
         *
         * In the first iteration of this loop we're just starting, so
         * there's nothing to copy, so calling EmitLiteral once is
         * necessary. And we only start a new iteration when the
         * current iteration has determined that a call to EmitLiteral will
         * precede the next call to EmitCopy (if any).
         *
         * Step 1: Scan forward in the input looking for a 4-byte-long match.
         * If we get close to exhausting the input then goto emit_remainder.
         *
         * Heuristic match skipping: If 32 bytes are scanned with no matches
         * found, start looking only at every other byte. If 32 more bytes are
         * scanned, look at every third byte, etc.. When a match is found,
         * immediately go back to looking at every byte. This is a small loss
         * (~5% performance, ~0.1% density) for compressible data due to more
         * bookkeeping, but for non-compressible data (such as JPEG) it's a huge
         * win since the compressor quickly "realizes" the data is incompressible
         * and doesn't bother looking for matches everywhere.
         *
         * The "skip" variable keeps track of how many bytes there are since the
         * last match; dividing it by 32 (ie. right-shifting by five) gives the
         * number of bytes to move ahead for each iteration.
         */
        skip = 32;

        next_ip = ip;
        do {
                ip = next_ip;
                hash = next_hash;
                DCHECK_EQ(hash, Hash(ip, shift));
                next_ip = ip + (skip++ >> 5);
                if (unlikely(next_ip > ip_limit))
                        goto emit_remainder;
                next_hash = Hash(next_ip, shift);
                candidate = base_ip + table[hash];
                DCHECK_GE(candidate, base_ip);
                DCHECK_LT(candidate, ip);

                table[hash] = ip - base_ip;
        } while (likely(UNALIGNED_LOAD32(ip) !=
                        UNALIGNED_LOAD32(candidate)));

        /*
         * Step 2: A 4-byte match has been found. We'll later see if more
         * than 4 bytes match. But, prior to the match, input
         * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
         */
        DCHECK_LE(next_emit + 16, ip_end);
        op = EmitLiteral(op, next_emit, ip - next_emit, 1);

        /*
         * Step 3: Call EmitCopy, and then see if another EmitCopy could
         * be our next move. Repeat until we find no match for the
         * input immediately after what was consumed by the last EmitCopy call.
         *
         * If we exit this loop normally then we need to call EmitLiteral next,
         * though we don't yet know how big the literal will be. We handle that
         * by proceeding to the next iteration of the main loop. We also can exit
         * this loop via goto if we get close to exhausting the input.
         */
        candidate_bytes = 0;

        do {
                /* We have a 4-byte match at ip, and no need to emit any
                   "literal bytes" prior to ip. */
                base = ip;
                matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
                ip += matched;
                DCHECK_EQ(0, memcmp(base, candidate, matched));
                op = EmitCopy(op, base - candidate, matched);
                /* We could immediately start working at ip now, but to improve
                   compression we first update table[Hash(ip - 1, ...)]. */
                next_emit = ip;
                if (unlikely(ip >= ip_limit))
                        goto emit_remainder;
                input_bytes = GetEightBytesAt(ip - 1);
                prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
                table[prev_hash] = ip - base_ip - 1;
                cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
                candidate = base_ip + table[cur_hash];
                candidate_bytes = UNALIGNED_LOAD32(candidate);
                table[cur_hash] = ip - base_ip;
        } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);

        next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
        ++ip;
        goto main_loop;

emit_remainder:
        /* Emit the remaining bytes as a literal */
        if (next_emit < ip_end)
                op = EmitLiteral(op, next_emit, ip_end - next_emit, 0);

        return op;
}
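The match-skipping heuristic described in Step 1 above advances by skip >> 5 bytes per probe, and skip grows by one per probe from an initial value of 32: the first 32 probes step by 1 byte, the next 32 by 2, and so on, until a match resets the schedule. The sketch below just prints that schedule; show_skip_schedule_example is an invented name and <stdint.h>/<stdio.h> are assumed.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: print the probe spacing produced by `skip++ >> 5`. */
static void show_skip_schedule_example(void)
{
        uint32_t skip = 32, pos = 0;
        int probe;
        for (probe = 0; probe < 100; probe++) {
                uint32_t step = skip++ >> 5;   /* same expression as the main loop */
                printf("probe %2d at offset %4u (step %u)\n", probe, pos, step);
                pos += step;
        }
}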
#endif /* !simple */
#if defined(__KERNEL__) && !defined(STATIC)
EXPORT_SYMBOL(csnappy_compress_fragment);
#endif

uint32_t __attribute__((const))
csnappy_max_compressed_length(uint32_t source_len)
{
        return 32 + source_len + source_len/6;
}
#if defined(__KERNEL__) && !defined(STATIC)
EXPORT_SYMBOL(csnappy_max_compressed_length);
#endif

void
csnappy_compress(
        const char *input,
        uint32_t input_length,
        char *compressed,
        uint32_t *compressed_length,
        void *working_memory,
        const int workmem_bytes_power_of_two)
{
        int workmem_size;
        int num_to_read;
        uint32_t written = 0;
        char *p = encode_varint32(compressed, input_length);
        written += (p - compressed);
        compressed = p;
        while (input_length > 0) {
                num_to_read = min(input_length, (uint32_t)kBlockSize);
                workmem_size = workmem_bytes_power_of_two;
                if (unlikely(num_to_read < kBlockSize)) {
                        for (workmem_size = 9;
                             workmem_size < workmem_bytes_power_of_two;
                             ++workmem_size) {
                                if ((1 << (workmem_size-1)) >= num_to_read)
                                        break;
                        }
                }
                p = csnappy_compress_fragment(
                        input, num_to_read, compressed,
                        working_memory, workmem_size);
                written += (p - compressed);
                compressed = p;
                input_length -= num_to_read;
                input += num_to_read;
        }
        *compressed_length = written;
}
#if defined(__KERNEL__) && !defined(STATIC)
EXPORT_SYMBOL(csnappy_compress);

MODULE_LICENSE("BSD");
MODULE_DESCRIPTION("Snappy Compressor");
#endif
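For context, a userspace caller of the API defined above would typically size its output buffer with csnappy_max_compressed_length and hand csnappy_compress a 2^N-byte scratch table, with N between 9 and 15 per the DCHECKs in csnappy_compress_fragment. The sketch below is illustrative only: compress_example is an invented name, the choice of N = 15 is an assumption, and the prototypes are assumed to come from the gem's data/ext/sereal/snappy/csnappy.h.

#include <stdint.h>
#include <stdlib.h>
#include "csnappy.h"

/* Illustrative only: compress a buffer; the caller frees the returned pointer
 * and reads the compressed size from *out_len. */
static char *compress_example(const char *in, uint32_t in_len, uint32_t *out_len)
{
        void *workmem = malloc(1 << 15);   /* 32 KiB hash table scratch */
        char *out = malloc(csnappy_max_compressed_length(in_len));
        if (!workmem || !out) {
                free(workmem);
                free(out);
                return NULL;
        }
        csnappy_compress(in, in_len, out, out_len, workmem, 15);
        free(workmem);
        return out;
}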