sereal 0.0.2

@@ -0,0 +1,659 @@
+ /*
+ Copyright 2011, Google Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+     * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+     * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+     * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ File modified for the Linux Kernel by
+ Zeev Tarantov <zeev.tarantov@gmail.com>
+ */
+
+ #include "csnappy_internal.h"
+ #ifdef __KERNEL__
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #endif
+ #include "csnappy.h"
+
+
+ static inline char*
+ encode_varint32(char *sptr, uint32_t v)
+ {
+ 	uint8_t* ptr = (uint8_t *)sptr;
+ 	static const int B = 128;
+ 	if (v < (1<<7)) {
+ 		*(ptr++) = v;
+ 	} else if (v < (1<<14)) {
+ 		*(ptr++) = v | B;
+ 		*(ptr++) = v>>7;
+ 	} else if (v < (1<<21)) {
+ 		*(ptr++) = v | B;
+ 		*(ptr++) = (v>>7) | B;
+ 		*(ptr++) = v>>14;
+ 	} else if (v < (1<<28)) {
+ 		*(ptr++) = v | B;
+ 		*(ptr++) = (v>>7) | B;
+ 		*(ptr++) = (v>>14) | B;
+ 		*(ptr++) = v>>21;
+ 	} else {
+ 		*(ptr++) = v | B;
+ 		*(ptr++) = (v>>7) | B;
+ 		*(ptr++) = (v>>14) | B;
+ 		*(ptr++) = (v>>21) | B;
+ 		*(ptr++) = v>>28;
+ 	}
+ 	return (char *)ptr;
+ }
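+
+ /*
+  * Worked example (not part of the original source): encode_varint32()
+  * writes the little-endian base-128 length header. For v = 300 (0x12C)
+  * it emits two bytes: 0xAC (the low 7 bits 0x2C with the 0x80
+  * continuation flag set) followed by 0x02 (300 >> 7).
+  */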
+
+ /*
+  * *** DO NOT CHANGE THE VALUE OF kBlockSize ***
+  *
+  * New Compression code chops up the input into blocks of at most
+  * the following size. This ensures that back-references in the
+  * output never cross kBlockSize block boundaries. This can be
+  * helpful in implementing blocked decompression. However the
+  * decompression code should not rely on this guarantee since older
+  * compression code may not obey it.
+  */
+ #define kBlockLog 15
+ #define kBlockSize (1 << kBlockLog)
+
+
+ #if defined(__arm__) && !(ARCH_ARM_HAVE_UNALIGNED)
+
+ static uint8_t* emit_literal(
+ 	uint8_t *op,
+ 	const uint8_t *src,
+ 	const uint8_t *end)
+ {
+ 	uint32_t length = end - src;
+ 	uint32_t n = length - 1;
+ 	if (!length)
+ 		return op;
+ 	if (n < 60) {
+ 		/* Fits in tag byte */
+ 		*op++ = LITERAL | (n << 2);
+ 	} else {
+ 		/* Encode in upcoming bytes */
+ 		uint8_t *base = op;
+ 		op++;
+ 		do {
+ 			*op++ = n & 0xff;
+ 			n >>= 8;
+ 		} while (n > 0);
+ 		*base = LITERAL | ((59 + (op - base - 1)) << 2);
+ 	}
+ 	memcpy(op, src, length);
+ 	return op + length;
+ }
+
+ static uint8_t* emit_copy(
+ 	uint8_t *op,
+ 	uint32_t offset,
+ 	uint32_t len)
+ {
+ 	DCHECK_GT(offset, 0);
+
+ 	/* Emit 64 byte copies but make sure to keep at least four bytes
+ 	 * reserved */
+ 	while (unlikely(len >= 68)) {
+ 		*op++ = COPY_2_BYTE_OFFSET | ((64 - 1) << 2);
+ 		*op++ = offset & 255;
+ 		*op++ = offset >> 8;
+ 		len -= 64;
+ 	}
+
+ 	/* Emit an extra 60 byte copy if we have too much data to fit in
+ 	 * one copy */
+ 	if (unlikely(len > 64)) {
+ 		*op++ = COPY_2_BYTE_OFFSET | ((60 - 1) << 2);
+ 		*op++ = offset & 255;
+ 		*op++ = offset >> 8;
+ 		len -= 60;
+ 	}
+
+ 	/* Emit remainder */
+ 	DCHECK_GE(len, 4);
+ 	if ((len < 12) && (offset < 2048)) {
+ 		int len_minus_4 = len - 4;
+ 		*op++ = COPY_1_BYTE_OFFSET |
+ 			((len_minus_4) << 2) |
+ 			((offset >> 8) << 5);
+ 		*op++ = offset & 0xff;
+ 	} else {
+ 		*op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
+ 		*op++ = offset & 255;
+ 		*op++ = offset >> 8;
+ 	}
+ 	return op;
+ }
+
+ static uint32_t find_match_length(
+ 	const uint8_t *s1,
+ 	const uint8_t *s2,
+ 	const uint8_t *s2_end)
+ {
+ 	const uint8_t * const s2_start = s2;
+ 	while (s2 < s2_end && *s1++ == *s2++) /*nothing*/;
+ 	return s2 - s2_start - 1;
+ }
+
+ static uint32_t hash(uint32_t v)
+ {
+ 	return v * UINT32_C(0x1e35a7bd);
+ }
+
+ char*
+ csnappy_compress_fragment(
+ 	const char *input,
+ 	const uint32_t input_size,
+ 	char *dst,
+ 	void *working_memory,
+ 	const int workmem_bytes_power_of_two)
+ {
+ 	const uint8_t * const src_start = (const uint8_t *)input;
+ 	const uint8_t * const src_end_minus4 = src_start + input_size - 4;
+ 	const uint8_t *src = src_start, *done_upto = src_start, *match;
+ 	uint8_t *op = (uint8_t *)dst;
+ 	uint16_t *wm = (uint16_t *)working_memory;
+ 	int shift = 33 - workmem_bytes_power_of_two;
+ 	uint32_t curr_val, curr_hash, match_val, offset, length;
+ 	if (unlikely(input_size < 4))
+ 		goto the_end;
+ 	memset(wm, 0, 1 << workmem_bytes_power_of_two);
+ 	for (;;) {
+ 		curr_val = (src[1] << 8) | (src[2] << 16) | (src[3] << 24);
+ 		do {
+ 			src++;
+ 			if (unlikely(src >= src_end_minus4))
+ 				goto the_end;
+ 			curr_val = (curr_val >> 8) | (src[3] << 24);
+ 			DCHECK_EQ(curr_val, get_unaligned_le32(src));
+ 			curr_hash = hash(curr_val) >> shift;
+ 			match = src_start + wm[curr_hash];
+ 			DCHECK_LT(match, src);
+ 			wm[curr_hash] = src - src_start;
+ 			match_val = get_unaligned_le32(match);
+ 		} while (likely(curr_val != match_val));
+ 		offset = src - match;
+ 		length = 4 + find_match_length(
+ 			match + 4, src + 4, src_end_minus4 + 4);
+ 		DCHECK_EQ(memcmp(src, match, length), 0);
+ 		op = emit_literal(op, done_upto, src);
+ 		op = emit_copy(op, offset, length);
+ 		done_upto = src + length;
+ 		src = done_upto - 1;
+ 	}
+ the_end:
+ 	op = emit_literal(op, done_upto, src_end_minus4 + 4);
+ 	return (char *)op;
+ }
+
+ #else /* !simple */
+
+ /*
+  * Any hash function will produce a valid compressed bitstream, but a good
+  * hash function reduces the number of collisions and thus yields better
+  * compression for compressible input, and more speed for incompressible
+  * input. Of course, it doesn't hurt if the hash function is reasonably fast
+  * either, as it gets called a lot.
+  */
+ static inline uint32_t HashBytes(uint32_t bytes, int shift)
+ {
+ 	uint32_t kMul = 0x1e35a7bd;
+ 	return (bytes * kMul) >> shift;
+ }
+ static inline uint32_t Hash(const char *p, int shift)
+ {
+ 	return HashBytes(UNALIGNED_LOAD32(p), shift);
+ }
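+
+ /*
+  * Worked example (illustrative, not from the original source): with
+  * workmem_bytes_power_of_two == 15 the table occupies 2^15 bytes, i.e.
+  * 2^14 uint16_t entries, so 14 index bits are needed and
+  * shift = 33 - 15 = 18 keeps the top 14 bits of the 32-bit product.
+  */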
234
+
235
+
236
+ /*
237
+ * Return the largest n such that
238
+ *
239
+ * s1[0,n-1] == s2[0,n-1]
240
+ * and n <= (s2_limit - s2).
241
+ *
242
+ * Does not read *s2_limit or beyond.
243
+ * Does not read *(s1 + (s2_limit - s2)) or beyond.
244
+ * Requires that s2_limit >= s2.
245
+ *
246
+ * Separate implementation for x86_64, for speed. Uses the fact that
247
+ * x86_64 is little endian.
248
+ */
249
+ #if defined(__x86_64__)
250
+ static inline int
251
+ FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
252
+ {
253
+ uint64_t x;
254
+ int matched, matching_bits;
255
+ DCHECK_GE(s2_limit, s2);
256
+ matched = 0;
257
+ /*
258
+ * Find out how long the match is. We loop over the data 64 bits at a
259
+ * time until we find a 64-bit block that doesn't match; then we find
260
+ * the first non-matching bit and use that to calculate the total
261
+ * length of the match.
262
+ */
263
+ while (likely(s2 <= s2_limit - 8)) {
264
+ if (unlikely(UNALIGNED_LOAD64(s1 + matched) ==
265
+ UNALIGNED_LOAD64(s2))) {
266
+ s2 += 8;
267
+ matched += 8;
268
+ } else {
269
+ /*
270
+ * On current (mid-2008) Opteron models there is a 3%
271
+ * more efficient code sequence to find the first
272
+ * non-matching byte. However, what follows is ~10%
273
+ * better on Intel Core 2 and newer, and we expect AMD's
274
+ * bsf instruction to improve.
275
+ */
276
+ x = UNALIGNED_LOAD64(s1 + matched) ^
277
+ UNALIGNED_LOAD64(s2);
278
+ matching_bits = FindLSBSetNonZero64(x);
279
+ matched += matching_bits >> 3;
280
+ return matched;
281
+ }
282
+ }
283
+ while (likely(s2 < s2_limit)) {
284
+ if (likely(s1[matched] == *s2)) {
285
+ ++s2;
286
+ ++matched;
287
+ } else {
288
+ return matched;
289
+ }
290
+ }
291
+ return matched;
292
+ }
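+
+ /*
+  * Illustrative note (not in the original source): if only the low three
+  * bytes of the two 64-bit words are equal, x has its lowest set bit in
+  * bits 24..31 (little-endian byte order), so FindLSBSetNonZero64(x) >> 3
+  * evaluates to 3, the number of additional matching bytes.
+  */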
293
+ #else /* !defined(__x86_64__) */
294
+ static inline int
295
+ FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
296
+ {
297
+ /* Implementation based on the x86-64 version, above. */
298
+ int matched = 0;
299
+ DCHECK_GE(s2_limit, s2);
300
+
301
+ while (s2 <= s2_limit - 4 &&
302
+ UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
303
+ s2 += 4;
304
+ matched += 4;
305
+ }
306
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
307
+ if (s2 <= s2_limit - 4) {
308
+ uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^
309
+ UNALIGNED_LOAD32(s2);
310
+ int matching_bits = FindLSBSetNonZero(x);
311
+ matched += matching_bits >> 3;
312
+ } else {
313
+ while ((s2 < s2_limit) && (s1[matched] == *s2)) {
314
+ ++s2;
315
+ ++matched;
316
+ }
317
+ }
318
+ #else
319
+ while ((s2 < s2_limit) && (s1[matched] == *s2)) {
320
+ ++s2;
321
+ ++matched;
322
+ }
323
+ #endif
324
+ return matched;
325
+ }
326
+ #endif /* !defined(__x86_64__) */
327
+
328
+
329
+ static inline char*
330
+ EmitLiteral(char *op, const char *literal, int len, int allow_fast_path)
331
+ {
332
+ int n = len - 1; /* Zero-length literals are disallowed */
333
+ if (n < 60) {
334
+ /* Fits in tag byte */
335
+ *op++ = LITERAL | (n << 2);
336
+ /*
337
+ The vast majority of copies are below 16 bytes, for which a
338
+ call to memcpy is overkill. This fast path can sometimes
339
+ copy up to 15 bytes too much, but that is okay in the
340
+ main loop, since we have a bit to go on for both sides:
341
+ - The input will always have kInputMarginBytes = 15 extra
342
+ available bytes, as long as we're in the main loop, and
343
+ if not, allow_fast_path = false.
344
+ - The output will always have 32 spare bytes (see
345
+ snappy_max_compressed_length).
346
+ */
347
+ if (allow_fast_path && len <= 16) {
348
+ UnalignedCopy64(literal, op);
349
+ UnalignedCopy64(literal + 8, op + 8);
350
+ return op + len;
351
+ }
352
+ } else {
353
+ /* Encode in upcoming bytes */
354
+ char *base = op;
355
+ int count = 0;
356
+ op++;
357
+ while (n > 0) {
358
+ *op++ = n & 0xff;
359
+ n >>= 8;
360
+ count++;
361
+ }
362
+ DCHECK_GE(count, 1);
363
+ DCHECK_LE(count, 4);
364
+ *base = LITERAL | ((59+count) << 2);
365
+ }
366
+ memcpy(op, literal, len);
367
+ return op + len;
368
+ }
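+
+ /*
+  * Worked example (illustrative, not from the original source), assuming
+  * LITERAL == 0 as in the snappy format: a 100-byte literal has
+  * n = 99 >= 60, so one extra length byte is used (count == 1); the tag
+  * byte is LITERAL | ((59 + 1) << 2) = 0xf0, followed by 0x63 (= 99) and
+  * then the 100 literal bytes themselves.
+  */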
369
+
370
+ static inline char*
371
+ EmitCopyLessThan64(char *op, int offset, int len)
372
+ {
373
+ DCHECK_LE(len, 64);
374
+ DCHECK_GE(len, 4);
375
+ DCHECK_LT(offset, 65536);
376
+
377
+ if ((len < 12) && (offset < 2048)) {
378
+ int len_minus_4 = len - 4;
379
+ DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */
380
+ *op++ = COPY_1_BYTE_OFFSET |
381
+ ((len_minus_4) << 2) |
382
+ ((offset >> 8) << 5);
383
+ *op++ = offset & 0xff;
384
+ } else {
385
+ *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
386
+ put_unaligned_le16(offset, op);
387
+ op += 2;
388
+ }
389
+ return op;
390
+ }
391
+
392
+ static inline char*
393
+ EmitCopy(char *op, int offset, int len)
394
+ {
395
+ /* Emit 64 byte copies but make sure to keep at least four bytes
396
+ * reserved */
397
+ while (len >= 68) {
398
+ op = EmitCopyLessThan64(op, offset, 64);
399
+ len -= 64;
400
+ }
401
+
402
+ /* Emit an extra 60 byte copy if have too much data to fit in one
403
+ * copy */
404
+ if (len > 64) {
405
+ op = EmitCopyLessThan64(op, offset, 60);
406
+ len -= 60;
407
+ }
408
+
409
+ /* Emit remainder */
410
+ op = EmitCopyLessThan64(op, offset, len);
411
+ return op;
412
+ }
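+
+ /*
+  * Worked example (illustrative, not from the original source): a copy of
+  * len == 5 at offset == 10 satisfies len < 12 and offset < 2048, so the
+  * one-byte-offset form is used: the tag byte is
+  * COPY_1_BYTE_OFFSET | ((5 - 4) << 2) | ((10 >> 8) << 5), followed by
+  * the low offset byte 0x0a. Longer or more distant copies fall back to
+  * the two-byte-offset form.
+  */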
413
+
414
+
415
+ /*
416
+ For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
417
+ equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
418
+ empirically found that overlapping loads such as
419
+ UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
420
+ are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
421
+
422
+ We have different versions for 64- and 32-bit; ideally we would avoid the
423
+ two functions and just inline the UNALIGNED_LOAD64 call into
424
+ GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
425
+ enough to avoid loading the value multiple times then. For 64-bit, the load
426
+ is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
427
+ done at GetUint32AtOffset() time.
428
+ */
429
+
430
+ #if defined(__x86_64__) || (__SIZEOF_SIZE_T__ == 8)
431
+
432
+ typedef uint64_t EightBytesReference;
433
+
434
+ static inline EightBytesReference GetEightBytesAt(const char* ptr) {
435
+ return UNALIGNED_LOAD64(ptr);
436
+ }
437
+
438
+ static inline uint32_t GetUint32AtOffset(uint64_t v, int offset) {
439
+ DCHECK_GE(offset, 0);
440
+ DCHECK_LE(offset, 4);
441
+ #ifdef __LITTLE_ENDIAN
442
+ return v >> (8 * offset);
443
+ #else
444
+ return v >> (32 - 8 * offset);
445
+ #endif
446
+ }
447
+
448
+ #else /* !ARCH_K8 */
449
+
450
+ typedef const char* EightBytesReference;
451
+
452
+ static inline EightBytesReference GetEightBytesAt(const char* ptr) {
453
+ return ptr;
454
+ }
455
+
456
+ static inline uint32_t GetUint32AtOffset(const char* v, int offset) {
457
+ DCHECK_GE(offset, 0);
458
+ DCHECK_LE(offset, 4);
459
+ return UNALIGNED_LOAD32(v + offset);
460
+ }
461
+
462
+ #endif /* !ARCH_K8 */
463
+
464
+
465
+ #define kInputMarginBytes 15
466
+ char*
467
+ csnappy_compress_fragment(
468
+ const char *input,
469
+ const uint32_t input_size,
470
+ char *op,
471
+ void *working_memory,
472
+ const int workmem_bytes_power_of_two)
473
+ {
474
+ const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip,
475
+ *candidate, *base;
476
+ uint16_t *table = (uint16_t *)working_memory;
477
+ EightBytesReference input_bytes;
478
+ uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes;
479
+ int shift, matched;
480
+
481
+ DCHECK_GE(workmem_bytes_power_of_two, 9);
482
+ DCHECK_LE(workmem_bytes_power_of_two, 15);
483
+ /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t.
484
+ * How many bits of 32bit hash function result are discarded? */
485
+ shift = 33 - workmem_bytes_power_of_two;
486
+ /* "ip" is the input pointer, and "op" is the output pointer. */
487
+ ip = input;
488
+ DCHECK_LE(input_size, kBlockSize);
489
+ ip_end = input + input_size;
490
+ base_ip = ip;
491
+ /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or
492
+ [next_emit, ip_end) after the main loop. */
493
+ next_emit = ip;
494
+
495
+ if (unlikely(input_size < kInputMarginBytes))
496
+ goto emit_remainder;
497
+
498
+ memset(working_memory, 0, 1 << workmem_bytes_power_of_two);
499
+
500
+ ip_limit = input + input_size - kInputMarginBytes;
501
+ next_hash = Hash(++ip, shift);
502
+
503
+ main_loop:
504
+ DCHECK_LT(next_emit, ip);
505
+ /*
506
+ * The body of this loop calls EmitLiteral once and then EmitCopy one or
507
+ * more times. (The exception is that when we're close to exhausting
508
+ * the input we goto emit_remainder.)
509
+ *
510
+ * In the first iteration of this loop we're just starting, so
511
+ * there's nothing to copy, so calling EmitLiteral once is
512
+ * necessary. And we only start a new iteration when the
513
+ * current iteration has determined that a call to EmitLiteral will
514
+ * precede the next call to EmitCopy (if any).
515
+ *
516
+ * Step 1: Scan forward in the input looking for a 4-byte-long match.
517
+ * If we get close to exhausting the input then goto emit_remainder.
518
+ *
519
+ * Heuristic match skipping: If 32 bytes are scanned with no matches
520
+ * found, start looking only at every other byte. If 32 more bytes are
521
+ * scanned, look at every third byte, etc.. When a match is found,
522
+ * immediately go back to looking at every byte. This is a small loss
523
+ * (~5% performance, ~0.1% density) for compressible data due to more
524
+ * bookkeeping, but for non-compressible data (such as JPEG) it's a huge
525
+ * win since the compressor quickly "realizes" the data is incompressible
526
+ * and doesn't bother looking for matches everywhere.
527
+ *
528
+ * The "skip" variable keeps track of how many bytes there are since the
529
+ * last match; dividing it by 32 (ie. right-shifting by five) gives the
530
+ * number of bytes to move ahead for each iteration.
531
+ */
532
+ skip = 32;
533
+
534
+ next_ip = ip;
535
+ do {
536
+ ip = next_ip;
537
+ hash = next_hash;
538
+ DCHECK_EQ(hash, Hash(ip, shift));
539
+ next_ip = ip + (skip++ >> 5);
540
+ if (unlikely(next_ip > ip_limit))
541
+ goto emit_remainder;
542
+ next_hash = Hash(next_ip, shift);
543
+ candidate = base_ip + table[hash];
544
+ DCHECK_GE(candidate, base_ip);
545
+ DCHECK_LT(candidate, ip);
546
+
547
+ table[hash] = ip - base_ip;
548
+ } while (likely(UNALIGNED_LOAD32(ip) !=
549
+ UNALIGNED_LOAD32(candidate)));
550
+
551
+ /*
552
+ * Step 2: A 4-byte match has been found. We'll later see if more
553
+ * than 4 bytes match. But, prior to the match, input
554
+ * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
555
+ */
556
+ DCHECK_LE(next_emit + 16, ip_end);
557
+ op = EmitLiteral(op, next_emit, ip - next_emit, 1);
558
+
559
+ /*
560
+ * Step 3: Call EmitCopy, and then see if another EmitCopy could
561
+ * be our next move. Repeat until we find no match for the
562
+ * input immediately after what was consumed by the last EmitCopy call.
563
+ *
564
+ * If we exit this loop normally then we need to call EmitLiteral next,
565
+ * though we don't yet know how big the literal will be. We handle that
566
+ * by proceeding to the next iteration of the main loop. We also can exit
567
+ * this loop via goto if we get close to exhausting the input.
568
+ */
569
+ candidate_bytes = 0;
570
+
571
+ do {
572
+ /* We have a 4-byte match at ip, and no need to emit any
573
+ "literal bytes" prior to ip. */
574
+ base = ip;
575
+ matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
576
+ ip += matched;
577
+ DCHECK_EQ(0, memcmp(base, candidate, matched));
578
+ op = EmitCopy(op, base - candidate, matched);
579
+ /* We could immediately start working at ip now, but to improve
580
+ compression we first update table[Hash(ip - 1, ...)]. */
581
+ next_emit = ip;
582
+ if (unlikely(ip >= ip_limit))
583
+ goto emit_remainder;
584
+ input_bytes = GetEightBytesAt(ip - 1);
585
+ prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
586
+ table[prev_hash] = ip - base_ip - 1;
587
+ cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
588
+ candidate = base_ip + table[cur_hash];
589
+ candidate_bytes = UNALIGNED_LOAD32(candidate);
590
+ table[cur_hash] = ip - base_ip;
591
+ } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
592
+
593
+ next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
594
+ ++ip;
595
+ goto main_loop;
596
+
597
+ emit_remainder:
598
+ /* Emit the remaining bytes as a literal */
599
+ if (next_emit < ip_end)
600
+ op = EmitLiteral(op, next_emit, ip_end - next_emit, 0);
601
+
602
+ return op;
603
+ }
604
+ #endif /* !simple */
605
+ #if defined(__KERNEL__) && !defined(STATIC)
606
+ EXPORT_SYMBOL(csnappy_compress_fragment);
607
+ #endif
608
+
609
+ uint32_t __attribute__((const))
610
+ csnappy_max_compressed_length(uint32_t source_len)
611
+ {
612
+ return 32 + source_len + source_len/6;
613
+ }
614
+ #if defined(__KERNEL__) && !defined(STATIC)
615
+ EXPORT_SYMBOL(csnappy_max_compressed_length);
616
+ #endif
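+
+ /*
+  * Worked example (illustrative, not from the original source): for a
+  * 65536-byte input, csnappy_max_compressed_length() returns
+  * 32 + 65536 + 65536/6 = 32 + 65536 + 10922 = 76490, the worst-case
+  * output buffer size a caller must provide.
+  */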
+
+ void
+ csnappy_compress(
+ 	const char *input,
+ 	uint32_t input_length,
+ 	char *compressed,
+ 	uint32_t *compressed_length,
+ 	void *working_memory,
+ 	const int workmem_bytes_power_of_two)
+ {
+ 	int workmem_size;
+ 	int num_to_read;
+ 	uint32_t written = 0;
+ 	char *p = encode_varint32(compressed, input_length);
+ 	written += (p - compressed);
+ 	compressed = p;
+ 	while (input_length > 0) {
+ 		num_to_read = min(input_length, (uint32_t)kBlockSize);
+ 		workmem_size = workmem_bytes_power_of_two;
+ 		if (unlikely(num_to_read < kBlockSize)) {
+ 			for (workmem_size = 9;
+ 			     workmem_size < workmem_bytes_power_of_two;
+ 			     ++workmem_size) {
+ 				if ((1 << (workmem_size-1)) >= num_to_read)
+ 					break;
+ 			}
+ 		}
+ 		p = csnappy_compress_fragment(
+ 			input, num_to_read, compressed,
+ 			working_memory, workmem_size);
+ 		written += (p - compressed);
+ 		compressed = p;
+ 		input_length -= num_to_read;
+ 		input += num_to_read;
+ 	}
+ 	*compressed_length = written;
+ }
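+
+ /*
+  * Illustrative usage sketch (not part of the original source); the buffer
+  * names, my_alloc(), and the chosen work-memory size (2^15 bytes, the
+  * largest the DCHECKs above allow) are assumptions:
+  *
+  *     uint16_t workmem[1 << 14];
+  *     uint32_t out_len;
+  *     char *out = my_alloc(csnappy_max_compressed_length(in_len));
+  *     csnappy_compress(in, in_len, out, &out_len, workmem, 15);
+  */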
+ #if defined(__KERNEL__) && !defined(STATIC)
+ EXPORT_SYMBOL(csnappy_compress);
+
+ MODULE_LICENSE("BSD");
+ MODULE_DESCRIPTION("Snappy Compressor");
+ #endif