sereal 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,659 @@
1
+ /*
2
+ Copyright 2011, Google Inc.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are
7
+ met:
8
+
9
+ * Redistributions of source code must retain the above copyright
10
+ notice, this list of conditions and the following disclaimer.
11
+ * Redistributions in binary form must reproduce the above
12
+ copyright notice, this list of conditions and the following disclaimer
13
+ in the documentation and/or other materials provided with the
14
+ distribution.
15
+ * Neither the name of Google Inc. nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ File modified for the Linux Kernel by
32
+ Zeev Tarantov <zeev.tarantov@gmail.com>
33
+ */
34
+
35
+ #include "csnappy_internal.h"
36
+ #ifdef __KERNEL__
37
+ #include <linux/kernel.h>
38
+ #include <linux/module.h>
39
+ #endif
40
+ #include "csnappy.h"
41
+
42
+
43
/*
 * Write v at sptr as a little-endian base-128 varint: seven payload bits
 * per byte, least significant group first, with the top bit (0x80) set on
 * every byte except the last. A 32-bit value takes between 1 and 5 bytes.
 *
 * Returns a pointer to the byte just past the encoded value.
 */
static inline char*
encode_varint32(char *sptr, uint32_t v)
{
	uint8_t *out = (uint8_t *)sptr;

	/* Emit continuation bytes while more than seven bits remain. */
	while (v >= 0x80) {
		*out++ = (uint8_t)(v | 0x80);
		v >>= 7;
	}
	/* Final byte: high bit clear marks the end of the varint. */
	*out++ = (uint8_t)v;
	return (char *)out;
}
71
+
72
+ /*
73
+ * *** DO NOT CHANGE THE VALUE OF kBlockSize ***
74
+
75
+ * New Compression code chops up the input into blocks of at most
76
+ * the following size. This ensures that back-references in the
77
+ * output never cross kBlockSize block boundaries. This can be
78
+ * helpful in implementing blocked decompression. However the
79
+ * decompression code should not rely on this guarantee since older
80
+ * compression code may not obey it.
81
+ */
82
+ #define kBlockLog 15
83
+ #define kBlockSize (1 << kBlockLog)
84
+
85
+
86
+ #if defined(__arm__) && !(ARCH_ARM_HAVE_UNALIGNED)
87
+
88
+ static uint8_t* emit_literal(
89
+ uint8_t *op,
90
+ const uint8_t *src,
91
+ const uint8_t *end)
92
+ {
93
+ uint32_t length = end - src;
94
+ uint32_t n = length - 1;
95
+ if (!length)
96
+ return op;
97
+ if (n < 60) {
98
+ /* Fits in tag byte */
99
+ *op++ = LITERAL | (n << 2);
100
+ } else {
101
+ /* Encode in upcoming bytes */
102
+ uint8_t *base = op;
103
+ op++;
104
+ do {
105
+ *op++ = n & 0xff;
106
+ n >>= 8;
107
+ } while (n > 0);
108
+ *base = LITERAL | ((59 + (op - base - 1)) << 2);
109
+ }
110
+ memcpy(op, src, length);
111
+ return op + length;
112
+ }
113
+
114
+ static uint8_t* emit_copy(
115
+ uint8_t *op,
116
+ uint32_t offset,
117
+ uint32_t len)
118
+ {
119
+ DCHECK_GT(offset, 0);
120
+
121
+ /* Emit 64 byte copies but make sure to keep at least four bytes
122
+ * reserved */
123
+ while (unlikely(len >= 68)) {
124
+ *op++ = COPY_2_BYTE_OFFSET | ((64 - 1) << 2);
125
+ *op++ = offset & 255;
126
+ *op++ = offset >> 8;
127
+ len -= 64;
128
+ }
129
+
130
+ /* Emit an extra 60 byte copy if have too much data to fit in one
131
+ * copy */
132
+ if (unlikely(len > 64)) {
133
+ *op++ = COPY_2_BYTE_OFFSET | ((60 - 1) << 2);
134
+ *op++ = offset & 255;
135
+ *op++ = offset >> 8;
136
+ len -= 60;
137
+ }
138
+
139
+ /* Emit remainder */
140
+ DCHECK_GE(len, 4);
141
+ if ((len < 12) && (offset < 2048)) {
142
+ int len_minus_4 = len - 4;
143
+ *op++ = COPY_1_BYTE_OFFSET |
144
+ ((len_minus_4) << 2) |
145
+ ((offset >> 8) << 5);
146
+ *op++ = offset & 0xff;
147
+ } else {
148
+ *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
149
+ *op++ = offset & 255;
150
+ *op++ = offset >> 8;
151
+ }
152
+ return op;
153
+ }
154
+
155
/*
 * Compare s1 against s2 byte by byte, never reading s2 at or beyond s2_end.
 *
 * When a differing byte is found, the return value is the number of equal
 * bytes that precede it. When every byte up to s2_end is equal, the return
 * value is one LESS than the number of bytes compared (the final matching
 * byte is not counted). If s2 == s2_end on entry the subtraction wraps, so
 * callers must pass a non-empty range.
 */
static uint32_t find_match_length(
	const uint8_t *s1,
	const uint8_t *s2,
	const uint8_t *s2_end)
{
	const uint8_t *cursor = s2;

	for (;;) {
		if (cursor >= s2_end)
			break;
		/* Both pointers advance even on the mismatching byte. */
		if (*s1++ != *cursor++)
			break;
	}
	return cursor - s2 - 1;
}
164
+
165
/*
 * Multiplicative hash of a 32-bit value. The product is taken modulo 2^32;
 * the caller shifts the result down to the width of its table index.
 */
static uint32_t hash(uint32_t v)
{
	const uint32_t multiplier = UINT32_C(0x1e35a7bd);

	return multiplier * v;
}
169
+
170
/*
 * Compress one fragment of input (byte-at-a-time variant used when
 * unaligned loads are not available).
 *
 * input / input_size  - bytes to compress. Offsets into the fragment are
 *                       stored in 16-bit table slots, so the fragment must
 *                       be small enough for them to fit.
 * dst                 - output buffer; the caller guarantees enough room.
 * working_memory      - hash table of 2^workmem_bytes_power_of_two bytes,
 *                       used as an array of uint16_t; zeroed here whenever
 *                       input_size >= 4.
 *
 * Returns the output position just past the compressed fragment.
 *
 * Every read of src[1..3] is bounds-checked before it happens, and the
 * bytes are widened to uint32_t before shifting so that no value is
 * shifted into the sign bit of an int.
 */
char*
csnappy_compress_fragment(
	const char *input,
	const uint32_t input_size,
	char *dst,
	void *working_memory,
	const int workmem_bytes_power_of_two)
{
	const uint8_t * const src_start = (const uint8_t *)input;
	const uint8_t * const src_end = src_start + input_size;
	const uint8_t *src_end_minus4;
	const uint8_t *src = src_start, *done_upto = src_start, *match;
	uint8_t *op = (uint8_t *)dst;
	uint16_t *wm = (uint16_t *)working_memory;
	int shift = 33 - workmem_bytes_power_of_two;
	uint32_t curr_val, curr_hash, match_val, offset, length;

	/* Too short to contain a 4-byte match: emit everything as a literal. */
	if (unlikely(input_size < 4))
		goto the_end;
	/* Only formed once input_size >= 4, so it never precedes the buffer. */
	src_end_minus4 = src_end - 4;
	memset(wm, 0, 1 << workmem_bytes_power_of_two);
	for (;;) {
		/*
		 * The scan below starts at src + 1 and needs src[1..4].
		 * Stop before priming curr_val if that would run past the
		 * input; this is the same exit the first scan step would
		 * take, just taken before any byte is read.
		 */
		if (unlikely(src + 1 >= src_end_minus4))
			goto the_end;
		curr_val = ((uint32_t)src[1] << 8) |
			   ((uint32_t)src[2] << 16) |
			   ((uint32_t)src[3] << 24);
		do {
			src++;
			if (unlikely(src >= src_end_minus4))
				goto the_end;
			/* Slide the 4-byte little-endian window by one byte. */
			curr_val = (curr_val >> 8) | ((uint32_t)src[3] << 24);
			DCHECK_EQ(curr_val, get_unaligned_le32(src));
			curr_hash = hash(curr_val) >> shift;
			/* An empty slot holds 0, i.e. points at src_start. */
			match = src_start + wm[curr_hash];
			DCHECK_LT(match, src);
			wm[curr_hash] = src - src_start;
			match_val = get_unaligned_le32(match);
		} while (likely(curr_val != match_val));
		offset = src - match;
		length = 4 + find_match_length(
			match + 4, src + 4, src_end);
		DCHECK_EQ(memcmp(src, match, length), 0);
		/* Flush pending literal bytes, then the back-reference. */
		op = emit_literal(op, done_upto, src);
		op = emit_copy(op, offset, length);
		done_upto = src + length;
		src = done_upto - 1;
	}
the_end:
	op = emit_literal(op, done_upto, src_end);
	return (char *)op;
}
215
+
216
+ #else /* !simple */
217
+
218
+ /*
219
+ * Any hash function will produce a valid compressed bitstream, but a good
220
+ * hash function reduces the number of collisions and thus yields better
221
+ * compression for compressible input, and more speed for incompressible
222
+ * input. Of course, it doesn't hurt if the hash function is reasonably fast
223
+ * either, as it gets called a lot.
224
+ */
225
/*
 * Hash four input bytes (already loaded into a 32-bit word) to a table
 * index: multiply modulo 2^32, then keep only the top (32 - shift) bits.
 */
static inline uint32_t HashBytes(uint32_t bytes, int shift)
{
	const uint32_t product = bytes * UINT32_C(0x1e35a7bd);

	return product >> shift;
}
230
/* Hash the four bytes starting at p; p does not need to be aligned. */
static inline uint32_t Hash(const char *p, int shift)
{
	const uint32_t word = UNALIGNED_LOAD32(p);

	return HashBytes(word, shift);
}
234
+
235
+
236
+ /*
237
+ * Return the largest n such that
238
+ *
239
+ * s1[0,n-1] == s2[0,n-1]
240
+ * and n <= (s2_limit - s2).
241
+ *
242
+ * Does not read *s2_limit or beyond.
243
+ * Does not read *(s1 + (s2_limit - s2)) or beyond.
244
+ * Requires that s2_limit >= s2.
245
+ *
246
+ * Separate implementation for x86_64, for speed. Uses the fact that
247
+ * x86_64 is little endian.
248
+ */
249
+ #if defined(__x86_64__)
250
/*
 * x86_64 variant: returns how many leading bytes of s1 and s2 are equal,
 * looking at no more than (s2_limit - s2) bytes.
 *
 * Bytes are compared eight at a time while at least eight remain. On the
 * first unequal pair of words, the lowest set bit of their XOR locates the
 * first differing byte (this relies on little-endian byte order). Any tail
 * shorter than eight bytes is compared one byte at a time.
 */
static inline int
FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
{
	int matched = 0;

	DCHECK_GE(s2_limit, s2);

	while (likely(s2 <= s2_limit - 8)) {
		const uint64_t diff = UNALIGNED_LOAD64(s1 + matched) ^
				      UNALIGNED_LOAD64(s2);
		if (likely(diff != 0)) {
			/* bit index / 8 == number of equal low-order bytes */
			int matching_bits = FindLSBSetNonZero64(diff);
			return matched + (matching_bits >> 3);
		}
		s2 += 8;
		matched += 8;
	}

	for (; likely(s2 < s2_limit); ++s2, ++matched) {
		if (unlikely(s1[matched] != *s2))
			break;
	}
	return matched;
}
293
+ #else /* !defined(__x86_64__) */
294
/*
 * Portable variant: returns how many leading bytes of s1 and s2 are equal,
 * looking at no more than (s2_limit - s2) bytes.
 *
 * Equal 4-byte words are skipped first. On little-endian targets, if a
 * full word is still available after that, it is the differing one and
 * the lowest set bit of the XOR gives the count of equal bytes inside it.
 * In every other case the remaining bytes are compared one at a time.
 */
static inline int
FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
{
	int matched = 0;

	DCHECK_GE(s2_limit, s2);

	while (s2 <= s2_limit - 4) {
		if (UNALIGNED_LOAD32(s2) != UNALIGNED_LOAD32(s1 + matched))
			break;
		s2 += 4;
		matched += 4;
	}
#if __BYTE_ORDER == __LITTLE_ENDIAN
	if (s2 <= s2_limit - 4) {
		/* The word loop stopped on a mismatch, not on the limit. */
		const uint32_t diff = UNALIGNED_LOAD32(s1 + matched) ^
				      UNALIGNED_LOAD32(s2);
		int matching_bits = FindLSBSetNonZero(diff);
		return matched + (matching_bits >> 3);
	}
#endif
	for (; s2 < s2_limit; ++s2, ++matched) {
		if (s1[matched] != *s2)
			break;
	}
	return matched;
}
326
+ #endif /* !defined(__x86_64__) */
327
+
328
+
329
+ static inline char*
330
+ EmitLiteral(char *op, const char *literal, int len, int allow_fast_path)
331
+ {
332
+ int n = len - 1; /* Zero-length literals are disallowed */
333
+ if (n < 60) {
334
+ /* Fits in tag byte */
335
+ *op++ = LITERAL | (n << 2);
336
+ /*
337
+ The vast majority of copies are below 16 bytes, for which a
338
+ call to memcpy is overkill. This fast path can sometimes
339
+ copy up to 15 bytes too much, but that is okay in the
340
+ main loop, since we have a bit to go on for both sides:
341
+ - The input will always have kInputMarginBytes = 15 extra
342
+ available bytes, as long as we're in the main loop, and
343
+ if not, allow_fast_path = false.
344
+ - The output will always have 32 spare bytes (see
345
+ snappy_max_compressed_length).
346
+ */
347
+ if (allow_fast_path && len <= 16) {
348
+ UnalignedCopy64(literal, op);
349
+ UnalignedCopy64(literal + 8, op + 8);
350
+ return op + len;
351
+ }
352
+ } else {
353
+ /* Encode in upcoming bytes */
354
+ char *base = op;
355
+ int count = 0;
356
+ op++;
357
+ while (n > 0) {
358
+ *op++ = n & 0xff;
359
+ n >>= 8;
360
+ count++;
361
+ }
362
+ DCHECK_GE(count, 1);
363
+ DCHECK_LE(count, 4);
364
+ *base = LITERAL | ((59+count) << 2);
365
+ }
366
+ memcpy(op, literal, len);
367
+ return op + len;
368
+ }
369
+
370
+ static inline char*
371
+ EmitCopyLessThan64(char *op, int offset, int len)
372
+ {
373
+ DCHECK_LE(len, 64);
374
+ DCHECK_GE(len, 4);
375
+ DCHECK_LT(offset, 65536);
376
+
377
+ if ((len < 12) && (offset < 2048)) {
378
+ int len_minus_4 = len - 4;
379
+ DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */
380
+ *op++ = COPY_1_BYTE_OFFSET |
381
+ ((len_minus_4) << 2) |
382
+ ((offset >> 8) << 5);
383
+ *op++ = offset & 0xff;
384
+ } else {
385
+ *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
386
+ put_unaligned_le16(offset, op);
387
+ op += 2;
388
+ }
389
+ return op;
390
+ }
391
+
392
/*
 * Emit a match of arbitrary length as a sequence of copy elements, each at
 * most 64 bytes long, and return the new output position.
 *
 * The split never leaves a final piece shorter than 4 bytes: 64-byte
 * pieces are taken only while len >= 68, and a remaining length of 65..67
 * is split as 60 + (5..7).
 */
static inline char*
EmitCopy(char *op, int offset, int len)
{
	for (; len >= 68; len -= 64)
		op = EmitCopyLessThan64(op, offset, 64);

	if (len > 64) {
		op = EmitCopyLessThan64(op, offset, 60);
		len -= 60;
	}

	return EmitCopyLessThan64(op, offset, len);
}
413
+
414
+
415
+ /*
416
+ For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
417
+ equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
418
+ empirically found that overlapping loads such as
419
+ UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
420
+ are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
421
+
422
+ We have different versions for 64- and 32-bit; ideally we would avoid the
423
+ two functions and just inline the UNALIGNED_LOAD64 call into
424
+ GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
425
+ enough to avoid loading the value multiple times then. For 64-bit, the load
426
+ is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
427
+ done at GetUint32AtOffset() time.
428
+ */
429
+
430
+ #if defined(__x86_64__) || (__SIZEOF_SIZE_T__ == 8)
431
+
432
/* Eight input bytes held in a register and sliced by GetUint32AtOffset(). */
typedef uint64_t EightBytesReference;

/* Load the eight bytes starting at ptr; ptr does not need to be aligned. */
static inline EightBytesReference GetEightBytesAt(const char* ptr) {
	return UNALIGNED_LOAD64(ptr);
}

/*
 * Given v = GetEightBytesAt(p), return the value UNALIGNED_LOAD32(p + offset)
 * would produce, for 0 <= offset <= 4, without touching memory again.
 *
 * Which end of v holds byte p[0] depends on the byte order, so the shift
 * differs between little- and big-endian targets. Testing whether
 * __LITTLE_ENDIAN is merely *defined* is not enough: C libraries that
 * provide <endian.h> define both __LITTLE_ENDIAN and __BIG_ENDIAN as
 * numeric constants on every target. The byte order is therefore taken,
 * in order of preference, from:
 *   1. the compiler's __BYTE_ORDER__ / __ORDER_LITTLE_ENDIAN__ macros,
 *   2. the <endian.h>-style comparison __BYTE_ORDER == __LITTLE_ENDIAN,
 *   3. the presence of __LITTLE_ENDIAN alone (environments that define
 *      exactly one of the two names).
 */
static inline uint32_t GetUint32AtOffset(uint64_t v, int offset) {
	int shift_bits;

	DCHECK_GE(offset, 0);
	DCHECK_LE(offset, 4);
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
	shift_bits = (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ?
		(8 * offset) : (32 - 8 * offset);
#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN)
	shift_bits = (__BYTE_ORDER == __LITTLE_ENDIAN) ?
		(8 * offset) : (32 - 8 * offset);
#elif defined(__LITTLE_ENDIAN)
	shift_bits = 8 * offset;
#else
	shift_bits = 32 - 8 * offset;
#endif
	/* Truncation to uint32_t keeps the four bytes of interest. */
	return v >> shift_bits;
}
447
+
448
+ #else /* !ARCH_K8 */
449
+
450
/*
 * On this configuration nothing is preloaded: the "reference" is just the
 * address, and each 32-bit value is loaded when it is asked for.
 */
typedef const char* EightBytesReference;

/* Remember where the eight bytes live; no load happens here. */
static inline EightBytesReference GetEightBytesAt(const char* ptr) {
	const EightBytesReference ref = ptr;

	return ref;
}

/* Load the 32-bit value located offset bytes (0..4) past v. */
static inline uint32_t GetUint32AtOffset(const char* v, int offset) {
	const char *where;

	DCHECK_GE(offset, 0);
	DCHECK_LE(offset, 4);
	where = v + offset;
	return UNALIGNED_LOAD32(where);
}
461
+
462
+ #endif /* !ARCH_K8 */
463
+
464
+
465
+ #define kInputMarginBytes 15
466
+ char*
467
+ csnappy_compress_fragment(
468
+ const char *input,
469
+ const uint32_t input_size,
470
+ char *op,
471
+ void *working_memory,
472
+ const int workmem_bytes_power_of_two)
473
+ {
474
+ const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip,
475
+ *candidate, *base;
476
+ uint16_t *table = (uint16_t *)working_memory;
477
+ EightBytesReference input_bytes;
478
+ uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes;
479
+ int shift, matched;
480
+
481
+ DCHECK_GE(workmem_bytes_power_of_two, 9);
482
+ DCHECK_LE(workmem_bytes_power_of_two, 15);
483
+ /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t.
484
+ * How many bits of 32bit hash function result are discarded? */
485
+ shift = 33 - workmem_bytes_power_of_two;
486
+ /* "ip" is the input pointer, and "op" is the output pointer. */
487
+ ip = input;
488
+ DCHECK_LE(input_size, kBlockSize);
489
+ ip_end = input + input_size;
490
+ base_ip = ip;
491
+ /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or
492
+ [next_emit, ip_end) after the main loop. */
493
+ next_emit = ip;
494
+
495
+ if (unlikely(input_size < kInputMarginBytes))
496
+ goto emit_remainder;
497
+
498
+ memset(working_memory, 0, 1 << workmem_bytes_power_of_two);
499
+
500
+ ip_limit = input + input_size - kInputMarginBytes;
501
+ next_hash = Hash(++ip, shift);
502
+
503
+ main_loop:
504
+ DCHECK_LT(next_emit, ip);
505
+ /*
506
+ * The body of this loop calls EmitLiteral once and then EmitCopy one or
507
+ * more times. (The exception is that when we're close to exhausting
508
+ * the input we goto emit_remainder.)
509
+ *
510
+ * In the first iteration of this loop we're just starting, so
511
+ * there's nothing to copy, so calling EmitLiteral once is
512
+ * necessary. And we only start a new iteration when the
513
+ * current iteration has determined that a call to EmitLiteral will
514
+ * precede the next call to EmitCopy (if any).
515
+ *
516
+ * Step 1: Scan forward in the input looking for a 4-byte-long match.
517
+ * If we get close to exhausting the input then goto emit_remainder.
518
+ *
519
+ * Heuristic match skipping: If 32 bytes are scanned with no matches
520
+ * found, start looking only at every other byte. If 32 more bytes are
521
+ * scanned, look at every third byte, etc.. When a match is found,
522
+ * immediately go back to looking at every byte. This is a small loss
523
+ * (~5% performance, ~0.1% density) for compressible data due to more
524
+ * bookkeeping, but for non-compressible data (such as JPEG) it's a huge
525
+ * win since the compressor quickly "realizes" the data is incompressible
526
+ * and doesn't bother looking for matches everywhere.
527
+ *
528
+ * The "skip" variable keeps track of how many bytes there are since the
529
+ * last match; dividing it by 32 (ie. right-shifting by five) gives the
530
+ * number of bytes to move ahead for each iteration.
531
+ */
532
+ skip = 32;
533
+
534
+ next_ip = ip;
535
+ do {
536
+ ip = next_ip;
537
+ hash = next_hash;
538
+ DCHECK_EQ(hash, Hash(ip, shift));
539
+ next_ip = ip + (skip++ >> 5);
540
+ if (unlikely(next_ip > ip_limit))
541
+ goto emit_remainder;
542
+ next_hash = Hash(next_ip, shift);
543
+ candidate = base_ip + table[hash];
544
+ DCHECK_GE(candidate, base_ip);
545
+ DCHECK_LT(candidate, ip);
546
+
547
+ table[hash] = ip - base_ip;
548
+ } while (likely(UNALIGNED_LOAD32(ip) !=
549
+ UNALIGNED_LOAD32(candidate)));
550
+
551
+ /*
552
+ * Step 2: A 4-byte match has been found. We'll later see if more
553
+ * than 4 bytes match. But, prior to the match, input
554
+ * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
555
+ */
556
+ DCHECK_LE(next_emit + 16, ip_end);
557
+ op = EmitLiteral(op, next_emit, ip - next_emit, 1);
558
+
559
+ /*
560
+ * Step 3: Call EmitCopy, and then see if another EmitCopy could
561
+ * be our next move. Repeat until we find no match for the
562
+ * input immediately after what was consumed by the last EmitCopy call.
563
+ *
564
+ * If we exit this loop normally then we need to call EmitLiteral next,
565
+ * though we don't yet know how big the literal will be. We handle that
566
+ * by proceeding to the next iteration of the main loop. We also can exit
567
+ * this loop via goto if we get close to exhausting the input.
568
+ */
569
+ candidate_bytes = 0;
570
+
571
+ do {
572
+ /* We have a 4-byte match at ip, and no need to emit any
573
+ "literal bytes" prior to ip. */
574
+ base = ip;
575
+ matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
576
+ ip += matched;
577
+ DCHECK_EQ(0, memcmp(base, candidate, matched));
578
+ op = EmitCopy(op, base - candidate, matched);
579
+ /* We could immediately start working at ip now, but to improve
580
+ compression we first update table[Hash(ip - 1, ...)]. */
581
+ next_emit = ip;
582
+ if (unlikely(ip >= ip_limit))
583
+ goto emit_remainder;
584
+ input_bytes = GetEightBytesAt(ip - 1);
585
+ prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
586
+ table[prev_hash] = ip - base_ip - 1;
587
+ cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
588
+ candidate = base_ip + table[cur_hash];
589
+ candidate_bytes = UNALIGNED_LOAD32(candidate);
590
+ table[cur_hash] = ip - base_ip;
591
+ } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
592
+
593
+ next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
594
+ ++ip;
595
+ goto main_loop;
596
+
597
+ emit_remainder:
598
+ /* Emit the remaining bytes as a literal */
599
+ if (next_emit < ip_end)
600
+ op = EmitLiteral(op, next_emit, ip_end - next_emit, 0);
601
+
602
+ return op;
603
+ }
604
+ #endif /* !simple */
605
+ #if defined(__KERNEL__) && !defined(STATIC)
606
+ EXPORT_SYMBOL(csnappy_compress_fragment);
607
+ #endif
608
+
609
/*
 * Size of output buffer to provide for compressing source_len bytes:
 * the input length plus one sixth of it plus 32 bytes. The sum is computed
 * in 32-bit unsigned arithmetic.
 */
uint32_t __attribute__((const))
csnappy_max_compressed_length(uint32_t source_len)
{
	const uint32_t fixed_overhead = 32;
	const uint32_t expansion = source_len / 6;

	return fixed_overhead + source_len + expansion;
}
614
+ #if defined(__KERNEL__) && !defined(STATIC)
615
+ EXPORT_SYMBOL(csnappy_max_compressed_length);
616
+ #endif
617
+
618
+ void
619
+ csnappy_compress(
620
+ const char *input,
621
+ uint32_t input_length,
622
+ char *compressed,
623
+ uint32_t *compressed_length,
624
+ void *working_memory,
625
+ const int workmem_bytes_power_of_two)
626
+ {
627
+ int workmem_size;
628
+ int num_to_read;
629
+ uint32_t written = 0;
630
+ char *p = encode_varint32(compressed, input_length);
631
+ written += (p - compressed);
632
+ compressed = p;
633
+ while (input_length > 0) {
634
+ num_to_read = min(input_length, (uint32_t)kBlockSize);
635
+ workmem_size = workmem_bytes_power_of_two;
636
+ if (unlikely(num_to_read < kBlockSize)) {
637
+ for (workmem_size = 9;
638
+ workmem_size < workmem_bytes_power_of_two;
639
+ ++workmem_size) {
640
+ if ((1 << (workmem_size-1)) >= num_to_read)
641
+ break;
642
+ }
643
+ }
644
+ p = csnappy_compress_fragment(
645
+ input, num_to_read, compressed,
646
+ working_memory, workmem_size);
647
+ written += (p - compressed);
648
+ compressed = p;
649
+ input_length -= num_to_read;
650
+ input += num_to_read;
651
+ }
652
+ *compressed_length = written;
653
+ }
654
+ #if defined(__KERNEL__) && !defined(STATIC)
655
+ EXPORT_SYMBOL(csnappy_compress);
656
+
657
+ MODULE_LICENSE("BSD");
658
+ MODULE_DESCRIPTION("Snappy Compressor");
659
+ #endif