lz4-ruby 0.2.0-java

@@ -0,0 +1,25 @@
+ #!/usr/bin/env ruby
+
+ require './compressor.rb'
+
+ class LZ4Compressor < Compressor
+   def require_libs
+     require 'lz4-ruby'
+   end
+
+   def compress_text(text)
+     return LZ4::compress(text)
+   end
+
+   def uncompress_text(compressed)
+     return LZ4::uncompress(compressed)
+   end
+ end
+
+ def create_compressor(chunk_size)
+   return LZ4Compressor.new(chunk_size)
+ end
+
+ if $0 == __FILE__
+   Compressor.unit_driver() { |chunk_size| LZ4Compressor.new(chunk_size) }
+ end
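For reference, the two binding calls this script exercises round-trip as follows; a minimal sketch, assuming the lz4-ruby gem is installed (LZ4::uncompress takes no explicit size argument here because the gem records the original length in its output):

#!/usr/bin/env ruby
# Minimal round-trip sketch for the lz4-ruby binding used above.
require 'lz4-ruby'

text = 'hello, world! ' * 1_000           # compressible sample input
compressed = LZ4::compress(text)          # same call as compress_text above
restored   = LZ4::uncompress(compressed)  # same call as uncompress_text above

raise 'round-trip mismatch' unless restored == text
puts "#{text.bytesize} -> #{compressed.bytesize} bytes"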
@@ -0,0 +1,25 @@
+ #!/usr/bin/env ruby
+
+ require './compressor.rb'
+
+ class LZOCompressor < Compressor
+   def require_libs
+     require 'lzoruby'
+   end
+
+   def compress_text(text)
+     return LZO.compress(text)
+   end
+
+   def uncompress_text(compressed)
+     return LZO.decompress(compressed)
+   end
+ end
+
+ def create_compressor(chunk_size)
+   return LZOCompressor.new(chunk_size)
+ end
+
+ if $0 == __FILE__
+   Compressor.unit_driver() { |chunk_size| LZOCompressor.new(chunk_size) }
+ end
@@ -0,0 +1,25 @@
+ #!/usr/bin/env ruby
+
+ require './compressor.rb'
+
+ class SnappyCompressor < Compressor
+   def require_libs
+     require 'snappy'
+   end
+
+   def compress_text(text)
+     return Snappy.deflate(text)
+   end
+
+   def uncompress_text(compressed)
+     return Snappy.inflate(compressed)
+   end
+ end
+
+ def create_compressor(chunk_size)
+   return SnappyCompressor.new(chunk_size)
+ end
+
+ if $0 == __FILE__
+   Compressor.unit_driver() { |chunk_size| SnappyCompressor.new(chunk_size) }
+ end
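All three benchmark scripts depend on a Compressor base class from compressor.rb, which is not part of this diff. The following is a hypothetical sketch of the interface they rely on; the method names (require_libs, compress_text, uncompress_text, unit_driver) come from the scripts themselves, while the bodies and the chunk size are assumptions:

# Hypothetical compressor.rb sketch; only the method names are taken from
# the scripts above, the bodies here are illustrative assumptions.
class Compressor
  def initialize(chunk_size)
    @chunk_size = chunk_size
    require_libs # each subclass loads its codec (lz4-ruby, lzoruby, snappy)
  end

  # Plausible unit driver: build a compressor via the supplied block,
  # round-trip a sample buffer, and report the sizes.
  def self.unit_driver
    compressor = yield(64 * 1024) # 64KB chunk size, arbitrary for this sketch
    sample = 'hello ' * 10_000
    compressed = compressor.compress_text(sample)
    raise 'round-trip failed' unless compressor.uncompress_text(compressed) == sample
    puts "ok: #{sample.bytesize} -> #{compressed.bytesize} bytes"
  end
end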
@@ -0,0 +1,6 @@
+ require 'mkmf'
+
+ $CFLAGS += " -Wall "
+
+ create_makefile('lz4ruby')
+
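This extconf.rb follows the standard mkmf workflow: create_makefile('lz4ruby') emits a Makefile that compiles the C sources in its directory into the lz4ruby extension. A sketch of the build steps driven from Ruby (the ext/lz4ruby path is assumed from the data/ext/lz4ruby/lz4.c entry below):

# Build sketch: run extconf.rb to generate the Makefile, then make.
Dir.chdir('ext/lz4ruby') do
  system('ruby extconf.rb') or abort 'extconf.rb failed'
  system('make')            or abort 'make failed'
end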
data/ext/lz4ruby/lz4.c ADDED
@@ -0,0 +1,861 @@
+ /*
+    LZ4 - Fast LZ compression algorithm
+    Copyright (C) 2011-2012, Yann Collet.
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+        * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+    - LZ4 source repository : http://code.google.com/p/lz4/
+ */
+
+ //**************************************
+ // Tuning parameters
+ //**************************************
+ // MEMORY_USAGE :
+ // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ // Increasing memory usage improves compression ratio
+ // Reduced memory usage can improve speed, due to cache effect
+ // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ #define MEMORY_USAGE 14
+
+ // NOTCOMPRESSIBLE_DETECTIONLEVEL :
+ // Decreasing this value will make the algorithm skip faster data segments considered "incompressible"
+ // This may decrease compression ratio dramatically, but will be faster on incompressible data
+ // Increasing this value will make the algorithm search more before declaring a segment "incompressible"
+ // This could improve compression a bit, but will be slower on incompressible data
+ // The default value (6) is recommended
+ #define NOTCOMPRESSIBLE_DETECTIONLEVEL 6
+
+ // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
+ // This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
+ // You can set this option to 1 in situations where data will remain within closed environment
+ // This option is useless on Little_Endian CPU (such as x86)
+ //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
+
+
+
+ //**************************************
+ // CPU Feature Detection
+ //**************************************
+ // 32 or 64 bits ?
+ #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) ) // Detects 64 bits mode
+ # define LZ4_ARCH64 1
+ #else
+ # define LZ4_ARCH64 0
+ #endif
+
+ // Little Endian or Big Endian ?
+ // Note : overwrite the below #define if you know your architecture endianess
+ #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) )
+ # define LZ4_BIG_ENDIAN 1
+ #else
+ // Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
+ #endif
+
+ // Unaligned memory access is automatically enabled for "common" CPU, such as x86.
+ // For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
+ // If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance
+ #if defined(__ARM_FEATURE_UNALIGNED)
+ # define LZ4_FORCE_UNALIGNED_ACCESS 1
+ #endif
+
+ // Define this parameter if your target system or compiler does not support hardware bit count
+ #if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count
+ # define LZ4_FORCE_SW_BITCOUNT
+ #endif
+
+
+ //**************************************
+ // Compiler Options
+ //**************************************
+ #if __STDC_VERSION__ >= 199901L // C99
+ /* "restrict" is a known keyword */
+ #else
+ # define restrict // Disable restrict
+ #endif
+
+ #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+ #ifdef _MSC_VER // Visual Studio
+ # define inline __forceinline // Visual is not C99, but supports some kind of inline
+ # include <intrin.h> // For Visual 2005
+ # if LZ4_ARCH64 // 64-bit
+ #  pragma intrinsic(_BitScanForward64) // For Visual 2005
+ #  pragma intrinsic(_BitScanReverse64) // For Visual 2005
+ # else
+ #  pragma intrinsic(_BitScanForward) // For Visual 2005
+ #  pragma intrinsic(_BitScanReverse) // For Visual 2005
+ # endif
+ #endif
+
+ #ifdef _MSC_VER
+ # define lz4_bswap16(x) _byteswap_ushort(x)
+ #else
+ # define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
+ #endif
+
+ #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
+ # define expect(expr,value) (__builtin_expect ((expr),(value)) )
+ #else
+ # define expect(expr,value) (expr)
+ #endif
+
+ #define likely(expr) expect((expr) != 0, 1)
+ #define unlikely(expr) expect((expr) != 0, 0)
+
+
+ //**************************************
+ // Includes
+ //**************************************
+ #include <stdlib.h> // for malloc
+ #include <string.h> // for memset
+ #include "lz4.h"
+
+
+ //**************************************
+ // Basic Types
+ //**************************************
+ #if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively
+ # define BYTE unsigned __int8
+ # define U16 unsigned __int16
+ # define U32 unsigned __int32
+ # define S32 __int32
+ # define U64 unsigned __int64
+ #else
+ # include <stdint.h>
+ # define BYTE uint8_t
+ # define U16 uint16_t
+ # define U32 uint32_t
+ # define S32 int32_t
+ # define U64 uint64_t
+ #endif
+
+ #ifndef LZ4_FORCE_UNALIGNED_ACCESS
+ # pragma pack(push, 1)
+ #endif
+
+ typedef struct _U16_S { U16 v; } U16_S;
+ typedef struct _U32_S { U32 v; } U32_S;
+ typedef struct _U64_S { U64 v; } U64_S;
+
+ #ifndef LZ4_FORCE_UNALIGNED_ACCESS
+ # pragma pack(pop)
+ #endif
+
+ #define A64(x) (((U64_S *)(x))->v)
+ #define A32(x) (((U32_S *)(x))->v)
+ #define A16(x) (((U16_S *)(x))->v)
+
+
+ //**************************************
+ // Constants
+ //**************************************
+ #define MINMATCH 4
+
+ #define HASH_LOG (MEMORY_USAGE-2)
+ #define HASHTABLESIZE (1 << HASH_LOG)
+ #define HASH_MASK (HASHTABLESIZE - 1)
+
+ #define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2)
+ #define STACKLIMIT 13
+ #define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
+ #define COPYLENGTH 8
+ #define LASTLITERALS 5
+ #define MFLIMIT (COPYLENGTH+MINMATCH)
+ #define MINLENGTH (MFLIMIT+1)
+
+ #define MAXD_LOG 16
+ #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+ #define ML_BITS 4
+ #define ML_MASK ((1U<<ML_BITS)-1)
+ #define RUN_BITS (8-ML_BITS)
+ #define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+ //**************************************
+ // Architecture-specific macros
+ //**************************************
+ #if LZ4_ARCH64 // 64-bit
+ # define STEPSIZE 8
+ # define UARCH U64
+ # define AARCH A64
+ # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8;
+ # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d)
+ # define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e)
+ # define HTYPE U32
+ # define INITBASE(base) const BYTE* const base = ip
+ #else // 32-bit
+ # define STEPSIZE 4
+ # define UARCH U32
+ # define AARCH A32
+ # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4;
+ # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
+ # define LZ4_SECURECOPY LZ4_WILDCOPY
+ # define HTYPE const BYTE*
+ # define INITBASE(base) const int base = 0
+ #endif
+
+ #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
+ # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
+ # define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
+ #else // Little Endian
+ # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
+ # define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
+ #endif
+
+
+ //**************************************
+ // Local structures
+ //**************************************
+ struct refTables
+ {
+     HTYPE hashTable[HASHTABLESIZE];
+ };
+
+
+ //**************************************
+ // Macros
+ //**************************************
+ #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
+ #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
+ #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e);
+ #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+l; LZ4_WILDCOPY(s,d,e); d=e; }
+
+
+ //****************************
+ // Private functions
+ //****************************
+ #if LZ4_ARCH64
+
+ static inline int LZ4_NbCommonBytes (register U64 val)
+ {
+ #if defined(LZ4_BIG_ENDIAN)
+     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+     unsigned long r = 0;
+     _BitScanReverse64( &r, val );
+     return (int)(r>>3);
+     #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+     return (__builtin_clzll(val) >> 3);
+     #else
+     int r;
+     if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+     if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+     r += (!val);
+     return r;
+     #endif
+ #else
+     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+     unsigned long r = 0;
+     _BitScanForward64( &r, val );
+     return (int)(r>>3);
+     #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+     return (__builtin_ctzll(val) >> 3);
+     #else
+     static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+     return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
+     #endif
+ #endif
+ }
+
+ #else
+
+ static inline int LZ4_NbCommonBytes (register U32 val)
+ {
+ #if defined(LZ4_BIG_ENDIAN)
+     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+     unsigned long r = 0;
+     _BitScanReverse( &r, val );
+     return (int)(r>>3);
+     #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+     return (__builtin_clz(val) >> 3);
+     #else
+     int r;
+     if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+     r += (!val);
+     return r;
+     #endif
+ #else
+     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+     unsigned long r = 0;
+     _BitScanForward( &r, val );
+     return (int)(r>>3);
+     #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+     return (__builtin_ctz(val) >> 3);
+     #else
+     static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+     return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+     #endif
+ #endif
+ }
+
+ #endif
+
+
+
+ //******************************
+ // Compression functions
+ //******************************
+
+ // LZ4_compressCtx :
+ // -----------------
+ // Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+ // If it cannot achieve it, compression will stop, and result of the function will be zero.
+ // return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+
+ static inline int LZ4_compressCtx(void** ctx,
+                                   const char* source,
+                                   char* dest,
+                                   int isize,
+                                   int maxOutputSize)
+ {
+ #if HEAPMODE
+     struct refTables *srt = (struct refTables *) (*ctx);
+     HTYPE* HashTable;
+ #else
+     HTYPE HashTable[HASHTABLESIZE] = {0};
+ #endif
+
+     const BYTE* ip = (BYTE*) source;
+     INITBASE(base);
+     const BYTE* anchor = ip;
+     const BYTE* const iend = ip + isize;
+     const BYTE* const mflimit = iend - MFLIMIT;
+ #define matchlimit (iend - LASTLITERALS)
+
+     BYTE* op = (BYTE*) dest;
+     BYTE* const oend = op + maxOutputSize;
+
+     int len, length;
+     const int skipStrength = SKIPSTRENGTH;
+     U32 forwardH;
+
+
+     // Init
+     if (isize<MINLENGTH) goto _last_literals;
+ #if HEAPMODE
+     if (*ctx == NULL)
+     {
+         srt = (struct refTables *) malloc ( sizeof(struct refTables) );
+         *ctx = (void*) srt;
+     }
+     HashTable = (HTYPE*)(srt->hashTable);
+     memset((void*)HashTable, 0, sizeof(srt->hashTable));
+ #else
+     (void) ctx;
+ #endif
+
+
+     // First Byte
+     HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
+     ip++; forwardH = LZ4_HASH_VALUE(ip);
+
+     // Main Loop
+     for ( ; ; )
+     {
+         int findMatchAttempts = (1U << skipStrength) + 3;
+         const BYTE* forwardIp = ip;
+         const BYTE* ref;
+         BYTE* token;
+
+         // Find a match
+         do {
+             U32 h = forwardH;
+             int step = findMatchAttempts++ >> skipStrength;
+             ip = forwardIp;
+             forwardIp = ip + step;
+
+             if unlikely(forwardIp > mflimit) { goto _last_literals; }
+
+             forwardH = LZ4_HASH_VALUE(forwardIp);
+             ref = base + HashTable[h];
+             HashTable[h] = ip - base;
+
+         } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+
+         // Catch up
+         while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
+
+         // Encode Literal length
+         length = (int)(ip - anchor);
+         token = op++;
+         if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; // Check output limit
+ #ifdef _MSC_VER
+         if (length>=(int)RUN_MASK)
+         {
+             int len = length-RUN_MASK;
+             *token=(RUN_MASK<<ML_BITS);
+             if (len>254)
+             {
+                 do { *op++ = 255; len -= 255; } while (len>254);
+                 *op++ = (BYTE)len;
+                 memcpy(op, anchor, length);
+                 op += length;
+                 goto _next_match;
+             }
+             else
+                 *op++ = (BYTE)len;
+         }
+         else *token = (length<<ML_BITS);
+ #else
+         if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
+         else *token = (length<<ML_BITS);
+ #endif
+
+         // Copy Literals
+         LZ4_BLINDCOPY(anchor, op, length);
+
+ _next_match:
+         // Encode Offset
+         LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
+
+         // Start Counting
+         ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
+         anchor = ip;
+         while likely(ip<matchlimit-(STEPSIZE-1))
+         {
+             UARCH diff = AARCH(ref) ^ AARCH(ip);
+             if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
+             ip += LZ4_NbCommonBytes(diff);
+             goto _endCount;
+         }
+         if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
+         if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
+         if ((ip<matchlimit) && (*ref == *ip)) ip++;
+ _endCount:
+
+         // Encode MatchLength
+         len = (int)(ip - anchor);
+         if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
+         else *token += len;
+
+         // Test end of chunk
+         if (ip > mflimit) { anchor = ip; break; }
+
+         // Fill table
+         HashTable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+
+         // Test next position
+         ref = base + HashTable[LZ4_HASH_VALUE(ip)];
+         HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
+         if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
+
+         // Prepare next loop
+         anchor = ip++;
+         forwardH = LZ4_HASH_VALUE(ip);
+     }
+
+ _last_literals:
+     // Encode Last Literals
+     {
+         int lastRun = (int)(iend - anchor);
+         if (((char*)op - dest) + lastRun + 1 + ((lastRun-15)/255) >= maxOutputSize) return 0;
+         if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+         else *op++ = (lastRun<<ML_BITS);
+         memcpy(op, anchor, iend - anchor);
+         op += iend-anchor;
+     }
+
+     // End
+     return (int) (((char*)op)-dest);
+ }
+
+
+
+ // Note : this function is valid only if isize < LZ4_64KLIMIT
+ #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
+ #define HASHLOG64K (HASH_LOG+1)
+ #define HASH64KTABLESIZE (1U<<HASHLOG64K)
+ #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG64K))
+ #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
+ static inline int LZ4_compress64kCtx(void** ctx,
+                                      const char* source,
+                                      char* dest,
+                                      int isize,
+                                      int maxOutputSize)
+ {
+ #if HEAPMODE
+     struct refTables *srt = (struct refTables *) (*ctx);
+     U16* HashTable;
+ #else
+     U16 HashTable[HASH64KTABLESIZE] = {0};
+ #endif
+
+     const BYTE* ip = (BYTE*) source;
+     const BYTE* anchor = ip;
+     const BYTE* const base = ip;
+     const BYTE* const iend = ip + isize;
+     const BYTE* const mflimit = iend - MFLIMIT;
+ #define matchlimit (iend - LASTLITERALS)
+
+     BYTE* op = (BYTE*) dest;
+     BYTE* const oend = op + maxOutputSize;
+
+     int len, length;
+     const int skipStrength = SKIPSTRENGTH;
+     U32 forwardH;
+
+
+     // Init
+     if (isize<MINLENGTH) goto _last_literals;
+ #if HEAPMODE
+     if (*ctx == NULL)
+     {
+         srt = (struct refTables *) malloc ( sizeof(struct refTables) );
+         *ctx = (void*) srt;
+     }
+     HashTable = (U16*)(srt->hashTable);
+     memset((void*)HashTable, 0, sizeof(srt->hashTable));
+ #else
+     (void) ctx;
+ #endif
+
+
+     // First Byte
+     ip++; forwardH = LZ4_HASH64K_VALUE(ip);
+
+     // Main Loop
+     for ( ; ; )
+     {
+         int findMatchAttempts = (1U << skipStrength) + 3;
+         const BYTE* forwardIp = ip;
+         const BYTE* ref;
+         BYTE* token;
+
+         // Find a match
+         do {
+             U32 h = forwardH;
+             int step = findMatchAttempts++ >> skipStrength;
+             ip = forwardIp;
+             forwardIp = ip + step;
+
+             if (forwardIp > mflimit) { goto _last_literals; }
+
+             forwardH = LZ4_HASH64K_VALUE(forwardIp);
+             ref = base + HashTable[h];
+             HashTable[h] = (U16)(ip - base);
+
+         } while (A32(ref) != A32(ip));
+
+         // Catch up
+         while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
+
+         // Encode Literal length
+         length = (int)(ip - anchor);
+         token = op++;
+         if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; // Check output limit
+ #ifdef _MSC_VER
+         if (length>=(int)RUN_MASK)
+         {
+             int len = length-RUN_MASK;
+             *token=(RUN_MASK<<ML_BITS);
+             if (len>254)
+             {
+                 do { *op++ = 255; len -= 255; } while (len>254);
+                 *op++ = (BYTE)len;
+                 memcpy(op, anchor, length);
+                 op += length;
+                 goto _next_match;
+             }
+             else
+                 *op++ = (BYTE)len;
+         }
+         else *token = (length<<ML_BITS);
+ #else
+         if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
+         else *token = (length<<ML_BITS);
+ #endif
+
+         // Copy Literals
+         LZ4_BLINDCOPY(anchor, op, length);
+
+ _next_match:
+         // Encode Offset
+         LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
+
+         // Start Counting
+         ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
+         anchor = ip;
+         while (ip<matchlimit-(STEPSIZE-1))
+         {
+             UARCH diff = AARCH(ref) ^ AARCH(ip);
+             if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
+             ip += LZ4_NbCommonBytes(diff);
+             goto _endCount;
+         }
+         if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
+         if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
+         if ((ip<matchlimit) && (*ref == *ip)) ip++;
+ _endCount:
+
+         // Encode MatchLength
+         len = (int)(ip - anchor);
+         if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
+         else *token += len;
+
+         // Test end of chunk
+         if (ip > mflimit) { anchor = ip; break; }
+
+         // Fill table
+         HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base);
+
+         // Test next position
+         ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
+         HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base);
+         if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
+
+         // Prepare next loop
+         anchor = ip++;
+         forwardH = LZ4_HASH64K_VALUE(ip);
+     }
+
+ _last_literals:
+     // Encode Last Literals
+     {
+         int lastRun = (int)(iend - anchor);
+         if (((char*)op - dest) + lastRun + 1 + ((lastRun)>>8) >= maxOutputSize) return 0;
+         if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+         else *op++ = (lastRun<<ML_BITS);
+         memcpy(op, anchor, iend - anchor);
+         op += iend-anchor;
+     }
+
+     // End
+     return (int) (((char*)op)-dest);
+ }
+
+
+ int LZ4_compress_limitedOutput(const char* source,
+                                char* dest,
+                                int isize,
+                                int maxOutputSize)
+ {
+ #if HEAPMODE
+     void* ctx = malloc(sizeof(struct refTables));
+     int result;
+     if (isize < LZ4_64KLIMIT)
+         result = LZ4_compress64kCtx(&ctx, source, dest, isize, maxOutputSize);
+     else result = LZ4_compressCtx(&ctx, source, dest, isize, maxOutputSize);
+     free(ctx);
+     return result;
+ #else
+     if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize, maxOutputSize);
+     return LZ4_compressCtx(NULL, source, dest, isize, maxOutputSize);
+ #endif
+ }
+
+
+ int LZ4_compress(const char* source,
+                  char* dest,
+                  int isize)
+ {
+     return LZ4_compress_limitedOutput(source, dest, isize, LZ4_compressBound(isize));
+ }
+
+
+
+
+ //****************************
+ // Decompression functions
+ //****************************
+
+ // Note : The decoding functions LZ4_uncompress() and LZ4_uncompress_unknownOutputSize()
+ // are safe against "buffer overflow" attack type.
+ // They will never write nor read outside of the provided output buffers.
+ // LZ4_uncompress_unknownOutputSize() also insures that it will never read outside of the input buffer.
+ // A corrupted input will produce an error result, a negative int, indicating the position of the error within input stream.
+
+ int LZ4_uncompress(const char* source,
+                    char* dest,
+                    int osize)
+ {
+     // Local Variables
+     const BYTE* restrict ip = (const BYTE*) source;
+     const BYTE* restrict ref;
+
+     BYTE* restrict op = (BYTE*) dest;
+     BYTE* const oend = op + osize;
+     BYTE* cpy;
+
+     BYTE token;
+
+     int len, length;
+     size_t dec[] ={0, 3, 2, 3, 0, 0, 0, 0};
+
+
+     // Main Loop
+     while (1)
+     {
+         // get runlength
+         token = *ip++;
+         if ((length=(token>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
+
+         // copy literals
+         cpy = op+length;
+         if unlikely(cpy>oend-COPYLENGTH)
+         {
+             if (cpy > oend) goto _output_error; // Error : request to write beyond destination buffer
+             memcpy(op, ip, length);
+             ip += length;
+             break; // Necessarily EOF
+         }
+         LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
+
+         // get offset
+         LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
+         if (ref < (BYTE* const)dest) goto _output_error; // Error : offset create reference outside destination buffer
+
+         // get matchlength
+         if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; }
+
+         // copy repeated sequence
+         if unlikely(op-ref<STEPSIZE)
+         {
+ #if LZ4_ARCH64
+             size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
+             size_t dec2 = dec2table[op-ref];
+ #else
+             const int dec2 = 0;
+ #endif
+             *op++ = *ref++;
+             *op++ = *ref++;
+             *op++ = *ref++;
+             *op++ = *ref++;
+             ref -= dec[op-ref];
+             A32(op)=A32(ref); op += STEPSIZE-4;
+             ref -= dec2;
+         } else { LZ4_COPYSTEP(ref,op); }
+         cpy = op + length - (STEPSIZE-4);
+         if (cpy>oend-COPYLENGTH)
+         {
+             if (cpy > oend) goto _output_error; // Error : request to write beyond destination buffer
+             LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
+             while(op<cpy) *op++=*ref++;
+             op=cpy;
+             if (op == oend) break; // Check EOF (should never happen, since last 5 bytes are supposed to be literals)
+             continue;
+         }
+         LZ4_SECURECOPY(ref, op, cpy);
+         op=cpy; // correction
+     }
+
+     // end of decoding
+     return (int) (((char*)ip)-source);
+
+     // write overflow error detected
+ _output_error:
+     return (int) (-(((char*)ip)-source));
+ }
+
+
+ int LZ4_uncompress_unknownOutputSize(
+                 const char* source,
+                 char* dest,
+                 int isize,
+                 int maxOutputSize)
+ {
+     // Local Variables
+     const BYTE* restrict ip = (const BYTE*) source;
+     const BYTE* const iend = ip + isize;
+     const BYTE* restrict ref;
+
+     BYTE* restrict op = (BYTE*) dest;
+     BYTE* const oend = op + maxOutputSize;
+     BYTE* cpy;
+
+     size_t dec[] ={0, 3, 2, 3, 0, 0, 0, 0};
+
+
+     // Main Loop
+     while (ip<iend)
+     {
+         BYTE token;
+         int length;
+
+         // get runlength
+         token = *ip++;
+         if ((length=(token>>ML_BITS)) == RUN_MASK) { int s=255; while ((ip<iend) && (s==255)) { s=*ip++; length += s; } }
+
+         // copy literals
+         cpy = op+length;
+         if ((cpy>oend-COPYLENGTH) || (ip+length>iend-COPYLENGTH))
+         {
+             if (cpy > oend) goto _output_error; // Error : request to write beyond destination buffer
+             if (ip+length > iend) goto _output_error; // Error : request to read beyond source buffer
+             memcpy(op, ip, length);
+             op += length;
+             ip += length;
+             if (ip<iend) goto _output_error; // Error : LZ4 format violation
+             break; // Necessarily EOF, due to parsing restrictions
+         }
+         LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
+
+         // get offset
+         LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
+         if (ref < (BYTE* const)dest) goto _output_error; // Error : offset creates reference outside of destination buffer
+
+         // get matchlength
+         if ((length=(token&ML_MASK)) == ML_MASK) { while (ip<iend) { int s = *ip++; length +=s; if (s==255) continue; break; } }
+
+         // copy repeated sequence
+         if unlikely(op-ref<STEPSIZE)
+         {
+ #if LZ4_ARCH64
+             size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
+             size_t dec2 = dec2table[op-ref];
+ #else
+             const int dec2 = 0;
+ #endif
+             *op++ = *ref++;
+             *op++ = *ref++;
+             *op++ = *ref++;
+             *op++ = *ref++;
+             ref -= dec[op-ref];
+             A32(op)=A32(ref); op += STEPSIZE-4;
+             ref -= dec2;
+         } else { LZ4_COPYSTEP(ref,op); }
+         cpy = op + length - (STEPSIZE-4);
+         if (cpy>oend-COPYLENGTH)
+         {
+             if (cpy > oend) goto _output_error; // Error : request to write outside of destination buffer
+             LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
+             while(op<cpy) *op++=*ref++;
+             op=cpy;
+             if (op == oend) break; // Check EOF (should never happen, since last 5 bytes are supposed to be literals)
+             continue;
+         }
+         LZ4_SECURECOPY(ref, op, cpy);
+         op=cpy; // correction
+     }
+
+     // end of decoding
+     return (int) (((char*)op)-dest);
+
+     // write overflow error detected
+ _output_error:
+     return (int) (-(((char*)ip)-source));
+ }
+
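As the tuning comments in lz4.c note, segments detected as incompressible are emitted as plain literals, so output can slightly exceed input; LZ4_compress sizes its worst case via LZ4_compressBound. The effect is easy to observe through the Ruby binding; a small sketch (byte counts are illustrative and include the gem's own length header):

require 'lz4-ruby'

random = Random.new(42).bytes(100_000) # incompressible input
zeros  = "\0" * 100_000                # maximally compressible input

# Random data stays roughly input-sized plus a small literal overhead;
# zeros collapse to a tiny fraction of the original.
puts "random: 100000 -> #{LZ4::compress(random).bytesize} bytes"
puts "zeros:  100000 -> #{LZ4::compress(zeros).bytesize} bytes"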