lz4-ruby 0.2.0-x86-mingw32 → 0.3.0-x86-mingw32

data/ext/lz4ruby/lz4hc.c CHANGED
@@ -1,671 +1,890 @@
- /*
- LZ4 HC - High Compression Mode of LZ4
- Copyright (C) 2011-2012, Yann Collet.
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- - LZ4 source repository : http://code.google.com/p/lz4/
- */
-
-
- //**************************************
- // CPU Feature Detection
- //**************************************
- // 32 or 64 bits ?
- #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) ) // Detects 64 bits mode
- #define LZ4_ARCH64 1
- #else
- #define LZ4_ARCH64 0
- #endif
-
- // Little Endian or Big Endian ?
- #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) )
- #define LZ4_BIG_ENDIAN 1
- #else
- // Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
- #endif
-
- // Unaligned memory access is automatically enabled for "common" CPU, such as x86.
- // For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
- // If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance
- #if defined(__ARM_FEATURE_UNALIGNED)
- #define LZ4_FORCE_UNALIGNED_ACCESS 1
- #endif
-
-
- //**************************************
- // Compiler Options
- //**************************************
- #if __STDC_VERSION__ >= 199901L // C99
- /* "restrict" is a known keyword */
- #else
- #define restrict // Disable restrict
- #endif
-
- #ifdef _MSC_VER
- #define inline __forceinline // Visual is not C99, but supports some kind of inline
- #include <intrin.h> // For Visual 2005
- # if LZ4_ARCH64 // 64-bit
- # pragma intrinsic(_BitScanForward64) // For Visual 2005
- # pragma intrinsic(_BitScanReverse64) // For Visual 2005
- # else
- # pragma intrinsic(_BitScanForward) // For Visual 2005
- # pragma intrinsic(_BitScanReverse) // For Visual 2005
- # endif
- #endif
-
- #ifdef _MSC_VER // Visual Studio
- #define lz4_bswap16(x) _byteswap_ushort(x)
- #else
- #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
- #endif
-
-
- //**************************************
- // Includes
- //**************************************
- #include <stdlib.h> // calloc, free
- #include <string.h> // memset, memcpy
- #include "lz4hc.h"
-
- #define ALLOCATOR(s) calloc(1,s)
- #define FREEMEM free
- #define MEM_INIT memset
-
-
- //**************************************
- // Basic Types
- //**************************************
- #if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively
- #define BYTE unsigned __int8
- #define U16 unsigned __int16
- #define U32 unsigned __int32
- #define S32 __int32
- #define U64 unsigned __int64
- #else
- #include <stdint.h>
- #define BYTE uint8_t
- #define U16 uint16_t
- #define U32 uint32_t
- #define S32 int32_t
- #define U64 uint64_t
- #endif
-
- #ifndef LZ4_FORCE_UNALIGNED_ACCESS
- #pragma pack(push, 1)
- #endif
-
- typedef struct _U16_S { U16 v; } U16_S;
- typedef struct _U32_S { U32 v; } U32_S;
- typedef struct _U64_S { U64 v; } U64_S;
-
- #ifndef LZ4_FORCE_UNALIGNED_ACCESS
- #pragma pack(pop)
- #endif
-
- #define A64(x) (((U64_S *)(x))->v)
- #define A32(x) (((U32_S *)(x))->v)
- #define A16(x) (((U16_S *)(x))->v)
-
-
- //**************************************
- // Constants
- //**************************************
- #define MINMATCH 4
-
- #define DICTIONARY_LOGSIZE 16
- #define MAXD (1<<DICTIONARY_LOGSIZE)
- #define MAXD_MASK ((U32)(MAXD - 1))
- #define MAX_DISTANCE (MAXD - 1)
-
- #define HASH_LOG (DICTIONARY_LOGSIZE-1)
- #define HASHTABLESIZE (1 << HASH_LOG)
- #define HASH_MASK (HASHTABLESIZE - 1)
-
- #define MAX_NB_ATTEMPTS 256
-
- #define ML_BITS 4
- #define ML_MASK (size_t)((1U<<ML_BITS)-1)
- #define RUN_BITS (8-ML_BITS)
- #define RUN_MASK ((1U<<RUN_BITS)-1)
-
- #define COPYLENGTH 8
- #define LASTLITERALS 5
- #define MFLIMIT (COPYLENGTH+MINMATCH)
- #define MINLENGTH (MFLIMIT+1)
- #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
-
-
- //**************************************
- // Architecture-specific macros
- //**************************************
- #if LZ4_ARCH64 // 64-bit
- #define STEPSIZE 8
- #define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8;
- #define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d)
- #define UARCH U64
- #define AARCH A64
- #define HTYPE U32
- #define INITBASE(b,s) const BYTE* const b = s
- #else // 32-bit
- #define STEPSIZE 4
- #define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4;
- #define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
- #define UARCH U32
- #define AARCH A32
- #define HTYPE const BYTE*
- #define INITBASE(b,s) const int b = 0
- #endif
-
- #if defined(LZ4_BIG_ENDIAN)
- #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
- #define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
- #else // Little Endian
- #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
- #define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
- #endif
-
-
- //************************************************************
- // Local Types
- //************************************************************
- typedef struct
- {
- const BYTE* base;
- HTYPE hashTable[HASHTABLESIZE];
- U16 chainTable[MAXD];
- const BYTE* nextToUpdate;
- } LZ4HC_Data_Structure;
-
-
- //**************************************
- // Macros
- //**************************************
- #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e);
- #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }
- #define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
- #define HASH_VALUE(p) HASH_FUNCTION(*(U32*)(p))
- #define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base)
- #define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK]
- #define GETNEXT(p) ((p) - (size_t)DELTANEXT(p))
- #define ADD_HASH(p) { size_t delta = (p) - HASH_POINTER(p); if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; DELTANEXT(p) = (U16)delta; HashTable[HASH_VALUE(p)] = (p) - base; }
-
-
- //**************************************
- // Private functions
- //**************************************
- #if LZ4_ARCH64
-
- inline static int LZ4_NbCommonBytes (register U64 val)
- {
- #if defined(LZ4_BIG_ENDIAN)
- #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanReverse64( &r, val );
- return (int)(r>>3);
- #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_clzll(val) >> 3);
- #else
- int r;
- if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
- r += (!val);
- return r;
- #endif
- #else
- #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanForward64( &r, val );
- return (int)(r>>3);
- #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_ctzll(val) >> 3);
- #else
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
- return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
- #endif
- #endif
- }
-
- #else
-
- inline static int LZ4_NbCommonBytes (register U32 val)
- {
- #if defined(LZ4_BIG_ENDIAN)
- #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanReverse( &r, val );
- return (int)(r>>3);
- #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_clz(val) >> 3);
- #else
- int r;
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
- r += (!val);
- return r;
- #endif
- #else
- #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanForward( &r, val );
- return (int)(r>>3);
- #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
- return (__builtin_ctz(val) >> 3);
- #else
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
- #endif
- #endif
- }
-
- #endif
-
-
- inline static int LZ4HC_Init (LZ4HC_Data_Structure* hc4, const BYTE* base)
- {
- MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
- MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
- hc4->nextToUpdate = base + LZ4_ARCH64;
- hc4->base = base;
- return 1;
- }
-
-
- inline static void* LZ4HC_Create (const BYTE* base)
- {
- void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));
-
- LZ4HC_Init (hc4, base);
- return hc4;
- }
-
-
- inline static int LZ4HC_Free (void** LZ4HC_Data)
- {
- FREEMEM(*LZ4HC_Data);
- *LZ4HC_Data = NULL;
- return (1);
- }
-
-
- inline static void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
- {
- U16* chainTable = hc4->chainTable;
- HTYPE* HashTable = hc4->hashTable;
- INITBASE(base,hc4->base);
-
- while(hc4->nextToUpdate < ip)
- {
- ADD_HASH(hc4->nextToUpdate);
- hc4->nextToUpdate++;
- }
- }
-
-
- inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
- {
- U16* const chainTable = hc4->chainTable;
- HTYPE* const HashTable = hc4->hashTable;
- const BYTE* ref;
- INITBASE(base,hc4->base);
- int nbAttempts=MAX_NB_ATTEMPTS;
- int ml=0;
-
- // HC4 match finder
- LZ4HC_Insert(hc4, ip);
- ref = HASH_POINTER(ip);
- while ((ref > (ip-MAX_DISTANCE)) && (nbAttempts))
- {
- nbAttempts--;
- if (*(ref+ml) == *(ip+ml))
- if (*(U32*)ref == *(U32*)ip)
- {
- const BYTE* reft = ref+MINMATCH;
- const BYTE* ipt = ip+MINMATCH;
-
- while (ipt<matchlimit-(STEPSIZE-1))
- {
- UARCH diff = AARCH(reft) ^ AARCH(ipt);
- if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
- ipt += LZ4_NbCommonBytes(diff);
- goto _endCount;
- }
- if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
- if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
- if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
- _endCount:
-
- if (ipt-ip > ml) { ml = (int)(ipt-ip); *matchpos = ref; }
- }
- ref = GETNEXT(ref);
- }
-
- return ml;
- }
-
-
- inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
- {
- U16* const chainTable = hc4->chainTable;
- HTYPE* const HashTable = hc4->hashTable;
- INITBASE(base,hc4->base);
- const BYTE* ref;
- int nbAttempts = MAX_NB_ATTEMPTS;
- int delta = (int)(ip-startLimit);
-
- // First Match
- LZ4HC_Insert(hc4, ip);
- ref = HASH_POINTER(ip);
-
- while ((ref > ip-MAX_DISTANCE) && (ref >= hc4->base) && (nbAttempts))
- {
- nbAttempts--;
- if (*(startLimit + longest) == *(ref - delta + longest))
- if (*(U32*)ref == *(U32*)ip)
- {
- const BYTE* reft = ref+MINMATCH;
- const BYTE* ipt = ip+MINMATCH;
- const BYTE* startt = ip;
-
- while (ipt<matchlimit-(STEPSIZE-1))
- {
- UARCH diff = AARCH(reft) ^ AARCH(ipt);
- if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
- ipt += LZ4_NbCommonBytes(diff);
- goto _endCount;
- }
- if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
- if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
- if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
- _endCount:
-
- reft = ref;
- while ((startt>startLimit) && (reft > hc4->base) && (startt[-1] == reft[-1])) {startt--; reft--;}
-
- if ((ipt-startt) > longest)
- {
- longest = (int)(ipt-startt);
- *matchpos = reft;
- *startpos = startt;
- }
- }
- ref = GETNEXT(ref);
- }
-
- return longest;
- }
-
-
- inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref)
- {
- int length, len;
- BYTE* token;
-
- // Encode Literal length
- length = (int)(*ip - *anchor);
- token = (*op)++;
- if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
- else *token = (length<<ML_BITS);
-
- // Copy Literals
- LZ4_BLINDCOPY(*anchor, *op, length);
-
- // Encode Offset
- LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
-
- // Encode MatchLength
- len = (int)(ml-MINMATCH);
- if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; }
- else *token += len;
-
- // Prepare next loop
- *ip += ml;
- *anchor = *ip;
-
- return 0;
- }
-
-
- //****************************
- // Compression CODE
- //****************************
-
- int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx,
- const char* source,
- char* dest,
- int isize)
- {
- const BYTE* ip = (const BYTE*) source;
- const BYTE* anchor = ip;
- const BYTE* const iend = ip + isize;
- const BYTE* const mflimit = iend - MFLIMIT;
- const BYTE* const matchlimit = (iend - LASTLITERALS);
-
- BYTE* op = (BYTE*) dest;
-
- int ml, ml2, ml3, ml0;
- const BYTE* ref=NULL;
- const BYTE* start2=NULL;
- const BYTE* ref2=NULL;
- const BYTE* start3=NULL;
- const BYTE* ref3=NULL;
- const BYTE* start0;
- const BYTE* ref0;
-
- ip++;
-
- // Main Loop
- while (ip < mflimit)
- {
- ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
- if (!ml) { ip++; continue; }
-
- // saved, in case we would skip too much
- start0 = ip;
- ref0 = ref;
- ml0 = ml;
-
- _Search2:
- if (ip+ml < mflimit)
- ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
- else ml2=ml;
-
- if (ml2 == ml) // No better match
- {
- LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
- continue;
- }
-
- if (start0 < ip)
- {
- if (start2 < ip + ml0) // empirical
- {
- ip = start0;
- ref = ref0;
- ml = ml0;
- }
- }
-
- // Here, start0==ip
- if ((start2 - ip) < 3) // First Match too small : removed
- {
- ml = ml2;
- ip = start2;
- ref =ref2;
- goto _Search2;
- }
-
- _Search3:
- // Currently we have :
- // ml2 > ml1, and
- // ip1+3 <= ip2 (usually < ip1+ml1)
- if ((start2 - ip) < OPTIMAL_ML)
- {
- int correction;
- int new_ml = ml;
- if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
- if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
- correction = new_ml - (int)(start2 - ip);
- if (correction > 0)
- {
- start2 += correction;
- ref2 += correction;
- ml2 -= correction;
- }
- }
- // Now, we have start2 = ip+new_ml, with new_ml=min(ml, OPTIMAL_ML=18)
-
- if (start2 + ml2 < mflimit)
- ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
- else ml3=ml2;
-
- if (ml3 == ml2) // No better match : 2 sequences to encode
- {
- // ip & ref are known; Now for ml
- if (start2 < ip+ml)
- {
- if ((start2 - ip) < OPTIMAL_ML)
- {
- int correction;
- if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
- if (ip+ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
- correction = ml - (int)(start2 - ip);
- if (correction > 0)
- {
- start2 += correction;
- ref2 += correction;
- ml2 -= correction;
- }
- }
- else
- {
- ml = (int)(start2 - ip);
- }
- }
- // Now, encode 2 sequences
- LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
- ip = start2;
- LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2);
- continue;
- }
-
- if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
- {
- if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
- {
- if (start2 < ip+ml)
- {
- int correction = (int)(ip+ml - start2);
- start2 += correction;
- ref2 += correction;
- ml2 -= correction;
- if (ml2 < MINMATCH)
- {
- start2 = start3;
- ref2 = ref3;
- ml2 = ml3;
- }
- }
-
- LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
- ip = start3;
- ref = ref3;
- ml = ml3;
-
- start0 = start2;
- ref0 = ref2;
- ml0 = ml2;
- goto _Search2;
- }
-
- start2 = start3;
- ref2 = ref3;
- ml2 = ml3;
- goto _Search3;
- }
-
- // OK, now we have 3 ascending matches; let's write at least the first one
- // ip & ref are known; Now for ml
- if (start2 < ip+ml)
- {
- if ((start2 - ip) < (int)ML_MASK)
- {
- int correction;
- if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
- if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
- correction = ml - (int)(start2 - ip);
- if (correction > 0)
- {
- start2 += correction;
- ref2 += correction;
- ml2 -= correction;
- }
- }
- else
- {
- ml = (int)(start2 - ip);
- }
- }
- LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
-
- ip = start2;
- ref = ref2;
- ml = ml2;
-
- start2 = start3;
- ref2 = ref3;
- ml2 = ml3;
-
- goto _Search3;
-
- }
-
- // Encode Last Literals
- {
- int lastRun = (int)(iend - anchor);
- if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
- else *op++ = (lastRun<<ML_BITS);
- memcpy(op, anchor, iend - anchor);
- op += iend-anchor;
- }
-
- // End
- return (int) (((char*)op)-dest);
- }
-
-
- int LZ4_compressHC(const char* source,
- char* dest,
- int isize)
- {
- void* ctx = LZ4HC_Create((const BYTE*)source);
- int result = LZ4_compressHCCtx(ctx, source, dest, isize);
- LZ4HC_Free (&ctx);
-
- return result;
- }
-
-
+ /*
+ LZ4 HC - High Compression Mode of LZ4
+ Copyright (C) 2011-2014, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ - LZ4 source repository : http://code.google.com/p/lz4/
+ */
+
+
+
+ /**************************************
+ Tuning Parameter
+ **************************************/
+ #define LZ4HC_DEFAULT_COMPRESSIONLEVEL 8
+
+
+ /**************************************
+ Memory routines
+ **************************************/
+ #include <stdlib.h> /* calloc, free */
+ #define ALLOCATOR(s) calloc(1,s)
+ #define FREEMEM free
+ #include <string.h> /* memset, memcpy */
+ #define MEM_INIT memset
+
+
+ /**************************************
+ CPU Feature Detection
+ **************************************/
+ /* 32 or 64 bits ? */
+ #if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
+ || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
+ || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \
+ || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) /* Detects 64 bits mode */
+ # define LZ4_ARCH64 1
+ #else
+ # define LZ4_ARCH64 0
+ #endif
+
+ /*
+ * Little Endian or Big Endian ?
+ * Overwrite the #define below if you know your architecture endianess
+ */
+ #if defined (__GLIBC__)
+ # include <endian.h>
+ # if (__BYTE_ORDER == __BIG_ENDIAN)
+ # define LZ4_BIG_ENDIAN 1
+ # endif
+ #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
+ # define LZ4_BIG_ENDIAN 1
+ #elif defined(__sparc) || defined(__sparc__) \
+ || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
+ || defined(__hpux) || defined(__hppa) \
+ || defined(_MIPSEB) || defined(__s390__)
+ # define LZ4_BIG_ENDIAN 1
+ #else
+ /* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */
+ #endif
+
+ /*
+ * Unaligned memory access is automatically enabled for "common" CPU, such as x86.
+ * For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
+ * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
+ */
+ #if defined(__ARM_FEATURE_UNALIGNED)
+ # define LZ4_FORCE_UNALIGNED_ACCESS 1
+ #endif
+
+ /* Define this parameter if your target system or compiler does not support hardware bit count */
+ #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */
+ # define LZ4_FORCE_SW_BITCOUNT
+ #endif
+
+
+ /**************************************
+ Compiler Options
+ **************************************/
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
+ /* "restrict" is a known keyword */
+ #else
+ # define restrict /* Disable restrict */
+ #endif
+
+ #ifdef _MSC_VER /* Visual Studio */
+ # define FORCE_INLINE static __forceinline
+ # include <intrin.h> /* For Visual 2005 */
+ # if LZ4_ARCH64 /* 64-bits */
+ # pragma intrinsic(_BitScanForward64) /* For Visual 2005 */
+ # pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */
+ # else /* 32-bits */
+ # pragma intrinsic(_BitScanForward) /* For Visual 2005 */
+ # pragma intrinsic(_BitScanReverse) /* For Visual 2005 */
+ # endif
+ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+ # pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable used */
+ #else
+ # ifdef __GNUC__
+ # define FORCE_INLINE static inline __attribute__((always_inline))
+ # else
+ # define FORCE_INLINE static inline
+ # endif
+ #endif
+
+ #ifdef _MSC_VER /* Visual Studio */
+ # define lz4_bswap16(x) _byteswap_ushort(x)
+ #else
+ # define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
+ #endif
+
+
+ /**************************************
+ Includes
+ **************************************/
+ #include "lz4hc.h"
+ #include "lz4.h"
+
+
+ /**************************************
+ Basic Types
+ **************************************/
+ #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
+ # include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ #else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ #endif
+
+ #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
+ # define _PACKED __attribute__ ((packed))
+ #else
+ # define _PACKED
+ #endif
+
+ #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+ # ifdef __IBMC__
+ # pragma pack(1)
+ # else
+ # pragma pack(push, 1)
+ # endif
+ #endif
+
+ typedef struct _U16_S { U16 v; } _PACKED U16_S;
+ typedef struct _U32_S { U32 v; } _PACKED U32_S;
+ typedef struct _U64_S { U64 v; } _PACKED U64_S;
+
+ #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+ # pragma pack(pop)
+ #endif
+
+ #define A64(x) (((U64_S *)(x))->v)
+ #define A32(x) (((U32_S *)(x))->v)
+ #define A16(x) (((U16_S *)(x))->v)
+
+
+ /**************************************
+ Constants
+ **************************************/
+ #define MINMATCH 4
+
+ #define DICTIONARY_LOGSIZE 16
+ #define MAXD (1<<DICTIONARY_LOGSIZE)
+ #define MAXD_MASK ((U32)(MAXD - 1))
+ #define MAX_DISTANCE (MAXD - 1)
+
+ #define HASH_LOG (DICTIONARY_LOGSIZE-1)
+ #define HASHTABLESIZE (1 << HASH_LOG)
+ #define HASH_MASK (HASHTABLESIZE - 1)
+
+ #define ML_BITS 4
+ #define ML_MASK (size_t)((1U<<ML_BITS)-1)
+ #define RUN_BITS (8-ML_BITS)
+ #define RUN_MASK ((1U<<RUN_BITS)-1)
+
+ #define COPYLENGTH 8
+ #define LASTLITERALS 5
+ #define MFLIMIT (COPYLENGTH+MINMATCH)
+ #define MINLENGTH (MFLIMIT+1)
+ #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+
+ #define KB *(1U<<10)
+ #define MB *(1U<<20)
+ #define GB *(1U<<30)
+
+
+ /**************************************
+ Architecture-specific macros
+ **************************************/
+ #if LZ4_ARCH64 /* 64-bit */
+ # define STEPSIZE 8
+ # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8;
+ # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d)
+ # define AARCH A64
+ # define HTYPE U32
+ # define INITBASE(b,s) const BYTE* const b = s
+ #else /* 32-bit */
+ # define STEPSIZE 4
+ # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4;
+ # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
+ # define AARCH A32
+ # define HTYPE U32
+ # define INITBASE(b,s) const BYTE* const b = s
+ #endif
+
+ #if defined(LZ4_BIG_ENDIAN)
+ # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
+ # define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
+ #else /* Little Endian */
+ # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
+ # define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
+ #endif
+
+
+ /**************************************
+ Local Types
+ **************************************/
+ typedef struct
+ {
+ const BYTE* inputBuffer;
+ const BYTE* base;
+ const BYTE* end;
+ HTYPE hashTable[HASHTABLESIZE];
+ U16 chainTable[MAXD];
+ const BYTE* nextToUpdate;
+ } LZ4HC_Data_Structure;
+
+
+ /**************************************
+ Macros
+ **************************************/
+ #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e);
+ #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }
+ #define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
+ #define HASH_VALUE(p) HASH_FUNCTION(A32(p))
+ #define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base)
+ #define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK]
+ #define GETNEXT(p) ((p) - (size_t)DELTANEXT(p))
+
+
+ /**************************************
+ Private functions
+ **************************************/
+ #if LZ4_ARCH64
+
+ FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
+ {
+ #if defined(LZ4_BIG_ENDIAN)
+ # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanReverse64( &r, val );
+ return (int)(r>>3);
+ # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clzll(val) >> 3);
+ # else
+ int r;
+ if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+ # endif
+ #else
+ # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanForward64( &r, val );
+ return (int)(r>>3);
+ # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctzll(val) >> 3);
+ # else
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+ return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
+ # endif
+ #endif
+ }
+
+ #else
+
+ FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
+ {
+ #if defined(LZ4_BIG_ENDIAN)
+ # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanReverse( &r, val );
+ return (int)(r>>3);
+ # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clz(val) >> 3);
+ # else
+ int r;
+ if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+ r += (!val);
+ return r;
+ # endif
+ #else
+ # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanForward( &r, val );
+ return (int)(r>>3);
+ # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctz(val) >> 3);
+ # else
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+ # endif
+ #endif
+ }
+
+ #endif
+
+
+ int LZ4_sizeofStreamStateHC()
+ {
+ return sizeof(LZ4HC_Data_Structure);
+ }
+
+ FORCE_INLINE void LZ4_initHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
+ {
+ MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
+ MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+ hc4->nextToUpdate = base + 1;
+ hc4->base = base;
+ hc4->inputBuffer = base;
+ hc4->end = base;
+ }
+
+ int LZ4_resetStreamStateHC(void* state, const char* inputBuffer)
+ {
+ if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */
+ LZ4_initHC((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
+ return 0;
+ }
+
+
+ void* LZ4_createHC (const char* inputBuffer)
+ {
+ void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));
+ LZ4_initHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
+ return hc4;
+ }
+
+
+ int LZ4_freeHC (void* LZ4HC_Data)
+ {
+ FREEMEM(LZ4HC_Data);
+ return (0);
+ }
+
+
+ /* Update chains up to ip (excluded) */
+ FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
+ {
+ U16* chainTable = hc4->chainTable;
+ HTYPE* HashTable = hc4->hashTable;
+ INITBASE(base,hc4->base);
+
+ while(hc4->nextToUpdate < ip)
+ {
+ const BYTE* const p = hc4->nextToUpdate;
+ size_t delta = (p) - HASH_POINTER(p);
+ if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+ DELTANEXT(p) = (U16)delta;
+ HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base);
+ hc4->nextToUpdate++;
+ }
+ }
+
+
+ char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
+ {
+ LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
+ U32 distance = (U32)(hc4->end - hc4->inputBuffer) - 64 KB;
+ distance = (distance >> 16) << 16; /* Must be a multiple of 64 KB */
+ LZ4HC_Insert(hc4, hc4->end - MINMATCH);
+ memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB);
+ hc4->nextToUpdate -= distance;
+ hc4->base -= distance;
+ if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB) /* Avoid overflow */
+ {
+ int i;
+ hc4->base += 1 GB;
+ for (i=0; i<HASHTABLESIZE; i++) hc4->hashTable[i] -= 1 GB;
+ }
+ hc4->end -= distance;
+ return (char*)(hc4->end);
+ }
+
+
+ FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
+ {
+ const BYTE* p1t = p1;
+
+ while (p1t<matchlimit-(STEPSIZE-1))
+ {
+ size_t diff = AARCH(p2) ^ AARCH(p1t);
+ if (!diff) { p1t+=STEPSIZE; p2+=STEPSIZE; continue; }
+ p1t += LZ4_NbCommonBytes(diff);
+ return (p1t - p1);
+ }
+ if (LZ4_ARCH64) if ((p1t<(matchlimit-3)) && (A32(p2) == A32(p1t))) { p1t+=4; p2+=4; }
+ if ((p1t<(matchlimit-1)) && (A16(p2) == A16(p1t))) { p1t+=2; p2+=2; }
+ if ((p1t<matchlimit) && (*p2 == *p1t)) p1t++;
+ return (p1t - p1);
+ }
+
+
+ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos, const int maxNbAttempts)
+ {
+ U16* const chainTable = hc4->chainTable;
+ HTYPE* const HashTable = hc4->hashTable;
+ const BYTE* ref;
+ INITBASE(base,hc4->base);
+ int nbAttempts=maxNbAttempts;
+ size_t repl=0, ml=0;
+ U16 delta=0; /* useless assignment, to remove an uninitialization warning */
+
+ /* HC4 match finder */
+ LZ4HC_Insert(hc4, ip);
+ ref = HASH_POINTER(ip);
+
+ #define REPEAT_OPTIMIZATION
+ #ifdef REPEAT_OPTIMIZATION
+ /* Detect repetitive sequences of length <= 4 */
+ if ((U32)(ip-ref) <= 4) /* potential repetition */
+ {
+ if (A32(ref) == A32(ip)) /* confirmed */
+ {
+ delta = (U16)(ip-ref);
+ repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
+ *matchpos = ref;
+ }
+ ref = GETNEXT(ref);
+ }
+ #endif
+
+ while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
+ {
+ nbAttempts--;
+ if (*(ref+ml) == *(ip+ml))
+ if (A32(ref) == A32(ip))
+ {
+ size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
+ if (mlt > ml) { ml = mlt; *matchpos = ref; }
+ }
+ ref = GETNEXT(ref);
+ }
+
+ #ifdef REPEAT_OPTIMIZATION
+ /* Complete table */
+ if (repl)
+ {
+ const BYTE* ptr = ip;
+ const BYTE* end;
+
+ end = ip + repl - (MINMATCH-1);
+ while(ptr < end-delta)
+ {
+ DELTANEXT(ptr) = delta; /* Pre-Load */
+ ptr++;
+ }
+ do
+ {
+ DELTANEXT(ptr) = delta;
+ HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base); /* Head of chain */
+ ptr++;
+ } while(ptr < end);
+ hc4->nextToUpdate = end;
+ }
+ #endif
+
+ return (int)ml;
+ }
+
+
+ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos, const int maxNbAttempts)
+ {
+ U16* const chainTable = hc4->chainTable;
+ HTYPE* const HashTable = hc4->hashTable;
+ INITBASE(base,hc4->base);
+ const BYTE* ref;
+ int nbAttempts = maxNbAttempts;
+ int delta = (int)(ip-startLimit);
+
+ /* First Match */
+ LZ4HC_Insert(hc4, ip);
+ ref = HASH_POINTER(ip);
+
+ while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
+ {
+ nbAttempts--;
+ if (*(startLimit + longest) == *(ref - delta + longest))
+ if (A32(ref) == A32(ip))
+ {
+ #if 1
+ const BYTE* reft = ref+MINMATCH;
+ const BYTE* ipt = ip+MINMATCH;
+ const BYTE* startt = ip;
+
+ while (ipt<matchlimit-(STEPSIZE-1))
+ {
+ size_t diff = AARCH(reft) ^ AARCH(ipt);
+ if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
+ ipt += LZ4_NbCommonBytes(diff);
+ goto _endCount;
+ }
+ if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
+ if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
+ if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
+ _endCount:
+ reft = ref;
+ #else
+ /* Easier for code maintenance, but unfortunately slower too */
+ const BYTE* startt = ip;
+ const BYTE* reft = ref;
+ const BYTE* ipt = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit);
+ #endif
+
+ while ((startt>startLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;}
+
+ if ((ipt-startt) > longest)
+ {
+ longest = (int)(ipt-startt);
+ *matchpos = reft;
+ *startpos = startt;
+ }
+ }
+ ref = GETNEXT(ref);
+ }
+
+ return longest;
+ }
+
+
+ typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+
+ FORCE_INLINE int LZ4HC_encodeSequence (
+ const BYTE** ip,
+ BYTE** op,
+ const BYTE** anchor,
+ int matchLength,
+ const BYTE* ref,
+ limitedOutput_directive limitedOutputBuffer,
+ BYTE* oend)
+ {
+ int length;
+ BYTE* token;
+
+ /* Encode Literal length */
+ length = (int)(*ip - *anchor);
+ token = (*op)++;
+ if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1; /* Check output limit */
+ if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
+ else *token = (BYTE)(length<<ML_BITS);
+
+ /* Copy Literals */
+ LZ4_BLINDCOPY(*anchor, *op, length);
+
+ /* Encode Offset */
+ LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
+
+ /* Encode MatchLength */
+ length = (int)(matchLength-MINMATCH);
+ if ((limitedOutputBuffer) && (*op + (1 + LASTLITERALS) + (length>>8) > oend)) return 1; /* Check output limit */
+ if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
+ else *token += (BYTE)(length);
+
+ /* Prepare next loop */
+ *ip += matchLength;
+ *anchor = *ip;
+
+ return 0;
+ }
+
+
+ #define MAX_COMPRESSION_LEVEL 16
+ static int LZ4HC_compress_generic (
+ void* ctxvoid,
+ const char* source,
+ char* dest,
+ int inputSize,
+ int maxOutputSize,
+ int compressionLevel,
+ limitedOutput_directive limit
+ )
+ {
+ LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
+ const BYTE* ip = (const BYTE*) source;
+ const BYTE* anchor = ip;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimit = iend - MFLIMIT;
+ const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* const oend = op + maxOutputSize;
+
+ const int maxNbAttempts = compressionLevel > MAX_COMPRESSION_LEVEL ? 1 << MAX_COMPRESSION_LEVEL : compressionLevel ? 1<<(compressionLevel-1) : 1<<LZ4HC_DEFAULT_COMPRESSIONLEVEL;
+ int ml, ml2, ml3, ml0;
+ const BYTE* ref=NULL;
+ const BYTE* start2=NULL;
+ const BYTE* ref2=NULL;
+ const BYTE* start3=NULL;
+ const BYTE* ref3=NULL;
+ const BYTE* start0;
+ const BYTE* ref0;
+
+
+ /* Ensure blocks follow each other */
+ if (ip != ctx->end) return 0;
+ ctx->end += inputSize;
+
+ ip++;
+
+ /* Main Loop */
+ while (ip < mflimit)
+ {
+ ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
+ if (!ml) { ip++; continue; }
+
+ /* saved, in case we would skip too much */
+ start0 = ip;
+ ref0 = ref;
+ ml0 = ml;
+
+ _Search2:
+ if (ip+ml < mflimit)
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+ else ml2 = ml;
+
+ if (ml2 == ml) /* No better match */
+ {
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+ continue;
+ }
+
+ if (start0 < ip)
+ {
+ if (start2 < ip + ml0) /* empirical */
+ {
+ ip = start0;
+ ref = ref0;
+ ml = ml0;
+ }
+ }
+
+ /* Here, start0==ip */
+ if ((start2 - ip) < 3) /* First Match too small : removed */
+ {
+ ml = ml2;
+ ip = start2;
+ ref =ref2;
+ goto _Search2;
+ }
+
+ _Search3:
+ /*
+ * Currently we have :
+ * ml2 > ml1, and
+ * ip1+3 <= ip2 (usually < ip1+ml1)
+ */
+ if ((start2 - ip) < OPTIMAL_ML)
+ {
+ int correction;
+ int new_ml = ml;
+ if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+ if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = new_ml - (int)(start2 - ip);
+ if (correction > 0)
+ {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+ if (start2 + ml2 < mflimit)
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+ else ml3 = ml2;
+
+ if (ml3 == ml2) /* No better match : 2 sequences to encode */
+ {
+ /* ip & ref are known; Now for ml */
+ if (start2 < ip+ml) ml = (int)(start2 - ip);
+ /* Now, encode 2 sequences */
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+ ip = start2;
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+ continue;
+ }
+
+ if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */
+ {
+ if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+ {
+ if (start2 < ip+ml)
+ {
+ int correction = (int)(ip+ml - start2);
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ if (ml2 < MINMATCH)
+ {
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ }
+ }
+
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+ ip = start3;
+ ref = ref3;
+ ml = ml3;
+
+ start0 = start2;
+ ref0 = ref2;
+ ml0 = ml2;
+ goto _Search2;
+ }
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ goto _Search3;
+ }
+
+ /*
+ * OK, now we have 3 ascending matches; let's write at least the first one
+ * ip & ref are known; Now for ml
+ */
+ if (start2 < ip+ml)
+ {
+ if ((start2 - ip) < (int)ML_MASK)
+ {
+ int correction;
+ if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+ if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = ml - (int)(start2 - ip);
+ if (correction > 0)
+ {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ else
+ {
+ ml = (int)(start2 - ip);
+ }
+ }
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+
+ ip = start2;
+ ref = ref2;
+ ml = ml2;
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+
+ goto _Search3;
+
+ }
+
+ /* Encode Last Literals */
+ {
+ int lastRun = (int)(iend - anchor);
+ if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
+ if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+ else *op++ = (BYTE)(lastRun<<ML_BITS);
+ memcpy(op, anchor, iend - anchor);
+ op += iend-anchor;
+ }
+
+ /* End */
+ return (int) (((char*)op)-dest);
+ }
+
+
+ int LZ4_compressHC2(const char* source, char* dest, int inputSize, int compressionLevel)
+ {
+ void* ctx = LZ4_createHC(source);
+ int result;
+ if (ctx==NULL) return 0;
+
+ result = LZ4HC_compress_generic (ctx, source, dest, inputSize, 0, compressionLevel, noLimit);
+
+ LZ4_freeHC(ctx);
+ return result;
+ }
+
+ int LZ4_compressHC(const char* source, char* dest, int inputSize) { return LZ4_compressHC2(source, dest, inputSize, 0); }
+
+ int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
+ {
+ void* ctx = LZ4_createHC(source);
+ int result;
+ if (ctx==NULL) return 0;
+
+ result = LZ4HC_compress_generic (ctx, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
+
+ LZ4_freeHC(ctx);
+ return result;
+ }
+
+ int LZ4_compressHC_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+ {
+ return LZ4_compressHC2_limitedOutput(source, dest, inputSize, maxOutputSize, 0);
+ }
+
+
+ /*****************************
+ Using external allocation
+ *****************************/
+ int LZ4_sizeofStateHC() { return sizeof(LZ4HC_Data_Structure); }
+
+
+ int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel)
+ {
+ if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */
+ LZ4_initHC ((LZ4HC_Data_Structure*)state, (const BYTE*)source);
+ return LZ4HC_compress_generic (state, source, dest, inputSize, 0, compressionLevel, noLimit);
+ }
+
+ int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize)
+ { return LZ4_compressHC2_withStateHC (state, source, dest, inputSize, 0); }
+
+
+ int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
+ {
+ if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */
+ LZ4_initHC ((LZ4HC_Data_Structure*)state, (const BYTE*)source);
+ return LZ4HC_compress_generic (state, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
+ }
+
+ int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize)
+ { return LZ4_compressHC2_limitedOutput_withStateHC (state, source, dest, inputSize, maxOutputSize, 0); }
+
+
+ /****************************
+ Stream functions
+ ****************************/
+
+ int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize)
+ {
+ return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, 0, noLimit);
+ }
+
+ int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel)
+ {
+ return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit);
+ }
+
+ int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize)
+ {
+ return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, 0, limitedOutput);
+ }
+
+ int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
+ {
+ return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
+ }
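
For readers skimming the diff: the old bundled revision exposed a single fixed-effort entry point, LZ4_compressHC(); the 2014 revision above adds selectable compression levels (LZ4_compressHC2(), capped at level 16, with level 0 meaning LZ4HC_DEFAULT_COMPRESSIONLEVEL = 8), limitedOutput variants that return 0 instead of overrunning dest, externally allocated state (*_withStateHC), and streaming (*_continue) entry points. Below is a minimal, hypothetical C caller, not part of the gem, sketching how the new one-shot entry points might be driven; it assumes LZ4_compressBound() from the companion lz4.h header, which this file now includes but which sits outside this diff.

    /* Sketch only: compress a small buffer with the level-aware HC API added in this diff. */
    #include <stdio.h>
    #include <stdlib.h>
    #include "lz4.h"    /* assumption: declares LZ4_compressBound() */
    #include "lz4hc.h"  /* declares the entry points shown above */

    int main(void)
    {
        const char src[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        int srcSize = (int)sizeof(src);
        int bound, n;
        char* dst;

        bound = LZ4_compressBound(srcSize);   /* worst-case compressed size */
        dst = (char*)malloc((size_t)bound);
        if (dst == NULL) return 1;

        /* Level 0 falls back to LZ4HC_DEFAULT_COMPRESSIONLEVEL (8 in this revision). */
        n = LZ4_compressHC2(src, dst, srcSize, 0);
        printf("default level: %d -> %d bytes\n", srcSize, n);

        /* Higher levels widen the match search: 1<<(level-1) chain attempts, capped at 16. */
        n = LZ4_compressHC2(src, dst, srcSize, 12);
        printf("level 12     : %d -> %d bytes\n", srcSize, n);

        /* limitedOutput variant never writes past maxOutputSize; it returns 0 on overflow. */
        n = LZ4_compressHC2_limitedOutput(src, dst, srcSize, 4, 9);
        printf("into 4 bytes : %d (0 means it did not fit)\n", n);

        free(dst);
        return 0;
    }

The *_withStateHC variants near the bottom of the diff perform the same compression into a caller-supplied, pointer-aligned buffer of LZ4_sizeofStateHC() bytes, avoiding the calloc/free pair that LZ4_compressHC2() pays on every call.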