extlz4 0.2.4.3 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -43,7 +43,15 @@ extern "C" {
43
43
  /* lz4frame_static.h should be used solely in the context of static linking.
44
44
  * It contains definitions which are not stable and may change in the future.
45
45
  * Never use it in the context of DLL linking.
46
+ *
47
+ * Defining LZ4F_PUBLISH_STATIC_FUNCTIONS allows one to override this. Use at
48
+ * your own risk.
46
49
  */
50
+ #ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
51
+ #define LZ4FLIB_STATIC_API LZ4FLIB_API
52
+ #else
53
+ #define LZ4FLIB_STATIC_API
54
+ #endif
47
55
 
48
56
 
49
57
  /* --- Dependency --- */
@@ -79,7 +87,7 @@ extern "C" {
79
87
  /* enum list is exposed, to handle specific errors */
80
88
  typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
81
89
 
82
- LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
90
+ LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
83
91
 
84
92
 
85
93
 
@@ -93,8 +101,8 @@ typedef struct LZ4F_CDict_s LZ4F_CDict;
93
101
  * LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
94
102
  * LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
95
103
  * `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict */
96
- LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
97
- void LZ4F_freeCDict(LZ4F_CDict* CDict);
104
+ LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
105
+ LZ4FLIB_STATIC_API void LZ4F_freeCDict(LZ4F_CDict* CDict);
98
106
 
99
107
 
100
108
  /*! LZ4F_compressFrame_usingCDict() :
@@ -106,10 +114,11 @@ void LZ4F_freeCDict(LZ4F_CDict* CDict);
106
114
  * but it's not recommended, as it's the only way to provide dictID in the frame header.
107
115
  * @return : number of bytes written into dstBuffer.
108
116
  * or an error code if it fails (can be tested using LZ4F_isError()) */
109
- size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
110
- const void* src, size_t srcSize,
111
- const LZ4F_CDict* cdict,
112
- const LZ4F_preferences_t* preferencesPtr);
117
+ LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
118
+ void* dst, size_t dstCapacity,
119
+ const void* src, size_t srcSize,
120
+ const LZ4F_CDict* cdict,
121
+ const LZ4F_preferences_t* preferencesPtr);
113
122
 
114
123
 
115
124
  /*! LZ4F_compressBegin_usingCDict() :
@@ -119,21 +128,23 @@ size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
119
128
  * however, it's the only way to provide dictID in the frame header.
120
129
  * @return : number of bytes written into dstBuffer for the header,
121
130
  * or an error code (which can be tested using LZ4F_isError()) */
122
- size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx,
123
- void* dstBuffer, size_t dstCapacity,
124
- const LZ4F_CDict* cdict,
125
- const LZ4F_preferences_t* prefsPtr);
131
+ LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
132
+ LZ4F_cctx* cctx,
133
+ void* dstBuffer, size_t dstCapacity,
134
+ const LZ4F_CDict* cdict,
135
+ const LZ4F_preferences_t* prefsPtr);
126
136
 
127
137
 
128
138
  /*! LZ4F_decompress_usingDict() :
129
139
  * Same as LZ4F_decompress(), using a predefined dictionary.
130
140
  * Dictionary is used "in place", without any preprocessing.
131
141
  * It must remain accessible throughout the entire frame decoding. */
132
- size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr,
133
- void* dstBuffer, size_t* dstSizePtr,
134
- const void* srcBuffer, size_t* srcSizePtr,
135
- const void* dict, size_t dictSize,
136
- const LZ4F_decompressOptions_t* decompressOptionsPtr);
142
+ LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
143
+ LZ4F_dctx* dctxPtr,
144
+ void* dstBuffer, size_t* dstSizePtr,
145
+ const void* srcBuffer, size_t* srcSizePtr,
146
+ const void* dict, size_t dictSize,
147
+ const LZ4F_decompressOptions_t* decompressOptionsPtr);
137
148
 
138
149
 
139
150
  #if defined (__cplusplus)
@@ -49,6 +49,7 @@
49
49
 
50
50
 
51
51
  /*=== Dependency ===*/
52
+ #define LZ4_HC_STATIC_LINKING_ONLY
52
53
  #include "lz4hc.h"
53
54
 
54
55
 
@@ -96,7 +97,7 @@ static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
96
97
 
97
98
 
98
99
  /* Update chains up to ip (excluded) */
99
- FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
100
+ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
100
101
  {
101
102
  U16* const chainTable = hc4->chainTable;
102
103
  U32* const hashTable = hc4->hashTable;
@@ -116,56 +117,73 @@ FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
116
117
  hc4->nextToUpdate = target;
117
118
  }
118
119
 
119
-
120
- FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
121
- const BYTE* const ip, const BYTE* const iLimit,
122
- const BYTE** matchpos,
123
- const int maxNbAttempts)
120
+ /** LZ4HC_countBack() :
121
+ * @return : negative value, nb of common bytes before ip/match */
122
+ LZ4_FORCE_INLINE
123
+ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
124
+ const BYTE* const iMin, const BYTE* const mMin)
124
125
  {
125
- U16* const chainTable = hc4->chainTable;
126
- U32* const HashTable = hc4->hashTable;
127
- const BYTE* const base = hc4->base;
128
- const BYTE* const dictBase = hc4->dictBase;
129
- const U32 dictLimit = hc4->dictLimit;
130
- const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
131
- U32 matchIndex;
132
- int nbAttempts = maxNbAttempts;
133
- size_t ml = 0;
126
+ int back=0;
127
+ while ( (ip+back > iMin)
128
+ && (match+back > mMin)
129
+ && (ip[back-1] == match[back-1]))
130
+ back--;
131
+ return back;
132
+ }
134
133
 
135
- /* HC4 match finder */
136
- LZ4HC_Insert(hc4, ip);
137
- matchIndex = HashTable[LZ4HC_hashPtr(ip)];
134
+ /* LZ4HC_countPattern() :
135
+ * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
136
+ static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
137
+ {
138
+ const BYTE* const iStart = ip;
139
+ reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
140
+
141
+ while (likely(ip < iEnd-(sizeof(pattern)-1))) {
142
+ reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
143
+ if (!diff) { ip+=sizeof(pattern); continue; }
144
+ ip += LZ4_NbCommonBytes(diff);
145
+ return (unsigned)(ip - iStart);
146
+ }
138
147
 
139
- while ((matchIndex>=lowLimit) && (nbAttempts)) {
140
- nbAttempts--;
141
- if (matchIndex >= dictLimit) {
142
- const BYTE* const match = base + matchIndex;
143
- if ( (*(match+ml) == *(ip+ml)) /* can be longer */
144
- && (LZ4_read32(match) == LZ4_read32(ip)) )
145
- {
146
- size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
147
- if (mlt > ml) { ml = mlt; *matchpos = match; }
148
- }
149
- } else {
150
- const BYTE* const match = dictBase + matchIndex;
151
- if (LZ4_read32(match) == LZ4_read32(ip)) {
152
- size_t mlt;
153
- const BYTE* vLimit = ip + (dictLimit - matchIndex);
154
- if (vLimit > iLimit) vLimit = iLimit;
155
- mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
156
- if ((ip+mlt == vLimit) && (vLimit < iLimit))
157
- mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
158
- if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */
159
- }
148
+ if (LZ4_isLittleEndian()) {
149
+ reg_t patternByte = pattern;
150
+ while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
151
+ ip++; patternByte >>= 8;
152
+ }
153
+ } else { /* big endian */
154
+ U32 bitOffset = (sizeof(pattern)*8) - 8;
155
+ while (ip < iEnd) {
156
+ BYTE const byte = (BYTE)(pattern >> bitOffset);
157
+ if (*ip != byte) break;
158
+ ip ++; bitOffset -= 8;
160
159
  }
161
- matchIndex -= DELTANEXTU16(chainTable, matchIndex);
162
160
  }
163
161
 
164
- return (int)ml;
162
+ return (unsigned)(ip - iStart);
163
+ }
164
+
165
+ /* LZ4HC_reverseCountPattern() :
166
+ * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
167
+ * read using natural platform endianness */
168
+ static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
169
+ {
170
+ const BYTE* const iStart = ip;
171
+
172
+ while (likely(ip >= iLow+4)) {
173
+ if (LZ4_read32(ip-4) != pattern) break;
174
+ ip -= 4;
175
+ }
176
+ { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */
177
+ while (likely(ip>iLow)) {
178
+ if (ip[-1] != *bytePtr) break;
179
+ ip--; bytePtr--;
180
+ } }
181
+ return (unsigned)(iStart - ip);
165
182
  }
166
183
 
184
+ typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
167
185
 
168
- FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
186
+ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
169
187
  LZ4HC_CCtx_internal* hc4,
170
188
  const BYTE* const ip,
171
189
  const BYTE* const iLowLimit,
@@ -173,67 +191,126 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
173
191
  int longest,
174
192
  const BYTE** matchpos,
175
193
  const BYTE** startpos,
176
- const int maxNbAttempts)
194
+ const int maxNbAttempts,
195
+ const int patternAnalysis)
177
196
  {
178
197
  U16* const chainTable = hc4->chainTable;
179
198
  U32* const HashTable = hc4->hashTable;
180
199
  const BYTE* const base = hc4->base;
181
200
  const U32 dictLimit = hc4->dictLimit;
182
201
  const BYTE* const lowPrefixPtr = base + dictLimit;
183
- const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
202
+ const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE;
184
203
  const BYTE* const dictBase = hc4->dictBase;
185
204
  int const delta = (int)(ip-iLowLimit);
186
205
  int nbAttempts = maxNbAttempts;
206
+ U32 const pattern = LZ4_read32(ip);
187
207
  U32 matchIndex;
208
+ repeat_state_e repeat = rep_untested;
209
+ size_t srcPatternLength = 0;
188
210
 
189
-
211
+ DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
190
212
  /* First Match */
191
213
  LZ4HC_Insert(hc4, ip);
192
214
  matchIndex = HashTable[LZ4HC_hashPtr(ip)];
215
+ DEBUGLOG(7, "First match at index %u / %u (lowLimit)",
216
+ matchIndex, lowLimit);
193
217
 
194
218
  while ((matchIndex>=lowLimit) && (nbAttempts)) {
219
+ DEBUGLOG(7, "remaining attempts : %i", nbAttempts);
195
220
  nbAttempts--;
196
221
  if (matchIndex >= dictLimit) {
197
222
  const BYTE* const matchPtr = base + matchIndex;
198
223
  if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
199
- if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
224
+ if (LZ4_read32(matchPtr) == pattern) {
200
225
  int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
226
+ #if 0
227
+ /* more generic but unfortunately slower on clang */
228
+ int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr);
229
+ #else
201
230
  int back = 0;
202
-
203
231
  while ( (ip+back > iLowLimit)
204
232
  && (matchPtr+back > lowPrefixPtr)
205
233
  && (ip[back-1] == matchPtr[back-1])) {
206
234
  back--;
207
235
  }
208
-
236
+ #endif
209
237
  mlt -= back;
210
238
 
211
239
  if (mlt > longest) {
212
240
  longest = mlt;
213
241
  *matchpos = matchPtr+back;
214
242
  *startpos = ip+back;
215
- } } }
216
- } else {
243
+ } }
244
+ }
245
+ } else { /* matchIndex < dictLimit */
217
246
  const BYTE* const matchPtr = dictBase + matchIndex;
218
- if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
247
+ if (LZ4_read32(matchPtr) == pattern) {
219
248
  int mlt;
220
- int back=0;
249
+ int back = 0;
221
250
  const BYTE* vLimit = ip + (dictLimit - matchIndex);
222
251
  if (vLimit > iHighLimit) vLimit = iHighLimit;
223
252
  mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
224
253
  if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
225
254
  mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
226
- while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--;
255
+ while ( (ip+back > iLowLimit)
256
+ && (matchIndex+back > lowLimit)
257
+ && (ip[back-1] == matchPtr[back-1]))
258
+ back--;
227
259
  mlt -= back;
228
- if (mlt > longest) { longest = mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
229
- }
230
- }
231
- matchIndex -= DELTANEXTU16(chainTable, matchIndex);
232
- }
260
+ if (mlt > longest) {
261
+ longest = mlt;
262
+ *matchpos = base + matchIndex + back;
263
+ *startpos = ip + back;
264
+ } } }
265
+
266
+ { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
267
+ matchIndex -= nextOffset;
268
+ if (patternAnalysis && nextOffset==1) {
269
+ /* may be a repeated pattern */
270
+ if (repeat == rep_untested) {
271
+ if ( ((pattern & 0xFFFF) == (pattern >> 16))
272
+ & ((pattern & 0xFF) == (pattern >> 24)) ) {
273
+ repeat = rep_confirmed;
274
+ srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4;
275
+ } else {
276
+ repeat = rep_not;
277
+ } }
278
+ if ( (repeat == rep_confirmed)
279
+ && (matchIndex >= dictLimit) ) { /* same segment only */
280
+ const BYTE* const matchPtr = base + matchIndex;
281
+ if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
282
+ size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
283
+ const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
284
+ size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern);
285
+ size_t const currentSegmentLength = backLength + forwardPatternLength;
286
+
287
+ if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
288
+ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
289
+ matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
290
+ } else {
291
+ matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
292
+ }
293
+ } } } }
294
+ } /* while ((matchIndex>=lowLimit) && (nbAttempts)) */
233
295
 
234
296
  return longest;
235
297
  }
236
298
 
299
+ LZ4_FORCE_INLINE
300
+ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
301
+ const BYTE* const ip, const BYTE* const iLimit,
302
+ const BYTE** matchpos,
303
+ const int maxNbAttempts,
304
+ const int patternAnalysis)
305
+ {
306
+ const BYTE* uselessPtr = ip;
307
+ /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
308
+ * but this won't be the case here, as we define iLowLimit==ip,
309
+ * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
310
+ return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis);
311
+ }
312
+
313
+
237
314
 
238
315
  typedef enum {
239
316
  noLimit = 0,
@@ -241,14 +318,10 @@ typedef enum {
241
318
  limitedDestSize = 2,
242
319
  } limitedOutput_directive;
243
320
 
244
- #ifndef LZ4HC_DEBUG
245
- # define LZ4HC_DEBUG 0
246
- #endif
247
-
248
321
  /* LZ4HC_encodeSequence() :
249
322
  * @return : 0 if ok,
250
323
  * 1 if buffer issue detected */
251
- FORCE_INLINE int LZ4HC_encodeSequence (
324
+ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
252
325
  const BYTE** ip,
253
326
  BYTE** op,
254
327
  const BYTE** anchor,
@@ -260,9 +333,21 @@ FORCE_INLINE int LZ4HC_encodeSequence (
260
333
  size_t length;
261
334
  BYTE* const token = (*op)++;
262
335
 
263
- #if LZ4HC_DEBUG
264
- printf("literal : %u -- match : %u -- offset : %u\n",
265
- (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match));
336
+ #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
337
+ static const BYTE* start = NULL;
338
+ static U32 totalCost = 0;
339
+ U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
340
+ U32 const ll = (U32)(*ip - *anchor);
341
+ U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
342
+ U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
343
+ U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
344
+ if (start==NULL) start = *anchor; /* only works for single segment */
345
+ //g_debuglog_enable = (pos >= 2228) & (pos <= 2262);
346
+ DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
347
+ pos,
348
+ (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
349
+ cost, totalCost);
350
+ totalCost += cost;
266
351
  #endif
267
352
 
268
353
  /* Encode Literal length */
@@ -285,6 +370,7 @@ FORCE_INLINE int LZ4HC_encodeSequence (
285
370
  LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
286
371
 
287
372
  /* Encode MatchLength */
373
+ assert(matchLength >= MINMATCH);
288
374
  length = (size_t)(matchLength - MINMATCH);
289
375
  if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
290
376
  if (length >= ML_MASK) {
@@ -319,6 +405,7 @@ static int LZ4HC_compress_hashChain (
319
405
  )
320
406
  {
321
407
  const int inputSize = *srcSizePtr;
408
+ const int patternAnalysis = (maxNbAttempts > 64); /* levels 8+ */
322
409
 
323
410
  const BYTE* ip = (const BYTE*) source;
324
411
  const BYTE* anchor = ip;
@@ -341,19 +428,13 @@ static int LZ4HC_compress_hashChain (
341
428
 
342
429
  /* init */
343
430
  *srcSizePtr = 0;
344
- if (limit == limitedDestSize && maxOutputSize < 1) return 0; /* Impossible to store anything */
345
- if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */
346
-
347
- ctx->end += inputSize;
348
- if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support limitations LZ4 decompressor */
431
+ if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
349
432
  if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
350
433
 
351
- ip++;
352
-
353
434
  /* Main Loop */
354
435
  while (ip < mflimit) {
355
- ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
356
- if (!ml) { ip++; continue; }
436
+ ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
437
+ if (ml<MINMATCH) { ip++; continue; }
357
438
 
358
439
  /* saved, in case we would skip too much */
359
440
  start0 = ip;
@@ -362,7 +443,9 @@ static int LZ4HC_compress_hashChain (
362
443
 
363
444
  _Search2:
364
445
  if (ip+ml < mflimit)
365
- ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
446
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
447
+ ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
448
+ maxNbAttempts, patternAnalysis);
366
449
  else
367
450
  ml2 = ml;
368
451
 
@@ -407,7 +490,9 @@ _Search3:
407
490
  /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
408
491
 
409
492
  if (start2 + ml2 < mflimit)
410
- ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
493
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
494
+ start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
495
+ maxNbAttempts, patternAnalysis);
411
496
  else
412
497
  ml3 = ml2;
413
498
 
@@ -527,14 +612,6 @@ _dest_overflow:
527
612
  return 0;
528
613
  }
529
614
 
530
- static int LZ4HC_getSearchNum(int compressionLevel)
531
- {
532
- switch (compressionLevel) {
533
- default: return 0; /* unused */
534
- case 11: return 128;
535
- case 12: return 1<<10;
536
- }
537
- }
538
615
 
539
616
  static int LZ4HC_compress_generic (
540
617
  LZ4HC_CCtx_internal* const ctx,
@@ -546,24 +623,47 @@ static int LZ4HC_compress_generic (
546
623
  limitedOutput_directive limit
547
624
  )
548
625
  {
549
- if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe to reconsider */
550
- if (cLevel > 9) {
551
- if (limit == limitedDestSize) cLevel = 10;
552
- switch (cLevel) {
553
- case 10:
554
- return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << 12, limit);
555
- case 11:
556
- ctx->searchNum = LZ4HC_getSearchNum(cLevel);
557
- return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 128, 0);
558
- default:
559
- cLevel = 12;
560
- /* fall-through */
561
- case 12:
562
- ctx->searchNum = LZ4HC_getSearchNum(cLevel);
563
- return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, LZ4_OPT_NUM, 1);
564
- }
626
+ typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
627
+ typedef struct {
628
+ lz4hc_strat_e strat;
629
+ U32 nbSearches;
630
+ U32 targetLength;
631
+ } cParams_t;
632
+ static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
633
+ { lz4hc, 2, 16 }, /* 0, unused */
634
+ { lz4hc, 2, 16 }, /* 1, unused */
635
+ { lz4hc, 2, 16 }, /* 2, unused */
636
+ { lz4hc, 4, 16 }, /* 3 */
637
+ { lz4hc, 8, 16 }, /* 4 */
638
+ { lz4hc, 16, 16 }, /* 5 */
639
+ { lz4hc, 32, 16 }, /* 6 */
640
+ { lz4hc, 64, 16 }, /* 7 */
641
+ { lz4hc, 128, 16 }, /* 8 */
642
+ { lz4hc, 256, 16 }, /* 9 */
643
+ { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/
644
+ { lz4opt, 512,128 }, /*11 */
645
+ { lz4opt,8192, LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
646
+ };
647
+
648
+ if (limit == limitedDestSize && dstCapacity < 1) return 0; /* Impossible to store anything */
649
+ if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
650
+
651
+ ctx->end += *srcSizePtr;
652
+ if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */
653
+ cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
654
+ assert(cLevel >= 0);
655
+ assert(cLevel <= LZ4HC_CLEVEL_MAX);
656
+ { cParams_t const cParam = clTable[cLevel];
657
+ if (cParam.strat == lz4hc)
658
+ return LZ4HC_compress_hashChain(ctx,
659
+ src, dst, srcSizePtr, dstCapacity,
660
+ cParam.nbSearches, limit);
661
+ assert(cParam.strat == lz4opt);
662
+ return LZ4HC_compress_optimal(ctx,
663
+ src, dst, srcSizePtr, dstCapacity,
664
+ cParam.nbSearches, cParam.targetLength, limit,
665
+ cLevel == LZ4HC_CLEVEL_MAX); /* ultra mode */
565
666
  }
566
- return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << (cLevel-1), limit); /* levels 1-9 */
567
667
  }
568
668
 
569
669
 
@@ -596,8 +696,7 @@ int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, in
596
696
  }
597
697
 
598
698
  /* LZ4_compress_HC_destSize() :
599
- * currently, only compatible with Hash Chain implementation,
600
- * hence limit compression level to LZ4HC_CLEVEL_OPT_MIN-1*/
699
+ * only compatible with regular HC parser */
601
700
  int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
602
701
  {
603
702
  LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
@@ -624,18 +723,13 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
624
723
  {
625
724
  LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
626
725
  LZ4_streamHCPtr->internal_donotuse.base = NULL;
627
- if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; /* cap compression level */
628
- LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
629
- LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel);
726
+ LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
630
727
  }
631
728
 
632
729
  void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
633
730
  {
634
- int const currentCLevel = LZ4_streamHCPtr->internal_donotuse.compressionLevel;
635
- int const minCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? 1 : LZ4HC_CLEVEL_OPT_MIN;
636
- int const maxCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? LZ4HC_CLEVEL_OPT_MIN-1 : LZ4HC_CLEVEL_MAX;
637
- compressionLevel = MIN(compressionLevel, minCLevel);
638
- compressionLevel = MAX(compressionLevel, maxCLevel);
731
+ if (compressionLevel < 1) compressionLevel = 1;
732
+ if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;
639
733
  LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
640
734
  }
641
735
 
@@ -648,10 +742,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int
648
742
  }
649
743
  LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
650
744
  ctxPtr->end = (const BYTE*)dictionary + dictSize;
651
- if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
652
- LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
653
- else
654
- if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
745
+ if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
655
746
  return dictSize;
656
747
  }
657
748
 
@@ -660,10 +751,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int
660
751
 
661
752
  static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
662
753
  {
663
- if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
664
- LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
665
- else
666
- if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
754
+ if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
667
755
 
668
756
  /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
669
757
  ctxPtr->lowLimit = ctxPtr->dictLimit;
@@ -717,8 +805,6 @@ int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src,
717
805
 
718
806
  int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
719
807
  {
720
- LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
721
- if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN) LZ4HC_init(ctxPtr, (const BYTE*)src); /* not compatible with btopt implementation */
722
808
  return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize);
723
809
  }
724
810