extlz4 0.2.4.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- lz4frame_static.h
+++ lz4frame_static.h
@@ -43,7 +43,15 @@ extern "C" {
  /* lz4frame_static.h should be used solely in the context of static linking.
  * It contains definitions which are not stable and may change in the future.
  * Never use it in the context of DLL linking.
+ *
+ * Defining LZ4F_PUBLISH_STATIC_FUNCTIONS allows one to override this. Use at
+ * your own risk.
  */
+ #ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
+ #define LZ4FLIB_STATIC_API LZ4FLIB_API
+ #else
+ #define LZ4FLIB_STATIC_API
+ #endif
 
 
  /* --- Dependency --- */
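
Note: the new escape hatch is a compile-time opt-in. A minimal sketch of its intended use (only the macro and header names come from the hunk above; per the comment, this is at your own risk):

    /* Publish the unstable static-only declarations even outside pure static
     * linking: LZ4FLIB_STATIC_API then expands to LZ4FLIB_API. */
    #define LZ4F_PUBLISH_STATIC_FUNCTIONS
    #include "lz4frame_static.h"
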
@@ -79,7 +87,7 @@ extern "C" {
  /* enum list is exposed, to handle specific errors */
  typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
 
- LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
+ LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
 
 
 
@@ -93,8 +101,8 @@ typedef struct LZ4F_CDict_s LZ4F_CDict;
  * LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
  * LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
  * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */
- LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
- void LZ4F_freeCDict(LZ4F_CDict* CDict);
+ LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+ LZ4FLIB_STATIC_API void LZ4F_freeCDict(LZ4F_CDict* CDict);
 
 
  /*! LZ4_compressFrame_usingCDict() :
@@ -106,10 +114,11 @@ void LZ4F_freeCDict(LZ4F_CDict* CDict);
  * but it's not recommended, as it's the only way to provide dictID in the frame header.
  * @return : number of bytes written into dstBuffer.
  * or an error code if it fails (can be tested using LZ4F_isError()) */
- size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const LZ4F_CDict* cdict,
- const LZ4F_preferences_t* preferencesPtr);
+ LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* preferencesPtr);
 
 
  /*! LZ4F_compressBegin_usingCDict() :
@@ -119,21 +128,23 @@ size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
  * however, it's the only way to provide dictID in the frame header.
  * @return : number of bytes written into dstBuffer for the header,
  * or an error code (which can be tested using LZ4F_isError()) */
- size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx,
- void* dstBuffer, size_t dstCapacity,
- const LZ4F_CDict* cdict,
- const LZ4F_preferences_t* prefsPtr);
+ LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
+ LZ4F_cctx* cctx,
+ void* dstBuffer, size_t dstCapacity,
+ const LZ4F_CDict* cdict,
+ const LZ4F_preferences_t* prefsPtr);
 
 
  /*! LZ4F_decompress_usingDict() :
  * Same as LZ4F_decompress(), using a predefined dictionary.
  * Dictionary is used "in place", without any preprocessing.
  * It must remain accessible throughout the entire frame decoding. */
- size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr,
- void* dstBuffer, size_t* dstSizePtr,
- const void* srcBuffer, size_t* srcSizePtr,
- const void* dict, size_t dictSize,
- const LZ4F_decompressOptions_t* decompressOptionsPtr);
+ LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
+ LZ4F_dctx* dctxPtr,
+ void* dstBuffer, size_t* dstSizePtr,
+ const void* srcBuffer, size_t* srcSizePtr,
+ const void* dict, size_t dictSize,
+ const LZ4F_decompressOptions_t* decompressOptionsPtr);
 
 
  #if defined (__cplusplus)
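
Every declaration in lz4frame_static.h is now tagged LZ4FLIB_STATIC_API. A hedged usage sketch assembled from the signatures above (LZ4F_isError() is public lz4frame.h API; the NULL preferences pointer and the (size_t)-1 failure value are illustrative choices, not prescribed by the header):

    #include "lz4frame_static.h"   /* static linking assumed, per the header comment */

    /* Compress one frame with a pre-digested dictionary. */
    static size_t compress_with_cdict(void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      const void* dictBuf, size_t dictSize)
    {
        size_t r;
        LZ4F_CDict* const cdict = LZ4F_createCDict(dictBuf, dictSize);
        if (cdict == NULL) return (size_t)-1;           /* illustrative failure value */
        r = LZ4F_compressFrame_usingCDict(dst, dstCapacity,
                                          src, srcSize,
                                          cdict, NULL); /* NULL: default preferences */
        if (LZ4F_isError(r))
            (void)LZ4F_getErrorCode(r);                 /* map to the exposed enum */
        LZ4F_freeCDict(cdict);                          /* dictBuf was copied at creation */
        return r;
    }
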
--- lz4hc.c
+++ lz4hc.c
@@ -49,6 +49,7 @@
 
 
  /*=== Dependency ===*/
+ #define LZ4_HC_STATIC_LINKING_ONLY
  #include "lz4hc.h"
 
 
@@ -96,7 +97,7 @@ static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
 
 
  /* Update chains up to ip (excluded) */
- FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
+ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
  {
  U16* const chainTable = hc4->chainTable;
  U32* const hashTable = hc4->hashTable;
@@ -116,56 +117,73 @@ FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
  hc4->nextToUpdate = target;
  }
 
-
- FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
- const BYTE* const ip, const BYTE* const iLimit,
- const BYTE** matchpos,
- const int maxNbAttempts)
+ /** LZ4HC_countBack() :
+ * @return : negative value, nb of common bytes before ip/match */
+ LZ4_FORCE_INLINE
+ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
+ const BYTE* const iMin, const BYTE* const mMin)
  {
- U16* const chainTable = hc4->chainTable;
- U32* const HashTable = hc4->hashTable;
- const BYTE* const base = hc4->base;
- const BYTE* const dictBase = hc4->dictBase;
- const U32 dictLimit = hc4->dictLimit;
- const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
- U32 matchIndex;
- int nbAttempts = maxNbAttempts;
- size_t ml = 0;
+ int back=0;
+ while ( (ip+back > iMin)
+ && (match+back > mMin)
+ && (ip[back-1] == match[back-1]))
+ back--;
+ return back;
+ }
 
- /* HC4 match finder */
- LZ4HC_Insert(hc4, ip);
- matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+ /* LZ4HC_countPattern() :
+ * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
+ static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+ {
+ const BYTE* const iStart = ip;
+ reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
+
+ while (likely(ip < iEnd-(sizeof(pattern)-1))) {
+ reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
+ if (!diff) { ip+=sizeof(pattern); continue; }
+ ip += LZ4_NbCommonBytes(diff);
+ return (unsigned)(ip - iStart);
+ }
 
- while ((matchIndex>=lowLimit) && (nbAttempts)) {
- nbAttempts--;
- if (matchIndex >= dictLimit) {
- const BYTE* const match = base + matchIndex;
- if ( (*(match+ml) == *(ip+ml)) /* can be longer */
- && (LZ4_read32(match) == LZ4_read32(ip)) )
- {
- size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
- if (mlt > ml) { ml = mlt; *matchpos = match; }
- }
- } else {
- const BYTE* const match = dictBase + matchIndex;
- if (LZ4_read32(match) == LZ4_read32(ip)) {
- size_t mlt;
- const BYTE* vLimit = ip + (dictLimit - matchIndex);
- if (vLimit > iLimit) vLimit = iLimit;
- mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
- if ((ip+mlt == vLimit) && (vLimit < iLimit))
- mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
- if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */
- }
+ if (LZ4_isLittleEndian()) {
+ reg_t patternByte = pattern;
+ while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
+ ip++; patternByte >>= 8;
+ }
+ } else { /* big endian */
+ U32 bitOffset = (sizeof(pattern)*8) - 8;
+ while (ip < iEnd) {
+ BYTE const byte = (BYTE)(pattern >> bitOffset);
+ if (*ip != byte) break;
+ ip ++; bitOffset -= 8;
  }
- matchIndex -= DELTANEXTU16(chainTable, matchIndex);
  }
 
- return (int)ml;
+ return (unsigned)(ip - iStart);
+ }
+
+ /* LZ4HC_reverseCountPattern() :
+ * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
+ * read using natural platform endianess */
+ static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
+ {
+ const BYTE* const iStart = ip;
+
+ while (likely(ip >= iLow+4)) {
+ if (LZ4_read32(ip-4) != pattern) break;
+ ip -= 4;
+ }
+ { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */
+ while (likely(ip>iLow)) {
+ if (ip[-1] != *bytePtr) break;
+ ip--; bytePtr--;
+ } }
+ return (unsigned)(iStart - ip);
  }
 
+ typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
 
- FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
+ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
  LZ4HC_CCtx_internal* hc4,
  const BYTE* const ip,
  const BYTE* const iLowLimit,
@@ -173,67 +191,126 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
  int longest,
  const BYTE** matchpos,
  const BYTE** startpos,
- const int maxNbAttempts)
+ const int maxNbAttempts,
+ const int patternAnalysis)
  {
  U16* const chainTable = hc4->chainTable;
  U32* const HashTable = hc4->hashTable;
  const BYTE* const base = hc4->base;
  const U32 dictLimit = hc4->dictLimit;
  const BYTE* const lowPrefixPtr = base + dictLimit;
- const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+ const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE;
  const BYTE* const dictBase = hc4->dictBase;
  int const delta = (int)(ip-iLowLimit);
  int nbAttempts = maxNbAttempts;
+ U32 const pattern = LZ4_read32(ip);
  U32 matchIndex;
+ repeat_state_e repeat = rep_untested;
+ size_t srcPatternLength = 0;
 
-
+ DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
  /* First Match */
  LZ4HC_Insert(hc4, ip);
  matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+ DEBUGLOG(7, "First match at index %u / %u (lowLimit)",
+ matchIndex, lowLimit);
 
  while ((matchIndex>=lowLimit) && (nbAttempts)) {
+ DEBUGLOG(7, "remaining attempts : %i", nbAttempts);
  nbAttempts--;
  if (matchIndex >= dictLimit) {
  const BYTE* const matchPtr = base + matchIndex;
  if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
- if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+ if (LZ4_read32(matchPtr) == pattern) {
  int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+ #if 0
+ /* more generic but unfortunately slower on clang */
+ int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr);
+ #else
  int back = 0;
-
  while ( (ip+back > iLowLimit)
  && (matchPtr+back > lowPrefixPtr)
  && (ip[back-1] == matchPtr[back-1])) {
  back--;
  }
-
+ #endif
  mlt -= back;
 
  if (mlt > longest) {
  longest = mlt;
  *matchpos = matchPtr+back;
  *startpos = ip+back;
- } } }
- } else {
+ } }
+ }
+ } else { /* matchIndex < dictLimit */
  const BYTE* const matchPtr = dictBase + matchIndex;
- if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+ if (LZ4_read32(matchPtr) == pattern) {
  int mlt;
- int back=0;
+ int back = 0;
  const BYTE* vLimit = ip + (dictLimit - matchIndex);
  if (vLimit > iHighLimit) vLimit = iHighLimit;
  mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
  if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
  mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
- while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--;
+ while ( (ip+back > iLowLimit)
+ && (matchIndex+back > lowLimit)
+ && (ip[back-1] == matchPtr[back-1]))
+ back--;
  mlt -= back;
- if (mlt > longest) { longest = mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
- }
- }
- matchIndex -= DELTANEXTU16(chainTable, matchIndex);
- }
+ if (mlt > longest) {
+ longest = mlt;
+ *matchpos = base + matchIndex + back;
+ *startpos = ip + back;
+ } } }
+
+ { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
+ matchIndex -= nextOffset;
+ if (patternAnalysis && nextOffset==1) {
+ /* may be a repeated pattern */
+ if (repeat == rep_untested) {
+ if ( ((pattern & 0xFFFF) == (pattern >> 16))
+ & ((pattern & 0xFF) == (pattern >> 24)) ) {
+ repeat = rep_confirmed;
+ srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4;
+ } else {
+ repeat = rep_not;
+ } }
+ if ( (repeat == rep_confirmed)
+ && (matchIndex >= dictLimit) ) { /* same segment only */
+ const BYTE* const matchPtr = base + matchIndex;
+ if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
+ size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
+ const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
+ size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern);
+ size_t const currentSegmentLength = backLength + forwardPatternLength;
+
+ if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
+ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
+ matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
+ } else {
+ matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
+ }
+ } } } }
+ } /* while ((matchIndex>=lowLimit) && (nbAttempts)) */
 
  return longest;
  }
 
+ LZ4_FORCE_INLINE
+ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
+ const BYTE* const ip, const BYTE* const iLimit,
+ const BYTE** matchpos,
+ const int maxNbAttempts,
+ const int patternAnalysis)
+ {
+ const BYTE* uselessPtr = ip;
+ /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
+ * but this won't be the case here, as we define iLowLimit==ip,
+ * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+ return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis);
+ }
+
+
 
  typedef enum {
  noLimit = 0,
@@ -241,14 +318,10 @@ typedef enum {
  limitedDestSize = 2,
  } limitedOutput_directive;
 
- #ifndef LZ4HC_DEBUG
- # define LZ4HC_DEBUG 0
- #endif
-
  /* LZ4HC_encodeSequence() :
  * @return : 0 if ok,
  * 1 if buffer issue detected */
- FORCE_INLINE int LZ4HC_encodeSequence (
+ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
  const BYTE** ip,
  BYTE** op,
  const BYTE** anchor,
@@ -260,9 +333,21 @@ FORCE_INLINE int LZ4HC_encodeSequence (
  size_t length;
  BYTE* const token = (*op)++;
 
- #if LZ4HC_DEBUG
- printf("literal : %u -- match : %u -- offset : %u\n",
- (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match));
+ #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
+ static const BYTE* start = NULL;
+ static U32 totalCost = 0;
+ U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
+ U32 const ll = (U32)(*ip - *anchor);
+ U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
+ U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
+ U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
+ if (start==NULL) start = *anchor; /* only works for single segment */
+ //g_debuglog_enable = (pos >= 2228) & (pos <= 2262);
+ DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
+ pos,
+ (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
+ cost, totalCost);
+ totalCost += cost;
  #endif
 
  /* Encode Literal length */
@@ -285,6 +370,7 @@ FORCE_INLINE int LZ4HC_encodeSequence (
  LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
 
  /* Encode MatchLength */
+ assert(matchLength >= MINMATCH);
  length = (size_t)(matchLength - MINMATCH);
  if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
  if (length >= ML_MASK) {
@@ -319,6 +405,7 @@ static int LZ4HC_compress_hashChain (
  )
  {
  const int inputSize = *srcSizePtr;
+ const int patternAnalysis = (maxNbAttempts > 64); /* levels 8+ */
 
  const BYTE* ip = (const BYTE*) source;
  const BYTE* anchor = ip;
@@ -341,19 +428,13 @@ static int LZ4HC_compress_hashChain (
 
  /* init */
  *srcSizePtr = 0;
- if (limit == limitedDestSize && maxOutputSize < 1) return 0; /* Impossible to store anything */
- if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */
-
- ctx->end += inputSize;
- if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support limitations LZ4 decompressor */
+ if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
  if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
 
- ip++;
-
  /* Main Loop */
  while (ip < mflimit) {
- ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
- if (!ml) { ip++; continue; }
+ ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
+ if (ml<MINMATCH) { ip++; continue; }
 
  /* saved, in case we would skip too much */
  start0 = ip;
@@ -362,7 +443,9 @@ static int LZ4HC_compress_hashChain (
 
  _Search2:
  if (ip+ml < mflimit)
- ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+ maxNbAttempts, patternAnalysis);
  else
  ml2 = ml;
 
@@ -407,7 +490,9 @@ _Search3:
  /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
 
  if (start2 + ml2 < mflimit)
- ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+ maxNbAttempts, patternAnalysis);
  else
  ml3 = ml2;
 
@@ -527,14 +612,6 @@ _dest_overflow:
  return 0;
  }
 
- static int LZ4HC_getSearchNum(int compressionLevel)
- {
- switch (compressionLevel) {
- default: return 0; /* unused */
- case 11: return 128;
- case 12: return 1<<10;
- }
- }
 
  static int LZ4HC_compress_generic (
  LZ4HC_CCtx_internal* const ctx,
@@ -546,24 +623,47 @@ static int LZ4HC_compress_generic (
  limitedOutput_directive limit
  )
  {
- if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe to reconsider */
- if (cLevel > 9) {
- if (limit == limitedDestSize) cLevel = 10;
- switch (cLevel) {
- case 10:
- return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << 12, limit);
- case 11:
- ctx->searchNum = LZ4HC_getSearchNum(cLevel);
- return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 128, 0);
- default:
- cLevel = 12;
- /* fall-through */
- case 12:
- ctx->searchNum = LZ4HC_getSearchNum(cLevel);
- return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, LZ4_OPT_NUM, 1);
- }
+ typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
+ typedef struct {
+ lz4hc_strat_e strat;
+ U32 nbSearches;
+ U32 targetLength;
+ } cParams_t;
+ static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
+ { lz4hc, 2, 16 }, /* 0, unused */
+ { lz4hc, 2, 16 }, /* 1, unused */
+ { lz4hc, 2, 16 }, /* 2, unused */
+ { lz4hc, 4, 16 }, /* 3 */
+ { lz4hc, 8, 16 }, /* 4 */
+ { lz4hc, 16, 16 }, /* 5 */
+ { lz4hc, 32, 16 }, /* 6 */
+ { lz4hc, 64, 16 }, /* 7 */
+ { lz4hc, 128, 16 }, /* 8 */
+ { lz4hc, 256, 16 }, /* 9 */
+ { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/
+ { lz4opt, 512,128 }, /*11 */
+ { lz4opt,8192, LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
+ };
+
+ if (limit == limitedDestSize && dstCapacity < 1) return 0; /* Impossible to store anything */
+ if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
+
+ ctx->end += *srcSizePtr;
+ if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */
+ cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
+ assert(cLevel >= 0);
+ assert(cLevel <= LZ4HC_CLEVEL_MAX);
+ { cParams_t const cParam = clTable[cLevel];
+ if (cParam.strat == lz4hc)
+ return LZ4HC_compress_hashChain(ctx,
+ src, dst, srcSizePtr, dstCapacity,
+ cParam.nbSearches, limit);
+ assert(cParam.strat == lz4opt);
+ return LZ4HC_compress_optimal(ctx,
+ src, dst, srcSizePtr, dstCapacity,
+ cParam.nbSearches, cParam.targetLength, limit,
+ cLevel == LZ4HC_CLEVEL_MAX); /* ultra mode */
  }
- return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << (cLevel-1), limit); /* levels 1-9 */
  }
 
 
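This table-driven rewrite replaces the old switch: levels 3-9 keep the hash-chain parser with the same search budgets as the former 1 << (cLevel-1), levels 10-12 route to the optimal parser (only LZ4HC_CLEVEL_MAX enables ultra mode), and the input-size/destSize guards plus the ctx->end bookkeeping move here from LZ4HC_compress_hashChain(). The public entry points are unchanged; a minimal caller sketch (LZ4_compress_HC() and LZ4HC_CLEVEL_MAX come from the public lz4hc.h):

    #include "lz4hc.h"

    /* Level 9 resolves to clTable[9] = { lz4hc, 256, 16 }: hash-chain parser,
     * 256 search attempts; LZ4HC_CLEVEL_MAX (12) would pick lz4opt + ultra. */
    static int compress_level9(const char* src, int srcSize, char* dst, int dstCapacity)
    {
        return LZ4_compress_HC(src, dst, srcSize, dstCapacity, 9);
    }
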
@@ -596,8 +696,7 @@ int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, in
  }
 
  /* LZ4_compress_HC_destSize() :
- * currently, only compatible with Hash Chain implementation,
- * hence limit compression level to LZ4HC_CLEVEL_OPT_MIN-1*/
+ * only compatible with regular HC parser */
  int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
  {
  LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
@@ -624,18 +723,13 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
  {
  LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
  LZ4_streamHCPtr->internal_donotuse.base = NULL;
- if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; /* cap compression level */
- LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
- LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel);
+ LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
  }
 
  void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
  {
- int const currentCLevel = LZ4_streamHCPtr->internal_donotuse.compressionLevel;
- int const minCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? 1 : LZ4HC_CLEVEL_OPT_MIN;
- int const maxCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? LZ4HC_CLEVEL_OPT_MIN-1 : LZ4HC_CLEVEL_MAX;
- compressionLevel = MIN(compressionLevel, minCLevel);
- compressionLevel = MAX(compressionLevel, maxCLevel);
+ if (compressionLevel < 1) compressionLevel = 1;
+ if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;
  LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
  }
 
@@ -648,10 +742,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int
  }
  LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
  ctxPtr->end = (const BYTE*)dictionary + dictSize;
- if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
- LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
- else
- if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
+ if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
  return dictSize;
  }
 
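LZ4_resetStreamHC() now delegates to LZ4_setCompressionLevel(), whose new body is a plain clamp to [1, LZ4HC_CLEVEL_MAX] (note the removed version applied MIN to the lower bound and MAX to the upper bound, i.e. the clamp was inverted), and LZ4_loadDictHC() inserts dictionary positions the same way at every level. A streaming sketch using only functions visible in this diff (the caller provides the LZ4_streamHC_t, which is large and may be better heap-allocated):

    #include "lz4hc.h"

    static int compress_block_with_dict(LZ4_streamHC_t* stream,
                                        const char* dict, int dictSize,
                                        const char* src, char* dst,
                                        int srcSize, int dstCapacity)
    {
        LZ4_resetStreamHC(stream, 0);            /* level handling deferred to... */
        LZ4_setCompressionLevel(stream, 99);     /* ...here: clamped to LZ4HC_CLEVEL_MAX */
        LZ4_loadDictHC(stream, dict, dictSize);  /* plain LZ4HC_Insert() at every level now */
        return LZ4_compress_HC_continue(stream, src, dst, srcSize, dstCapacity);
    }
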
@@ -660,10 +751,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int
 
  static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
  {
- if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN)
- LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
- else
- if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
+ if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
 
  /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
  ctxPtr->lowLimit = ctxPtr->dictLimit;
@@ -717,8 +805,6 @@ int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src,
 
  int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
  {
- LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
- if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN) LZ4HC_init(ctxPtr, (const BYTE*)src); /* not compatible with btopt implementation */
  return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize);
  }