extlz4 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/Rakefile +21 -3
  4. data/contrib/lz4/CODING_STYLE +57 -0
  5. data/contrib/lz4/LICENSE +1 -1
  6. data/contrib/lz4/Makefile.inc +17 -15
  7. data/contrib/lz4/NEWS +25 -0
  8. data/contrib/lz4/README.md +16 -5
  9. data/contrib/lz4/SECURITY.md +17 -0
  10. data/contrib/lz4/build/README.md +4 -15
  11. data/contrib/lz4/build/VS2022/_build.bat +39 -0
  12. data/contrib/lz4/build/VS2022/_setup.bat +35 -0
  13. data/contrib/lz4/build/VS2022/_test.bat +38 -0
  14. data/contrib/lz4/build/VS2022/build-and-test-win32-debug.bat +26 -0
  15. data/contrib/lz4/build/VS2022/build-and-test-win32-release.bat +26 -0
  16. data/contrib/lz4/build/VS2022/build-and-test-x64-debug.bat +26 -0
  17. data/contrib/lz4/build/VS2022/build-and-test-x64-release.bat +26 -0
  18. data/contrib/lz4/build/VS2022/datagen/datagen.vcxproj +7 -3
  19. data/contrib/lz4/build/{VS2017 → VS2022}/lz4/lz4.vcxproj +21 -7
  20. data/contrib/lz4/build/VS2022/lz4.sln +5 -2
  21. data/contrib/lz4/build/cmake/CMakeLists.txt +95 -100
  22. data/contrib/lz4/build/meson/GetLz4LibraryVersion.py +39 -0
  23. data/contrib/lz4/build/meson/README.md +34 -0
  24. data/contrib/lz4/build/meson/meson/contrib/gen_manual/meson.build +42 -0
  25. data/contrib/lz4/build/meson/meson/contrib/meson.build +11 -0
  26. data/contrib/lz4/build/meson/meson/examples/meson.build +32 -0
  27. data/contrib/lz4/build/meson/meson/lib/meson.build +87 -0
  28. data/contrib/lz4/build/meson/meson/meson.build +135 -0
  29. data/contrib/lz4/build/meson/meson/ossfuzz/meson.build +35 -0
  30. data/contrib/lz4/build/meson/meson/programs/meson.build +91 -0
  31. data/contrib/lz4/build/meson/meson/tests/meson.build +162 -0
  32. data/contrib/lz4/build/meson/meson.build +31 -0
  33. data/contrib/lz4/build/meson/meson_options.txt +44 -0
  34. data/contrib/lz4/build/visual/README.md +5 -0
  35. data/contrib/lz4/build/visual/generate_solution.cmd +55 -0
  36. data/contrib/lz4/build/visual/generate_vs2015.cmd +3 -0
  37. data/contrib/lz4/build/visual/generate_vs2017.cmd +3 -0
  38. data/contrib/lz4/build/visual/generate_vs2019.cmd +3 -0
  39. data/contrib/lz4/build/visual/generate_vs2022.cmd +3 -0
  40. data/contrib/lz4/lib/README.md +25 -1
  41. data/contrib/lz4/lib/lz4.c +206 -99
  42. data/contrib/lz4/lib/lz4.h +111 -69
  43. data/contrib/lz4/lib/lz4file.c +111 -81
  44. data/contrib/lz4/lib/lz4file.h +2 -2
  45. data/contrib/lz4/lib/lz4frame.c +179 -121
  46. data/contrib/lz4/lib/lz4frame.h +162 -103
  47. data/contrib/lz4/lib/lz4hc.c +943 -382
  48. data/contrib/lz4/lib/lz4hc.h +43 -42
  49. data/contrib/lz4/lib/xxhash.c +21 -21
  50. data/contrib/lz4/ossfuzz/decompress_fuzzer.c +1 -1
  51. data/contrib/lz4/ossfuzz/fuzz_helpers.h +1 -1
  52. data/ext/blockapi.c +11 -11
  53. data/ext/frameapi.c +23 -23
  54. metadata +34 -28
  55. data/contrib/lz4/build/VS2010/datagen/datagen.vcxproj +0 -169
  56. data/contrib/lz4/build/VS2010/frametest/frametest.vcxproj +0 -176
  57. data/contrib/lz4/build/VS2010/fullbench/fullbench.vcxproj +0 -176
  58. data/contrib/lz4/build/VS2010/fullbench-dll/fullbench-dll.vcxproj +0 -180
  59. data/contrib/lz4/build/VS2010/fuzzer/fuzzer.vcxproj +0 -173
  60. data/contrib/lz4/build/VS2010/liblz4/liblz4.vcxproj +0 -175
  61. data/contrib/lz4/build/VS2010/liblz4-dll/liblz4-dll.rc +0 -51
  62. data/contrib/lz4/build/VS2010/liblz4-dll/liblz4-dll.vcxproj +0 -179
  63. data/contrib/lz4/build/VS2010/lz4/lz4.vcxproj +0 -189
  64. data/contrib/lz4/build/VS2010/lz4.sln +0 -98
  65. data/contrib/lz4/build/VS2017/datagen/datagen.vcxproj +0 -173
  66. data/contrib/lz4/build/VS2017/frametest/frametest.vcxproj +0 -180
  67. data/contrib/lz4/build/VS2017/fullbench/fullbench.vcxproj +0 -180
  68. data/contrib/lz4/build/VS2017/fullbench-dll/fullbench-dll.vcxproj +0 -184
  69. data/contrib/lz4/build/VS2017/fuzzer/fuzzer.vcxproj +0 -177
  70. data/contrib/lz4/build/VS2017/liblz4/liblz4.vcxproj +0 -179
  71. data/contrib/lz4/build/VS2017/liblz4-dll/liblz4-dll.rc +0 -51
  72. data/contrib/lz4/build/VS2017/liblz4-dll/liblz4-dll.vcxproj +0 -183
  73. data/contrib/lz4/build/VS2017/lz4/lz4.rc +0 -51
  74. data/contrib/lz4/build/VS2017/lz4.sln +0 -103
  75. data/contrib/lz4/build/{VS2010 → VS2022}/lz4/lz4.rc +0 -0
@@ -39,9 +39,10 @@
  ***************************************/
 
 /*! HEAPMODE :
- *  Select how default compression function will allocate workplace memory,
- *  in stack (0:fastest), or in heap (1:requires malloc()).
- *  Since workplace is rather large, heap mode is recommended.
+ *  Select how stateless HC compression functions like `LZ4_compress_HC()`
+ *  allocate memory for their workspace:
+ *  in stack (0:fastest), or in heap (1:default, requires malloc()).
+ *  Since workspace is rather large, heap mode is recommended.
  **/
 #ifndef LZ4HC_HEAPMODE
 # define LZ4HC_HEAPMODE 1
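
Note: a minimal sketch of the two allocation strategies this macro selects between, mirroring the shape of the public `LZ4_compress_HC()` entry point shown in the last hunk of this diff; the wrapper name is hypothetical, and the real code uses internal ALLOC()/FREEMEM() wrappers rather than bare malloc()/free():

    #include <stdlib.h>
    #include "lz4hc.h"

    static int compressHC_sketch(const char* src, char* dst,
                                 int srcSize, int dstCapacity, int cLevel)
    {
    #if LZ4HC_HEAPMODE == 1
        /* heap mode: tolerant of small stacks, but needs malloc() */
        LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t));
        if (statePtr == NULL) return 0;
    #else
        /* stack mode: fastest, but the state is a large object (hundreds of KB) */
        LZ4_streamHC_t state;
        LZ4_streamHC_t* const statePtr = &state;
    #endif
        {   int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst,
                                                         srcSize, dstCapacity, cLevel);
    #if LZ4HC_HEAPMODE == 1
            free(statePtr);
    #endif
            return cSize;
        }
    }
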
@@ -51,19 +52,19 @@
 /*=== Dependency ===*/
 #define LZ4_HC_STATIC_LINKING_ONLY
 #include "lz4hc.h"
+#include <limits.h>
 
 
-/*=== Common definitions ===*/
-#if defined(__GNUC__)
+/*=== Shared lz4.c code ===*/
+#ifndef LZ4_SRC_INCLUDED
+# if defined(__GNUC__)
 #  pragma GCC diagnostic ignored "-Wunused-function"
-#endif
-#if defined (__clang__)
+# endif
+# if defined (__clang__)
 #  pragma clang diagnostic ignored "-Wunused-function"
-#endif
-
-#define LZ4_COMMONDEFS_ONLY
-#ifndef LZ4_SRC_INCLUDED
-#include "lz4.c"   /* LZ4_count, constants, mem */
+# endif
+# define LZ4_COMMONDEFS_ONLY
+# include "lz4.c"   /* LZ4_count, constants, mem */
 #endif
 
 
@@ -79,17 +80,158 @@ typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
 /*=== Macros ===*/
 #define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
 #define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
-#define HASH_FUNCTION(i)      (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
-#define DELTANEXTMAXD(p)      chainTable[(p) & LZ4HC_MAXD_MASK]    /* flexible, LZ4HC_MAXD dependent */
+
+
+/*=== Levels definition ===*/
+typedef enum { lz4mid, lz4hc, lz4opt } lz4hc_strat_e;
+typedef struct {
+    lz4hc_strat_e strat;
+    int nbSearches;
+    U32 targetLength;
+} cParams_t;
+static const cParams_t k_clTable[LZ4HC_CLEVEL_MAX+1] = {
+    { lz4mid,     2, 16 },  /* 0, unused */
+    { lz4mid,     2, 16 },  /* 1, unused */
+    { lz4mid,     2, 16 },  /* 2 */
+    { lz4hc,      4, 16 },  /* 3 */
+    { lz4hc,      8, 16 },  /* 4 */
+    { lz4hc,     16, 16 },  /* 5 */
+    { lz4hc,     32, 16 },  /* 6 */
+    { lz4hc,     64, 16 },  /* 7 */
+    { lz4hc,    128, 16 },  /* 8 */
+    { lz4hc,    256, 16 },  /* 9 */
+    { lz4opt,    96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
+    { lz4opt,   512,128 },  /*11 */
+    { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
+};
+
+static cParams_t LZ4HC_getCLevelParams(int cLevel)
+{
+    /* note : clevel convention is a bit different from lz4frame,
+     * possibly something worth revisiting for consistency */
+    if (cLevel < 1)
+        cLevel = LZ4HC_CLEVEL_DEFAULT;
+    cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
+    return k_clTable[cLevel];
+}
+
+
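
Note: a small illustration of how the clamping above plays out (hypothetical driver code; `LZ4HC_CLEVEL_DEFAULT` is 9 and `LZ4HC_CLEVEL_MAX` is 12 in lz4hc.h):

    cParams_t p;
    p = LZ4HC_getCLevelParams(2);    /* { lz4mid,     2, 16 }        : the new level-2 strategy */
    p = LZ4HC_getCLevelParams(0);    /* cLevel < 1 -> default (9)    : { lz4hc,   256, 16 } */
    p = LZ4HC_getCLevelParams(40);   /* clamped down to max (12)     : { lz4opt, 16384, LZ4_OPT_NUM } */
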
+/*=== Hashing ===*/
+#define LZ4HC_HASHSIZE 4
+#define HASH_FUNCTION(i)      (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
+
+#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
+/* lie to the compiler about data alignment; use with caution */
+static U64 LZ4_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
+/* __pack instructions are safer, but compiler specific */
+LZ4_PACK(typedef struct { U64 u64; }) LZ4_unalign64;
+static U64 LZ4_read64(const void* ptr) { return ((const LZ4_unalign64*)ptr)->u64; }
+
+#else  /* safe and portable access using memcpy() */
+static U64 LZ4_read64(const void* memPtr)
+{
+    U64 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+#endif /* LZ4_FORCE_MEMORY_ACCESS */
+
+#define LZ4MID_HASHSIZE 8
+#define LZ4MID_HASHLOG (LZ4HC_HASH_LOG-1)
+#define LZ4MID_HASHTABLESIZE (1 << LZ4MID_HASHLOG)
+
+static U32 LZ4MID_hash4(U32 v) { return (v * 2654435761U) >> (32-LZ4MID_HASHLOG); }
+static U32 LZ4MID_hash4Ptr(const void* ptr) { return LZ4MID_hash4(LZ4_read32(ptr)); }
+/* note: hash7 hashes the lower 56-bits.
+ * It presumes input was read using little endian.*/
+static U32 LZ4MID_hash7(U64 v) { return (U32)(((v << (64-56)) * 58295818150454627ULL) >> (64-LZ4MID_HASHLOG)) ; }
+static U64 LZ4_readLE64(const void* memPtr);
+static U32 LZ4MID_hash8Ptr(const void* ptr) { return LZ4MID_hash7(LZ4_readLE64(ptr)); }
+
+static U64 LZ4_readLE64(const void* memPtr)
+{
+    if (LZ4_isLittleEndian()) {
+        return LZ4_read64(memPtr);
+    } else {
+        const BYTE* p = (const BYTE*)memPtr;
+        /* note: relies on the compiler to simplify this expression */
+        return (U64)p[0] | ((U64)p[1]<<8) | ((U64)p[2]<<16) | ((U64)p[3]<<24)
+             | ((U64)p[4]<<32) | ((U64)p[5]<<40) | ((U64)p[6]<<48) | ((U64)p[7]<<56);
+    }
+}
+
+
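
Note: a quick property check of the hash7 comment above (hypothetical test code). Since the value is shifted left by 64-56 = 8 bits before the multiply, the top byte of the little-endian read can never influence the result:

    #include <assert.h>
    static void check_hash7_ignores_top_byte(void)
    {
        U64 const a = 0x00FFEEDDCCBBAA99ULL;         /* top byte 0x00 */
        U64 const b = a | 0xFF00000000000000ULL;     /* same low 56 bits, top byte 0xFF */
        assert(LZ4MID_hash7(a) == LZ4MID_hash7(b));  /* deliberate collision */
    }
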
+/*=== Count match length ===*/
+LZ4_FORCE_INLINE
+unsigned LZ4HC_NbCommonBytes32(U32 val)
+{
+    assert(val != 0);
+    if (LZ4_isLittleEndian()) {
+#     if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+        unsigned long r;
+        _BitScanReverse(&r, val);
+        return (unsigned)((31 - r) >> 3);
+#     elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
+        return (unsigned)__builtin_clz(val) >> 3;
+#     else
+        val >>= 8;
+        val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+              (val + 0x00FF0000)) >> 24;
+        return (unsigned)val ^ 3;
+#     endif
+    } else {
+#     if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+        unsigned long r;
+        _BitScanForward(&r, val);
+        return (unsigned)(r >> 3);
+#     elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
+        return (unsigned)__builtin_ctz(val) >> 3;
+#     else
+        const U32 m = 0x01010101;
+        return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
+#     endif
+    }
+}
+
+/** LZ4HC_countBack() :
+ * @return : negative value, nb of common bytes before ip/match */
+LZ4_FORCE_INLINE
+int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
+                    const BYTE* const iMin, const BYTE* const mMin)
+{
+    int back = 0;
+    int const min = (int)MAX(iMin - ip, mMin - match);
+    assert(min <= 0);
+    assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
+    assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
+
+    while ((back - min) > 3) {
+        U32 const v = LZ4_read32(ip + back - 4) ^ LZ4_read32(match + back - 4);
+        if (v) {
+            return (back - (int)LZ4HC_NbCommonBytes32(v));
+        } else back -= 4; /* 4-byte step */
+    }
+    /* check remainder if any */
+    while ( (back > min)
+         && (ip[back-1] == match[back-1]) )
+        back--;
+    return back;
+}
+
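
Note: a worked example of the countBack() contract (hypothetical buffers). The function scans backwards from ip/match and reports the extension as a negative count; callers then widen the match start with `+ back` and grow the match length with `-= back`:

    static const BYTE b1[] = { 'x', 'a', 'b', 'c', 'M' };
    static const BYTE b2[] = { 'y', 'a', 'b', 'c', 'M' };
    /* From index 4, the bytes before ip/match agree for 'c','b','a',
     * then 'x' != 'y' stops the scan, so the return value is -3. */
    int const back = LZ4HC_countBack(b1+4, b2+4, b1, b2);   /* back == -3 */
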
+/*=== Chain table updates ===*/
 #define DELTANEXTU16(table, pos) table[(U16)(pos)]   /* faster */
 /* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
 #define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
 
-static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
-
 
 /**************************************
-*  HC Compression
+*  Init
 **************************************/
 static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
 {
@@ -101,6 +243,7 @@ static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
 {
     size_t const bufferSize = (size_t)(hc4->end - hc4->prefixStart);
     size_t newStartingOffset = bufferSize + hc4->dictLimit;
+    DEBUGLOG(5, "LZ4HC_init_internal");
     assert(newStartingOffset >= bufferSize);  /* check overflow */
     if (newStartingOffset > 1 GB) {
         LZ4HC_clearTables(hc4);
@@ -116,6 +259,524 @@ static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
 }
 
 
+/**************************************
+*  Encode
+**************************************/
+/* LZ4HC_encodeSequence() :
+ * @return : 0 if ok,
+ *           1 if buffer issue detected */
+LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
+    const BYTE** _ip,
+    BYTE** _op,
+    const BYTE** _anchor,
+    int matchLength,
+    int offset,
+    limitedOutput_directive limit,
+    BYTE* oend)
+{
+#define ip      (*_ip)
+#define op      (*_op)
+#define anchor  (*_anchor)
+
+    size_t length;
+    BYTE* const token = op++;
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
+    static const BYTE* start = NULL;
+    static U32 totalCost = 0;
+    U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
+    U32 const ll = (U32)(ip - anchor);
+    U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
+    U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
+    U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
+    if (start==NULL) start = anchor;  /* only works for single segment */
+    /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
+    DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5i, cost:%4u + %5u",
+                pos,
+                (U32)(ip - anchor), matchLength, offset,
+                cost, totalCost);
+    totalCost += cost;
+#endif
+
+    /* Encode Literal length */
+    length = (size_t)(ip - anchor);
+    LZ4_STATIC_ASSERT(notLimited == 0);
+    /* Check output limit */
+    if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
+        DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
+                (int)length, (int)(oend - op));
+        return 1;
+    }
+    if (length >= RUN_MASK) {
+        size_t len = length - RUN_MASK;
+        *token = (RUN_MASK << ML_BITS);
+        for(; len >= 255 ; len -= 255) *op++ = 255;
+        *op++ = (BYTE)len;
+    } else {
+        *token = (BYTE)(length << ML_BITS);
+    }
+
+    /* Copy Literals */
+    LZ4_wildCopy8(op, anchor, op + length);
+    op += length;
+
+    /* Encode Offset */
+    assert(offset <= LZ4_DISTANCE_MAX );
+    assert(offset > 0);
+    LZ4_writeLE16(op, (U16)(offset)); op += 2;
+
+    /* Encode MatchLength */
+    assert(matchLength >= MINMATCH);
+    length = (size_t)matchLength - MINMATCH;
+    if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
+        DEBUGLOG(6, "Not enough room to write match length");
+        return 1;   /* Check output limit */
+    }
+    if (length >= ML_MASK) {
+        *token += ML_MASK;
+        length -= ML_MASK;
+        for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
+        if (length >= 255) { length -= 255; *op++ = 255; }
+        *op++ = (BYTE)length;
+    } else {
+        *token += (BYTE)(length);
+    }
+
+    /* Prepare next loop */
+    ip += matchLength;
+    anchor = ip;
+
+    return 0;
+
+#undef ip
+#undef op
+#undef anchor
+}
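
Note: a hand-worked instance of the byte layout this function emits, with hypothetical values (20 literals, matchLength = 19, offset = 100; RUN_MASK and ML_MASK are both 15, MINMATCH is 4):

    /* token         : (15 << ML_BITS) | 15 = 0xFF   both token fields saturate
     * literal extra : 20 - 15 = 5                   one extension byte, 0x05
     * literals      : 20 raw bytes (LZ4_wildCopy8)
     * offset        : 100 as little-endian U16      two bytes, 0x64 0x00
     * match extra   : (19 - 4) - 15 = 0             one extension byte, 0x00  */
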
+
+
+typedef struct {
+    int off;
+    int len;
+    int back;  /* negative value */
+} LZ4HC_match_t;
+
+LZ4HC_match_t LZ4HC_searchExtDict(const BYTE* ip, U32 ipIndex,
+        const BYTE* const iLowLimit, const BYTE* const iHighLimit,
+        const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex,
+        int currentBestML, int nbAttempts)
+{
+    size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit;
+    U32 lDictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
+    U32 matchIndex = lDictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;
+    int offset = 0, sBack = 0;
+    assert(lDictEndIndex <= 1 GB);
+    if (lDictMatchIndex>0)
+        DEBUGLOG(7, "lDictEndIndex = %zu, lDictMatchIndex = %u", lDictEndIndex, lDictMatchIndex);
+    while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
+        const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + lDictMatchIndex;
+
+        if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+            int mlt;
+            int back = 0;
+            const BYTE* vLimit = ip + (lDictEndIndex - lDictMatchIndex);
+            if (vLimit > iHighLimit) vLimit = iHighLimit;
+            mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+            back = (ip > iLowLimit) ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0;
+            mlt -= back;
+            if (mlt > currentBestML) {
+                currentBestML = mlt;
+                offset = (int)(ipIndex - matchIndex);
+                sBack = back;
+                DEBUGLOG(7, "found match of length %i within extDictCtx", currentBestML);
+        }   }
+
+        {   U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, lDictMatchIndex);
+            lDictMatchIndex -= nextOffset;
+            matchIndex -= nextOffset;
+    }   }
+
+    {   LZ4HC_match_t md;
+        md.len = currentBestML;
+        md.off = offset;
+        md.back = sBack;
+        return md;
+    }
+}
+
+typedef LZ4HC_match_t (*LZ4MID_searchIntoDict_f)(const BYTE* ip, U32 ipIndex,
+        const BYTE* const iHighLimit,
+        const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex);
+
+static LZ4HC_match_t LZ4MID_searchHCDict(const BYTE* ip, U32 ipIndex,
+        const BYTE* const iHighLimit,
+        const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex)
+{
+    return LZ4HC_searchExtDict(ip,ipIndex,
+                            ip, iHighLimit,
+                            dictCtx, gDictEndIndex,
+                            MINMATCH-1, 2);
+}
+
+static LZ4HC_match_t LZ4MID_searchExtDict(const BYTE* ip, U32 ipIndex,
+        const BYTE* const iHighLimit,
+        const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex)
+{
+    size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit;
+    const U32* const hash4Table = dictCtx->hashTable;
+    const U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE;
+    DEBUGLOG(7, "LZ4MID_searchExtDict (ipIdx=%u)", ipIndex);
+
+    /* search long match first */
+    {   U32 l8DictMatchIndex = hash8Table[LZ4MID_hash8Ptr(ip)];
+        U32 m8Index = l8DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;
+        assert(lDictEndIndex <= 1 GB);
+        if (ipIndex - m8Index <= LZ4_DISTANCE_MAX) {
+            const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l8DictMatchIndex;
+            const size_t safeLen = MIN(lDictEndIndex - l8DictMatchIndex, (size_t)(iHighLimit - ip));
+            int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen);
+            if (mlt >= MINMATCH) {
+                LZ4HC_match_t md;
+                DEBUGLOG(7, "Found long ExtDict match of len=%u", mlt);
+                md.len = mlt;
+                md.off = (int)(ipIndex - m8Index);
+                md.back = 0;
+                return md;
+            }
+        }
+    }
+
+    /* search for short match second */
+    {   U32 l4DictMatchIndex = hash4Table[LZ4MID_hash4Ptr(ip)];
+        U32 m4Index = l4DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;
+        if (ipIndex - m4Index <= LZ4_DISTANCE_MAX) {
+            const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l4DictMatchIndex;
+            const size_t safeLen = MIN(lDictEndIndex - l4DictMatchIndex, (size_t)(iHighLimit - ip));
+            int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen);
+            if (mlt >= MINMATCH) {
+                LZ4HC_match_t md;
+                DEBUGLOG(7, "Found short ExtDict match of len=%u", mlt);
+                md.len = mlt;
+                md.off = (int)(ipIndex - m4Index);
+                md.back = 0;
+                return md;
+            }
+        }
+    }
+
+    /* nothing found */
+    {   LZ4HC_match_t const md = {0, 0, 0 };
+        return md;
+    }
+}
+
+/**************************************
+*  Mid Compression (level 2)
+**************************************/
+
+LZ4_FORCE_INLINE void
+LZ4MID_addPosition(U32* hTable, U32 hValue, U32 index)
+{
+    hTable[hValue] = index;
+}
+
+#define ADDPOS8(_p, _idx) LZ4MID_addPosition(hash8Table, LZ4MID_hash8Ptr(_p), _idx)
+#define ADDPOS4(_p, _idx) LZ4MID_addPosition(hash4Table, LZ4MID_hash4Ptr(_p), _idx)
+
+/* Fill hash tables with references into dictionary.
+ * The resulting table is only exploitable by LZ4MID (level 2) */
+static void
+LZ4MID_fillHTable (LZ4HC_CCtx_internal* cctx, const void* dict, size_t size)
+{
+    U32* const hash4Table = cctx->hashTable;
+    U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE;
+    const BYTE* const prefixPtr = (const BYTE*)dict;
+    U32 const prefixIdx = cctx->dictLimit;
+    U32 const target = prefixIdx + (U32)size - LZ4MID_HASHSIZE;
+    U32 idx = cctx->nextToUpdate;
+    assert(dict == cctx->prefixStart);
+    DEBUGLOG(4, "LZ4MID_fillHTable (size:%zu)", size);
+    if (size <= LZ4MID_HASHSIZE)
+        return;
+
+    for (; idx < target; idx += 3) {
+        ADDPOS4(prefixPtr+idx-prefixIdx, idx);
+        ADDPOS8(prefixPtr+idx+1-prefixIdx, idx+1);
+    }
+
+    idx = (size > 32 KB + LZ4MID_HASHSIZE) ? target - 32 KB : cctx->nextToUpdate;
+    for (; idx < target; idx += 1) {
+        ADDPOS8(prefixPtr+idx-prefixIdx, idx);
+    }
+
+    cctx->nextToUpdate = target;
+}
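
Note: a numeric sketch of the fill pattern above, assuming a hypothetical 100 KB dictionary with prefixIdx == 0:

    /* pass 1: idx = 0, 3, 6, ...                  ADDPOS4(idx) + ADDPOS8(idx+1)  sparse, whole dict
     * pass 2: idx = target-32 KB ... target-1     ADDPOS8(idx), step 1           dense, last 32 KB
     * so the most recent 32 KB gets full 8-byte coverage while older positions
     * stay sampled (presumably a speed / match-quality tradeoff). */
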
+
+static LZ4MID_searchIntoDict_f select_searchDict_function(const LZ4HC_CCtx_internal* dictCtx)
+{
+    if (dictCtx == NULL) return NULL;
+    if (LZ4HC_getCLevelParams(dictCtx->compressionLevel).strat == lz4mid)
+        return LZ4MID_searchExtDict;
+    return LZ4MID_searchHCDict;
+}
+
+static int LZ4MID_compress (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* srcSizePtr,
+    int const maxOutputSize,
+    const limitedOutput_directive limit,
+    const dictCtx_directive dict
+    )
+{
+    U32* const hash4Table = ctx->hashTable;
+    U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE;
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+    const BYTE* const ilimit = (iend - LZ4MID_HASHSIZE);
+    BYTE* op = (BYTE*)dst;
+    BYTE* oend = op + maxOutputSize;
+
+    const BYTE* const prefixPtr = ctx->prefixStart;
+    const U32 prefixIdx = ctx->dictLimit;
+    const U32 ilimitIdx = (U32)(ilimit - prefixPtr) + prefixIdx;
+    const BYTE* const dictStart = ctx->dictStart;
+    const U32 dictIdx = ctx->lowLimit;
+    const U32 gDictEndIndex = ctx->lowLimit;
+    const LZ4MID_searchIntoDict_f searchIntoDict = (dict == usingDictCtxHc) ? select_searchDict_function(ctx->dictCtx) : NULL;
+    unsigned matchLength;
+    unsigned matchDistance;
+
+    /* input sanitization */
+    DEBUGLOG(5, "LZ4MID_compress (%i bytes)", *srcSizePtr);
+    if (dict == usingDictCtxHc) DEBUGLOG(5, "usingDictCtxHc");
+    assert(*srcSizePtr >= 0);
+    if (*srcSizePtr) assert(src != NULL);
+    if (maxOutputSize) assert(dst != NULL);
+    if (*srcSizePtr < 0) return 0;  /* invalid */
+    if (maxOutputSize < 0) return 0;  /* invalid */
+    if (*srcSizePtr > LZ4_MAX_INPUT_SIZE) {
+        /* forbidden: no input is allowed to be that large */
+        return 0;
+    }
+    if (limit == fillOutput) oend -= LASTLITERALS;  /* Hack for support LZ4 format restriction */
+    if (*srcSizePtr < LZ4_minLength)
+        goto _lz4mid_last_literals;  /* Input too small, no compression (all literals) */
+
+    /* main loop */
+    while (ip <= mflimit) {
+        const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx;
+        /* search long match */
+        {   U32 const h8 = LZ4MID_hash8Ptr(ip);
+            U32 const pos8 = hash8Table[h8];
+            assert(h8 < LZ4MID_HASHTABLESIZE);
+            assert(pos8 < ipIndex);
+            LZ4MID_addPosition(hash8Table, h8, ipIndex);
+            if (ipIndex - pos8 <= LZ4_DISTANCE_MAX) {
+                /* match candidate found */
+                if (pos8 >= prefixIdx) {
+                    const BYTE* const matchPtr = prefixPtr + pos8 - prefixIdx;
+                    assert(matchPtr < ip);
+                    matchLength = LZ4_count(ip, matchPtr, matchlimit);
+                    if (matchLength >= MINMATCH) {
+                        DEBUGLOG(7, "found long match at pos %u (len=%u)", pos8, matchLength);
+                        matchDistance = ipIndex - pos8;
+                        goto _lz4mid_encode_sequence;
+                    }
+                } else {
+                    if (pos8 >= dictIdx) {
+                        /* extDict match candidate */
+                        const BYTE* const matchPtr = dictStart + (pos8 - dictIdx);
+                        const size_t safeLen = MIN(prefixIdx - pos8, (size_t)(matchlimit - ip));
+                        matchLength = LZ4_count(ip, matchPtr, ip + safeLen);
+                        if (matchLength >= MINMATCH) {
+                            DEBUGLOG(7, "found long match at ExtDict pos %u (len=%u)", pos8, matchLength);
+                            matchDistance = ipIndex - pos8;
+                            goto _lz4mid_encode_sequence;
+                        }
+                    }
+                }
+        }   }
+        /* search short match */
+        {   U32 const h4 = LZ4MID_hash4Ptr(ip);
+            U32 const pos4 = hash4Table[h4];
+            assert(h4 < LZ4MID_HASHTABLESIZE);
+            assert(pos4 < ipIndex);
+            LZ4MID_addPosition(hash4Table, h4, ipIndex);
+            if (ipIndex - pos4 <= LZ4_DISTANCE_MAX) {
+                /* match candidate found */
+                if (pos4 >= prefixIdx) {
+                    /* only search within prefix */
+                    const BYTE* const matchPtr = prefixPtr + (pos4 - prefixIdx);
+                    assert(matchPtr < ip);
+                    assert(matchPtr >= prefixPtr);
+                    matchLength = LZ4_count(ip, matchPtr, matchlimit);
+                    if (matchLength >= MINMATCH) {
+                        /* short match found, let's just check ip+1 for longer */
+                        U32 const h8 = LZ4MID_hash8Ptr(ip+1);
+                        U32 const pos8 = hash8Table[h8];
+                        U32 const m2Distance = ipIndex + 1 - pos8;
+                        matchDistance = ipIndex - pos4;
+                        if ( m2Distance <= LZ4_DISTANCE_MAX
+                          && pos8 >= prefixIdx /* only search within prefix */
+                          && likely(ip < mflimit)
+                          ) {
+                            const BYTE* const m2Ptr = prefixPtr + (pos8 - prefixIdx);
+                            unsigned ml2 = LZ4_count(ip+1, m2Ptr, matchlimit);
+                            if (ml2 > matchLength) {
+                                LZ4MID_addPosition(hash8Table, h8, ipIndex+1);
+                                ip++;
+                                matchLength = ml2;
+                                matchDistance = m2Distance;
+                        }   }
+                        goto _lz4mid_encode_sequence;
+                    }
+                } else {
+                    if (pos4 >= dictIdx) {
+                        /* extDict match candidate */
+                        const BYTE* const matchPtr = dictStart + (pos4 - dictIdx);
+                        const size_t safeLen = MIN(prefixIdx - pos4, (size_t)(matchlimit - ip));
+                        matchLength = LZ4_count(ip, matchPtr, ip + safeLen);
+                        if (matchLength >= MINMATCH) {
+                            DEBUGLOG(7, "found match at ExtDict pos %u (len=%u)", pos4, matchLength);
+                            matchDistance = ipIndex - pos4;
+                            goto _lz4mid_encode_sequence;
+                        }
+                    }
+                }
+        }   }
+        /* no match found in prefix */
+        if ( (dict == usingDictCtxHc)
+          && (ipIndex - gDictEndIndex < LZ4_DISTANCE_MAX - 8) ) {
+            /* search a match into external dictionary */
+            LZ4HC_match_t dMatch = searchIntoDict(ip, ipIndex,
+                    matchlimit,
+                    ctx->dictCtx, gDictEndIndex);
+            if (dMatch.len >= MINMATCH) {
+                DEBUGLOG(7, "found Dictionary match (offset=%i)", dMatch.off);
+                assert(dMatch.back == 0);
+                matchLength = (unsigned)dMatch.len;
+                matchDistance = (unsigned)dMatch.off;
+                goto _lz4mid_encode_sequence;
+            }
+        }
+        /* no match found */
+        ip += 1 + ((ip-anchor) >> 9);  /* skip faster over incompressible data */
+        continue;
+
+_lz4mid_encode_sequence:
+        /* catch back */
+        while (((ip > anchor) & ((U32)(ip-prefixPtr) > matchDistance)) && (unlikely(ip[-1] == ip[-(int)matchDistance-1]))) {
+            ip--; matchLength++;
+        };
+
+        /* fill table with beginning of match */
+        ADDPOS8(ip+1, ipIndex+1);
+        ADDPOS8(ip+2, ipIndex+2);
+        ADDPOS4(ip+1, ipIndex+1);
+
+        /* encode */
+        {   BYTE* const saved_op = op;
+            /* LZ4HC_encodeSequence always updates @op; on success, it updates @ip and @anchor */
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
+                    (int)matchLength, (int)matchDistance,
+                    limit, oend) ) {
+                op = saved_op;  /* restore @op value before failed LZ4HC_encodeSequence */
+                goto _lz4mid_dest_overflow;
+            }
+        }
+
+        /* fill table with end of match */
+        {   U32 endMatchIdx = (U32)(ip-prefixPtr) + prefixIdx;
+            U32 pos_m2 = endMatchIdx - 2;
+            if (pos_m2 < ilimitIdx) {
+                if (likely(ip - prefixPtr > 5)) {
+                    ADDPOS8(ip-5, endMatchIdx - 5);
+                }
+                ADDPOS8(ip-3, endMatchIdx - 3);
+                ADDPOS8(ip-2, endMatchIdx - 2);
+                ADDPOS4(ip-2, endMatchIdx - 2);
+                ADDPOS4(ip-1, endMatchIdx - 1);
+            }
+        }
+    }
+
+_lz4mid_last_literals:
+    /* Encode Last Literals */
+    {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+        size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
+        size_t const totalSize = 1 + llAdd + lastRunSize;
+        if (limit == fillOutput) oend += LASTLITERALS;  /* restore correct value */
+        if (limit && (op + totalSize > oend)) {
+            if (limit == limitedOutput) return 0;  /* not enough space in @dst */
+            /* adapt lastRunSize to fill 'dest' */
+            lastRunSize = (size_t)(oend - op) - 1 /*token*/;
+            llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+            lastRunSize -= llAdd;
+        }
+        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+        ip = anchor + lastRunSize;  /* can be != iend if limit==fillOutput */
+
+        if (lastRunSize >= RUN_MASK) {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = (RUN_MASK << ML_BITS);
+            for(; accumulator >= 255 ; accumulator -= 255)
+                *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRunSize << ML_BITS);
+        }
+        assert(lastRunSize <= (size_t)(oend - op));
+        LZ4_memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
+    }
+
+    /* End */
+    DEBUGLOG(5, "compressed %i bytes into %i bytes", *srcSizePtr, (int)((char*)op - dst));
+    assert(ip >= (const BYTE*)src);
+    assert(ip <= iend);
+    *srcSizePtr = (int)(ip - (const BYTE*)src);
+    assert((char*)op >= dst);
+    assert(op <= oend);
+    assert((char*)op - dst < INT_MAX);
+    return (int)((char*)op - dst);
+
+_lz4mid_dest_overflow:
+    if (limit == fillOutput) {
+        /* Assumption : @ip, @anchor, @optr and @matchLength must be set correctly */
+        size_t const ll = (size_t)(ip - anchor);
+        size_t const ll_addbytes = (ll + 240) / 255;
+        size_t const ll_totalCost = 1 + ll_addbytes + ll;
+        BYTE* const maxLitPos = oend - 3;  /* 2 for offset, 1 for token */
+        DEBUGLOG(6, "Last sequence is overflowing : %u literals, %u remaining space",
+                (unsigned)ll, (unsigned)(oend-op));
+        if (op + ll_totalCost <= maxLitPos) {
+            /* ll validated; now adjust match length */
+            size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+            size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+            assert(maxMlSize < INT_MAX);
+            if ((size_t)matchLength > maxMlSize) matchLength= (unsigned)maxMlSize;
+            if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + matchLength >= MFLIMIT) {
+                DEBUGLOG(6, "Let's encode a last sequence (ll=%u, ml=%u)", (unsigned)ll, matchLength);
+                LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
+                        (int)matchLength, (int)matchDistance,
+                        notLimited, oend);
+        }   }
+        DEBUGLOG(6, "Let's finish with a run of literals (%u bytes left)", (unsigned)(oend-op));
+        goto _lz4mid_last_literals;
+    }
+    /* compression failed */
+    return 0;
+}
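
Note: with the dispatch change in LZ4HC_compress_generic_internal() further below, compression level 2 now routes to LZ4MID_compress(). A minimal sketch of reaching it through the stable public API (hypothetical helper):

    #include "lz4hc.h"

    int compress_level2(const char* src, int srcSize, char* dst, int dstCapacity)
    {
        /* level 2 selects the lz4mid strategy per k_clTable */
        return LZ4_compress_HC(src, dst, srcSize, dstCapacity, 2);
    }
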
+
+
+/**************************************
+*  HC Compression - Search
+**************************************/
+
 /* Update chains up to ip (excluded) */
 LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
 {
@@ -130,31 +791,14 @@ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
 
     while (idx < target) {
         U32 const h = LZ4HC_hashPtr(prefixPtr+idx-prefixIdx);
-        size_t delta = idx - hashTable[h];
-        if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX;
-        DELTANEXTU16(chainTable, idx) = (U16)delta;
-        hashTable[h] = idx;
-        idx++;
-    }
-
-    hc4->nextToUpdate = target;
-}
-
-/** LZ4HC_countBack() :
- * @return : negative value, nb of common bytes before ip/match */
-LZ4_FORCE_INLINE
-int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
-                    const BYTE* const iMin, const BYTE* const mMin)
-{
-    int back = 0;
-    int const min = (int)MAX(iMin - ip, mMin - match);
-    assert(min <= 0);
-    assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
-    assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
-    while ( (back > min)
-         && (ip[back-1] == match[back-1]) )
-        back--;
-    return back;
+        size_t delta = idx - hashTable[h];
+        if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX;
+        DELTANEXTU16(chainTable, idx) = (U16)delta;
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    hc4->nextToUpdate = target;
 }
 
 #if defined(_MSC_VER)
@@ -236,22 +880,21 @@ static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
 typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
 typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
 
-LZ4_FORCE_INLINE int
+
+LZ4_FORCE_INLINE LZ4HC_match_t
 LZ4HC_InsertAndGetWiderMatch (
         LZ4HC_CCtx_internal* const hc4,
         const BYTE* const ip,
         const BYTE* const iLowLimit, const BYTE* const iHighLimit,
         int longest,
-        const BYTE** matchpos,
-        const BYTE** startpos,
         const int maxNbAttempts,
         const int patternAnalysis, const int chainSwap,
         const dictCtx_directive dict,
         const HCfavor_e favorDecSpeed)
 {
     U16* const chainTable = hc4->chainTable;
-    U32* const HashTable = hc4->hashTable;
-    const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx;
+    U32* const hashTable = hc4->hashTable;
+    const LZ4HC_CCtx_internal* const dictCtx = hc4->dictCtx;
     const BYTE* const prefixPtr = hc4->prefixStart;
     const U32 prefixIdx = hc4->dictLimit;
     const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx;
@@ -267,22 +910,24 @@ LZ4HC_InsertAndGetWiderMatch (
     U32 matchIndex;
     repeat_state_e repeat = rep_untested;
     size_t srcPatternLength = 0;
+    int offset = 0, sBack = 0;
 
     DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
     /* First Match */
-    LZ4HC_Insert(hc4, ip);
-    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
-    DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
-                matchIndex, lowestMatchIndex);
+    LZ4HC_Insert(hc4, ip);  /* insert all prior positions up to ip (excluded) */
+    matchIndex = hashTable[LZ4HC_hashPtr(ip)];
+    DEBUGLOG(7, "First candidate match for pos %u found at index %u / %u (lowestMatchIndex)",
+                ipIndex, matchIndex, lowestMatchIndex);
 
     while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
         int matchLength=0;
         nbAttempts--;
        assert(matchIndex < ipIndex);
        if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
-            /* do nothing */
+            /* do nothing:
+             * favorDecSpeed intentionally skips matches with offset < 8 */
        } else if (matchIndex >= prefixIdx) {   /* within current Prefix */
-            const BYTE* const matchPtr = prefixPtr + matchIndex - prefixIdx;
+            const BYTE* const matchPtr = prefixPtr + (matchIndex - prefixIdx);
            assert(matchPtr < ip);
            assert(longest >= 1);
            if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
@@ -292,10 +937,11 @@ LZ4HC_InsertAndGetWiderMatch (
                     matchLength -= back;
                     if (matchLength > longest) {
                         longest = matchLength;
-                        *matchpos = matchPtr + back;
-                        *startpos = ip + back;
+                        offset = (int)(ipIndex - matchIndex);
+                        sBack = back;
+                        DEBUGLOG(7, "Found match of len=%i within prefix, offset=%i, back=%i", longest, offset, -back);
        }   }   }
-        } else {   /* lowestMatchIndex <= matchIndex < dictLimit */
+        } else {   /* lowestMatchIndex <= matchIndex < dictLimit : within Ext Dict */
            const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx);
            assert(matchIndex >= dictIdx);
            if ( likely(matchIndex <= prefixIdx - 4)
@@ -310,8 +956,9 @@ LZ4HC_InsertAndGetWiderMatch (
                 matchLength -= back;
                 if (matchLength > longest) {
                     longest = matchLength;
-                    *matchpos = prefixPtr - prefixIdx + matchIndex + back;   /* virtual pos, relative to ip, to retrieve offset */
-                    *startpos = ip + back;
+                    offset = (int)(ipIndex - matchIndex);
+                    sBack = back;
+                    DEBUGLOG(7, "Found match of len=%i within dict, offset=%i, back=%i", longest, offset, -back);
        }   }   }
 
        if (chainSwap && matchLength==longest) {   /* better match => select a better chain */
@@ -344,6 +991,7 @@ LZ4HC_InsertAndGetWiderMatch (
                 if (repeat == rep_untested) {
                     if ( ((pattern & 0xFFFF) == (pattern >> 16))
                       &  ((pattern & 0xFF)   == (pattern >> 24)) ) {
+                        DEBUGLOG(7, "Repeat pattern detected, char %02X", pattern >> 24);
                         repeat = rep_confirmed;
                         srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
                     } else {
@@ -352,7 +1000,7 @@ LZ4HC_InsertAndGetWiderMatch (
                 if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
                   && LZ4HC_protectDictEnd(prefixIdx, matchCandidateIdx) ) {
                     const int extDict = matchCandidateIdx < prefixIdx;
-                    const BYTE* const matchPtr = (extDict ? dictStart - dictIdx : prefixPtr - prefixIdx) + matchCandidateIdx;
+                    const BYTE* const matchPtr = extDict ? dictStart + (matchCandidateIdx - dictIdx) : prefixPtr + (matchCandidateIdx - prefixIdx);
                     if (LZ4_read32(matchPtr) == pattern) {   /* good candidate */
                         const BYTE* const iLimit = extDict ? dictEnd : iHighLimit;
                         size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
@@ -398,8 +1046,9 @@ LZ4HC_InsertAndGetWiderMatch (
                         if ((size_t)(ip - prefixPtr) + prefixIdx - matchIndex > LZ4_DISTANCE_MAX) break;
                         assert(maxML < 2 GB);
                         longest = (int)maxML;
-                        *matchpos = prefixPtr - prefixIdx + matchIndex;   /* virtual pos, relative to ip, to retrieve offset */
-                        *startpos = ip;
+                        offset = (int)(ipIndex - matchIndex);
+                        assert(sBack == 0);
+                        DEBUGLOG(7, "Found repeat pattern match of len=%i, offset=%i", longest, offset);
                     }
                     {   U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
                         if (distToNextPattern > matchIndex) break;  /* avoid overflow */
@@ -416,11 +1065,12 @@ LZ4HC_InsertAndGetWiderMatch (
 
     if ( dict == usingDictCtxHc
       && nbAttempts > 0
-      && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
+      && withinStartDistance) {
         size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit;
         U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
         assert(dictEndOffset <= 1 GB);
         matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
+        if (dictMatchIndex>0) DEBUGLOG(7, "dictEndOffset = %zu, dictMatchIndex = %u => relative matchIndex = %i", dictEndOffset, dictMatchIndex, (int)dictMatchIndex - (int)dictEndOffset);
         while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
             const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + dictMatchIndex;
 
@@ -434,8 +1084,9 @@ LZ4HC_InsertAndGetWiderMatch (
                 mlt -= back;
                 if (mlt > longest) {
                     longest = mlt;
-                    *matchpos = prefixPtr - prefixIdx + matchIndex + back;
-                    *startpos = ip + back;
+                    offset = (int)(ipIndex - matchIndex);
+                    sBack = back;
+                    DEBUGLOG(7, "found match of length %i within extDictCtx", longest);
            }   }
 
            {   U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);
@@ -443,112 +1094,29 @@ LZ4HC_InsertAndGetWiderMatch (
                 matchIndex -= nextOffset;
     }   }   }
 
-    return longest;
+    {   LZ4HC_match_t md;
+        assert(longest >= 0);
+        md.len = longest;
+        md.off = offset;
+        md.back = sBack;
+        return md;
+    }
 }
 
-LZ4_FORCE_INLINE int
+LZ4_FORCE_INLINE LZ4HC_match_t
 LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4,   /* Index table will be updated */
                              const BYTE* const ip, const BYTE* const iLimit,
-                             const BYTE** matchpos,
                              const int maxNbAttempts,
                              const int patternAnalysis,
                              const dictCtx_directive dict)
 {
-    const BYTE* uselessPtr = ip;
+    DEBUGLOG(7, "LZ4HC_InsertAndFindBestMatch");
     /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
      * but this won't be the case here, as we define iLowLimit==ip,
      * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
-    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio);
+    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio);
 }
 
-/* LZ4HC_encodeSequence() :
- * @return : 0 if ok,
- *           1 if buffer issue detected */
-LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
-    const BYTE** _ip,
-    BYTE** _op,
-    const BYTE** _anchor,
-    int matchLength,
-    const BYTE* const match,
-    limitedOutput_directive limit,
-    BYTE* oend)
-{
-#define ip      (*_ip)
-#define op      (*_op)
-#define anchor  (*_anchor)
-
-    size_t length;
-    BYTE* const token = op++;
-
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
-    static const BYTE* start = NULL;
-    static U32 totalCost = 0;
-    U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
-    U32 const ll = (U32)(ip - anchor);
-    U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
-    U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
-    U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
-    if (start==NULL) start = anchor;  /* only works for single segment */
-    /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
-    DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
-                pos,
-                (U32)(ip - anchor), matchLength, (U32)(ip-match),
-                cost, totalCost);
-    totalCost += cost;
-#endif
-
-    /* Encode Literal length */
-    length = (size_t)(ip - anchor);
-    LZ4_STATIC_ASSERT(notLimited == 0);
-    /* Check output limit */
-    if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
-        DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
-                (int)length, (int)(oend - op));
-        return 1;
-    }
-    if (length >= RUN_MASK) {
-        size_t len = length - RUN_MASK;
-        *token = (RUN_MASK << ML_BITS);
-        for(; len >= 255 ; len -= 255) *op++ = 255;
-        *op++ = (BYTE)len;
-    } else {
-        *token = (BYTE)(length << ML_BITS);
-    }
-
-    /* Copy Literals */
-    LZ4_wildCopy8(op, anchor, op + length);
-    op += length;
-
-    /* Encode Offset */
-    assert( (ip - match) <= LZ4_DISTANCE_MAX );   /* note : consider providing offset as a value, rather than as a pointer difference */
-    LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
-
-    /* Encode MatchLength */
-    assert(matchLength >= MINMATCH);
-    length = (size_t)matchLength - MINMATCH;
-    if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
-        DEBUGLOG(6, "Not enough room to write match length");
-        return 1;   /* Check output limit */
-    }
-    if (length >= ML_MASK) {
-        *token += ML_MASK;
-        length -= ML_MASK;
-        for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
-        if (length >= 255) { length -= 255; *op++ = 255; }
-        *op++ = (BYTE)length;
-    } else {
-        *token += (BYTE)(length);
-    }
-
-    /* Prepare next loop */
-    ip += matchLength;
-    anchor = ip;
-
-    return 0;
-}
-#undef ip
-#undef op
-#undef anchor
 
 
 LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
@@ -574,127 +1142,130 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
     BYTE* op = (BYTE*) dest;
     BYTE* oend = op + maxOutputSize;
 
-    int   ml0, ml, ml2, ml3;
     const BYTE* start0;
-    const BYTE* ref0;
-    const BYTE* ref = NULL;
     const BYTE* start2 = NULL;
-    const BYTE* ref2 = NULL;
     const BYTE* start3 = NULL;
-    const BYTE* ref3 = NULL;
+    LZ4HC_match_t m0, m1, m2, m3;
+    const LZ4HC_match_t nomatch = {0, 0, 0};
 
     /* init */
+    DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict);
     *srcSizePtr = 0;
     if (limit == fillOutput) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
     if (inputSize < LZ4_minLength) goto _last_literals;   /* Input too small, no compression (all literals) */
 
     /* Main Loop */
     while (ip <= mflimit) {
-        ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
-        if (ml<MINMATCH) { ip++; continue; }
+        m1 = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, maxNbAttempts, patternAnalysis, dict);
+        if (m1.len<MINMATCH) { ip++; continue; }
 
         /* saved, in case we would skip too much */
-        start0 = ip; ref0 = ref; ml0 = ml;
+        start0 = ip; m0 = m1;
 
 _Search2:
-        if (ip+ml <= mflimit) {
-            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
-                            ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+        DEBUGLOG(7, "_Search2 (currently found match of size %i)", m1.len);
+        if (ip+m1.len <= mflimit) {
+            start2 = ip + m1.len - 2;
+            m2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            start2, ip + 0, matchlimit, m1.len,
                             maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+            start2 += m2.back;
         } else {
-            ml2 = ml;
+            m2 = nomatch;  /* do not search further */
         }
 
-        if (ml2 == ml) {  /* No better match => encode ML1 */
+        if (m2.len <= m1.len) {  /* No better match => encode ML1 immediately */
             optr = op;
-            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
+                    m1.len, m1.off,
+                    limit, oend) )
+                goto _dest_overflow;
             continue;
         }
 
         if (start0 < ip) {   /* first match was skipped at least once */
-            if (start2 < ip + ml0) {  /* squeezing ML1 between ML0(original ML1) and ML2 */
-                ip = start0; ref = ref0; ml = ml0;  /* restore initial ML1 */
+            if (start2 < ip + m0.len) {  /* squeezing ML1 between ML0(original ML1) and ML2 */
+                ip = start0; m1 = m0;  /* restore initial Match1 */
        }   }
 
        /* Here, start0==ip */
        if ((start2 - ip) < 3) {  /* First Match too small : removed */
-            ml = ml2;
            ip = start2;
-            ref =ref2;
+            m1 = m2;
            goto _Search2;
        }
 
_Search3:
-        /* At this stage, we have :
-        *  ml2 > ml1, and
-        *  ip1+3 <= ip2 (usually < ip1+ml1) */
        if ((start2 - ip) < OPTIMAL_ML) {
            int correction;
-            int new_ml = ml;
+            int new_ml = m1.len;
            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
-            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            if (ip+new_ml > start2 + m2.len - MINMATCH)
+                new_ml = (int)(start2 - ip) + m2.len - MINMATCH;
            correction = new_ml - (int)(start2 - ip);
            if (correction > 0) {
                start2 += correction;
-                ref2 += correction;
-                ml2 -= correction;
+                m2.len -= correction;
            }
        }
-        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
 
-        if (start2 + ml2 <= mflimit) {
-            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
-                            start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+        if (start2 + m2.len <= mflimit) {
+            start3 = start2 + m2.len - 3;
+            m3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            start3, start2, matchlimit, m2.len,
                            maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+            start3 += m3.back;
        } else {
-            ml3 = ml2;
+            m3 = nomatch;  /* do not search further */
        }
 
-        if (ml3 == ml2) {  /* No better match => encode ML1 and ML2 */
+        if (m3.len <= m2.len) {  /* No better match => encode ML1 and ML2 */
            /* ip & ref are known; Now for ml */
-            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            if (start2 < ip+m1.len) m1.len = (int)(start2 - ip);
            /* Now, encode 2 sequences */
            optr = op;
-            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
+                    m1.len, m1.off,
+                    limit, oend) )
+                goto _dest_overflow;
            ip = start2;
            optr = op;
-            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) {
-                ml  = ml2;
-                ref = ref2;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
+                    m2.len, m2.off,
+                    limit, oend) ) {
+                m1 = m2;
                goto _dest_overflow;
            }
            continue;
        }
 
-        if (start3 < ip+ml+3) {  /* Not enough space for match 2 : remove it */
-            if (start3 >= (ip+ml)) {  /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
-                if (start2 < ip+ml) {
-                    int correction = (int)(ip+ml - start2);
+        if (start3 < ip+m1.len+3) {  /* Not enough space for match 2 : remove it */
+            if (start3 >= (ip+m1.len)) {  /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+                if (start2 < ip+m1.len) {
+                    int correction = (int)(ip+m1.len - start2);
                    start2 += correction;
-                    ref2 += correction;
-                    ml2 -= correction;
-                    if (ml2 < MINMATCH) {
+                    m2.len -= correction;
+                    if (m2.len < MINMATCH) {
                        start2 = start3;
-                        ref2 = ref3;
-                        ml2 = ml3;
+                        m2 = m3;
                    }
                }
 
                optr = op;
-                if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+                if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
+                        m1.len, m1.off,
+                        limit, oend) )
+                    goto _dest_overflow;
                ip = start3;
-                ref = ref3;
-                ml = ml3;
+                m1 = m3;
 
                start0 = start2;
-                ref0 = ref2;
-                ml0 = ml2;
+                m0 = m2;
                goto _Search2;
            }
 
            start2 = start3;
-            ref2 = ref3;
-            ml2 = ml3;
+            m2 = m3;
            goto _Search3;
        }
 
@@ -703,29 +1274,32 @@ _Search3:
         * let's write the first one ML1.
         * ip & ref are known; Now decide ml.
         */
-        if (start2 < ip+ml) {
+        if (start2 < ip+m1.len) {
             if ((start2 - ip) < OPTIMAL_ML) {
                 int correction;
-                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
-                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
-                correction = ml - (int)(start2 - ip);
+                if (m1.len > OPTIMAL_ML) m1.len = OPTIMAL_ML;
+                if (ip + m1.len > start2 + m2.len - MINMATCH)
+                    m1.len = (int)(start2 - ip) + m2.len - MINMATCH;
+                correction = m1.len - (int)(start2 - ip);
                 if (correction > 0) {
                     start2 += correction;
-                    ref2 += correction;
-                    ml2 -= correction;
+                    m2.len -= correction;
                 }
             } else {
-                ml = (int)(start2 - ip);
+                m1.len = (int)(start2 - ip);
             }
         }
         optr = op;
-        if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+        if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
+                m1.len, m1.off,
+                limit, oend) )
+            goto _dest_overflow;
 
         /* ML2 becomes ML1 */
-        ip = start2; ref = ref2; ml = ml2;
+        ip = start2; m1 = m2;
 
         /* ML3 becomes ML2 */
-        start2 = start3; ref2 = ref3; ml2 = ml3;
+        start2 = start3; m2 = m3;
 
         /* let's find a new ML3 */
         goto _Search3;
@@ -765,7 +1339,7 @@ _last_literals:
 
 _dest_overflow:
     if (limit == fillOutput) {
-        /* Assumption : ip, anchor, ml and ref must be set correctly */
+        /* Assumption : @ip, @anchor, @optr and @m1 must be set correctly */
         size_t const ll = (size_t)(ip - anchor);
         size_t const ll_addbytes = (ll + 240) / 255;
         size_t const ll_totalCost = 1 + ll_addbytes + ll;
@@ -776,10 +1350,10 @@ _dest_overflow:
             /* ll validated; now adjust match length */
             size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
             size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
-            assert(maxMlSize < INT_MAX); assert(ml >= 0);
-            if ((size_t)ml > maxMlSize) ml = (int)maxMlSize;
-            if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) {
-                LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend);
+            assert(maxMlSize < INT_MAX); assert(m1.len >= 0);
+            if ((size_t)m1.len > maxMlSize) m1.len = (int)maxMlSize;
+            if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + m1.len >= MFLIMIT) {
+                LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, m1.off, notLimited, oend);
         }   }
         goto _last_literals;
     }
@@ -796,54 +1370,34 @@ static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
                                    const dictCtx_directive dict,
                                    const HCfavor_e favorDecSpeed);
 
-
-LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
-            LZ4HC_CCtx_internal* const ctx,
-            const char* const src,
-            char* const dst,
-            int* const srcSizePtr,
-            int const dstCapacity,
-            int cLevel,
-            const limitedOutput_directive limit,
-            const dictCtx_directive dict
-            )
+LZ4_FORCE_INLINE int
+LZ4HC_compress_generic_internal (
+        LZ4HC_CCtx_internal* const ctx,
+        const char* const src,
+        char* const dst,
+        int* const srcSizePtr,
+        int const dstCapacity,
+        int cLevel,
+        const limitedOutput_directive limit,
+        const dictCtx_directive dict
+        )
 {
-    typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
-    typedef struct {
-        lz4hc_strat_e strat;
-        int nbSearches;
-        U32 targetLength;
-    } cParams_t;
-    static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
-        { lz4hc,     2, 16 },  /* 0, unused */
-        { lz4hc,     2, 16 },  /* 1, unused */
-        { lz4hc,     2, 16 },  /* 2, unused */
-        { lz4hc,     4, 16 },  /* 3 */
-        { lz4hc,     8, 16 },  /* 4 */
-        { lz4hc,    16, 16 },  /* 5 */
-        { lz4hc,    32, 16 },  /* 6 */
-        { lz4hc,    64, 16 },  /* 7 */
-        { lz4hc,   128, 16 },  /* 8 */
-        { lz4hc,   256, 16 },  /* 9 */
-        { lz4opt,   96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
-        { lz4opt,  512,128 },  /*11 */
-        { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
-    };
-
-    DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
-                ctx, src, *srcSizePtr, limit);
+    DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d)",
+                src, *srcSizePtr);
 
     if (limit == fillOutput && dstCapacity < 1) return 0;   /* Impossible to store anything */
-    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;    /* Unsupported input size (too large or negative) */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;  /* Unsupported input size (too large or negative) */
 
     ctx->end += *srcSizePtr;
-    if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe something to review */
-    cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
-    {   cParams_t const cParam = clTable[cLevel];
+    {   cParams_t const cParam = LZ4HC_getCLevelParams(cLevel);
         HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
         int result;
 
-        if (cParam.strat == lz4hc) {
+        if (cParam.strat == lz4mid) {
+            result = LZ4MID_compress(ctx,
+                                src, dst, srcSizePtr, dstCapacity,
+                                limit, dict);
+        } else if (cParam.strat == lz4hc) {
             result = LZ4HC_compress_hashChain(ctx,
                                 src, dst, srcSizePtr, dstCapacity,
                                 cParam.nbSearches, limit, dict);
@@ -852,7 +1406,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
852
1406
  result = LZ4HC_compress_optimal(ctx,
853
1407
  src, dst, srcSizePtr, dstCapacity,
854
1408
  cParam.nbSearches, cParam.targetLength, limit,
855
- cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */
1409
+ cLevel >= LZ4HC_CLEVEL_MAX, /* ultra mode */
856
1410
  dict, favor);
857
1411
  }
858
1412
  if (result <= 0) ctx->dirty = 1;
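The per-level tuning table formerly declared inline here now lives behind LZ4HC_getCLevelParams(), whose definition falls outside this excerpt. A plausible reconstruction from the deleted clTable and the new lz4mid branch follows; the exact upstream table (notably which low levels map to lz4mid, and their nbSearches values) is an assumption:

    /* Hypothetical sketch of LZ4HC_getCLevelParams(), reconstructed from
     * the clTable removed above; the actual upstream code may differ. */
    static cParams_t LZ4HC_getCLevelParams(int cLevel)
    {
        static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
            { lz4mid,     2,  16 },  /* 0, unused */
            { lz4mid,     2,  16 },  /* 1 */
            { lz4mid,     2,  16 },  /* 2 */
            { lz4hc,      4,  16 },  /* 3 */
            { lz4hc,      8,  16 },  /* 4 */
            { lz4hc,     16,  16 },  /* 5 */
            { lz4hc,     32,  16 },  /* 6 */
            { lz4hc,     64,  16 },  /* 7 */
            { lz4hc,    128,  16 },  /* 8 */
            { lz4hc,    256,  16 },  /* 9 */
            { lz4opt,    96,  64 },  /* 10 == LZ4HC_CLEVEL_OPT_MIN */
            { lz4opt,   512, 128 },  /* 11 */
            { lz4opt, 16384, LZ4_OPT_NUM },  /* 12 == LZ4HC_CLEVEL_MAX */
        };
        /* level clamping, previously done by the caller, presumably moved here */
        if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;
        cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
        return clTable[cLevel];
    }

This would also explain the `cLevel >= LZ4HC_CLEVEL_MAX` comparison above: the raw level is no longer clamped inside this function, so values above the maximum must still select ultra mode.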
@@ -877,6 +1431,13 @@ LZ4HC_compress_generic_noDictCtx (
  return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx);
  }

+ static int isStateCompatible(const LZ4HC_CCtx_internal* ctx1, const LZ4HC_CCtx_internal* ctx2)
+ {
+ int const isMid1 = LZ4HC_getCLevelParams(ctx1->compressionLevel).strat == lz4mid;
+ int const isMid2 = LZ4HC_getCLevelParams(ctx2->compressionLevel).strat == lz4mid;
+ return !(isMid1 ^ isMid2);
+ }
+
  static int
  LZ4HC_compress_generic_dictCtx (
  LZ4HC_CCtx_internal* const ctx,
@@ -893,7 +1454,7 @@ LZ4HC_compress_generic_dictCtx (
  if (position >= 64 KB) {
  ctx->dictCtx = NULL;
  return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
- } else if (position == 0 && *srcSizePtr > 4 KB) {
+ } else if (position == 0 && *srcSizePtr > 4 KB && isStateCompatible(ctx, ctx->dictCtx)) {
  LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));
  LZ4HC_setExternalDict(ctx, (const BYTE *)src);
  ctx->compressionLevel = (short)cLevel;
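The new isStateCompatible() guard exists because this fast path inherits the dictionary context wholesale via LZ4_memcpy(): tables built by the lz4mid strategy presumably use a different layout than the hash-chain/optimal tables, so a state copy is only safe when both contexts sit on the same side of the lz4mid divide. The XOR makes the check symmetric:

    /* isStateCompatible truth table, derived from !(isMid1 ^ isMid2) :
     *   mid vs mid        -> 1 (copy allowed)
     *   hc  vs opt        -> 1 (both non-mid, copy allowed)
     *   mid vs hc or opt  -> 0 (skip the shortcut)
     */

Incompatible pairs simply skip the memcpy shortcut and compress through the ordinary dictCtx path.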
@@ -965,6 +1526,7 @@ int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, in
  LZ4_streamHC_t state;
  LZ4_streamHC_t* const statePtr = &state;
  #endif
+ DEBUGLOG(5, "LZ4_compress_HC")
  cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel);
  #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
  FREEMEM(statePtr);
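For reference, the one-shot entry point above is used like this (a minimal sketch; sizing dst with LZ4_compressBound() guarantees compression cannot fail for lack of space):

    #include "lz4.h"    /* LZ4_compressBound */
    #include "lz4hc.h"  /* LZ4_compress_HC, LZ4HC_CLEVEL_DEFAULT */

    /* Returns the compressed size, or 0 on failure (e.g. dst too small). */
    static int hc_compress_block(const char* src, int srcSize,
                                 char* dst, int dstCapacity)
    {
        return LZ4_compress_HC(src, dst, srcSize, dstCapacity,
                               LZ4HC_CLEVEL_DEFAULT);
    }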
@@ -1032,18 +1594,16 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)

  void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
  {
- DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
- if (LZ4_streamHCPtr->internal_donotuse.dirty) {
+ LZ4HC_CCtx_internal* const s = &LZ4_streamHCPtr->internal_donotuse;
+ DEBUGLOG(5, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+ if (s->dirty) {
  LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
  } else {
- /* preserve end - prefixStart : can trigger clearTable's threshold */
- if (LZ4_streamHCPtr->internal_donotuse.end != NULL) {
- LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.prefixStart;
- } else {
- assert(LZ4_streamHCPtr->internal_donotuse.prefixStart == NULL);
- }
- LZ4_streamHCPtr->internal_donotuse.prefixStart = NULL;
- LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
+ assert(s->end >= s->prefixStart);
+ s->dictLimit += (U32)(s->end - s->prefixStart);
+ s->prefixStart = NULL;
+ s->end = NULL;
+ s->dictCtx = NULL;
  }
  LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
  }
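The rewritten fast reset no longer stores `end` as a relative offset; it folds the prefix length into dictLimit, so indices keep growing monotonically across resets while the pointers are cleanly NULLed. Call-site usage is unchanged: one stream object can be recycled across independent jobs without paying for a full init each time (sketch; `jobs` and `nbJobs` are hypothetical):

    LZ4_streamHC_t hcs;
    LZ4_initStreamHC(&hcs, sizeof(hcs));     /* full initialization, once */
    for (size_t i = 0; i < nbJobs; i++) {
        /* cheap per-job reset, keeps the allocation and table memory */
        LZ4_resetStreamHC_fast(&hcs, LZ4HC_CLEVEL_DEFAULT);
        (void)LZ4_compress_HC_continue(&hcs, jobs[i].src, jobs[i].dst,
                                       jobs[i].srcSize, jobs[i].dstCapacity);
    }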
@@ -1067,7 +1627,9 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
  const char* dictionary, int dictSize)
  {
  LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
- DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize);
+ cParams_t cp;
+ DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d, clevel=%d)", LZ4_streamHCPtr, dictionary, dictSize, ctxPtr->compressionLevel);
+ assert(dictSize >= 0);
  assert(LZ4_streamHCPtr != NULL);
  if (dictSize > 64 KB) {
  dictionary += (size_t)dictSize - 64 KB;
@@ -1077,10 +1639,15 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
  { int const cLevel = ctxPtr->compressionLevel;
  LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
  LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel);
+ cp = LZ4HC_getCLevelParams(cLevel);
  }
  LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary);
  ctxPtr->end = (const BYTE*)dictionary + dictSize;
- if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
+ if (cp.strat == lz4mid) {
+ LZ4MID_fillHTable (ctxPtr, dictionary, (size_t)dictSize);
+ } else {
+ if (dictSize >= LZ4HC_HASHSIZE) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
+ }
  return dictSize;
  }
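Dictionary loading now dispatches on strategy: lz4mid levels populate their table through LZ4MID_fillHTable(), while the hash-chain/optimal levels keep the historical LZ4HC_Insert() call, with the former magic constant 4 now spelled LZ4HC_HASHSIZE. Note that the level is read from the context, so it should be set before loading. The public calling sequence is unchanged (sketch; dictBuf/dictLen are placeholders):

    LZ4_streamHC_t hcs;
    LZ4_initStreamHC(&hcs, sizeof(hcs));
    LZ4_setCompressionLevel(&hcs, 9);            /* pick the level before loading */
    LZ4_loadDictHC(&hcs, dictBuf, dictLen);      /* prime history with the dict */
    cSize = LZ4_compress_HC_continue(&hcs, src, dst, srcSize, dstCapacity);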
 
@@ -1093,8 +1660,10 @@ void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC
  static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
  {
  DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
- if (ctxPtr->end >= ctxPtr->prefixStart + 4)
- LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
+ if ( (ctxPtr->end >= ctxPtr->prefixStart + 4)
+ && (LZ4HC_getCLevelParams(ctxPtr->compressionLevel).strat != lz4mid) ) {
+ LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
+ }

  /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
  ctxPtr->lowLimit = ctxPtr->dictLimit;
@@ -1119,7 +1688,8 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
  LZ4_streamHCPtr, src, *srcSizePtr, limit);
  assert(ctxPtr != NULL);
  /* auto-init if forgotten */
- if (ctxPtr->prefixStart == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
+ if (ctxPtr->prefixStart == NULL)
+ LZ4HC_init_internal (ctxPtr, (const BYTE*) src);

  /* Check overflow */
  if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > 2 GB) {
@@ -1140,7 +1710,8 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
  if (sourceEnd > dictEnd) sourceEnd = dictEnd;
  ctxPtr->lowLimit += (U32)(sourceEnd - ctxPtr->dictStart);
  ctxPtr->dictStart += (U32)(sourceEnd - ctxPtr->dictStart);
+ /* invalidate dictionary if it's too small */
- if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) {
+ if (ctxPtr->dictLimit - ctxPtr->lowLimit < LZ4HC_HASHSIZE) {
  ctxPtr->lowLimit = ctxPtr->dictLimit;
  ctxPtr->dictStart = ctxPtr->prefixStart;
  } } }
@@ -1150,6 +1721,7 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,

  int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)
  {
+ DEBUGLOG(5, "LZ4_compress_HC_continue");
  if (dstCapacity < LZ4_compressBound(srcSize))
  return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
  else
@@ -1162,7 +1734,6 @@ int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const ch
  }


-
  /* LZ4_saveDictHC :
  * save history content
  * into a user-provided buffer
@@ -1179,10 +1750,10 @@ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictS
  if (dictSize > prefixSize) dictSize = prefixSize;
  if (safeBuffer == NULL) assert(dictSize == 0);
  if (dictSize > 0)
- LZ4_memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+ LZ4_memmove(safeBuffer, streamPtr->end - dictSize, (size_t)dictSize);
  { U32 const endIndex = (U32)(streamPtr->end - streamPtr->prefixStart) + streamPtr->dictLimit;
- streamPtr->end = (const BYTE*)safeBuffer + dictSize;
- streamPtr->prefixStart = streamPtr->end - dictSize;
+ streamPtr->end = (safeBuffer == NULL) ? NULL : (const BYTE*)safeBuffer + dictSize;
+ streamPtr->prefixStart = (const BYTE*)safeBuffer;
  streamPtr->dictLimit = endIndex - (U32)dictSize;
  streamPtr->lowLimit = endIndex - (U32)dictSize;
  streamPtr->dictStart = streamPtr->prefixStart;
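LZ4_saveDictHC() is now explicitly NULL-tolerant: when safeBuffer == NULL (legal only with dictSize == 0, per the assert above), `end` is set to NULL instead of being computed from a NULL base, prefixStart is taken directly from safeBuffer rather than re-derived from `end`, and the memmove length gains an explicit (size_t) cast. Typical usage keeps history alive across a buffer switch (sketch):

    char dictBuf[64 * 1024];   /* retains up to 64 KB of history */
    /* ... after compressing blocks with LZ4_compress_HC_continue() ... */
    int const saved = LZ4_saveDictHC(&hcs, dictBuf, (int)sizeof(dictBuf));
    /* history now lives in dictBuf; the old working buffer may be reused */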
@@ -1193,75 +1764,6 @@ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictS
  }


- /***************************************************
- * Deprecated Functions
- ***************************************************/
-
- /* These functions currently generate deprecation warnings */
-
- /* Wrappers for deprecated compression functions */
- int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
- int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
- int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
- int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
- int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
- int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
- int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
- int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
- int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
- int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
-
-
- /* Deprecated streaming functions */
- int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); }
-
- /* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
- * @return : 0 on success, !=0 if error */
- int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
- {
- LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
- if (hc4 == NULL) return 1; /* init failed */
- LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
- return 0;
- }
-
- #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
- void* LZ4_createHC (const char* inputBuffer)
- {
- LZ4_streamHC_t* const hc4 = LZ4_createStreamHC();
- if (hc4 == NULL) return NULL; /* not enough memory */
- LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
- return hc4;
- }
-
- int LZ4_freeHC (void* LZ4HC_Data)
- {
- if (!LZ4HC_Data) return 0; /* support free on NULL */
- FREEMEM(LZ4HC_Data);
- return 0;
- }
- #endif
-
- int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
- {
- return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited);
- }
-
- int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
- {
- return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
- }
-
- char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
- {
- LZ4_streamHC_t* const ctx = (LZ4_streamHC_t*)LZ4HC_Data;
- const BYTE* bufferStart = ctx->internal_donotuse.prefixStart - ctx->internal_donotuse.dictLimit + ctx->internal_donotuse.lowLimit;
- LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel);
- /* avoid const char * -> char * conversion warning :( */
- return (char*)(uptrval)bufferStart;
- }
-
-
  /* ================================================
  * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX])
  * ===============================================*/
@@ -1282,7 +1784,6 @@ LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
  return price;
  }

-
  /* requires mlen >= MINMATCH */
  LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
  {
@@ -1298,12 +1799,6 @@ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
  return price;
  }

-
- typedef struct {
- int off;
- int len;
- } LZ4HC_match_t;
-
  LZ4_FORCE_INLINE LZ4HC_match_t
  LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
  const BYTE* ip, const BYTE* const iHighLimit,
@@ -1311,19 +1806,17 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
  const dictCtx_directive dict,
  const HCfavor_e favorDecSpeed)
  {
- LZ4HC_match_t match = { 0 , 0 };
- const BYTE* matchPtr = NULL;
+ LZ4HC_match_t const match0 = { 0 , 0, 0 };
  /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
  * but this won't be the case here, as we define iLowLimit==ip,
- * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
- int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
- if (matchLength <= minLen) return match;
+ ** so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+ LZ4HC_match_t md = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
+ assert(md.back == 0);
+ if (md.len <= minLen) return match0;
  if (favorDecSpeed) {
- if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */
+ if ((md.len>18) & (md.len<=36)) md.len=18; /* favor dec.speed (shortcut) */
  }
- match.len = matchLength;
- match.off = (int)(ip-matchPtr);
- return match;
+ return md;
  }
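LZ4HC_FindLongerMatch() no longer reconstitutes a match from an out-parameter: LZ4HC_InsertAndGetWiderMatch() now returns the LZ4HC_match_t by value, and the former `&matchPtr, &ip` arguments are gone. Because iLowLimit == ip here, the match cannot start before ip, which the new assert(md.back == 0) encodes. Inferred from the call site above, the search routine's new prototype is presumably along these lines (a guess; the actual declaration lies outside this excerpt):

    static LZ4HC_match_t LZ4HC_InsertAndGetWiderMatch(
            LZ4HC_CCtx_internal* hc4,
            const BYTE* ip, const BYTE* iLowLimit, const BYTE* iHighLimit,
            int longest, int maxNbAttempts,
            int patternAnalysis, int chainSwap,
            dictCtx_directive dict, HCfavor_e favorDecSpeed);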
 
 
@@ -1356,7 +1849,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
  BYTE* opSaved = (BYTE*) dst;
  BYTE* oend = op + dstCapacity;
  int ovml = MINMATCH; /* overflow - last sequence */
- const BYTE* ovref = NULL;
+ int ovoff = 0;

  /* init */
  #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
@@ -1379,11 +1872,10 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
  if ((size_t)firstMatch.len > sufficient_len) {
  /* good enough solution : immediate encoding */
  int const firstML = firstMatch.len;
- const BYTE* const matchPos = ip - firstMatch.off;
  opSaved = op;
- if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */
+ if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, firstMatch.off, limit, oend) ) { /* updates ip, op and anchor */
  ovml = firstML;
- ovref = matchPos;
+ ovoff = firstMatch.off;
  goto _dest_overflow;
  }
  continue;
@@ -1401,11 +1893,11 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
  rPos, cost, opt[rPos].litlen);
  } }
  /* set prices using initial match */
- { int mlen = MINMATCH;
- int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
+ { int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
  int const offset = firstMatch.off;
+ int mlen;
  assert(matchML < LZ4_OPT_NUM);
- for ( ; mlen <= matchML ; mlen++) {
+ for (mlen = MINMATCH ; mlen <= matchML ; mlen++) {
  int const cost = LZ4HC_sequencePrice(llen, mlen);
  opt[mlen].mlen = mlen;
  opt[mlen].off = offset;
@@ -1557,9 +2049,9 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */
  assert(ml >= MINMATCH);
  assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
  opSaved = op;
- if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */
+ if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset, limit, oend) ) { /* updates ip, op and anchor */
  ovml = ml;
- ovref = ip - offset;
+ ovoff = offset;
  goto _dest_overflow;
  } } }
  } /* while (ip <= mflimit) */
@@ -1618,14 +2110,83 @@ if (limit == fillOutput) {
  if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
  DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
  DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
- LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend);
+ LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovoff, notLimited, oend);
  DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
  } }
  goto _last_literals;
  }
  _return_label:
  #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
- FREEMEM(opt);
+ if (opt) FREEMEM(opt);
  #endif
  return retval;
  }
+
+
+ /***************************************************
+ * Deprecated Functions
+ ***************************************************/
+
+ /* These functions currently generate deprecation warnings */
+
+ /* Wrappers for deprecated compression functions */
+ int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+ int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
+ int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+ int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
+ int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+ int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
+ int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+ int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
+ int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
+ int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
+
+
+ /* Deprecated streaming functions */
+ int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); }
+
+ /* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
+ * @return : 0 on success, !=0 if error */
+ int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
+ {
+ LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
+ if (hc4 == NULL) return 1; /* init failed */
+ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+ return 0;
+ }
+
+ #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+ void* LZ4_createHC (const char* inputBuffer)
+ {
+ LZ4_streamHC_t* const hc4 = LZ4_createStreamHC();
+ if (hc4 == NULL) return NULL; /* not enough memory */
+ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+ return hc4;
+ }
+
+ int LZ4_freeHC (void* LZ4HC_Data)
+ {
+ if (!LZ4HC_Data) return 0; /* support free on NULL */
+ FREEMEM(LZ4HC_Data);
+ return 0;
+ }
+ #endif
+
+ int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
+ {
+ return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited);
+ }
+
+ int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
+ {
+ return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
+ }
+
+ char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
+ {
+ LZ4HC_CCtx_internal* const s = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
+ const BYTE* const bufferStart = s->prefixStart - s->dictLimit + s->lowLimit;
+ LZ4_resetStreamHC_fast((LZ4_streamHC_t*)LZ4HC_Data, s->compressionLevel);
+ /* ugly conversion trick, required to evade (const char*) -> (char*) cast-qual warning :( */
+ return (char*)(uptrval)bufferStart;
+ }