extlz4 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +21 -3
- data/contrib/lz4/CODING_STYLE +57 -0
- data/contrib/lz4/LICENSE +1 -1
- data/contrib/lz4/Makefile.inc +17 -15
- data/contrib/lz4/NEWS +25 -0
- data/contrib/lz4/README.md +16 -5
- data/contrib/lz4/SECURITY.md +17 -0
- data/contrib/lz4/build/README.md +4 -15
- data/contrib/lz4/build/VS2022/_build.bat +39 -0
- data/contrib/lz4/build/VS2022/_setup.bat +35 -0
- data/contrib/lz4/build/VS2022/_test.bat +38 -0
- data/contrib/lz4/build/VS2022/build-and-test-win32-debug.bat +26 -0
- data/contrib/lz4/build/VS2022/build-and-test-win32-release.bat +26 -0
- data/contrib/lz4/build/VS2022/build-and-test-x64-debug.bat +26 -0
- data/contrib/lz4/build/VS2022/build-and-test-x64-release.bat +26 -0
- data/contrib/lz4/build/VS2022/datagen/datagen.vcxproj +7 -3
- data/contrib/lz4/build/{VS2017 → VS2022}/lz4/lz4.vcxproj +21 -7
- data/contrib/lz4/build/VS2022/lz4.sln +5 -2
- data/contrib/lz4/build/cmake/CMakeLists.txt +95 -100
- data/contrib/lz4/build/meson/GetLz4LibraryVersion.py +39 -0
- data/contrib/lz4/build/meson/README.md +34 -0
- data/contrib/lz4/build/meson/meson/contrib/gen_manual/meson.build +42 -0
- data/contrib/lz4/build/meson/meson/contrib/meson.build +11 -0
- data/contrib/lz4/build/meson/meson/examples/meson.build +32 -0
- data/contrib/lz4/build/meson/meson/lib/meson.build +87 -0
- data/contrib/lz4/build/meson/meson/meson.build +135 -0
- data/contrib/lz4/build/meson/meson/ossfuzz/meson.build +35 -0
- data/contrib/lz4/build/meson/meson/programs/meson.build +91 -0
- data/contrib/lz4/build/meson/meson/tests/meson.build +162 -0
- data/contrib/lz4/build/meson/meson.build +31 -0
- data/contrib/lz4/build/meson/meson_options.txt +44 -0
- data/contrib/lz4/build/visual/README.md +5 -0
- data/contrib/lz4/build/visual/generate_solution.cmd +55 -0
- data/contrib/lz4/build/visual/generate_vs2015.cmd +3 -0
- data/contrib/lz4/build/visual/generate_vs2017.cmd +3 -0
- data/contrib/lz4/build/visual/generate_vs2019.cmd +3 -0
- data/contrib/lz4/build/visual/generate_vs2022.cmd +3 -0
- data/contrib/lz4/lib/README.md +25 -1
- data/contrib/lz4/lib/lz4.c +206 -99
- data/contrib/lz4/lib/lz4.h +111 -69
- data/contrib/lz4/lib/lz4file.c +111 -81
- data/contrib/lz4/lib/lz4file.h +2 -2
- data/contrib/lz4/lib/lz4frame.c +179 -121
- data/contrib/lz4/lib/lz4frame.h +162 -103
- data/contrib/lz4/lib/lz4hc.c +943 -382
- data/contrib/lz4/lib/lz4hc.h +43 -42
- data/contrib/lz4/lib/xxhash.c +21 -21
- data/contrib/lz4/ossfuzz/decompress_fuzzer.c +1 -1
- data/contrib/lz4/ossfuzz/fuzz_helpers.h +1 -1
- data/ext/blockapi.c +11 -11
- data/ext/frameapi.c +23 -23
- metadata +34 -28
- data/contrib/lz4/build/VS2010/datagen/datagen.vcxproj +0 -169
- data/contrib/lz4/build/VS2010/frametest/frametest.vcxproj +0 -176
- data/contrib/lz4/build/VS2010/fullbench/fullbench.vcxproj +0 -176
- data/contrib/lz4/build/VS2010/fullbench-dll/fullbench-dll.vcxproj +0 -180
- data/contrib/lz4/build/VS2010/fuzzer/fuzzer.vcxproj +0 -173
- data/contrib/lz4/build/VS2010/liblz4/liblz4.vcxproj +0 -175
- data/contrib/lz4/build/VS2010/liblz4-dll/liblz4-dll.rc +0 -51
- data/contrib/lz4/build/VS2010/liblz4-dll/liblz4-dll.vcxproj +0 -179
- data/contrib/lz4/build/VS2010/lz4/lz4.vcxproj +0 -189
- data/contrib/lz4/build/VS2010/lz4.sln +0 -98
- data/contrib/lz4/build/VS2017/datagen/datagen.vcxproj +0 -173
- data/contrib/lz4/build/VS2017/frametest/frametest.vcxproj +0 -180
- data/contrib/lz4/build/VS2017/fullbench/fullbench.vcxproj +0 -180
- data/contrib/lz4/build/VS2017/fullbench-dll/fullbench-dll.vcxproj +0 -184
- data/contrib/lz4/build/VS2017/fuzzer/fuzzer.vcxproj +0 -177
- data/contrib/lz4/build/VS2017/liblz4/liblz4.vcxproj +0 -179
- data/contrib/lz4/build/VS2017/liblz4-dll/liblz4-dll.rc +0 -51
- data/contrib/lz4/build/VS2017/liblz4-dll/liblz4-dll.vcxproj +0 -183
- data/contrib/lz4/build/VS2017/lz4/lz4.rc +0 -51
- data/contrib/lz4/build/VS2017/lz4.sln +0 -103
- data/contrib/lz4/build/{VS2010 → VS2022}/lz4/lz4.rc +0 -0
data/contrib/lz4/lib/lz4hc.c CHANGED
@@ -39,9 +39,10 @@
 ***************************************/
 
 /*! HEAPMODE :
- *  Select how
- *
- *
+ *  Select how stateless HC compression functions like `LZ4_compress_HC()`
+ *  allocate memory for their workspace:
+ *  in stack (0:fastest), or in heap (1:default, requires malloc()).
+ *  Since workspace is rather large, heap mode is recommended.
 **/
 #ifndef LZ4HC_HEAPMODE
 # define LZ4HC_HEAPMODE 1
@@ -51,19 +52,19 @@
 /*=== Dependency ===*/
 #define LZ4_HC_STATIC_LINKING_ONLY
 #include "lz4hc.h"
+#include <limits.h>
 
 
-/*===
-#
+/*=== Shared lz4.c code ===*/
+#ifndef LZ4_SRC_INCLUDED
+# if defined(__GNUC__)
 # pragma GCC diagnostic ignored "-Wunused-function"
-#endif
-#if defined (__clang__)
+# endif
+# if defined (__clang__)
 # pragma clang diagnostic ignored "-Wunused-function"
-#endif
-
-#
-#ifndef LZ4_SRC_INCLUDED
-#include "lz4.c"   /* LZ4_count, constants, mem */
+# endif
+# define LZ4_COMMONDEFS_ONLY
+# include "lz4.c"   /* LZ4_count, constants, mem */
 #endif
 
 
@@ -79,17 +80,158 @@ typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
 /*=== Macros ===*/
 #define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
 #define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
-
-
+
+
+/*=== Levels definition ===*/
+typedef enum { lz4mid, lz4hc, lz4opt } lz4hc_strat_e;
+typedef struct {
+    lz4hc_strat_e strat;
+    int nbSearches;
+    U32 targetLength;
+} cParams_t;
+static const cParams_t k_clTable[LZ4HC_CLEVEL_MAX+1] = {
+    { lz4mid,    2, 16 },  /* 0, unused */
+    { lz4mid,    2, 16 },  /* 1, unused */
+    { lz4mid,    2, 16 },  /* 2 */
+    { lz4hc,     4, 16 },  /* 3 */
+    { lz4hc,     8, 16 },  /* 4 */
+    { lz4hc,    16, 16 },  /* 5 */
+    { lz4hc,    32, 16 },  /* 6 */
+    { lz4hc,    64, 16 },  /* 7 */
+    { lz4hc,   128, 16 },  /* 8 */
+    { lz4hc,   256, 16 },  /* 9 */
+    { lz4opt,   96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
+    { lz4opt,  512,128 },  /*11 */
+    { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
+};
+
+static cParams_t LZ4HC_getCLevelParams(int cLevel)
+{
+    /* note : clevel convention is a bit different from lz4frame,
+     * possibly something worth revisiting for consistency */
+    if (cLevel < 1)
+        cLevel = LZ4HC_CLEVEL_DEFAULT;
+    cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
+    return k_clTable[cLevel];
+}
+
+
+/*=== Hashing ===*/
+#define LZ4HC_HASHSIZE 4
+#define HASH_FUNCTION(i)      (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
+
+#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
+/* lie to the compiler about data alignment; use with caution */
+static U64 LZ4_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
+/* __pack instructions are safer, but compiler specific */
+LZ4_PACK(typedef struct { U64 u64; }) LZ4_unalign64;
+static U64 LZ4_read64(const void* ptr) { return ((const LZ4_unalign64*)ptr)->u64; }
+
+#else  /* safe and portable access using memcpy() */
+static U64 LZ4_read64(const void* memPtr)
+{
+    U64 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+#endif /* LZ4_FORCE_MEMORY_ACCESS */
+
+#define LZ4MID_HASHSIZE 8
+#define LZ4MID_HASHLOG (LZ4HC_HASH_LOG-1)
+#define LZ4MID_HASHTABLESIZE (1 << LZ4MID_HASHLOG)
+
+static U32 LZ4MID_hash4(U32 v) { return (v * 2654435761U) >> (32-LZ4MID_HASHLOG); }
+static U32 LZ4MID_hash4Ptr(const void* ptr) { return LZ4MID_hash4(LZ4_read32(ptr)); }
+/* note: hash7 hashes the lower 56-bits.
+ * It presumes input was read using little endian.*/
+static U32 LZ4MID_hash7(U64 v) { return (U32)(((v << (64-56)) * 58295818150454627ULL) >> (64-LZ4MID_HASHLOG)) ; }
+static U64 LZ4_readLE64(const void* memPtr);
+static U32 LZ4MID_hash8Ptr(const void* ptr) { return LZ4MID_hash7(LZ4_readLE64(ptr)); }
+
+static U64 LZ4_readLE64(const void* memPtr)
+{
+    if (LZ4_isLittleEndian()) {
+        return LZ4_read64(memPtr);
+    } else {
+        const BYTE* p = (const BYTE*)memPtr;
+        /* note: relies on the compiler to simplify this expression */
+        return (U64)p[0] | ((U64)p[1]<<8) | ((U64)p[2]<<16) | ((U64)p[3]<<24)
+            | ((U64)p[4]<<32) | ((U64)p[5]<<40) | ((U64)p[6]<<48) | ((U64)p[7]<<56);
+    }
+}
+
+
+/*=== Count match length ===*/
+LZ4_FORCE_INLINE
+unsigned LZ4HC_NbCommonBytes32(U32 val)
+{
+    assert(val != 0);
+    if (LZ4_isLittleEndian()) {
+# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+        unsigned long r;
+        _BitScanReverse(&r, val);
+        return (unsigned)((31 - r) >> 3);
+# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                            !defined(LZ4_FORCE_SW_BITCOUNT)
+        return (unsigned)__builtin_clz(val) >> 3;
+# else
+        val >>= 8;
+        val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+              (val + 0x00FF0000)) >> 24;
+        return (unsigned)val ^ 3;
+# endif
+    } else {
+# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+        unsigned long r;
+        _BitScanForward(&r, val);
+        return (unsigned)(r >> 3);
+# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                            !defined(LZ4_FORCE_SW_BITCOUNT)
+        return (unsigned)__builtin_ctz(val) >> 3;
+# else
+        const U32 m = 0x01010101;
+        return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
+# endif
+    }
+}
+
+/** LZ4HC_countBack() :
+ * @return : negative value, nb of common bytes before ip/match */
+LZ4_FORCE_INLINE
+int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
+                    const BYTE* const iMin, const BYTE* const mMin)
+{
+    int back = 0;
+    int const min = (int)MAX(iMin - ip, mMin - match);
+    assert(min <= 0);
+    assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
+    assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
+
+    while ((back - min) > 3) {
+        U32 const v = LZ4_read32(ip + back - 4) ^ LZ4_read32(match + back - 4);
+        if (v) {
+            return (back - (int)LZ4HC_NbCommonBytes32(v));
+        } else back -= 4; /* 4-byte step */
+    }
+    /* check remainder if any */
+    while ( (back > min)
+         && (ip[back-1] == match[back-1]) )
+        back--;
+    return back;
+}
+
+/*=== Chain table updates ===*/
 #define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */
 /* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
 #define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
 
-static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
-
 
 /**************************************
-*
+*  Init
 **************************************/
 static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
 {
@@ -101,6 +243,7 @@ static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
 {
     size_t const bufferSize = (size_t)(hc4->end - hc4->prefixStart);
     size_t newStartingOffset = bufferSize + hc4->dictLimit;
+    DEBUGLOG(5, "LZ4HC_init_internal");
     assert(newStartingOffset >= bufferSize);  /* check overflow */
     if (newStartingOffset > 1 GB) {
         LZ4HC_clearTables(hc4);
@@ -116,6 +259,524 @@ static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
|
|
116
259
|
}
|
117
260
|
|
118
261
|
|
262
|
+
/**************************************
|
263
|
+
* Encode
|
264
|
+
**************************************/
|
265
|
+
/* LZ4HC_encodeSequence() :
|
266
|
+
* @return : 0 if ok,
|
267
|
+
* 1 if buffer issue detected */
|
268
|
+
LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
|
269
|
+
const BYTE** _ip,
|
270
|
+
BYTE** _op,
|
271
|
+
const BYTE** _anchor,
|
272
|
+
int matchLength,
|
273
|
+
int offset,
|
274
|
+
limitedOutput_directive limit,
|
275
|
+
BYTE* oend)
|
276
|
+
{
|
277
|
+
#define ip (*_ip)
|
278
|
+
#define op (*_op)
|
279
|
+
#define anchor (*_anchor)
|
280
|
+
|
281
|
+
size_t length;
|
282
|
+
BYTE* const token = op++;
|
283
|
+
|
284
|
+
#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
|
285
|
+
static const BYTE* start = NULL;
|
286
|
+
static U32 totalCost = 0;
|
287
|
+
U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
|
288
|
+
U32 const ll = (U32)(ip - anchor);
|
289
|
+
U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
|
290
|
+
U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
|
291
|
+
U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
|
292
|
+
if (start==NULL) start = anchor; /* only works for single segment */
|
293
|
+
/* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
|
294
|
+
DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5i, cost:%4u + %5u",
|
295
|
+
pos,
|
296
|
+
(U32)(ip - anchor), matchLength, offset,
|
297
|
+
cost, totalCost);
|
298
|
+
totalCost += cost;
|
299
|
+
#endif
|
300
|
+
|
301
|
+
/* Encode Literal length */
|
302
|
+
length = (size_t)(ip - anchor);
|
303
|
+
LZ4_STATIC_ASSERT(notLimited == 0);
|
304
|
+
/* Check output limit */
|
305
|
+
if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
|
306
|
+
DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
|
307
|
+
(int)length, (int)(oend - op));
|
308
|
+
return 1;
|
309
|
+
}
|
310
|
+
if (length >= RUN_MASK) {
|
311
|
+
size_t len = length - RUN_MASK;
|
312
|
+
*token = (RUN_MASK << ML_BITS);
|
313
|
+
for(; len >= 255 ; len -= 255) *op++ = 255;
|
314
|
+
*op++ = (BYTE)len;
|
315
|
+
} else {
|
316
|
+
*token = (BYTE)(length << ML_BITS);
|
317
|
+
}
|
318
|
+
|
319
|
+
/* Copy Literals */
|
320
|
+
LZ4_wildCopy8(op, anchor, op + length);
|
321
|
+
op += length;
|
322
|
+
|
323
|
+
/* Encode Offset */
|
324
|
+
assert(offset <= LZ4_DISTANCE_MAX );
|
325
|
+
assert(offset > 0);
|
326
|
+
LZ4_writeLE16(op, (U16)(offset)); op += 2;
|
327
|
+
|
328
|
+
/* Encode MatchLength */
|
329
|
+
assert(matchLength >= MINMATCH);
|
330
|
+
length = (size_t)matchLength - MINMATCH;
|
331
|
+
if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
|
332
|
+
DEBUGLOG(6, "Not enough room to write match length");
|
333
|
+
return 1; /* Check output limit */
|
334
|
+
}
|
335
|
+
if (length >= ML_MASK) {
|
336
|
+
*token += ML_MASK;
|
337
|
+
length -= ML_MASK;
|
338
|
+
for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
|
339
|
+
if (length >= 255) { length -= 255; *op++ = 255; }
|
340
|
+
*op++ = (BYTE)length;
|
341
|
+
} else {
|
342
|
+
*token += (BYTE)(length);
|
343
|
+
}
|
344
|
+
|
345
|
+
/* Prepare next loop */
|
346
|
+
ip += matchLength;
|
347
|
+
anchor = ip;
|
348
|
+
|
349
|
+
return 0;
|
350
|
+
|
351
|
+
#undef ip
|
352
|
+
#undef op
|
353
|
+
#undef anchor
|
354
|
+
}
|
355
|
+
|
356
|
+
|
357
|
+
typedef struct {
|
358
|
+
int off;
|
359
|
+
int len;
|
360
|
+
int back; /* negative value */
|
361
|
+
} LZ4HC_match_t;
|
362
|
+
|
363
|
+
LZ4HC_match_t LZ4HC_searchExtDict(const BYTE* ip, U32 ipIndex,
|
364
|
+
const BYTE* const iLowLimit, const BYTE* const iHighLimit,
|
365
|
+
const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex,
|
366
|
+
int currentBestML, int nbAttempts)
|
367
|
+
{
|
368
|
+
size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit;
|
369
|
+
U32 lDictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
|
370
|
+
U32 matchIndex = lDictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;
|
371
|
+
int offset = 0, sBack = 0;
|
372
|
+
assert(lDictEndIndex <= 1 GB);
|
373
|
+
if (lDictMatchIndex>0)
|
374
|
+
DEBUGLOG(7, "lDictEndIndex = %zu, lDictMatchIndex = %u", lDictEndIndex, lDictMatchIndex);
|
375
|
+
while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
|
376
|
+
const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + lDictMatchIndex;
|
377
|
+
|
378
|
+
if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
|
379
|
+
int mlt;
|
380
|
+
int back = 0;
|
381
|
+
const BYTE* vLimit = ip + (lDictEndIndex - lDictMatchIndex);
|
382
|
+
if (vLimit > iHighLimit) vLimit = iHighLimit;
|
383
|
+
mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
|
384
|
+
back = (ip > iLowLimit) ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0;
|
385
|
+
mlt -= back;
|
386
|
+
if (mlt > currentBestML) {
|
387
|
+
currentBestML = mlt;
|
388
|
+
offset = (int)(ipIndex - matchIndex);
|
389
|
+
sBack = back;
|
390
|
+
DEBUGLOG(7, "found match of length %i within extDictCtx", currentBestML);
|
391
|
+
} }
|
392
|
+
|
393
|
+
{ U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, lDictMatchIndex);
|
394
|
+
lDictMatchIndex -= nextOffset;
|
395
|
+
matchIndex -= nextOffset;
|
396
|
+
} }
|
397
|
+
|
398
|
+
{ LZ4HC_match_t md;
|
399
|
+
md.len = currentBestML;
|
400
|
+
md.off = offset;
|
401
|
+
md.back = sBack;
|
402
|
+
return md;
|
403
|
+
}
|
404
|
+
}
|
405
|
+
|
406
|
+
typedef LZ4HC_match_t (*LZ4MID_searchIntoDict_f)(const BYTE* ip, U32 ipIndex,
|
407
|
+
const BYTE* const iHighLimit,
|
408
|
+
const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex);
|
409
|
+
|
410
|
+
static LZ4HC_match_t LZ4MID_searchHCDict(const BYTE* ip, U32 ipIndex,
|
411
|
+
const BYTE* const iHighLimit,
|
412
|
+
const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex)
|
413
|
+
{
|
414
|
+
return LZ4HC_searchExtDict(ip,ipIndex,
|
415
|
+
ip, iHighLimit,
|
416
|
+
dictCtx, gDictEndIndex,
|
417
|
+
MINMATCH-1, 2);
|
418
|
+
}
|
419
|
+
|
420
|
+
static LZ4HC_match_t LZ4MID_searchExtDict(const BYTE* ip, U32 ipIndex,
|
421
|
+
const BYTE* const iHighLimit,
|
422
|
+
const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex)
|
423
|
+
{
|
424
|
+
size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit;
|
425
|
+
const U32* const hash4Table = dictCtx->hashTable;
|
426
|
+
const U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE;
|
427
|
+
DEBUGLOG(7, "LZ4MID_searchExtDict (ipIdx=%u)", ipIndex);
|
428
|
+
|
429
|
+
/* search long match first */
|
430
|
+
{ U32 l8DictMatchIndex = hash8Table[LZ4MID_hash8Ptr(ip)];
|
431
|
+
U32 m8Index = l8DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;
|
432
|
+
assert(lDictEndIndex <= 1 GB);
|
433
|
+
if (ipIndex - m8Index <= LZ4_DISTANCE_MAX) {
|
434
|
+
const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l8DictMatchIndex;
|
435
|
+
const size_t safeLen = MIN(lDictEndIndex - l8DictMatchIndex, (size_t)(iHighLimit - ip));
|
436
|
+
int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen);
|
437
|
+
if (mlt >= MINMATCH) {
|
438
|
+
LZ4HC_match_t md;
|
439
|
+
DEBUGLOG(7, "Found long ExtDict match of len=%u", mlt);
|
440
|
+
md.len = mlt;
|
441
|
+
md.off = (int)(ipIndex - m8Index);
|
442
|
+
md.back = 0;
|
443
|
+
return md;
|
444
|
+
}
|
445
|
+
}
|
446
|
+
}
|
447
|
+
|
448
|
+
/* search for short match second */
|
449
|
+
{ U32 l4DictMatchIndex = hash4Table[LZ4MID_hash4Ptr(ip)];
|
450
|
+
U32 m4Index = l4DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;
|
451
|
+
if (ipIndex - m4Index <= LZ4_DISTANCE_MAX) {
|
452
|
+
const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l4DictMatchIndex;
|
453
|
+
const size_t safeLen = MIN(lDictEndIndex - l4DictMatchIndex, (size_t)(iHighLimit - ip));
|
454
|
+
int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen);
|
455
|
+
if (mlt >= MINMATCH) {
|
456
|
+
LZ4HC_match_t md;
|
457
|
+
DEBUGLOG(7, "Found short ExtDict match of len=%u", mlt);
|
458
|
+
md.len = mlt;
|
459
|
+
md.off = (int)(ipIndex - m4Index);
|
460
|
+
md.back = 0;
|
461
|
+
return md;
|
462
|
+
}
|
463
|
+
}
|
464
|
+
}
|
465
|
+
|
466
|
+
/* nothing found */
|
467
|
+
{ LZ4HC_match_t const md = {0, 0, 0 };
|
468
|
+
return md;
|
469
|
+
}
|
470
|
+
}
|
471
|
+
|
472
|
+
/**************************************
|
473
|
+
* Mid Compression (level 2)
|
474
|
+
**************************************/
|
475
|
+
|
476
|
+
LZ4_FORCE_INLINE void
|
477
|
+
LZ4MID_addPosition(U32* hTable, U32 hValue, U32 index)
|
478
|
+
{
|
479
|
+
hTable[hValue] = index;
|
480
|
+
}
|
481
|
+
|
482
|
+
#define ADDPOS8(_p, _idx) LZ4MID_addPosition(hash8Table, LZ4MID_hash8Ptr(_p), _idx)
|
483
|
+
#define ADDPOS4(_p, _idx) LZ4MID_addPosition(hash4Table, LZ4MID_hash4Ptr(_p), _idx)
|
484
|
+
|
485
|
+
/* Fill hash tables with references into dictionary.
|
486
|
+
* The resulting table is only exploitable by LZ4MID (level 2) */
|
487
|
+
static void
|
488
|
+
LZ4MID_fillHTable (LZ4HC_CCtx_internal* cctx, const void* dict, size_t size)
|
489
|
+
{
|
490
|
+
U32* const hash4Table = cctx->hashTable;
|
491
|
+
U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE;
|
492
|
+
const BYTE* const prefixPtr = (const BYTE*)dict;
|
493
|
+
U32 const prefixIdx = cctx->dictLimit;
|
494
|
+
U32 const target = prefixIdx + (U32)size - LZ4MID_HASHSIZE;
|
495
|
+
U32 idx = cctx->nextToUpdate;
|
496
|
+
assert(dict == cctx->prefixStart);
|
497
|
+
DEBUGLOG(4, "LZ4MID_fillHTable (size:%zu)", size);
|
498
|
+
if (size <= LZ4MID_HASHSIZE)
|
499
|
+
return;
|
500
|
+
|
501
|
+
for (; idx < target; idx += 3) {
|
502
|
+
ADDPOS4(prefixPtr+idx-prefixIdx, idx);
|
503
|
+
ADDPOS8(prefixPtr+idx+1-prefixIdx, idx+1);
|
504
|
+
}
|
505
|
+
|
506
|
+
idx = (size > 32 KB + LZ4MID_HASHSIZE) ? target - 32 KB : cctx->nextToUpdate;
|
507
|
+
for (; idx < target; idx += 1) {
|
508
|
+
ADDPOS8(prefixPtr+idx-prefixIdx, idx);
|
509
|
+
}
|
510
|
+
|
511
|
+
cctx->nextToUpdate = target;
|
512
|
+
}
|
513
|
+
|
514
|
+
static LZ4MID_searchIntoDict_f select_searchDict_function(const LZ4HC_CCtx_internal* dictCtx)
|
515
|
+
{
|
516
|
+
if (dictCtx == NULL) return NULL;
|
517
|
+
if (LZ4HC_getCLevelParams(dictCtx->compressionLevel).strat == lz4mid)
|
518
|
+
return LZ4MID_searchExtDict;
|
519
|
+
return LZ4MID_searchHCDict;
|
520
|
+
}
|
521
|
+
|
522
|
+
static int LZ4MID_compress (
|
523
|
+
LZ4HC_CCtx_internal* const ctx,
|
524
|
+
const char* const src,
|
525
|
+
char* const dst,
|
526
|
+
int* srcSizePtr,
|
527
|
+
int const maxOutputSize,
|
528
|
+
const limitedOutput_directive limit,
|
529
|
+
const dictCtx_directive dict
|
530
|
+
)
|
531
|
+
{
|
532
|
+
U32* const hash4Table = ctx->hashTable;
|
533
|
+
U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE;
|
534
|
+
const BYTE* ip = (const BYTE*)src;
|
535
|
+
const BYTE* anchor = ip;
|
536
|
+
const BYTE* const iend = ip + *srcSizePtr;
|
537
|
+
const BYTE* const mflimit = iend - MFLIMIT;
|
538
|
+
const BYTE* const matchlimit = (iend - LASTLITERALS);
|
539
|
+
const BYTE* const ilimit = (iend - LZ4MID_HASHSIZE);
|
540
|
+
BYTE* op = (BYTE*)dst;
|
541
|
+
BYTE* oend = op + maxOutputSize;
|
542
|
+
|
543
|
+
const BYTE* const prefixPtr = ctx->prefixStart;
|
544
|
+
const U32 prefixIdx = ctx->dictLimit;
|
545
|
+
const U32 ilimitIdx = (U32)(ilimit - prefixPtr) + prefixIdx;
|
546
|
+
const BYTE* const dictStart = ctx->dictStart;
|
547
|
+
const U32 dictIdx = ctx->lowLimit;
|
548
|
+
const U32 gDictEndIndex = ctx->lowLimit;
|
549
|
+
const LZ4MID_searchIntoDict_f searchIntoDict = (dict == usingDictCtxHc) ? select_searchDict_function(ctx->dictCtx) : NULL;
|
550
|
+
unsigned matchLength;
|
551
|
+
unsigned matchDistance;
|
552
|
+
|
553
|
+
/* input sanitization */
|
554
|
+
DEBUGLOG(5, "LZ4MID_compress (%i bytes)", *srcSizePtr);
|
555
|
+
if (dict == usingDictCtxHc) DEBUGLOG(5, "usingDictCtxHc");
|
556
|
+
assert(*srcSizePtr >= 0);
|
557
|
+
if (*srcSizePtr) assert(src != NULL);
|
558
|
+
if (maxOutputSize) assert(dst != NULL);
|
559
|
+
if (*srcSizePtr < 0) return 0; /* invalid */
|
560
|
+
if (maxOutputSize < 0) return 0; /* invalid */
|
561
|
+
if (*srcSizePtr > LZ4_MAX_INPUT_SIZE) {
|
562
|
+
/* forbidden: no input is allowed to be that large */
|
563
|
+
return 0;
|
564
|
+
}
|
565
|
+
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
566
|
+
if (*srcSizePtr < LZ4_minLength)
|
567
|
+
goto _lz4mid_last_literals; /* Input too small, no compression (all literals) */
|
568
|
+
|
569
|
+
/* main loop */
|
570
|
+
while (ip <= mflimit) {
|
571
|
+
const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx;
|
572
|
+
/* search long match */
|
573
|
+
{ U32 const h8 = LZ4MID_hash8Ptr(ip);
|
574
|
+
U32 const pos8 = hash8Table[h8];
|
575
|
+
assert(h8 < LZ4MID_HASHTABLESIZE);
|
576
|
+
assert(pos8 < ipIndex);
|
577
|
+
LZ4MID_addPosition(hash8Table, h8, ipIndex);
|
578
|
+
if (ipIndex - pos8 <= LZ4_DISTANCE_MAX) {
|
579
|
+
/* match candidate found */
|
580
|
+
if (pos8 >= prefixIdx) {
|
581
|
+
const BYTE* const matchPtr = prefixPtr + pos8 - prefixIdx;
|
582
|
+
assert(matchPtr < ip);
|
583
|
+
matchLength = LZ4_count(ip, matchPtr, matchlimit);
|
584
|
+
if (matchLength >= MINMATCH) {
|
585
|
+
DEBUGLOG(7, "found long match at pos %u (len=%u)", pos8, matchLength);
|
586
|
+
matchDistance = ipIndex - pos8;
|
587
|
+
goto _lz4mid_encode_sequence;
|
588
|
+
}
|
589
|
+
} else {
|
590
|
+
if (pos8 >= dictIdx) {
|
591
|
+
/* extDict match candidate */
|
592
|
+
const BYTE* const matchPtr = dictStart + (pos8 - dictIdx);
|
593
|
+
const size_t safeLen = MIN(prefixIdx - pos8, (size_t)(matchlimit - ip));
|
594
|
+
matchLength = LZ4_count(ip, matchPtr, ip + safeLen);
|
595
|
+
if (matchLength >= MINMATCH) {
|
596
|
+
DEBUGLOG(7, "found long match at ExtDict pos %u (len=%u)", pos8, matchLength);
|
597
|
+
matchDistance = ipIndex - pos8;
|
598
|
+
goto _lz4mid_encode_sequence;
|
599
|
+
}
|
600
|
+
}
|
601
|
+
}
|
602
|
+
} }
|
603
|
+
/* search short match */
|
604
|
+
{ U32 const h4 = LZ4MID_hash4Ptr(ip);
|
605
|
+
U32 const pos4 = hash4Table[h4];
|
606
|
+
assert(h4 < LZ4MID_HASHTABLESIZE);
|
607
|
+
assert(pos4 < ipIndex);
|
608
|
+
LZ4MID_addPosition(hash4Table, h4, ipIndex);
|
609
|
+
if (ipIndex - pos4 <= LZ4_DISTANCE_MAX) {
|
610
|
+
/* match candidate found */
|
611
|
+
if (pos4 >= prefixIdx) {
|
612
|
+
/* only search within prefix */
|
613
|
+
const BYTE* const matchPtr = prefixPtr + (pos4 - prefixIdx);
|
614
|
+
assert(matchPtr < ip);
|
615
|
+
assert(matchPtr >= prefixPtr);
|
616
|
+
matchLength = LZ4_count(ip, matchPtr, matchlimit);
|
617
|
+
if (matchLength >= MINMATCH) {
|
618
|
+
/* short match found, let's just check ip+1 for longer */
|
619
|
+
U32 const h8 = LZ4MID_hash8Ptr(ip+1);
|
620
|
+
U32 const pos8 = hash8Table[h8];
|
621
|
+
U32 const m2Distance = ipIndex + 1 - pos8;
|
622
|
+
matchDistance = ipIndex - pos4;
|
623
|
+
if ( m2Distance <= LZ4_DISTANCE_MAX
|
624
|
+
&& pos8 >= prefixIdx /* only search within prefix */
|
625
|
+
&& likely(ip < mflimit)
|
626
|
+
) {
|
627
|
+
const BYTE* const m2Ptr = prefixPtr + (pos8 - prefixIdx);
|
628
|
+
unsigned ml2 = LZ4_count(ip+1, m2Ptr, matchlimit);
|
629
|
+
if (ml2 > matchLength) {
|
630
|
+
LZ4MID_addPosition(hash8Table, h8, ipIndex+1);
|
631
|
+
ip++;
|
632
|
+
matchLength = ml2;
|
633
|
+
matchDistance = m2Distance;
|
634
|
+
} }
|
635
|
+
goto _lz4mid_encode_sequence;
|
636
|
+
}
|
637
|
+
} else {
|
638
|
+
if (pos4 >= dictIdx) {
|
639
|
+
/* extDict match candidate */
|
640
|
+
const BYTE* const matchPtr = dictStart + (pos4 - dictIdx);
|
641
|
+
const size_t safeLen = MIN(prefixIdx - pos4, (size_t)(matchlimit - ip));
|
642
|
+
matchLength = LZ4_count(ip, matchPtr, ip + safeLen);
|
643
|
+
if (matchLength >= MINMATCH) {
|
644
|
+
DEBUGLOG(7, "found match at ExtDict pos %u (len=%u)", pos4, matchLength);
|
645
|
+
matchDistance = ipIndex - pos4;
|
646
|
+
goto _lz4mid_encode_sequence;
|
647
|
+
}
|
648
|
+
}
|
649
|
+
}
|
650
|
+
} }
|
651
|
+
/* no match found in prefix */
|
652
|
+
if ( (dict == usingDictCtxHc)
|
653
|
+
&& (ipIndex - gDictEndIndex < LZ4_DISTANCE_MAX - 8) ) {
|
654
|
+
/* search a match into external dictionary */
|
655
|
+
LZ4HC_match_t dMatch = searchIntoDict(ip, ipIndex,
|
656
|
+
matchlimit,
|
657
|
+
ctx->dictCtx, gDictEndIndex);
|
658
|
+
if (dMatch.len >= MINMATCH) {
|
659
|
+
DEBUGLOG(7, "found Dictionary match (offset=%i)", dMatch.off);
|
660
|
+
assert(dMatch.back == 0);
|
661
|
+
matchLength = (unsigned)dMatch.len;
|
662
|
+
matchDistance = (unsigned)dMatch.off;
|
663
|
+
goto _lz4mid_encode_sequence;
|
664
|
+
}
|
665
|
+
}
|
666
|
+
/* no match found */
|
667
|
+
ip += 1 + ((ip-anchor) >> 9); /* skip faster over incompressible data */
|
668
|
+
continue;
|
669
|
+
|
670
|
+
_lz4mid_encode_sequence:
|
671
|
+
/* catch back */
|
672
|
+
while (((ip > anchor) & ((U32)(ip-prefixPtr) > matchDistance)) && (unlikely(ip[-1] == ip[-(int)matchDistance-1]))) {
|
673
|
+
ip--; matchLength++;
|
674
|
+
};
|
675
|
+
|
676
|
+
/* fill table with beginning of match */
|
677
|
+
ADDPOS8(ip+1, ipIndex+1);
|
678
|
+
ADDPOS8(ip+2, ipIndex+2);
|
679
|
+
ADDPOS4(ip+1, ipIndex+1);
|
680
|
+
|
681
|
+
/* encode */
|
682
|
+
{ BYTE* const saved_op = op;
|
683
|
+
/* LZ4HC_encodeSequence always updates @op; on success, it updates @ip and @anchor */
|
684
|
+
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
685
|
+
(int)matchLength, (int)matchDistance,
|
686
|
+
limit, oend) ) {
|
687
|
+
op = saved_op; /* restore @op value before failed LZ4HC_encodeSequence */
|
688
|
+
goto _lz4mid_dest_overflow;
|
689
|
+
}
|
690
|
+
}
|
691
|
+
|
692
|
+
/* fill table with end of match */
|
693
|
+
{ U32 endMatchIdx = (U32)(ip-prefixPtr) + prefixIdx;
|
694
|
+
U32 pos_m2 = endMatchIdx - 2;
|
695
|
+
if (pos_m2 < ilimitIdx) {
|
696
|
+
if (likely(ip - prefixPtr > 5)) {
|
697
|
+
ADDPOS8(ip-5, endMatchIdx - 5);
|
698
|
+
}
|
699
|
+
ADDPOS8(ip-3, endMatchIdx - 3);
|
700
|
+
ADDPOS8(ip-2, endMatchIdx - 2);
|
701
|
+
ADDPOS4(ip-2, endMatchIdx - 2);
|
702
|
+
ADDPOS4(ip-1, endMatchIdx - 1);
|
703
|
+
}
|
704
|
+
}
|
705
|
+
}
|
706
|
+
|
707
|
+
_lz4mid_last_literals:
|
708
|
+
/* Encode Last Literals */
|
709
|
+
{ size_t lastRunSize = (size_t)(iend - anchor); /* literals */
|
710
|
+
size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
|
711
|
+
size_t const totalSize = 1 + llAdd + lastRunSize;
|
712
|
+
if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
|
713
|
+
if (limit && (op + totalSize > oend)) {
|
714
|
+
if (limit == limitedOutput) return 0; /* not enough space in @dst */
|
715
|
+
/* adapt lastRunSize to fill 'dest' */
|
716
|
+
lastRunSize = (size_t)(oend - op) - 1 /*token*/;
|
717
|
+
llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
|
718
|
+
lastRunSize -= llAdd;
|
719
|
+
}
|
720
|
+
DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
|
721
|
+
ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
|
722
|
+
|
723
|
+
if (lastRunSize >= RUN_MASK) {
|
724
|
+
size_t accumulator = lastRunSize - RUN_MASK;
|
725
|
+
*op++ = (RUN_MASK << ML_BITS);
|
726
|
+
for(; accumulator >= 255 ; accumulator -= 255)
|
727
|
+
*op++ = 255;
|
728
|
+
*op++ = (BYTE) accumulator;
|
729
|
+
} else {
|
730
|
+
*op++ = (BYTE)(lastRunSize << ML_BITS);
|
731
|
+
}
|
732
|
+
assert(lastRunSize <= (size_t)(oend - op));
|
733
|
+
LZ4_memcpy(op, anchor, lastRunSize);
|
734
|
+
op += lastRunSize;
|
735
|
+
}
|
736
|
+
|
737
|
+
/* End */
|
738
|
+
DEBUGLOG(5, "compressed %i bytes into %i bytes", *srcSizePtr, (int)((char*)op - dst));
|
739
|
+
assert(ip >= (const BYTE*)src);
|
740
|
+
assert(ip <= iend);
|
741
|
+
*srcSizePtr = (int)(ip - (const BYTE*)src);
|
742
|
+
assert((char*)op >= dst);
|
743
|
+
assert(op <= oend);
|
744
|
+
assert((char*)op - dst < INT_MAX);
|
745
|
+
return (int)((char*)op - dst);
|
746
|
+
|
747
|
+
_lz4mid_dest_overflow:
|
748
|
+
if (limit == fillOutput) {
|
749
|
+
/* Assumption : @ip, @anchor, @optr and @matchLength must be set correctly */
|
750
|
+
size_t const ll = (size_t)(ip - anchor);
|
751
|
+
size_t const ll_addbytes = (ll + 240) / 255;
|
752
|
+
size_t const ll_totalCost = 1 + ll_addbytes + ll;
|
753
|
+
BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
|
754
|
+
DEBUGLOG(6, "Last sequence is overflowing : %u literals, %u remaining space",
|
755
|
+
(unsigned)ll, (unsigned)(oend-op));
|
756
|
+
if (op + ll_totalCost <= maxLitPos) {
|
757
|
+
/* ll validated; now adjust match length */
|
758
|
+
size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
|
759
|
+
size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
|
760
|
+
assert(maxMlSize < INT_MAX);
|
761
|
+
if ((size_t)matchLength > maxMlSize) matchLength= (unsigned)maxMlSize;
|
762
|
+
if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + matchLength >= MFLIMIT) {
|
763
|
+
DEBUGLOG(6, "Let's encode a last sequence (ll=%u, ml=%u)", (unsigned)ll, matchLength);
|
764
|
+
LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
765
|
+
(int)matchLength, (int)matchDistance,
|
766
|
+
notLimited, oend);
|
767
|
+
} }
|
768
|
+
DEBUGLOG(6, "Let's finish with a run of literals (%u bytes left)", (unsigned)(oend-op));
|
769
|
+
goto _lz4mid_last_literals;
|
770
|
+
}
|
771
|
+
/* compression failed */
|
772
|
+
return 0;
|
773
|
+
}
|
774
|
+
|
775
|
+
|
776
|
+
/**************************************
|
777
|
+
* HC Compression - Search
|
778
|
+
**************************************/
|
779
|
+
|
119
780
|
/* Update chains up to ip (excluded) */
|
120
781
|
LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
|
121
782
|
{
|
@@ -130,31 +791,14 @@ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
 
     while (idx < target) {
         U32 const h = LZ4HC_hashPtr(prefixPtr+idx-prefixIdx);
-        size_t delta = idx - hashTable[h];
-        if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX;
-        DELTANEXTU16(chainTable, idx) = (U16)delta;
-        hashTable[h] = idx;
-        idx++;
-    }
-
-    hc4->nextToUpdate = target;
-}
-
-/** LZ4HC_countBack() :
- * @return : negative value, nb of common bytes before ip/match */
-LZ4_FORCE_INLINE
-int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
-                    const BYTE* const iMin, const BYTE* const mMin)
-{
-    int back = 0;
-    int const min = (int)MAX(iMin - ip, mMin - match);
-    assert(min <= 0);
-    assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
-    assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
-    while ( (back > min)
-         && (ip[back-1] == match[back-1]) )
-        back--;
-    return back;
+        size_t delta = idx - hashTable[h];
+        if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX;
+        DELTANEXTU16(chainTable, idx) = (U16)delta;
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    hc4->nextToUpdate = target;
 }
 
 #if defined(_MSC_VER)
@@ -236,22 +880,21 @@ static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
 typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
 typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
 
-
+
+LZ4_FORCE_INLINE LZ4HC_match_t
 LZ4HC_InsertAndGetWiderMatch (
     LZ4HC_CCtx_internal* const hc4,
     const BYTE* const ip,
     const BYTE* const iLowLimit, const BYTE* const iHighLimit,
     int longest,
-    const BYTE** matchpos,
-    const BYTE** startpos,
     const int maxNbAttempts,
     const int patternAnalysis, const int chainSwap,
     const dictCtx_directive dict,
     const HCfavor_e favorDecSpeed)
 {
     U16* const chainTable = hc4->chainTable;
-    U32* const
-    const LZ4HC_CCtx_internal
+    U32* const hashTable = hc4->hashTable;
+    const LZ4HC_CCtx_internal* const dictCtx = hc4->dictCtx;
     const BYTE* const prefixPtr = hc4->prefixStart;
     const U32 prefixIdx = hc4->dictLimit;
     const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx;
@@ -267,22 +910,24 @@ LZ4HC_InsertAndGetWiderMatch (
     U32 matchIndex;
     repeat_state_e repeat = rep_untested;
     size_t srcPatternLength = 0;
+    int offset = 0, sBack = 0;
 
     DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
     /* First Match */
-    LZ4HC_Insert(hc4, ip);
-    matchIndex =
-    DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
-            matchIndex, lowestMatchIndex);
+    LZ4HC_Insert(hc4, ip);  /* insert all prior positions up to ip (excluded) */
+    matchIndex = hashTable[LZ4HC_hashPtr(ip)];
+    DEBUGLOG(7, "First candidate match for pos %u found at index %u / %u (lowestMatchIndex)",
+            ipIndex, matchIndex, lowestMatchIndex);
 
     while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
         int matchLength=0;
         nbAttempts--;
         assert(matchIndex < ipIndex);
         if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
-            /* do nothing
+            /* do nothing:
+             * favorDecSpeed intentionally skips matches with offset < 8 */
         } else if (matchIndex >= prefixIdx) {   /* within current Prefix */
-            const BYTE* const matchPtr = prefixPtr + matchIndex - prefixIdx;
+            const BYTE* const matchPtr = prefixPtr + (matchIndex - prefixIdx);
             assert(matchPtr < ip);
             assert(longest >= 1);
             if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
@@ -292,10 +937,11 @@ LZ4HC_InsertAndGetWiderMatch (
                 matchLength -= back;
                 if (matchLength > longest) {
                     longest = matchLength;
-
-
+                    offset = (int)(ipIndex - matchIndex);
+                    sBack = back;
+                    DEBUGLOG(7, "Found match of len=%i within prefix, offset=%i, back=%i", longest, offset, -back);
         }   }   }
-        } else {   /* lowestMatchIndex <= matchIndex < dictLimit */
+        } else {   /* lowestMatchIndex <= matchIndex < dictLimit : within Ext Dict */
             const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx);
             assert(matchIndex >= dictIdx);
             if ( likely(matchIndex <= prefixIdx - 4)
@@ -310,8 +956,9 @@ LZ4HC_InsertAndGetWiderMatch (
                 matchLength -= back;
                 if (matchLength > longest) {
                     longest = matchLength;
-
-
+                    offset = (int)(ipIndex - matchIndex);
+                    sBack = back;
+                    DEBUGLOG(7, "Found match of len=%i within dict, offset=%i, back=%i", longest, offset, -back);
         }   }   }
 
         if (chainSwap && matchLength==longest) {   /* better match => select a better chain */
@@ -344,6 +991,7 @@ LZ4HC_InsertAndGetWiderMatch (
             if (repeat == rep_untested) {
                 if ( ((pattern & 0xFFFF) == (pattern >> 16))
                   &  ((pattern & 0xFF)   == (pattern >> 24)) ) {
+                    DEBUGLOG(7, "Repeat pattern detected, char %02X", pattern >> 24);
                     repeat = rep_confirmed;
                     srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
                 } else {
@@ -352,7 +1000,7 @@ LZ4HC_InsertAndGetWiderMatch (
             if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
               && LZ4HC_protectDictEnd(prefixIdx, matchCandidateIdx) ) {
                 const int extDict = matchCandidateIdx < prefixIdx;
-                const BYTE* const matchPtr =
+                const BYTE* const matchPtr = extDict ? dictStart + (matchCandidateIdx - dictIdx) : prefixPtr + (matchCandidateIdx - prefixIdx);
                 if (LZ4_read32(matchPtr) == pattern) {  /* good candidate */
                     const BYTE* const iLimit = extDict ? dictEnd : iHighLimit;
                     size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
@@ -398,8 +1046,9 @@ LZ4HC_InsertAndGetWiderMatch (
                         if ((size_t)(ip - prefixPtr) + prefixIdx - matchIndex > LZ4_DISTANCE_MAX) break;
                         assert(maxML < 2 GB);
                         longest = (int)maxML;
-
-
+                        offset = (int)(ipIndex - matchIndex);
+                        assert(sBack == 0);
+                        DEBUGLOG(7, "Found repeat pattern match of len=%i, offset=%i", longest, offset);
                     }
                     { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
                       if (distToNextPattern > matchIndex) break;  /* avoid overflow */
@@ -416,11 +1065,12 @@ LZ4HC_InsertAndGetWiderMatch (
 
     if ( dict == usingDictCtxHc
       && nbAttempts > 0
-      &&
+      && withinStartDistance) {
         size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit;
         U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
         assert(dictEndOffset <= 1 GB);
         matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
+        if (dictMatchIndex>0) DEBUGLOG(7, "dictEndOffset = %zu, dictMatchIndex = %u => relative matchIndex = %i", dictEndOffset, dictMatchIndex, (int)dictMatchIndex - (int)dictEndOffset);
         while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
             const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + dictMatchIndex;
 
@@ -434,8 +1084,9 @@ LZ4HC_InsertAndGetWiderMatch (
                 mlt -= back;
                 if (mlt > longest) {
                     longest = mlt;
-
-
+                    offset = (int)(ipIndex - matchIndex);
+                    sBack = back;
+                    DEBUGLOG(7, "found match of length %i within extDictCtx", longest);
                 }   }
 
             { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);
@@ -443,112 +1094,29 @@ LZ4HC_InsertAndGetWiderMatch (
|
|
443
1094
|
matchIndex -= nextOffset;
|
444
1095
|
} } }
|
445
1096
|
|
446
|
-
|
1097
|
+
{ LZ4HC_match_t md;
|
1098
|
+
assert(longest >= 0);
|
1099
|
+
md.len = longest;
|
1100
|
+
md.off = offset;
|
1101
|
+
md.back = sBack;
|
1102
|
+
return md;
|
1103
|
+
}
|
447
1104
|
}
|
448
1105
|
|
449
|
-
LZ4_FORCE_INLINE
|
1106
|
+
LZ4_FORCE_INLINE LZ4HC_match_t
|
450
1107
|
LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
|
451
1108
|
const BYTE* const ip, const BYTE* const iLimit,
|
452
|
-
const BYTE** matchpos,
|
453
1109
|
const int maxNbAttempts,
|
454
1110
|
const int patternAnalysis,
|
455
1111
|
const dictCtx_directive dict)
|
456
1112
|
{
|
457
|
-
|
1113
|
+
DEBUGLOG(7, "LZ4HC_InsertAndFindBestMatch");
|
458
1114
|
/* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
|
459
1115
|
* but this won't be the case here, as we define iLowLimit==ip,
|
460
1116
|
* so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
|
461
|
-
return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1,
|
1117
|
+
return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio);
|
462
1118
|
}
|
463
1119
|
|
464
|
-
/* LZ4HC_encodeSequence() :
|
465
|
-
* @return : 0 if ok,
|
466
|
-
* 1 if buffer issue detected */
|
467
|
-
LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
|
468
|
-
const BYTE** _ip,
|
469
|
-
BYTE** _op,
|
470
|
-
const BYTE** _anchor,
|
471
|
-
int matchLength,
|
472
|
-
const BYTE* const match,
|
473
|
-
limitedOutput_directive limit,
|
474
|
-
BYTE* oend)
|
475
|
-
{
|
476
|
-
#define ip (*_ip)
|
477
|
-
#define op (*_op)
|
478
|
-
#define anchor (*_anchor)
|
479
|
-
|
480
|
-
size_t length;
|
481
|
-
BYTE* const token = op++;
|
482
|
-
|
483
|
-
#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
|
484
|
-
static const BYTE* start = NULL;
|
485
|
-
static U32 totalCost = 0;
|
486
|
-
U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
|
487
|
-
U32 const ll = (U32)(ip - anchor);
|
488
|
-
U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
|
489
|
-
U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
|
490
|
-
U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
|
491
|
-
if (start==NULL) start = anchor; /* only works for single segment */
|
492
|
-
/* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
|
493
|
-
DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
|
494
|
-
pos,
|
495
|
-
(U32)(ip - anchor), matchLength, (U32)(ip-match),
|
496
|
-
cost, totalCost);
|
497
|
-
totalCost += cost;
|
498
|
-
#endif
|
499
|
-
|
500
|
-
/* Encode Literal length */
|
501
|
-
length = (size_t)(ip - anchor);
|
502
|
-
LZ4_STATIC_ASSERT(notLimited == 0);
|
503
|
-
/* Check output limit */
|
504
|
-
if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
|
505
|
-
DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
|
506
|
-
(int)length, (int)(oend - op));
|
507
|
-
return 1;
|
508
|
-
}
|
509
|
-
if (length >= RUN_MASK) {
|
510
|
-
size_t len = length - RUN_MASK;
|
511
|
-
*token = (RUN_MASK << ML_BITS);
|
512
|
-
for(; len >= 255 ; len -= 255) *op++ = 255;
|
513
|
-
*op++ = (BYTE)len;
|
514
|
-
} else {
|
515
|
-
*token = (BYTE)(length << ML_BITS);
|
516
|
-
}
|
517
|
-
|
518
|
-
/* Copy Literals */
|
519
|
-
LZ4_wildCopy8(op, anchor, op + length);
|
520
|
-
op += length;
|
521
|
-
|
522
|
-
/* Encode Offset */
|
523
|
-
assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
|
524
|
-
LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
|
525
|
-
|
526
|
-
/* Encode MatchLength */
|
527
|
-
assert(matchLength >= MINMATCH);
|
528
|
-
length = (size_t)matchLength - MINMATCH;
|
529
|
-
if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
|
530
|
-
DEBUGLOG(6, "Not enough room to write match length");
|
531
|
-
return 1; /* Check output limit */
|
532
|
-
}
|
533
|
-
if (length >= ML_MASK) {
|
534
|
-
*token += ML_MASK;
|
535
|
-
length -= ML_MASK;
|
536
|
-
for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
|
537
|
-
if (length >= 255) { length -= 255; *op++ = 255; }
|
538
|
-
*op++ = (BYTE)length;
|
539
|
-
} else {
|
540
|
-
*token += (BYTE)(length);
|
541
|
-
}
|
542
|
-
|
543
|
-
/* Prepare next loop */
|
544
|
-
ip += matchLength;
|
545
|
-
anchor = ip;
|
546
|
-
|
547
|
-
return 0;
|
548
|
-
}
|
549
|
-
#undef ip
|
550
|
-
#undef op
|
551
|
-
#undef anchor
|
552
1120
|
|
553
1121
|
LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
|
554
1122
|
LZ4HC_CCtx_internal* const ctx,
|
@@ -574,127 +1142,130 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
|
|
574
1142
|
BYTE* op = (BYTE*) dest;
|
575
1143
|
BYTE* oend = op + maxOutputSize;
|
576
1144
|
|
577
|
-
int ml0, ml, ml2, ml3;
|
578
1145
|
const BYTE* start0;
|
579
|
-
const BYTE* ref0;
|
580
|
-
const BYTE* ref = NULL;
|
581
1146
|
const BYTE* start2 = NULL;
|
582
|
-
const BYTE* ref2 = NULL;
|
583
1147
|
const BYTE* start3 = NULL;
|
584
|
-
|
1148
|
+
LZ4HC_match_t m0, m1, m2, m3;
|
1149
|
+
const LZ4HC_match_t nomatch = {0, 0, 0};
|
585
1150
|
|
586
1151
|
/* init */
|
1152
|
+
DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict);
|
587
1153
|
*srcSizePtr = 0;
|
588
1154
|
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
589
1155
|
if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
|
590
1156
|
|
591
1157
|
/* Main Loop */
|
592
1158
|
while (ip <= mflimit) {
|
593
|
-
|
594
|
-
if (
|
1159
|
+
m1 = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, maxNbAttempts, patternAnalysis, dict);
|
1160
|
+
if (m1.len<MINMATCH) { ip++; continue; }
|
595
1161
|
|
596
1162
|
/* saved, in case we would skip too much */
|
597
|
-
start0 = ip;
|
1163
|
+
start0 = ip; m0 = m1;
|
598
1164
|
|
599
1165
|
_Search2:
|
600
|
-
|
601
|
-
|
602
|
-
|
1166
|
+
DEBUGLOG(7, "_Search2 (currently found match of size %i)", m1.len);
|
1167
|
+
if (ip+m1.len <= mflimit) {
|
1168
|
+
start2 = ip + m1.len - 2;
|
1169
|
+
m2 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
1170
|
+
start2, ip + 0, matchlimit, m1.len,
|
603
1171
|
maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
|
1172
|
+
start2 += m2.back;
|
604
1173
|
} else {
|
605
|
-
|
1174
|
+
m2 = nomatch; /* do not search further */
|
606
1175
|
}
|
607
1176
|
|
608
|
-
if (
|
1177
|
+
if (m2.len <= m1.len) { /* No better match => encode ML1 immediately */
|
609
1178
|
optr = op;
|
610
|
-
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
1179
|
+
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
1180
|
+
m1.len, m1.off,
|
1181
|
+
limit, oend) )
|
1182
|
+
goto _dest_overflow;
|
611
1183
|
continue;
|
612
1184
|
}
|
613
1185
|
|
614
1186
|
if (start0 < ip) { /* first match was skipped at least once */
|
615
|
-
if (start2 < ip +
|
616
|
-
ip = start0;
|
1187
|
+
if (start2 < ip + m0.len) { /* squeezing ML1 between ML0(original ML1) and ML2 */
|
1188
|
+
ip = start0; m1 = m0; /* restore initial Match1 */
|
617
1189
|
} }
|
618
1190
|
|
619
1191
|
/* Here, start0==ip */
|
620
1192
|
if ((start2 - ip) < 3) { /* First Match too small : removed */
|
621
|
-
ml = ml2;
|
622
1193
|
ip = start2;
|
623
|
-
|
1194
|
+
m1 = m2;
|
624
1195
|
goto _Search2;
|
625
1196
|
}
|
626
1197
|
|
627
1198
|
_Search3:
|
628
|
-
/* At this stage, we have :
|
629
|
-
* ml2 > ml1, and
|
630
|
-
* ip1+3 <= ip2 (usually < ip1+ml1) */
|
631
1199
|
if ((start2 - ip) < OPTIMAL_ML) {
|
632
1200
|
int correction;
|
633
|
-
int new_ml =
|
1201
|
+
int new_ml = m1.len;
|
634
1202
|
if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
|
635
|
-
if (ip+new_ml > start2 +
|
1203
|
+
if (ip+new_ml > start2 + m2.len - MINMATCH)
|
1204
|
+
new_ml = (int)(start2 - ip) + m2.len - MINMATCH;
|
636
1205
|
correction = new_ml - (int)(start2 - ip);
|
637
1206
|
if (correction > 0) {
|
638
1207
|
start2 += correction;
|
639
|
-
|
640
|
-
ml2 -= correction;
|
1208
|
+
m2.len -= correction;
|
641
1209
|
}
|
642
1210
|
}
|
643
|
-
/* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
|
644
1211
|
|
645
|
-
if (start2 +
|
646
|
-
|
647
|
-
|
1212
|
+
if (start2 + m2.len <= mflimit) {
|
1213
|
+
start3 = start2 + m2.len - 3;
|
1214
|
+
m3 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
1215
|
+
start3, start2, matchlimit, m2.len,
|
648
1216
|
maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
|
1217
|
+
start3 += m3.back;
|
649
1218
|
} else {
|
650
|
-
|
1219
|
+
m3 = nomatch; /* do not search further */
|
651
1220
|
}
|
652
1221
|
|
653
|
-
if (
|
1222
|
+
if (m3.len <= m2.len) { /* No better match => encode ML1 and ML2 */
|
654
1223
|
/* ip & ref are known; Now for ml */
|
655
|
-
if (start2 < ip+
|
1224
|
+
if (start2 < ip+m1.len) m1.len = (int)(start2 - ip);
|
656
1225
|
/* Now, encode 2 sequences */
|
657
1226
|
optr = op;
|
658
|
-
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
1227
|
+
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
1228
|
+
m1.len, m1.off,
|
1229
|
+
limit, oend) )
|
1230
|
+
goto _dest_overflow;
|
659
1231
|
ip = start2;
|
660
1232
|
optr = op;
|
661
|
-
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
662
|
-
|
663
|
-
|
1233
|
+
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
1234
|
+
m2.len, m2.off,
|
1235
|
+
limit, oend) ) {
|
1236
|
+
m1 = m2;
|
664
1237
|
goto _dest_overflow;
|
665
1238
|
}
|
666
1239
|
continue;
|
667
1240
|
}
|
668
1241
|
|
669
|
-
if (start3 < ip+
|
670
|
-
if (start3 >= (ip+
|
671
|
-
if (start2 < ip+
|
672
|
-
int correction = (int)(ip+
|
1242
|
+
if (start3 < ip+m1.len+3) { /* Not enough space for match 2 : remove it */
|
1243
|
+
if (start3 >= (ip+m1.len)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
|
1244
|
+
if (start2 < ip+m1.len) {
|
1245
|
+
int correction = (int)(ip+m1.len - start2);
|
673
1246
|
start2 += correction;
|
674
|
-
|
675
|
-
|
676
|
-
if (ml2 < MINMATCH) {
|
1247
|
+
m2.len -= correction;
|
1248
|
+
if (m2.len < MINMATCH) {
|
677
1249
|
start2 = start3;
|
678
|
-
|
679
|
-
ml2 = ml3;
|
1250
|
+
m2 = m3;
|
680
1251
|
}
|
681
1252
|
}
|
682
1253
|
|
683
1254
|
optr = op;
|
684
|
-
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
1255
|
+
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
1256
|
+
m1.len, m1.off,
|
1257
|
+
limit, oend) )
|
1258
|
+
goto _dest_overflow;
|
685
1259
|
ip = start3;
|
686
|
-
|
687
|
-
ml = ml3;
|
1260
|
+
m1 = m3;
|
688
1261
|
|
689
1262
|
start0 = start2;
|
690
|
-
|
691
|
-
ml0 = ml2;
|
1263
|
+
m0 = m2;
|
692
1264
|
goto _Search2;
|
693
1265
|
}
|
694
1266
|
|
695
1267
|
start2 = start3;
|
696
|
-
|
697
|
-
ml2 = ml3;
|
1268
|
+
m2 = m3;
|
698
1269
|
goto _Search3;
|
699
1270
|
}
|
700
1271
|
|
@@ -703,29 +1274,32 @@ _Search3:
         * let's write the first one ML1.
         * ip & ref are known; Now decide ml.
         */
-        if (start2 < ip+
+        if (start2 < ip+m1.len) {
             if ((start2 - ip) < OPTIMAL_ML) {
                 int correction;
-                if (
-                if (ip +
-
+                if (m1.len > OPTIMAL_ML) m1.len = OPTIMAL_ML;
+                if (ip + m1.len > start2 + m2.len - MINMATCH)
+                    m1.len = (int)(start2 - ip) + m2.len - MINMATCH;
+                correction = m1.len - (int)(start2 - ip);
                 if (correction > 0) {
                     start2 += correction;
-
-                    ml2 -= correction;
+                    m2.len -= correction;
                 }
             } else {
-
+                m1.len = (int)(start2 - ip);
             }
         }
         optr = op;
-        if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
+        if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
+                m1.len, m1.off,
+                limit, oend) )
+            goto _dest_overflow;
 
         /* ML2 becomes ML1 */
-        ip = start2;
+        ip = start2; m1 = m2;
 
         /* ML3 becomes ML2 */
-        start2 = start3;
+        start2 = start3; m2 = m3;
 
         /* let's find a new ML3 */
         goto _Search3;
@@ -765,7 +1339,7 @@ _last_literals:
 
 _dest_overflow:
     if (limit == fillOutput) {
-        /* Assumption : ip, anchor,
+        /* Assumption : @ip, @anchor, @optr and @m1 must be set correctly */
         size_t const ll = (size_t)(ip - anchor);
         size_t const ll_addbytes = (ll + 240) / 255;
         size_t const ll_totalCost = 1 + ll_addbytes + ll;
@@ -776,10 +1350,10 @@ _dest_overflow:
|
|
776
1350
|
/* ll validated; now adjust match length */
|
777
1351
|
size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
|
778
1352
|
size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
|
779
|
-
assert(maxMlSize < INT_MAX); assert(
|
780
|
-
if ((size_t)
|
781
|
-
if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 +
|
782
|
-
LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor),
|
1353
|
+
assert(maxMlSize < INT_MAX); assert(m1.len >= 0);
|
1354
|
+
if ((size_t)m1.len > maxMlSize) m1.len = (int)maxMlSize;
|
1355
|
+
if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + m1.len >= MFLIMIT) {
|
1356
|
+
LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, m1.off, notLimited, oend);
|
783
1357
|
} }
|
784
1358
|
goto _last_literals;
|
785
1359
|
}
|
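The `fillOutput` branch reconstructed above is the machinery behind the destSize API: when the destination is too small, the final match is shortened so the block still ends in a valid sequence. A minimal sketch of driving that path through the public entry point; the buffer sizes and the `main` wrapper are illustrative, not part of the package:

```c
/* Sketch: exercise the fillOutput / destSize path from the public API.
 * Assumes liblz4 with lz4hc.h is linked; sizes are illustrative only. */
#include <stdio.h>
#include <string.h>
#include "lz4hc.h"

int main(void)
{
    char src[4096];
    char dst[512];                       /* deliberately smaller than the input */
    int  srcSize = (int)sizeof(src);
    LZ4_streamHC_t* const state = LZ4_createStreamHC();
    memset(src, 'A', sizeof(src));
    if (state == NULL) return 1;

    /* Compress as much of src as fits into exactly sizeof(dst) bytes.
     * On return, srcSize holds the number of input bytes actually consumed. */
    {   int const cSize = LZ4_compress_HC_destSize(state, src, dst, &srcSize,
                                                   (int)sizeof(dst), LZ4HC_CLEVEL_DEFAULT);
        printf("consumed %d bytes, produced %d bytes\n", srcSize, cSize);
        LZ4_freeStreamHC(state);
        return (cSize > 0) ? 0 : 1;
    }
}
```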
```diff
@@ -796,54 +1370,34 @@ static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
                                    const dictCtx_directive dict,
                                    const HCfavor_e favorDecSpeed);

-
-
-
-
-
-
-
-
-
-
-
+LZ4_FORCE_INLINE int
+LZ4HC_compress_generic_internal (
+            LZ4HC_CCtx_internal* const ctx,
+            const char* const src,
+            char* const dst,
+            int* const srcSizePtr,
+            int const dstCapacity,
+            int cLevel,
+            const limitedOutput_directive limit,
+            const dictCtx_directive dict
+            )
 {
-
-
-        lz4hc_strat_e strat;
-        int nbSearches;
-        U32 targetLength;
-    } cParams_t;
-    static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
-        { lz4hc, 2, 16 }, /* 0, unused */
-        { lz4hc, 2, 16 }, /* 1, unused */
-        { lz4hc, 2, 16 }, /* 2, unused */
-        { lz4hc, 4, 16 }, /* 3 */
-        { lz4hc, 8, 16 }, /* 4 */
-        { lz4hc, 16, 16 }, /* 5 */
-        { lz4hc, 32, 16 }, /* 6 */
-        { lz4hc, 64, 16 }, /* 7 */
-        { lz4hc, 128, 16 }, /* 8 */
-        { lz4hc, 256, 16 }, /* 9 */
-        { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/
-        { lz4opt, 512,128 }, /*11 */
-        { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
-    };
-
-    DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
-                ctx, src, *srcSizePtr, limit);
+    DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d)",
+                src, *srcSizePtr);

     if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */
-    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */

     ctx->end += *srcSizePtr;
-
-    cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
-    { cParams_t const cParam = clTable[cLevel];
+    { cParams_t const cParam = LZ4HC_getCLevelParams(cLevel);
       HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
       int result;

-        if (cParam.strat ==
+        if (cParam.strat == lz4mid) {
+            result = LZ4MID_compress(ctx,
+                        src, dst, srcSizePtr, dstCapacity,
+                        limit, dict);
+        } else if (cParam.strat == lz4hc) {
             result = LZ4HC_compress_hashChain(ctx,
                         src, dst, srcSizePtr, dstCapacity,
                         cParam.nbSearches, limit, dict);
@@ -852,7 +1406,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
             result = LZ4HC_compress_optimal(ctx,
                         src, dst, srcSizePtr, dstCapacity,
                         cParam.nbSearches, cParam.targetLength, limit,
-                        cLevel
+                        cLevel >= LZ4HC_CLEVEL_MAX, /* ultra mode */
                         dict, favor);
         }
         if (result <= 0) ctx->dirty = 1;
```
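The dispatch above now selects between three strategies (`lz4mid`, `lz4hc`, `lz4opt`) based on the parameters returned by `LZ4HC_getCLevelParams()`; which public level maps to which strategy is decided inside that helper and not restated here. The sketch below only exercises the public one-shot entry point across the documented level constants from lz4hc.h; buffer contents and sizes are illustrative:

```c
/* Compress the same buffer at several HC levels and compare output sizes.
 * Uses only public API; error handling kept minimal. */
#include <stdio.h>
#include <stdlib.h>
#include "lz4.h"
#include "lz4hc.h"

int main(void)
{
    enum { SRC_SIZE = 64 * 1024 };
    char* const src = malloc(SRC_SIZE);
    char* const dst = malloc((size_t)LZ4_compressBound(SRC_SIZE));
    int const levels[] = { LZ4HC_CLEVEL_MIN,       /* fastest HC levels */
                           LZ4HC_CLEVEL_DEFAULT,   /* hash-chain search */
                           LZ4HC_CLEVEL_OPT_MIN,   /* optimal parser starts here */
                           LZ4HC_CLEVEL_MAX };     /* "ultra" mode */
    size_t i;
    if (src == NULL || dst == NULL) return 1;
    for (i = 0; i < SRC_SIZE; i++) src[i] = (char)(i % 61);   /* mildly compressible filler */

    for (i = 0; i < sizeof(levels)/sizeof(levels[0]); i++) {
        int const cSize = LZ4_compress_HC(src, dst, SRC_SIZE,
                                          LZ4_compressBound(SRC_SIZE), levels[i]);
        printf("level %2d -> %d bytes\n", levels[i], cSize);
    }
    free(src); free(dst);
    return 0;
}
```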
```diff
@@ -877,6 +1431,13 @@ LZ4HC_compress_generic_noDictCtx (
     return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx);
 }

+static int isStateCompatible(const LZ4HC_CCtx_internal* ctx1, const LZ4HC_CCtx_internal* ctx2)
+{
+    int const isMid1 = LZ4HC_getCLevelParams(ctx1->compressionLevel).strat == lz4mid;
+    int const isMid2 = LZ4HC_getCLevelParams(ctx2->compressionLevel).strat == lz4mid;
+    return !(isMid1 ^ isMid2);
+}
+
 static int
 LZ4HC_compress_generic_dictCtx (
         LZ4HC_CCtx_internal* const ctx,
@@ -893,7 +1454,7 @@ LZ4HC_compress_generic_dictCtx (
     if (position >= 64 KB) {
         ctx->dictCtx = NULL;
         return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
-    } else if (position == 0 && *srcSizePtr > 4 KB) {
+    } else if (position == 0 && *srcSizePtr > 4 KB && isStateCompatible(ctx, ctx->dictCtx)) {
         LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));
         LZ4HC_setExternalDict(ctx, (const BYTE *)src);
         ctx->compressionLevel = (short)cLevel;
```
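`isStateCompatible()` above guards the fast path that copies an attached dictionary context into the working stream, so the copy only happens when both streams use the same strategy family. A hedged sketch of the attach-dictionary pattern this affects; `compress_with_attached_dict` is a hypothetical helper, and `LZ4_attach_HC_dictionary` may require `LZ4_HC_STATIC_LINKING_ONLY` depending on the lz4 release:

```c
/* Sketch, under the assumptions above: index a dictionary once in its own
 * stream, attach it to a working stream, compress, then detach. */
#define LZ4_HC_STATIC_LINKING_ONLY
#include "lz4.h"
#include "lz4hc.h"

int compress_with_attached_dict(const char* dict, int dictSize,
                                const char* src, int srcSize,
                                char* dst, int dstCapacity, int cLevel)
{
    LZ4_streamHC_t* const dictStream = LZ4_createStreamHC();
    LZ4_streamHC_t* const workStream = LZ4_createStreamHC();
    int cSize = 0;

    if (dictStream != NULL && workStream != NULL) {
        LZ4_setCompressionLevel(dictStream, cLevel);
        LZ4_loadDictHC(dictStream, dict, dictSize);     /* index the dictionary once */

        LZ4_setCompressionLevel(workStream, cLevel);    /* same level family as the dict stream */
        LZ4_attach_HC_dictionary(workStream, dictStream);

        cSize = LZ4_compress_HC_continue(workStream, src, dst, srcSize, dstCapacity);
        LZ4_attach_HC_dictionary(workStream, NULL);     /* detach before further reuse */
    }
    LZ4_freeStreamHC(workStream);
    LZ4_freeStreamHC(dictStream);
    return cSize;
}
```

With this release, attaching a dictionary stream whose level maps to a different strategy family than the working stream simply skips the table-copy shortcut rather than producing mismatched state.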
```diff
@@ -965,6 +1526,7 @@ int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, in
     LZ4_streamHC_t state;
     LZ4_streamHC_t* const statePtr = &state;
 #endif
+    DEBUGLOG(5, "LZ4_compress_HC")
     cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel);
 #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
     FREEMEM(statePtr);
```
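`LZ4_compress_HC()` above places its working state on the stack or the heap depending on `LZ4HC_HEAPMODE`. Callers who want to control that allocation can pass their own state through the extState variant; `compress_with_own_state` is a hypothetical helper shown only as a sketch:

```c
/* Sketch: caller-owned HC state, reusable across calls, so nothing large is
 * put on the stack or malloc'd inside the library per call. */
#include <stdlib.h>
#include "lz4.h"
#include "lz4hc.h"

int compress_with_own_state(const char* src, int srcSize,
                            char* dst, int dstCapacity, int level)
{
    void* const state = malloc((size_t)LZ4_sizeofStateHC());   /* suitably aligned by malloc */
    int cSize = 0;
    if (state != NULL) {
        cSize = LZ4_compress_HC_extStateHC(state, src, dst, srcSize, dstCapacity, level);
        free(state);
    }
    return cSize;   /* 0 signals an error (e.g. dst too small in limited-output mode) */
}
```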
```diff
@@ -1032,18 +1594,16 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)

 void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
 {
-
-
+    LZ4HC_CCtx_internal* const s = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(5, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    if (s->dirty) {
         LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
     } else {
-
-
-
-
-
-    }
-    LZ4_streamHCPtr->internal_donotuse.prefixStart = NULL;
-    LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
+        assert(s->end >= s->prefixStart);
+        s->dictLimit += (U32)(s->end - s->prefixStart);
+        s->prefixStart = NULL;
+        s->end = NULL;
+        s->dictCtx = NULL;
     }
     LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
 }
```
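The rewritten `LZ4_resetStreamHC_fast()` above now rebases `dictLimit` by the length of the previous prefix instead of only clearing pointers, keeping indices consistent across reuses. The typical calling pattern is resetting one stream between unrelated jobs; `compress_many` below is a hypothetical helper with error handling elided:

```c
/* Sketch: reuse one HC stream across independent inputs, resetting cheaply
 * between them.  Each job is self-contained; no history carries over. */
#include "lz4.h"
#include "lz4hc.h"

void compress_many(const char* const* inputs, const int* sizes, int count,
                   char* dst, int dstCapacity, int level)
{
    LZ4_streamHC_t* const stream = LZ4_createStreamHC();
    int i;
    if (stream == NULL) return;
    for (i = 0; i < count; i++) {
        LZ4_resetStreamHC_fast(stream, level);   /* cheap reset between unrelated jobs */
        (void)LZ4_compress_HC_continue(stream, inputs[i], dst, sizes[i], dstCapacity);
        /* ... consume dst before the next iteration ... */
    }
    LZ4_freeStreamHC(stream);
}
```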
```diff
@@ -1067,7 +1627,9 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
               const char* dictionary, int dictSize)
 {
     LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
-
+    cParams_t cp;
+    DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d, clevel=%d)", LZ4_streamHCPtr, dictionary, dictSize, ctxPtr->compressionLevel);
+    assert(dictSize >= 0);
     assert(LZ4_streamHCPtr != NULL);
     if (dictSize > 64 KB) {
         dictionary += (size_t)dictSize - 64 KB;
@@ -1077,10 +1639,15 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
     {   int const cLevel = ctxPtr->compressionLevel;
         LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
         LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel);
+        cp = LZ4HC_getCLevelParams(cLevel);
     }
     LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary);
     ctxPtr->end = (const BYTE*)dictionary + dictSize;
-    if (
+    if (cp.strat == lz4mid) {
+        LZ4MID_fillHTable (ctxPtr, dictionary, (size_t)dictSize);
+    } else {
+        if (dictSize >= LZ4HC_HASHSIZE) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
+    }
     return dictSize;
 }

```
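`LZ4_loadDictHC()` now indexes the dictionary with whichever structure matches the stream's level (`LZ4MID_fillHTable` versus `LZ4HC_Insert`), but the calling convention is unchanged. A minimal sketch; `compress_with_dict` is a hypothetical helper:

```c
/* Sketch: dictionary compression with the streaming HC API.  The indexing
 * strategy is picked internally from the stream's compression level. */
#include "lz4.h"
#include "lz4hc.h"

int compress_with_dict(const char* dict, int dictSize,
                       const char* src, int srcSize,
                       char* dst, int dstCapacity, int level)
{
    LZ4_streamHC_t* const stream = LZ4_createStreamHC();
    int cSize = 0;
    if (stream != NULL) {
        LZ4_setCompressionLevel(stream, level);    /* set the level before loading the dict */
        LZ4_loadDictHC(stream, dict, dictSize);    /* only the last 64 KB are retained */
        cSize = LZ4_compress_HC_continue(stream, src, dst, srcSize, dstCapacity);
        LZ4_freeStreamHC(stream);
    }
    return cSize;   /* decompress later with LZ4_decompress_safe_usingDict() */
}
```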
```diff
@@ -1093,8 +1660,10 @@ void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC
 static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
 {
     DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
-    if (ctxPtr->end >= ctxPtr->prefixStart + 4)
-
+    if ( (ctxPtr->end >= ctxPtr->prefixStart + 4)
+      && (LZ4HC_getCLevelParams(ctxPtr->compressionLevel).strat != lz4mid) ) {
+        LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
+    }

     /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
     ctxPtr->lowLimit = ctxPtr->dictLimit;
@@ -1119,7 +1688,8 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
                 LZ4_streamHCPtr, src, *srcSizePtr, limit);
     assert(ctxPtr != NULL);
     /* auto-init if forgotten */
-    if (ctxPtr->prefixStart == NULL)
+    if (ctxPtr->prefixStart == NULL)
+        LZ4HC_init_internal (ctxPtr, (const BYTE*) src);

     /* Check overflow */
     if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > 2 GB) {
@@ -1140,7 +1710,8 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
         if (sourceEnd > dictEnd) sourceEnd = dictEnd;
         ctxPtr->lowLimit += (U32)(sourceEnd - ctxPtr->dictStart);
         ctxPtr->dictStart += (U32)(sourceEnd - ctxPtr->dictStart);
-
+        /* invalidate dictionary is it's too small */
+        if (ctxPtr->dictLimit - ctxPtr->lowLimit < LZ4HC_HASHSIZE) {
             ctxPtr->lowLimit = ctxPtr->dictLimit;
             ctxPtr->dictStart = ctxPtr->prefixStart;
     } } }
@@ -1150,6 +1721,7 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,

 int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)
 {
+    DEBUGLOG(5, "LZ4_compress_HC_continue");
     if (dstCapacity < LZ4_compressBound(srcSize))
         return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
     else
@@ -1162,7 +1734,6 @@ int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const ch
 }


-
 /* LZ4_saveDictHC :
  * save history content
  * into a user-provided buffer
@@ -1179,10 +1750,10 @@ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictS
     if (dictSize > prefixSize) dictSize = prefixSize;
     if (safeBuffer == NULL) assert(dictSize == 0);
     if (dictSize > 0)
-        LZ4_memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+        LZ4_memmove(safeBuffer, streamPtr->end - dictSize, (size_t)dictSize);
     { U32 const endIndex = (U32)(streamPtr->end - streamPtr->prefixStart) + streamPtr->dictLimit;
-        streamPtr->end = (const BYTE*)safeBuffer + dictSize;
-        streamPtr->prefixStart =
+        streamPtr->end = (safeBuffer == NULL) ? NULL : (const BYTE*)safeBuffer + dictSize;
+        streamPtr->prefixStart = (const BYTE*)safeBuffer;
         streamPtr->dictLimit = endIndex - (U32)dictSize;
         streamPtr->lowLimit = endIndex - (U32)dictSize;
         streamPtr->dictStart = streamPtr->prefixStart;
```
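`LZ4_saveDictHC()` above now tolerates `safeBuffer == NULL` and rebases the stream onto the saved buffer. The usual pattern is saving the last up-to-64 KB of history between chained blocks so the source buffer can be recycled; `compress_chained` is a hypothetical helper, and the framing of block sizes is left out:

```c
/* Sketch: chained-block compression that keeps history in a private buffer.
 * Assumes blocks of at most 64 KB; real code would also emit each block's size. */
#include <stdio.h>
#include "lz4.h"
#include "lz4hc.h"

void compress_chained(FILE* in, FILE* out, int level)
{
    static char src[64 * 1024];
    static char dictBuf[64 * 1024];
    static char dst[LZ4_COMPRESSBOUND(64 * 1024)];
    LZ4_streamHC_t* const stream = LZ4_createStreamHC();
    if (stream == NULL) return;
    LZ4_setCompressionLevel(stream, level);

    for (;;) {
        int const srcSize = (int)fread(src, 1, sizeof(src), in);
        if (srcSize <= 0) break;
        {   int const cSize = LZ4_compress_HC_continue(stream, src, dst, srcSize, (int)sizeof(dst));
            if (cSize <= 0) break;
            fwrite(dst, 1, (size_t)cSize, out);
        }
        /* keep the most recent history; src can now be overwritten safely */
        LZ4_saveDictHC(stream, dictBuf, (int)sizeof(dictBuf));
    }
    LZ4_freeStreamHC(stream);
}
```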
```diff
@@ -1193,75 +1764,6 @@ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictS
 }


-/***************************************************
-* Deprecated Functions
-***************************************************/
-
-/* These functions currently generate deprecation warnings */
-
-/* Wrappers for deprecated compression functions */
-int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
-int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
-int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
-int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
-int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
-int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
-int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
-int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
-int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
-int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
-
-
-/* Deprecated streaming functions */
-int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); }
-
-/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
- * @return : 0 on success, !=0 if error */
-int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
-{
-    LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
-    if (hc4 == NULL) return 1; /* init failed */
-    LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
-    return 0;
-}
-
-#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
-void* LZ4_createHC (const char* inputBuffer)
-{
-    LZ4_streamHC_t* const hc4 = LZ4_createStreamHC();
-    if (hc4 == NULL) return NULL; /* not enough memory */
-    LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
-    return hc4;
-}
-
-int LZ4_freeHC (void* LZ4HC_Data)
-{
-    if (!LZ4HC_Data) return 0; /* support free on NULL */
-    FREEMEM(LZ4HC_Data);
-    return 0;
-}
-#endif
-
-int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
-{
-    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited);
-}
-
-int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
-{
-    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
-}
-
-char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
-{
-    LZ4_streamHC_t* const ctx = (LZ4_streamHC_t*)LZ4HC_Data;
-    const BYTE* bufferStart = ctx->internal_donotuse.prefixStart - ctx->internal_donotuse.dictLimit + ctx->internal_donotuse.lowLimit;
-    LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel);
-    /* avoid const char * -> char * conversion warning :( */
-    return (char*)(uptrval)bufferStart;
-}
-
-
 /* ================================================
  * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX])
  * ===============================================*/
@@ -1282,7 +1784,6 @@ LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
     return price;
 }

-
 /* requires mlen >= MINMATCH */
 LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
 {
@@ -1298,12 +1799,6 @@ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
     return price;
 }

-
-typedef struct {
-    int off;
-    int len;
-} LZ4HC_match_t;
-
 LZ4_FORCE_INLINE LZ4HC_match_t
 LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
     const BYTE* ip, const BYTE* const iHighLimit,
@@ -1311,19 +1806,17 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
     const dictCtx_directive dict,
     const HCfavor_e favorDecSpeed)
 {
-    LZ4HC_match_t
-    const BYTE* matchPtr = NULL;
+    LZ4HC_match_t const match0 = { 0 , 0, 0 };
     /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
      * but this won't be the case here, as we define iLowLimit==ip,
-
-
-
+    ** so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+    LZ4HC_match_t md = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
+    assert(md.back == 0);
+    if (md.len <= minLen) return match0;
     if (favorDecSpeed) {
-        if ((
+        if ((md.len>18) & (md.len<=36)) md.len=18; /* favor dec.speed (shortcut) */
     }
-
-    match.off = (int)(ip-matchPtr);
-    return match;
+    return md;
 }

```
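The `favorDecSpeed` shortcut in `LZ4HC_FindLongerMatch()` above is driven by a per-stream flag. A hedged sketch of enabling it from user code; to my understanding `LZ4_favorDecompressionSpeed()` is declared behind `LZ4_HC_STATIC_LINKING_ONLY` and only influences the optimal-parser levels (from `LZ4HC_CLEVEL_OPT_MIN` upward), and `compress_favor_dec_speed` is a hypothetical helper:

```c
/* Sketch, under the assumptions above: trade a little compression ratio for
 * faster decoding at the highest HC levels. */
#define LZ4_HC_STATIC_LINKING_ONLY
#include "lz4.h"
#include "lz4hc.h"

int compress_favor_dec_speed(const char* src, int srcSize,
                             char* dst, int dstCapacity)
{
    LZ4_streamHC_t* const stream = LZ4_createStreamHC();
    int cSize = 0;
    if (stream != NULL) {
        LZ4_resetStreamHC_fast(stream, LZ4HC_CLEVEL_MAX);
        LZ4_favorDecompressionSpeed(stream, 1);   /* bias the parser toward longer-to-decode-faster sequences */
        cSize = LZ4_compress_HC_continue(stream, src, dst, srcSize, dstCapacity);
        LZ4_freeStreamHC(stream);
    }
    return cSize;
}
```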
```diff
@@ -1356,7 +1849,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
     BYTE* opSaved = (BYTE*) dst;
     BYTE* oend = op + dstCapacity;
     int ovml = MINMATCH; /* overflow - last sequence */
-
+    int ovoff = 0;

     /* init */
 #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
@@ -1379,11 +1872,10 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
         if ((size_t)firstMatch.len > sufficient_len) {
             /* good enough solution : immediate encoding */
             int const firstML = firstMatch.len;
-            const BYTE* const matchPos = ip - firstMatch.off;
             opSaved = op;
-            if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML,
+            if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, firstMatch.off, limit, oend) ) { /* updates ip, op and anchor */
                 ovml = firstML;
-
+                ovoff = firstMatch.off;
                 goto _dest_overflow;
             }
             continue;
@@ -1401,11 +1893,11 @@
                         rPos, cost, opt[rPos].litlen);
         } }
         /* set prices using initial match */
-        { int
-          int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
+        { int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
           int const offset = firstMatch.off;
+          int mlen;
           assert(matchML < LZ4_OPT_NUM);
-          for ( ; mlen <= matchML ; mlen++) {
+          for (mlen = MINMATCH ; mlen <= matchML ; mlen++) {
               int const cost = LZ4HC_sequencePrice(llen, mlen);
               opt[mlen].mlen = mlen;
               opt[mlen].off = offset;
@@ -1557,9 +2049,9 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */
             assert(ml >= MINMATCH);
             assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
             opSaved = op;
-            if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml,
+            if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset, limit, oend) ) { /* updates ip, op and anchor */
                 ovml = ml;
-
+                ovoff = offset;
                 goto _dest_overflow;
     } } }
     } /* while (ip <= mflimit) */
@@ -1618,14 +2110,83 @@ if (limit == fillOutput) {
         if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
             DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
             DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
-            LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml,
+            LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovoff, notLimited, oend);
             DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
     } }
     goto _last_literals;
 }
 _return_label:
 #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
-    FREEMEM(opt);
+    if (opt) FREEMEM(opt);
 #endif
     return retval;
 }
+
+
+/***************************************************
+* Deprecated Functions
+***************************************************/
+
+/* These functions currently generate deprecation warnings */
+
+/* Wrappers for deprecated compression functions */
+int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
+
+
+/* Deprecated streaming functions */
+int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); }
+
+/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
+ * @return : 0 on success, !=0 if error */
+int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
+{
+    LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
+    if (hc4 == NULL) return 1; /* init failed */
+    LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+    return 0;
+}
+
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+void* LZ4_createHC (const char* inputBuffer)
+{
+    LZ4_streamHC_t* const hc4 = LZ4_createStreamHC();
+    if (hc4 == NULL) return NULL; /* not enough memory */
+    LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+    return hc4;
+}
+
+int LZ4_freeHC (void* LZ4HC_Data)
+{
+    if (!LZ4HC_Data) return 0; /* support free on NULL */
+    FREEMEM(LZ4HC_Data);
+    return 0;
+}
+#endif
+
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
+{
+    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited);
+}
+
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
+{
+    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
+}
+
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
+{
+    LZ4HC_CCtx_internal* const s = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
+    const BYTE* const bufferStart = s->prefixStart - s->dictLimit + s->lowLimit;
+    LZ4_resetStreamHC_fast((LZ4_streamHC_t*)LZ4HC_Data, s->compressionLevel);
+    /* ugly conversion trick, required to evade (const char*) -> (char*) cast-qual warning :( */
+    return (char*)(uptrval)bufferStart;
+}
```
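The deprecated wrappers re-added at the end of the file only forward to the current `LZ4_compress_HC*` entry points, so migration is mechanical. A small before/after sketch; both helpers are hypothetical:

```c
/* Sketch: moving off a deprecated wrapper onto the current API. */
#include "lz4.h"
#include "lz4hc.h"

int old_style(const char* src, char* dst, int srcSize)
{
    /* deprecated: no destination bound, compression level 0 (treated as default) */
    return LZ4_compressHC(src, dst, srcSize);
}

int new_style(const char* src, char* dst, int srcSize, int dstCapacity)
{
    /* current API: explicit destination capacity and explicit level */
    return LZ4_compress_HC(src, dst, srcSize, dstCapacity, LZ4HC_CLEVEL_DEFAULT);
}
```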