extlz4 0.2.4.3 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +5 -0
- data/README.md +3 -3
- data/contrib/lz4/INSTALL +1 -0
- data/contrib/lz4/NEWS +13 -0
- data/contrib/lz4/README.md +1 -0
- data/contrib/lz4/circle.yml +0 -1
- data/contrib/lz4/lib/README.md +28 -28
- data/contrib/lz4/lib/lz4.c +139 -53
- data/contrib/lz4/lib/lz4.h +85 -69
- data/contrib/lz4/lib/lz4frame.c +63 -57
- data/contrib/lz4/lib/lz4frame_static.h +27 -16
- data/contrib/lz4/lib/lz4hc.c +208 -122
- data/contrib/lz4/lib/lz4hc.h +23 -29
- data/contrib/lz4/lib/lz4opt.h +247 -257
- data/contrib/lz4/lib/xxhash.c +16 -16
- data/lib/extlz4/version.rb +1 -1
- metadata +1 -1
@@ -43,7 +43,15 @@ extern "C" {
|
|
43
43
|
/* lz4frame_static.h should be used solely in the context of static linking.
|
44
44
|
* It contains definitions which are not stable and may change in the future.
|
45
45
|
* Never use it in the context of DLL linking.
|
46
|
+
*
|
47
|
+
* Defining LZ4F_PUBLISH_STATIC_FUNCTIONS allows one to override this. Use at
|
48
|
+
* your own risk.
|
46
49
|
*/
|
50
|
+
#ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
|
51
|
+
#define LZ4FLIB_STATIC_API LZ4FLIB_API
|
52
|
+
#else
|
53
|
+
#define LZ4FLIB_STATIC_API
|
54
|
+
#endif
|
47
55
|
|
48
56
|
|
49
57
|
/* --- Dependency --- */
|
@@ -79,7 +87,7 @@ extern "C" {
|
|
79
87
|
/* enum list is exposed, to handle specific errors */
|
80
88
|
typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
|
81
89
|
|
82
|
-
LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
|
90
|
+
LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
|
83
91
|
|
84
92
|
|
85
93
|
|
@@ -93,8 +101,8 @@ typedef struct LZ4F_CDict_s LZ4F_CDict;
|
|
93
101
|
* LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
|
94
102
|
* LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
|
95
103
|
* `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict */
|
96
|
-
LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
|
97
|
-
void LZ4F_freeCDict(LZ4F_CDict* CDict);
|
104
|
+
LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
|
105
|
+
LZ4FLIB_STATIC_API void LZ4F_freeCDict(LZ4F_CDict* CDict);
|
98
106
|
|
99
107
|
|
100
108
|
/*! LZ4F_compressFrame_usingCDict() :
|
@@ -106,10 +114,11 @@ void LZ4F_freeCDict(LZ4F_CDict* CDict);
|
|
106
114
|
* but it's not recommended, as it's the only way to provide dictID in the frame header.
|
107
115
|
* @return : number of bytes written into dstBuffer.
|
108
116
|
* or an error code if it fails (can be tested using LZ4F_isError()) */
|
109
|
-
size_t LZ4F_compressFrame_usingCDict(
|
110
|
-
|
111
|
-
|
112
|
-
|
117
|
+
LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
|
118
|
+
void* dst, size_t dstCapacity,
|
119
|
+
const void* src, size_t srcSize,
|
120
|
+
const LZ4F_CDict* cdict,
|
121
|
+
const LZ4F_preferences_t* preferencesPtr);
|
113
122
|
|
114
123
|
|
115
124
|
/*! LZ4F_compressBegin_usingCDict() :
|
@@ -119,21 +128,23 @@ size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
|
|
119
128
|
* however, it's the only way to provide dictID in the frame header.
|
120
129
|
* @return : number of bytes written into dstBuffer for the header,
|
121
130
|
* or an error code (which can be tested using LZ4F_isError()) */
|
122
|
-
size_t LZ4F_compressBegin_usingCDict(
|
123
|
-
|
124
|
-
|
125
|
-
|
131
|
+
LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
|
132
|
+
LZ4F_cctx* cctx,
|
133
|
+
void* dstBuffer, size_t dstCapacity,
|
134
|
+
const LZ4F_CDict* cdict,
|
135
|
+
const LZ4F_preferences_t* prefsPtr);
|
126
136
|
|
127
137
|
|
128
138
|
/*! LZ4F_decompress_usingDict() :
|
129
139
|
* Same as LZ4F_decompress(), using a predefined dictionary.
|
130
140
|
* Dictionary is used "in place", without any preprocessing.
|
131
141
|
* It must remain accessible throughout the entire frame decoding. */
|
132
|
-
size_t LZ4F_decompress_usingDict(
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
142
|
+
LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
|
143
|
+
LZ4F_dctx* dctxPtr,
|
144
|
+
void* dstBuffer, size_t* dstSizePtr,
|
145
|
+
const void* srcBuffer, size_t* srcSizePtr,
|
146
|
+
const void* dict, size_t dictSize,
|
147
|
+
const LZ4F_decompressOptions_t* decompressOptionsPtr);
|
137
148
|
|
138
149
|
|
139
150
|
#if defined (__cplusplus)
|
data/contrib/lz4/lib/lz4hc.c
CHANGED
@@ -49,6 +49,7 @@
|
|
49
49
|
|
50
50
|
|
51
51
|
/*=== Dependency ===*/
|
52
|
+
#define LZ4_HC_STATIC_LINKING_ONLY
|
52
53
|
#include "lz4hc.h"
|
53
54
|
|
54
55
|
|
@@ -96,7 +97,7 @@ static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
|
|
96
97
|
|
97
98
|
|
98
99
|
/* Update chains up to ip (excluded) */
|
99
|
-
|
100
|
+
LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
|
100
101
|
{
|
101
102
|
U16* const chainTable = hc4->chainTable;
|
102
103
|
U32* const hashTable = hc4->hashTable;
|
@@ -116,56 +117,73 @@ FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
|
|
116
117
|
hc4->nextToUpdate = target;
|
117
118
|
}
|
118
119
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
120
|
+
/** LZ4HC_countBack() :
|
121
|
+
* @return : a value <= 0, whose magnitude is the number of common bytes before ip/match */
|
122
|
+
LZ4_FORCE_INLINE
|
123
|
+
int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
|
124
|
+
const BYTE* const iMin, const BYTE* const mMin)
|
124
125
|
{
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
int nbAttempts = maxNbAttempts;
|
133
|
-
size_t ml = 0;
|
126
|
+
int back=0;
|
127
|
+
while ( (ip+back > iMin)
|
128
|
+
&& (match+back > mMin)
|
129
|
+
&& (ip[back-1] == match[back-1]))
|
130
|
+
back--;
|
131
|
+
return back;
|
132
|
+
}
|
134
133
|
|
135
|
-
|
136
|
-
|
137
|
-
|
134
|
+
/* LZ4HC_countPattern() :
|
135
|
+
* pattern32 must be a sample of a repetitive pattern of length 1, 2 or 4 (but not 3!) */
|
136
|
+
static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
|
137
|
+
{
|
138
|
+
const BYTE* const iStart = ip;
|
139
|
+
reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
|
140
|
+
|
141
|
+
while (likely(ip < iEnd-(sizeof(pattern)-1))) {
|
142
|
+
reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
|
143
|
+
if (!diff) { ip+=sizeof(pattern); continue; }
|
144
|
+
ip += LZ4_NbCommonBytes(diff);
|
145
|
+
return (unsigned)(ip - iStart);
|
146
|
+
}
|
138
147
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
const BYTE* const match = dictBase + matchIndex;
|
151
|
-
if (LZ4_read32(match) == LZ4_read32(ip)) {
|
152
|
-
size_t mlt;
|
153
|
-
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
154
|
-
if (vLimit > iLimit) vLimit = iLimit;
|
155
|
-
mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
|
156
|
-
if ((ip+mlt == vLimit) && (vLimit < iLimit))
|
157
|
-
mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
|
158
|
-
if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */
|
159
|
-
}
|
148
|
+
if (LZ4_isLittleEndian()) {
|
149
|
+
reg_t patternByte = pattern;
|
150
|
+
while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
|
151
|
+
ip++; patternByte >>= 8;
|
152
|
+
}
|
153
|
+
} else { /* big endian */
|
154
|
+
U32 bitOffset = (sizeof(pattern)*8) - 8;
|
155
|
+
while (ip < iEnd) {
|
156
|
+
BYTE const byte = (BYTE)(pattern >> bitOffset);
|
157
|
+
if (*ip != byte) break;
|
158
|
+
ip ++; bitOffset -= 8;
|
160
159
|
}
|
161
|
-
matchIndex -= DELTANEXTU16(chainTable, matchIndex);
|
162
160
|
}
|
163
161
|
|
164
|
-
return (
|
162
|
+
return (unsigned)(ip - iStart);
|
163
|
+
}
|
164
|
+
|
165
|
+
/* LZ4HC_reverseCountPattern() :
|
166
|
+
* pattern must be a sample of a repetitive pattern of length 1, 2 or 4 (but not 3!)
|
167
|
+
* read using natural platform endianness */
|
168
|
+
static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
|
169
|
+
{
|
170
|
+
const BYTE* const iStart = ip;
|
171
|
+
|
172
|
+
while (likely(ip >= iLow+4)) {
|
173
|
+
if (LZ4_read32(ip-4) != pattern) break;
|
174
|
+
ip -= 4;
|
175
|
+
}
|
176
|
+
{ const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */
|
177
|
+
while (likely(ip>iLow)) {
|
178
|
+
if (ip[-1] != *bytePtr) break;
|
179
|
+
ip--; bytePtr--;
|
180
|
+
} }
|
181
|
+
return (unsigned)(iStart - ip);
|
165
182
|
}
|
166
183
|
|
184
|
+
typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
|
167
185
|
|
168
|
-
|
186
|
+
LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
|
169
187
|
LZ4HC_CCtx_internal* hc4,
|
170
188
|
const BYTE* const ip,
|
171
189
|
const BYTE* const iLowLimit,
|
@@ -173,67 +191,126 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
|
|
173
191
|
int longest,
|
174
192
|
const BYTE** matchpos,
|
175
193
|
const BYTE** startpos,
|
176
|
-
const int maxNbAttempts
|
194
|
+
const int maxNbAttempts,
|
195
|
+
const int patternAnalysis)
|
177
196
|
{
|
178
197
|
U16* const chainTable = hc4->chainTable;
|
179
198
|
U32* const HashTable = hc4->hashTable;
|
180
199
|
const BYTE* const base = hc4->base;
|
181
200
|
const U32 dictLimit = hc4->dictLimit;
|
182
201
|
const BYTE* const lowPrefixPtr = base + dictLimit;
|
183
|
-
const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) -
|
202
|
+
const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE;
|
184
203
|
const BYTE* const dictBase = hc4->dictBase;
|
185
204
|
int const delta = (int)(ip-iLowLimit);
|
186
205
|
int nbAttempts = maxNbAttempts;
|
206
|
+
U32 const pattern = LZ4_read32(ip);
|
187
207
|
U32 matchIndex;
|
208
|
+
repeat_state_e repeat = rep_untested;
|
209
|
+
size_t srcPatternLength = 0;
|
188
210
|
|
189
|
-
|
211
|
+
DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
|
190
212
|
/* First Match */
|
191
213
|
LZ4HC_Insert(hc4, ip);
|
192
214
|
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
|
215
|
+
DEBUGLOG(7, "First match at index %u / %u (lowLimit)",
|
216
|
+
matchIndex, lowLimit);
|
193
217
|
|
194
218
|
while ((matchIndex>=lowLimit) && (nbAttempts)) {
|
219
|
+
DEBUGLOG(7, "remaining attempts : %i", nbAttempts);
|
195
220
|
nbAttempts--;
|
196
221
|
if (matchIndex >= dictLimit) {
|
197
222
|
const BYTE* const matchPtr = base + matchIndex;
|
198
223
|
if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
|
199
|
-
if (LZ4_read32(matchPtr) ==
|
224
|
+
if (LZ4_read32(matchPtr) == pattern) {
|
200
225
|
int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
|
226
|
+
#if 0
|
227
|
+
/* more generic but unfortunately slower on clang */
|
228
|
+
int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr);
|
229
|
+
#else
|
201
230
|
int back = 0;
|
202
|
-
|
203
231
|
while ( (ip+back > iLowLimit)
|
204
232
|
&& (matchPtr+back > lowPrefixPtr)
|
205
233
|
&& (ip[back-1] == matchPtr[back-1])) {
|
206
234
|
back--;
|
207
235
|
}
|
208
|
-
|
236
|
+
#endif
|
209
237
|
mlt -= back;
|
210
238
|
|
211
239
|
if (mlt > longest) {
|
212
240
|
longest = mlt;
|
213
241
|
*matchpos = matchPtr+back;
|
214
242
|
*startpos = ip+back;
|
215
|
-
|
216
|
-
|
243
|
+
} }
|
244
|
+
}
|
245
|
+
} else { /* matchIndex < dictLimit */
|
217
246
|
const BYTE* const matchPtr = dictBase + matchIndex;
|
218
|
-
if (LZ4_read32(matchPtr) ==
|
247
|
+
if (LZ4_read32(matchPtr) == pattern) {
|
219
248
|
int mlt;
|
220
|
-
int back=0;
|
249
|
+
int back = 0;
|
221
250
|
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
222
251
|
if (vLimit > iHighLimit) vLimit = iHighLimit;
|
223
252
|
mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
|
224
253
|
if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
|
225
254
|
mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
|
226
|
-
while ((ip+back > iLowLimit)
|
255
|
+
while ( (ip+back > iLowLimit)
|
256
|
+
&& (matchIndex+back > lowLimit)
|
257
|
+
&& (ip[back-1] == matchPtr[back-1]))
|
258
|
+
back--;
|
227
259
|
mlt -= back;
|
228
|
-
if (mlt > longest) {
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
260
|
+
if (mlt > longest) {
|
261
|
+
longest = mlt;
|
262
|
+
*matchpos = base + matchIndex + back;
|
263
|
+
*startpos = ip + back;
|
264
|
+
} } }
|
265
|
+
|
266
|
+
{ U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
|
267
|
+
matchIndex -= nextOffset;
|
268
|
+
if (patternAnalysis && nextOffset==1) {
|
269
|
+
/* may be a repeated pattern */
|
270
|
+
if (repeat == rep_untested) {
|
271
|
+
if ( ((pattern & 0xFFFF) == (pattern >> 16))
|
272
|
+
& ((pattern & 0xFF) == (pattern >> 24)) ) {
|
273
|
+
repeat = rep_confirmed;
|
274
|
+
srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4;
|
275
|
+
} else {
|
276
|
+
repeat = rep_not;
|
277
|
+
} }
|
278
|
+
if ( (repeat == rep_confirmed)
|
279
|
+
&& (matchIndex >= dictLimit) ) { /* same segment only */
|
280
|
+
const BYTE* const matchPtr = base + matchIndex;
|
281
|
+
if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
|
282
|
+
size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
|
283
|
+
const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
|
284
|
+
size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern);
|
285
|
+
size_t const currentSegmentLength = backLength + forwardPatternLength;
|
286
|
+
|
287
|
+
if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
|
288
|
+
&& (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
|
289
|
+
matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
|
290
|
+
} else {
|
291
|
+
matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
|
292
|
+
}
|
293
|
+
} } } }
|
294
|
+
} /* while ((matchIndex>=lowLimit) && (nbAttempts)) */
|
233
295
|
|
234
296
|
return longest;
|
235
297
|
}
|
236
298
|
|
299
|
+
LZ4_FORCE_INLINE
|
300
|
+
int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
|
301
|
+
const BYTE* const ip, const BYTE* const iLimit,
|
302
|
+
const BYTE** matchpos,
|
303
|
+
const int maxNbAttempts,
|
304
|
+
const int patternAnalysis)
|
305
|
+
{
|
306
|
+
const BYTE* uselessPtr = ip;
|
307
|
+
/* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
|
308
|
+
* but this won't be the case here, as we define iLowLimit==ip,
|
309
|
+
* so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
|
310
|
+
return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis);
|
311
|
+
}
|
312
|
+
|
313
|
+
|
237
314
|
|
238
315
|
typedef enum {
|
239
316
|
noLimit = 0,
|
@@ -241,14 +318,10 @@ typedef enum {
|
|
241
318
|
limitedDestSize = 2,
|
242
319
|
} limitedOutput_directive;
|
243
320
|
|
244
|
-
#ifndef LZ4HC_DEBUG
|
245
|
-
# define LZ4HC_DEBUG 0
|
246
|
-
#endif
|
247
|
-
|
248
321
|
/* LZ4HC_encodeSequence() :
|
249
322
|
* @return : 0 if ok,
|
250
323
|
* 1 if buffer issue detected */
|
251
|
-
|
324
|
+
LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
|
252
325
|
const BYTE** ip,
|
253
326
|
BYTE** op,
|
254
327
|
const BYTE** anchor,
|
@@ -260,9 +333,21 @@ FORCE_INLINE int LZ4HC_encodeSequence (
|
|
260
333
|
size_t length;
|
261
334
|
BYTE* const token = (*op)++;
|
262
335
|
|
263
|
-
#if
|
264
|
-
|
265
|
-
|
336
|
+
#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
|
337
|
+
static const BYTE* start = NULL;
|
338
|
+
static U32 totalCost = 0;
|
339
|
+
U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
|
340
|
+
U32 const ll = (U32)(*ip - *anchor);
|
341
|
+
U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
|
342
|
+
U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
|
343
|
+
U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
|
344
|
+
if (start==NULL) start = *anchor; /* only works for single segment */
|
345
|
+
//g_debuglog_enable = (pos >= 2228) & (pos <= 2262);
|
346
|
+
DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
|
347
|
+
pos,
|
348
|
+
(U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
|
349
|
+
cost, totalCost);
|
350
|
+
totalCost += cost;
|
266
351
|
#endif
|
267
352
|
|
268
353
|
/* Encode Literal length */
|
@@ -285,6 +370,7 @@ FORCE_INLINE int LZ4HC_encodeSequence (
|
|
285
370
|
LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
|
286
371
|
|
287
372
|
/* Encode MatchLength */
|
373
|
+
assert(matchLength >= MINMATCH);
|
288
374
|
length = (size_t)(matchLength - MINMATCH);
|
289
375
|
if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
|
290
376
|
if (length >= ML_MASK) {
|
@@ -319,6 +405,7 @@ static int LZ4HC_compress_hashChain (
|
|
319
405
|
)
|
320
406
|
{
|
321
407
|
const int inputSize = *srcSizePtr;
|
408
|
+
const int patternAnalysis = (maxNbAttempts > 64); /* levels 8+ */
|
322
409
|
|
323
410
|
const BYTE* ip = (const BYTE*) source;
|
324
411
|
const BYTE* anchor = ip;
|
@@ -341,19 +428,13 @@ static int LZ4HC_compress_hashChain (
|
|
341
428
|
|
342
429
|
/* init */
|
343
430
|
*srcSizePtr = 0;
|
344
|
-
if (limit == limitedDestSize
|
345
|
-
if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */
|
346
|
-
|
347
|
-
ctx->end += inputSize;
|
348
|
-
if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support limitations LZ4 decompressor */
|
431
|
+
if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
349
432
|
if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
|
350
433
|
|
351
|
-
ip++;
|
352
|
-
|
353
434
|
/* Main Loop */
|
354
435
|
while (ip < mflimit) {
|
355
|
-
ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit,
|
356
|
-
if (
|
436
|
+
ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
|
437
|
+
if (ml<MINMATCH) { ip++; continue; }
|
357
438
|
|
358
439
|
/* saved, in case we would skip too much */
|
359
440
|
start0 = ip;
|
@@ -362,7 +443,9 @@ static int LZ4HC_compress_hashChain (
|
|
362
443
|
|
363
444
|
_Search2:
|
364
445
|
if (ip+ml < mflimit)
|
365
|
-
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
446
|
+
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
447
|
+
ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
|
448
|
+
maxNbAttempts, patternAnalysis);
|
366
449
|
else
|
367
450
|
ml2 = ml;
|
368
451
|
|
@@ -407,7 +490,9 @@ _Search3:
|
|
407
490
|
/* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
|
408
491
|
|
409
492
|
if (start2 + ml2 < mflimit)
|
410
|
-
ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
493
|
+
ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
494
|
+
start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
|
495
|
+
maxNbAttempts, patternAnalysis);
|
411
496
|
else
|
412
497
|
ml3 = ml2;
|
413
498
|
|
@@ -527,14 +612,6 @@ _dest_overflow:
|
|
527
612
|
return 0;
|
528
613
|
}
|
529
614
|
|
530
|
-
static int LZ4HC_getSearchNum(int compressionLevel)
|
531
|
-
{
|
532
|
-
switch (compressionLevel) {
|
533
|
-
default: return 0; /* unused */
|
534
|
-
case 11: return 128;
|
535
|
-
case 12: return 1<<10;
|
536
|
-
}
|
537
|
-
}
|
538
615
|
|
539
616
|
static int LZ4HC_compress_generic (
|
540
617
|
LZ4HC_CCtx_internal* const ctx,
|
@@ -546,24 +623,47 @@ static int LZ4HC_compress_generic (
|
|
546
623
|
limitedOutput_directive limit
|
547
624
|
)
|
548
625
|
{
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
}
|
626
|
+
typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
|
627
|
+
typedef struct {
|
628
|
+
lz4hc_strat_e strat;
|
629
|
+
U32 nbSearches;
|
630
|
+
U32 targetLength;
|
631
|
+
} cParams_t;
|
632
|
+
static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
|
633
|
+
{ lz4hc, 2, 16 }, /* 0, unused */
|
634
|
+
{ lz4hc, 2, 16 }, /* 1, unused */
|
635
|
+
{ lz4hc, 2, 16 }, /* 2, unused */
|
636
|
+
{ lz4hc, 4, 16 }, /* 3 */
|
637
|
+
{ lz4hc, 8, 16 }, /* 4 */
|
638
|
+
{ lz4hc, 16, 16 }, /* 5 */
|
639
|
+
{ lz4hc, 32, 16 }, /* 6 */
|
640
|
+
{ lz4hc, 64, 16 }, /* 7 */
|
641
|
+
{ lz4hc, 128, 16 }, /* 8 */
|
642
|
+
{ lz4hc, 256, 16 }, /* 9 */
|
643
|
+
{ lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/
|
644
|
+
{ lz4opt, 512,128 }, /*11 */
|
645
|
+
{ lz4opt,8192, LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
|
646
|
+
};
|
647
|
+
|
648
|
+
if (limit == limitedDestSize && dstCapacity < 1) return 0; /* Impossible to store anything */
|
649
|
+
if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
|
650
|
+
|
651
|
+
ctx->end += *srcSizePtr;
|
652
|
+
if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */
|
653
|
+
cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
|
654
|
+
assert(cLevel >= 0);
|
655
|
+
assert(cLevel <= LZ4HC_CLEVEL_MAX);
|
656
|
+
{ cParams_t const cParam = clTable[cLevel];
|
657
|
+
if (cParam.strat == lz4hc)
|
658
|
+
return LZ4HC_compress_hashChain(ctx,
|
659
|
+
src, dst, srcSizePtr, dstCapacity,
|
660
|
+
cParam.nbSearches, limit);
|
661
|
+
assert(cParam.strat == lz4opt);
|
662
|
+
return LZ4HC_compress_optimal(ctx,
|
663
|
+
src, dst, srcSizePtr, dstCapacity,
|
664
|
+
cParam.nbSearches, cParam.targetLength, limit,
|
665
|
+
cLevel == LZ4HC_CLEVEL_MAX); /* ultra mode */
|
565
666
|
}
|
566
|
-
return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << (cLevel-1), limit); /* levels 1-9 */
|
567
667
|
}
|
568
668
|
|
569
669
|
|
@@ -596,8 +696,7 @@ int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, in
|
|
596
696
|
}
|
597
697
|
|
598
698
|
/* LZ4_compress_HC_destSize() :
|
599
|
-
*
|
600
|
-
* hence limit compression level to LZ4HC_CLEVEL_OPT_MIN-1*/
|
699
|
+
* only compatible with regular HC parser */
|
601
700
|
int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
|
602
701
|
{
|
603
702
|
LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
|
@@ -624,18 +723,13 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
|
|
624
723
|
{
|
625
724
|
LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
|
626
725
|
LZ4_streamHCPtr->internal_donotuse.base = NULL;
|
627
|
-
|
628
|
-
LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
|
629
|
-
LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel);
|
726
|
+
LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
|
630
727
|
}
|
631
728
|
|
632
729
|
void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
|
633
730
|
{
|
634
|
-
|
635
|
-
|
636
|
-
int const maxCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? LZ4HC_CLEVEL_OPT_MIN-1 : LZ4HC_CLEVEL_MAX;
|
637
|
-
compressionLevel = MIN(compressionLevel, minCLevel);
|
638
|
-
compressionLevel = MAX(compressionLevel, maxCLevel);
|
731
|
+
if (compressionLevel < 1) compressionLevel = 1;
|
732
|
+
if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;
|
639
733
|
LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
|
640
734
|
}
|
641
735
|
|
@@ -648,10 +742,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int
|
|
648
742
|
}
|
649
743
|
LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
|
650
744
|
ctxPtr->end = (const BYTE*)dictionary + dictSize;
|
651
|
-
if (
|
652
|
-
LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
|
653
|
-
else
|
654
|
-
if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
|
745
|
+
if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
|
655
746
|
return dictSize;
|
656
747
|
}
|
657
748
|
|
@@ -660,10 +751,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int
|
|
660
751
|
|
661
752
|
static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
|
662
753
|
{
|
663
|
-
if (ctxPtr->
|
664
|
-
LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
|
665
|
-
else
|
666
|
-
if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
|
754
|
+
if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
|
667
755
|
|
668
756
|
/* Only one memory segment for extDict, so any previous extDict is lost at this stage */
|
669
757
|
ctxPtr->lowLimit = ctxPtr->dictLimit;
|
@@ -717,8 +805,6 @@ int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src,
|
|
717
805
|
|
718
806
|
int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
|
719
807
|
{
|
720
|
-
LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
|
721
|
-
if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN) LZ4HC_init(ctxPtr, (const BYTE*)src); /* not compatible with btopt implementation */
|
722
808
|
return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize);
|
723
809
|
}
|
724
810
|
|