extlz4 0.2.4.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +5 -0
- data/README.md +3 -3
- data/contrib/lz4/INSTALL +1 -0
- data/contrib/lz4/NEWS +13 -0
- data/contrib/lz4/README.md +1 -0
- data/contrib/lz4/circle.yml +0 -1
- data/contrib/lz4/lib/README.md +28 -28
- data/contrib/lz4/lib/lz4.c +139 -53
- data/contrib/lz4/lib/lz4.h +85 -69
- data/contrib/lz4/lib/lz4frame.c +63 -57
- data/contrib/lz4/lib/lz4frame_static.h +27 -16
- data/contrib/lz4/lib/lz4hc.c +208 -122
- data/contrib/lz4/lib/lz4hc.h +23 -29
- data/contrib/lz4/lib/lz4opt.h +247 -257
- data/contrib/lz4/lib/xxhash.c +16 -16
- data/lib/extlz4/version.rb +1 -1
- metadata +1 -1
@@ -43,7 +43,15 @@ extern "C" {
|
|
43
43
|
/* lz4frame_static.h should be used solely in the context of static linking.
|
44
44
|
* It contains definitions which are not stable and may change in the future.
|
45
45
|
* Never use it in the context of DLL linking.
|
46
|
+
*
|
47
|
+
* Defining LZ4F_PUBLISH_STATIC_FUNCTIONS allows one to override this. Use at
|
48
|
+
* your own risk.
|
46
49
|
*/
|
50
|
+
#ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
|
51
|
+
#define LZ4FLIB_STATIC_API LZ4FLIB_API
|
52
|
+
#else
|
53
|
+
#define LZ4FLIB_STATIC_API
|
54
|
+
#endif
|
47
55
|
|
48
56
|
|
49
57
|
/* --- Dependency --- */
|
@@ -79,7 +87,7 @@ extern "C" {
|
|
79
87
|
/* enum list is exposed, to handle specific errors */
|
80
88
|
typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
|
81
89
|
|
82
|
-
LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
|
90
|
+
LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
|
83
91
|
|
84
92
|
|
85
93
|
|
@@ -93,8 +101,8 @@ typedef struct LZ4F_CDict_s LZ4F_CDict;
|
|
93
101
|
* LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
|
94
102
|
* LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
|
95
103
|
* `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict */
|
96
|
-
LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
|
97
|
-
void LZ4F_freeCDict(LZ4F_CDict* CDict);
|
104
|
+
LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
|
105
|
+
LZ4FLIB_STATIC_API void LZ4F_freeCDict(LZ4F_CDict* CDict);
|
98
106
|
|
99
107
|
|
100
108
|
/*! LZ4F_compressFrame_usingCDict() :
|
@@ -106,10 +114,11 @@ void LZ4F_freeCDict(LZ4F_CDict* CDict);
|
|
106
114
|
* but it's not recommended, as it's the only way to provide dictID in the frame header.
|
107
115
|
* @return : number of bytes written into dstBuffer.
|
108
116
|
* or an error code if it fails (can be tested using LZ4F_isError()) */
|
109
|
-
size_t LZ4F_compressFrame_usingCDict(
|
110
|
-
|
111
|
-
|
112
|
-
|
117
|
+
LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
|
118
|
+
void* dst, size_t dstCapacity,
|
119
|
+
const void* src, size_t srcSize,
|
120
|
+
const LZ4F_CDict* cdict,
|
121
|
+
const LZ4F_preferences_t* preferencesPtr);
|
113
122
|
|
114
123
|
|
115
124
|
/*! LZ4F_compressBegin_usingCDict() :
|
@@ -119,21 +128,23 @@ size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
|
|
119
128
|
* however, it's the only way to provide dictID in the frame header.
|
120
129
|
* @return : number of bytes written into dstBuffer for the header,
|
121
130
|
* or an error code (which can be tested using LZ4F_isError()) */
|
122
|
-
size_t LZ4F_compressBegin_usingCDict(
|
123
|
-
|
124
|
-
|
125
|
-
|
131
|
+
LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
|
132
|
+
LZ4F_cctx* cctx,
|
133
|
+
void* dstBuffer, size_t dstCapacity,
|
134
|
+
const LZ4F_CDict* cdict,
|
135
|
+
const LZ4F_preferences_t* prefsPtr);
|
126
136
|
|
127
137
|
|
128
138
|
/*! LZ4F_decompress_usingDict() :
|
129
139
|
* Same as LZ4F_decompress(), using a predefined dictionary.
|
130
140
|
* Dictionary is used "in place", without any preprocessing.
|
131
141
|
* It must remain accessible throughout the entire frame decoding. */
|
132
|
-
size_t LZ4F_decompress_usingDict(
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
142
|
+
LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
|
143
|
+
LZ4F_dctx* dctxPtr,
|
144
|
+
void* dstBuffer, size_t* dstSizePtr,
|
145
|
+
const void* srcBuffer, size_t* srcSizePtr,
|
146
|
+
const void* dict, size_t dictSize,
|
147
|
+
const LZ4F_decompressOptions_t* decompressOptionsPtr);
|
137
148
|
|
138
149
|
|
139
150
|
#if defined (__cplusplus)
|
data/contrib/lz4/lib/lz4hc.c
CHANGED
@@ -49,6 +49,7 @@
|
|
49
49
|
|
50
50
|
|
51
51
|
/*=== Dependency ===*/
|
52
|
+
#define LZ4_HC_STATIC_LINKING_ONLY
|
52
53
|
#include "lz4hc.h"
|
53
54
|
|
54
55
|
|
@@ -96,7 +97,7 @@ static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
|
|
96
97
|
|
97
98
|
|
98
99
|
/* Update chains up to ip (excluded) */
|
99
|
-
|
100
|
+
LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
|
100
101
|
{
|
101
102
|
U16* const chainTable = hc4->chainTable;
|
102
103
|
U32* const hashTable = hc4->hashTable;
|
@@ -116,56 +117,73 @@ FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
|
|
116
117
|
hc4->nextToUpdate = target;
|
117
118
|
}
|
118
119
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
120
|
+
/** LZ4HC_countBack() :
|
121
|
+
* @return : zero or a negative value, whose magnitude is the nb of common bytes before ip/match */
|
122
|
+
LZ4_FORCE_INLINE
|
123
|
+
int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
|
124
|
+
const BYTE* const iMin, const BYTE* const mMin)
|
124
125
|
{
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
int nbAttempts = maxNbAttempts;
|
133
|
-
size_t ml = 0;
|
126
|
+
int back=0;
|
127
|
+
while ( (ip+back > iMin)
|
128
|
+
&& (match+back > mMin)
|
129
|
+
&& (ip[back-1] == match[back-1]))
|
130
|
+
back--;
|
131
|
+
return back;
|
132
|
+
}
|
134
133
|
|
135
|
-
|
136
|
-
|
137
|
-
|
134
|
+
/* LZ4HC_countPattern() :
|
135
|
+
* pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
|
136
|
+
static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
|
137
|
+
{
|
138
|
+
const BYTE* const iStart = ip;
|
139
|
+
reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
|
140
|
+
|
141
|
+
while (likely(ip < iEnd-(sizeof(pattern)-1))) {
|
142
|
+
reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
|
143
|
+
if (!diff) { ip+=sizeof(pattern); continue; }
|
144
|
+
ip += LZ4_NbCommonBytes(diff);
|
145
|
+
return (unsigned)(ip - iStart);
|
146
|
+
}
|
138
147
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
const BYTE* const match = dictBase + matchIndex;
|
151
|
-
if (LZ4_read32(match) == LZ4_read32(ip)) {
|
152
|
-
size_t mlt;
|
153
|
-
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
154
|
-
if (vLimit > iLimit) vLimit = iLimit;
|
155
|
-
mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
|
156
|
-
if ((ip+mlt == vLimit) && (vLimit < iLimit))
|
157
|
-
mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
|
158
|
-
if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */
|
159
|
-
}
|
148
|
+
if (LZ4_isLittleEndian()) {
|
149
|
+
reg_t patternByte = pattern;
|
150
|
+
while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
|
151
|
+
ip++; patternByte >>= 8;
|
152
|
+
}
|
153
|
+
} else { /* big endian */
|
154
|
+
U32 bitOffset = (sizeof(pattern)*8) - 8;
|
155
|
+
while (ip < iEnd) {
|
156
|
+
BYTE const byte = (BYTE)(pattern >> bitOffset);
|
157
|
+
if (*ip != byte) break;
|
158
|
+
ip ++; bitOffset -= 8;
|
160
159
|
}
|
161
|
-
matchIndex -= DELTANEXTU16(chainTable, matchIndex);
|
162
160
|
}
|
163
161
|
|
164
|
-
return (
|
162
|
+
return (unsigned)(ip - iStart);
|
163
|
+
}
|
164
|
+
|
165
|
+
/* LZ4HC_reverseCountPattern() :
|
166
|
+
* pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
|
167
|
+
* read using natural platform endianness */
|
168
|
+
static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
|
169
|
+
{
|
170
|
+
const BYTE* const iStart = ip;
|
171
|
+
|
172
|
+
while (likely(ip >= iLow+4)) {
|
173
|
+
if (LZ4_read32(ip-4) != pattern) break;
|
174
|
+
ip -= 4;
|
175
|
+
}
|
176
|
+
{ const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */
|
177
|
+
while (likely(ip>iLow)) {
|
178
|
+
if (ip[-1] != *bytePtr) break;
|
179
|
+
ip--; bytePtr--;
|
180
|
+
} }
|
181
|
+
return (unsigned)(iStart - ip);
|
165
182
|
}
|
166
183
|
|
184
|
+
typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
|
167
185
|
|
168
|
-
|
186
|
+
LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
|
169
187
|
LZ4HC_CCtx_internal* hc4,
|
170
188
|
const BYTE* const ip,
|
171
189
|
const BYTE* const iLowLimit,
|
@@ -173,67 +191,126 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
|
|
173
191
|
int longest,
|
174
192
|
const BYTE** matchpos,
|
175
193
|
const BYTE** startpos,
|
176
|
-
const int maxNbAttempts
|
194
|
+
const int maxNbAttempts,
|
195
|
+
const int patternAnalysis)
|
177
196
|
{
|
178
197
|
U16* const chainTable = hc4->chainTable;
|
179
198
|
U32* const HashTable = hc4->hashTable;
|
180
199
|
const BYTE* const base = hc4->base;
|
181
200
|
const U32 dictLimit = hc4->dictLimit;
|
182
201
|
const BYTE* const lowPrefixPtr = base + dictLimit;
|
183
|
-
const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) -
|
202
|
+
const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE;
|
184
203
|
const BYTE* const dictBase = hc4->dictBase;
|
185
204
|
int const delta = (int)(ip-iLowLimit);
|
186
205
|
int nbAttempts = maxNbAttempts;
|
206
|
+
U32 const pattern = LZ4_read32(ip);
|
187
207
|
U32 matchIndex;
|
208
|
+
repeat_state_e repeat = rep_untested;
|
209
|
+
size_t srcPatternLength = 0;
|
188
210
|
|
189
|
-
|
211
|
+
DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
|
190
212
|
/* First Match */
|
191
213
|
LZ4HC_Insert(hc4, ip);
|
192
214
|
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
|
215
|
+
DEBUGLOG(7, "First match at index %u / %u (lowLimit)",
|
216
|
+
matchIndex, lowLimit);
|
193
217
|
|
194
218
|
while ((matchIndex>=lowLimit) && (nbAttempts)) {
|
219
|
+
DEBUGLOG(7, "remaining attempts : %i", nbAttempts);
|
195
220
|
nbAttempts--;
|
196
221
|
if (matchIndex >= dictLimit) {
|
197
222
|
const BYTE* const matchPtr = base + matchIndex;
|
198
223
|
if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
|
199
|
-
if (LZ4_read32(matchPtr) ==
|
224
|
+
if (LZ4_read32(matchPtr) == pattern) {
|
200
225
|
int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
|
226
|
+
#if 0
|
227
|
+
/* more generic but unfortunately slower on clang */
|
228
|
+
int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr);
|
229
|
+
#else
|
201
230
|
int back = 0;
|
202
|
-
|
203
231
|
while ( (ip+back > iLowLimit)
|
204
232
|
&& (matchPtr+back > lowPrefixPtr)
|
205
233
|
&& (ip[back-1] == matchPtr[back-1])) {
|
206
234
|
back--;
|
207
235
|
}
|
208
|
-
|
236
|
+
#endif
|
209
237
|
mlt -= back;
|
210
238
|
|
211
239
|
if (mlt > longest) {
|
212
240
|
longest = mlt;
|
213
241
|
*matchpos = matchPtr+back;
|
214
242
|
*startpos = ip+back;
|
215
|
-
|
216
|
-
|
243
|
+
} }
|
244
|
+
}
|
245
|
+
} else { /* matchIndex < dictLimit */
|
217
246
|
const BYTE* const matchPtr = dictBase + matchIndex;
|
218
|
-
if (LZ4_read32(matchPtr) ==
|
247
|
+
if (LZ4_read32(matchPtr) == pattern) {
|
219
248
|
int mlt;
|
220
|
-
int back=0;
|
249
|
+
int back = 0;
|
221
250
|
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
222
251
|
if (vLimit > iHighLimit) vLimit = iHighLimit;
|
223
252
|
mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
|
224
253
|
if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
|
225
254
|
mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
|
226
|
-
while ((ip+back > iLowLimit)
|
255
|
+
while ( (ip+back > iLowLimit)
|
256
|
+
&& (matchIndex+back > lowLimit)
|
257
|
+
&& (ip[back-1] == matchPtr[back-1]))
|
258
|
+
back--;
|
227
259
|
mlt -= back;
|
228
|
-
if (mlt > longest) {
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
260
|
+
if (mlt > longest) {
|
261
|
+
longest = mlt;
|
262
|
+
*matchpos = base + matchIndex + back;
|
263
|
+
*startpos = ip + back;
|
264
|
+
} } }
|
265
|
+
|
266
|
+
{ U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
|
267
|
+
matchIndex -= nextOffset;
|
268
|
+
if (patternAnalysis && nextOffset==1) {
|
269
|
+
/* may be a repeated pattern */
|
270
|
+
if (repeat == rep_untested) {
|
271
|
+
if ( ((pattern & 0xFFFF) == (pattern >> 16))
|
272
|
+
& ((pattern & 0xFF) == (pattern >> 24)) ) {
|
273
|
+
repeat = rep_confirmed;
|
274
|
+
srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4;
|
275
|
+
} else {
|
276
|
+
repeat = rep_not;
|
277
|
+
} }
|
278
|
+
if ( (repeat == rep_confirmed)
|
279
|
+
&& (matchIndex >= dictLimit) ) { /* same segment only */
|
280
|
+
const BYTE* const matchPtr = base + matchIndex;
|
281
|
+
if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
|
282
|
+
size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
|
283
|
+
const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
|
284
|
+
size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern);
|
285
|
+
size_t const currentSegmentLength = backLength + forwardPatternLength;
|
286
|
+
|
287
|
+
if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
|
288
|
+
&& (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
|
289
|
+
matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
|
290
|
+
} else {
|
291
|
+
matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
|
292
|
+
}
|
293
|
+
} } } }
|
294
|
+
} /* while ((matchIndex>=lowLimit) && (nbAttempts)) */
|
233
295
|
|
234
296
|
return longest;
|
235
297
|
}
|
236
298
|
|
299
|
+
LZ4_FORCE_INLINE
|
300
|
+
int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
|
301
|
+
const BYTE* const ip, const BYTE* const iLimit,
|
302
|
+
const BYTE** matchpos,
|
303
|
+
const int maxNbAttempts,
|
304
|
+
const int patternAnalysis)
|
305
|
+
{
|
306
|
+
const BYTE* uselessPtr = ip;
|
307
|
+
/* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
|
308
|
+
* but this won't be the case here, as we define iLowLimit==ip,
|
309
|
+
* so LZ4HC_InsertAndGetWiderMatch() won't be allowed to extend the match backward before ip */
|
310
|
+
return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis);
|
311
|
+
}
|
312
|
+
|
313
|
+
|
237
314
|
|
238
315
|
typedef enum {
|
239
316
|
noLimit = 0,
|
@@ -241,14 +318,10 @@ typedef enum {
|
|
241
318
|
limitedDestSize = 2,
|
242
319
|
} limitedOutput_directive;
|
243
320
|
|
244
|
-
#ifndef LZ4HC_DEBUG
|
245
|
-
# define LZ4HC_DEBUG 0
|
246
|
-
#endif
|
247
|
-
|
248
321
|
/* LZ4HC_encodeSequence() :
|
249
322
|
* @return : 0 if ok,
|
250
323
|
* 1 if buffer issue detected */
|
251
|
-
|
324
|
+
LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
|
252
325
|
const BYTE** ip,
|
253
326
|
BYTE** op,
|
254
327
|
const BYTE** anchor,
|
@@ -260,9 +333,21 @@ FORCE_INLINE int LZ4HC_encodeSequence (
|
|
260
333
|
size_t length;
|
261
334
|
BYTE* const token = (*op)++;
|
262
335
|
|
263
|
-
#if
|
264
|
-
|
265
|
-
|
336
|
+
#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
|
337
|
+
static const BYTE* start = NULL;
|
338
|
+
static U32 totalCost = 0;
|
339
|
+
U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
|
340
|
+
U32 const ll = (U32)(*ip - *anchor);
|
341
|
+
U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
|
342
|
+
U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
|
343
|
+
U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
|
344
|
+
if (start==NULL) start = *anchor; /* only works for single segment */
|
345
|
+
//g_debuglog_enable = (pos >= 2228) & (pos <= 2262);
|
346
|
+
DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
|
347
|
+
pos,
|
348
|
+
(U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
|
349
|
+
cost, totalCost);
|
350
|
+
totalCost += cost;
|
266
351
|
#endif
|
267
352
|
|
268
353
|
/* Encode Literal length */
|
@@ -285,6 +370,7 @@ FORCE_INLINE int LZ4HC_encodeSequence (
|
|
285
370
|
LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
|
286
371
|
|
287
372
|
/* Encode MatchLength */
|
373
|
+
assert(matchLength >= MINMATCH);
|
288
374
|
length = (size_t)(matchLength - MINMATCH);
|
289
375
|
if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
|
290
376
|
if (length >= ML_MASK) {
|
@@ -319,6 +405,7 @@ static int LZ4HC_compress_hashChain (
|
|
319
405
|
)
|
320
406
|
{
|
321
407
|
const int inputSize = *srcSizePtr;
|
408
|
+
const int patternAnalysis = (maxNbAttempts > 64); /* levels 8+ */
|
322
409
|
|
323
410
|
const BYTE* ip = (const BYTE*) source;
|
324
411
|
const BYTE* anchor = ip;
|
@@ -341,19 +428,13 @@ static int LZ4HC_compress_hashChain (
|
|
341
428
|
|
342
429
|
/* init */
|
343
430
|
*srcSizePtr = 0;
|
344
|
-
if (limit == limitedDestSize
|
345
|
-
if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */
|
346
|
-
|
347
|
-
ctx->end += inputSize;
|
348
|
-
if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support limitations LZ4 decompressor */
|
431
|
+
if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
349
432
|
if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
|
350
433
|
|
351
|
-
ip++;
|
352
|
-
|
353
434
|
/* Main Loop */
|
354
435
|
while (ip < mflimit) {
|
355
|
-
ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit,
|
356
|
-
if (
|
436
|
+
ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
|
437
|
+
if (ml<MINMATCH) { ip++; continue; }
|
357
438
|
|
358
439
|
/* saved, in case we would skip too much */
|
359
440
|
start0 = ip;
|
@@ -362,7 +443,9 @@ static int LZ4HC_compress_hashChain (
|
|
362
443
|
|
363
444
|
_Search2:
|
364
445
|
if (ip+ml < mflimit)
|
365
|
-
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
446
|
+
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
447
|
+
ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
|
448
|
+
maxNbAttempts, patternAnalysis);
|
366
449
|
else
|
367
450
|
ml2 = ml;
|
368
451
|
|
@@ -407,7 +490,9 @@ _Search3:
|
|
407
490
|
/* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
|
408
491
|
|
409
492
|
if (start2 + ml2 < mflimit)
|
410
|
-
ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
493
|
+
ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
|
494
|
+
start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
|
495
|
+
maxNbAttempts, patternAnalysis);
|
411
496
|
else
|
412
497
|
ml3 = ml2;
|
413
498
|
|
@@ -527,14 +612,6 @@ _dest_overflow:
|
|
527
612
|
return 0;
|
528
613
|
}
|
529
614
|
|
530
|
-
static int LZ4HC_getSearchNum(int compressionLevel)
|
531
|
-
{
|
532
|
-
switch (compressionLevel) {
|
533
|
-
default: return 0; /* unused */
|
534
|
-
case 11: return 128;
|
535
|
-
case 12: return 1<<10;
|
536
|
-
}
|
537
|
-
}
|
538
615
|
|
539
616
|
static int LZ4HC_compress_generic (
|
540
617
|
LZ4HC_CCtx_internal* const ctx,
|
@@ -546,24 +623,47 @@ static int LZ4HC_compress_generic (
|
|
546
623
|
limitedOutput_directive limit
|
547
624
|
)
|
548
625
|
{
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
}
|
626
|
+
typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
|
627
|
+
typedef struct {
|
628
|
+
lz4hc_strat_e strat;
|
629
|
+
U32 nbSearches;
|
630
|
+
U32 targetLength;
|
631
|
+
} cParams_t;
|
632
|
+
static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
|
633
|
+
{ lz4hc, 2, 16 }, /* 0, unused */
|
634
|
+
{ lz4hc, 2, 16 }, /* 1, unused */
|
635
|
+
{ lz4hc, 2, 16 }, /* 2, unused */
|
636
|
+
{ lz4hc, 4, 16 }, /* 3 */
|
637
|
+
{ lz4hc, 8, 16 }, /* 4 */
|
638
|
+
{ lz4hc, 16, 16 }, /* 5 */
|
639
|
+
{ lz4hc, 32, 16 }, /* 6 */
|
640
|
+
{ lz4hc, 64, 16 }, /* 7 */
|
641
|
+
{ lz4hc, 128, 16 }, /* 8 */
|
642
|
+
{ lz4hc, 256, 16 }, /* 9 */
|
643
|
+
{ lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/
|
644
|
+
{ lz4opt, 512,128 }, /*11 */
|
645
|
+
{ lz4opt,8192, LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
|
646
|
+
};
|
647
|
+
|
648
|
+
if (limit == limitedDestSize && dstCapacity < 1) return 0; /* Impossible to store anything */
|
649
|
+
if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
|
650
|
+
|
651
|
+
ctx->end += *srcSizePtr;
|
652
|
+
if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */
|
653
|
+
cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
|
654
|
+
assert(cLevel >= 0);
|
655
|
+
assert(cLevel <= LZ4HC_CLEVEL_MAX);
|
656
|
+
{ cParams_t const cParam = clTable[cLevel];
|
657
|
+
if (cParam.strat == lz4hc)
|
658
|
+
return LZ4HC_compress_hashChain(ctx,
|
659
|
+
src, dst, srcSizePtr, dstCapacity,
|
660
|
+
cParam.nbSearches, limit);
|
661
|
+
assert(cParam.strat == lz4opt);
|
662
|
+
return LZ4HC_compress_optimal(ctx,
|
663
|
+
src, dst, srcSizePtr, dstCapacity,
|
664
|
+
cParam.nbSearches, cParam.targetLength, limit,
|
665
|
+
cLevel == LZ4HC_CLEVEL_MAX); /* ultra mode */
|
565
666
|
}
|
566
|
-
return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << (cLevel-1), limit); /* levels 1-9 */
|
567
667
|
}
|
568
668
|
|
569
669
|
|
@@ -596,8 +696,7 @@ int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, in
|
|
596
696
|
}
|
597
697
|
|
598
698
|
/* LZ4_compress_HC_destSize() :
|
599
|
-
*
|
600
|
-
* hence limit compression level to LZ4HC_CLEVEL_OPT_MIN-1*/
|
699
|
+
* only compatible with regular HC parser */
|
601
700
|
int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
|
602
701
|
{
|
603
702
|
LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
|
@@ -624,18 +723,13 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
|
|
624
723
|
{
|
625
724
|
LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
|
626
725
|
LZ4_streamHCPtr->internal_donotuse.base = NULL;
|
627
|
-
|
628
|
-
LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
|
629
|
-
LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel);
|
726
|
+
LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
|
630
727
|
}
|
631
728
|
|
632
729
|
void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
|
633
730
|
{
|
634
|
-
|
635
|
-
|
636
|
-
int const maxCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? LZ4HC_CLEVEL_OPT_MIN-1 : LZ4HC_CLEVEL_MAX;
|
637
|
-
compressionLevel = MIN(compressionLevel, minCLevel);
|
638
|
-
compressionLevel = MAX(compressionLevel, maxCLevel);
|
731
|
+
if (compressionLevel < 1) compressionLevel = 1;
|
732
|
+
if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;
|
639
733
|
LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
|
640
734
|
}
|
641
735
|
|
@@ -648,10 +742,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int
|
|
648
742
|
}
|
649
743
|
LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
|
650
744
|
ctxPtr->end = (const BYTE*)dictionary + dictSize;
|
651
|
-
if (
|
652
|
-
LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
|
653
|
-
else
|
654
|
-
if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
|
745
|
+
if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
|
655
746
|
return dictSize;
|
656
747
|
}
|
657
748
|
|
@@ -660,10 +751,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int
|
|
660
751
|
|
661
752
|
static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
|
662
753
|
{
|
663
|
-
if (ctxPtr->
|
664
|
-
LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS);
|
665
|
-
else
|
666
|
-
if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
|
754
|
+
if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
|
667
755
|
|
668
756
|
/* Only one memory segment for extDict, so any previous extDict is lost at this stage */
|
669
757
|
ctxPtr->lowLimit = ctxPtr->dictLimit;
|
@@ -717,8 +805,6 @@ int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src,
|
|
717
805
|
|
718
806
|
int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
|
719
807
|
{
|
720
|
-
LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
|
721
|
-
if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN) LZ4HC_init(ctxPtr, (const BYTE*)src); /* not compatible with btopt implementation */
|
722
808
|
return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize);
|
723
809
|
}
|
724
810
|
|