extlz4 0.2.4.3 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +5 -0
- data/README.md +3 -3
- data/contrib/lz4/INSTALL +1 -0
- data/contrib/lz4/NEWS +13 -0
- data/contrib/lz4/README.md +1 -0
- data/contrib/lz4/circle.yml +0 -1
- data/contrib/lz4/lib/README.md +28 -28
- data/contrib/lz4/lib/lz4.c +139 -53
- data/contrib/lz4/lib/lz4.h +85 -69
- data/contrib/lz4/lib/lz4frame.c +63 -57
- data/contrib/lz4/lib/lz4frame_static.h +27 -16
- data/contrib/lz4/lib/lz4hc.c +208 -122
- data/contrib/lz4/lib/lz4hc.h +23 -29
- data/contrib/lz4/lib/lz4opt.h +247 -257
- data/contrib/lz4/lib/xxhash.c +16 -16
- data/lib/extlz4/version.rb +1 -1
- metadata +1 -1
data/contrib/lz4/lib/lz4hc.h
CHANGED
@@ -39,14 +39,14 @@ extern "C" {
|
|
39
39
|
#endif
|
40
40
|
|
41
41
|
/* --- Dependency --- */
|
42
|
-
/* note : lz4hc
|
42
|
+
/* note : lz4hc requires lz4.h/lz4.c for compilation */
|
43
43
|
#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
|
44
44
|
|
45
45
|
|
46
46
|
/* --- Useful constants --- */
|
47
47
|
#define LZ4HC_CLEVEL_MIN 3
|
48
48
|
#define LZ4HC_CLEVEL_DEFAULT 9
|
49
|
-
#define LZ4HC_CLEVEL_OPT_MIN
|
49
|
+
#define LZ4HC_CLEVEL_OPT_MIN 10
|
50
50
|
#define LZ4HC_CLEVEL_MAX 12
|
51
51
|
|
52
52
|
|
@@ -54,12 +54,12 @@ extern "C" {
|
|
54
54
|
* Block Compression
|
55
55
|
**************************************/
|
56
56
|
/*! LZ4_compress_HC() :
|
57
|
-
*
|
57
|
+
* Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
|
58
58
|
* `dst` must be already allocated.
|
59
|
-
*
|
60
|
-
*
|
61
|
-
* `compressionLevel` :
|
62
|
-
* Values >LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
|
59
|
+
* Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
|
60
|
+
* Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
|
61
|
+
* `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
|
62
|
+
* Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
|
63
63
|
* @return : the number of bytes written into 'dst'
|
64
64
|
* or 0 if compression fails.
|
65
65
|
*/
|
@@ -72,12 +72,12 @@ LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dst
|
|
72
72
|
|
73
73
|
|
74
74
|
/*! LZ4_compress_HC_extStateHC() :
|
75
|
-
*
|
75
|
+
* Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
|
76
76
|
* `state` size is provided by LZ4_sizeofStateHC().
|
77
|
-
*
|
77
|
+
* Memory segment must be aligned on 8-byte boundaries (which a normal malloc() should do properly).
|
78
78
|
*/
|
79
|
-
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
80
79
|
LZ4LIB_API int LZ4_sizeofStateHC(void);
|
80
|
+
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
81
81
|
|
82
82
|
|
83
83
|
/*-************************************
|
@@ -87,10 +87,10 @@ LZ4LIB_API int LZ4_sizeofStateHC(void);
|
|
87
87
|
typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
|
88
88
|
|
89
89
|
/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
|
90
|
-
*
|
91
|
-
*
|
92
|
-
*
|
93
|
-
*
|
90
|
+
* These functions create and release memory for LZ4 HC streaming state.
|
91
|
+
* Newly created states are automatically initialized.
|
92
|
+
* Existing states can be re-used several times, using LZ4_resetStreamHC().
|
93
|
+
* These methods are API and ABI stable, they can be used in combination with a DLL.
|
94
94
|
*/
|
95
95
|
LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
|
96
96
|
LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
|
@@ -123,13 +123,13 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in
|
|
123
123
|
*/
|
124
124
|
|
125
125
|
|
126
|
-
|
126
|
+
/*-**************************************************************
|
127
127
|
* PRIVATE DEFINITIONS :
|
128
128
|
* Do not use these definitions.
|
129
129
|
* They are exposed to allow static allocation of `LZ4_streamHC_t`.
|
130
130
|
* Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
|
131
|
-
|
132
|
-
#define LZ4HC_DICTIONARY_LOGSIZE
|
131
|
+
****************************************************************/
|
132
|
+
#define LZ4HC_DICTIONARY_LOGSIZE 16
|
133
133
|
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
|
134
134
|
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
|
135
135
|
|
@@ -152,8 +152,7 @@ typedef struct
|
|
152
152
|
uint32_t dictLimit; /* below that point, need extDict */
|
153
153
|
uint32_t lowLimit; /* below that point, no more dict */
|
154
154
|
uint32_t nextToUpdate; /* index from which to continue dictionary update */
|
155
|
-
|
156
|
-
uint32_t compressionLevel;
|
155
|
+
int compressionLevel;
|
157
156
|
} LZ4HC_CCtx_internal;
|
158
157
|
|
159
158
|
#else
|
@@ -169,13 +168,12 @@ typedef struct
|
|
169
168
|
unsigned int dictLimit; /* below that point, need extDict */
|
170
169
|
unsigned int lowLimit; /* below that point, no more dict */
|
171
170
|
unsigned int nextToUpdate; /* index from which to continue dictionary update */
|
172
|
-
unsigned int searchNum; /* only for optimal parser */
|
173
171
|
int compressionLevel;
|
174
172
|
} LZ4HC_CCtx_internal;
|
175
173
|
|
176
174
|
#endif
|
177
175
|
|
178
|
-
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /*
|
176
|
+
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 262200 */
|
179
177
|
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
|
180
178
|
union LZ4_streamHC_u {
|
181
179
|
size_t table[LZ4_STREAMHCSIZE_SIZET];
|
@@ -197,7 +195,6 @@ union LZ4_streamHC_u {
|
|
197
195
|
/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
|
198
196
|
|
199
197
|
/* deprecated compression functions */
|
200
|
-
/* these functions will trigger warning messages in future releases */
|
201
198
|
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC (const char* source, char* dest, int inputSize);
|
202
199
|
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
|
203
200
|
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
|
@@ -225,14 +222,15 @@ LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStr
|
|
225
222
|
|
226
223
|
#endif /* LZ4_HC_H_19834876238432 */
|
227
224
|
|
228
|
-
|
225
|
+
|
226
|
+
/*-**************************************************
|
229
227
|
* !!!!! STATIC LINKING ONLY !!!!!
|
230
228
|
* Following definitions are considered experimental.
|
231
229
|
* They should not be linked from DLL,
|
232
230
|
* as there is no guarantee of API stability yet.
|
233
231
|
* Prototypes will be promoted to "stable" status
|
234
232
|
* after successful usage in real-life scenarios.
|
235
|
-
|
233
|
+
***************************************************/
|
236
234
|
#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
|
237
235
|
#ifndef LZ4_HC_SLO_098092834
|
238
236
|
#define LZ4_HC_SLO_098092834
|
@@ -258,17 +256,13 @@ int LZ4_compress_HC_destSize(void* LZ4HC_Data,
|
|
258
256
|
* @return : the number of bytes written into 'dst'
|
259
257
|
* or 0 if compression fails.
|
260
258
|
* `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`.
|
261
|
-
* Important : due to limitations, this prototype only works well up to cLevel < LZ4HC_CLEVEL_OPT_MIN
|
262
|
-
* beyond that level, compression performance will be much reduced due to internal incompatibilities
|
263
259
|
*/
|
264
260
|
int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
|
265
261
|
const char* src, char* dst,
|
266
262
|
int* srcSizePtr, int targetDstSize);
|
267
263
|
|
268
264
|
/*! LZ4_setCompressionLevel() : v1.8.0 (experimental)
|
269
|
-
* It's possible to change compression level
|
270
|
-
* but that requires to stay in the same mode (aka 1-10 or 11-12).
|
271
|
-
* This function ensures this condition.
|
265
|
+
* It's possible to change compression level between 2 invocations of LZ4_compress_HC_continue*()
|
272
266
|
*/
|
273
267
|
void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
|
274
268
|
|
data/contrib/lz4/lib/lz4opt.h
CHANGED
@@ -35,12 +35,6 @@
|
|
35
35
|
|
36
36
|
#define LZ4_OPT_NUM (1<<12)
|
37
37
|
|
38
|
-
|
39
|
-
typedef struct {
|
40
|
-
int off;
|
41
|
-
int len;
|
42
|
-
} LZ4HC_match_t;
|
43
|
-
|
44
38
|
typedef struct {
|
45
39
|
int price;
|
46
40
|
int off;
|
@@ -50,317 +44,313 @@ typedef struct {
|
|
50
44
|
|
51
45
|
|
52
46
|
/* price in bytes */
|
53
|
-
|
47
|
+
LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
|
54
48
|
{
|
55
|
-
|
56
|
-
if (litlen >= (
|
49
|
+
int price = litlen;
|
50
|
+
if (litlen >= (int)RUN_MASK)
|
57
51
|
price += 1 + (litlen-RUN_MASK)/255;
|
58
52
|
return price;
|
59
53
|
}
|
60
54
|
|
61
55
|
|
62
56
|
/* requires mlen >= MINMATCH */
|
63
|
-
|
57
|
+
LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
|
64
58
|
{
|
65
|
-
|
59
|
+
int price = 1 + 2 ; /* token + 16-bit offset */
|
66
60
|
|
67
61
|
price += LZ4HC_literalsPrice(litlen);
|
68
62
|
|
69
|
-
if (mlen >= (
|
70
|
-
price+= 1 + (mlen-(ML_MASK+MINMATCH))/255;
|
63
|
+
if (mlen >= (int)(ML_MASK+MINMATCH))
|
64
|
+
price += 1 + (mlen-(ML_MASK+MINMATCH))/255;
|
71
65
|
|
72
66
|
return price;
|
73
67
|
}
|
74
68
|
|
75
69
|
|
76
70
|
/*-*************************************
|
77
|
-
*
|
71
|
+
* Match finder
|
78
72
|
***************************************/
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
size_t best_mlen,
|
84
|
-
LZ4HC_match_t* matches,
|
85
|
-
int* matchNum)
|
86
|
-
{
|
87
|
-
U16* const chainTable = ctx->chainTable;
|
88
|
-
U32* const HashTable = ctx->hashTable;
|
89
|
-
const BYTE* const base = ctx->base;
|
90
|
-
const U32 dictLimit = ctx->dictLimit;
|
91
|
-
const U32 current = (U32)(ip - base);
|
92
|
-
const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);
|
93
|
-
const BYTE* const dictBase = ctx->dictBase;
|
94
|
-
const BYTE* match;
|
95
|
-
int nbAttempts = ctx->searchNum;
|
96
|
-
int mnum = 0;
|
97
|
-
U16 *ptr0, *ptr1, delta0, delta1;
|
98
|
-
U32 matchIndex;
|
99
|
-
size_t matchLength = 0;
|
100
|
-
U32* HashPos;
|
101
|
-
|
102
|
-
if (ip + MINMATCH > iHighLimit) return 1;
|
103
|
-
|
104
|
-
/* HC4 match finder */
|
105
|
-
HashPos = &HashTable[LZ4HC_hashPtr(ip)];
|
106
|
-
matchIndex = *HashPos;
|
107
|
-
*HashPos = current;
|
108
|
-
|
109
|
-
ptr0 = &DELTANEXTMAXD(current*2+1);
|
110
|
-
ptr1 = &DELTANEXTMAXD(current*2);
|
111
|
-
delta0 = delta1 = (U16)(current - matchIndex);
|
112
|
-
|
113
|
-
while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
|
114
|
-
nbAttempts--;
|
115
|
-
if (matchIndex >= dictLimit) {
|
116
|
-
match = base + matchIndex;
|
117
|
-
matchLength = LZ4_count(ip, match, iHighLimit);
|
118
|
-
} else {
|
119
|
-
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
120
|
-
match = dictBase + matchIndex;
|
121
|
-
if (vLimit > iHighLimit) vLimit = iHighLimit;
|
122
|
-
matchLength = LZ4_count(ip, match, vLimit);
|
123
|
-
if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
|
124
|
-
matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
|
125
|
-
if (matchIndex+matchLength >= dictLimit)
|
126
|
-
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
|
127
|
-
}
|
128
|
-
|
129
|
-
if (matchLength > best_mlen) {
|
130
|
-
best_mlen = matchLength;
|
131
|
-
if (matches) {
|
132
|
-
if (matchIndex >= dictLimit)
|
133
|
-
matches[mnum].off = (int)(ip - match);
|
134
|
-
else
|
135
|
-
matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
|
136
|
-
matches[mnum].len = (int)matchLength;
|
137
|
-
mnum++;
|
138
|
-
}
|
139
|
-
if (best_mlen > LZ4_OPT_NUM) break;
|
140
|
-
}
|
141
|
-
|
142
|
-
if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */
|
143
|
-
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
|
144
|
-
|
145
|
-
DEBUGLOG(6, "ip :%016llX", (U64)ip);
|
146
|
-
DEBUGLOG(6, "match:%016llX", (U64)match);
|
147
|
-
if (*(ip+matchLength) < *(match+matchLength)) {
|
148
|
-
*ptr0 = delta0;
|
149
|
-
ptr0 = &DELTANEXTMAXD(matchIndex*2);
|
150
|
-
if (*ptr0 == (U16)-1) break;
|
151
|
-
delta0 = *ptr0;
|
152
|
-
delta1 += delta0;
|
153
|
-
matchIndex -= delta0;
|
154
|
-
} else {
|
155
|
-
*ptr1 = delta1;
|
156
|
-
ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
|
157
|
-
if (*ptr1 == (U16)-1) break;
|
158
|
-
delta1 = *ptr1;
|
159
|
-
delta0 += delta1;
|
160
|
-
matchIndex -= delta1;
|
161
|
-
}
|
162
|
-
}
|
163
|
-
|
164
|
-
*ptr0 = (U16)-1;
|
165
|
-
*ptr1 = (U16)-1;
|
166
|
-
if (matchNum) *matchNum = mnum;
|
167
|
-
/* if (best_mlen > 8) return best_mlen-8; */
|
168
|
-
if (!matchNum) return 1;
|
169
|
-
return 1;
|
170
|
-
}
|
171
|
-
|
172
|
-
|
173
|
-
FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
|
174
|
-
{
|
175
|
-
const BYTE* const base = ctx->base;
|
176
|
-
const U32 target = (U32)(ip - base);
|
177
|
-
U32 idx = ctx->nextToUpdate;
|
178
|
-
while(idx < target)
|
179
|
-
idx += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, base+idx, iHighLimit, 8, NULL, NULL);
|
180
|
-
}
|
181
|
-
|
73
|
+
typedef struct {
|
74
|
+
int off;
|
75
|
+
int len;
|
76
|
+
} LZ4HC_match_t;
|
182
77
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
|
78
|
+
LZ4_FORCE_INLINE
|
79
|
+
LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
|
80
|
+
const BYTE* ip, const BYTE* const iHighLimit,
|
81
|
+
int minLen, int nbSearches)
|
188
82
|
{
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
opt[pos].mlen = (int)ml; \
|
202
|
-
opt[pos].off = (int)offset; \
|
203
|
-
opt[pos].litlen = (int)ll; \
|
204
|
-
opt[pos].price = (int)cost; \
|
83
|
+
LZ4HC_match_t match = { 0 , 0 };
|
84
|
+
const BYTE* matchPtr = NULL;
|
85
|
+
/* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
|
86
|
+
* but this won't be the case here, as we define iLowLimit==ip,
|
87
|
+
* so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
|
88
|
+
int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx,
|
89
|
+
ip, ip, iHighLimit, minLen, &matchPtr, &ip,
|
90
|
+
nbSearches, 1 /* patternAnalysis */);
|
91
|
+
if (matchLength <= minLen) return match;
|
92
|
+
match.len = matchLength;
|
93
|
+
match.off = (int)(ip-matchPtr);
|
94
|
+
return match;
|
205
95
|
}
|
206
96
|
|
207
97
|
|
208
98
|
static int LZ4HC_compress_optimal (
|
209
99
|
LZ4HC_CCtx_internal* ctx,
|
210
100
|
const char* const source,
|
211
|
-
char*
|
212
|
-
int
|
213
|
-
int
|
214
|
-
|
101
|
+
char* dst,
|
102
|
+
int* srcSizePtr,
|
103
|
+
int dstCapacity,
|
104
|
+
int const nbSearches,
|
215
105
|
size_t sufficient_len,
|
216
|
-
|
106
|
+
limitedOutput_directive limit,
|
107
|
+
int const fullUpdate
|
217
108
|
)
|
218
109
|
{
|
219
|
-
|
220
|
-
|
110
|
+
#define TRAILING_LITERALS 3
|
111
|
+
LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* this uses a bit too much stack memory to my taste ... */
|
221
112
|
|
222
113
|
const BYTE* ip = (const BYTE*) source;
|
223
114
|
const BYTE* anchor = ip;
|
224
|
-
const BYTE* const iend = ip +
|
115
|
+
const BYTE* const iend = ip + *srcSizePtr;
|
225
116
|
const BYTE* const mflimit = iend - MFLIMIT;
|
226
|
-
const BYTE* const matchlimit =
|
227
|
-
BYTE* op = (BYTE*)
|
228
|
-
BYTE*
|
117
|
+
const BYTE* const matchlimit = iend - LASTLITERALS;
|
118
|
+
BYTE* op = (BYTE*) dst;
|
119
|
+
BYTE* opSaved = (BYTE*) dst;
|
120
|
+
BYTE* oend = op + dstCapacity;
|
229
121
|
|
230
122
|
/* init */
|
231
123
|
DEBUGLOG(5, "LZ4HC_compress_optimal");
|
124
|
+
*srcSizePtr = 0;
|
125
|
+
if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
232
126
|
if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
|
233
|
-
ctx->end += inputSize;
|
234
|
-
ip++;
|
235
127
|
|
236
128
|
/* Main Loop */
|
129
|
+
assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
|
237
130
|
while (ip < mflimit) {
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
memset(opt, 0, sizeof(LZ4HC_optimal_t)); /* memset only the first one */
|
131
|
+
int const llen = (int)(ip - anchor);
|
132
|
+
int best_mlen, best_off;
|
133
|
+
int cur, last_match_pos = 0;
|
242
134
|
|
243
|
-
|
244
|
-
if (
|
135
|
+
LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches);
|
136
|
+
if (firstMatch.len==0) { ip++; continue; }
|
245
137
|
|
246
|
-
if ((size_t)
|
138
|
+
if ((size_t)firstMatch.len > sufficient_len) {
|
247
139
|
/* good enough solution : immediate encoding */
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
140
|
+
int const firstML = firstMatch.len;
|
141
|
+
const BYTE* const matchPos = ip - firstMatch.off;
|
142
|
+
opSaved = op;
|
143
|
+
if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */
|
144
|
+
goto _dest_overflow;
|
145
|
+
continue;
|
253
146
|
}
|
254
147
|
|
255
|
-
/* set prices
|
256
|
-
{
|
257
|
-
for (
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
148
|
+
/* set prices for first positions (literals) */
|
149
|
+
{ int rPos;
|
150
|
+
for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
|
151
|
+
int const cost = LZ4HC_literalsPrice(llen + rPos);
|
152
|
+
opt[rPos].mlen = 1;
|
153
|
+
opt[rPos].off = 0;
|
154
|
+
opt[rPos].litlen = llen + rPos;
|
155
|
+
opt[rPos].price = cost;
|
156
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
|
157
|
+
rPos, cost, opt[rPos].litlen);
|
158
|
+
} }
|
159
|
+
/* set prices using initial match */
|
160
|
+
{ int mlen = MINMATCH;
|
161
|
+
int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
|
162
|
+
int const offset = firstMatch.off;
|
163
|
+
assert(matchML < LZ4_OPT_NUM);
|
164
|
+
for ( ; mlen <= matchML ; mlen++) {
|
165
|
+
int const cost = LZ4HC_sequencePrice(llen, mlen);
|
166
|
+
opt[mlen].mlen = mlen;
|
167
|
+
opt[mlen].off = offset;
|
168
|
+
opt[mlen].litlen = llen;
|
169
|
+
opt[mlen].price = cost;
|
170
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
|
171
|
+
mlen, cost, mlen);
|
172
|
+
} }
|
173
|
+
last_match_pos = firstMatch.len;
|
174
|
+
{ int addLit;
|
175
|
+
for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
|
176
|
+
opt[last_match_pos+addLit].mlen = 1; /* literal */
|
177
|
+
opt[last_match_pos+addLit].off = 0;
|
178
|
+
opt[last_match_pos+addLit].litlen = addLit;
|
179
|
+
opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
|
180
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
|
181
|
+
last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
|
182
|
+
} }
|
266
183
|
|
267
184
|
/* check further positions */
|
268
|
-
|
269
|
-
for (cur = 1; cur <= last_pos; cur++) {
|
185
|
+
for (cur = 1; cur < last_match_pos; cur++) {
|
270
186
|
const BYTE* const curPtr = ip + cur;
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
}
|
286
|
-
|
287
|
-
if (price < (size_t)opt[cur].price)
|
288
|
-
SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_pos */
|
187
|
+
LZ4HC_match_t newMatch;
|
188
|
+
|
189
|
+
if (curPtr >= mflimit) break;
|
190
|
+
DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
|
191
|
+
cur, opt[cur].price, opt[cur+1].price, cur+1);
|
192
|
+
if (fullUpdate) {
|
193
|
+
/* not useful to search here if next position has same (or lower) cost */
|
194
|
+
if ( (opt[cur+1].price <= opt[cur].price)
|
195
|
+
/* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
|
196
|
+
&& (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
|
197
|
+
continue;
|
198
|
+
} else {
|
199
|
+
/* not useful to search here if next position has same (or lower) cost */
|
200
|
+
if (opt[cur+1].price <= opt[cur].price) continue;
|
289
201
|
}
|
290
202
|
|
291
|
-
|
203
|
+
DEBUGLOG(7, "search at rPos:%u", cur);
|
204
|
+
if (fullUpdate)
|
205
|
+
newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches);
|
206
|
+
else
|
207
|
+
/* only test matches of minimum length; slightly faster, but misses a few bytes */
|
208
|
+
newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches);
|
209
|
+
if (!newMatch.len) continue;
|
292
210
|
|
293
|
-
|
294
|
-
|
211
|
+
if ( ((size_t)newMatch.len > sufficient_len)
|
212
|
+
|| (newMatch.len + cur >= LZ4_OPT_NUM) ) {
|
295
213
|
/* immediate encoding */
|
296
|
-
best_mlen =
|
297
|
-
best_off =
|
298
|
-
|
214
|
+
best_mlen = newMatch.len;
|
215
|
+
best_off = newMatch.off;
|
216
|
+
last_match_pos = cur + 1;
|
299
217
|
goto encode;
|
300
218
|
}
|
301
219
|
|
302
|
-
/* set
|
303
|
-
{
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
opt[cur].off = (int)best_off;
|
338
|
-
best_mlen = ml;
|
339
|
-
best_off = offset;
|
340
|
-
if (ml > cur) break; /* can this happen ? */
|
341
|
-
cur -= ml;
|
342
|
-
}
|
220
|
+
/* before match : set price with literals at beginning */
|
221
|
+
{ int const baseLitlen = opt[cur].litlen;
|
222
|
+
int litlen;
|
223
|
+
for (litlen = 1; litlen < MINMATCH; litlen++) {
|
224
|
+
int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
|
225
|
+
int const pos = cur + litlen;
|
226
|
+
if (price < opt[pos].price) {
|
227
|
+
opt[pos].mlen = 1; /* literal */
|
228
|
+
opt[pos].off = 0;
|
229
|
+
opt[pos].litlen = baseLitlen+litlen;
|
230
|
+
opt[pos].price = price;
|
231
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
|
232
|
+
pos, price, opt[pos].litlen);
|
233
|
+
} } }
|
234
|
+
|
235
|
+
/* set prices using match at position = cur */
|
236
|
+
{ int const matchML = newMatch.len;
|
237
|
+
int ml = MINMATCH;
|
238
|
+
|
239
|
+
assert(cur + newMatch.len < LZ4_OPT_NUM);
|
240
|
+
for ( ; ml <= matchML ; ml++) {
|
241
|
+
int const pos = cur + ml;
|
242
|
+
int const offset = newMatch.off;
|
243
|
+
int price;
|
244
|
+
int ll;
|
245
|
+
DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
|
246
|
+
pos, last_match_pos);
|
247
|
+
if (opt[cur].mlen == 1) {
|
248
|
+
ll = opt[cur].litlen;
|
249
|
+
price = ((cur > ll) ? opt[cur - ll].price : 0)
|
250
|
+
+ LZ4HC_sequencePrice(ll, ml);
|
251
|
+
} else {
|
252
|
+
ll = 0;
|
253
|
+
price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
|
254
|
+
}
|
343
255
|
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
256
|
+
if (pos > last_match_pos+TRAILING_LITERALS || price <= opt[pos].price) {
|
257
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
|
258
|
+
pos, price, ml);
|
259
|
+
assert(pos < LZ4_OPT_NUM);
|
260
|
+
if ( (ml == matchML) /* last pos of last match */
|
261
|
+
&& (last_match_pos < pos) )
|
262
|
+
last_match_pos = pos;
|
263
|
+
opt[pos].mlen = ml;
|
264
|
+
opt[pos].off = offset;
|
265
|
+
opt[pos].litlen = ll;
|
266
|
+
opt[pos].price = price;
|
267
|
+
} } }
|
268
|
+
/* complete following positions with literals */
|
269
|
+
{ int addLit;
|
270
|
+
for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
|
271
|
+
opt[last_match_pos+addLit].mlen = 1; /* literal */
|
272
|
+
opt[last_match_pos+addLit].off = 0;
|
273
|
+
opt[last_match_pos+addLit].litlen = addLit;
|
274
|
+
opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
|
275
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
|
276
|
+
} }
|
277
|
+
} /* for (cur = 1; cur <= last_match_pos; cur++) */
|
278
|
+
|
279
|
+
best_mlen = opt[last_match_pos].mlen;
|
280
|
+
best_off = opt[last_match_pos].off;
|
281
|
+
cur = last_match_pos - best_mlen;
|
282
|
+
|
283
|
+
encode: /* cur, last_match_pos, best_mlen, best_off must be set */
|
284
|
+
assert(cur < LZ4_OPT_NUM);
|
285
|
+
assert(last_match_pos >= 1); /* == 1 when only one candidate */
|
286
|
+
DEBUGLOG(6, "reverse traversal, looking for shortest path")
|
287
|
+
DEBUGLOG(6, "last_match_pos = %i", last_match_pos);
|
288
|
+
{ int candidate_pos = cur;
|
289
|
+
int selected_matchLength = best_mlen;
|
290
|
+
int selected_offset = best_off;
|
291
|
+
while (1) { /* from end to beginning */
|
292
|
+
int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */
|
293
|
+
int const next_offset = opt[candidate_pos].off;
|
294
|
+
DEBUGLOG(6, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
|
295
|
+
opt[candidate_pos].mlen = selected_matchLength;
|
296
|
+
opt[candidate_pos].off = selected_offset;
|
297
|
+
selected_matchLength = next_matchLength;
|
298
|
+
selected_offset = next_offset;
|
299
|
+
if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
|
300
|
+
assert(next_matchLength > 0); /* can be 1, means literal */
|
301
|
+
candidate_pos -= next_matchLength;
|
302
|
+
} }
|
303
|
+
|
304
|
+
/* encode all recorded sequences in order */
|
305
|
+
{ int rPos = 0; /* relative position (to ip) */
|
306
|
+
while (rPos < last_match_pos) {
|
307
|
+
int const ml = opt[rPos].mlen;
|
308
|
+
int const offset = opt[rPos].off;
|
309
|
+
if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */
|
310
|
+
rPos += ml;
|
311
|
+
assert(ml >= MINMATCH);
|
312
|
+
assert((offset >= 1) && (offset <= MAX_DISTANCE));
|
313
|
+
opSaved = op;
|
314
|
+
if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */
|
315
|
+
goto _dest_overflow;
|
316
|
+
} }
|
353
317
|
} /* while (ip < mflimit) */
|
354
318
|
|
319
|
+
_last_literals:
|
355
320
|
/* Encode Last Literals */
|
356
|
-
{
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
321
|
+
{ size_t lastRunSize = (size_t)(iend - anchor); /* literals */
|
322
|
+
size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
|
323
|
+
size_t const totalSize = 1 + litLength + lastRunSize;
|
324
|
+
if (limit == limitedDestSize) oend += LASTLITERALS; /* restore correct value */
|
325
|
+
if (limit && (op + totalSize > oend)) {
|
326
|
+
if (limit == limitedOutput) return 0; /* Check output limit */
|
327
|
+
/* adapt lastRunSize to fill 'dst' */
|
328
|
+
lastRunSize = (size_t)(oend - op) - 1;
|
329
|
+
litLength = (lastRunSize + 255 - RUN_MASK) / 255;
|
330
|
+
lastRunSize -= litLength;
|
331
|
+
}
|
332
|
+
ip = anchor + lastRunSize;
|
333
|
+
|
334
|
+
if (lastRunSize >= RUN_MASK) {
|
335
|
+
size_t accumulator = lastRunSize - RUN_MASK;
|
336
|
+
*op++ = (RUN_MASK << ML_BITS);
|
337
|
+
for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
|
338
|
+
*op++ = (BYTE) accumulator;
|
339
|
+
} else {
|
340
|
+
*op++ = (BYTE)(lastRunSize << ML_BITS);
|
341
|
+
}
|
342
|
+
memcpy(op, anchor, lastRunSize);
|
343
|
+
op += lastRunSize;
|
362
344
|
}
|
363
345
|
|
364
346
|
/* End */
|
365
|
-
|
347
|
+
*srcSizePtr = (int) (((const char*)ip) - source);
|
348
|
+
return (int) ((char*)op-dst);
|
349
|
+
|
350
|
+
_dest_overflow:
|
351
|
+
if (limit == limitedDestSize) {
|
352
|
+
op = opSaved; /* restore correct out pointer */
|
353
|
+
goto _last_literals;
|
354
|
+
}
|
355
|
+
return 0;
|
366
356
|
}
|