extlz4 0.2.4.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +5 -0
- data/README.md +3 -3
- data/contrib/lz4/INSTALL +1 -0
- data/contrib/lz4/NEWS +13 -0
- data/contrib/lz4/README.md +1 -0
- data/contrib/lz4/circle.yml +0 -1
- data/contrib/lz4/lib/README.md +28 -28
- data/contrib/lz4/lib/lz4.c +139 -53
- data/contrib/lz4/lib/lz4.h +85 -69
- data/contrib/lz4/lib/lz4frame.c +63 -57
- data/contrib/lz4/lib/lz4frame_static.h +27 -16
- data/contrib/lz4/lib/lz4hc.c +208 -122
- data/contrib/lz4/lib/lz4hc.h +23 -29
- data/contrib/lz4/lib/lz4opt.h +247 -257
- data/contrib/lz4/lib/xxhash.c +16 -16
- data/lib/extlz4/version.rb +1 -1
- metadata +1 -1
data/contrib/lz4/lib/lz4hc.h
CHANGED
@@ -39,14 +39,14 @@ extern "C" {
|
|
39
39
|
#endif
|
40
40
|
|
41
41
|
/* --- Dependency --- */
|
42
|
-
/* note : lz4hc
|
42
|
+
/* note : lz4hc requires lz4.h/lz4.c for compilation */
|
43
43
|
#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
|
44
44
|
|
45
45
|
|
46
46
|
/* --- Useful constants --- */
|
47
47
|
#define LZ4HC_CLEVEL_MIN 3
|
48
48
|
#define LZ4HC_CLEVEL_DEFAULT 9
|
49
|
-
#define LZ4HC_CLEVEL_OPT_MIN
|
49
|
+
#define LZ4HC_CLEVEL_OPT_MIN 10
|
50
50
|
#define LZ4HC_CLEVEL_MAX 12
|
51
51
|
|
52
52
|
|
@@ -54,12 +54,12 @@ extern "C" {
|
|
54
54
|
* Block Compression
|
55
55
|
**************************************/
|
56
56
|
/*! LZ4_compress_HC() :
|
57
|
-
*
|
57
|
+
* Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
|
58
58
|
* `dst` must be already allocated.
|
59
|
-
*
|
60
|
-
*
|
61
|
-
* `compressionLevel` :
|
62
|
-
* Values >LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
|
59
|
+
* Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
|
60
|
+
* Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
|
61
|
+
* `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
|
62
|
+
* Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
|
63
63
|
* @return : the number of bytes written into 'dst'
|
64
64
|
* or 0 if compression fails.
|
65
65
|
*/
|
@@ -72,12 +72,12 @@ LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dst
|
|
72
72
|
|
73
73
|
|
74
74
|
/*! LZ4_compress_HC_extStateHC() :
|
75
|
-
*
|
75
|
+
* Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
|
76
76
|
* `state` size is provided by LZ4_sizeofStateHC().
|
77
|
-
*
|
77
|
+
* Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly).
|
78
78
|
*/
|
79
|
-
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
80
79
|
LZ4LIB_API int LZ4_sizeofStateHC(void);
|
80
|
+
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
81
81
|
|
82
82
|
|
83
83
|
/*-************************************
|
@@ -87,10 +87,10 @@ LZ4LIB_API int LZ4_sizeofStateHC(void);
|
|
87
87
|
typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
|
88
88
|
|
89
89
|
/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
|
90
|
-
*
|
91
|
-
*
|
92
|
-
*
|
93
|
-
*
|
90
|
+
* These functions create and release memory for LZ4 HC streaming state.
|
91
|
+
* Newly created states are automatically initialized.
|
92
|
+
* Existing states can be re-used several times, using LZ4_resetStreamHC().
|
93
|
+
* These methods are API and ABI stable, they can be used in combination with a DLL.
|
94
94
|
*/
|
95
95
|
LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
|
96
96
|
LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
|
@@ -123,13 +123,13 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in
|
|
123
123
|
*/
|
124
124
|
|
125
125
|
|
126
|
-
|
126
|
+
/*-**************************************************************
|
127
127
|
* PRIVATE DEFINITIONS :
|
128
128
|
* Do not use these definitions.
|
129
129
|
* They are exposed to allow static allocation of `LZ4_streamHC_t`.
|
130
130
|
* Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
|
131
|
-
|
132
|
-
#define LZ4HC_DICTIONARY_LOGSIZE
|
131
|
+
****************************************************************/
|
132
|
+
#define LZ4HC_DICTIONARY_LOGSIZE 16
|
133
133
|
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
|
134
134
|
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
|
135
135
|
|
@@ -152,8 +152,7 @@ typedef struct
|
|
152
152
|
uint32_t dictLimit; /* below that point, need extDict */
|
153
153
|
uint32_t lowLimit; /* below that point, no more dict */
|
154
154
|
uint32_t nextToUpdate; /* index from which to continue dictionary update */
|
155
|
-
|
156
|
-
uint32_t compressionLevel;
|
155
|
+
int compressionLevel;
|
157
156
|
} LZ4HC_CCtx_internal;
|
158
157
|
|
159
158
|
#else
|
@@ -169,13 +168,12 @@ typedef struct
|
|
169
168
|
unsigned int dictLimit; /* below that point, need extDict */
|
170
169
|
unsigned int lowLimit; /* below that point, no more dict */
|
171
170
|
unsigned int nextToUpdate; /* index from which to continue dictionary update */
|
172
|
-
unsigned int searchNum; /* only for optimal parser */
|
173
171
|
int compressionLevel;
|
174
172
|
} LZ4HC_CCtx_internal;
|
175
173
|
|
176
174
|
#endif
|
177
175
|
|
178
|
-
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /*
|
176
|
+
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 262200 */
|
179
177
|
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
|
180
178
|
union LZ4_streamHC_u {
|
181
179
|
size_t table[LZ4_STREAMHCSIZE_SIZET];
|
@@ -197,7 +195,6 @@ union LZ4_streamHC_u {
|
|
197
195
|
/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
|
198
196
|
|
199
197
|
/* deprecated compression functions */
|
200
|
-
/* these functions will trigger warning messages in future releases */
|
201
198
|
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC (const char* source, char* dest, int inputSize);
|
202
199
|
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
|
203
200
|
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
|
@@ -225,14 +222,15 @@ LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStr
|
|
225
222
|
|
226
223
|
#endif /* LZ4_HC_H_19834876238432 */
|
227
224
|
|
228
|
-
|
225
|
+
|
226
|
+
/*-**************************************************
|
229
227
|
* !!!!! STATIC LINKING ONLY !!!!!
|
230
228
|
* Following definitions are considered experimental.
|
231
229
|
* They should not be linked from DLL,
|
232
230
|
* as there is no guarantee of API stability yet.
|
233
231
|
* Prototypes will be promoted to "stable" status
|
234
232
|
* after successful usage in real-life scenarios.
|
235
|
-
|
233
|
+
***************************************************/
|
236
234
|
#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
|
237
235
|
#ifndef LZ4_HC_SLO_098092834
|
238
236
|
#define LZ4_HC_SLO_098092834
|
@@ -258,17 +256,13 @@ int LZ4_compress_HC_destSize(void* LZ4HC_Data,
|
|
258
256
|
* @return : the number of bytes written into 'dst'
|
259
257
|
* or 0 if compression fails.
|
260
258
|
* `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`.
|
261
|
-
* Important : due to limitations, this prototype only works well up to cLevel < LZ4HC_CLEVEL_OPT_MIN
|
262
|
-
* beyond that level, compression performance will be much reduced due to internal incompatibilities
|
263
259
|
*/
|
264
260
|
int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
|
265
261
|
const char* src, char* dst,
|
266
262
|
int* srcSizePtr, int targetDstSize);
|
267
263
|
|
268
264
|
/*! LZ4_setCompressionLevel() : v1.8.0 (experimental)
|
269
|
-
* It's possible to change compression level
|
270
|
-
* but that requires to stay in the same mode (aka 1-10 or 11-12).
|
271
|
-
* This function ensures this condition.
|
265
|
+
* It's possible to change compression level between 2 invocations of LZ4_compress_HC_continue*()
|
272
266
|
*/
|
273
267
|
void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
|
274
268
|
|
data/contrib/lz4/lib/lz4opt.h
CHANGED
@@ -35,12 +35,6 @@
|
|
35
35
|
|
36
36
|
#define LZ4_OPT_NUM (1<<12)
|
37
37
|
|
38
|
-
|
39
|
-
typedef struct {
|
40
|
-
int off;
|
41
|
-
int len;
|
42
|
-
} LZ4HC_match_t;
|
43
|
-
|
44
38
|
typedef struct {
|
45
39
|
int price;
|
46
40
|
int off;
|
@@ -50,317 +44,313 @@ typedef struct {
|
|
50
44
|
|
51
45
|
|
52
46
|
/* price in bytes */
|
53
|
-
|
47
|
+
LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
|
54
48
|
{
|
55
|
-
|
56
|
-
if (litlen >= (
|
49
|
+
int price = litlen;
|
50
|
+
if (litlen >= (int)RUN_MASK)
|
57
51
|
price += 1 + (litlen-RUN_MASK)/255;
|
58
52
|
return price;
|
59
53
|
}
|
60
54
|
|
61
55
|
|
62
56
|
/* requires mlen >= MINMATCH */
|
63
|
-
|
57
|
+
LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
|
64
58
|
{
|
65
|
-
|
59
|
+
int price = 1 + 2 ; /* token + 16-bit offset */
|
66
60
|
|
67
61
|
price += LZ4HC_literalsPrice(litlen);
|
68
62
|
|
69
|
-
if (mlen >= (
|
70
|
-
price+= 1 + (mlen-(ML_MASK+MINMATCH))/255;
|
63
|
+
if (mlen >= (int)(ML_MASK+MINMATCH))
|
64
|
+
price += 1 + (mlen-(ML_MASK+MINMATCH))/255;
|
71
65
|
|
72
66
|
return price;
|
73
67
|
}
|
74
68
|
|
75
69
|
|
76
70
|
/*-*************************************
|
77
|
-
*
|
71
|
+
* Match finder
|
78
72
|
***************************************/
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
size_t best_mlen,
|
84
|
-
LZ4HC_match_t* matches,
|
85
|
-
int* matchNum)
|
86
|
-
{
|
87
|
-
U16* const chainTable = ctx->chainTable;
|
88
|
-
U32* const HashTable = ctx->hashTable;
|
89
|
-
const BYTE* const base = ctx->base;
|
90
|
-
const U32 dictLimit = ctx->dictLimit;
|
91
|
-
const U32 current = (U32)(ip - base);
|
92
|
-
const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);
|
93
|
-
const BYTE* const dictBase = ctx->dictBase;
|
94
|
-
const BYTE* match;
|
95
|
-
int nbAttempts = ctx->searchNum;
|
96
|
-
int mnum = 0;
|
97
|
-
U16 *ptr0, *ptr1, delta0, delta1;
|
98
|
-
U32 matchIndex;
|
99
|
-
size_t matchLength = 0;
|
100
|
-
U32* HashPos;
|
101
|
-
|
102
|
-
if (ip + MINMATCH > iHighLimit) return 1;
|
103
|
-
|
104
|
-
/* HC4 match finder */
|
105
|
-
HashPos = &HashTable[LZ4HC_hashPtr(ip)];
|
106
|
-
matchIndex = *HashPos;
|
107
|
-
*HashPos = current;
|
108
|
-
|
109
|
-
ptr0 = &DELTANEXTMAXD(current*2+1);
|
110
|
-
ptr1 = &DELTANEXTMAXD(current*2);
|
111
|
-
delta0 = delta1 = (U16)(current - matchIndex);
|
112
|
-
|
113
|
-
while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
|
114
|
-
nbAttempts--;
|
115
|
-
if (matchIndex >= dictLimit) {
|
116
|
-
match = base + matchIndex;
|
117
|
-
matchLength = LZ4_count(ip, match, iHighLimit);
|
118
|
-
} else {
|
119
|
-
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
120
|
-
match = dictBase + matchIndex;
|
121
|
-
if (vLimit > iHighLimit) vLimit = iHighLimit;
|
122
|
-
matchLength = LZ4_count(ip, match, vLimit);
|
123
|
-
if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
|
124
|
-
matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
|
125
|
-
if (matchIndex+matchLength >= dictLimit)
|
126
|
-
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
|
127
|
-
}
|
128
|
-
|
129
|
-
if (matchLength > best_mlen) {
|
130
|
-
best_mlen = matchLength;
|
131
|
-
if (matches) {
|
132
|
-
if (matchIndex >= dictLimit)
|
133
|
-
matches[mnum].off = (int)(ip - match);
|
134
|
-
else
|
135
|
-
matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
|
136
|
-
matches[mnum].len = (int)matchLength;
|
137
|
-
mnum++;
|
138
|
-
}
|
139
|
-
if (best_mlen > LZ4_OPT_NUM) break;
|
140
|
-
}
|
141
|
-
|
142
|
-
if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */
|
143
|
-
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
|
144
|
-
|
145
|
-
DEBUGLOG(6, "ip :%016llX", (U64)ip);
|
146
|
-
DEBUGLOG(6, "match:%016llX", (U64)match);
|
147
|
-
if (*(ip+matchLength) < *(match+matchLength)) {
|
148
|
-
*ptr0 = delta0;
|
149
|
-
ptr0 = &DELTANEXTMAXD(matchIndex*2);
|
150
|
-
if (*ptr0 == (U16)-1) break;
|
151
|
-
delta0 = *ptr0;
|
152
|
-
delta1 += delta0;
|
153
|
-
matchIndex -= delta0;
|
154
|
-
} else {
|
155
|
-
*ptr1 = delta1;
|
156
|
-
ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
|
157
|
-
if (*ptr1 == (U16)-1) break;
|
158
|
-
delta1 = *ptr1;
|
159
|
-
delta0 += delta1;
|
160
|
-
matchIndex -= delta1;
|
161
|
-
}
|
162
|
-
}
|
163
|
-
|
164
|
-
*ptr0 = (U16)-1;
|
165
|
-
*ptr1 = (U16)-1;
|
166
|
-
if (matchNum) *matchNum = mnum;
|
167
|
-
/* if (best_mlen > 8) return best_mlen-8; */
|
168
|
-
if (!matchNum) return 1;
|
169
|
-
return 1;
|
170
|
-
}
|
171
|
-
|
172
|
-
|
173
|
-
FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
|
174
|
-
{
|
175
|
-
const BYTE* const base = ctx->base;
|
176
|
-
const U32 target = (U32)(ip - base);
|
177
|
-
U32 idx = ctx->nextToUpdate;
|
178
|
-
while(idx < target)
|
179
|
-
idx += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, base+idx, iHighLimit, 8, NULL, NULL);
|
180
|
-
}
|
181
|
-
|
73
|
+
typedef struct {
|
74
|
+
int off;
|
75
|
+
int len;
|
76
|
+
} LZ4HC_match_t;
|
182
77
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
|
78
|
+
LZ4_FORCE_INLINE
|
79
|
+
LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
|
80
|
+
const BYTE* ip, const BYTE* const iHighLimit,
|
81
|
+
int minLen, int nbSearches)
|
188
82
|
{
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
opt[pos].mlen = (int)ml; \
|
202
|
-
opt[pos].off = (int)offset; \
|
203
|
-
opt[pos].litlen = (int)ll; \
|
204
|
-
opt[pos].price = (int)cost; \
|
83
|
+
LZ4HC_match_t match = { 0 , 0 };
|
84
|
+
const BYTE* matchPtr = NULL;
|
85
|
+
/* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
|
86
|
+
* but this won't be the case here, as we define iLowLimit==ip,
|
87
|
+
* so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
|
88
|
+
int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx,
|
89
|
+
ip, ip, iHighLimit, minLen, &matchPtr, &ip,
|
90
|
+
nbSearches, 1 /* patternAnalysis */);
|
91
|
+
if (matchLength <= minLen) return match;
|
92
|
+
match.len = matchLength;
|
93
|
+
match.off = (int)(ip-matchPtr);
|
94
|
+
return match;
|
205
95
|
}
|
206
96
|
|
207
97
|
|
208
98
|
static int LZ4HC_compress_optimal (
|
209
99
|
LZ4HC_CCtx_internal* ctx,
|
210
100
|
const char* const source,
|
211
|
-
char*
|
212
|
-
int
|
213
|
-
int
|
214
|
-
|
101
|
+
char* dst,
|
102
|
+
int* srcSizePtr,
|
103
|
+
int dstCapacity,
|
104
|
+
int const nbSearches,
|
215
105
|
size_t sufficient_len,
|
216
|
-
|
106
|
+
limitedOutput_directive limit,
|
107
|
+
int const fullUpdate
|
217
108
|
)
|
218
109
|
{
|
219
|
-
|
220
|
-
|
110
|
+
#define TRAILING_LITERALS 3
|
111
|
+
LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* this uses a bit too much stack memory to my taste ... */
|
221
112
|
|
222
113
|
const BYTE* ip = (const BYTE*) source;
|
223
114
|
const BYTE* anchor = ip;
|
224
|
-
const BYTE* const iend = ip +
|
115
|
+
const BYTE* const iend = ip + *srcSizePtr;
|
225
116
|
const BYTE* const mflimit = iend - MFLIMIT;
|
226
|
-
const BYTE* const matchlimit =
|
227
|
-
BYTE* op = (BYTE*)
|
228
|
-
BYTE*
|
117
|
+
const BYTE* const matchlimit = iend - LASTLITERALS;
|
118
|
+
BYTE* op = (BYTE*) dst;
|
119
|
+
BYTE* opSaved = (BYTE*) dst;
|
120
|
+
BYTE* oend = op + dstCapacity;
|
229
121
|
|
230
122
|
/* init */
|
231
123
|
DEBUGLOG(5, "LZ4HC_compress_optimal");
|
124
|
+
*srcSizePtr = 0;
|
125
|
+
if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
232
126
|
if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
|
233
|
-
ctx->end += inputSize;
|
234
|
-
ip++;
|
235
127
|
|
236
128
|
/* Main Loop */
|
129
|
+
assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
|
237
130
|
while (ip < mflimit) {
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
memset(opt, 0, sizeof(LZ4HC_optimal_t)); /* memset only the first one */
|
131
|
+
int const llen = (int)(ip - anchor);
|
132
|
+
int best_mlen, best_off;
|
133
|
+
int cur, last_match_pos = 0;
|
242
134
|
|
243
|
-
|
244
|
-
if (
|
135
|
+
LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches);
|
136
|
+
if (firstMatch.len==0) { ip++; continue; }
|
245
137
|
|
246
|
-
if ((size_t)
|
138
|
+
if ((size_t)firstMatch.len > sufficient_len) {
|
247
139
|
/* good enough solution : immediate encoding */
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
140
|
+
int const firstML = firstMatch.len;
|
141
|
+
const BYTE* const matchPos = ip - firstMatch.off;
|
142
|
+
opSaved = op;
|
143
|
+
if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */
|
144
|
+
goto _dest_overflow;
|
145
|
+
continue;
|
253
146
|
}
|
254
147
|
|
255
|
-
/* set prices
|
256
|
-
{
|
257
|
-
for (
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
148
|
+
/* set prices for first positions (literals) */
|
149
|
+
{ int rPos;
|
150
|
+
for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
|
151
|
+
int const cost = LZ4HC_literalsPrice(llen + rPos);
|
152
|
+
opt[rPos].mlen = 1;
|
153
|
+
opt[rPos].off = 0;
|
154
|
+
opt[rPos].litlen = llen + rPos;
|
155
|
+
opt[rPos].price = cost;
|
156
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
|
157
|
+
rPos, cost, opt[rPos].litlen);
|
158
|
+
} }
|
159
|
+
/* set prices using initial match */
|
160
|
+
{ int mlen = MINMATCH;
|
161
|
+
int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
|
162
|
+
int const offset = firstMatch.off;
|
163
|
+
assert(matchML < LZ4_OPT_NUM);
|
164
|
+
for ( ; mlen <= matchML ; mlen++) {
|
165
|
+
int const cost = LZ4HC_sequencePrice(llen, mlen);
|
166
|
+
opt[mlen].mlen = mlen;
|
167
|
+
opt[mlen].off = offset;
|
168
|
+
opt[mlen].litlen = llen;
|
169
|
+
opt[mlen].price = cost;
|
170
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
|
171
|
+
mlen, cost, mlen);
|
172
|
+
} }
|
173
|
+
last_match_pos = firstMatch.len;
|
174
|
+
{ int addLit;
|
175
|
+
for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
|
176
|
+
opt[last_match_pos+addLit].mlen = 1; /* literal */
|
177
|
+
opt[last_match_pos+addLit].off = 0;
|
178
|
+
opt[last_match_pos+addLit].litlen = addLit;
|
179
|
+
opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
|
180
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
|
181
|
+
last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
|
182
|
+
} }
|
266
183
|
|
267
184
|
/* check further positions */
|
268
|
-
|
269
|
-
for (cur = 1; cur <= last_pos; cur++) {
|
185
|
+
for (cur = 1; cur < last_match_pos; cur++) {
|
270
186
|
const BYTE* const curPtr = ip + cur;
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
}
|
286
|
-
|
287
|
-
if (price < (size_t)opt[cur].price)
|
288
|
-
SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_pos */
|
187
|
+
LZ4HC_match_t newMatch;
|
188
|
+
|
189
|
+
if (curPtr >= mflimit) break;
|
190
|
+
DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
|
191
|
+
cur, opt[cur].price, opt[cur+1].price, cur+1);
|
192
|
+
if (fullUpdate) {
|
193
|
+
/* not useful to search here if next position has same (or lower) cost */
|
194
|
+
if ( (opt[cur+1].price <= opt[cur].price)
|
195
|
+
/* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
|
196
|
+
&& (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
|
197
|
+
continue;
|
198
|
+
} else {
|
199
|
+
/* not useful to search here if next position has same (or lower) cost */
|
200
|
+
if (opt[cur+1].price <= opt[cur].price) continue;
|
289
201
|
}
|
290
202
|
|
291
|
-
|
203
|
+
DEBUGLOG(7, "search at rPos:%u", cur);
|
204
|
+
if (fullUpdate)
|
205
|
+
newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches);
|
206
|
+
else
|
207
|
+
/* only test matches of minimum length; slightly faster, but misses a few bytes */
|
208
|
+
newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches);
|
209
|
+
if (!newMatch.len) continue;
|
292
210
|
|
293
|
-
|
294
|
-
|
211
|
+
if ( ((size_t)newMatch.len > sufficient_len)
|
212
|
+
|| (newMatch.len + cur >= LZ4_OPT_NUM) ) {
|
295
213
|
/* immediate encoding */
|
296
|
-
best_mlen =
|
297
|
-
best_off =
|
298
|
-
|
214
|
+
best_mlen = newMatch.len;
|
215
|
+
best_off = newMatch.off;
|
216
|
+
last_match_pos = cur + 1;
|
299
217
|
goto encode;
|
300
218
|
}
|
301
219
|
|
302
|
-
/* set
|
303
|
-
{
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
opt[cur].off = (int)best_off;
|
338
|
-
best_mlen = ml;
|
339
|
-
best_off = offset;
|
340
|
-
if (ml > cur) break; /* can this happen ? */
|
341
|
-
cur -= ml;
|
342
|
-
}
|
220
|
+
/* before match : set price with literals at beginning */
|
221
|
+
{ int const baseLitlen = opt[cur].litlen;
|
222
|
+
int litlen;
|
223
|
+
for (litlen = 1; litlen < MINMATCH; litlen++) {
|
224
|
+
int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
|
225
|
+
int const pos = cur + litlen;
|
226
|
+
if (price < opt[pos].price) {
|
227
|
+
opt[pos].mlen = 1; /* literal */
|
228
|
+
opt[pos].off = 0;
|
229
|
+
opt[pos].litlen = baseLitlen+litlen;
|
230
|
+
opt[pos].price = price;
|
231
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
|
232
|
+
pos, price, opt[pos].litlen);
|
233
|
+
} } }
|
234
|
+
|
235
|
+
/* set prices using match at position = cur */
|
236
|
+
{ int const matchML = newMatch.len;
|
237
|
+
int ml = MINMATCH;
|
238
|
+
|
239
|
+
assert(cur + newMatch.len < LZ4_OPT_NUM);
|
240
|
+
for ( ; ml <= matchML ; ml++) {
|
241
|
+
int const pos = cur + ml;
|
242
|
+
int const offset = newMatch.off;
|
243
|
+
int price;
|
244
|
+
int ll;
|
245
|
+
DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
|
246
|
+
pos, last_match_pos);
|
247
|
+
if (opt[cur].mlen == 1) {
|
248
|
+
ll = opt[cur].litlen;
|
249
|
+
price = ((cur > ll) ? opt[cur - ll].price : 0)
|
250
|
+
+ LZ4HC_sequencePrice(ll, ml);
|
251
|
+
} else {
|
252
|
+
ll = 0;
|
253
|
+
price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
|
254
|
+
}
|
343
255
|
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
256
|
+
if (pos > last_match_pos+TRAILING_LITERALS || price <= opt[pos].price) {
|
257
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
|
258
|
+
pos, price, ml);
|
259
|
+
assert(pos < LZ4_OPT_NUM);
|
260
|
+
if ( (ml == matchML) /* last pos of last match */
|
261
|
+
&& (last_match_pos < pos) )
|
262
|
+
last_match_pos = pos;
|
263
|
+
opt[pos].mlen = ml;
|
264
|
+
opt[pos].off = offset;
|
265
|
+
opt[pos].litlen = ll;
|
266
|
+
opt[pos].price = price;
|
267
|
+
} } }
|
268
|
+
/* complete following positions with literals */
|
269
|
+
{ int addLit;
|
270
|
+
for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
|
271
|
+
opt[last_match_pos+addLit].mlen = 1; /* literal */
|
272
|
+
opt[last_match_pos+addLit].off = 0;
|
273
|
+
opt[last_match_pos+addLit].litlen = addLit;
|
274
|
+
opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
|
275
|
+
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
|
276
|
+
} }
|
277
|
+
} /* for (cur = 1; cur < last_match_pos; cur++) */
|
278
|
+
|
279
|
+
best_mlen = opt[last_match_pos].mlen;
|
280
|
+
best_off = opt[last_match_pos].off;
|
281
|
+
cur = last_match_pos - best_mlen;
|
282
|
+
|
283
|
+
encode: /* cur, last_match_pos, best_mlen, best_off must be set */
|
284
|
+
assert(cur < LZ4_OPT_NUM);
|
285
|
+
assert(last_match_pos >= 1); /* == 1 when only one candidate */
|
286
|
+
DEBUGLOG(6, "reverse traversal, looking for shortest path")
|
287
|
+
DEBUGLOG(6, "last_match_pos = %i", last_match_pos);
|
288
|
+
{ int candidate_pos = cur;
|
289
|
+
int selected_matchLength = best_mlen;
|
290
|
+
int selected_offset = best_off;
|
291
|
+
while (1) { /* from end to beginning */
|
292
|
+
int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */
|
293
|
+
int const next_offset = opt[candidate_pos].off;
|
294
|
+
DEBUGLOG(6, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
|
295
|
+
opt[candidate_pos].mlen = selected_matchLength;
|
296
|
+
opt[candidate_pos].off = selected_offset;
|
297
|
+
selected_matchLength = next_matchLength;
|
298
|
+
selected_offset = next_offset;
|
299
|
+
if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
|
300
|
+
assert(next_matchLength > 0); /* can be 1, means literal */
|
301
|
+
candidate_pos -= next_matchLength;
|
302
|
+
} }
|
303
|
+
|
304
|
+
/* encode all recorded sequences in order */
|
305
|
+
{ int rPos = 0; /* relative position (to ip) */
|
306
|
+
while (rPos < last_match_pos) {
|
307
|
+
int const ml = opt[rPos].mlen;
|
308
|
+
int const offset = opt[rPos].off;
|
309
|
+
if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */
|
310
|
+
rPos += ml;
|
311
|
+
assert(ml >= MINMATCH);
|
312
|
+
assert((offset >= 1) && (offset <= MAX_DISTANCE));
|
313
|
+
opSaved = op;
|
314
|
+
if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */
|
315
|
+
goto _dest_overflow;
|
316
|
+
} }
|
353
317
|
} /* while (ip < mflimit) */
|
354
318
|
|
319
|
+
_last_literals:
|
355
320
|
/* Encode Last Literals */
|
356
|
-
{
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
321
|
+
{ size_t lastRunSize = (size_t)(iend - anchor); /* literals */
|
322
|
+
size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
|
323
|
+
size_t const totalSize = 1 + litLength + lastRunSize;
|
324
|
+
if (limit == limitedDestSize) oend += LASTLITERALS; /* restore correct value */
|
325
|
+
if (limit && (op + totalSize > oend)) {
|
326
|
+
if (limit == limitedOutput) return 0; /* Check output limit */
|
327
|
+
/* adapt lastRunSize to fill 'dst' */
|
328
|
+
lastRunSize = (size_t)(oend - op) - 1;
|
329
|
+
litLength = (lastRunSize + 255 - RUN_MASK) / 255;
|
330
|
+
lastRunSize -= litLength;
|
331
|
+
}
|
332
|
+
ip = anchor + lastRunSize;
|
333
|
+
|
334
|
+
if (lastRunSize >= RUN_MASK) {
|
335
|
+
size_t accumulator = lastRunSize - RUN_MASK;
|
336
|
+
*op++ = (RUN_MASK << ML_BITS);
|
337
|
+
for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
|
338
|
+
*op++ = (BYTE) accumulator;
|
339
|
+
} else {
|
340
|
+
*op++ = (BYTE)(lastRunSize << ML_BITS);
|
341
|
+
}
|
342
|
+
memcpy(op, anchor, lastRunSize);
|
343
|
+
op += lastRunSize;
|
362
344
|
}
|
363
345
|
|
364
346
|
/* End */
|
365
|
-
|
347
|
+
*srcSizePtr = (int) (((const char*)ip) - source);
|
348
|
+
return (int) ((char*)op-dst);
|
349
|
+
|
350
|
+
_dest_overflow:
|
351
|
+
if (limit == limitedDestSize) {
|
352
|
+
op = opSaved; /* restore correct out pointer */
|
353
|
+
goto _last_literals;
|
354
|
+
}
|
355
|
+
return 0;
|
366
356
|
}
|