extzstd 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +13 -0
- data/README.md +17 -14
- data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
- data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
- data/contrib/zstd/Makefile +99 -53
- data/contrib/zstd/README.md +59 -39
- data/contrib/zstd/TESTING.md +1 -1
- data/contrib/zstd/appveyor.yml +17 -6
- data/contrib/zstd/lib/BUCK +29 -2
- data/contrib/zstd/lib/Makefile +118 -21
- data/contrib/zstd/lib/README.md +84 -44
- data/contrib/zstd/lib/common/bitstream.h +17 -33
- data/contrib/zstd/lib/common/compiler.h +62 -8
- data/contrib/zstd/lib/common/cpu.h +215 -0
- data/contrib/zstd/lib/common/debug.c +44 -0
- data/contrib/zstd/lib/common/debug.h +134 -0
- data/contrib/zstd/lib/common/entropy_common.c +16 -1
- data/contrib/zstd/lib/common/error_private.c +7 -0
- data/contrib/zstd/lib/common/fse.h +48 -44
- data/contrib/zstd/lib/common/fse_decompress.c +3 -3
- data/contrib/zstd/lib/common/huf.h +169 -113
- data/contrib/zstd/lib/common/mem.h +20 -2
- data/contrib/zstd/lib/common/pool.c +135 -49
- data/contrib/zstd/lib/common/pool.h +40 -21
- data/contrib/zstd/lib/common/threading.c +2 -2
- data/contrib/zstd/lib/common/threading.h +12 -12
- data/contrib/zstd/lib/common/xxhash.c +3 -2
- data/contrib/zstd/lib/common/zstd_common.c +3 -6
- data/contrib/zstd/lib/common/zstd_errors.h +17 -7
- data/contrib/zstd/lib/common/zstd_internal.h +76 -48
- data/contrib/zstd/lib/compress/fse_compress.c +89 -209
- data/contrib/zstd/lib/compress/hist.c +203 -0
- data/contrib/zstd/lib/compress/hist.h +95 -0
- data/contrib/zstd/lib/compress/huf_compress.c +188 -80
- data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
- data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
- data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
- data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
- data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
- data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
- data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
- data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
- data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
- data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
- data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
- data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
- data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
- data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
- data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
- data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
- data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
- data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
- data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
- data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
- data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
- data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
- data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
- data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
- data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
- data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
- data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
- data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
- data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
- data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
- data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
- data/contrib/zstd/lib/zstd.h +1346 -832
- data/ext/extzstd.c +27 -19
- data/ext/extzstd_stream.c +20 -4
- data/ext/zstd_compress.c +1 -0
- data/ext/zstd_decompress.c +4 -0
- data/ext/zstd_dictbuilder.c +4 -0
- data/ext/zstd_dictbuilder_fastcover.c +5 -0
- data/lib/extzstd.rb +52 -220
- data/lib/extzstd/version.rb +1 -1
- metadata +21 -7
- data/contrib/zstd/circle.yml +0 -63
|
@@ -15,22 +15,50 @@
|
|
|
15
15
|
extern "C" {
|
|
16
16
|
#endif
|
|
17
17
|
|
|
18
|
-
#include "
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
void
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
size_t
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
size_t
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
size_t
|
|
18
|
+
#include "zstd_compress_internal.h"
|
|
19
|
+
|
|
20
|
+
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
|
21
|
+
|
|
22
|
+
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
|
|
23
|
+
|
|
24
|
+
size_t ZSTD_compressBlock_btlazy2(
|
|
25
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
26
|
+
void const* src, size_t srcSize);
|
|
27
|
+
size_t ZSTD_compressBlock_lazy2(
|
|
28
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
29
|
+
void const* src, size_t srcSize);
|
|
30
|
+
size_t ZSTD_compressBlock_lazy(
|
|
31
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
32
|
+
void const* src, size_t srcSize);
|
|
33
|
+
size_t ZSTD_compressBlock_greedy(
|
|
34
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
35
|
+
void const* src, size_t srcSize);
|
|
36
|
+
|
|
37
|
+
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
|
|
38
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
39
|
+
void const* src, size_t srcSize);
|
|
40
|
+
size_t ZSTD_compressBlock_lazy2_dictMatchState(
|
|
41
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
42
|
+
void const* src, size_t srcSize);
|
|
43
|
+
size_t ZSTD_compressBlock_lazy_dictMatchState(
|
|
44
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
45
|
+
void const* src, size_t srcSize);
|
|
46
|
+
size_t ZSTD_compressBlock_greedy_dictMatchState(
|
|
47
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
48
|
+
void const* src, size_t srcSize);
|
|
49
|
+
|
|
50
|
+
size_t ZSTD_compressBlock_greedy_extDict(
|
|
51
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
52
|
+
void const* src, size_t srcSize);
|
|
53
|
+
size_t ZSTD_compressBlock_lazy_extDict(
|
|
54
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
55
|
+
void const* src, size_t srcSize);
|
|
56
|
+
size_t ZSTD_compressBlock_lazy2_extDict(
|
|
57
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
58
|
+
void const* src, size_t srcSize);
|
|
59
|
+
size_t ZSTD_compressBlock_btlazy2_extDict(
|
|
60
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
61
|
+
void const* src, size_t srcSize);
|
|
34
62
|
|
|
35
63
|
#if defined (__cplusplus)
|
|
36
64
|
}
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
#include "zstd_ldm.h"
|
|
11
11
|
|
|
12
|
+
#include "debug.h"
|
|
12
13
|
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
|
|
13
14
|
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
|
|
14
15
|
|
|
@@ -17,36 +18,46 @@
|
|
|
17
18
|
#define LDM_HASH_RLOG 7
|
|
18
19
|
#define LDM_HASH_CHAR_OFFSET 10
|
|
19
20
|
|
|
20
|
-
|
|
21
|
+
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
|
|
22
|
+
ZSTD_compressionParameters const* cParams)
|
|
21
23
|
{
|
|
24
|
+
params->windowLog = cParams->windowLog;
|
|
22
25
|
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
|
|
23
|
-
|
|
24
|
-
params->
|
|
25
|
-
params->
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
|
|
27
|
+
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
|
|
28
|
+
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
|
|
29
|
+
if (cParams->strategy >= ZSTD_btopt) {
|
|
30
|
+
/* Get out of the way of the optimal parser */
|
|
31
|
+
U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
|
|
32
|
+
assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
|
|
33
|
+
assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
|
|
34
|
+
params->minMatchLength = minMatch;
|
|
35
|
+
}
|
|
33
36
|
if (params->hashLog == 0) {
|
|
34
|
-
params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
|
|
37
|
+
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
|
|
35
38
|
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
|
|
36
39
|
}
|
|
37
|
-
if (params->
|
|
38
|
-
params->
|
|
39
|
-
|
|
40
|
+
if (params->hashRateLog == 0) {
|
|
41
|
+
params->hashRateLog = params->windowLog < params->hashLog
|
|
42
|
+
? 0
|
|
43
|
+
: params->windowLog - params->hashLog;
|
|
40
44
|
}
|
|
41
45
|
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
|
|
42
46
|
}
|
|
43
47
|
|
|
44
|
-
size_t ZSTD_ldm_getTableSize(
|
|
45
|
-
|
|
46
|
-
size_t const
|
|
48
|
+
size_t ZSTD_ldm_getTableSize(ldmParams_t params)
|
|
49
|
+
{
|
|
50
|
+
size_t const ldmHSize = ((size_t)1) << params.hashLog;
|
|
51
|
+
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
|
|
47
52
|
size_t const ldmBucketSize =
|
|
48
|
-
((size_t)1) << (hashLog - ldmBucketSizeLog);
|
|
49
|
-
|
|
53
|
+
((size_t)1) << (params.hashLog - ldmBucketSizeLog);
|
|
54
|
+
size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
|
|
55
|
+
return params.enableLdm ? totalSize : 0;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
|
|
59
|
+
{
|
|
60
|
+
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
|
|
50
61
|
}
|
|
51
62
|
|
|
52
63
|
/** ZSTD_ldm_getSmallHash() :
|
|
@@ -108,20 +119,20 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
|
|
|
108
119
|
*
|
|
109
120
|
* Gets the small hash, checksum, and tag from the rollingHash.
|
|
110
121
|
*
|
|
111
|
-
* If the tag matches (1 << ldmParams.
|
|
122
|
+
* If the tag matches (1 << ldmParams.hashRateLog)-1, then
|
|
112
123
|
* creates an ldmEntry from the offset, and inserts it into the hash table.
|
|
113
124
|
*
|
|
114
125
|
* hBits is the length of the small hash, which is the most significant hBits
|
|
115
126
|
* of rollingHash. The checksum is the next 32 most significant bits, followed
|
|
116
|
-
* by ldmParams.
|
|
127
|
+
* by ldmParams.hashRateLog bits that make up the tag. */
|
|
117
128
|
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
|
|
118
129
|
U64 const rollingHash,
|
|
119
130
|
U32 const hBits,
|
|
120
131
|
U32 const offset,
|
|
121
132
|
ldmParams_t const ldmParams)
|
|
122
133
|
{
|
|
123
|
-
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.
|
|
124
|
-
U32 const tagMask = ((U32)1 << ldmParams.
|
|
134
|
+
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
|
|
135
|
+
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
|
|
125
136
|
if (tag == tagMask) {
|
|
126
137
|
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
|
|
127
138
|
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
|
|
@@ -132,55 +143,6 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
|
|
|
132
143
|
}
|
|
133
144
|
}
|
|
134
145
|
|
|
135
|
-
/** ZSTD_ldm_getRollingHash() :
|
|
136
|
-
* Get a 64-bit hash using the first len bytes from buf.
|
|
137
|
-
*
|
|
138
|
-
* Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
|
|
139
|
-
* H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
|
|
140
|
-
*
|
|
141
|
-
* where the constant a is defined to be prime8bytes.
|
|
142
|
-
*
|
|
143
|
-
* The implementation adds an offset to each byte, so
|
|
144
|
-
* H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
|
|
145
|
-
static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
|
|
146
|
-
{
|
|
147
|
-
U64 ret = 0;
|
|
148
|
-
U32 i;
|
|
149
|
-
for (i = 0; i < len; i++) {
|
|
150
|
-
ret *= prime8bytes;
|
|
151
|
-
ret += buf[i] + LDM_HASH_CHAR_OFFSET;
|
|
152
|
-
}
|
|
153
|
-
return ret;
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
/** ZSTD_ldm_ipow() :
|
|
157
|
-
* Return base^exp. */
|
|
158
|
-
static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
|
|
159
|
-
{
|
|
160
|
-
U64 ret = 1;
|
|
161
|
-
while (exp) {
|
|
162
|
-
if (exp & 1) { ret *= base; }
|
|
163
|
-
exp >>= 1;
|
|
164
|
-
base *= base;
|
|
165
|
-
}
|
|
166
|
-
return ret;
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
|
|
170
|
-
assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
|
|
171
|
-
return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
/** ZSTD_ldm_updateHash() :
|
|
175
|
-
* Updates hash by removing toRemove and adding toAdd. */
|
|
176
|
-
static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
|
|
177
|
-
{
|
|
178
|
-
hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
|
|
179
|
-
hash *= prime8bytes;
|
|
180
|
-
hash += toAdd + LDM_HASH_CHAR_OFFSET;
|
|
181
|
-
return hash;
|
|
182
|
-
}
|
|
183
|
-
|
|
184
146
|
/** ZSTD_ldm_countBackwardsMatch() :
|
|
185
147
|
* Returns the number of bytes that match backwards before pIn and pMatch.
|
|
186
148
|
*
|
|
@@ -205,21 +167,19 @@ static size_t ZSTD_ldm_countBackwardsMatch(
|
|
|
205
167
|
*
|
|
206
168
|
* The tables for the other strategies are filled within their
|
|
207
169
|
* block compressors. */
|
|
208
|
-
static size_t ZSTD_ldm_fillFastTables(
|
|
170
|
+
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
|
|
171
|
+
void const* end)
|
|
209
172
|
{
|
|
210
173
|
const BYTE* const iend = (const BYTE*)end;
|
|
211
|
-
const U32 mls = zc->appliedParams.cParams.searchLength;
|
|
212
174
|
|
|
213
|
-
switch(
|
|
175
|
+
switch(ms->cParams.strategy)
|
|
214
176
|
{
|
|
215
177
|
case ZSTD_fast:
|
|
216
|
-
ZSTD_fillHashTable(
|
|
217
|
-
zc->nextToUpdate = (U32)(iend - zc->base);
|
|
178
|
+
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
|
|
218
179
|
break;
|
|
219
180
|
|
|
220
181
|
case ZSTD_dfast:
|
|
221
|
-
ZSTD_fillDoubleHashTable(
|
|
222
|
-
zc->nextToUpdate = (U32)(iend - zc->base);
|
|
182
|
+
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
|
|
223
183
|
break;
|
|
224
184
|
|
|
225
185
|
case ZSTD_greedy:
|
|
@@ -228,6 +188,7 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
|
|
|
228
188
|
case ZSTD_btlazy2:
|
|
229
189
|
case ZSTD_btopt:
|
|
230
190
|
case ZSTD_btultra:
|
|
191
|
+
case ZSTD_btultra2:
|
|
231
192
|
break;
|
|
232
193
|
default:
|
|
233
194
|
assert(0); /* not possible : not a valid strategy id */
|
|
@@ -251,9 +212,9 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
|
|
|
251
212
|
const BYTE* cur = lastHashed + 1;
|
|
252
213
|
|
|
253
214
|
while (cur < iend) {
|
|
254
|
-
rollingHash =
|
|
255
|
-
|
|
256
|
-
|
|
215
|
+
rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
|
|
216
|
+
cur[ldmParams.minMatchLength-1],
|
|
217
|
+
state->hashPower);
|
|
257
218
|
ZSTD_ldm_makeEntryAndInsertByTag(state,
|
|
258
219
|
rollingHash, hBits,
|
|
259
220
|
(U32)(cur - base), ldmParams);
|
|
@@ -268,69 +229,62 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
|
|
|
268
229
|
* Sets cctx->nextToUpdate to a position corresponding closer to anchor
|
|
269
230
|
* if it is far way
|
|
270
231
|
* (after a long match, only update tables a limited amount). */
|
|
271
|
-
static void ZSTD_ldm_limitTableUpdate(
|
|
232
|
+
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
|
|
272
233
|
{
|
|
273
|
-
U32 const current = (U32)(anchor -
|
|
274
|
-
if (current >
|
|
275
|
-
|
|
276
|
-
current - MIN(512, current -
|
|
234
|
+
U32 const current = (U32)(anchor - ms->window.base);
|
|
235
|
+
if (current > ms->nextToUpdate + 1024) {
|
|
236
|
+
ms->nextToUpdate =
|
|
237
|
+
current - MIN(512, current - ms->nextToUpdate - 1024);
|
|
277
238
|
}
|
|
278
239
|
}
|
|
279
240
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
FORCE_INLINE_TEMPLATE
|
|
285
|
-
size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
|
|
286
|
-
const void* src, size_t srcSize)
|
|
241
|
+
static size_t ZSTD_ldm_generateSequences_internal(
|
|
242
|
+
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
|
|
243
|
+
ldmParams_t const* params, void const* src, size_t srcSize)
|
|
287
244
|
{
|
|
288
|
-
|
|
289
|
-
const
|
|
290
|
-
const
|
|
291
|
-
const
|
|
292
|
-
const
|
|
293
|
-
const
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
const
|
|
298
|
-
const
|
|
299
|
-
const
|
|
300
|
-
const
|
|
301
|
-
const
|
|
302
|
-
const
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
245
|
+
/* LDM parameters */
|
|
246
|
+
int const extDict = ZSTD_window_hasExtDict(ldmState->window);
|
|
247
|
+
U32 const minMatchLength = params->minMatchLength;
|
|
248
|
+
U64 const hashPower = ldmState->hashPower;
|
|
249
|
+
U32 const hBits = params->hashLog - params->bucketSizeLog;
|
|
250
|
+
U32 const ldmBucketSize = 1U << params->bucketSizeLog;
|
|
251
|
+
U32 const hashRateLog = params->hashRateLog;
|
|
252
|
+
U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
|
|
253
|
+
/* Prefix and extDict parameters */
|
|
254
|
+
U32 const dictLimit = ldmState->window.dictLimit;
|
|
255
|
+
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
|
|
256
|
+
BYTE const* const base = ldmState->window.base;
|
|
257
|
+
BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
|
|
258
|
+
BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
|
|
259
|
+
BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
|
|
260
|
+
BYTE const* const lowPrefixPtr = base + dictLimit;
|
|
261
|
+
/* Input bounds */
|
|
262
|
+
BYTE const* const istart = (BYTE const*)src;
|
|
263
|
+
BYTE const* const iend = istart + srcSize;
|
|
264
|
+
BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
|
|
265
|
+
/* Input positions */
|
|
266
|
+
BYTE const* anchor = istart;
|
|
267
|
+
BYTE const* ip = istart;
|
|
268
|
+
/* Rolling hash */
|
|
269
|
+
BYTE const* lastHashed = NULL;
|
|
308
270
|
U64 rollingHash = 0;
|
|
309
|
-
const BYTE* lastHashed = NULL;
|
|
310
|
-
size_t i, lastLiterals;
|
|
311
|
-
|
|
312
|
-
/* Save seqStorePtr->rep and copy repToConfirm */
|
|
313
|
-
for (i = 0; i < ZSTD_REP_NUM; i++)
|
|
314
|
-
savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
|
|
315
271
|
|
|
316
|
-
|
|
317
|
-
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
|
272
|
+
while (ip <= ilimit) {
|
|
318
273
|
size_t mLength;
|
|
319
274
|
U32 const current = (U32)(ip - base);
|
|
320
275
|
size_t forwardMatchLength = 0, backwardMatchLength = 0;
|
|
321
276
|
ldmEntry_t* bestEntry = NULL;
|
|
322
277
|
if (ip != istart) {
|
|
323
|
-
rollingHash =
|
|
324
|
-
|
|
325
|
-
|
|
278
|
+
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
|
|
279
|
+
lastHashed[minMatchLength],
|
|
280
|
+
hashPower);
|
|
326
281
|
} else {
|
|
327
|
-
rollingHash =
|
|
282
|
+
rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
|
|
328
283
|
}
|
|
329
284
|
lastHashed = ip;
|
|
330
285
|
|
|
331
286
|
/* Do not insert and do not look for a match */
|
|
332
|
-
if (ZSTD_ldm_getTag(rollingHash, hBits,
|
|
333
|
-
ldmTagMask) {
|
|
287
|
+
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
|
|
334
288
|
ip++;
|
|
335
289
|
continue;
|
|
336
290
|
}
|
|
@@ -340,27 +294,49 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
|
|
|
340
294
|
ldmEntry_t* const bucket =
|
|
341
295
|
ZSTD_ldm_getBucket(ldmState,
|
|
342
296
|
ZSTD_ldm_getSmallHash(rollingHash, hBits),
|
|
343
|
-
|
|
297
|
+
*params);
|
|
344
298
|
ldmEntry_t* cur;
|
|
345
299
|
size_t bestMatchLength = 0;
|
|
346
300
|
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
|
|
347
301
|
|
|
348
302
|
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
|
|
349
|
-
const BYTE* const pMatch = cur->offset + base;
|
|
350
303
|
size_t curForwardMatchLength, curBackwardMatchLength,
|
|
351
304
|
curTotalMatchLength;
|
|
352
305
|
if (cur->checksum != checksum || cur->offset <= lowestIndex) {
|
|
353
306
|
continue;
|
|
354
307
|
}
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
308
|
+
if (extDict) {
|
|
309
|
+
BYTE const* const curMatchBase =
|
|
310
|
+
cur->offset < dictLimit ? dictBase : base;
|
|
311
|
+
BYTE const* const pMatch = curMatchBase + cur->offset;
|
|
312
|
+
BYTE const* const matchEnd =
|
|
313
|
+
cur->offset < dictLimit ? dictEnd : iend;
|
|
314
|
+
BYTE const* const lowMatchPtr =
|
|
315
|
+
cur->offset < dictLimit ? dictStart : lowPrefixPtr;
|
|
316
|
+
|
|
317
|
+
curForwardMatchLength = ZSTD_count_2segments(
|
|
318
|
+
ip, pMatch, iend,
|
|
319
|
+
matchEnd, lowPrefixPtr);
|
|
320
|
+
if (curForwardMatchLength < minMatchLength) {
|
|
321
|
+
continue;
|
|
322
|
+
}
|
|
323
|
+
curBackwardMatchLength =
|
|
324
|
+
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
|
|
325
|
+
lowMatchPtr);
|
|
326
|
+
curTotalMatchLength = curForwardMatchLength +
|
|
327
|
+
curBackwardMatchLength;
|
|
328
|
+
} else { /* !extDict */
|
|
329
|
+
BYTE const* const pMatch = base + cur->offset;
|
|
330
|
+
curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
|
|
331
|
+
if (curForwardMatchLength < minMatchLength) {
|
|
332
|
+
continue;
|
|
333
|
+
}
|
|
334
|
+
curBackwardMatchLength =
|
|
335
|
+
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
|
|
336
|
+
lowPrefixPtr);
|
|
337
|
+
curTotalMatchLength = curForwardMatchLength +
|
|
338
|
+
curBackwardMatchLength;
|
|
359
339
|
}
|
|
360
|
-
curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
|
|
361
|
-
ip, anchor, pMatch, lowest);
|
|
362
|
-
curTotalMatchLength = curForwardMatchLength +
|
|
363
|
-
curBackwardMatchLength;
|
|
364
340
|
|
|
365
341
|
if (curTotalMatchLength > bestMatchLength) {
|
|
366
342
|
bestMatchLength = curTotalMatchLength;
|
|
@@ -375,7 +351,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
|
|
|
375
351
|
if (bestEntry == NULL) {
|
|
376
352
|
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
|
|
377
353
|
hBits, current,
|
|
378
|
-
|
|
354
|
+
*params);
|
|
379
355
|
ip++;
|
|
380
356
|
continue;
|
|
381
357
|
}
|
|
@@ -384,324 +360,238 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
|
|
|
384
360
|
mLength = forwardMatchLength + backwardMatchLength;
|
|
385
361
|
ip -= backwardMatchLength;
|
|
386
362
|
|
|
387
|
-
/* Call the block compressor on the remaining literals */
|
|
388
363
|
{
|
|
364
|
+
/* Store the sequence:
|
|
365
|
+
* ip = current - backwardMatchLength
|
|
366
|
+
* The match is at (bestEntry->offset - backwardMatchLength)
|
|
367
|
+
*/
|
|
389
368
|
U32 const matchIndex = bestEntry->offset;
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
/*
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
/* Call block compressor and get remaining literals */
|
|
402
|
-
lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
|
|
403
|
-
cctx->nextToUpdate = (U32)(ip - base);
|
|
404
|
-
|
|
405
|
-
/* Update repToConfirm with the new offset */
|
|
406
|
-
for (i = ZSTD_REP_NUM - 1; i > 0; i--)
|
|
407
|
-
repToConfirm[i] = repToConfirm[i-1];
|
|
408
|
-
repToConfirm[0] = offset;
|
|
409
|
-
|
|
410
|
-
/* Store the sequence with the leftover literals */
|
|
411
|
-
ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
|
|
412
|
-
offset + ZSTD_REP_MOVE, mLength - MINMATCH);
|
|
369
|
+
U32 const offset = current - matchIndex;
|
|
370
|
+
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
|
|
371
|
+
|
|
372
|
+
/* Out of sequence storage */
|
|
373
|
+
if (rawSeqStore->size == rawSeqStore->capacity)
|
|
374
|
+
return ERROR(dstSize_tooSmall);
|
|
375
|
+
seq->litLength = (U32)(ip - anchor);
|
|
376
|
+
seq->matchLength = (U32)mLength;
|
|
377
|
+
seq->offset = offset;
|
|
378
|
+
rawSeqStore->size++;
|
|
413
379
|
}
|
|
414
380
|
|
|
415
381
|
/* Insert the current entry into the hash table */
|
|
416
382
|
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
|
|
417
383
|
(U32)(lastHashed - base),
|
|
418
|
-
|
|
384
|
+
*params);
|
|
419
385
|
|
|
420
386
|
assert(ip + backwardMatchLength == lastHashed);
|
|
421
387
|
|
|
422
388
|
/* Fill the hash table from lastHashed+1 to ip+mLength*/
|
|
423
389
|
/* Heuristic: don't need to fill the entire table at end of block */
|
|
424
|
-
if (ip + mLength
|
|
390
|
+
if (ip + mLength <= ilimit) {
|
|
425
391
|
rollingHash = ZSTD_ldm_fillLdmHashTable(
|
|
426
392
|
ldmState, rollingHash, lastHashed,
|
|
427
|
-
ip + mLength, base, hBits,
|
|
393
|
+
ip + mLength, base, hBits, *params);
|
|
428
394
|
lastHashed = ip + mLength - 1;
|
|
429
395
|
}
|
|
430
396
|
ip += mLength;
|
|
431
397
|
anchor = ip;
|
|
432
|
-
/* Check immediate repcode */
|
|
433
|
-
while ( (ip < ilimit)
|
|
434
|
-
&& ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
|
|
435
|
-
&& (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
|
|
436
|
-
|
|
437
|
-
size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
|
|
438
|
-
iend) + 4;
|
|
439
|
-
/* Swap repToConfirm[1] <=> repToConfirm[0] */
|
|
440
|
-
{
|
|
441
|
-
U32 const tmpOff = repToConfirm[1];
|
|
442
|
-
repToConfirm[1] = repToConfirm[0];
|
|
443
|
-
repToConfirm[0] = tmpOff;
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
|
|
447
|
-
|
|
448
|
-
/* Fill the hash table from lastHashed+1 to ip+rLength*/
|
|
449
|
-
if (ip + rLength < ilimit) {
|
|
450
|
-
rollingHash = ZSTD_ldm_fillLdmHashTable(
|
|
451
|
-
ldmState, rollingHash, lastHashed,
|
|
452
|
-
ip + rLength, base, hBits, ldmParams);
|
|
453
|
-
lastHashed = ip + rLength - 1;
|
|
454
|
-
}
|
|
455
|
-
ip += rLength;
|
|
456
|
-
anchor = ip;
|
|
457
|
-
}
|
|
458
398
|
}
|
|
459
|
-
|
|
460
|
-
/* Overwrite rep */
|
|
461
|
-
for (i = 0; i < ZSTD_REP_NUM; i++)
|
|
462
|
-
seqStorePtr->rep[i] = repToConfirm[i];
|
|
463
|
-
|
|
464
|
-
ZSTD_ldm_limitTableUpdate(cctx, anchor);
|
|
465
|
-
ZSTD_ldm_fillFastTables(cctx, anchor);
|
|
466
|
-
|
|
467
|
-
lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
|
|
468
|
-
cctx->nextToUpdate = (U32)(iend - base);
|
|
469
|
-
|
|
470
|
-
/* Restore seqStorePtr->rep */
|
|
471
|
-
for (i = 0; i < ZSTD_REP_NUM; i++)
|
|
472
|
-
seqStorePtr->rep[i] = savedRep[i];
|
|
473
|
-
|
|
474
|
-
/* Return the last literals size */
|
|
475
|
-
return lastLiterals;
|
|
399
|
+
return iend - anchor;
|
|
476
400
|
}
|
|
477
401
|
|
|
478
|
-
|
|
479
|
-
|
|
402
|
+
/*! ZSTD_ldm_reduceTable() :
|
|
403
|
+
* reduce table indexes by `reducerValue` */
|
|
404
|
+
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
|
|
405
|
+
U32 const reducerValue)
|
|
480
406
|
{
|
|
481
|
-
|
|
407
|
+
U32 u;
|
|
408
|
+
for (u = 0; u < size; u++) {
|
|
409
|
+
if (table[u].offset < reducerValue) table[u].offset = 0;
|
|
410
|
+
else table[u].offset -= reducerValue;
|
|
411
|
+
}
|
|
482
412
|
}
|
|
483
413
|
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
414
|
+
size_t ZSTD_ldm_generateSequences(
|
|
415
|
+
ldmState_t* ldmState, rawSeqStore_t* sequences,
|
|
416
|
+
ldmParams_t const* params, void const* src, size_t srcSize)
|
|
487
417
|
{
|
|
488
|
-
|
|
489
|
-
const
|
|
490
|
-
const
|
|
491
|
-
const
|
|
492
|
-
const
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
const
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
418
|
+
U32 const maxDist = 1U << params->windowLog;
|
|
419
|
+
BYTE const* const istart = (BYTE const*)src;
|
|
420
|
+
BYTE const* const iend = istart + srcSize;
|
|
421
|
+
size_t const kMaxChunkSize = 1 << 20;
|
|
422
|
+
size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
|
|
423
|
+
size_t chunk;
|
|
424
|
+
size_t leftoverSize = 0;
|
|
425
|
+
|
|
426
|
+
assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
|
|
427
|
+
/* Check that ZSTD_window_update() has been called for this chunk prior
|
|
428
|
+
* to passing it to this function.
|
|
429
|
+
*/
|
|
430
|
+
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
|
|
431
|
+
/* The input could be very large (in zstdmt), so it must be broken up into
|
|
432
|
+
* chunks to enforce the maximum distance and handle overflow correction.
|
|
433
|
+
*/
|
|
434
|
+
assert(sequences->pos <= sequences->size);
|
|
435
|
+
assert(sequences->size <= sequences->capacity);
|
|
436
|
+
for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
|
|
437
|
+
BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
|
|
438
|
+
size_t const remaining = (size_t)(iend - chunkStart);
|
|
439
|
+
BYTE const *const chunkEnd =
|
|
440
|
+
(remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
|
|
441
|
+
size_t const chunkSize = chunkEnd - chunkStart;
|
|
442
|
+
size_t newLeftoverSize;
|
|
443
|
+
size_t const prevSize = sequences->size;
|
|
444
|
+
|
|
445
|
+
assert(chunkStart < iend);
|
|
446
|
+
/* 1. Perform overflow correction if necessary. */
|
|
447
|
+
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
|
|
448
|
+
U32 const ldmHSize = 1U << params->hashLog;
|
|
449
|
+
U32 const correction = ZSTD_window_correctOverflow(
|
|
450
|
+
&ldmState->window, /* cycleLog */ 0, maxDist, src);
|
|
451
|
+
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
|
|
452
|
+
}
|
|
453
|
+
/* 2. We enforce the maximum offset allowed.
|
|
454
|
+
*
|
|
455
|
+
* kMaxChunkSize should be small enough that we don't lose too much of
|
|
456
|
+
* the window through early invalidation.
|
|
457
|
+
* TODO: * Test the chunk size.
|
|
458
|
+
* * Try invalidation after the sequence generation and test the
|
|
459
|
+
* the offset against maxDist directly.
|
|
460
|
+
*/
|
|
461
|
+
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
|
|
462
|
+
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
|
|
463
|
+
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
|
|
464
|
+
ldmState, sequences, params, chunkStart, chunkSize);
|
|
465
|
+
if (ZSTD_isError(newLeftoverSize))
|
|
466
|
+
return newLeftoverSize;
|
|
467
|
+
/* 4. We add the leftover literals from previous iterations to the first
|
|
468
|
+
* newly generated sequence, or add the `newLeftoverSize` if none are
|
|
469
|
+
* generated.
|
|
470
|
+
*/
|
|
471
|
+
/* Prepend the leftover literals from the last call */
|
|
472
|
+
if (prevSize < sequences->size) {
|
|
473
|
+
sequences->seq[prevSize].litLength += (U32)leftoverSize;
|
|
474
|
+
leftoverSize = newLeftoverSize;
|
|
531
475
|
} else {
|
|
532
|
-
|
|
476
|
+
assert(newLeftoverSize == chunkSize);
|
|
477
|
+
leftoverSize += chunkSize;
|
|
533
478
|
}
|
|
534
|
-
|
|
479
|
+
}
|
|
480
|
+
return 0;
|
|
481
|
+
}
|
|
535
482
|
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
483
|
+
/**
 * ZSTD_ldm_skipSequences():
 * Consumes `srcSize` bytes from the front of the pending raw sequences,
 * starting at rawSeqStore->pos.
 *
 * Bytes are taken first from the current sequence's literals, then from its
 * match.  A sequence whose literals and match are both fully consumed is
 * left with zero lengths and rawSeqStore->pos moves past it.
 *
 * When the skip ends inside a match, that match is shortened.  If the
 * shortened match is smaller than `minMatch` it is dropped: its remaining
 * bytes are added to the literal length of the following sequence (when one
 * exists) and rawSeqStore->pos moves past the dropped sequence.
 *
 * The function stops early when the store runs out of sequences.
 */
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
    size_t toSkip = srcSize;

    while (toSkip > 0 && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq* const cur = &rawSeqStore->seq[rawSeqStore->pos];

        /* The skip ends inside (or exactly at the end of) the literals. */
        if (toSkip <= cur->litLength) {
            cur->litLength -= (U32)toSkip;
            return;
        }

        /* All literals of this sequence are skipped. */
        toSkip -= cur->litLength;
        cur->litLength = 0;

        /* The whole match is skipped too: move on to the next sequence. */
        if (toSkip >= cur->matchLength) {
            toSkip -= cur->matchLength;
            cur->matchLength = 0;
            rawSeqStore->pos++;
            continue;
        }

        /* The skip ends inside the match: keep only its tail. */
        cur->matchLength -= (U32)toSkip;
        if (cur->matchLength < minMatch) {
            /* Tail too short to be a match: hand its bytes to the next
             * sequence as literals (if there is one) and drop it. */
            if (rawSeqStore->pos + 1 < rawSeqStore->size) {
                cur[1].litLength += cur->matchLength;
            }
            rawSeqStore->pos++;
        }
        return;
    }
}
|
|
587
510
|
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
511
|
+
/**
|
|
512
|
+
* If the sequence length is longer than remaining then the sequence is split
|
|
513
|
+
* between this block and the next.
|
|
514
|
+
*
|
|
515
|
+
* Returns the current sequence to handle, or if the rest of the block should
|
|
516
|
+
* be literals, it returns a sequence with offset == 0.
|
|
517
|
+
*/
|
|
518
|
+
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
|
|
519
|
+
U32 const remaining, U32 const minMatch)
|
|
520
|
+
{
|
|
521
|
+
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
|
|
522
|
+
assert(sequence.offset > 0);
|
|
523
|
+
/* Likely: No partial sequence */
|
|
524
|
+
if (remaining >= sequence.litLength + sequence.matchLength) {
|
|
525
|
+
rawSeqStore->pos++;
|
|
526
|
+
return sequence;
|
|
527
|
+
}
|
|
528
|
+
/* Cut the sequence short (offset == 0 ==> rest is literals). */
|
|
529
|
+
if (remaining <= sequence.litLength) {
|
|
530
|
+
sequence.offset = 0;
|
|
531
|
+
} else if (remaining < sequence.litLength + sequence.matchLength) {
|
|
532
|
+
sequence.matchLength = remaining - sequence.litLength;
|
|
533
|
+
if (sequence.matchLength < minMatch) {
|
|
534
|
+
sequence.offset = 0;
|
|
595
535
|
}
|
|
536
|
+
}
|
|
537
|
+
/* Skip past `remaining` bytes for the future sequences. */
|
|
538
|
+
ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
|
|
539
|
+
return sequence;
|
|
540
|
+
}
|
|
596
541
|
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
542
|
+
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|
543
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
544
|
+
void const* src, size_t srcSize)
|
|
545
|
+
{
|
|
546
|
+
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
547
|
+
unsigned const minMatch = cParams->minMatch;
|
|
548
|
+
ZSTD_blockCompressor const blockCompressor =
|
|
549
|
+
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
|
|
550
|
+
/* Input bounds */
|
|
551
|
+
BYTE const* const istart = (BYTE const*)src;
|
|
552
|
+
BYTE const* const iend = istart + srcSize;
|
|
553
|
+
/* Input positions */
|
|
554
|
+
BYTE const* ip = istart;
|
|
555
|
+
|
|
556
|
+
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
|
|
557
|
+
assert(rawSeqStore->pos <= rawSeqStore->size);
|
|
558
|
+
assert(rawSeqStore->size <= rawSeqStore->capacity);
|
|
559
|
+
/* Loop through each sequence and apply the block compressor to the lits */
|
|
560
|
+
while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
|
|
561
|
+
/* maybeSplitSequence updates rawSeqStore->pos */
|
|
562
|
+
rawSeq const sequence = maybeSplitSequence(rawSeqStore,
|
|
563
|
+
(U32)(iend - ip), minMatch);
|
|
564
|
+
int i;
|
|
565
|
+
/* End signal */
|
|
566
|
+
if (sequence.offset == 0)
|
|
567
|
+
break;
|
|
615
568
|
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
ctx->nextToUpdate = (U32)(ip - base);
|
|
569
|
+
assert(sequence.offset <= (1U << cParams->windowLog));
|
|
570
|
+
assert(ip + sequence.litLength + sequence.matchLength <= iend);
|
|
619
571
|
|
|
620
|
-
|
|
572
|
+
/* Fill tables for block compressor */
|
|
573
|
+
ZSTD_ldm_limitTableUpdate(ms, ip);
|
|
574
|
+
ZSTD_ldm_fillFastTables(ms, ip);
|
|
575
|
+
/* Run the block compressor */
|
|
576
|
+
DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
|
|
577
|
+
{
|
|
578
|
+
size_t const newLitLength =
|
|
579
|
+
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
|
|
580
|
+
ip += sequence.litLength;
|
|
581
|
+
/* Update the repcodes */
|
|
621
582
|
for (i = ZSTD_REP_NUM - 1; i > 0; i--)
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
/* Insert the current entry into the hash table */
|
|
631
|
-
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
|
|
632
|
-
(U32)(lastHashed - base),
|
|
633
|
-
ldmParams);
|
|
634
|
-
|
|
635
|
-
/* Fill the hash table from lastHashed+1 to ip+mLength */
|
|
636
|
-
assert(ip + backwardMatchLength == lastHashed);
|
|
637
|
-
if (ip + mLength < ilimit) {
|
|
638
|
-
rollingHash = ZSTD_ldm_fillLdmHashTable(
|
|
639
|
-
ldmState, rollingHash, lastHashed,
|
|
640
|
-
ip + mLength, base, hBits,
|
|
641
|
-
ldmParams);
|
|
642
|
-
lastHashed = ip + mLength - 1;
|
|
643
|
-
}
|
|
644
|
-
ip += mLength;
|
|
645
|
-
anchor = ip;
|
|
646
|
-
|
|
647
|
-
/* check immediate repcode */
|
|
648
|
-
while (ip < ilimit) {
|
|
649
|
-
U32 const current2 = (U32)(ip-base);
|
|
650
|
-
U32 const repIndex2 = current2 - repToConfirm[1];
|
|
651
|
-
const BYTE* repMatch2 = repIndex2 < dictLimit ?
|
|
652
|
-
dictBase + repIndex2 : base + repIndex2;
|
|
653
|
-
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
|
|
654
|
-
(repIndex2 > lowestIndex)) /* intentional overflow */
|
|
655
|
-
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
|
656
|
-
const BYTE* const repEnd2 = repIndex2 < dictLimit ?
|
|
657
|
-
dictEnd : iend;
|
|
658
|
-
size_t const repLength2 =
|
|
659
|
-
ZSTD_count_2segments(ip+4, repMatch2+4, iend,
|
|
660
|
-
repEnd2, lowPrefixPtr) + 4;
|
|
661
|
-
|
|
662
|
-
U32 tmpOffset = repToConfirm[1];
|
|
663
|
-
repToConfirm[1] = repToConfirm[0];
|
|
664
|
-
repToConfirm[0] = tmpOffset;
|
|
665
|
-
|
|
666
|
-
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
|
|
667
|
-
|
|
668
|
-
/* Fill the hash table from lastHashed+1 to ip+repLength2*/
|
|
669
|
-
if (ip + repLength2 < ilimit) {
|
|
670
|
-
rollingHash = ZSTD_ldm_fillLdmHashTable(
|
|
671
|
-
ldmState, rollingHash, lastHashed,
|
|
672
|
-
ip + repLength2, base, hBits,
|
|
673
|
-
ldmParams);
|
|
674
|
-
lastHashed = ip + repLength2 - 1;
|
|
675
|
-
}
|
|
676
|
-
ip += repLength2;
|
|
677
|
-
anchor = ip;
|
|
678
|
-
continue;
|
|
679
|
-
}
|
|
680
|
-
break;
|
|
583
|
+
rep[i] = rep[i-1];
|
|
584
|
+
rep[0] = sequence.offset;
|
|
585
|
+
/* Store the sequence */
|
|
586
|
+
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
|
|
587
|
+
sequence.offset + ZSTD_REP_MOVE,
|
|
588
|
+
sequence.matchLength - MINMATCH);
|
|
589
|
+
ip += sequence.matchLength;
|
|
681
590
|
}
|
|
682
591
|
}
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
ZSTD_ldm_limitTableUpdate(ctx, anchor);
|
|
689
|
-
ZSTD_ldm_fillFastTables(ctx, anchor);
|
|
690
|
-
|
|
691
|
-
/* Call the block compressor one last time on the last literals */
|
|
692
|
-
lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
|
|
693
|
-
ctx->nextToUpdate = (U32)(iend - base);
|
|
694
|
-
|
|
695
|
-
/* Restore seqStorePtr->rep */
|
|
696
|
-
for (i = 0; i < ZSTD_REP_NUM; i++)
|
|
697
|
-
seqStorePtr->rep[i] = savedRep[i];
|
|
698
|
-
|
|
699
|
-
/* Return the last literals size */
|
|
700
|
-
return lastLiterals;
|
|
701
|
-
}
|
|
702
|
-
|
|
703
|
-
size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
|
|
704
|
-
const void* src, size_t srcSize)
|
|
705
|
-
{
|
|
706
|
-
return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
|
|
592
|
+
/* Fill the tables for the block compressor */
|
|
593
|
+
ZSTD_ldm_limitTableUpdate(ms, ip);
|
|
594
|
+
ZSTD_ldm_fillFastTables(ms, ip);
|
|
595
|
+
/* Compress the last literals */
|
|
596
|
+
return blockCompressor(ms, seqStore, rep, ip, iend - ip);
|
|
707
597
|
}
|