zstd-ruby 1.3.3.0 → 1.3.4.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only, and reflects the changes between the two versions as they appear in their public registries.
- checksums.yaml +5 -5
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +13 -0
- data/ext/zstdruby/libzstd/README.md +32 -25
- data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
- data/ext/zstdruby/libzstd/common/compiler.h +25 -0
- data/ext/zstdruby/libzstd/common/cpu.h +216 -0
- data/ext/zstdruby/libzstd/common/error_private.c +1 -0
- data/ext/zstdruby/libzstd/common/fse.h +1 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
- data/ext/zstdruby/libzstd/common/huf.h +114 -89
- data/ext/zstdruby/libzstd/common/pool.c +46 -17
- data/ext/zstdruby/libzstd/common/pool.h +18 -9
- data/ext/zstdruby/libzstd/common/threading.h +12 -12
- data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
- data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
- data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
- data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
- data/ext/zstdruby/libzstd/zstd.h +254 -254
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -3
data/ext/zstdruby/libzstd/compress/zstd_lazy.h

```diff
@@ -15,22 +15,39 @@
 extern "C" {
 #endif
 
-#include "
-
-
-
-
-
-
-
-size_t
-
-
-
-
-
-size_t
-
+#include "zstd_compress_internal.h"
+
+U32 ZSTD_insertAndFindFirstIndex(
+        ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+        const BYTE* ip);
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
 
 #if defined (__cplusplus)
 }
```
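The header change above is the visible edge of the 1.3.4 refactor: block compressors no longer receive a whole `ZSTD_CCtx*` but are handed the match state, sequence store, and repcodes explicitly. A minimal sketch of that calling convention, using stand-in types (everything suffixed `_sk` is invented for this sketch; the real definitions live in zstd_compress_internal.h):

```c
/* block_compressor_shape.c - illustrative only; _sk types are stand-ins. */
#include <stdio.h>
#include <stddef.h>

typedef unsigned U32;
#define ZSTD_REP_NUM 3

typedef struct { U32 nextToUpdate; } ZSTD_matchState_sk;
typedef struct { size_t nbSeq; } seqStore_sk;
typedef struct { unsigned strategy; } ZSTD_cParams_sk;

/* In 1.3.4 every block compressor is handed the match state, the sequence
 * store and the repcodes explicitly instead of a whole ZSTD_CCtx*. */
typedef size_t (*ZSTD_blockCompressor_sk)(
    ZSTD_matchState_sk* ms, seqStore_sk* seqStore, U32 rep[ZSTD_REP_NUM],
    ZSTD_cParams_sk const* cParams, void const* src, size_t srcSize);

/* A do-nothing "compressor": it emits no sequences, so every input byte is a
 * trailing literal, and the convention is to return that literal count. */
static size_t noop_sk(ZSTD_matchState_sk* ms, seqStore_sk* seqStore,
                      U32 rep[ZSTD_REP_NUM], ZSTD_cParams_sk const* cParams,
                      void const* src, size_t srcSize)
{
    (void)ms; (void)seqStore; (void)rep; (void)cParams; (void)src;
    return srcSize;  /* all bytes left over as literals */
}

int main(void)
{
    ZSTD_matchState_sk ms = { 0 };
    seqStore_sk store = { 0 };
    U32 rep[ZSTD_REP_NUM] = { 1, 4, 8 };
    ZSTD_cParams_sk cp = { 0 };
    ZSTD_blockCompressor_sk const bc = noop_sk;
    printf("last literals: %zu\n", bc(&ms, &store, rep, &cp, "hello", 5));
    return 0;
}
```

Passing the state explicitly is what lets the new LDM code (below) drive any block compressor over sub-ranges of a block without owning a compression context.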
data/ext/zstdruby/libzstd/compress/zstd_ldm.c

```diff
@@ -17,36 +17,45 @@
 #define LDM_HASH_RLOG 7
 #define LDM_HASH_CHAR_OFFSET 10
 
-
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams)
 {
+    U32 const windowLog = cParams->windowLog;
     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
-
-params->
-params->
-
-
-
-
-
-
+    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    if (cParams->strategy >= ZSTD_btopt) {
+        /* Get out of the way of the optimal parser */
+        U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
+        assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
+        assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
+        params->minMatchLength = minMatch;
+    }
     if (params->hashLog == 0) {
         params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
     }
-    if (params->hashEveryLog ==
+    if (params->hashEveryLog == 0) {
         params->hashEveryLog =
             windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
     }
     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
 }
 
-size_t ZSTD_ldm_getTableSize(
-
-size_t const
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+    size_t const ldmHSize = ((size_t)1) << params.hashLog;
+    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
     size_t const ldmBucketSize =
-        ((size_t)1) << (hashLog - ldmBucketSizeLog);
-
+        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+    return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
 }
 
 /** ZSTD_ldm_getSmallHash() :
```
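For a sense of scale, the `ZSTD_ldm_getTableSize()` and `ZSTD_ldm_getMaxNbSeq()` arithmetic can be replayed outside the library. The sketch below mirrors it, taking `sizeof(ldmEntry_t)` to be 8 bytes (a `U32` offset plus a `U32` checksum; treat that as this sketch's assumption):

```c
/* ldm_table_size.c - mirrors the ZSTD_ldm_getTableSize() arithmetic above. */
#include <stdio.h>
#include <stddef.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static size_t ldmTableSize(unsigned hashLog, unsigned bucketSizeLog, int enableLdm)
{
    size_t const entrySize = 8;                      /* assumed sizeof(ldmEntry_t) */
    size_t const ldmHSize = ((size_t)1) << hashLog;  /* number of hash entries */
    size_t const bLog = MIN(bucketSizeLog, hashLog); /* same clamp as the diff */
    size_t const ldmBucketSize = ((size_t)1) << (hashLog - bLog); /* tag bytes */
    size_t const totalSize = ldmBucketSize + ldmHSize * entrySize;
    return enableLdm ? totalSize : 0;
}

int main(void)
{
    /* hashLog=20, bucketSizeLog=3: 128 KiB of bucket tags + 8 MiB of entries */
    printf("%zu bytes\n", ldmTableSize(20, 3, 1));   /* prints 8519680 */
    /* ZSTD_ldm_getMaxNbSeq analogue: one sequence per minMatchLength bytes */
    printf("%zu max sequences per 1 MiB chunk\n", ((size_t)1 << 20) / 64);
    return 0;
}
```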
```diff
@@ -167,6 +176,7 @@ static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
 }
 
 U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
+    DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
     assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
     return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
 }
```
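`ZSTD_ldm_getHashPower()` precomputes prime^(minMatchLength-1), which is what lets the main loop's `ZSTD_ldm_updateHash()` drop the outgoing byte with a single multiply. A textbook Rabin-Karp rendition of the same idea (the multiplier and byte mapping here are arbitrary demo choices, not zstd's):

```c
/* rolling_hash_demo.c - the idea behind hashPower, not zstd's exact formula. */
#include <stdio.h>
#include <stdint.h>

static const uint64_t kPrime = 0x9E3779B97F4A7C15ULL;  /* arbitrary odd multiplier */

static uint64_t ipow(uint64_t base, uint64_t exp)  /* same shape as ZSTD_ldm_ipow */
{
    uint64_t ret = 1;
    while (exp) {
        if (exp & 1) ret *= base;
        exp >>= 1;
        base *= base;
    }
    return ret;
}

int main(void)
{
    enum { windowSize = 64 };          /* plays the role of minMatchLength */
    uint64_t const hashPower = ipow(kPrime, windowSize - 1);
    uint8_t buf[256];
    uint64_t h = 0, check = 0;
    int i;
    for (i = 0; i < 256; i++) buf[i] = (uint8_t)(i * 7 + 1);
    /* hash of buf[0..windowSize) */
    for (i = 0; i < windowSize; i++) h = h * kPrime + buf[i];
    /* slide by one byte: drop buf[0], add buf[windowSize], in O(1) */
    h = (h - buf[0] * hashPower) * kPrime + buf[windowSize];
    /* recompute from scratch to confirm the rolled value */
    for (i = 1; i <= windowSize; i++) check = check * kPrime + buf[i];
    printf("rolled == recomputed: %s\n", h == check ? "yes" : "no");
    return 0;
}
```

All arithmetic wraps mod 2^64 on both paths, so the one-multiply update and the full recomputation agree exactly.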
```diff
@@ -205,21 +215,22 @@ static size_t ZSTD_ldm_countBackwardsMatch(
  *
  *  The tables for the other strategies are filled within their
  *  block compressors. */
-static size_t ZSTD_ldm_fillFastTables(
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+                                      ZSTD_compressionParameters const* cParams,
+                                      void const* end)
 {
     const BYTE* const iend = (const BYTE*)end;
-    const U32 mls = zc->appliedParams.cParams.searchLength;
 
-    switch(
+    switch(cParams->strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable(
-
+        ZSTD_fillHashTable(ms, cParams, iend);
+        ms->nextToUpdate = (U32)(iend - ms->window.base);
         break;
 
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(
-
+        ZSTD_fillDoubleHashTable(ms, cParams, iend);
+        ms->nextToUpdate = (U32)(iend - ms->window.base);
         break;
 
     case ZSTD_greedy:
```
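`ZSTD_ldm_fillFastTables()` exists because the fast/dfast block compressors assume their hash tables already cover every position before `ms->nextToUpdate`. A toy model of that contract (real zstd hashes several bytes with a multiplicative hash and stores indexes into a shared window; this sketch only shows the shape):

```c
/* toy_fill_table.c - model of "fill the table up to iend, bump nextToUpdate". */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define TABLE_LOG 12
#define TABLE_SIZE (1u << TABLE_LOG)

typedef struct {
    uint32_t hashTable[TABLE_SIZE];
    uint32_t nextToUpdate;   /* first position not yet in the table */
} toyMatchState;

static uint32_t toyHash(const uint8_t* p)
{   /* hash 4 bytes into TABLE_LOG bits */
    uint32_t v; memcpy(&v, p, 4);
    return (v * 2654435761u) >> (32 - TABLE_LOG);
}

/* Analogue of ZSTD_fillHashTable plus the nextToUpdate bump in the diff. */
static void toyFillTable(toyMatchState* ms, const uint8_t* base, uint32_t end)
{
    uint32_t idx;
    for (idx = ms->nextToUpdate; idx + 4 <= end; idx++)
        ms->hashTable[toyHash(base + idx)] = idx;  /* last position seen per hash */
    ms->nextToUpdate = end;
}

int main(void)
{
    static const uint8_t data[] = "abcabcabcabcabc";
    toyMatchState ms = { {0}, 0 };
    toyFillTable(&ms, data, (uint32_t)(sizeof(data) - 1));
    printf("tables valid up to index %u\n", ms.nextToUpdate);
    return 0;
}
```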
```diff
@@ -268,69 +279,62 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
  *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
  *  if it is far way
  *  (after a long match, only update tables a limited amount). */
-static void ZSTD_ldm_limitTableUpdate(
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
 {
-    U32 const current = (U32)(anchor -
-    if (current >
-
-        current - MIN(512, current -
+    U32 const current = (U32)(anchor - ms->window.base);
+    if (current > ms->nextToUpdate + 1024) {
+        ms->nextToUpdate =
+            current - MIN(512, current - ms->nextToUpdate - 1024);
     }
 }
 
-
-
-
-
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
-                                      const void* src, size_t srcSize)
+static size_t ZSTD_ldm_generateSequences_internal(
+        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+        ldmParams_t const* params, void const* src, size_t srcSize)
 {
-
-    const
-    const
-    const
-    const
-    const
-
-
-
-    const
-    const
-    const
-    const
-    const
-    const
-
-
-
-
-
+    /* LDM parameters */
+    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+    U32 const minMatchLength = params->minMatchLength;
+    U64 const hashPower = ldmState->hashPower;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
+    U32 const hashEveryLog = params->hashEveryLog;
+    U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ldmState->window.dictLimit;
+    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+    BYTE const* const lowPrefixPtr = base + dictLimit;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
+    /* Input positions */
+    BYTE const* anchor = istart;
+    BYTE const* ip = istart;
+    /* Rolling hash */
+    BYTE const* lastHashed = NULL;
     U64 rollingHash = 0;
-    const BYTE* lastHashed = NULL;
-    size_t i, lastLiterals;
 
-
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
-
-    /* Main Search Loop */
-    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+    while (ip <= ilimit) {
         size_t mLength;
         U32 const current = (U32)(ip - base);
         size_t forwardMatchLength = 0, backwardMatchLength = 0;
         ldmEntry_t* bestEntry = NULL;
         if (ip != istart) {
             rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
-                                              lastHashed[
+                                              lastHashed[minMatchLength],
                                               hashPower);
         } else {
-            rollingHash = ZSTD_ldm_getRollingHash(ip,
+            rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
         }
         lastHashed = ip;
 
         /* Do not insert and do not look for a match */
-        if (ZSTD_ldm_getTag(rollingHash, hBits,
-            ldmTagMask) {
+        if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
             ip++;
             continue;
         }
```
```diff
@@ -340,27 +344,49 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
             ldmEntry_t* const bucket =
                 ZSTD_ldm_getBucket(ldmState,
                                    ZSTD_ldm_getSmallHash(rollingHash, hBits),
-
+                                   *params);
             ldmEntry_t* cur;
             size_t bestMatchLength = 0;
             U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
 
             for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const pMatch = cur->offset + base;
                 size_t curForwardMatchLength, curBackwardMatchLength,
                        curTotalMatchLength;
                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                     continue;
                 }
-
-
-
-
+                if (extDict) {
+                    BYTE const* const curMatchBase =
+                        cur->offset < dictLimit ? dictBase : base;
+                    BYTE const* const pMatch = curMatchBase + cur->offset;
+                    BYTE const* const matchEnd =
+                        cur->offset < dictLimit ? dictEnd : iend;
+                    BYTE const* const lowMatchPtr =
+                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+
+                    curForwardMatchLength = ZSTD_count_2segments(
+                                                ip, pMatch, iend,
+                                                matchEnd, lowPrefixPtr);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowMatchPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
+                } else { /* !extDict */
+                    BYTE const* const pMatch = base + cur->offset;
+                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowPrefixPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
                 }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                                             ip, anchor, pMatch, lowest);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
 
                 if (curTotalMatchLength > bestMatchLength) {
                     bestMatchLength = curTotalMatchLength;
```
```diff
@@ -375,7 +401,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
         if (bestEntry == NULL) {
             ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                              hBits, current,
-
+                                             *params);
             ip++;
             continue;
         }
```
```diff
@@ -384,324 +410,244 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
         mLength = forwardMatchLength + backwardMatchLength;
         ip -= backwardMatchLength;
 
-        /* Call the block compressor on the remaining literals */
         {
+            /* Store the sequence:
+             *   ip = current - backwardMatchLength
+             *   The match is at (bestEntry->offset - backwardMatchLength)
+             */
             U32 const matchIndex = bestEntry->offset;
-
-
-
-            /*
-
-
-
-
-
-
-
-            /* Call block compressor and get remaining literals */
-            lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
-            cctx->nextToUpdate = (U32)(ip - base);
-
-            /* Update repToConfirm with the new offset */
-            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-                repToConfirm[i] = repToConfirm[i-1];
-            repToConfirm[0] = offset;
-
-            /* Store the sequence with the leftover literals */
-            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
-                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+            U32 const offset = current - matchIndex;
+            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+            /* Out of sequence storage */
+            if (rawSeqStore->size == rawSeqStore->capacity)
+                return ERROR(dstSize_tooSmall);
+            seq->litLength = (U32)(ip - anchor);
+            seq->matchLength = (U32)mLength;
+            seq->offset = offset;
+            rawSeqStore->size++;
         }
 
         /* Insert the current entry into the hash table */
         ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                          (U32)(lastHashed - base),
-
+                                         *params);
 
         assert(ip + backwardMatchLength == lastHashed);
 
         /* Fill the hash table from lastHashed+1 to ip+mLength*/
         /* Heuristic: don't need to fill the entire table at end of block */
-        if (ip + mLength
+        if (ip + mLength <= ilimit) {
             rollingHash = ZSTD_ldm_fillLdmHashTable(
                               ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits,
+                              ip + mLength, base, hBits, *params);
             lastHashed = ip + mLength - 1;
         }
         ip += mLength;
         anchor = ip;
-        /* Check immediate repcode */
-        while ( (ip < ilimit)
-             && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
-             && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
-
-            size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
-                                              iend) + 4;
-            /* Swap repToConfirm[1] <=> repToConfirm[0] */
-            {
-                U32 const tmpOff = repToConfirm[1];
-                repToConfirm[1] = repToConfirm[0];
-                repToConfirm[0] = tmpOff;
-            }
-
-            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
-
-            /* Fill the hash table from lastHashed+1 to ip+rLength*/
-            if (ip + rLength < ilimit) {
-                rollingHash = ZSTD_ldm_fillLdmHashTable(
-                                  ldmState, rollingHash, lastHashed,
-                                  ip + rLength, base, hBits, ldmParams);
-                lastHashed = ip + rLength - 1;
-            }
-            ip += rLength;
-            anchor = ip;
-        }
     }
-
-    /* Overwrite rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = repToConfirm[i];
-
-    ZSTD_ldm_limitTableUpdate(cctx, anchor);
-    ZSTD_ldm_fillFastTables(cctx, anchor);
-
-    lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
-    cctx->nextToUpdate = (U32)(iend - base);
-
-    /* Restore seqStorePtr->rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = savedRep[i];
-
-    /* Return the last literals size */
-    return lastLiterals;
+    return iend - anchor;
 }
 
-
-
+/*! ZSTD_ldm_reduceTable() :
+ *  reduce table indexes by `reducerValue` */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+                                 U32 const reducerValue)
 {
-
+    U32 u;
+    for (u = 0; u < size; u++) {
+        if (table[u].offset < reducerValue) table[u].offset = 0;
+        else table[u].offset -= reducerValue;
+    }
 }
 
-
-
-
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldmState, rawSeqStore_t* sequences,
+        ldmParams_t const* params, void const* src, size_t srcSize)
 {
-
-    const
-    const
-    const
-    const
-
-
-
-
-
-
-
-    const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    U32 const maxDist = 1U << params->windowLog;
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    size_t const kMaxChunkSize = 1 << 20;
+    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
+    size_t chunk;
+    size_t leftoverSize = 0;
+
+    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+    /* Check that ZSTD_window_update() has been called for this chunk prior
+     * to passing it to this function.
+     */
+    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+    /* The input could be very large (in zstdmt), so it must be broken up into
+     * chunks to enforce the maximmum distance and handle overflow correction.
+     */
+    assert(sequences->pos <= sequences->size);
+    assert(sequences->size <= sequences->capacity);
+    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
+        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
+        size_t const remaining = (size_t)(iend - chunkStart);
+        BYTE const *const chunkEnd =
+            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
+        size_t const chunkSize = chunkEnd - chunkStart;
+        size_t newLeftoverSize;
+        size_t const prevSize = sequences->size;
+
+        assert(chunkStart < iend);
+        /* 1. Perform overflow correction if necessary. */
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+            U32 const ldmHSize = 1U << params->hashLog;
+            U32 const correction = ZSTD_window_correctOverflow(
+                &ldmState->window, /* cycleLog */ 0, maxDist, src);
+            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+        }
+        /* 2. We enforce the maximum offset allowed.
+         *
+         * kMaxChunkSize should be small enough that we don't lose too much of
+         * the window through early invalidation.
+         * TODO: * Test the chunk size.
+         *       * Try invalidation after the sequence generation and test the
+         *         the offset against maxDist directly.
+         */
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
+        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+            ldmState, sequences, params, chunkStart, chunkSize);
+        if (ZSTD_isError(newLeftoverSize))
+            return newLeftoverSize;
+        /* 4. We add the leftover literals from previous iterations to the first
+         * newly generated sequence, or add the `newLeftoverSize` if none are
+         * generated.
+         */
+        /* Prepend the leftover literals from the last call */
+        if (prevSize < sequences->size) {
+            sequences->seq[prevSize].litLength += (U32)leftoverSize;
+            leftoverSize = newLeftoverSize;
         } else {
-
+            assert(newLeftoverSize == chunkSize);
+            leftoverSize += chunkSize;
         }
-
+    }
+    return 0;
+}
 
-
-
-
-
-
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
+        if (srcSize <= seq->litLength) {
+            /* Skip past srcSize literals */
+            seq->litLength -= (U32)srcSize;
+            return;
         }
-
-
-        {
-
-
-
-
-
-
-            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-
-            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const curMatchBase =
-                    cur->offset < dictLimit ? dictBase : base;
-                const BYTE* const pMatch = curMatchBase + cur->offset;
-                const BYTE* const matchEnd =
-                    cur->offset < dictLimit ? dictEnd : iend;
-                const BYTE* const lowMatchPtr =
-                    cur->offset < dictLimit ? dictStart : lowPrefixPtr;
-                size_t curForwardMatchLength, curBackwardMatchLength,
-                       curTotalMatchLength;
-
-                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
-                    continue;
-                }
-
-                curForwardMatchLength = ZSTD_count_2segments(
-                                            ip, pMatch, iend,
-                                            matchEnd, lowPrefixPtr);
-                if (curForwardMatchLength < ldmParams.minMatchLength) {
-                    continue;
-                }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                                             ip, anchor, pMatch, lowMatchPtr);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
-
-                if (curTotalMatchLength > bestMatchLength) {
-                    bestMatchLength = curTotalMatchLength;
-                    forwardMatchLength = curForwardMatchLength;
-                    backwardMatchLength = curBackwardMatchLength;
-                    bestEntry = cur;
+        srcSize -= seq->litLength;
+        seq->litLength = 0;
+        if (srcSize < seq->matchLength) {
+            /* Skip past the first srcSize of the match */
+            seq->matchLength -= (U32)srcSize;
+            if (seq->matchLength < minMatch) {
+                /* The match is too short, omit it */
+                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+                    seq[1].litLength += seq[0].matchLength;
                 }
+                rawSeqStore->pos++;
             }
+            return;
         }
+        srcSize -= seq->matchLength;
+        seq->matchLength = 0;
+        rawSeqStore->pos++;
+    }
+}
 
-
-
-
-
-
-
-
+/**
+ * If the sequence length is longer than remaining then the sequence is split
+ * between this block and the next.
+ *
+ * Returns the current sequence to handle, or if the rest of the block should
+ * be literals, it returns a sequence with offset == 0.
+ */
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+                                 U32 const remaining, U32 const minMatch)
+{
+    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
+    assert(sequence.offset > 0);
+    /* Likely: No partial sequence */
+    if (remaining >= sequence.litLength + sequence.matchLength) {
+        rawSeqStore->pos++;
+        return sequence;
+    }
+    /* Cut the sequence short (offset == 0 ==> rest is literals). */
+    if (remaining <= sequence.litLength) {
+        sequence.offset = 0;
+    } else if (remaining < sequence.litLength + sequence.matchLength) {
+        sequence.matchLength = remaining - sequence.litLength;
+        if (sequence.matchLength < minMatch) {
+            sequence.offset = 0;
         }
+    }
+    /* Skip past `remaining` bytes for the future sequences. */
+    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
+    return sequence;
+}
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
+    int const extDict)
+{
+    unsigned const minMatch = cParams->searchLength;
+    ZSTD_blockCompressor const blockCompressor =
+        ZSTD_selectBlockCompressor(cParams->strategy, extDict);
+    BYTE const* const base = ms->window.base;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    /* Input positions */
+    BYTE const* ip = istart;
+
+    assert(rawSeqStore->pos <= rawSeqStore->size);
+    assert(rawSeqStore->size <= rawSeqStore->capacity);
+    /* Loop through each sequence and apply the block compressor to the lits */
+    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+        /* maybeSplitSequence updates rawSeqStore->pos */
+        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+                                                   (U32)(iend - ip), minMatch);
+        int i;
+        /* End signal */
+        if (sequence.offset == 0)
+            break;
 
-
-
-        ctx->nextToUpdate = (U32)(ip - base);
+        assert(sequence.offset <= (1U << cParams->windowLog));
+        assert(ip + sequence.litLength + sequence.matchLength <= iend);
 
-
+        /* Fill tables for block compressor */
+        ZSTD_ldm_limitTableUpdate(ms, ip);
+        ZSTD_ldm_fillFastTables(ms, cParams, ip);
+        /* Run the block compressor */
+        {
+            size_t const newLitLength =
+                blockCompressor(ms, seqStore, rep, cParams, ip,
+                                sequence.litLength);
+            ip += sequence.litLength;
+            ms->nextToUpdate = (U32)(ip - base);
+            /* Update the repcodes */
             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-
-
-
-
-
-
-
-
-        /* Insert the current entry into the hash table */
-        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                         (U32)(lastHashed - base),
-                                         ldmParams);
-
-        /* Fill the hash table from lastHashed+1 to ip+mLength */
-        assert(ip + backwardMatchLength == lastHashed);
-        if (ip + mLength < ilimit) {
-            rollingHash = ZSTD_ldm_fillLdmHashTable(
-                              ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits,
-                              ldmParams);
-            lastHashed = ip + mLength - 1;
-        }
-        ip += mLength;
-        anchor = ip;
-
-        /* check immediate repcode */
-        while (ip < ilimit) {
-            U32 const current2 = (U32)(ip-base);
-            U32 const repIndex2 = current2 - repToConfirm[1];
-            const BYTE* repMatch2 = repIndex2 < dictLimit ?
-                dictBase + repIndex2 : base + repIndex2;
-            if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
-                 (repIndex2 > lowestIndex))  /* intentional overflow */
-               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                const BYTE* const repEnd2 = repIndex2 < dictLimit ?
-                    dictEnd : iend;
-                size_t const repLength2 =
-                    ZSTD_count_2segments(ip+4, repMatch2+4, iend,
-                                         repEnd2, lowPrefixPtr) + 4;
-
-                U32 tmpOffset = repToConfirm[1];
-                repToConfirm[1] = repToConfirm[0];
-                repToConfirm[0] = tmpOffset;
-
-                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
-
-                /* Fill the hash table from lastHashed+1 to ip+repLength2*/
-                if (ip + repLength2 < ilimit) {
-                    rollingHash = ZSTD_ldm_fillLdmHashTable(
-                                      ldmState, rollingHash, lastHashed,
-                                      ip + repLength2, base, hBits,
-                                      ldmParams);
-                    lastHashed = ip + repLength2 - 1;
-                }
-                ip += repLength2;
-                anchor = ip;
-                continue;
-            }
-            break;
+                rep[i] = rep[i-1];
+            rep[0] = sequence.offset;
+            /* Store the sequence */
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+                          sequence.offset + ZSTD_REP_MOVE,
+                          sequence.matchLength - MINMATCH);
+            ip += sequence.matchLength;
         }
     }
-
-
-
-
-
-
-
-
-
-
-    ctx->nextToUpdate = (U32)(iend - base);
-
-    /* Restore seqStorePtr->rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = savedRep[i];
-
-    /* Return the last literals size */
-    return lastLiterals;
-}
-
-size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
-                                      const void* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
+    /* Fill the tables for the block compressor */
+    ZSTD_ldm_limitTableUpdate(ms, ip);
+    ZSTD_ldm_fillFastTables(ms, cParams, ip);
+    /* Compress the last literals */
+    {
+        size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
+                                                    ip, iend - ip);
+        ms->nextToUpdate = (U32)(iend - base);
+        return lastLiterals;
+    }
 }
```