zstd-ruby 1.5.0.0 → 1.5.1.0
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/README.md +1 -1
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/Makefile +50 -175
- data/ext/zstdruby/libzstd/README.md +7 -1
- data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
- data/ext/zstdruby/libzstd/common/compiler.h +89 -43
- data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
- data/ext/zstdruby/libzstd/common/error_private.h +79 -0
- data/ext/zstdruby/libzstd/common/fse.h +2 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
- data/ext/zstdruby/libzstd/common/huf.h +24 -22
- data/ext/zstdruby/libzstd/common/mem.h +18 -0
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_internal.h +92 -88
- data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
- data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +194 -278
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +102 -44
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +5 -4
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -3
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +289 -114
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +302 -123
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +418 -502
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +186 -108
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +59 -29
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +99 -28
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/zdict.h +4 -4
- data/ext/zstdruby/libzstd/zstd.h +179 -136
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +8 -3
data/ext/zstdruby/libzstd/compress/zstd_ldm.c:

@@ -159,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
     size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
     size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
                            + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
-    return params.enableLdm ? totalSize : 0;
+    return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
 }
 
 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
 {
-    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
+    return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
 }
 
 /** ZSTD_ldm_getBucket() :
@@ -478,7 +478,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
          */
         if (anchor > ip + hashed) {
             ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
-            /* Continue the
+            /* Continue the outer loop at anchor (ip + hashed == anchor). */
             ip = anchor - hashed;
             break;
         }
@@ -657,7 +657,7 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
 
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-    ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+    ZSTD_paramSwitch_e useRowMatchFinder,
     void const* src, size_t srcSize)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
data/ext/zstdruby/libzstd/compress/zstd_ldm.h:

@@ -66,7 +66,7 @@ size_t ZSTD_ldm_generateSequences(
  */
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-            ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+            ZSTD_paramSwitch_e useRowMatchFinder,
             void const* src, size_t srcSize);
 
 /**
data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h:

@@ -11,7 +11,10 @@
 #ifndef ZSTD_LDM_GEARTAB_H
 #define ZSTD_LDM_GEARTAB_H
 
-static U64 ZSTD_ldm_gearTab[256] = {
+#include "../common/compiler.h" /* UNUSED_ATTR */
+#include "../common/mem.h"      /* U64 */
+
+static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
     0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
     0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
     0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
data/ext/zstdruby/libzstd/compress/zstd_opt.c:

@@ -14,7 +14,6 @@
 
 
 #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
-#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
 #define ZSTD_MAX_PRICE     (1<<30)
 
 #define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
@@ -24,11 +23,11 @@
 *  Price functions for optimal parser
 ***************************************/
 
-#if 0    /* approximation at bit level */
+#if 0    /* approximation at bit level (for tests) */
 #  define BITCOST_ACCURACY 0
 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-#  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
-#elif 0   /* fractional bit accuracy */
+#  define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
+#elif 0   /* fractional bit accuracy (for tests) */
 #  define BITCOST_ACCURACY 8
 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
 #  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
@@ -66,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
 
 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
 {
-    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+    return optPtr->literalCompressionMode != ZSTD_ps_disable;
 }
 
 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -79,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
 }
 
 
-/* ZSTD_downscaleStat() :
- * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
- * return the resulting sum of elements */
-static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+static U32 sum_u32(const unsigned table[], size_t nbElts)
+{
+    size_t n;
+    U32 total = 0;
+    for (n=0; n<nbElts; n++) {
+        total += table[n];
+    }
+    return total;
+}
+
+static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
 {
     U32 s, sum=0;
-    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
-    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
+    assert(shift < 30);
     for (s=0; s<lastEltIndex+1; s++) {
-        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        table[s] = 1 + (table[s] >> shift);
         sum += table[s];
     }
     return sum;
 }
 
+/* ZSTD_scaleStats() :
+ * reduce all elements in table is sum too large
+ * return the resulting sum of elements */
+static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
+{
+    U32 const prevsum = sum_u32(table, lastEltIndex+1);
+    U32 const factor = prevsum >> logTarget;
+    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
+    assert(logTarget < 30);
+    if (factor <= 1) return prevsum;
+    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
+}
+
 /* ZSTD_rescaleFreqs() :
  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
  *    take hints from dictionary if there is one
- *    or init from zero, using src for literals stats, or flat distribution for match symbols
+ *    and init from zero if there is none,
+ *    using src for literals stats, and baseline stats for sequence symbols
  * otherwise downscale existing stats, to be used as seed for next block.
  */
 static void
@@ -126,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             optPtr->litSum = 0;
             for (lit=0; lit<=MaxLit; lit++) {
                 U32 const scaleLog = 11;   /* scale to 2K */
-                U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                 assert(bitCost <= scaleLog);
                 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                 optPtr->litSum += optPtr->litFreq[lit];
@@ -174,14 +194,18 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             if (compressedLiterals) {
                 unsigned lit = MaxLit;
                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
-                optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
             }
 
-            {   unsigned ll;
-                for (ll=0; ll<=MaxLL; ll++)
-                    optPtr->litLengthFreq[ll] = 1;
+            {   unsigned const baseLLfreqs[MaxLL+1] = {
+                    4, 2, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1
+                };
+                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
             }
-            optPtr->litLengthSum = MaxLL+1;
 
             {   unsigned ml;
                 for (ml=0; ml<=MaxML; ml++)
@@ -189,21 +213,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             }
             optPtr->matchLengthSum = MaxML+1;
 
-            {   unsigned of;
-                for (of=0; of<=MaxOff; of++)
-                    optPtr->offCodeFreq[of] = 1;
+            {   unsigned const baseOFCfreqs[MaxOff+1] = {
+                    6, 2, 1, 1, 2, 3, 4, 4,
+                    4, 3, 2, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1
+                };
+                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
             }
-            optPtr->offCodeSum = MaxOff+1;
+
 
         }
 
     } else {   /* new block : re-use previous statistics, scaled down */
 
         if (compressedLiterals)
-            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
-        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
-        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
-        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
+        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
+        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
+        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
     }
 
     ZSTD_setBasePrices(optPtr, optLevel);
@@ -338,7 +366,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
 
 /* Update hashTable3 up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
                                               U32* nextToUpdate3,
                                               const BYTE* const ip)
 {
@@ -364,11 +392,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
 *  Binary Tree search
 ***************************************/
 /** ZSTD_insertBt1() : add one or multiple positions to tree.
- *  ip : assumed <= iend-8 .
+ * @param ip assumed <= iend-8 .
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
  * @return : nb of positions added */
 static U32 ZSTD_insertBt1(
-                ZSTD_matchState_t* ms,
+                const ZSTD_matchState_t* ms,
                 const BYTE* const ip, const BYTE* const iend,
+                U32 const target,
                 U32 const mls, const int extDict)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -391,7 +421,10 @@ static U32 ZSTD_insertBt1(
     U32* smallerPtr = bt + 2*(curr&btMask);
     U32* largerPtr  = smallerPtr + 1;
     U32 dummy32;   /* to be nullified at the end */
-    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
+    /* windowLow is based on target because
+     * we only need positions that will be in the window at the end of the tree update.
+     */
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
     U32 matchEndIdx = curr+8+1;
     size_t bestLength = 8;
     U32 nbCompares = 1U << cParams->searchLog;
@@ -404,11 +437,12 @@ static U32 ZSTD_insertBt1(
 
     DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
 
+    assert(curr <= target);
     assert(ip <= iend-8);   /* required for h calculation */
     hashTable[h] = curr;   /* Update Hash Table */
 
     assert(windowLow > 0);
-    while (nbCompares-- && (matchIndex >= windowLow)) {
+    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         assert(matchIndex < curr);
@@ -492,7 +526,7 @@ void ZSTD_updateTree_internal(
                 idx, target, dictMode);
 
     while(idx < target) {
-        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
         assert(idx < (U32)(idx + forward));
         idx += forward;
     }
@@ -635,11 +669,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 return 1;
         }   }   }
         /* no dictMatchState lookup: dicts don't have a populated HC3 table */
-    }
+    }   /* if (mls == 3) */
 
     hashTable[h] = curr;   /* Update Hash Table */
 
-    while (nbCompares-- && (matchIndex >= matchLow)) {
+    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
         const BYTE* match;
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
@@ -672,8 +706,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
-            }
-        }
+        }   }
 
         if (match[matchLength] < ip[matchLength]) {
             /* match smaller than current */
@@ -692,12 +725,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
 
     *smallerPtr = *largerPtr = 0;
 
+    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
     if (dictMode == ZSTD_dictMatchState && nbCompares) {
         size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
         U32 dictMatchIndex = dms->hashTable[dmsH];
         const U32* const dmsBt = dms->chainTable;
         commonLengthSmaller = commonLengthLarger = 0;
-        while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
+        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
             const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
             const BYTE* match = dmsBase + dictMatchIndex;
@@ -718,8 +752,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 if ( (matchLength > ZSTD_OPT_NUM)
                    | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
-                }
-            }
+            }   }
 
             if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
             if (match[matchLength] < ip[matchLength]) {
@@ -729,39 +762,90 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 /* match is larger than current */
                 commonLengthLarger = matchLength;
                 dictMatchIndex = nextPtr[0];
-            }
-        }
-    }
+    }   }   }   /* if (dictMode == ZSTD_dictMatchState) */
 
     assert(matchEndIdx > curr+8);
     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
     return mnum;
 }
 
-
-FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
-                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
-                        ZSTD_matchState_t* ms,
-                        U32* nextToUpdate3,
-                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
-                        const U32 rep[ZSTD_REP_NUM],
-                        U32 const ll0,
-                        U32 const lengthToBeat)
+typedef U32 (*ZSTD_getAllMatchesFn)(
+    ZSTD_match_t*,
+    ZSTD_matchState_t*,
+    U32*,
+    const BYTE*,
+    const BYTE*,
+    const U32 rep[ZSTD_REP_NUM],
+    U32 const ll0,
+    U32 const lengthToBeat);
+
+FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
+        ZSTD_match_t* matches,
+        ZSTD_matchState_t* ms,
+        U32* nextToUpdate3,
+        const BYTE* ip,
+        const BYTE* const iHighLimit,
+        const U32 rep[ZSTD_REP_NUM],
+        U32 const ll0,
+        U32 const lengthToBeat,
+        const ZSTD_dictMode_e dictMode,
+        const U32 mls)
 {
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32 const matchLengthSearch = cParams->minMatch;
-    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
-    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
-    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
-    switch(matchLengthSearch)
-    {
-    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
-    default :
-    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
-    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
-    case 7 :
-    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
+    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
+    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
+    if (ip < ms->window.base + ms->nextToUpdate)
+        return 0;   /* skipped area */
+    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
+    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
+}
+
+#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
+
+#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
+    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
+            ZSTD_match_t* matches,                             \
+            ZSTD_matchState_t* ms,                             \
+            U32* nextToUpdate3,                                \
+            const BYTE* ip,                                    \
+            const BYTE* const iHighLimit,                      \
+            const U32 rep[ZSTD_REP_NUM],                       \
+            U32 const ll0,                                     \
+            U32 const lengthToBeat)                            \
+    {                                                          \
+        return ZSTD_btGetAllMatches_internal(                  \
+                matches, ms, nextToUpdate3, ip, iHighLimit,    \
+                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
+    }
+
+#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
+    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
+    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
+    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
+    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
+
+GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
+GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
+GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
+
+#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
+    {                                            \
+        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
+        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
+        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
+        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
     }
+
+static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
+{
+    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
+        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
+        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
+        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
+    };
+    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
+    assert((U32)dictMode < 3);
+    assert(mls - 3 < 4);
+    return getAllMatchesFns[(int)dictMode][mls - 3];
 }
 
 /*************************
@@ -893,17 +977,17 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
             */
            U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
-        }
+        }
         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
     }
     ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
 }
 
+
 /*-*******************************
 *  Optimal parser
 *********************************/
 
-
 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
 {
     return sol.litlen + sol.mlen;
@@ -944,6 +1028,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
     const BYTE* const prefixStart = base + ms->window.dictLimit;
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
 
+    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
+
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
     U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
     U32 nextToUpdate3 = ms->nextToUpdate;
@@ -971,7 +1057,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
         /* find first match */
         {   U32 const litlen = (U32)(ip - anchor);
             U32 const ll0 = !litlen;
-            U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                               (U32)(ip-istart), (U32)(iend - ip));
             if (!nbMatches) { ip++; continue; }
@@ -985,7 +1071,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             * in every price. We include the literal length to avoid negative
             * prices when we subtract the previous literal length.
             */
-            opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
 
             /* large match -> immediate encoding */
             {   U32 const maxML = matches[nbMatches-1].len;
@@ -1005,7 +1091,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             }   }
 
             /* set prices for first matches starting position == 0 */
-            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+            assert(opt[0].price >= 0);
+            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                 U32 pos;
                 U32 matchNb;
                 for (pos = 1; pos < minMatch; pos++) {
@@ -1022,7 +1109,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     opt[pos].mlen = pos;
                     opt[pos].off = offset;
                     opt[pos].litlen = litlen;
-                    opt[pos].price = sequencePrice;
+                    opt[pos].price = (int)sequencePrice;
             }   }
             last_pos = pos-1;
         }
@@ -1037,9 +1124,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             /* Fix current position with one literal if cheaper */
             {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
                 int const price = opt[cur-1].price
-                                + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
-                                + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
-                                - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
+                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
                 assert(price < 1000000000); /* overflow check */
                 if (price <= opt[cur].price) {
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
@@ -1082,11 +1169,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
             }
 
+            assert(opt[cur].price >= 0);
             {   U32 const ll0 = (opt[cur].mlen != 0);
                 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
-                U32 const previousPrice = opt[cur].price;
+                U32 const previousPrice = (U32)opt[cur].price;
                 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                 U32 matchNb;
 
                 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
@@ -1124,7 +1212,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                         U32 const pos = cur + mlen;
-                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
 
                         if ((pos > last_pos) || (price < opt[pos].price)) {
                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@@ -1210,38 +1298,30 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
     return (size_t)(iend - anchor);
 }
 
+static size_t ZSTD_compressBlock_opt0(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
+}
+
+static size_t ZSTD_compressBlock_opt2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
+}
 
 size_t ZSTD_compressBlock_btopt(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
 
 
-/* used in 2-pass strategy */
-static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
-{
-    U32 s, sum=0;
-    assert(ZSTD_FREQ_DIV+bonus >= 0);
-    for (s=0; s<lastEltIndex+1; s++) {
-        table[s] <<= ZSTD_FREQ_DIV+bonus;
-        table[s]--;
-        sum += table[s];
-    }
-    return sum;
-}
 
-/* used in 2-pass strategy */
-MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
-{
-    if (ZSTD_compressedLiterals(optPtr))
-        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
-    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
-    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
-    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
-}
 
 /* ZSTD_initStats_ultra():
  * make a first compression pass, just to seed stats with more accurate starting values.
@@ -1263,7 +1343,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
 
-    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
+    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
 
     /* invalidate first scan from history */
     ZSTD_resetSeqStore(seqStore);
@@ -1272,8 +1352,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
     ms->window.lowLimit = ms->window.dictLimit;
     ms->nextToUpdate = ms->window.dictLimit;
 
-    /* re-inforce weight of collected statistics */
-    ZSTD_upscaleStats(&ms->opt);
 }
 
 size_t ZSTD_compressBlock_btultra(
@@ -1281,7 +1359,7 @@ size_t ZSTD_compressBlock_btultra(
         const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
 
 size_t ZSTD_compressBlock_btultra2(
@@ -1309,35 +1387,35 @@ size_t ZSTD_compressBlock_btultra2(
         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
     }
 
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
 
 size_t ZSTD_compressBlock_btopt_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
 }
 
 size_t ZSTD_compressBlock_btultra_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
 }
 
 size_t ZSTD_compressBlock_btopt_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
 }
 
 size_t ZSTD_compressBlock_btultra_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
 }
 
 /* note : no btultra2 variant for extDict nor dictMatchState,