extzstd 0.2 → 0.3
This diff shows the changes between publicly available package versions as they were released to their public registry, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +13 -0
- data/README.md +17 -14
- data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
- data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
- data/contrib/zstd/Makefile +99 -53
- data/contrib/zstd/README.md +59 -39
- data/contrib/zstd/TESTING.md +1 -1
- data/contrib/zstd/appveyor.yml +17 -6
- data/contrib/zstd/lib/BUCK +29 -2
- data/contrib/zstd/lib/Makefile +118 -21
- data/contrib/zstd/lib/README.md +84 -44
- data/contrib/zstd/lib/common/bitstream.h +17 -33
- data/contrib/zstd/lib/common/compiler.h +62 -8
- data/contrib/zstd/lib/common/cpu.h +215 -0
- data/contrib/zstd/lib/common/debug.c +44 -0
- data/contrib/zstd/lib/common/debug.h +134 -0
- data/contrib/zstd/lib/common/entropy_common.c +16 -1
- data/contrib/zstd/lib/common/error_private.c +7 -0
- data/contrib/zstd/lib/common/fse.h +48 -44
- data/contrib/zstd/lib/common/fse_decompress.c +3 -3
- data/contrib/zstd/lib/common/huf.h +169 -113
- data/contrib/zstd/lib/common/mem.h +20 -2
- data/contrib/zstd/lib/common/pool.c +135 -49
- data/contrib/zstd/lib/common/pool.h +40 -21
- data/contrib/zstd/lib/common/threading.c +2 -2
- data/contrib/zstd/lib/common/threading.h +12 -12
- data/contrib/zstd/lib/common/xxhash.c +3 -2
- data/contrib/zstd/lib/common/zstd_common.c +3 -6
- data/contrib/zstd/lib/common/zstd_errors.h +17 -7
- data/contrib/zstd/lib/common/zstd_internal.h +76 -48
- data/contrib/zstd/lib/compress/fse_compress.c +89 -209
- data/contrib/zstd/lib/compress/hist.c +203 -0
- data/contrib/zstd/lib/compress/hist.h +95 -0
- data/contrib/zstd/lib/compress/huf_compress.c +188 -80
- data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
- data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
- data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
- data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
- data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
- data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
- data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
- data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
- data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
- data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
- data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
- data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
- data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
- data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
- data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
- data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
- data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
- data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
- data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
- data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
- data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
- data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
- data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
- data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
- data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
- data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
- data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
- data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
- data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
- data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
- data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
- data/contrib/zstd/lib/zstd.h +1346 -832
- data/ext/extzstd.c +27 -19
- data/ext/extzstd_stream.c +20 -4
- data/ext/zstd_compress.c +1 -0
- data/ext/zstd_decompress.c +4 -0
- data/ext/zstd_dictbuilder.c +4 -0
- data/ext/zstd_dictbuilder_fastcover.c +5 -0
- data/lib/extzstd.rb +52 -220
- data/lib/extzstd/version.rb +1 -1
- metadata +21 -7
- data/contrib/zstd/circle.yml +0 -63
--- a/data/contrib/zstd/lib/compress/zstd_double_fast.h
+++ b/data/contrib/zstd/lib/compress/zstd_double_fast.h
@@ -16,11 +16,20 @@ extern "C" {
 #endif

 #include "mem.h"   /* U32 */
-#include "
+#include "zstd_compress_internal.h"   /* ZSTD_CCtx, size_t */
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+size_t ZSTD_compressBlock_doubleFast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);

-void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls);
-size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);

 #if defined (__cplusplus)
 }
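The header change above is the pattern repeated across this release: block compressors no longer receive a `ZSTD_CCtx*` but an explicit `(ZSTD_matchState_t*, seqStore_t*, U32 rep[ZSTD_REP_NUM], src, srcSize)` tuple, and each strategy gains a `_dictMatchState` variant next to the in-prefix and `_extDict` ones. Because every variant now shares one signature, a caller can pick one per block through a plain function-pointer table. The sketch below only illustrates that idea; the stub types, the enum, and `selectBlockCompressor` are stand-ins for this example, not zstd's internal selector.

```c
#include <stddef.h>
#include <stdio.h>

/* Stand-in types so the sketch compiles on its own; the real ones live in
 * zstd_compress_internal.h. */
typedef struct { int dummy; } matchState_t;
typedef struct { int dummy; } seqStore_t;
#define REP_NUM 3

/* All block compressors share one signature, so they can be picked
 * from a table at runtime. */
typedef size_t (*blockCompressor)(matchState_t* ms, seqStore_t* seqStore,
                                  unsigned rep[REP_NUM],
                                  const void* src, size_t srcSize);

typedef enum { mode_noDict = 0, mode_dictMatchState = 1, mode_extDict = 2 } dictMode_e;

/* Toy stand-ins for the three per-strategy variants declared above. */
static size_t compress_noDict(matchState_t* ms, seqStore_t* ss, unsigned rep[REP_NUM],
                              const void* src, size_t srcSize)
{ (void)ms; (void)ss; (void)rep; (void)src; return srcSize; }
static size_t compress_dictMatchState(matchState_t* ms, seqStore_t* ss, unsigned rep[REP_NUM],
                                      const void* src, size_t srcSize)
{ (void)ms; (void)ss; (void)rep; (void)src; return srcSize; }
static size_t compress_extDict(matchState_t* ms, seqStore_t* ss, unsigned rep[REP_NUM],
                               const void* src, size_t srcSize)
{ (void)ms; (void)ss; (void)rep; (void)src; return srcSize; }

/* Dispatch by dictionary mode: one table lookup instead of branching
 * inside each compressor. */
static blockCompressor selectBlockCompressor(dictMode_e mode)
{
    static const blockCompressor table[3] = {
        compress_noDict, compress_dictMatchState, compress_extDict
    };
    return table[mode];
}

int main(void)
{
    matchState_t ms = {0};
    seqStore_t ss = {0};
    unsigned rep[REP_NUM] = {1, 4, 8};
    const char block[] = "example block";
    size_t r = selectBlockCompressor(mode_dictMatchState)(&ms, &ss, rep, block, sizeof block);
    printf("compressed (stub) size: %zu\n", r);
    return 0;
}
```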
--- a/data/contrib/zstd/lib/compress/zstd_fast.c
+++ b/data/contrib/zstd/lib/compress/zstd_fast.c
@@ -12,74 +12,281 @@
 #include "zstd_fast.h"


-void ZSTD_fillHashTable
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
 {
-
-    U32
-    const
-    const
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hBits = cParams->hashLog;
+    U32 const mls = cParams->minMatch;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
-    const
+    const U32 fastHashFillStep = 3;

-
-
-
-
+    /* Always insert every fastHashFillStep position into the hash table.
+     * Insert the other positions if their hash entry is empty.
+     */
+    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
+        U32 const current = (U32)(ip - base);
+        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
+        hashTable[hash0] = current;
+        if (dtlm == ZSTD_dtlm_fast) continue;
+        /* Only load extra positions for ZSTD_dtlm_full */
+        {   U32 p;
+            for (p = 1; p < fastHashFillStep; ++p) {
+                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
+                if (hashTable[hash] == 0) {  /* not yet filled */
+                    hashTable[hash] = current + p;
+    }   }   }   }
 }

-
 FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_fast_generic(
-
-
+size_t ZSTD_compressBlock_fast_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls)
 {
-
-    U32
-
-
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
+    const BYTE* const base = ms->window.base;
     const BYTE* const istart = (const BYTE*)src;
-
+    /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
+    const BYTE* ip0 = istart;
+    const BYTE* ip1;
     const BYTE* anchor = istart;
-    const U32
-    const BYTE* const
+    const U32   prefixStartIndex = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=
+    U32 offset_1=rep[0], offset_2=rep[1];
     U32 offsetSaved = 0;

     /* init */
-
-
+    ip0 += (ip0 == prefixStart);
+    ip1 = ip0 + 1;
+    {
+        U32 const maxRep = (U32)(ip0 - prefixStart);
         if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
         if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
     }

+    /* Main Search Loop */
+    while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
+        size_t mLength;
+        BYTE const* ip2 = ip0 + 2;
+        size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
+        U32 const val0 = MEM_read32(ip0);
+        size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
+        U32 const val1 = MEM_read32(ip1);
+        U32 const current0 = (U32)(ip0-base);
+        U32 const current1 = (U32)(ip1-base);
+        U32 const matchIndex0 = hashTable[h0];
+        U32 const matchIndex1 = hashTable[h1];
+        BYTE const* repMatch = ip2-offset_1;
+        const BYTE* match0 = base + matchIndex0;
+        const BYTE* match1 = base + matchIndex1;
+        U32 offcode;
+        hashTable[h0] = current0;   /* update hash table */
+        hashTable[h1] = current1;   /* update hash table */
+
+        assert(ip0 + 1 == ip1);
+
+        if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
+            mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
+            ip0 = ip2 - mLength;
+            match0 = repMatch - mLength;
+            offcode = 0;
+            goto _match;
+        }
+        if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
+            /* found a regular match */
+            goto _offset;
+        }
+        if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
+            /* found a regular match after one literal */
+            ip0 = ip1;
+            match0 = match1;
+            goto _offset;
+        }
+        {
+            size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+            assert(step >= 2);
+            ip0 += step;
+            ip1 += step;
+            continue;
+        }
+_offset: /* Requires: ip0, match0 */
+        /* Compute the offset code */
+        offset_2 = offset_1;
+        offset_1 = (U32)(ip0-match0);
+        offcode = offset_1 + ZSTD_REP_MOVE;
+        mLength = 0;
+        /* Count the backwards match length */
+        while (((ip0>anchor) & (match0>prefixStart))
+             && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
+
+_match: /* Requires: ip0, match0, offcode */
+        /* Count the forward length */
+        mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
+        ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
+        /* match found */
+        ip0 += mLength;
+        anchor = ip0;
+        ip1 = ip0 + 1;
+
+        if (ip0 <= ilimit) {
+            /* Fill Table */
+            assert(base+current0+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+
+            while ( (ip0 <= ilimit)
+                 && ( (offset_2>0)
+                    & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
+                /* store sequence */
+                size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
+                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+                ip0 += rLength;
+                ip1 = ip0 + 1;
+                ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+                anchor = ip0;
+                continue;   /* faster when present (confirmed on gcc-8) ... (?) */
+            }
+        }
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return iend - anchor;
+}
+
+
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32 const mls = cParams->minMatch;
+    assert(ms->dictMatchState == NULL);
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   prefixStartIndex = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
+    const U32* const dictHashTable = dms->hashTable;
+    const U32 dictStartIndex = dms->window.dictLimit;
+    const BYTE* const dictBase = dms->window.base;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const BYTE* const dictEnd = dms->window.nextSrc;
+    const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
+    const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
+    const U32 dictHLog = dictCParams->hashLog;
+
+    /* otherwise, we would get index underflow when translating a dict index
+     * into a local index */
+    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
+
+    /* init */
+    ip += (dictAndPrefixLength == 0);
+    /* dictMatchState repCode checks don't currently handle repCode == 0
+     * disabling. */
+    assert(offset_1 <= dictAndPrefixLength);
+    assert(offset_2 <= dictAndPrefixLength);
+
     /* Main Search Loop */
     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
         size_t mLength;
-        size_t const h = ZSTD_hashPtr(ip,
+        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
         U32 const current = (U32)(ip-base);
         U32 const matchIndex = hashTable[h];
         const BYTE* match = base + matchIndex;
+        const U32 repIndex = current + 1 - offset_1;
+        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
         hashTable[h] = current;   /* update hash table */

-        if ((
-
+        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(
-        } else {
-
-
-
+            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+        } else if ( (matchIndex <= prefixStartIndex) ) {
+            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
+            U32 const dictMatchIndex = dictHashTable[dictHash];
+            const BYTE* dictMatch = dictBase + dictMatchIndex;
+            if (dictMatchIndex <= dictStartIndex ||
+                MEM_read32(dictMatch) != MEM_read32(ip)) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                 continue;
+            } else {
+                /* found a dict match */
+                U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
+                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
+                while (((ip>anchor) & (dictMatch>dictStart))
+                     && (ip[-1] == dictMatch[-1])) {
+                    ip--; dictMatch--; mLength++;
+                } /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
             }
+        } else if (MEM_read32(match) != MEM_read32(ip)) {
+            /* it's not a match, and we're not going to check the dictionary */
+            assert(stepSize >= 1);
+            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+            continue;
+        } else {
+            /* found a regular match */
+            U32 const offset = (U32)(ip-match);
             mLength = ZSTD_count(ip+4, match+4, iend) + 4;
-
-
+            while (((ip>anchor) & (match>prefixStart))
+                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
             offset_2 = offset_1;
             offset_1 = offset;
-
-            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
         }

         /* match found */
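In the new `ZSTD_fillHashTable` above, the table is seeded at every `fastHashFillStep`-th position, and (unless the caller passes `ZSTD_dtlm_fast`) the skipped positions are also hashed but only written into slots that are still empty. The toy program below replays that seeding policy on a plain buffer; the multiplicative hash and table geometry are simplified stand-ins for `ZSTD_hashPtr` and the real hash table, not zstd code.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TABLE_LOG 10
#define TABLE_SIZE (1u << TABLE_LOG)
#define FILL_STEP 3                 /* plays the role of fastHashFillStep */

/* Simplified multiplicative hash over 4 bytes; zstd's ZSTD_hashPtr differs. */
static uint32_t toy_hash(const uint8_t* p)
{
    uint32_t v;
    memcpy(&v, p, sizeof v);
    return (v * 2654435761u) >> (32 - TABLE_LOG);
}

/* Seed positions every FILL_STEP bytes; optionally backfill the gaps,
 * but only into slots that are still empty (mirrors ZSTD_dtlm_full). */
static void fill_table(uint32_t* table, const uint8_t* base, size_t len, int full)
{
    const uint8_t* ip = base;
    const uint8_t* const iend = base + len - sizeof(uint32_t);
    for ( ; ip + FILL_STEP < iend + 2; ip += FILL_STEP) {
        uint32_t const current = (uint32_t)(ip - base);
        table[toy_hash(ip)] = current;        /* always insert this position */
        if (!full) continue;                  /* "fast" mode: skip the gaps */
        for (uint32_t p = 1; p < FILL_STEP; ++p) {
            uint32_t const h = toy_hash(ip + p);
            if (table[h] == 0) table[h] = current + p;   /* not yet filled */
        }
    }
}

int main(void)
{
    static uint32_t table[TABLE_SIZE];        /* zero-initialized = empty */
    static const uint8_t data[] = "abcdefghabcdefghabcdefghabcdefgh";
    fill_table(table, data, sizeof data - 1, /*full=*/1);
    size_t filled = 0;
    for (size_t i = 0; i < TABLE_SIZE; ++i) filled += (table[i] != 0);
    printf("%zu slots seeded\n", filled);
    return 0;
}
```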
@@ -88,105 +295,122 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,

         if (ip <= ilimit) {
             /* Fill Table */
-
-            hashTable[ZSTD_hashPtr(
+            assert(base+current+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;  /* here because current+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+
             /* check immediate repcode */
-            while (
-
-
-
-
-
-
-
-
-
-
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
+                        dictBase - dictIndexDelta + repIndex2 :
+                        base + repIndex2;
+                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+    }

     /* save reps for next block */
-
-
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;

     /* Return the last literals size */
     return iend - anchor;
 }

-
-
-
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
 {
-    const
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32 const mls = cParams->minMatch;
+    assert(ms->dictMatchState != NULL);
     switch(mls)
     {
     default: /* includes case 3 */
     case 4 :
-        return
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
     case 5 :
-        return
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
     case 6 :
-        return
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
     case 7 :
-        return
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
     }
 }


-static size_t ZSTD_compressBlock_fast_extDict_generic(
-
-
+static size_t ZSTD_compressBlock_fast_extDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
 {
-
-    const
-
-
-
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
-    const U32
-    const BYTE* const dictStart = dictBase +
-    const U32
-    const BYTE* const
-    const BYTE* const dictEnd = dictBase +
+    const U32   dictStartIndex = ms->window.lowLimit;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const U32   prefixStartIndex = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const dictEnd = dictBase + prefixStartIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
-    U32 offset_1=
+    U32 offset_1=rep[0], offset_2=rep[1];

     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
-        const size_t h = ZSTD_hashPtr(ip,
-        const U32
-        const BYTE* matchBase = matchIndex <
-        const BYTE*
-        const U32
-        const U32
-        const BYTE* repBase = repIndex <
-        const BYTE* repMatch = repBase + repIndex;
+        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
+        const U32    matchIndex = hashTable[h];
+        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+        const BYTE*  match = matchBase + matchIndex;
+        const U32    current = (U32)(ip-base);
+        const U32    repIndex = current + 1 - offset_1;
+        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* const repMatch = repBase + repIndex;
         size_t mLength;
         hashTable[h] = current;   /* update hash table */
+        assert(offset_1 <= current +1);   /* check repIndex */

-        if ( (((U32)((
+        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
-            const BYTE* repMatchEnd = repIndex <
-            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd,
+            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(
+            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
         } else {
-            if ( (matchIndex <
+            if ( (matchIndex < dictStartIndex) ||
                  (MEM_read32(match) != MEM_read32(ip)) ) {
-
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                 continue;
             }
-            {   const BYTE* matchEnd = matchIndex <
-                const BYTE* lowMatchPtr = matchIndex <
+            {   const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                 U32 offset;
-                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd,
+                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                 offset = current - matchIndex;
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(
+                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
         } }

         /* found a match : store it */
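The wrappers in the hunk above (`ZSTD_compressBlock_fast`, `ZSTD_compressBlock_fast_dictMatchState`) all rely on the same C specialization trick: the `_generic` worker is marked `FORCE_INLINE_TEMPLATE` and takes `mls` as a parameter, and the wrapper's `switch(mls)` calls it with the constants 4 through 7, so the compiler can emit one specialized body per match length. Below is a minimal, self-contained sketch of that pattern; the `ALWAYS_INLINE` macro and the repeat-counting worker are made up for illustration and are not zstd code.

```c
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for FORCE_INLINE_TEMPLATE; real builds pick the
 * right attribute per compiler. */
#if defined(__GNUC__)
#  define ALWAYS_INLINE static inline __attribute__((always_inline))
#else
#  define ALWAYS_INLINE static inline
#endif

/* Generic worker: counts positions where the next `mls` bytes repeat
 * immediately. Because `mls` is a compile-time constant at each call site
 * below, the compiler can specialize the comparison length. */
ALWAYS_INLINE size_t count_repeats_generic(const unsigned char* src, size_t srcSize,
                                           unsigned const mls)
{
    size_t hits = 0;
    if (srcSize < 2 * (size_t)mls) return 0;
    for (size_t i = 0; i + 2 * mls <= srcSize; ++i)
        hits += (memcmp(src + i, src + i + mls, mls) == 0);
    return hits;
}

/* Runtime dispatcher, in the same shape as the switch(mls) wrappers above. */
size_t count_repeats(const unsigned char* src, size_t srcSize, unsigned mls)
{
    switch (mls)
    {
    default: /* includes case 3 */
    case 4: return count_repeats_generic(src, srcSize, 4);
    case 5: return count_repeats_generic(src, srcSize, 5);
    case 6: return count_repeats_generic(src, srcSize, 6);
    case 7: return count_repeats_generic(src, srcSize, 7);
    }
}

int main(void)
{
    const unsigned char sample[] = "abcdabcdabcd";
    printf("%zu repeats of length 4\n", count_repeats(sample, sizeof sample - 1, 4));
    return 0;
}
```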
@@ -195,20 +419,20 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,

         if (ip <= ilimit) {
             /* Fill Table */
-            hashTable[ZSTD_hashPtr(base+current+2,
-            hashTable[ZSTD_hashPtr(ip-2,
+            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
             /* check immediate repcode */
             while (ip <= ilimit) {
                 U32 const current2 = (U32)(ip-base);
                 U32 const repIndex2 = current2 - offset_2;
-                const BYTE* repMatch2 = repIndex2 <
-                if ( (((U32)((
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                    const BYTE* const repEnd2 = repIndex2 <
-                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2,
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(
-                    hashTable[ZSTD_hashPtr(ip,
+                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                     ip += repLength2;
                     anchor = ip;
                     continue;
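The immediate-repcode loop in the hunk above keeps compressing as long as the four bytes at `ip` equal the bytes `offset_2` back (possibly in the old-dictionary segment): the sequence is stored with offset code 0 (a repeat code) and `offset_1`/`offset_2` are swapped each time, while a regular match instead pushes its distance into `offset_1` and demotes the previous one. The sketch below isolates that two-slot offset history; the struct and function names are illustrative, not zstd API.

```c
#include <stdint.h>
#include <stdio.h>

/* Two-slot repeat-offset history, as saved into rep[0]/rep[1] per block. */
typedef struct { uint32_t offset_1, offset_2; } rep_history;

/* Record a regular match: the new offset becomes offset_1 and the previous
 * offset_1 is demoted (mirrors "offset_2 = offset_1; offset_1 = offset"). */
static void push_offset(rep_history* h, uint32_t offset)
{
    h->offset_2 = h->offset_1;
    h->offset_1 = offset;
}

/* Record a hit on the *second* repeat offset: the two slots swap, so the
 * offset just reused becomes the most recent one (the tmpOffset swap above). */
static void use_rep2(rep_history* h)
{
    uint32_t const tmp = h->offset_2;
    h->offset_2 = h->offset_1;
    h->offset_1 = tmp;
}

int main(void)
{
    rep_history h = { 1, 4 };     /* each block starts from the saved rep[] */
    push_offset(&h, 100);         /* regular match at distance 100 */
    printf("after match:   rep = {%u, %u}\n", h.offset_1, h.offset_2);
    use_rep2(&h);                 /* immediate repcode hit on offset_2 */
    printf("after repcode: rep = {%u, %u}\n", h.offset_1, h.offset_2);
    return 0;
}
```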
@@ -217,27 +441,30 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
     } } }

     /* save reps for next block */
-
+    rep[0] = offset_1;
+    rep[1] = offset_2;

     /* Return the last literals size */
     return iend - anchor;
 }


-size_t ZSTD_compressBlock_fast_extDict(
-
+size_t ZSTD_compressBlock_fast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
 {
-
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32 const mls = cParams->minMatch;
     switch(mls)
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_fast_extDict_generic(
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
     case 5 :
-        return ZSTD_compressBlock_fast_extDict_generic(
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
     case 6 :
-        return ZSTD_compressBlock_fast_extDict_generic(
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
     case 7 :
-        return ZSTD_compressBlock_fast_extDict_generic(
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
     }
 }