extzstd 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +13 -0
- data/README.md +17 -14
- data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
- data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
- data/contrib/zstd/Makefile +99 -53
- data/contrib/zstd/README.md +59 -39
- data/contrib/zstd/TESTING.md +1 -1
- data/contrib/zstd/appveyor.yml +17 -6
- data/contrib/zstd/lib/BUCK +29 -2
- data/contrib/zstd/lib/Makefile +118 -21
- data/contrib/zstd/lib/README.md +84 -44
- data/contrib/zstd/lib/common/bitstream.h +17 -33
- data/contrib/zstd/lib/common/compiler.h +62 -8
- data/contrib/zstd/lib/common/cpu.h +215 -0
- data/contrib/zstd/lib/common/debug.c +44 -0
- data/contrib/zstd/lib/common/debug.h +134 -0
- data/contrib/zstd/lib/common/entropy_common.c +16 -1
- data/contrib/zstd/lib/common/error_private.c +7 -0
- data/contrib/zstd/lib/common/fse.h +48 -44
- data/contrib/zstd/lib/common/fse_decompress.c +3 -3
- data/contrib/zstd/lib/common/huf.h +169 -113
- data/contrib/zstd/lib/common/mem.h +20 -2
- data/contrib/zstd/lib/common/pool.c +135 -49
- data/contrib/zstd/lib/common/pool.h +40 -21
- data/contrib/zstd/lib/common/threading.c +2 -2
- data/contrib/zstd/lib/common/threading.h +12 -12
- data/contrib/zstd/lib/common/xxhash.c +3 -2
- data/contrib/zstd/lib/common/zstd_common.c +3 -6
- data/contrib/zstd/lib/common/zstd_errors.h +17 -7
- data/contrib/zstd/lib/common/zstd_internal.h +76 -48
- data/contrib/zstd/lib/compress/fse_compress.c +89 -209
- data/contrib/zstd/lib/compress/hist.c +203 -0
- data/contrib/zstd/lib/compress/hist.h +95 -0
- data/contrib/zstd/lib/compress/huf_compress.c +188 -80
- data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
- data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
- data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
- data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
- data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
- data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
- data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
- data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
- data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
- data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
- data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
- data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
- data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
- data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
- data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
- data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
- data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
- data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
- data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
- data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
- data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
- data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
- data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
- data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
- data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
- data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
- data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
- data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
- data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
- data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
- data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
- data/contrib/zstd/lib/zstd.h +1346 -832
- data/ext/extzstd.c +27 -19
- data/ext/extzstd_stream.c +20 -4
- data/ext/zstd_compress.c +1 -0
- data/ext/zstd_decompress.c +4 -0
- data/ext/zstd_dictbuilder.c +4 -0
- data/ext/zstd_dictbuilder_fastcover.c +5 -0
- data/lib/extzstd.rb +52 -220
- data/lib/extzstd/version.rb +1 -1
- metadata +21 -7
- data/contrib/zstd/circle.yml +0 -63
|
@@ -12,103 +12,239 @@
|
|
|
12
12
|
#include "zstd_double_fast.h"
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
void ZSTD_fillDoubleHashTable(
|
|
15
|
+
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|
16
|
+
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
|
16
17
|
{
|
|
17
|
-
|
|
18
|
-
U32
|
|
19
|
-
U32
|
|
20
|
-
U32 const
|
|
21
|
-
|
|
22
|
-
const
|
|
18
|
+
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
19
|
+
U32* const hashLarge = ms->hashTable;
|
|
20
|
+
U32 const hBitsL = cParams->hashLog;
|
|
21
|
+
U32 const mls = cParams->minMatch;
|
|
22
|
+
U32* const hashSmall = ms->chainTable;
|
|
23
|
+
U32 const hBitsS = cParams->chainLog;
|
|
24
|
+
const BYTE* const base = ms->window.base;
|
|
25
|
+
const BYTE* ip = base + ms->nextToUpdate;
|
|
23
26
|
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
const U32 fastHashFillStep = 3;
|
|
28
|
+
|
|
29
|
+
/* Always insert every fastHashFillStep position into the hash tables.
|
|
30
|
+
* Insert the other positions into the large hash table if their entry
|
|
31
|
+
* is empty.
|
|
32
|
+
*/
|
|
33
|
+
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
|
34
|
+
U32 const current = (U32)(ip - base);
|
|
35
|
+
U32 i;
|
|
36
|
+
for (i = 0; i < fastHashFillStep; ++i) {
|
|
37
|
+
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
|
38
|
+
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
|
39
|
+
if (i == 0)
|
|
40
|
+
hashSmall[smHash] = current + i;
|
|
41
|
+
if (i == 0 || hashLarge[lgHash] == 0)
|
|
42
|
+
hashLarge[lgHash] = current + i;
|
|
43
|
+
/* Only load extra positions for ZSTD_dtlm_full */
|
|
44
|
+
if (dtlm == ZSTD_dtlm_fast)
|
|
45
|
+
break;
|
|
46
|
+
}
|
|
30
47
|
}
|
|
31
48
|
}
|
|
32
49
|
|
|
33
50
|
|
|
34
51
|
FORCE_INLINE_TEMPLATE
|
|
35
|
-
size_t ZSTD_compressBlock_doubleFast_generic(
|
|
36
|
-
|
|
37
|
-
|
|
52
|
+
size_t ZSTD_compressBlock_doubleFast_generic(
|
|
53
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
54
|
+
void const* src, size_t srcSize,
|
|
55
|
+
U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
|
|
38
56
|
{
|
|
39
|
-
|
|
40
|
-
const
|
|
41
|
-
U32
|
|
42
|
-
const
|
|
43
|
-
|
|
44
|
-
const BYTE* const base =
|
|
57
|
+
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
|
58
|
+
U32* const hashLong = ms->hashTable;
|
|
59
|
+
const U32 hBitsL = cParams->hashLog;
|
|
60
|
+
U32* const hashSmall = ms->chainTable;
|
|
61
|
+
const U32 hBitsS = cParams->chainLog;
|
|
62
|
+
const BYTE* const base = ms->window.base;
|
|
45
63
|
const BYTE* const istart = (const BYTE*)src;
|
|
46
64
|
const BYTE* ip = istart;
|
|
47
65
|
const BYTE* anchor = istart;
|
|
48
|
-
const U32
|
|
49
|
-
const BYTE* const
|
|
66
|
+
const U32 prefixLowestIndex = ms->window.dictLimit;
|
|
67
|
+
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
|
50
68
|
const BYTE* const iend = istart + srcSize;
|
|
51
69
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
|
52
|
-
U32 offset_1=
|
|
70
|
+
U32 offset_1=rep[0], offset_2=rep[1];
|
|
53
71
|
U32 offsetSaved = 0;
|
|
54
72
|
|
|
73
|
+
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
|
74
|
+
const ZSTD_compressionParameters* const dictCParams =
|
|
75
|
+
dictMode == ZSTD_dictMatchState ?
|
|
76
|
+
&dms->cParams : NULL;
|
|
77
|
+
const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
|
|
78
|
+
dms->hashTable : NULL;
|
|
79
|
+
const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
|
|
80
|
+
dms->chainTable : NULL;
|
|
81
|
+
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
|
|
82
|
+
dms->window.dictLimit : 0;
|
|
83
|
+
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
|
|
84
|
+
dms->window.base : NULL;
|
|
85
|
+
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
|
|
86
|
+
dictBase + dictStartIndex : NULL;
|
|
87
|
+
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
|
88
|
+
dms->window.nextSrc : NULL;
|
|
89
|
+
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
|
90
|
+
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
|
91
|
+
0;
|
|
92
|
+
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
|
|
93
|
+
dictCParams->hashLog : hBitsL;
|
|
94
|
+
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
|
95
|
+
dictCParams->chainLog : hBitsS;
|
|
96
|
+
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
|
|
97
|
+
|
|
98
|
+
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
|
99
|
+
|
|
55
100
|
/* init */
|
|
56
|
-
ip += (
|
|
57
|
-
|
|
101
|
+
ip += (dictAndPrefixLength == 0);
|
|
102
|
+
if (dictMode == ZSTD_noDict) {
|
|
103
|
+
U32 const maxRep = (U32)(ip - prefixLowest);
|
|
58
104
|
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
|
59
105
|
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
|
60
106
|
}
|
|
107
|
+
if (dictMode == ZSTD_dictMatchState) {
|
|
108
|
+
/* dictMatchState repCode checks don't currently handle repCode == 0
|
|
109
|
+
* disabling. */
|
|
110
|
+
assert(offset_1 <= dictAndPrefixLength);
|
|
111
|
+
assert(offset_2 <= dictAndPrefixLength);
|
|
112
|
+
}
|
|
61
113
|
|
|
62
114
|
/* Main Search Loop */
|
|
63
115
|
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
|
64
116
|
size_t mLength;
|
|
117
|
+
U32 offset;
|
|
65
118
|
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
|
|
66
119
|
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
|
120
|
+
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
|
121
|
+
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
|
67
122
|
U32 const current = (U32)(ip-base);
|
|
68
123
|
U32 const matchIndexL = hashLong[h2];
|
|
69
|
-
U32
|
|
124
|
+
U32 matchIndexS = hashSmall[h];
|
|
70
125
|
const BYTE* matchLong = base + matchIndexL;
|
|
71
126
|
const BYTE* match = base + matchIndexS;
|
|
127
|
+
const U32 repIndex = current + 1 - offset_1;
|
|
128
|
+
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
|
129
|
+
&& repIndex < prefixLowestIndex) ?
|
|
130
|
+
dictBase + (repIndex - dictIndexDelta) :
|
|
131
|
+
base + repIndex;
|
|
72
132
|
hashLong[h2] = hashSmall[h] = current; /* update hash tables */
|
|
73
133
|
|
|
74
|
-
|
|
75
|
-
if (
|
|
76
|
-
/*
|
|
134
|
+
/* check dictMatchState repcode */
|
|
135
|
+
if (dictMode == ZSTD_dictMatchState
|
|
136
|
+
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
|
137
|
+
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
|
138
|
+
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
|
139
|
+
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
|
140
|
+
ip++;
|
|
141
|
+
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
|
142
|
+
goto _match_stored;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/* check noDict repcode */
|
|
146
|
+
if ( dictMode == ZSTD_noDict
|
|
147
|
+
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
|
77
148
|
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
|
78
149
|
ip++;
|
|
79
|
-
ZSTD_storeSeq(
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
150
|
+
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
|
151
|
+
goto _match_stored;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
if (matchIndexL > prefixLowestIndex) {
|
|
155
|
+
/* check prefix long match */
|
|
156
|
+
if (MEM_read64(matchLong) == MEM_read64(ip)) {
|
|
83
157
|
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
|
|
84
158
|
offset = (U32)(ip-matchLong);
|
|
85
|
-
while (((ip>anchor) & (matchLong>
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
159
|
+
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
|
160
|
+
goto _match_found;
|
|
161
|
+
}
|
|
162
|
+
} else if (dictMode == ZSTD_dictMatchState) {
|
|
163
|
+
/* check dictMatchState long match */
|
|
164
|
+
U32 const dictMatchIndexL = dictHashLong[dictHL];
|
|
165
|
+
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
|
|
166
|
+
assert(dictMatchL < dictEnd);
|
|
167
|
+
|
|
168
|
+
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
|
|
169
|
+
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
|
|
170
|
+
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
|
|
171
|
+
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
|
|
172
|
+
goto _match_found;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
if (matchIndexS > prefixLowestIndex) {
|
|
177
|
+
/* check prefix short match */
|
|
178
|
+
if (MEM_read32(match) == MEM_read32(ip)) {
|
|
179
|
+
goto _search_next_long;
|
|
180
|
+
}
|
|
181
|
+
} else if (dictMode == ZSTD_dictMatchState) {
|
|
182
|
+
/* check dictMatchState short match */
|
|
183
|
+
U32 const dictMatchIndexS = dictHashSmall[dictHS];
|
|
184
|
+
match = dictBase + dictMatchIndexS;
|
|
185
|
+
matchIndexS = dictMatchIndexS + dictIndexDelta;
|
|
186
|
+
|
|
187
|
+
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
|
|
188
|
+
goto _search_next_long;
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
|
193
|
+
continue;
|
|
194
|
+
|
|
195
|
+
_search_next_long:
|
|
196
|
+
|
|
197
|
+
{
|
|
198
|
+
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
|
199
|
+
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
|
200
|
+
U32 const matchIndexL3 = hashLong[hl3];
|
|
201
|
+
const BYTE* matchL3 = base + matchIndexL3;
|
|
202
|
+
hashLong[hl3] = current + 1;
|
|
203
|
+
|
|
204
|
+
/* check prefix long +1 match */
|
|
205
|
+
if (matchIndexL3 > prefixLowestIndex) {
|
|
206
|
+
if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
|
|
92
207
|
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
|
|
93
208
|
ip++;
|
|
94
209
|
offset = (U32)(ip-matchL3);
|
|
95
|
-
while (((ip>anchor) & (matchL3>
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
210
|
+
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
|
211
|
+
goto _match_found;
|
|
212
|
+
}
|
|
213
|
+
} else if (dictMode == ZSTD_dictMatchState) {
|
|
214
|
+
/* check dict long +1 match */
|
|
215
|
+
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
|
|
216
|
+
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
|
|
217
|
+
assert(dictMatchL3 < dictEnd);
|
|
218
|
+
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
|
219
|
+
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
|
|
220
|
+
ip++;
|
|
221
|
+
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
|
|
222
|
+
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
|
|
223
|
+
goto _match_found;
|
|
100
224
|
}
|
|
101
|
-
} else {
|
|
102
|
-
ip += ((ip-anchor) >> g_searchStrength) + 1;
|
|
103
|
-
continue;
|
|
104
225
|
}
|
|
226
|
+
}
|
|
105
227
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
228
|
+
/* if no long +1 match, explore the short match we found */
|
|
229
|
+
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
|
|
230
|
+
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
|
231
|
+
offset = (U32)(current - matchIndexS);
|
|
232
|
+
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
233
|
+
} else {
|
|
234
|
+
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
|
235
|
+
offset = (U32)(ip - match);
|
|
236
|
+
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
110
237
|
}
|
|
111
238
|
|
|
239
|
+
/* fall-through */
|
|
240
|
+
|
|
241
|
+
_match_found:
|
|
242
|
+
offset_2 = offset_1;
|
|
243
|
+
offset_1 = offset;
|
|
244
|
+
|
|
245
|
+
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
246
|
+
|
|
247
|
+
_match_stored:
|
|
112
248
|
/* match found */
|
|
113
249
|
ip += mLength;
|
|
114
250
|
anchor = ip;
|
|
@@ -121,134 +257,185 @@ size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
|
|
|
121
257
|
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
|
|
122
258
|
|
|
123
259
|
/* check immediate repcode */
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
260
|
+
if (dictMode == ZSTD_dictMatchState) {
|
|
261
|
+
while (ip <= ilimit) {
|
|
262
|
+
U32 const current2 = (U32)(ip-base);
|
|
263
|
+
U32 const repIndex2 = current2 - offset_2;
|
|
264
|
+
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
|
|
265
|
+
&& repIndex2 < prefixLowestIndex ?
|
|
266
|
+
dictBase - dictIndexDelta + repIndex2 :
|
|
267
|
+
base + repIndex2;
|
|
268
|
+
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
|
269
|
+
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
|
270
|
+
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
|
271
|
+
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
|
272
|
+
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
273
|
+
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
|
274
|
+
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
|
275
|
+
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
276
|
+
ip += repLength2;
|
|
277
|
+
anchor = ip;
|
|
278
|
+
continue;
|
|
279
|
+
}
|
|
280
|
+
break;
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
if (dictMode == ZSTD_noDict) {
|
|
285
|
+
while ( (ip <= ilimit)
|
|
286
|
+
&& ( (offset_2>0)
|
|
287
|
+
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
|
288
|
+
/* store sequence */
|
|
289
|
+
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
|
290
|
+
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
|
291
|
+
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
|
292
|
+
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
|
293
|
+
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
|
|
294
|
+
ip += rLength;
|
|
295
|
+
anchor = ip;
|
|
296
|
+
continue; /* faster when present ... (?) */
|
|
297
|
+
} } } }
|
|
137
298
|
|
|
138
299
|
/* save reps for next block */
|
|
139
|
-
|
|
140
|
-
|
|
300
|
+
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
|
301
|
+
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
|
141
302
|
|
|
142
303
|
/* Return the last literals size */
|
|
143
304
|
return iend - anchor;
|
|
144
305
|
}
|
|
145
306
|
|
|
146
307
|
|
|
147
|
-
size_t ZSTD_compressBlock_doubleFast(
|
|
308
|
+
size_t ZSTD_compressBlock_doubleFast(
|
|
309
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
310
|
+
void const* src, size_t srcSize)
|
|
148
311
|
{
|
|
149
|
-
const U32 mls =
|
|
312
|
+
const U32 mls = ms->cParams.minMatch;
|
|
150
313
|
switch(mls)
|
|
151
314
|
{
|
|
152
315
|
default: /* includes case 3 */
|
|
153
316
|
case 4 :
|
|
154
|
-
return ZSTD_compressBlock_doubleFast_generic(
|
|
317
|
+
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
|
|
155
318
|
case 5 :
|
|
156
|
-
return ZSTD_compressBlock_doubleFast_generic(
|
|
319
|
+
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
|
|
157
320
|
case 6 :
|
|
158
|
-
return ZSTD_compressBlock_doubleFast_generic(
|
|
321
|
+
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
|
|
159
322
|
case 7 :
|
|
160
|
-
return ZSTD_compressBlock_doubleFast_generic(
|
|
323
|
+
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
|
|
161
324
|
}
|
|
162
325
|
}
|
|
163
326
|
|
|
164
327
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
328
|
+
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
|
|
329
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
330
|
+
void const* src, size_t srcSize)
|
|
168
331
|
{
|
|
169
|
-
U32
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
332
|
+
const U32 mls = ms->cParams.minMatch;
|
|
333
|
+
switch(mls)
|
|
334
|
+
{
|
|
335
|
+
default: /* includes case 3 */
|
|
336
|
+
case 4 :
|
|
337
|
+
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
|
|
338
|
+
case 5 :
|
|
339
|
+
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
|
|
340
|
+
case 6 :
|
|
341
|
+
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
|
|
342
|
+
case 7 :
|
|
343
|
+
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
349
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
350
|
+
void const* src, size_t srcSize,
|
|
351
|
+
U32 const mls /* template */)
|
|
352
|
+
{
|
|
353
|
+
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
|
354
|
+
U32* const hashLong = ms->hashTable;
|
|
355
|
+
U32 const hBitsL = cParams->hashLog;
|
|
356
|
+
U32* const hashSmall = ms->chainTable;
|
|
357
|
+
U32 const hBitsS = cParams->chainLog;
|
|
176
358
|
const BYTE* const istart = (const BYTE*)src;
|
|
177
359
|
const BYTE* ip = istart;
|
|
178
360
|
const BYTE* anchor = istart;
|
|
179
|
-
const U32 lowestIndex = ctx->lowLimit;
|
|
180
|
-
const BYTE* const dictStart = dictBase + lowestIndex;
|
|
181
|
-
const U32 dictLimit = ctx->dictLimit;
|
|
182
|
-
const BYTE* const lowPrefixPtr = base + dictLimit;
|
|
183
|
-
const BYTE* const dictEnd = dictBase + dictLimit;
|
|
184
361
|
const BYTE* const iend = istart + srcSize;
|
|
185
362
|
const BYTE* const ilimit = iend - 8;
|
|
186
|
-
U32
|
|
363
|
+
const U32 prefixStartIndex = ms->window.dictLimit;
|
|
364
|
+
const BYTE* const base = ms->window.base;
|
|
365
|
+
const BYTE* const prefixStart = base + prefixStartIndex;
|
|
366
|
+
const U32 dictStartIndex = ms->window.lowLimit;
|
|
367
|
+
const BYTE* const dictBase = ms->window.dictBase;
|
|
368
|
+
const BYTE* const dictStart = dictBase + dictStartIndex;
|
|
369
|
+
const BYTE* const dictEnd = dictBase + prefixStartIndex;
|
|
370
|
+
U32 offset_1=rep[0], offset_2=rep[1];
|
|
371
|
+
|
|
372
|
+
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
|
|
187
373
|
|
|
188
374
|
/* Search Loop */
|
|
189
375
|
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
|
190
376
|
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
|
|
191
377
|
const U32 matchIndex = hashSmall[hSmall];
|
|
192
|
-
const BYTE* matchBase = matchIndex <
|
|
378
|
+
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
|
|
193
379
|
const BYTE* match = matchBase + matchIndex;
|
|
194
380
|
|
|
195
381
|
const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
|
|
196
382
|
const U32 matchLongIndex = hashLong[hLong];
|
|
197
|
-
const BYTE* matchLongBase = matchLongIndex <
|
|
383
|
+
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
|
|
198
384
|
const BYTE* matchLong = matchLongBase + matchLongIndex;
|
|
199
385
|
|
|
200
386
|
const U32 current = (U32)(ip-base);
|
|
201
387
|
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
|
|
202
|
-
const BYTE* repBase = repIndex <
|
|
203
|
-
const BYTE* repMatch = repBase + repIndex;
|
|
388
|
+
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
|
389
|
+
const BYTE* const repMatch = repBase + repIndex;
|
|
204
390
|
size_t mLength;
|
|
205
391
|
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
|
|
206
392
|
|
|
207
|
-
if (
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
393
|
+
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
|
|
394
|
+
& (repIndex > dictStartIndex))
|
|
395
|
+
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
|
396
|
+
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
|
397
|
+
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
|
211
398
|
ip++;
|
|
212
|
-
ZSTD_storeSeq(
|
|
399
|
+
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
|
213
400
|
} else {
|
|
214
|
-
if ((matchLongIndex >
|
|
215
|
-
const BYTE* matchEnd = matchLongIndex <
|
|
216
|
-
const BYTE* lowMatchPtr = matchLongIndex <
|
|
401
|
+
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
|
402
|
+
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
|
|
403
|
+
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
217
404
|
U32 offset;
|
|
218
|
-
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd,
|
|
405
|
+
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
|
|
219
406
|
offset = current - matchLongIndex;
|
|
220
407
|
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
|
221
408
|
offset_2 = offset_1;
|
|
222
409
|
offset_1 = offset;
|
|
223
|
-
ZSTD_storeSeq(
|
|
410
|
+
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
224
411
|
|
|
225
|
-
} else if ((matchIndex >
|
|
412
|
+
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
|
|
226
413
|
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
|
227
414
|
U32 const matchIndex3 = hashLong[h3];
|
|
228
|
-
const BYTE* const match3Base = matchIndex3 <
|
|
415
|
+
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
|
|
229
416
|
const BYTE* match3 = match3Base + matchIndex3;
|
|
230
417
|
U32 offset;
|
|
231
418
|
hashLong[h3] = current + 1;
|
|
232
|
-
if ( (matchIndex3 >
|
|
233
|
-
const BYTE* matchEnd = matchIndex3 <
|
|
234
|
-
const BYTE* lowMatchPtr = matchIndex3 <
|
|
235
|
-
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd,
|
|
419
|
+
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
|
|
420
|
+
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
|
|
421
|
+
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
|
|
422
|
+
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
|
|
236
423
|
ip++;
|
|
237
424
|
offset = current+1 - matchIndex3;
|
|
238
425
|
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
|
|
239
426
|
} else {
|
|
240
|
-
const BYTE* matchEnd = matchIndex <
|
|
241
|
-
const BYTE* lowMatchPtr = matchIndex <
|
|
242
|
-
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd,
|
|
427
|
+
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
|
428
|
+
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
429
|
+
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
|
243
430
|
offset = current - matchIndex;
|
|
244
431
|
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
245
432
|
}
|
|
246
433
|
offset_2 = offset_1;
|
|
247
434
|
offset_1 = offset;
|
|
248
|
-
ZSTD_storeSeq(
|
|
435
|
+
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
249
436
|
|
|
250
437
|
} else {
|
|
251
|
-
ip += ((ip-anchor) >>
|
|
438
|
+
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
|
252
439
|
continue;
|
|
253
440
|
} }
|
|
254
441
|
|
|
@@ -266,13 +453,14 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
|
|
|
266
453
|
while (ip <= ilimit) {
|
|
267
454
|
U32 const current2 = (U32)(ip-base);
|
|
268
455
|
U32 const repIndex2 = current2 - offset_2;
|
|
269
|
-
const BYTE* repMatch2 = repIndex2 <
|
|
270
|
-
if ( (((U32)((
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
456
|
+
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
|
457
|
+
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
|
|
458
|
+
& (repIndex2 > dictStartIndex))
|
|
459
|
+
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
|
460
|
+
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
|
461
|
+
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
|
462
|
+
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
463
|
+
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
|
276
464
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
|
277
465
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
278
466
|
ip += repLength2;
|
|
@@ -283,27 +471,29 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
|
|
|
283
471
|
} } }
|
|
284
472
|
|
|
285
473
|
/* save reps for next block */
|
|
286
|
-
|
|
474
|
+
rep[0] = offset_1;
|
|
475
|
+
rep[1] = offset_2;
|
|
287
476
|
|
|
288
477
|
/* Return the last literals size */
|
|
289
478
|
return iend - anchor;
|
|
290
479
|
}
|
|
291
480
|
|
|
292
481
|
|
|
293
|
-
size_t ZSTD_compressBlock_doubleFast_extDict(
|
|
294
|
-
|
|
482
|
+
size_t ZSTD_compressBlock_doubleFast_extDict(
|
|
483
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
484
|
+
void const* src, size_t srcSize)
|
|
295
485
|
{
|
|
296
|
-
U32 const mls =
|
|
486
|
+
U32 const mls = ms->cParams.minMatch;
|
|
297
487
|
switch(mls)
|
|
298
488
|
{
|
|
299
489
|
default: /* includes case 3 */
|
|
300
490
|
case 4 :
|
|
301
|
-
return ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
491
|
+
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
|
|
302
492
|
case 5 :
|
|
303
|
-
return ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
493
|
+
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
|
|
304
494
|
case 6 :
|
|
305
|
-
return ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
495
|
+
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
|
|
306
496
|
case 7 :
|
|
307
|
-
return ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
497
|
+
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
|
|
308
498
|
}
|
|
309
499
|
}
|