zstdlib 0.3.0-x64-mingw32 → 0.8.0-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +30 -1
- data/README.md +2 -2
- data/Rakefile +1 -1
- data/ext/zstdlib/extconf.rb +3 -3
- data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
- data/ext/zstdlib/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/bitstream.h +59 -51
- data/ext/zstdlib/zstd-1.5.0/lib/common/compiler.h +289 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/cpu.h +1 -3
- data/ext/zstdlib/zstd-1.5.0/lib/common/debug.c +24 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/debug.h +22 -49
- data/ext/zstdlib/zstd-1.5.0/lib/common/entropy_common.c +362 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.c +3 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.h +8 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse.h +50 -42
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse_decompress.c +149 -55
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/huf.h +43 -39
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/mem.h +69 -25
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.c +30 -20
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.h +3 -3
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.c +51 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.h +36 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.c +40 -92
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.h +12 -32
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/zstd_common.c +10 -10
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_deps.h +111 -0
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_internal.h +490 -0
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_trace.h +154 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/fse_compress.c +47 -63
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.c +41 -63
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.h +13 -33
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/huf_compress.c +332 -193
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress.c +6393 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_internal.h +522 -86
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.c +25 -16
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.h +2 -2
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.c +50 -24
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.h +11 -4
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.c +572 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_cwksp.h +662 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.c +43 -41
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.c +85 -80
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.c +2184 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.h +125 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.c +333 -208
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.h +15 -3
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.c +228 -129
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstdmt_compress.c +151 -440
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstdmt_compress.h +110 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/huf_decompress.c +395 -276
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.c +20 -16
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.h +3 -3
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress.c +628 -231
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.c +606 -380
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_internal.h +39 -9
- data/ext/zstdlib/zstd-1.5.0/lib/zdict.h +452 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/zstd.h +740 -153
- data/ext/zstdlib/{zstd-1.4.2/lib/common → zstd-1.5.0/lib}/zstd_errors.h +3 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzlib.c +9 -9
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzread.c +16 -8
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzwrite.c +8 -8
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.c +131 -45
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
- metadata +76 -67
- data/ext/zstdlib/zstd-1.4.2/lib/common/compiler.h +0 -147
- data/ext/zstdlib/zstd-1.4.2/lib/common/debug.c +0 -44
- data/ext/zstdlib/zstd-1.4.2/lib/common/entropy_common.c +0 -236
- data/ext/zstdlib/zstd-1.4.2/lib/common/zstd_internal.h +0 -371
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress.c +0 -3904
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.c +0 -1111
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.h +0 -67
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstdmt_compress.h +0 -192
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|
31
31
|
* is empty.
|
32
32
|
*/
|
33
33
|
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
34
|
-
U32 const current = (U32)(ip - base);
|
34
|
+
U32 const curr = (U32)(ip - base);
|
35
35
|
U32 i;
|
36
36
|
for (i = 0; i < fastHashFillStep; ++i) {
|
37
37
|
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
38
38
|
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
39
39
|
if (i == 0)
|
40
|
-
hashSmall[smHash] = current + i;
|
40
|
+
hashSmall[smHash] = curr + i;
|
41
41
|
if (i == 0 || hashLarge[lgHash] == 0)
|
42
|
-
hashLarge[lgHash] = current + i;
|
42
|
+
hashLarge[lgHash] = curr + i;
|
43
43
|
/* Only load extra positions for ZSTD_dtlm_full */
|
44
44
|
if (dtlm == ZSTD_dtlm_fast)
|
45
45
|
break;
|
@@ -63,9 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
63
63
|
const BYTE* ip = istart;
|
64
64
|
const BYTE* anchor = istart;
|
65
65
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
66
|
-
|
67
|
-
const U32
|
68
|
-
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
|
66
|
+
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
67
|
+
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
69
68
|
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
70
69
|
const BYTE* const iend = istart + srcSize;
|
71
70
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
@@ -95,7 +94,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
95
94
|
dictCParams->hashLog : hBitsL;
|
96
95
|
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
97
96
|
dictCParams->chainLog : hBitsS;
|
98
|
-
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
|
97
|
+
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
99
98
|
|
100
99
|
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
|
101
100
|
|
@@ -103,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
103
102
|
|
104
103
|
/* if a dictionary is attached, it must be within window range */
|
105
104
|
if (dictMode == ZSTD_dictMatchState) {
|
106
|
-
assert(
|
105
|
+
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
107
106
|
}
|
108
107
|
|
109
108
|
/* init */
|
110
109
|
ip += (dictAndPrefixLength == 0);
|
111
110
|
if (dictMode == ZSTD_noDict) {
|
112
|
-
U32 const
|
111
|
+
U32 const curr = (U32)(ip - base);
|
112
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
113
|
+
U32 const maxRep = curr - windowLow;
|
113
114
|
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
114
115
|
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
115
116
|
}
|
@@ -128,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
128
129
|
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
129
130
|
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
130
131
|
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
131
|
-
U32 const current = (U32)(ip-base);
|
132
|
+
U32 const curr = (U32)(ip-base);
|
132
133
|
U32 const matchIndexL = hashLong[h2];
|
133
134
|
U32 matchIndexS = hashSmall[h];
|
134
135
|
const BYTE* matchLong = base + matchIndexL;
|
135
136
|
const BYTE* match = base + matchIndexS;
|
136
|
-
const U32 repIndex = current + 1 - offset_1;
|
137
|
+
const U32 repIndex = curr + 1 - offset_1;
|
137
138
|
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
138
139
|
&& repIndex < prefixLowestIndex) ?
|
139
140
|
dictBase + (repIndex - dictIndexDelta) :
|
140
141
|
base + repIndex;
|
141
|
-
hashLong[h2] = hashSmall[h] =
|
142
|
+
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
|
142
143
|
|
143
144
|
/* check dictMatchState repcode */
|
144
145
|
if (dictMode == ZSTD_dictMatchState
|
@@ -147,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
147
148
|
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
148
149
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
149
150
|
ip++;
|
150
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
|
151
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
151
152
|
goto _match_stored;
|
152
153
|
}
|
153
154
|
|
@@ -156,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
156
157
|
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
157
158
|
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
158
159
|
ip++;
|
159
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
|
160
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
160
161
|
goto _match_stored;
|
161
162
|
}
|
162
163
|
|
@@ -176,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
176
177
|
|
177
178
|
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
|
178
179
|
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
|
179
|
-
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
|
180
|
+
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
|
180
181
|
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
|
181
182
|
goto _match_found;
|
182
183
|
} }
|
@@ -197,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
197
198
|
} }
|
198
199
|
|
199
200
|
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
201
|
+
#if defined(__aarch64__)
|
202
|
+
PREFETCH_L1(ip+256);
|
203
|
+
#endif
|
200
204
|
continue;
|
201
205
|
|
202
206
|
_search_next_long:
|
@@ -205,7 +209,7 @@ _search_next_long:
|
|
205
209
|
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
206
210
|
U32 const matchIndexL3 = hashLong[hl3];
|
207
211
|
const BYTE* matchL3 = base + matchIndexL3;
|
208
|
-
hashLong[hl3] = current + 1;
|
212
|
+
hashLong[hl3] = curr + 1;
|
209
213
|
|
210
214
|
/* check prefix long +1 match */
|
211
215
|
if (matchIndexL3 > prefixLowestIndex) {
|
@@ -224,7 +228,7 @@ _search_next_long:
|
|
224
228
|
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
225
229
|
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
|
226
230
|
ip++;
|
227
|
-
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
|
231
|
+
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
|
228
232
|
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
|
229
233
|
goto _match_found;
|
230
234
|
} } }
|
@@ -232,7 +236,7 @@ _search_next_long:
|
|
232
236
|
/* if no long +1 match, explore the short match we found */
|
233
237
|
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
|
234
238
|
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
235
|
-
offset = (U32)(current - matchIndexS);
|
239
|
+
offset = (U32)(curr - matchIndexS);
|
236
240
|
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
237
241
|
} else {
|
238
242
|
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
@@ -246,7 +250,7 @@ _match_found:
|
|
246
250
|
offset_2 = offset_1;
|
247
251
|
offset_1 = offset;
|
248
252
|
|
249
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
253
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
250
254
|
|
251
255
|
_match_stored:
|
252
256
|
/* match found */
|
@@ -256,7 +260,7 @@ _match_stored:
|
|
256
260
|
if (ip <= ilimit) {
|
257
261
|
/* Complementary insertion */
|
258
262
|
/* done after iLimit test, as candidates could be > iend-8 */
|
259
|
-
{ U32 const indexToInsert = current+2;
|
263
|
+
{ U32 const indexToInsert = curr+2;
|
260
264
|
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
261
265
|
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
262
266
|
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
@@ -270,14 +274,14 @@ _match_stored:
|
|
270
274
|
U32 const repIndex2 = current2 - offset_2;
|
271
275
|
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
|
272
276
|
&& repIndex2 < prefixLowestIndex ?
|
273
|
-
dictBase - dictIndexDelta
|
277
|
+
dictBase + repIndex2 - dictIndexDelta :
|
274
278
|
base + repIndex2;
|
275
279
|
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
276
280
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
277
281
|
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
278
282
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
279
283
|
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
280
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
284
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
281
285
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
282
286
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
283
287
|
ip += repLength2;
|
@@ -296,7 +300,7 @@ _match_stored:
|
|
296
300
|
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
297
301
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
298
302
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
299
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
|
303
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
|
300
304
|
ip += rLength;
|
301
305
|
anchor = ip;
|
302
306
|
continue; /* faster when present ... (?) */
|
@@ -369,9 +373,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
369
373
|
const BYTE* const ilimit = iend - 8;
|
370
374
|
const BYTE* const base = ms->window.base;
|
371
375
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
372
|
-
const U32
|
373
|
-
const U32 lowestValid = ms->window.lowLimit;
|
374
|
-
const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
|
376
|
+
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
|
375
377
|
const U32 dictStartIndex = lowLimit;
|
376
378
|
const U32 dictLimit = ms->window.dictLimit;
|
377
379
|
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
|
@@ -399,31 +401,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
399
401
|
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
|
400
402
|
const BYTE* matchLong = matchLongBase + matchLongIndex;
|
401
403
|
|
402
|
-
const U32 current = (U32)(ip-base);
|
403
|
-
const U32 repIndex =
|
404
|
+
const U32 curr = (U32)(ip-base);
|
405
|
+
const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
|
404
406
|
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
405
407
|
const BYTE* const repMatch = repBase + repIndex;
|
406
408
|
size_t mLength;
|
407
|
-
hashSmall[hSmall] = hashLong[hLong] =
|
409
|
+
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
|
408
410
|
|
409
411
|
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
|
410
|
-
& (
|
412
|
+
& (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
|
411
413
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
412
414
|
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
413
415
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
414
416
|
ip++;
|
415
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
|
417
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
416
418
|
} else {
|
417
419
|
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
418
420
|
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
|
419
421
|
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
|
420
422
|
U32 offset;
|
421
423
|
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
|
422
|
-
offset = current - matchLongIndex;
|
424
|
+
offset = curr - matchLongIndex;
|
423
425
|
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
424
426
|
offset_2 = offset_1;
|
425
427
|
offset_1 = offset;
|
426
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
428
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
427
429
|
|
428
430
|
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
|
429
431
|
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
@@ -431,24 +433,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
431
433
|
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
|
432
434
|
const BYTE* match3 = match3Base + matchIndex3;
|
433
435
|
U32 offset;
|
434
|
-
hashLong[h3] = current + 1;
|
436
|
+
hashLong[h3] = curr + 1;
|
435
437
|
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
|
436
438
|
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
|
437
439
|
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
|
438
440
|
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
|
439
441
|
ip++;
|
440
|
-
offset = current+1 - matchIndex3;
|
442
|
+
offset = curr+1 - matchIndex3;
|
441
443
|
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
|
442
444
|
} else {
|
443
445
|
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
444
446
|
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
445
447
|
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
446
|
-
offset = current - matchIndex;
|
448
|
+
offset = curr - matchIndex;
|
447
449
|
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
448
450
|
}
|
449
451
|
offset_2 = offset_1;
|
450
452
|
offset_1 = offset;
|
451
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
453
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
452
454
|
|
453
455
|
} else {
|
454
456
|
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
@@ -462,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
462
464
|
if (ip <= ilimit) {
|
463
465
|
/* Complementary insertion */
|
464
466
|
/* done after iLimit test, as candidates could be > iend-8 */
|
465
|
-
{ U32 const indexToInsert = current+2;
|
467
|
+
{ U32 const indexToInsert = curr+2;
|
466
468
|
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
467
469
|
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
468
470
|
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
@@ -475,12 +477,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
475
477
|
U32 const repIndex2 = current2 - offset_2;
|
476
478
|
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
477
479
|
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
|
478
|
-
& (
|
480
|
+
& (offset_2 < current2 - dictStartIndex))
|
479
481
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
480
482
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
481
483
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
482
484
|
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
483
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
485
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
484
486
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
485
487
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
486
488
|
ip += repLength2;
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,7 +15,7 @@
|
|
15
15
|
extern "C" {
|
16
16
|
#endif
|
17
17
|
|
18
|
-
#include "mem.h" /* U32 */
|
18
|
+
#include "../common/mem.h" /* U32 */
|
19
19
|
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
|
20
20
|
|
21
21
|
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -8,7 +8,7 @@
|
|
8
8
|
* You may select, at your option, one of the above-listed licenses.
|
9
9
|
*/
|
10
10
|
|
11
|
-
#include "zstd_compress_internal.h"
|
11
|
+
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
|
12
12
|
#include "zstd_fast.h"
|
13
13
|
|
14
14
|
|
@@ -29,22 +29,22 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
|
29
29
|
* Insert the other positions if their hash entry is empty.
|
30
30
|
*/
|
31
31
|
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
|
32
|
-
U32 const current = (U32)(ip - base);
|
32
|
+
U32 const curr = (U32)(ip - base);
|
33
33
|
size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
|
34
|
-
hashTable[hash0] = current;
|
34
|
+
hashTable[hash0] = curr;
|
35
35
|
if (dtlm == ZSTD_dtlm_fast) continue;
|
36
36
|
/* Only load extra positions for ZSTD_dtlm_full */
|
37
37
|
{ U32 p;
|
38
38
|
for (p = 1; p < fastHashFillStep; ++p) {
|
39
39
|
size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
|
40
40
|
if (hashTable[hash] == 0) { /* not yet filled */
|
41
|
-
hashTable[hash] = current + p;
|
41
|
+
hashTable[hash] = curr + p;
|
42
42
|
} } } }
|
43
43
|
}
|
44
44
|
|
45
45
|
|
46
|
-
FORCE_INLINE_TEMPLATE
|
47
|
-
|
46
|
+
FORCE_INLINE_TEMPLATE size_t
|
47
|
+
ZSTD_compressBlock_fast_generic(
|
48
48
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
49
49
|
void const* src, size_t srcSize,
|
50
50
|
U32 const mls)
|
@@ -61,9 +61,7 @@ size_t ZSTD_compressBlock_fast_generic(
|
|
61
61
|
const BYTE* ip1;
|
62
62
|
const BYTE* anchor = istart;
|
63
63
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
64
|
-
const U32
|
65
|
-
const U32 validStartIndex = ms->window.dictLimit;
|
66
|
-
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
|
64
|
+
const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
67
65
|
const BYTE* const prefixStart = base + prefixStartIndex;
|
68
66
|
const BYTE* const iend = istart + srcSize;
|
69
67
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
@@ -71,15 +69,24 @@ size_t ZSTD_compressBlock_fast_generic(
|
|
71
69
|
U32 offsetSaved = 0;
|
72
70
|
|
73
71
|
/* init */
|
72
|
+
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
|
74
73
|
ip0 += (ip0 == prefixStart);
|
75
74
|
ip1 = ip0 + 1;
|
76
|
-
{
|
77
|
-
U32 const
|
75
|
+
{ U32 const curr = (U32)(ip0 - base);
|
76
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
77
|
+
U32 const maxRep = curr - windowLow;
|
78
78
|
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
79
79
|
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
80
80
|
}
|
81
81
|
|
82
82
|
/* Main Search Loop */
|
83
|
+
#ifdef __INTEL_COMPILER
|
84
|
+
/* From intel 'The vector pragma indicates that the loop should be
|
85
|
+
* vectorized if it is legal to do so'. Can be used together with
|
86
|
+
* #pragma ivdep (but have opted to exclude that because intel
|
87
|
+
* warns against using it).*/
|
88
|
+
#pragma vector always
|
89
|
+
#endif
|
83
90
|
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
|
84
91
|
size_t mLength;
|
85
92
|
BYTE const* ip2 = ip0 + 2;
|
@@ -91,19 +98,25 @@ size_t ZSTD_compressBlock_fast_generic(
|
|
91
98
|
U32 const current1 = (U32)(ip1-base);
|
92
99
|
U32 const matchIndex0 = hashTable[h0];
|
93
100
|
U32 const matchIndex1 = hashTable[h1];
|
94
|
-
BYTE const* repMatch = ip2-offset_1;
|
101
|
+
BYTE const* repMatch = ip2 - offset_1;
|
95
102
|
const BYTE* match0 = base + matchIndex0;
|
96
103
|
const BYTE* match1 = base + matchIndex1;
|
97
104
|
U32 offcode;
|
105
|
+
|
106
|
+
#if defined(__aarch64__)
|
107
|
+
PREFETCH_L1(ip0+256);
|
108
|
+
#endif
|
109
|
+
|
98
110
|
hashTable[h0] = current0; /* update hash table */
|
99
111
|
hashTable[h1] = current1; /* update hash table */
|
100
112
|
|
101
113
|
assert(ip0 + 1 == ip1);
|
102
114
|
|
103
115
|
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
|
104
|
-
mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
|
116
|
+
mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
|
105
117
|
ip0 = ip2 - mLength;
|
106
118
|
match0 = repMatch - mLength;
|
119
|
+
mLength += 4;
|
107
120
|
offcode = 0;
|
108
121
|
goto _match;
|
109
122
|
}
|
@@ -117,8 +130,7 @@ size_t ZSTD_compressBlock_fast_generic(
|
|
117
130
|
match0 = match1;
|
118
131
|
goto _offset;
|
119
132
|
}
|
120
|
-
{
|
121
|
-
size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
|
133
|
+
{ size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
|
122
134
|
assert(step >= 2);
|
123
135
|
ip0 += step;
|
124
136
|
ip1 += step;
|
@@ -129,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
|
|
129
141
|
offset_2 = offset_1;
|
130
142
|
offset_1 = (U32)(ip0-match0);
|
131
143
|
offcode = offset_1 + ZSTD_REP_MOVE;
|
132
|
-
mLength = 0;
|
144
|
+
mLength = 4;
|
133
145
|
/* Count the backwards match length */
|
134
146
|
while (((ip0>anchor) & (match0>prefixStart))
|
135
147
|
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
|
136
148
|
|
137
149
|
_match: /* Requires: ip0, match0, offcode */
|
138
150
|
/* Count the forward length */
|
139
|
-
mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
|
140
|
-
ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
|
151
|
+
mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
|
152
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
|
141
153
|
/* match found */
|
142
154
|
ip0 += mLength;
|
143
155
|
anchor = ip0;
|
144
|
-
ip1 = ip0 + 1;
|
145
156
|
|
146
157
|
if (ip0 <= ilimit) {
|
147
158
|
/* Fill Table */
|
@@ -149,20 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
|
|
149
160
|
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
|
150
161
|
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
151
162
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
}
|
165
|
-
}
|
163
|
+
if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
|
164
|
+
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
|
165
|
+
/* store sequence */
|
166
|
+
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
|
167
|
+
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
|
168
|
+
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
169
|
+
ip0 += rLength;
|
170
|
+
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
|
171
|
+
anchor = ip0;
|
172
|
+
continue; /* faster when present (confirmed on gcc-8) ... (?) */
|
173
|
+
} } }
|
174
|
+
ip1 = ip0 + 1;
|
166
175
|
}
|
167
176
|
|
168
177
|
/* save reps for next block */
|
@@ -178,8 +187,7 @@ size_t ZSTD_compressBlock_fast(
|
|
178
187
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
179
188
|
void const* src, size_t srcSize)
|
180
189
|
{
|
181
|
-
|
182
|
-
U32 const mls = cParams->minMatch;
|
190
|
+
U32 const mls = ms->cParams.minMatch;
|
183
191
|
assert(ms->dictMatchState == NULL);
|
184
192
|
switch(mls)
|
185
193
|
{
|
@@ -234,11 +242,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
234
242
|
assert(endIndex - prefixStartIndex <= maxDistance);
|
235
243
|
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
|
236
244
|
|
237
|
-
/* ensure there will be no
|
245
|
+
/* ensure there will be no underflow
|
238
246
|
* when translating a dict index into a local index */
|
239
247
|
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
|
240
248
|
|
241
249
|
/* init */
|
250
|
+
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
|
242
251
|
ip += (dictAndPrefixLength == 0);
|
243
252
|
/* dictMatchState repCode checks don't currently handle repCode == 0
|
244
253
|
* disabling. */
|
@@ -249,21 +258,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
249
258
|
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
250
259
|
size_t mLength;
|
251
260
|
size_t const h = ZSTD_hashPtr(ip, hlog, mls);
|
252
|
-
U32 const
|
261
|
+
U32 const curr = (U32)(ip-base);
|
253
262
|
U32 const matchIndex = hashTable[h];
|
254
263
|
const BYTE* match = base + matchIndex;
|
255
|
-
const U32 repIndex =
|
264
|
+
const U32 repIndex = curr + 1 - offset_1;
|
256
265
|
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
|
257
266
|
dictBase + (repIndex - dictIndexDelta) :
|
258
267
|
base + repIndex;
|
259
|
-
hashTable[h] =
|
268
|
+
hashTable[h] = curr; /* update hash table */
|
260
269
|
|
261
270
|
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
|
262
271
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
263
272
|
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
264
273
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
265
274
|
ip++;
|
266
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
|
275
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
267
276
|
} else if ( (matchIndex <= prefixStartIndex) ) {
|
268
277
|
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
|
269
278
|
U32 const dictMatchIndex = dictHashTable[dictHash];
|
@@ -275,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
275
284
|
continue;
|
276
285
|
} else {
|
277
286
|
/* found a dict match */
|
278
|
-
U32 const offset = (U32)(
|
287
|
+
U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
|
279
288
|
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
|
280
289
|
while (((ip>anchor) & (dictMatch>dictStart))
|
281
290
|
&& (ip[-1] == dictMatch[-1])) {
|
@@ -283,7 +292,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
283
292
|
} /* catch up */
|
284
293
|
offset_2 = offset_1;
|
285
294
|
offset_1 = offset;
|
286
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
295
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
287
296
|
}
|
288
297
|
} else if (MEM_read32(match) != MEM_read32(ip)) {
|
289
298
|
/* it's not a match, and we're not going to check the dictionary */
|
@@ -298,7 +307,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
298
307
|
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
299
308
|
offset_2 = offset_1;
|
300
309
|
offset_1 = offset;
|
301
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
310
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
302
311
|
}
|
303
312
|
|
304
313
|
/* match found */
|
@@ -307,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
307
316
|
|
308
317
|
if (ip <= ilimit) {
|
309
318
|
/* Fill Table */
|
310
|
-
assert(base+
|
311
|
-
hashTable[ZSTD_hashPtr(base+
|
319
|
+
assert(base+curr+2 > istart); /* check base overflow */
|
320
|
+
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
|
312
321
|
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
313
322
|
|
314
323
|
/* check immediate repcode */
|
@@ -323,7 +332,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
323
332
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
324
333
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
325
334
|
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
326
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
335
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
327
336
|
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
328
337
|
ip += repLength2;
|
329
338
|
anchor = ip;
|
@@ -346,8 +355,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
|
|
346
355
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
347
356
|
void const* src, size_t srcSize)
|
348
357
|
{
|
349
|
-
|
350
|
-
U32 const mls = cParams->minMatch;
|
358
|
+
U32 const mls = ms->cParams.minMatch;
|
351
359
|
assert(ms->dictMatchState != NULL);
|
352
360
|
switch(mls)
|
353
361
|
{
|
@@ -379,9 +387,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
379
387
|
const BYTE* ip = istart;
|
380
388
|
const BYTE* anchor = istart;
|
381
389
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
382
|
-
const U32
|
383
|
-
const U32 validLow = ms->window.lowLimit;
|
384
|
-
const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
|
390
|
+
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
|
385
391
|
const U32 dictStartIndex = lowLimit;
|
386
392
|
const BYTE* const dictStart = dictBase + dictStartIndex;
|
387
393
|
const U32 dictLimit = ms->window.dictLimit;
|
@@ -392,6 +398,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
392
398
|
const BYTE* const ilimit = iend - 8;
|
393
399
|
U32 offset_1=rep[0], offset_2=rep[1];
|
394
400
|
|
401
|
+
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
|
402
|
+
|
395
403
|
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
|
396
404
|
if (prefixStartIndex == dictStartIndex)
|
397
405
|
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
|
@@ -402,20 +410,22 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
402
410
|
const U32 matchIndex = hashTable[h];
|
403
411
|
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
|
404
412
|
const BYTE* match = matchBase + matchIndex;
|
405
|
-
const U32
|
406
|
-
const U32 repIndex =
|
413
|
+
const U32 curr = (U32)(ip-base);
|
414
|
+
const U32 repIndex = curr + 1 - offset_1;
|
407
415
|
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
408
416
|
const BYTE* const repMatch = repBase + repIndex;
|
409
|
-
|
410
|
-
|
411
|
-
assert(offset_1 <= current +1); /* check repIndex */
|
417
|
+
hashTable[h] = curr; /* update hash table */
|
418
|
+
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
|
412
419
|
|
413
|
-
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
|
420
|
+
if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
|
421
|
+
& (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
|
414
422
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
415
|
-
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
416
|
-
|
423
|
+
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
424
|
+
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
|
417
425
|
ip++;
|
418
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0,
|
426
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
|
427
|
+
ip += rLength;
|
428
|
+
anchor = ip;
|
419
429
|
} else {
|
420
430
|
if ( (matchIndex < dictStartIndex) ||
|
421
431
|
(MEM_read32(match) != MEM_read32(ip)) ) {
|
@@ -423,36 +433,32 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
423
433
|
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
|
424
434
|
continue;
|
425
435
|
}
|
426
|
-
{ const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
427
|
-
const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
428
|
-
U32 offset;
|
429
|
-
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
436
|
+
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
437
|
+
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
438
|
+
U32 const offset = curr - matchIndex;
|
439
|
+
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
430
440
|
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
441
|
+
offset_2 = offset_1; offset_1 = offset; /* update offset history */
|
442
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
443
|
+
ip += mLength;
|
444
|
+
anchor = ip;
|
435
445
|
} }
|
436
446
|
|
437
|
-
/* found a match : store it */
|
438
|
-
ip += mLength;
|
439
|
-
anchor = ip;
|
440
|
-
|
441
447
|
if (ip <= ilimit) {
|
442
448
|
/* Fill Table */
|
443
|
-
hashTable[ZSTD_hashPtr(base+
|
449
|
+
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
|
444
450
|
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
445
451
|
/* check immediate repcode */
|
446
452
|
while (ip <= ilimit) {
|
447
453
|
U32 const current2 = (U32)(ip-base);
|
448
454
|
U32 const repIndex2 = current2 - offset_2;
|
449
|
-
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
450
|
-
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (
|
455
|
+
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
456
|
+
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */
|
451
457
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
452
458
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
453
459
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
454
|
-
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;
|
455
|
-
ZSTD_storeSeq(seqStore, 0
|
460
|
+
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
|
461
|
+
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
|
456
462
|
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
457
463
|
ip += repLength2;
|
458
464
|
anchor = ip;
|
@@ -474,8 +480,7 @@ size_t ZSTD_compressBlock_fast_extDict(
|
|
474
480
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
475
481
|
void const* src, size_t srcSize)
|
476
482
|
{
|
477
|
-
|
478
|
-
U32 const mls = cParams->minMatch;
|
483
|
+
U32 const mls = ms->cParams.minMatch;
|
479
484
|
switch(mls)
|
480
485
|
{
|
481
486
|
default: /* includes case 3 */
|