zstd-ruby 1.4.0.0 → 1.4.9.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/Makefile +274 -107
- data/ext/zstdruby/libzstd/README.md +75 -16
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +154 -5
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +7 -3
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
- data/ext/zstdruby/libzstd/common/huf.h +41 -38
- data/ext/zstdruby/libzstd/common/mem.h +68 -22
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
- data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
- data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/zstd.h +655 -118
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +20 -10
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -31,20 +31,19 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|
|
31
31
|
* is empty.
|
|
32
32
|
*/
|
|
33
33
|
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
|
34
|
-
U32 const
|
|
34
|
+
U32 const curr = (U32)(ip - base);
|
|
35
35
|
U32 i;
|
|
36
36
|
for (i = 0; i < fastHashFillStep; ++i) {
|
|
37
37
|
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
|
38
38
|
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
|
39
39
|
if (i == 0)
|
|
40
|
-
hashSmall[smHash] =
|
|
40
|
+
hashSmall[smHash] = curr + i;
|
|
41
41
|
if (i == 0 || hashLarge[lgHash] == 0)
|
|
42
|
-
hashLarge[lgHash] =
|
|
42
|
+
hashLarge[lgHash] = curr + i;
|
|
43
43
|
/* Only load extra positions for ZSTD_dtlm_full */
|
|
44
44
|
if (dtlm == ZSTD_dtlm_fast)
|
|
45
45
|
break;
|
|
46
|
-
|
|
47
|
-
}
|
|
46
|
+
} }
|
|
48
47
|
}
|
|
49
48
|
|
|
50
49
|
|
|
@@ -63,7 +62,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
63
62
|
const BYTE* const istart = (const BYTE*)src;
|
|
64
63
|
const BYTE* ip = istart;
|
|
65
64
|
const BYTE* anchor = istart;
|
|
66
|
-
const U32
|
|
65
|
+
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
66
|
+
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
|
67
|
+
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
|
67
68
|
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
|
68
69
|
const BYTE* const iend = istart + srcSize;
|
|
69
70
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
|
@@ -93,14 +94,23 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
93
94
|
dictCParams->hashLog : hBitsL;
|
|
94
95
|
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
|
95
96
|
dictCParams->chainLog : hBitsS;
|
|
96
|
-
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
|
|
97
|
+
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
|
98
|
+
|
|
99
|
+
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
|
|
97
100
|
|
|
98
101
|
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
|
99
102
|
|
|
103
|
+
/* if a dictionary is attached, it must be within window range */
|
|
104
|
+
if (dictMode == ZSTD_dictMatchState) {
|
|
105
|
+
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
|
106
|
+
}
|
|
107
|
+
|
|
100
108
|
/* init */
|
|
101
109
|
ip += (dictAndPrefixLength == 0);
|
|
102
110
|
if (dictMode == ZSTD_noDict) {
|
|
103
|
-
U32 const
|
|
111
|
+
U32 const curr = (U32)(ip - base);
|
|
112
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
|
113
|
+
U32 const maxRep = curr - windowLow;
|
|
104
114
|
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
|
105
115
|
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
|
106
116
|
}
|
|
@@ -119,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
119
129
|
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
|
120
130
|
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
|
121
131
|
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
|
122
|
-
U32 const
|
|
132
|
+
U32 const curr = (U32)(ip-base);
|
|
123
133
|
U32 const matchIndexL = hashLong[h2];
|
|
124
134
|
U32 matchIndexS = hashSmall[h];
|
|
125
135
|
const BYTE* matchLong = base + matchIndexL;
|
|
126
136
|
const BYTE* match = base + matchIndexS;
|
|
127
|
-
const U32 repIndex =
|
|
137
|
+
const U32 repIndex = curr + 1 - offset_1;
|
|
128
138
|
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
|
129
139
|
&& repIndex < prefixLowestIndex) ?
|
|
130
140
|
dictBase + (repIndex - dictIndexDelta) :
|
|
131
141
|
base + repIndex;
|
|
132
|
-
hashLong[h2] = hashSmall[h] =
|
|
142
|
+
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
|
|
133
143
|
|
|
134
144
|
/* check dictMatchState repcode */
|
|
135
145
|
if (dictMode == ZSTD_dictMatchState
|
|
@@ -138,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
138
148
|
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
|
139
149
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
|
140
150
|
ip++;
|
|
141
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
|
151
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
|
142
152
|
goto _match_stored;
|
|
143
153
|
}
|
|
144
154
|
|
|
@@ -147,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
147
157
|
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
|
148
158
|
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
|
149
159
|
ip++;
|
|
150
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
|
160
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
|
151
161
|
goto _match_stored;
|
|
152
162
|
}
|
|
153
163
|
|
|
@@ -167,11 +177,10 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
167
177
|
|
|
168
178
|
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
|
|
169
179
|
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
|
|
170
|
-
offset = (U32)(
|
|
180
|
+
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
|
|
171
181
|
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
|
|
172
182
|
goto _match_found;
|
|
173
|
-
|
|
174
|
-
}
|
|
183
|
+
} }
|
|
175
184
|
|
|
176
185
|
if (matchIndexS > prefixLowestIndex) {
|
|
177
186
|
/* check prefix short match */
|
|
@@ -186,20 +195,21 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
186
195
|
|
|
187
196
|
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
|
|
188
197
|
goto _search_next_long;
|
|
189
|
-
|
|
190
|
-
}
|
|
198
|
+
} }
|
|
191
199
|
|
|
192
200
|
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
|
201
|
+
#if defined(__aarch64__)
|
|
202
|
+
PREFETCH_L1(ip+256);
|
|
203
|
+
#endif
|
|
193
204
|
continue;
|
|
194
205
|
|
|
195
206
|
_search_next_long:
|
|
196
207
|
|
|
197
|
-
{
|
|
198
|
-
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
|
208
|
+
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
|
199
209
|
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
|
200
210
|
U32 const matchIndexL3 = hashLong[hl3];
|
|
201
211
|
const BYTE* matchL3 = base + matchIndexL3;
|
|
202
|
-
hashLong[hl3] =
|
|
212
|
+
hashLong[hl3] = curr + 1;
|
|
203
213
|
|
|
204
214
|
/* check prefix long +1 match */
|
|
205
215
|
if (matchIndexL3 > prefixLowestIndex) {
|
|
@@ -218,17 +228,15 @@ _search_next_long:
|
|
|
218
228
|
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
|
219
229
|
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
|
|
220
230
|
ip++;
|
|
221
|
-
offset = (U32)(
|
|
231
|
+
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
|
|
222
232
|
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
|
|
223
233
|
goto _match_found;
|
|
224
|
-
|
|
225
|
-
}
|
|
226
|
-
}
|
|
234
|
+
} } }
|
|
227
235
|
|
|
228
236
|
/* if no long +1 match, explore the short match we found */
|
|
229
237
|
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
|
|
230
238
|
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
|
231
|
-
offset = (U32)(
|
|
239
|
+
offset = (U32)(curr - matchIndexS);
|
|
232
240
|
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
233
241
|
} else {
|
|
234
242
|
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
|
@@ -242,7 +250,7 @@ _match_found:
|
|
|
242
250
|
offset_2 = offset_1;
|
|
243
251
|
offset_1 = offset;
|
|
244
252
|
|
|
245
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
253
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
246
254
|
|
|
247
255
|
_match_stored:
|
|
248
256
|
/* match found */
|
|
@@ -250,11 +258,14 @@ _match_stored:
|
|
|
250
258
|
anchor = ip;
|
|
251
259
|
|
|
252
260
|
if (ip <= ilimit) {
|
|
253
|
-
/*
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
261
|
+
/* Complementary insertion */
|
|
262
|
+
/* done after iLimit test, as candidates could be > iend-8 */
|
|
263
|
+
{ U32 const indexToInsert = curr+2;
|
|
264
|
+
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
|
265
|
+
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
|
266
|
+
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
|
267
|
+
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
|
|
268
|
+
}
|
|
258
269
|
|
|
259
270
|
/* check immediate repcode */
|
|
260
271
|
if (dictMode == ZSTD_dictMatchState) {
|
|
@@ -263,14 +274,14 @@ _match_stored:
|
|
|
263
274
|
U32 const repIndex2 = current2 - offset_2;
|
|
264
275
|
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
|
|
265
276
|
&& repIndex2 < prefixLowestIndex ?
|
|
266
|
-
dictBase - dictIndexDelta
|
|
277
|
+
dictBase + repIndex2 - dictIndexDelta :
|
|
267
278
|
base + repIndex2;
|
|
268
279
|
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
|
269
280
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
|
270
281
|
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
|
271
282
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
|
272
283
|
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
273
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
|
284
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
|
274
285
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
|
275
286
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
276
287
|
ip += repLength2;
|
|
@@ -278,8 +289,7 @@ _match_stored:
|
|
|
278
289
|
continue;
|
|
279
290
|
}
|
|
280
291
|
break;
|
|
281
|
-
|
|
282
|
-
}
|
|
292
|
+
} }
|
|
283
293
|
|
|
284
294
|
if (dictMode == ZSTD_noDict) {
|
|
285
295
|
while ( (ip <= ilimit)
|
|
@@ -290,18 +300,19 @@ _match_stored:
|
|
|
290
300
|
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
|
291
301
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
|
292
302
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
|
293
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
|
|
303
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
|
|
294
304
|
ip += rLength;
|
|
295
305
|
anchor = ip;
|
|
296
306
|
continue; /* faster when present ... (?) */
|
|
297
|
-
|
|
307
|
+
} } }
|
|
308
|
+
} /* while (ip < ilimit) */
|
|
298
309
|
|
|
299
310
|
/* save reps for next block */
|
|
300
311
|
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
|
301
312
|
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
|
302
313
|
|
|
303
314
|
/* Return the last literals size */
|
|
304
|
-
return iend - anchor;
|
|
315
|
+
return (size_t)(iend - anchor);
|
|
305
316
|
}
|
|
306
317
|
|
|
307
318
|
|
|
@@ -360,10 +371,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
360
371
|
const BYTE* anchor = istart;
|
|
361
372
|
const BYTE* const iend = istart + srcSize;
|
|
362
373
|
const BYTE* const ilimit = iend - 8;
|
|
363
|
-
const U32 prefixStartIndex = ms->window.dictLimit;
|
|
364
374
|
const BYTE* const base = ms->window.base;
|
|
375
|
+
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
376
|
+
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
|
|
377
|
+
const U32 dictStartIndex = lowLimit;
|
|
378
|
+
const U32 dictLimit = ms->window.dictLimit;
|
|
379
|
+
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
|
|
365
380
|
const BYTE* const prefixStart = base + prefixStartIndex;
|
|
366
|
-
const U32 dictStartIndex = ms->window.lowLimit;
|
|
367
381
|
const BYTE* const dictBase = ms->window.dictBase;
|
|
368
382
|
const BYTE* const dictStart = dictBase + dictStartIndex;
|
|
369
383
|
const BYTE* const dictEnd = dictBase + prefixStartIndex;
|
|
@@ -371,6 +385,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
371
385
|
|
|
372
386
|
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
|
|
373
387
|
|
|
388
|
+
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
|
|
389
|
+
if (prefixStartIndex == dictStartIndex)
|
|
390
|
+
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
|
|
391
|
+
|
|
374
392
|
/* Search Loop */
|
|
375
393
|
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
|
376
394
|
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
|
|
@@ -383,12 +401,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
383
401
|
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
|
|
384
402
|
const BYTE* matchLong = matchLongBase + matchLongIndex;
|
|
385
403
|
|
|
386
|
-
const U32
|
|
387
|
-
const U32 repIndex =
|
|
404
|
+
const U32 curr = (U32)(ip-base);
|
|
405
|
+
const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
|
|
388
406
|
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
|
389
407
|
const BYTE* const repMatch = repBase + repIndex;
|
|
390
408
|
size_t mLength;
|
|
391
|
-
hashSmall[hSmall] = hashLong[hLong] =
|
|
409
|
+
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
|
|
392
410
|
|
|
393
411
|
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
|
|
394
412
|
& (repIndex > dictStartIndex))
|
|
@@ -396,18 +414,18 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
396
414
|
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
|
397
415
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
|
398
416
|
ip++;
|
|
399
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
|
417
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
|
400
418
|
} else {
|
|
401
419
|
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
|
402
420
|
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
|
|
403
421
|
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
404
422
|
U32 offset;
|
|
405
423
|
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
|
|
406
|
-
offset =
|
|
424
|
+
offset = curr - matchLongIndex;
|
|
407
425
|
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
|
408
426
|
offset_2 = offset_1;
|
|
409
427
|
offset_1 = offset;
|
|
410
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
428
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
411
429
|
|
|
412
430
|
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
|
|
413
431
|
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
|
@@ -415,40 +433,44 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
415
433
|
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
|
|
416
434
|
const BYTE* match3 = match3Base + matchIndex3;
|
|
417
435
|
U32 offset;
|
|
418
|
-
hashLong[h3] =
|
|
436
|
+
hashLong[h3] = curr + 1;
|
|
419
437
|
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
|
|
420
438
|
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
|
|
421
439
|
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
|
|
422
440
|
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
|
|
423
441
|
ip++;
|
|
424
|
-
offset =
|
|
442
|
+
offset = curr+1 - matchIndex3;
|
|
425
443
|
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
|
|
426
444
|
} else {
|
|
427
445
|
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
|
428
446
|
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
429
447
|
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
|
430
|
-
offset =
|
|
448
|
+
offset = curr - matchIndex;
|
|
431
449
|
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
432
450
|
}
|
|
433
451
|
offset_2 = offset_1;
|
|
434
452
|
offset_1 = offset;
|
|
435
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
453
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
436
454
|
|
|
437
455
|
} else {
|
|
438
456
|
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
|
439
457
|
continue;
|
|
440
458
|
} }
|
|
441
459
|
|
|
442
|
-
/*
|
|
460
|
+
/* move to next sequence start */
|
|
443
461
|
ip += mLength;
|
|
444
462
|
anchor = ip;
|
|
445
463
|
|
|
446
464
|
if (ip <= ilimit) {
|
|
447
|
-
/*
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
465
|
+
/* Complementary insertion */
|
|
466
|
+
/* done after iLimit test, as candidates could be > iend-8 */
|
|
467
|
+
{ U32 const indexToInsert = curr+2;
|
|
468
|
+
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
|
469
|
+
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
|
470
|
+
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
|
471
|
+
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
|
|
472
|
+
}
|
|
473
|
+
|
|
452
474
|
/* check immediate repcode */
|
|
453
475
|
while (ip <= ilimit) {
|
|
454
476
|
U32 const current2 = (U32)(ip-base);
|
|
@@ -460,7 +482,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
460
482
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
|
461
483
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
|
462
484
|
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
463
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
|
485
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
|
464
486
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
|
465
487
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
466
488
|
ip += repLength2;
|
|
@@ -475,7 +497,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
475
497
|
rep[1] = offset_2;
|
|
476
498
|
|
|
477
499
|
/* Return the last literals size */
|
|
478
|
-
return iend - anchor;
|
|
500
|
+
return (size_t)(iend - anchor);
|
|
479
501
|
}
|
|
480
502
|
|
|
481
503
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
extern "C" {
|
|
16
16
|
#endif
|
|
17
17
|
|
|
18
|
-
#include "mem.h" /* U32 */
|
|
18
|
+
#include "../common/mem.h" /* U32 */
|
|
19
19
|
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
|
|
20
20
|
|
|
21
21
|
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -8,12 +8,13 @@
|
|
|
8
8
|
* You may select, at your option, one of the above-listed licenses.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
#include "zstd_compress_internal.h"
|
|
11
|
+
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
|
|
12
12
|
#include "zstd_fast.h"
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
|
16
|
-
void const
|
|
16
|
+
const void* const end,
|
|
17
|
+
ZSTD_dictTableLoadMethod_e dtlm)
|
|
17
18
|
{
|
|
18
19
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
19
20
|
U32* const hashTable = ms->hashTable;
|
|
@@ -28,21 +29,22 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
|
|
28
29
|
* Insert the other positions if their hash entry is empty.
|
|
29
30
|
*/
|
|
30
31
|
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
|
|
31
|
-
U32 const
|
|
32
|
+
U32 const curr = (U32)(ip - base);
|
|
32
33
|
size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
|
|
33
|
-
hashTable[hash0] =
|
|
34
|
+
hashTable[hash0] = curr;
|
|
34
35
|
if (dtlm == ZSTD_dtlm_fast) continue;
|
|
35
36
|
/* Only load extra positions for ZSTD_dtlm_full */
|
|
36
37
|
{ U32 p;
|
|
37
38
|
for (p = 1; p < fastHashFillStep; ++p) {
|
|
38
39
|
size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
|
|
39
40
|
if (hashTable[hash] == 0) { /* not yet filled */
|
|
40
|
-
hashTable[hash] =
|
|
41
|
+
hashTable[hash] = curr + p;
|
|
41
42
|
} } } }
|
|
42
43
|
}
|
|
43
44
|
|
|
44
|
-
|
|
45
|
-
size_t
|
|
45
|
+
|
|
46
|
+
FORCE_INLINE_TEMPLATE size_t
|
|
47
|
+
ZSTD_compressBlock_fast_generic(
|
|
46
48
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
47
49
|
void const* src, size_t srcSize,
|
|
48
50
|
U32 const mls)
|
|
@@ -58,7 +60,8 @@ size_t ZSTD_compressBlock_fast_generic(
|
|
|
58
60
|
const BYTE* ip0 = istart;
|
|
59
61
|
const BYTE* ip1;
|
|
60
62
|
const BYTE* anchor = istart;
|
|
61
|
-
const U32
|
|
63
|
+
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
64
|
+
const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
|
62
65
|
const BYTE* const prefixStart = base + prefixStartIndex;
|
|
63
66
|
const BYTE* const iend = istart + srcSize;
|
|
64
67
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
|
@@ -66,15 +69,24 @@ size_t ZSTD_compressBlock_fast_generic(
|
|
|
66
69
|
U32 offsetSaved = 0;
|
|
67
70
|
|
|
68
71
|
/* init */
|
|
72
|
+
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
|
|
69
73
|
ip0 += (ip0 == prefixStart);
|
|
70
74
|
ip1 = ip0 + 1;
|
|
71
|
-
{
|
|
72
|
-
U32 const
|
|
75
|
+
{ U32 const curr = (U32)(ip0 - base);
|
|
76
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
|
77
|
+
U32 const maxRep = curr - windowLow;
|
|
73
78
|
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
|
74
79
|
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
|
75
80
|
}
|
|
76
81
|
|
|
77
82
|
/* Main Search Loop */
|
|
83
|
+
#ifdef __INTEL_COMPILER
|
|
84
|
+
/* From intel 'The vector pragma indicates that the loop should be
|
|
85
|
+
* vectorized if it is legal to do so'. Can be used together with
|
|
86
|
+
* #pragma ivdep (but have opted to exclude that because intel
|
|
87
|
+
* warns against using it).*/
|
|
88
|
+
#pragma vector always
|
|
89
|
+
#endif
|
|
78
90
|
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
|
|
79
91
|
size_t mLength;
|
|
80
92
|
BYTE const* ip2 = ip0 + 2;
|
|
@@ -86,19 +98,25 @@ size_t ZSTD_compressBlock_fast_generic(
|
|
|
86
98
|
U32 const current1 = (U32)(ip1-base);
|
|
87
99
|
U32 const matchIndex0 = hashTable[h0];
|
|
88
100
|
U32 const matchIndex1 = hashTable[h1];
|
|
89
|
-
BYTE const* repMatch = ip2-offset_1;
|
|
101
|
+
BYTE const* repMatch = ip2 - offset_1;
|
|
90
102
|
const BYTE* match0 = base + matchIndex0;
|
|
91
103
|
const BYTE* match1 = base + matchIndex1;
|
|
92
104
|
U32 offcode;
|
|
105
|
+
|
|
106
|
+
#if defined(__aarch64__)
|
|
107
|
+
PREFETCH_L1(ip0+256);
|
|
108
|
+
#endif
|
|
109
|
+
|
|
93
110
|
hashTable[h0] = current0; /* update hash table */
|
|
94
111
|
hashTable[h1] = current1; /* update hash table */
|
|
95
112
|
|
|
96
113
|
assert(ip0 + 1 == ip1);
|
|
97
114
|
|
|
98
115
|
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
|
|
99
|
-
mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
|
|
116
|
+
mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
|
|
100
117
|
ip0 = ip2 - mLength;
|
|
101
118
|
match0 = repMatch - mLength;
|
|
119
|
+
mLength += 4;
|
|
102
120
|
offcode = 0;
|
|
103
121
|
goto _match;
|
|
104
122
|
}
|
|
@@ -112,8 +130,7 @@ size_t ZSTD_compressBlock_fast_generic(
|
|
|
112
130
|
match0 = match1;
|
|
113
131
|
goto _offset;
|
|
114
132
|
}
|
|
115
|
-
{
|
|
116
|
-
size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
|
|
133
|
+
{ size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
|
|
117
134
|
assert(step >= 2);
|
|
118
135
|
ip0 += step;
|
|
119
136
|
ip1 += step;
|
|
@@ -124,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
|
|
|
124
141
|
offset_2 = offset_1;
|
|
125
142
|
offset_1 = (U32)(ip0-match0);
|
|
126
143
|
offcode = offset_1 + ZSTD_REP_MOVE;
|
|
127
|
-
mLength =
|
|
144
|
+
mLength = 4;
|
|
128
145
|
/* Count the backwards match length */
|
|
129
146
|
while (((ip0>anchor) & (match0>prefixStart))
|
|
130
147
|
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
|
|
131
148
|
|
|
132
149
|
_match: /* Requires: ip0, match0, offcode */
|
|
133
150
|
/* Count the forward length */
|
|
134
|
-
mLength += ZSTD_count(ip0+mLength
|
|
135
|
-
ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
|
|
151
|
+
mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
|
|
152
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
|
|
136
153
|
/* match found */
|
|
137
154
|
ip0 += mLength;
|
|
138
155
|
anchor = ip0;
|
|
139
|
-
ip1 = ip0 + 1;
|
|
140
156
|
|
|
141
157
|
if (ip0 <= ilimit) {
|
|
142
158
|
/* Fill Table */
|
|
@@ -144,20 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
|
|
|
144
160
|
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
|
|
145
161
|
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
|
146
162
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
}
|
|
160
|
-
}
|
|
163
|
+
if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
|
|
164
|
+
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
|
|
165
|
+
/* store sequence */
|
|
166
|
+
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
|
|
167
|
+
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
|
|
168
|
+
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
|
169
|
+
ip0 += rLength;
|
|
170
|
+
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
|
|
171
|
+
anchor = ip0;
|
|
172
|
+
continue; /* faster when present (confirmed on gcc-8) ... (?) */
|
|
173
|
+
} } }
|
|
174
|
+
ip1 = ip0 + 1;
|
|
161
175
|
}
|
|
162
176
|
|
|
163
177
|
/* save reps for next block */
|
|
@@ -165,7 +179,7 @@ _match: /* Requires: ip0, match0, offcode */
|
|
|
165
179
|
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
|
166
180
|
|
|
167
181
|
/* Return the last literals size */
|
|
168
|
-
return iend - anchor;
|
|
182
|
+
return (size_t)(iend - anchor);
|
|
169
183
|
}
|
|
170
184
|
|
|
171
185
|
|
|
@@ -173,8 +187,7 @@ size_t ZSTD_compressBlock_fast(
|
|
|
173
187
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
174
188
|
void const* src, size_t srcSize)
|
|
175
189
|
{
|
|
176
|
-
|
|
177
|
-
U32 const mls = cParams->minMatch;
|
|
190
|
+
U32 const mls = ms->cParams.minMatch;
|
|
178
191
|
assert(ms->dictMatchState == NULL);
|
|
179
192
|
switch(mls)
|
|
180
193
|
{
|
|
@@ -222,11 +235,19 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
222
235
|
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
|
|
223
236
|
const U32 dictHLog = dictCParams->hashLog;
|
|
224
237
|
|
|
225
|
-
/*
|
|
226
|
-
*
|
|
238
|
+
/* if a dictionary is still attached, it necessarily means that
|
|
239
|
+
* it is within window size. So we just check it. */
|
|
240
|
+
const U32 maxDistance = 1U << cParams->windowLog;
|
|
241
|
+
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
|
|
242
|
+
assert(endIndex - prefixStartIndex <= maxDistance);
|
|
243
|
+
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
|
|
244
|
+
|
|
245
|
+
/* ensure there will be no underflow
|
|
246
|
+
* when translating a dict index into a local index */
|
|
227
247
|
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
|
|
228
248
|
|
|
229
249
|
/* init */
|
|
250
|
+
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
|
|
230
251
|
ip += (dictAndPrefixLength == 0);
|
|
231
252
|
/* dictMatchState repCode checks don't currently handle repCode == 0
|
|
232
253
|
* disabling. */
|
|
@@ -237,21 +258,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
237
258
|
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
|
238
259
|
size_t mLength;
|
|
239
260
|
size_t const h = ZSTD_hashPtr(ip, hlog, mls);
|
|
240
|
-
U32 const
|
|
261
|
+
U32 const curr = (U32)(ip-base);
|
|
241
262
|
U32 const matchIndex = hashTable[h];
|
|
242
263
|
const BYTE* match = base + matchIndex;
|
|
243
|
-
const U32 repIndex =
|
|
264
|
+
const U32 repIndex = curr + 1 - offset_1;
|
|
244
265
|
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
|
|
245
266
|
dictBase + (repIndex - dictIndexDelta) :
|
|
246
267
|
base + repIndex;
|
|
247
|
-
hashTable[h] =
|
|
268
|
+
hashTable[h] = curr; /* update hash table */
|
|
248
269
|
|
|
249
270
|
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
|
|
250
271
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
|
251
272
|
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
|
252
273
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
|
253
274
|
ip++;
|
|
254
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
|
275
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
|
255
276
|
} else if ( (matchIndex <= prefixStartIndex) ) {
|
|
256
277
|
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
|
|
257
278
|
U32 const dictMatchIndex = dictHashTable[dictHash];
|
|
@@ -263,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
263
284
|
continue;
|
|
264
285
|
} else {
|
|
265
286
|
/* found a dict match */
|
|
266
|
-
U32 const offset = (U32)(
|
|
287
|
+
U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
|
|
267
288
|
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
|
|
268
289
|
while (((ip>anchor) & (dictMatch>dictStart))
|
|
269
290
|
&& (ip[-1] == dictMatch[-1])) {
|
|
@@ -271,7 +292,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
271
292
|
} /* catch up */
|
|
272
293
|
offset_2 = offset_1;
|
|
273
294
|
offset_1 = offset;
|
|
274
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
295
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
275
296
|
}
|
|
276
297
|
} else if (MEM_read32(match) != MEM_read32(ip)) {
|
|
277
298
|
/* it's not a match, and we're not going to check the dictionary */
|
|
@@ -286,7 +307,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
286
307
|
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
287
308
|
offset_2 = offset_1;
|
|
288
309
|
offset_1 = offset;
|
|
289
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
310
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
290
311
|
}
|
|
291
312
|
|
|
292
313
|
/* match found */
|
|
@@ -295,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
295
316
|
|
|
296
317
|
if (ip <= ilimit) {
|
|
297
318
|
/* Fill Table */
|
|
298
|
-
assert(base+
|
|
299
|
-
hashTable[ZSTD_hashPtr(base+
|
|
319
|
+
assert(base+curr+2 > istart); /* check base overflow */
|
|
320
|
+
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
|
|
300
321
|
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
|
301
322
|
|
|
302
323
|
/* check immediate repcode */
|
|
@@ -311,7 +332,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
311
332
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
|
312
333
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
|
313
334
|
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
314
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
|
335
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
|
315
336
|
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
|
316
337
|
ip += repLength2;
|
|
317
338
|
anchor = ip;
|
|
@@ -327,15 +348,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
327
348
|
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
|
328
349
|
|
|
329
350
|
/* Return the last literals size */
|
|
330
|
-
return iend - anchor;
|
|
351
|
+
return (size_t)(iend - anchor);
|
|
331
352
|
}
|
|
332
353
|
|
|
333
354
|
size_t ZSTD_compressBlock_fast_dictMatchState(
|
|
334
355
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
335
356
|
void const* src, size_t srcSize)
|
|
336
357
|
{
|
|
337
|
-
|
|
338
|
-
U32 const mls = cParams->minMatch;
|
|
358
|
+
U32 const mls = ms->cParams.minMatch;
|
|
339
359
|
assert(ms->dictMatchState != NULL);
|
|
340
360
|
switch(mls)
|
|
341
361
|
{
|
|
@@ -366,35 +386,46 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
|
366
386
|
const BYTE* const istart = (const BYTE*)src;
|
|
367
387
|
const BYTE* ip = istart;
|
|
368
388
|
const BYTE* anchor = istart;
|
|
369
|
-
const U32
|
|
389
|
+
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
390
|
+
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
|
|
391
|
+
const U32 dictStartIndex = lowLimit;
|
|
370
392
|
const BYTE* const dictStart = dictBase + dictStartIndex;
|
|
371
|
-
const U32
|
|
393
|
+
const U32 dictLimit = ms->window.dictLimit;
|
|
394
|
+
const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
|
|
372
395
|
const BYTE* const prefixStart = base + prefixStartIndex;
|
|
373
396
|
const BYTE* const dictEnd = dictBase + prefixStartIndex;
|
|
374
397
|
const BYTE* const iend = istart + srcSize;
|
|
375
398
|
const BYTE* const ilimit = iend - 8;
|
|
376
399
|
U32 offset_1=rep[0], offset_2=rep[1];
|
|
377
400
|
|
|
401
|
+
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
|
|
402
|
+
|
|
403
|
+
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
|
|
404
|
+
if (prefixStartIndex == dictStartIndex)
|
|
405
|
+
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
|
|
406
|
+
|
|
378
407
|
/* Search Loop */
|
|
379
408
|
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
|
380
409
|
const size_t h = ZSTD_hashPtr(ip, hlog, mls);
|
|
381
410
|
const U32 matchIndex = hashTable[h];
|
|
382
411
|
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
|
|
383
412
|
const BYTE* match = matchBase + matchIndex;
|
|
384
|
-
const U32
|
|
385
|
-
const U32 repIndex =
|
|
413
|
+
const U32 curr = (U32)(ip-base);
|
|
414
|
+
const U32 repIndex = curr + 1 - offset_1;
|
|
386
415
|
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
|
387
416
|
const BYTE* const repMatch = repBase + repIndex;
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
assert(offset_1 <=
|
|
417
|
+
hashTable[h] = curr; /* update hash table */
|
|
418
|
+
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
|
|
419
|
+
assert(offset_1 <= curr +1); /* check repIndex */
|
|
391
420
|
|
|
392
421
|
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
|
|
393
422
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
|
394
|
-
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
|
395
|
-
|
|
423
|
+
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
|
424
|
+
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
|
|
396
425
|
ip++;
|
|
397
|
-
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0,
|
|
426
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
|
|
427
|
+
ip += rLength;
|
|
428
|
+
anchor = ip;
|
|
398
429
|
} else {
|
|
399
430
|
if ( (matchIndex < dictStartIndex) ||
|
|
400
431
|
(MEM_read32(match) != MEM_read32(ip)) ) {
|
|
@@ -402,36 +433,32 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
|
402
433
|
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
|
|
403
434
|
continue;
|
|
404
435
|
}
|
|
405
|
-
{ const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
|
406
|
-
const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
407
|
-
U32 offset;
|
|
408
|
-
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
|
436
|
+
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
|
437
|
+
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
438
|
+
U32 const offset = curr - matchIndex;
|
|
439
|
+
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
|
409
440
|
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
441
|
+
offset_2 = offset_1; offset_1 = offset; /* update offset history */
|
|
442
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
|
443
|
+
ip += mLength;
|
|
444
|
+
anchor = ip;
|
|
414
445
|
} }
|
|
415
446
|
|
|
416
|
-
/* found a match : store it */
|
|
417
|
-
ip += mLength;
|
|
418
|
-
anchor = ip;
|
|
419
|
-
|
|
420
447
|
if (ip <= ilimit) {
|
|
421
448
|
/* Fill Table */
|
|
422
|
-
hashTable[ZSTD_hashPtr(base+
|
|
449
|
+
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
|
|
423
450
|
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
|
424
451
|
/* check immediate repcode */
|
|
425
452
|
while (ip <= ilimit) {
|
|
426
453
|
U32 const current2 = (U32)(ip-base);
|
|
427
454
|
U32 const repIndex2 = current2 - offset_2;
|
|
428
|
-
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
|
455
|
+
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
|
429
456
|
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
|
|
430
457
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
|
431
458
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
|
432
459
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
|
433
|
-
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;
|
|
434
|
-
ZSTD_storeSeq(seqStore, 0
|
|
460
|
+
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
|
|
461
|
+
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
|
|
435
462
|
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
|
436
463
|
ip += repLength2;
|
|
437
464
|
anchor = ip;
|
|
@@ -445,7 +472,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
|
445
472
|
rep[1] = offset_2;
|
|
446
473
|
|
|
447
474
|
/* Return the last literals size */
|
|
448
|
-
return iend - anchor;
|
|
475
|
+
return (size_t)(iend - anchor);
|
|
449
476
|
}
|
|
450
477
|
|
|
451
478
|
|
|
@@ -453,8 +480,7 @@ size_t ZSTD_compressBlock_fast_extDict(
|
|
|
453
480
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
454
481
|
void const* src, size_t srcSize)
|
|
455
482
|
{
|
|
456
|
-
|
|
457
|
-
U32 const mls = cParams->minMatch;
|
|
483
|
+
U32 const mls = ms->cParams.minMatch;
|
|
458
484
|
switch(mls)
|
|
459
485
|
{
|
|
460
486
|
default: /* includes case 3 */
|