zstd-ruby 1.4.4.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
- data/ext/zstdruby/libzstd/common/compiler.h +219 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
- data/ext/zstdruby/libzstd/common/error_private.c +11 -2
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +47 -116
- data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
- data/ext/zstdruby/libzstd/common/huf.h +112 -197
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +11 -5
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +78 -22
- data/ext/zstdruby/libzstd/common/threading.h +9 -13
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
- data/ext/zstdruby/libzstd/zstd.h +1277 -306
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +24 -39
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -289
- data/ext/zstdruby/libzstd/README.md +0 -159
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -11,8 +11,43 @@
|
|
|
11
11
|
#include "zstd_compress_internal.h"
|
|
12
12
|
#include "zstd_double_fast.h"
|
|
13
13
|
|
|
14
|
+
/* Fills both hash tables of `ms` for the CDict case.
 * ms->hashTable is used as the "large" table (hashes taken over 8 bytes) and
 * ms->chainTable as the "small" table (hashes taken over `mls` bytes).
 * Positions from ms->nextToUpdate up to `end - HASH_READ_SIZE` are visited in
 * strides of fastHashFillStep; every entry is stored through
 * ZSTD_writeTaggedIndex().
 * `dtlm` == ZSTD_dtlm_fast stops after the first position of each stride. */
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
|
|
15
|
+
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
|
16
|
+
{
|
|
17
|
+
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
18
|
+
U32* const hashLarge = ms->hashTable;
|
|
19
|
+
/* hashes are requested with ZSTD_SHORT_CACHE_TAG_BITS extra bits; the table slot used in the hashLarge[] emptiness test is the hash shifted right by that amount */
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
20
|
+
U32 const mls = cParams->minMatch;
|
|
21
|
+
U32* const hashSmall = ms->chainTable;
|
|
22
|
+
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
23
|
+
const BYTE* const base = ms->window.base;
|
|
24
|
+
const BYTE* ip = base + ms->nextToUpdate;
|
|
25
|
+
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
|
|
26
|
+
const U32 fastHashFillStep = 3;
|
|
14
27
|
|
|
15
|
-
|
|
28
|
+
/* Always insert every fastHashFillStep position into the hash tables.
|
|
29
|
+
* Insert the other positions into the large hash table if their entry
|
|
30
|
+
* is empty.
|
|
31
|
+
*/
|
|
32
|
+
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
|
33
|
+
U32 const curr = (U32)(ip - base);
|
|
34
|
+
U32 i;
|
|
35
|
+
for (i = 0; i < fastHashFillStep; ++i) {
|
|
36
|
+
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
|
37
|
+
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
|
38
|
+
/* the small table only ever receives the first position of each stride */
if (i == 0) {
|
|
39
|
+
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
|
|
40
|
+
}
|
|
41
|
+
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
|
|
42
|
+
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
|
|
43
|
+
}
|
|
44
|
+
/* Only load extra positions for ZSTD_dtlm_full */
|
|
45
|
+
if (dtlm == ZSTD_dtlm_fast)
|
|
46
|
+
break;
|
|
47
|
+
} }
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
|
|
16
51
|
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
|
17
52
|
{
|
|
18
53
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
@@ -31,27 +66,249 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|
|
31
66
|
* is empty.
|
|
32
67
|
*/
|
|
33
68
|
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
|
34
|
-
U32 const
|
|
69
|
+
U32 const curr = (U32)(ip - base);
|
|
35
70
|
U32 i;
|
|
36
71
|
for (i = 0; i < fastHashFillStep; ++i) {
|
|
37
72
|
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
|
38
73
|
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
|
39
74
|
if (i == 0)
|
|
40
|
-
hashSmall[smHash] =
|
|
75
|
+
hashSmall[smHash] = curr + i;
|
|
41
76
|
if (i == 0 || hashLarge[lgHash] == 0)
|
|
42
|
-
hashLarge[lgHash] =
|
|
77
|
+
hashLarge[lgHash] = curr + i;
|
|
43
78
|
/* Only load extra positions for ZSTD_dtlm_full */
|
|
44
79
|
if (dtlm == ZSTD_dtlm_fast)
|
|
45
80
|
break;
|
|
46
|
-
|
|
81
|
+
} }
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/* Entry point for filling the double-fast hash tables of `ms` up to `end`.
 * Dispatches on `tfp`: ZSTD_tfp_forCDict selects
 * ZSTD_fillDoubleHashTableForCDict(), every other value selects
 * ZSTD_fillDoubleHashTableForCCtx(). `ms`, `end` and `dtlm` are forwarded
 * unchanged. */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|
85
|
+
const void* const end,
|
|
86
|
+
ZSTD_dictTableLoadMethod_e dtlm,
|
|
87
|
+
ZSTD_tableFillPurpose_e tfp)
|
|
88
|
+
{
|
|
89
|
+
if (tfp == ZSTD_tfp_forCDict) {
|
|
90
|
+
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
|
|
91
|
+
} else {
|
|
92
|
+
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
|
|
93
|
+
}
|
|
47
94
|
}
|
|
48
95
|
|
|
49
96
|
|
|
50
97
|
FORCE_INLINE_TEMPLATE
|
|
51
|
-
size_t
|
|
98
|
+
/* Double-fast block compressor, noDict variant.
 * Scans `src` (srcSize bytes) using two tables held in `ms`:
 *   - ms->hashTable  ("long" table, hashes over 8 bytes, hBitsL bits)
 *   - ms->chainTable ("small" table, hashes over `mls` bytes, hBitsS bits)
 * Each accepted match is emitted through ZSTD_storeSeq() into `seqStore`.
 * rep[0] / rep[1] are read as the starting repeat offsets and are rewritten
 * at _cleanup before returning.
 * Returns (iend - anchor), i.e. the count of trailing bytes that were not
 * covered by a stored sequence. */
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
|
99
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
100
|
+
void const* src, size_t srcSize, U32 const mls /* template */)
|
|
101
|
+
{
|
|
102
|
+
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
|
103
|
+
U32* const hashLong = ms->hashTable;
|
|
104
|
+
const U32 hBitsL = cParams->hashLog;
|
|
105
|
+
U32* const hashSmall = ms->chainTable;
|
|
106
|
+
const U32 hBitsS = cParams->chainLog;
|
|
107
|
+
const BYTE* const base = ms->window.base;
|
|
108
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
109
|
+
const BYTE* anchor = istart;
|
|
110
|
+
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
111
|
+
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
|
112
|
+
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
|
113
|
+
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
|
114
|
+
const BYTE* const iend = istart + srcSize;
|
|
115
|
+
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
|
116
|
+
U32 offset_1=rep[0], offset_2=rep[1];
|
|
117
|
+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
|
|
118
|
+
|
|
119
|
+
size_t mLength;
|
|
120
|
+
U32 offset;
|
|
121
|
+
U32 curr;
|
|
122
|
+
|
|
123
|
+
/* how many positions to search before increasing step size */
|
|
124
|
+
const size_t kStepIncr = 1 << kSearchStrength;
|
|
125
|
+
/* the position at which to increment the step size if no match is found */
|
|
126
|
+
const BYTE* nextStep;
|
|
127
|
+
size_t step; /* the current step size */
|
|
128
|
+
|
|
129
|
+
size_t hl0; /* the long hash at ip */
|
|
130
|
+
size_t hl1; /* the long hash at ip1 */
|
|
131
|
+
|
|
132
|
+
U32 idxl0; /* the long match index for ip */
|
|
133
|
+
U32 idxl1; /* the long match index for ip1 */
|
|
134
|
+
|
|
135
|
+
const BYTE* matchl0; /* the long match for ip */
|
|
136
|
+
const BYTE* matchs0; /* the short match for ip */
|
|
137
|
+
const BYTE* matchl1; /* the long match for ip1 */
|
|
138
|
+
|
|
139
|
+
const BYTE* ip = istart; /* the current position */
|
|
140
|
+
const BYTE* ip1; /* the next position */
|
|
141
|
+
|
|
142
|
+
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
|
|
143
|
+
|
|
144
|
+
/* init */
|
|
145
|
+
/* advance by one byte when ip sits exactly at prefixLowest */
ip += ((ip - prefixLowest) == 0);
|
|
146
|
+
{
|
|
147
|
+
U32 const current = (U32)(ip - base);
|
|
148
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
|
149
|
+
U32 const maxRep = current - windowLow;
|
|
150
|
+
/* a repeat offset larger than maxRep is parked in offsetSavedN and zeroed; _cleanup falls back to the parked value if the live offset is still 0 */
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
|
|
151
|
+
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
/* Outer Loop: one iteration per match found and stored */
|
|
155
|
+
while (1) {
|
|
156
|
+
step = 1;
|
|
157
|
+
nextStep = ip + kStepIncr;
|
|
158
|
+
ip1 = ip + step;
|
|
159
|
+
|
|
160
|
+
if (ip1 > ilimit) {
|
|
161
|
+
goto _cleanup;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
|
|
165
|
+
idxl0 = hashLong[hl0];
|
|
166
|
+
matchl0 = base + idxl0;
|
|
167
|
+
|
|
168
|
+
/* Inner Loop: one iteration per search / position */
|
|
169
|
+
do {
|
|
170
|
+
const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
|
|
171
|
+
const U32 idxs0 = hashSmall[hs0];
|
|
172
|
+
curr = (U32)(ip-base);
|
|
173
|
+
matchs0 = base + idxs0;
|
|
174
|
+
|
|
175
|
+
hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */
|
|
176
|
+
|
|
177
|
+
/* check noDict repcode */
|
|
178
|
+
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
|
|
179
|
+
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
|
180
|
+
ip++;
|
|
181
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
|
182
|
+
goto _match_stored;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
|
|
186
|
+
|
|
187
|
+
if (idxl0 > prefixLowestIndex) {
|
|
188
|
+
/* check prefix long match */
|
|
189
|
+
if (MEM_read64(matchl0) == MEM_read64(ip)) {
|
|
190
|
+
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
|
|
191
|
+
offset = (U32)(ip-matchl0);
|
|
192
|
+
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
|
|
193
|
+
goto _match_found;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
idxl1 = hashLong[hl1];
|
|
198
|
+
matchl1 = base + idxl1;
|
|
199
|
+
|
|
200
|
+
if (idxs0 > prefixLowestIndex) {
|
|
201
|
+
/* check prefix short match */
|
|
202
|
+
if (MEM_read32(matchs0) == MEM_read32(ip)) {
|
|
203
|
+
goto _search_next_long;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
if (ip1 >= nextStep) {
|
|
208
|
+
PREFETCH_L1(ip1 + 64);
|
|
209
|
+
PREFETCH_L1(ip1 + 128);
|
|
210
|
+
step++;
|
|
211
|
+
nextStep += kStepIncr;
|
|
212
|
+
}
|
|
213
|
+
ip = ip1;
|
|
214
|
+
ip1 += step;
|
|
215
|
+
|
|
216
|
+
/* the long-table values already computed for ip1 become the values for the new ip */
hl0 = hl1;
|
|
217
|
+
idxl0 = idxl1;
|
|
218
|
+
matchl0 = matchl1;
|
|
219
|
+
#if defined(__aarch64__)
|
|
220
|
+
PREFETCH_L1(ip+256);
|
|
221
|
+
#endif
|
|
222
|
+
} while (ip1 <= ilimit);
|
|
223
|
+
|
|
224
|
+
_cleanup:
|
|
225
|
+
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
|
|
226
|
+
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
|
|
227
|
+
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
|
|
228
|
+
|
|
229
|
+
/* save reps for next block */
|
|
230
|
+
rep[0] = offset_1 ? offset_1 : offsetSaved1;
|
|
231
|
+
rep[1] = offset_2 ? offset_2 : offsetSaved2;
|
|
232
|
+
|
|
233
|
+
/* Return the last literals size */
|
|
234
|
+
return (size_t)(iend - anchor);
|
|
235
|
+
|
|
236
|
+
/* reached from the inner loop when the small table produced a 4-byte match at ip */
_search_next_long:
|
|
237
|
+
|
|
238
|
+
/* check prefix long +1 match */
|
|
239
|
+
if (idxl1 > prefixLowestIndex) {
|
|
240
|
+
if (MEM_read64(matchl1) == MEM_read64(ip1)) {
|
|
241
|
+
ip = ip1;
|
|
242
|
+
mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
|
|
243
|
+
offset = (U32)(ip-matchl1);
|
|
244
|
+
while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
|
|
245
|
+
goto _match_found;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/* if no long +1 match, explore the short match we found */
|
|
250
|
+
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
|
|
251
|
+
offset = (U32)(ip - matchs0);
|
|
252
|
+
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
|
|
253
|
+
|
|
254
|
+
/* fall-through */
|
|
255
|
+
|
|
256
|
+
_match_found: /* requires ip, offset, mLength */
|
|
257
|
+
offset_2 = offset_1;
|
|
258
|
+
offset_1 = offset;
|
|
259
|
+
|
|
260
|
+
if (step < 4) {
|
|
261
|
+
/* It is unsafe to write this value back to the hashtable when ip1 is
|
|
262
|
+
* greater than or equal to the new ip we will have after we're done
|
|
263
|
+
* processing this match. Rather than perform that test directly
|
|
264
|
+
* (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
|
|
265
|
+
* more predictable test. The minmatch even if we take a short match is
|
|
266
|
+
* 4 bytes, so as long as step, the distance between ip and ip1
|
|
267
|
+
* (initially) is less than 4, we know ip1 < new ip. */
|
|
268
|
+
hashLong[hl1] = (U32)(ip1 - base);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
272
|
+
|
|
273
|
+
_match_stored:
|
|
274
|
+
/* match found */
|
|
275
|
+
ip += mLength;
|
|
276
|
+
anchor = ip;
|
|
277
|
+
|
|
278
|
+
if (ip <= ilimit) {
|
|
279
|
+
/* Complementary insertion */
|
|
280
|
+
/* done after iLimit test, as candidates could be > iend-8 */
|
|
281
|
+
{ U32 const indexToInsert = curr+2;
|
|
282
|
+
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
|
283
|
+
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
|
284
|
+
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
|
285
|
+
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/* check immediate repcode */
|
|
289
|
+
while ( (ip <= ilimit)
|
|
290
|
+
&& ( (offset_2>0)
|
|
291
|
+
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
|
292
|
+
/* store sequence */
|
|
293
|
+
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
|
294
|
+
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
|
295
|
+
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
|
296
|
+
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
|
297
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
|
|
298
|
+
ip += rLength;
|
|
299
|
+
anchor = ip;
|
|
300
|
+
continue; /* faster when present ... (?) */
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
FORCE_INLINE_TEMPLATE
|
|
308
|
+
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
52
309
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
53
310
|
void const* src, size_t srcSize,
|
|
54
|
-
U32 const mls /* template
|
|
311
|
+
U32 const mls /* template */)
|
|
55
312
|
{
|
|
56
313
|
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
|
57
314
|
U32* const hashLong = ms->hashTable;
|
|
@@ -63,63 +320,45 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
63
320
|
const BYTE* ip = istart;
|
|
64
321
|
const BYTE* anchor = istart;
|
|
65
322
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
66
|
-
const U32 lowestValid = ms->window.dictLimit;
|
|
67
|
-
const U32 maxDistance = 1U << cParams->windowLog;
|
|
68
323
|
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
|
69
|
-
const U32 prefixLowestIndex = (endIndex
|
|
324
|
+
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
|
70
325
|
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
|
71
326
|
const BYTE* const iend = istart + srcSize;
|
|
72
327
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
|
73
328
|
U32 offset_1=rep[0], offset_2=rep[1];
|
|
74
|
-
U32 offsetSaved = 0;
|
|
75
329
|
|
|
76
330
|
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
|
77
|
-
const ZSTD_compressionParameters* const dictCParams =
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
const U32
|
|
81
|
-
|
|
82
|
-
const
|
|
83
|
-
|
|
84
|
-
const U32
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
|
91
|
-
dms->window.nextSrc : NULL;
|
|
92
|
-
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
|
93
|
-
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
|
94
|
-
0;
|
|
95
|
-
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
|
|
96
|
-
dictCParams->hashLog : hBitsL;
|
|
97
|
-
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
|
98
|
-
dictCParams->chainLog : hBitsS;
|
|
99
|
-
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
|
|
100
|
-
|
|
101
|
-
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
|
|
102
|
-
|
|
103
|
-
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
|
331
|
+
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
|
|
332
|
+
const U32* const dictHashLong = dms->hashTable;
|
|
333
|
+
const U32* const dictHashSmall = dms->chainTable;
|
|
334
|
+
const U32 dictStartIndex = dms->window.dictLimit;
|
|
335
|
+
const BYTE* const dictBase = dms->window.base;
|
|
336
|
+
const BYTE* const dictStart = dictBase + dictStartIndex;
|
|
337
|
+
const BYTE* const dictEnd = dms->window.nextSrc;
|
|
338
|
+
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
|
|
339
|
+
const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
340
|
+
const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
341
|
+
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
|
342
|
+
|
|
343
|
+
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
|
|
104
344
|
|
|
105
345
|
/* if a dictionary is attached, it must be within window range */
|
|
106
|
-
|
|
107
|
-
|
|
346
|
+
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
|
347
|
+
|
|
348
|
+
if (ms->prefetchCDictTables) {
|
|
349
|
+
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
|
|
350
|
+
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
|
|
351
|
+
PREFETCH_AREA(dictHashLong, hashTableBytes)
|
|
352
|
+
PREFETCH_AREA(dictHashSmall, chainTableBytes)
|
|
108
353
|
}
|
|
109
354
|
|
|
110
355
|
/* init */
|
|
111
356
|
ip += (dictAndPrefixLength == 0);
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
if (dictMode == ZSTD_dictMatchState) {
|
|
118
|
-
/* dictMatchState repCode checks don't currently handle repCode == 0
|
|
119
|
-
* disabling. */
|
|
120
|
-
assert(offset_1 <= dictAndPrefixLength);
|
|
121
|
-
assert(offset_2 <= dictAndPrefixLength);
|
|
122
|
-
}
|
|
357
|
+
|
|
358
|
+
/* dictMatchState repCode checks don't currently handle repCode == 0
|
|
359
|
+
* disabling. */
|
|
360
|
+
assert(offset_1 <= dictAndPrefixLength);
|
|
361
|
+
assert(offset_2 <= dictAndPrefixLength);
|
|
123
362
|
|
|
124
363
|
/* Main Search Loop */
|
|
125
364
|
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
|
@@ -127,37 +366,30 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
127
366
|
U32 offset;
|
|
128
367
|
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
|
|
129
368
|
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
|
130
|
-
size_t const
|
|
131
|
-
size_t const
|
|
132
|
-
U32 const
|
|
369
|
+
size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
|
370
|
+
size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
|
371
|
+
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
|
|
372
|
+
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
|
|
373
|
+
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
|
|
374
|
+
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
|
|
375
|
+
U32 const curr = (U32)(ip-base);
|
|
133
376
|
U32 const matchIndexL = hashLong[h2];
|
|
134
377
|
U32 matchIndexS = hashSmall[h];
|
|
135
378
|
const BYTE* matchLong = base + matchIndexL;
|
|
136
379
|
const BYTE* match = base + matchIndexS;
|
|
137
|
-
const U32 repIndex =
|
|
138
|
-
const BYTE* repMatch = (
|
|
139
|
-
&& repIndex < prefixLowestIndex) ?
|
|
380
|
+
const U32 repIndex = curr + 1 - offset_1;
|
|
381
|
+
const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
|
|
140
382
|
dictBase + (repIndex - dictIndexDelta) :
|
|
141
383
|
base + repIndex;
|
|
142
|
-
hashLong[h2] = hashSmall[h] =
|
|
384
|
+
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
|
|
143
385
|
|
|
144
|
-
/* check
|
|
145
|
-
if (
|
|
146
|
-
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
|
386
|
+
/* check repcode */
|
|
387
|
+
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
|
147
388
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
|
148
389
|
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
|
149
390
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
|
150
391
|
ip++;
|
|
151
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
|
152
|
-
goto _match_stored;
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
/* check noDict repcode */
|
|
156
|
-
if ( dictMode == ZSTD_noDict
|
|
157
|
-
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
|
158
|
-
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
|
159
|
-
ip++;
|
|
160
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
|
392
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
|
161
393
|
goto _match_stored;
|
|
162
394
|
}
|
|
163
395
|
|
|
@@ -169,15 +401,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
169
401
|
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
|
170
402
|
goto _match_found;
|
|
171
403
|
}
|
|
172
|
-
} else if (
|
|
404
|
+
} else if (dictTagsMatchL) {
|
|
173
405
|
/* check dictMatchState long match */
|
|
174
|
-
U32 const dictMatchIndexL =
|
|
406
|
+
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
|
|
175
407
|
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
|
|
176
408
|
assert(dictMatchL < dictEnd);
|
|
177
409
|
|
|
178
410
|
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
|
|
179
411
|
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
|
|
180
|
-
offset = (U32)(
|
|
412
|
+
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
|
|
181
413
|
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
|
|
182
414
|
goto _match_found;
|
|
183
415
|
} }
|
|
@@ -187,9 +419,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
187
419
|
if (MEM_read32(match) == MEM_read32(ip)) {
|
|
188
420
|
goto _search_next_long;
|
|
189
421
|
}
|
|
190
|
-
} else if (
|
|
422
|
+
} else if (dictTagsMatchS) {
|
|
191
423
|
/* check dictMatchState short match */
|
|
192
|
-
U32 const dictMatchIndexS =
|
|
424
|
+
U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
|
|
193
425
|
match = dictBase + dictMatchIndexS;
|
|
194
426
|
matchIndexS = dictMatchIndexS + dictIndexDelta;
|
|
195
427
|
|
|
@@ -198,15 +430,19 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
198
430
|
} }
|
|
199
431
|
|
|
200
432
|
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
|
433
|
+
#if defined(__aarch64__)
|
|
434
|
+
PREFETCH_L1(ip+256);
|
|
435
|
+
#endif
|
|
201
436
|
continue;
|
|
202
437
|
|
|
203
438
|
_search_next_long:
|
|
204
|
-
|
|
205
439
|
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
|
206
|
-
size_t const
|
|
440
|
+
size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
|
207
441
|
U32 const matchIndexL3 = hashLong[hl3];
|
|
442
|
+
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
|
|
443
|
+
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
|
|
208
444
|
const BYTE* matchL3 = base + matchIndexL3;
|
|
209
|
-
hashLong[hl3] =
|
|
445
|
+
hashLong[hl3] = curr + 1;
|
|
210
446
|
|
|
211
447
|
/* check prefix long +1 match */
|
|
212
448
|
if (matchIndexL3 > prefixLowestIndex) {
|
|
@@ -217,23 +453,23 @@ _search_next_long:
|
|
|
217
453
|
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
|
218
454
|
goto _match_found;
|
|
219
455
|
}
|
|
220
|
-
} else if (
|
|
456
|
+
} else if (dictTagsMatchL3) {
|
|
221
457
|
/* check dict long +1 match */
|
|
222
|
-
U32 const dictMatchIndexL3 =
|
|
458
|
+
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
|
|
223
459
|
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
|
|
224
460
|
assert(dictMatchL3 < dictEnd);
|
|
225
461
|
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
|
226
462
|
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
|
|
227
463
|
ip++;
|
|
228
|
-
offset = (U32)(
|
|
464
|
+
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
|
|
229
465
|
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
|
|
230
466
|
goto _match_found;
|
|
231
467
|
} } }
|
|
232
468
|
|
|
233
469
|
/* if no long +1 match, explore the short match we found */
|
|
234
|
-
if (
|
|
470
|
+
if (matchIndexS < prefixLowestIndex) {
|
|
235
471
|
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
|
236
|
-
offset = (U32)(
|
|
472
|
+
offset = (U32)(curr - matchIndexS);
|
|
237
473
|
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
238
474
|
} else {
|
|
239
475
|
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
|
@@ -241,13 +477,11 @@ _search_next_long:
|
|
|
241
477
|
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
242
478
|
}
|
|
243
479
|
|
|
244
|
-
/* fall-through */
|
|
245
|
-
|
|
246
480
|
_match_found:
|
|
247
481
|
offset_2 = offset_1;
|
|
248
482
|
offset_1 = offset;
|
|
249
483
|
|
|
250
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset
|
|
484
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
251
485
|
|
|
252
486
|
_match_stored:
|
|
253
487
|
/* match found */
|
|
@@ -257,7 +491,7 @@ _match_stored:
|
|
|
257
491
|
if (ip <= ilimit) {
|
|
258
492
|
/* Complementary insertion */
|
|
259
493
|
/* done after iLimit test, as candidates could be > iend-8 */
|
|
260
|
-
{ U32 const indexToInsert =
|
|
494
|
+
{ U32 const indexToInsert = curr+2;
|
|
261
495
|
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
|
262
496
|
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
|
263
497
|
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
|
@@ -265,53 +499,55 @@ _match_stored:
|
|
|
265
499
|
}
|
|
266
500
|
|
|
267
501
|
/* check immediate repcode */
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
284
|
-
ip += repLength2;
|
|
285
|
-
anchor = ip;
|
|
286
|
-
continue;
|
|
287
|
-
}
|
|
288
|
-
break;
|
|
289
|
-
} }
|
|
290
|
-
|
|
291
|
-
if (dictMode == ZSTD_noDict) {
|
|
292
|
-
while ( (ip <= ilimit)
|
|
293
|
-
&& ( (offset_2>0)
|
|
294
|
-
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
|
295
|
-
/* store sequence */
|
|
296
|
-
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
|
297
|
-
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
|
298
|
-
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
|
299
|
-
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
|
300
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
|
|
301
|
-
ip += rLength;
|
|
502
|
+
while (ip <= ilimit) {
|
|
503
|
+
U32 const current2 = (U32)(ip-base);
|
|
504
|
+
U32 const repIndex2 = current2 - offset_2;
|
|
505
|
+
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
|
|
506
|
+
dictBase + repIndex2 - dictIndexDelta :
|
|
507
|
+
base + repIndex2;
|
|
508
|
+
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
|
509
|
+
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
|
510
|
+
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
|
511
|
+
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
|
512
|
+
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
513
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
|
514
|
+
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
|
515
|
+
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
516
|
+
ip += repLength2;
|
|
302
517
|
anchor = ip;
|
|
303
|
-
continue;
|
|
304
|
-
|
|
518
|
+
continue;
|
|
519
|
+
}
|
|
520
|
+
break;
|
|
521
|
+
}
|
|
522
|
+
}
|
|
305
523
|
} /* while (ip < ilimit) */
|
|
306
524
|
|
|
307
525
|
/* save reps for next block */
|
|
308
|
-
rep[0] = offset_1
|
|
309
|
-
rep[1] = offset_2
|
|
526
|
+
rep[0] = offset_1;
|
|
527
|
+
rep[1] = offset_2;
|
|
310
528
|
|
|
311
529
|
/* Return the last literals size */
|
|
312
530
|
return (size_t)(iend - anchor);
|
|
313
531
|
}
|
|
314
532
|
|
|
533
|
+
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
|
|
534
|
+
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
|
|
535
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
|
|
536
|
+
void const* src, size_t srcSize) \
|
|
537
|
+
{ \
|
|
538
|
+
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
ZSTD_GEN_DFAST_FN(noDict, 4)
|
|
542
|
+
ZSTD_GEN_DFAST_FN(noDict, 5)
|
|
543
|
+
ZSTD_GEN_DFAST_FN(noDict, 6)
|
|
544
|
+
ZSTD_GEN_DFAST_FN(noDict, 7)
|
|
545
|
+
|
|
546
|
+
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
|
|
547
|
+
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
|
|
548
|
+
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
|
|
549
|
+
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
|
|
550
|
+
|
|
315
551
|
|
|
316
552
|
size_t ZSTD_compressBlock_doubleFast(
|
|
317
553
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
@@ -322,13 +558,13 @@ size_t ZSTD_compressBlock_doubleFast(
|
|
|
322
558
|
{
|
|
323
559
|
default: /* includes case 3 */
|
|
324
560
|
case 4 :
|
|
325
|
-
return
|
|
561
|
+
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
|
|
326
562
|
case 5 :
|
|
327
|
-
return
|
|
563
|
+
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
|
|
328
564
|
case 6 :
|
|
329
|
-
return
|
|
565
|
+
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
|
|
330
566
|
case 7 :
|
|
331
|
-
return
|
|
567
|
+
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
|
|
332
568
|
}
|
|
333
569
|
}
|
|
334
570
|
|
|
@@ -342,13 +578,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
|
|
|
342
578
|
{
|
|
343
579
|
default: /* includes case 3 */
|
|
344
580
|
case 4 :
|
|
345
|
-
return
|
|
581
|
+
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
|
|
346
582
|
case 5 :
|
|
347
|
-
return
|
|
583
|
+
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
|
|
348
584
|
case 6 :
|
|
349
|
-
return
|
|
585
|
+
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
|
|
350
586
|
case 7 :
|
|
351
|
-
return
|
|
587
|
+
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
|
|
352
588
|
}
|
|
353
589
|
}
|
|
354
590
|
|
|
@@ -384,7 +620,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
384
620
|
|
|
385
621
|
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
|
|
386
622
|
if (prefixStartIndex == dictStartIndex)
|
|
387
|
-
return
|
|
623
|
+
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
|
|
388
624
|
|
|
389
625
|
/* Search Loop */
|
|
390
626
|
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
|
@@ -398,31 +634,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
398
634
|
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
|
|
399
635
|
const BYTE* matchLong = matchLongBase + matchLongIndex;
|
|
400
636
|
|
|
401
|
-
const U32
|
|
402
|
-
const U32 repIndex =
|
|
637
|
+
const U32 curr = (U32)(ip-base);
|
|
638
|
+
const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
|
|
403
639
|
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
|
404
640
|
const BYTE* const repMatch = repBase + repIndex;
|
|
405
641
|
size_t mLength;
|
|
406
|
-
hashSmall[hSmall] = hashLong[hLong] =
|
|
642
|
+
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
|
|
407
643
|
|
|
408
644
|
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
|
|
409
|
-
& (
|
|
645
|
+
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
|
|
410
646
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
|
411
647
|
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
|
412
648
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
|
413
649
|
ip++;
|
|
414
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
|
650
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
|
415
651
|
} else {
|
|
416
652
|
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
|
417
653
|
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
|
|
418
654
|
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
419
655
|
U32 offset;
|
|
420
656
|
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
|
|
421
|
-
offset =
|
|
657
|
+
offset = curr - matchLongIndex;
|
|
422
658
|
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
|
423
659
|
offset_2 = offset_1;
|
|
424
660
|
offset_1 = offset;
|
|
425
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset
|
|
661
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
426
662
|
|
|
427
663
|
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
|
|
428
664
|
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
|
@@ -430,24 +666,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
430
666
|
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
|
|
431
667
|
const BYTE* match3 = match3Base + matchIndex3;
|
|
432
668
|
U32 offset;
|
|
433
|
-
hashLong[h3] =
|
|
669
|
+
hashLong[h3] = curr + 1;
|
|
434
670
|
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
|
|
435
671
|
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
|
|
436
672
|
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
|
|
437
673
|
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
|
|
438
674
|
ip++;
|
|
439
|
-
offset =
|
|
675
|
+
offset = curr+1 - matchIndex3;
|
|
440
676
|
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
|
|
441
677
|
} else {
|
|
442
678
|
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
|
443
679
|
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
444
680
|
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
|
445
|
-
offset =
|
|
681
|
+
offset = curr - matchIndex;
|
|
446
682
|
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
447
683
|
}
|
|
448
684
|
offset_2 = offset_1;
|
|
449
685
|
offset_1 = offset;
|
|
450
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset
|
|
686
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
451
687
|
|
|
452
688
|
} else {
|
|
453
689
|
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
|
@@ -461,7 +697,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
461
697
|
if (ip <= ilimit) {
|
|
462
698
|
/* Complementary insertion */
|
|
463
699
|
/* done after iLimit test, as candidates could be > iend-8 */
|
|
464
|
-
{ U32 const indexToInsert =
|
|
700
|
+
{ U32 const indexToInsert = curr+2;
|
|
465
701
|
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
|
466
702
|
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
|
467
703
|
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
|
@@ -474,12 +710,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
474
710
|
U32 const repIndex2 = current2 - offset_2;
|
|
475
711
|
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
|
476
712
|
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
|
|
477
|
-
& (
|
|
713
|
+
& (offset_2 <= current2 - dictStartIndex))
|
|
478
714
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
|
479
715
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
|
480
716
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
|
481
717
|
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
482
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
|
718
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
|
483
719
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
|
484
720
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
485
721
|
ip += repLength2;
|
|
@@ -497,6 +733,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
497
733
|
return (size_t)(iend - anchor);
|
|
498
734
|
}
|
|
499
735
|
|
|
736
|
+
ZSTD_GEN_DFAST_FN(extDict, 4)
|
|
737
|
+
ZSTD_GEN_DFAST_FN(extDict, 5)
|
|
738
|
+
ZSTD_GEN_DFAST_FN(extDict, 6)
|
|
739
|
+
ZSTD_GEN_DFAST_FN(extDict, 7)
|
|
500
740
|
|
|
501
741
|
size_t ZSTD_compressBlock_doubleFast_extDict(
|
|
502
742
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
@@ -507,12 +747,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
|
|
|
507
747
|
{
|
|
508
748
|
default: /* includes case 3 */
|
|
509
749
|
case 4 :
|
|
510
|
-
return
|
|
750
|
+
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
|
|
511
751
|
case 5 :
|
|
512
|
-
return
|
|
752
|
+
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
|
|
513
753
|
case 6 :
|
|
514
|
-
return
|
|
754
|
+
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
|
|
515
755
|
case 7 :
|
|
516
|
-
return
|
|
756
|
+
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
|
|
517
757
|
}
|
|
518
758
|
}
|