zstd-ruby 1.4.5.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
- data/ext/zstdruby/libzstd/common/compiler.h +205 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
- data/ext/zstdruby/libzstd/common/error_private.c +10 -2
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +37 -86
- data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
- data/ext/zstdruby/libzstd/common/huf.h +99 -166
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +10 -4
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +74 -19
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/ext/zstdruby/libzstd/zstd.h +1217 -287
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +19 -36
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -354
- data/ext/zstdruby/libzstd/README.md +0 -179
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -11,8 +11,43 @@
|
|
|
11
11
|
#include "zstd_compress_internal.h"
|
|
12
12
|
#include "zstd_double_fast.h"
|
|
13
13
|
|
|
14
|
+
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
|
|
15
|
+
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
|
16
|
+
{
|
|
17
|
+
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
18
|
+
U32* const hashLarge = ms->hashTable;
|
|
19
|
+
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
20
|
+
U32 const mls = cParams->minMatch;
|
|
21
|
+
U32* const hashSmall = ms->chainTable;
|
|
22
|
+
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
23
|
+
const BYTE* const base = ms->window.base;
|
|
24
|
+
const BYTE* ip = base + ms->nextToUpdate;
|
|
25
|
+
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
|
|
26
|
+
const U32 fastHashFillStep = 3;
|
|
14
27
|
|
|
15
|
-
|
|
28
|
+
/* Always insert every fastHashFillStep position into the hash tables.
|
|
29
|
+
* Insert the other positions into the large hash table if their entry
|
|
30
|
+
* is empty.
|
|
31
|
+
*/
|
|
32
|
+
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
|
33
|
+
U32 const curr = (U32)(ip - base);
|
|
34
|
+
U32 i;
|
|
35
|
+
for (i = 0; i < fastHashFillStep; ++i) {
|
|
36
|
+
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
|
37
|
+
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
|
38
|
+
if (i == 0) {
|
|
39
|
+
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
|
|
40
|
+
}
|
|
41
|
+
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
|
|
42
|
+
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
|
|
43
|
+
}
|
|
44
|
+
/* Only load extra positions for ZSTD_dtlm_full */
|
|
45
|
+
if (dtlm == ZSTD_dtlm_fast)
|
|
46
|
+
break;
|
|
47
|
+
} }
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
|
|
16
51
|
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
|
|
17
52
|
{
|
|
18
53
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
@@ -31,27 +66,249 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|
|
31
66
|
* is empty.
|
|
32
67
|
*/
|
|
33
68
|
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
|
34
|
-
U32 const
|
|
69
|
+
U32 const curr = (U32)(ip - base);
|
|
35
70
|
U32 i;
|
|
36
71
|
for (i = 0; i < fastHashFillStep; ++i) {
|
|
37
72
|
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
|
|
38
73
|
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
|
|
39
74
|
if (i == 0)
|
|
40
|
-
hashSmall[smHash] =
|
|
75
|
+
hashSmall[smHash] = curr + i;
|
|
41
76
|
if (i == 0 || hashLarge[lgHash] == 0)
|
|
42
|
-
hashLarge[lgHash] =
|
|
77
|
+
hashLarge[lgHash] = curr + i;
|
|
43
78
|
/* Only load extra positions for ZSTD_dtlm_full */
|
|
44
79
|
if (dtlm == ZSTD_dtlm_fast)
|
|
45
80
|
break;
|
|
46
|
-
|
|
81
|
+
} }
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
|
85
|
+
const void* const end,
|
|
86
|
+
ZSTD_dictTableLoadMethod_e dtlm,
|
|
87
|
+
ZSTD_tableFillPurpose_e tfp)
|
|
88
|
+
{
|
|
89
|
+
if (tfp == ZSTD_tfp_forCDict) {
|
|
90
|
+
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
|
|
91
|
+
} else {
|
|
92
|
+
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
FORCE_INLINE_TEMPLATE
|
|
98
|
+
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
|
99
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
100
|
+
void const* src, size_t srcSize, U32 const mls /* template */)
|
|
101
|
+
{
|
|
102
|
+
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
|
103
|
+
U32* const hashLong = ms->hashTable;
|
|
104
|
+
const U32 hBitsL = cParams->hashLog;
|
|
105
|
+
U32* const hashSmall = ms->chainTable;
|
|
106
|
+
const U32 hBitsS = cParams->chainLog;
|
|
107
|
+
const BYTE* const base = ms->window.base;
|
|
108
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
109
|
+
const BYTE* anchor = istart;
|
|
110
|
+
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
111
|
+
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
|
112
|
+
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
|
113
|
+
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
|
114
|
+
const BYTE* const iend = istart + srcSize;
|
|
115
|
+
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
|
116
|
+
U32 offset_1=rep[0], offset_2=rep[1];
|
|
117
|
+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
|
|
118
|
+
|
|
119
|
+
size_t mLength;
|
|
120
|
+
U32 offset;
|
|
121
|
+
U32 curr;
|
|
122
|
+
|
|
123
|
+
/* how many positions to search before increasing step size */
|
|
124
|
+
const size_t kStepIncr = 1 << kSearchStrength;
|
|
125
|
+
/* the position at which to increment the step size if no match is found */
|
|
126
|
+
const BYTE* nextStep;
|
|
127
|
+
size_t step; /* the current step size */
|
|
128
|
+
|
|
129
|
+
size_t hl0; /* the long hash at ip */
|
|
130
|
+
size_t hl1; /* the long hash at ip1 */
|
|
131
|
+
|
|
132
|
+
U32 idxl0; /* the long match index for ip */
|
|
133
|
+
U32 idxl1; /* the long match index for ip1 */
|
|
134
|
+
|
|
135
|
+
const BYTE* matchl0; /* the long match for ip */
|
|
136
|
+
const BYTE* matchs0; /* the short match for ip */
|
|
137
|
+
const BYTE* matchl1; /* the long match for ip1 */
|
|
138
|
+
|
|
139
|
+
const BYTE* ip = istart; /* the current position */
|
|
140
|
+
const BYTE* ip1; /* the next position */
|
|
141
|
+
|
|
142
|
+
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
|
|
143
|
+
|
|
144
|
+
/* init */
|
|
145
|
+
ip += ((ip - prefixLowest) == 0);
|
|
146
|
+
{
|
|
147
|
+
U32 const current = (U32)(ip - base);
|
|
148
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
|
149
|
+
U32 const maxRep = current - windowLow;
|
|
150
|
+
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
|
|
151
|
+
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
/* Outer Loop: one iteration per match found and stored */
|
|
155
|
+
while (1) {
|
|
156
|
+
step = 1;
|
|
157
|
+
nextStep = ip + kStepIncr;
|
|
158
|
+
ip1 = ip + step;
|
|
159
|
+
|
|
160
|
+
if (ip1 > ilimit) {
|
|
161
|
+
goto _cleanup;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
|
|
165
|
+
idxl0 = hashLong[hl0];
|
|
166
|
+
matchl0 = base + idxl0;
|
|
167
|
+
|
|
168
|
+
/* Inner Loop: one iteration per search / position */
|
|
169
|
+
do {
|
|
170
|
+
const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
|
|
171
|
+
const U32 idxs0 = hashSmall[hs0];
|
|
172
|
+
curr = (U32)(ip-base);
|
|
173
|
+
matchs0 = base + idxs0;
|
|
174
|
+
|
|
175
|
+
hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */
|
|
176
|
+
|
|
177
|
+
/* check noDict repcode */
|
|
178
|
+
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
|
|
179
|
+
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
|
180
|
+
ip++;
|
|
181
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
|
182
|
+
goto _match_stored;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
|
|
186
|
+
|
|
187
|
+
if (idxl0 > prefixLowestIndex) {
|
|
188
|
+
/* check prefix long match */
|
|
189
|
+
if (MEM_read64(matchl0) == MEM_read64(ip)) {
|
|
190
|
+
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
|
|
191
|
+
offset = (U32)(ip-matchl0);
|
|
192
|
+
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
|
|
193
|
+
goto _match_found;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
idxl1 = hashLong[hl1];
|
|
198
|
+
matchl1 = base + idxl1;
|
|
199
|
+
|
|
200
|
+
if (idxs0 > prefixLowestIndex) {
|
|
201
|
+
/* check prefix short match */
|
|
202
|
+
if (MEM_read32(matchs0) == MEM_read32(ip)) {
|
|
203
|
+
goto _search_next_long;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
if (ip1 >= nextStep) {
|
|
208
|
+
PREFETCH_L1(ip1 + 64);
|
|
209
|
+
PREFETCH_L1(ip1 + 128);
|
|
210
|
+
step++;
|
|
211
|
+
nextStep += kStepIncr;
|
|
212
|
+
}
|
|
213
|
+
ip = ip1;
|
|
214
|
+
ip1 += step;
|
|
215
|
+
|
|
216
|
+
hl0 = hl1;
|
|
217
|
+
idxl0 = idxl1;
|
|
218
|
+
matchl0 = matchl1;
|
|
219
|
+
#if defined(__aarch64__)
|
|
220
|
+
PREFETCH_L1(ip+256);
|
|
221
|
+
#endif
|
|
222
|
+
} while (ip1 <= ilimit);
|
|
223
|
+
|
|
224
|
+
_cleanup:
|
|
225
|
+
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
|
|
226
|
+
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
|
|
227
|
+
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
|
|
228
|
+
|
|
229
|
+
/* save reps for next block */
|
|
230
|
+
rep[0] = offset_1 ? offset_1 : offsetSaved1;
|
|
231
|
+
rep[1] = offset_2 ? offset_2 : offsetSaved2;
|
|
232
|
+
|
|
233
|
+
/* Return the last literals size */
|
|
234
|
+
return (size_t)(iend - anchor);
|
|
235
|
+
|
|
236
|
+
_search_next_long:
|
|
237
|
+
|
|
238
|
+
/* check prefix long +1 match */
|
|
239
|
+
if (idxl1 > prefixLowestIndex) {
|
|
240
|
+
if (MEM_read64(matchl1) == MEM_read64(ip1)) {
|
|
241
|
+
ip = ip1;
|
|
242
|
+
mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
|
|
243
|
+
offset = (U32)(ip-matchl1);
|
|
244
|
+
while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
|
|
245
|
+
goto _match_found;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/* if no long +1 match, explore the short match we found */
|
|
250
|
+
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
|
|
251
|
+
offset = (U32)(ip - matchs0);
|
|
252
|
+
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
|
|
253
|
+
|
|
254
|
+
/* fall-through */
|
|
255
|
+
|
|
256
|
+
_match_found: /* requires ip, offset, mLength */
|
|
257
|
+
offset_2 = offset_1;
|
|
258
|
+
offset_1 = offset;
|
|
259
|
+
|
|
260
|
+
if (step < 4) {
|
|
261
|
+
/* It is unsafe to write this value back to the hashtable when ip1 is
|
|
262
|
+
* greater than or equal to the new ip we will have after we're done
|
|
263
|
+
* processing this match. Rather than perform that test directly
|
|
264
|
+
* (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
|
|
265
|
+
* more predictable test. The minmatch even if we take a short match is
|
|
266
|
+
* 4 bytes, so as long as step, the distance between ip and ip1
|
|
267
|
+
* (initially) is less than 4, we know ip1 < new ip. */
|
|
268
|
+
hashLong[hl1] = (U32)(ip1 - base);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
272
|
+
|
|
273
|
+
_match_stored:
|
|
274
|
+
/* match found */
|
|
275
|
+
ip += mLength;
|
|
276
|
+
anchor = ip;
|
|
277
|
+
|
|
278
|
+
if (ip <= ilimit) {
|
|
279
|
+
/* Complementary insertion */
|
|
280
|
+
/* done after iLimit test, as candidates could be > iend-8 */
|
|
281
|
+
{ U32 const indexToInsert = curr+2;
|
|
282
|
+
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
|
283
|
+
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
|
284
|
+
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
|
285
|
+
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/* check immediate repcode */
|
|
289
|
+
while ( (ip <= ilimit)
|
|
290
|
+
&& ( (offset_2>0)
|
|
291
|
+
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
|
292
|
+
/* store sequence */
|
|
293
|
+
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
|
294
|
+
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
|
295
|
+
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
|
296
|
+
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
|
297
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
|
|
298
|
+
ip += rLength;
|
|
299
|
+
anchor = ip;
|
|
300
|
+
continue; /* faster when present ... (?) */
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
}
|
|
47
304
|
}
|
|
48
305
|
|
|
49
306
|
|
|
50
307
|
FORCE_INLINE_TEMPLATE
|
|
51
|
-
size_t
|
|
308
|
+
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|
52
309
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
53
310
|
void const* src, size_t srcSize,
|
|
54
|
-
U32 const mls /* template
|
|
311
|
+
U32 const mls /* template */)
|
|
55
312
|
{
|
|
56
313
|
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
|
57
314
|
U32* const hashLong = ms->hashTable;
|
|
@@ -69,57 +326,39 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
69
326
|
const BYTE* const iend = istart + srcSize;
|
|
70
327
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
|
71
328
|
U32 offset_1=rep[0], offset_2=rep[1];
|
|
72
|
-
U32 offsetSaved = 0;
|
|
73
329
|
|
|
74
330
|
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
|
75
|
-
const ZSTD_compressionParameters* const dictCParams =
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
const U32
|
|
79
|
-
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
const U32
|
|
83
|
-
|
|
84
|
-
const
|
|
85
|
-
dms->window.base : NULL;
|
|
86
|
-
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
|
|
87
|
-
dictBase + dictStartIndex : NULL;
|
|
88
|
-
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
|
89
|
-
dms->window.nextSrc : NULL;
|
|
90
|
-
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
|
91
|
-
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
|
92
|
-
0;
|
|
93
|
-
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
|
|
94
|
-
dictCParams->hashLog : hBitsL;
|
|
95
|
-
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
|
96
|
-
dictCParams->chainLog : hBitsS;
|
|
331
|
+
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
|
|
332
|
+
const U32* const dictHashLong = dms->hashTable;
|
|
333
|
+
const U32* const dictHashSmall = dms->chainTable;
|
|
334
|
+
const U32 dictStartIndex = dms->window.dictLimit;
|
|
335
|
+
const BYTE* const dictBase = dms->window.base;
|
|
336
|
+
const BYTE* const dictStart = dictBase + dictStartIndex;
|
|
337
|
+
const BYTE* const dictEnd = dms->window.nextSrc;
|
|
338
|
+
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
|
|
339
|
+
const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
340
|
+
const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
97
341
|
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
|
98
342
|
|
|
99
|
-
DEBUGLOG(5, "
|
|
100
|
-
|
|
101
|
-
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
|
343
|
+
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
|
|
102
344
|
|
|
103
345
|
/* if a dictionary is attached, it must be within window range */
|
|
104
|
-
|
|
105
|
-
|
|
346
|
+
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
|
347
|
+
|
|
348
|
+
if (ms->prefetchCDictTables) {
|
|
349
|
+
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
|
|
350
|
+
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
|
|
351
|
+
PREFETCH_AREA(dictHashLong, hashTableBytes)
|
|
352
|
+
PREFETCH_AREA(dictHashSmall, chainTableBytes)
|
|
106
353
|
}
|
|
107
354
|
|
|
108
355
|
/* init */
|
|
109
356
|
ip += (dictAndPrefixLength == 0);
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
|
116
|
-
}
|
|
117
|
-
if (dictMode == ZSTD_dictMatchState) {
|
|
118
|
-
/* dictMatchState repCode checks don't currently handle repCode == 0
|
|
119
|
-
* disabling. */
|
|
120
|
-
assert(offset_1 <= dictAndPrefixLength);
|
|
121
|
-
assert(offset_2 <= dictAndPrefixLength);
|
|
122
|
-
}
|
|
357
|
+
|
|
358
|
+
/* dictMatchState repCode checks don't currently handle repCode == 0
|
|
359
|
+
* disabling. */
|
|
360
|
+
assert(offset_1 <= dictAndPrefixLength);
|
|
361
|
+
assert(offset_2 <= dictAndPrefixLength);
|
|
123
362
|
|
|
124
363
|
/* Main Search Loop */
|
|
125
364
|
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
|
@@ -127,37 +366,30 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
127
366
|
U32 offset;
|
|
128
367
|
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
|
|
129
368
|
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
|
|
130
|
-
size_t const
|
|
131
|
-
size_t const
|
|
132
|
-
U32 const
|
|
369
|
+
size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
|
|
370
|
+
size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
|
|
371
|
+
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
|
|
372
|
+
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
|
|
373
|
+
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
|
|
374
|
+
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
|
|
375
|
+
U32 const curr = (U32)(ip-base);
|
|
133
376
|
U32 const matchIndexL = hashLong[h2];
|
|
134
377
|
U32 matchIndexS = hashSmall[h];
|
|
135
378
|
const BYTE* matchLong = base + matchIndexL;
|
|
136
379
|
const BYTE* match = base + matchIndexS;
|
|
137
|
-
const U32 repIndex =
|
|
138
|
-
const BYTE* repMatch = (
|
|
139
|
-
&& repIndex < prefixLowestIndex) ?
|
|
380
|
+
const U32 repIndex = curr + 1 - offset_1;
|
|
381
|
+
const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
|
|
140
382
|
dictBase + (repIndex - dictIndexDelta) :
|
|
141
383
|
base + repIndex;
|
|
142
|
-
hashLong[h2] = hashSmall[h] =
|
|
384
|
+
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
|
|
143
385
|
|
|
144
|
-
/* check
|
|
145
|
-
if (
|
|
146
|
-
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
|
386
|
+
/* check repcode */
|
|
387
|
+
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
|
147
388
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
|
148
389
|
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
|
149
390
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
|
150
391
|
ip++;
|
|
151
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
|
152
|
-
goto _match_stored;
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
/* check noDict repcode */
|
|
156
|
-
if ( dictMode == ZSTD_noDict
|
|
157
|
-
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
|
158
|
-
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
|
159
|
-
ip++;
|
|
160
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
|
392
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
|
161
393
|
goto _match_stored;
|
|
162
394
|
}
|
|
163
395
|
|
|
@@ -169,15 +401,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
169
401
|
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
|
170
402
|
goto _match_found;
|
|
171
403
|
}
|
|
172
|
-
} else if (
|
|
404
|
+
} else if (dictTagsMatchL) {
|
|
173
405
|
/* check dictMatchState long match */
|
|
174
|
-
U32 const dictMatchIndexL =
|
|
406
|
+
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
|
|
175
407
|
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
|
|
176
408
|
assert(dictMatchL < dictEnd);
|
|
177
409
|
|
|
178
410
|
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
|
|
179
411
|
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
|
|
180
|
-
offset = (U32)(
|
|
412
|
+
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
|
|
181
413
|
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
|
|
182
414
|
goto _match_found;
|
|
183
415
|
} }
|
|
@@ -187,9 +419,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
187
419
|
if (MEM_read32(match) == MEM_read32(ip)) {
|
|
188
420
|
goto _search_next_long;
|
|
189
421
|
}
|
|
190
|
-
} else if (
|
|
422
|
+
} else if (dictTagsMatchS) {
|
|
191
423
|
/* check dictMatchState short match */
|
|
192
|
-
U32 const dictMatchIndexS =
|
|
424
|
+
U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
|
|
193
425
|
match = dictBase + dictMatchIndexS;
|
|
194
426
|
matchIndexS = dictMatchIndexS + dictIndexDelta;
|
|
195
427
|
|
|
@@ -204,12 +436,13 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
|
204
436
|
continue;
|
|
205
437
|
|
|
206
438
|
_search_next_long:
|
|
207
|
-
|
|
208
439
|
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
|
209
|
-
size_t const
|
|
440
|
+
size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
|
|
210
441
|
U32 const matchIndexL3 = hashLong[hl3];
|
|
442
|
+
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
|
|
443
|
+
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
|
|
211
444
|
const BYTE* matchL3 = base + matchIndexL3;
|
|
212
|
-
hashLong[hl3] =
|
|
445
|
+
hashLong[hl3] = curr + 1;
|
|
213
446
|
|
|
214
447
|
/* check prefix long +1 match */
|
|
215
448
|
if (matchIndexL3 > prefixLowestIndex) {
|
|
@@ -220,23 +453,23 @@ _search_next_long:
|
|
|
220
453
|
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
|
221
454
|
goto _match_found;
|
|
222
455
|
}
|
|
223
|
-
} else if (
|
|
456
|
+
} else if (dictTagsMatchL3) {
|
|
224
457
|
/* check dict long +1 match */
|
|
225
|
-
U32 const dictMatchIndexL3 =
|
|
458
|
+
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
|
|
226
459
|
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
|
|
227
460
|
assert(dictMatchL3 < dictEnd);
|
|
228
461
|
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
|
|
229
462
|
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
|
|
230
463
|
ip++;
|
|
231
|
-
offset = (U32)(
|
|
464
|
+
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
|
|
232
465
|
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
|
|
233
466
|
goto _match_found;
|
|
234
467
|
} } }
|
|
235
468
|
|
|
236
469
|
/* if no long +1 match, explore the short match we found */
|
|
237
|
-
if (
|
|
470
|
+
if (matchIndexS < prefixLowestIndex) {
|
|
238
471
|
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
|
239
|
-
offset = (U32)(
|
|
472
|
+
offset = (U32)(curr - matchIndexS);
|
|
240
473
|
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
241
474
|
} else {
|
|
242
475
|
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
|
@@ -244,13 +477,11 @@ _search_next_long:
|
|
|
244
477
|
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
245
478
|
}
|
|
246
479
|
|
|
247
|
-
/* fall-through */
|
|
248
|
-
|
|
249
480
|
_match_found:
|
|
250
481
|
offset_2 = offset_1;
|
|
251
482
|
offset_1 = offset;
|
|
252
483
|
|
|
253
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset
|
|
484
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
254
485
|
|
|
255
486
|
_match_stored:
|
|
256
487
|
/* match found */
|
|
@@ -260,7 +491,7 @@ _match_stored:
|
|
|
260
491
|
if (ip <= ilimit) {
|
|
261
492
|
/* Complementary insertion */
|
|
262
493
|
/* done after iLimit test, as candidates could be > iend-8 */
|
|
263
|
-
{ U32 const indexToInsert =
|
|
494
|
+
{ U32 const indexToInsert = curr+2;
|
|
264
495
|
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
|
265
496
|
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
|
266
497
|
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
|
@@ -268,53 +499,55 @@ _match_stored:
|
|
|
268
499
|
}
|
|
269
500
|
|
|
270
501
|
/* check immediate repcode */
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
287
|
-
ip += repLength2;
|
|
288
|
-
anchor = ip;
|
|
289
|
-
continue;
|
|
290
|
-
}
|
|
291
|
-
break;
|
|
292
|
-
} }
|
|
293
|
-
|
|
294
|
-
if (dictMode == ZSTD_noDict) {
|
|
295
|
-
while ( (ip <= ilimit)
|
|
296
|
-
&& ( (offset_2>0)
|
|
297
|
-
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
|
298
|
-
/* store sequence */
|
|
299
|
-
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
|
300
|
-
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
|
301
|
-
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
|
302
|
-
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
|
303
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
|
|
304
|
-
ip += rLength;
|
|
502
|
+
while (ip <= ilimit) {
|
|
503
|
+
U32 const current2 = (U32)(ip-base);
|
|
504
|
+
U32 const repIndex2 = current2 - offset_2;
|
|
505
|
+
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
|
|
506
|
+
dictBase + repIndex2 - dictIndexDelta :
|
|
507
|
+
base + repIndex2;
|
|
508
|
+
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
|
509
|
+
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
|
510
|
+
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
|
511
|
+
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
|
512
|
+
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
513
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
|
514
|
+
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
|
515
|
+
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
516
|
+
ip += repLength2;
|
|
305
517
|
anchor = ip;
|
|
306
|
-
continue;
|
|
307
|
-
|
|
518
|
+
continue;
|
|
519
|
+
}
|
|
520
|
+
break;
|
|
521
|
+
}
|
|
522
|
+
}
|
|
308
523
|
} /* while (ip < ilimit) */
|
|
309
524
|
|
|
310
525
|
/* save reps for next block */
|
|
311
|
-
rep[0] = offset_1
|
|
312
|
-
rep[1] = offset_2
|
|
526
|
+
rep[0] = offset_1;
|
|
527
|
+
rep[1] = offset_2;
|
|
313
528
|
|
|
314
529
|
/* Return the last literals size */
|
|
315
530
|
return (size_t)(iend - anchor);
|
|
316
531
|
}
|
|
317
532
|
|
|
533
|
+
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
|
|
534
|
+
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
|
|
535
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
|
|
536
|
+
void const* src, size_t srcSize) \
|
|
537
|
+
{ \
|
|
538
|
+
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
ZSTD_GEN_DFAST_FN(noDict, 4)
|
|
542
|
+
ZSTD_GEN_DFAST_FN(noDict, 5)
|
|
543
|
+
ZSTD_GEN_DFAST_FN(noDict, 6)
|
|
544
|
+
ZSTD_GEN_DFAST_FN(noDict, 7)
|
|
545
|
+
|
|
546
|
+
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
|
|
547
|
+
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
|
|
548
|
+
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
|
|
549
|
+
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
|
|
550
|
+
|
|
318
551
|
|
|
319
552
|
size_t ZSTD_compressBlock_doubleFast(
|
|
320
553
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
@@ -325,13 +558,13 @@ size_t ZSTD_compressBlock_doubleFast(
|
|
|
325
558
|
{
|
|
326
559
|
default: /* includes case 3 */
|
|
327
560
|
case 4 :
|
|
328
|
-
return
|
|
561
|
+
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
|
|
329
562
|
case 5 :
|
|
330
|
-
return
|
|
563
|
+
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
|
|
331
564
|
case 6 :
|
|
332
|
-
return
|
|
565
|
+
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
|
|
333
566
|
case 7 :
|
|
334
|
-
return
|
|
567
|
+
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
|
|
335
568
|
}
|
|
336
569
|
}
|
|
337
570
|
|
|
@@ -345,13 +578,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
|
|
|
345
578
|
{
|
|
346
579
|
default: /* includes case 3 */
|
|
347
580
|
case 4 :
|
|
348
|
-
return
|
|
581
|
+
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
|
|
349
582
|
case 5 :
|
|
350
|
-
return
|
|
583
|
+
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
|
|
351
584
|
case 6 :
|
|
352
|
-
return
|
|
585
|
+
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
|
|
353
586
|
case 7 :
|
|
354
|
-
return
|
|
587
|
+
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
|
|
355
588
|
}
|
|
356
589
|
}
|
|
357
590
|
|
|
@@ -387,7 +620,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
387
620
|
|
|
388
621
|
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
|
|
389
622
|
if (prefixStartIndex == dictStartIndex)
|
|
390
|
-
return
|
|
623
|
+
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
|
|
391
624
|
|
|
392
625
|
/* Search Loop */
|
|
393
626
|
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
|
@@ -401,31 +634,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
401
634
|
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
|
|
402
635
|
const BYTE* matchLong = matchLongBase + matchLongIndex;
|
|
403
636
|
|
|
404
|
-
const U32
|
|
405
|
-
const U32 repIndex =
|
|
637
|
+
const U32 curr = (U32)(ip-base);
|
|
638
|
+
const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
|
|
406
639
|
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
|
407
640
|
const BYTE* const repMatch = repBase + repIndex;
|
|
408
641
|
size_t mLength;
|
|
409
|
-
hashSmall[hSmall] = hashLong[hLong] =
|
|
642
|
+
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
|
|
410
643
|
|
|
411
644
|
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
|
|
412
|
-
& (
|
|
645
|
+
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
|
|
413
646
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
|
414
647
|
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
|
415
648
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
|
416
649
|
ip++;
|
|
417
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
|
650
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
|
418
651
|
} else {
|
|
419
652
|
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
|
420
653
|
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
|
|
421
654
|
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
422
655
|
U32 offset;
|
|
423
656
|
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
|
|
424
|
-
offset =
|
|
657
|
+
offset = curr - matchLongIndex;
|
|
425
658
|
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
|
426
659
|
offset_2 = offset_1;
|
|
427
660
|
offset_1 = offset;
|
|
428
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset
|
|
661
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
429
662
|
|
|
430
663
|
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
|
|
431
664
|
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
|
@@ -433,24 +666,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
433
666
|
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
|
|
434
667
|
const BYTE* match3 = match3Base + matchIndex3;
|
|
435
668
|
U32 offset;
|
|
436
|
-
hashLong[h3] =
|
|
669
|
+
hashLong[h3] = curr + 1;
|
|
437
670
|
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
|
|
438
671
|
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
|
|
439
672
|
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
|
|
440
673
|
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
|
|
441
674
|
ip++;
|
|
442
|
-
offset =
|
|
675
|
+
offset = curr+1 - matchIndex3;
|
|
443
676
|
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
|
|
444
677
|
} else {
|
|
445
678
|
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
|
446
679
|
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
|
447
680
|
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
|
448
|
-
offset =
|
|
681
|
+
offset = curr - matchIndex;
|
|
449
682
|
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
|
450
683
|
}
|
|
451
684
|
offset_2 = offset_1;
|
|
452
685
|
offset_1 = offset;
|
|
453
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset
|
|
686
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
454
687
|
|
|
455
688
|
} else {
|
|
456
689
|
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
|
@@ -464,7 +697,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
464
697
|
if (ip <= ilimit) {
|
|
465
698
|
/* Complementary insertion */
|
|
466
699
|
/* done after iLimit test, as candidates could be > iend-8 */
|
|
467
|
-
{ U32 const indexToInsert =
|
|
700
|
+
{ U32 const indexToInsert = curr+2;
|
|
468
701
|
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
|
|
469
702
|
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
|
|
470
703
|
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
|
|
@@ -477,12 +710,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
477
710
|
U32 const repIndex2 = current2 - offset_2;
|
|
478
711
|
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
|
479
712
|
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
|
|
480
|
-
& (
|
|
713
|
+
& (offset_2 <= current2 - dictStartIndex))
|
|
481
714
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
|
482
715
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
|
483
716
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
|
484
717
|
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
485
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
|
718
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
|
486
719
|
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
|
487
720
|
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
|
488
721
|
ip += repLength2;
|
|
@@ -500,6 +733,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
|
|
500
733
|
return (size_t)(iend - anchor);
|
|
501
734
|
}
|
|
502
735
|
|
|
736
|
+
ZSTD_GEN_DFAST_FN(extDict, 4)
|
|
737
|
+
ZSTD_GEN_DFAST_FN(extDict, 5)
|
|
738
|
+
ZSTD_GEN_DFAST_FN(extDict, 6)
|
|
739
|
+
ZSTD_GEN_DFAST_FN(extDict, 7)
|
|
503
740
|
|
|
504
741
|
size_t ZSTD_compressBlock_doubleFast_extDict(
|
|
505
742
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
@@ -510,12 +747,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
|
|
|
510
747
|
{
|
|
511
748
|
default: /* includes case 3 */
|
|
512
749
|
case 4 :
|
|
513
|
-
return
|
|
750
|
+
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
|
|
514
751
|
case 5 :
|
|
515
|
-
return
|
|
752
|
+
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
|
|
516
753
|
case 6 :
|
|
517
|
-
return
|
|
754
|
+
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
|
|
518
755
|
case 7 :
|
|
519
|
-
return
|
|
756
|
+
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
|
|
520
757
|
}
|
|
521
758
|
}
|