zstdlib 0.6.0-x64-mingw32 → 0.9.0-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +20 -0
- data/README.md +7 -1
- data/Rakefile +38 -8
- data/ext/{zstdlib → zstdlib_c}/extconf.rb +10 -5
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/adler32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/compress.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzclose.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzguts.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzlib.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzread.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzwrite.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/infback.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffixed.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/uncompr.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zconf.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zlib.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +75 -57
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/compiler.h +335 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +1 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/debug.c +24 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +22 -49
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/entropy_common.c +368 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +3 -1
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/error_private.h +159 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +51 -42
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +149 -57
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +60 -54
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +87 -98
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +34 -23
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +5 -5
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +10 -8
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +4 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +10 -10
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_deps.h +111 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_internal.h +493 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_trace.h +163 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +105 -85
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +41 -63
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +13 -33
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/huf_compress.c +1370 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress.c +6327 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +537 -82
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +21 -16
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +4 -2
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +61 -34
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.c +573 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +236 -95
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +321 -143
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +328 -137
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.c +2104 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.h +125 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +336 -209
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +15 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_ldm_geartab.h +106 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +439 -239
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +1 -1
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +205 -462
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstdmt_compress.h +113 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress.c +1889 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +20 -16
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +3 -3
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +691 -230
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/zstd_decompress_block.c +2072 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +16 -7
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +71 -10
- data/ext/zstdlib_c/zstd-1.5.2/lib/zdict.h +452 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +760 -234
- data/ext/{zstdlib/zstd-1.4.4/lib/common → zstdlib_c/zstd-1.5.2/lib}/zstd_errors.h +3 -1
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +133 -44
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/ext/zstdlib_c/zstd.mk +15 -0
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/zstdlib.rb +2 -2
- metadata +124 -114
- data/ext/zstdlib/zstd-1.4.4/lib/common/compiler.h +0 -159
- data/ext/zstdlib/zstd-1.4.4/lib/common/debug.c +0 -44
- data/ext/zstdlib/zstd-1.4.4/lib/common/entropy_common.c +0 -236
- data/ext/zstdlib/zstd-1.4.4/lib/common/error_private.h +0 -76
- data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.c +0 -882
- data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.h +0 -305
- data/ext/zstdlib/zstd-1.4.4/lib/common/zstd_internal.h +0 -350
- data/ext/zstdlib/zstd-1.4.4/lib/compress/huf_compress.c +0 -798
- data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_compress.c +0 -4103
- data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.c +0 -1115
- data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.h +0 -67
- data/ext/zstdlib/zstd-1.4.4/lib/compress/zstdmt_compress.h +0 -192
- data/ext/zstdlib/zstd-1.4.4/lib/decompress/huf_decompress.c +0 -1234
- data/ext/zstdlib/zstd-1.4.4/lib/decompress/zstd_decompress_block.c +0 -1323
- data/ext/zstdlib/zstd.mk +0 -14
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -29,148 +29,308 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
|
29
29
|
* Insert the other positions if their hash entry is empty.
|
30
30
|
*/
|
31
31
|
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
|
32
|
-
U32 const
|
32
|
+
U32 const curr = (U32)(ip - base);
|
33
33
|
size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
|
34
|
-
hashTable[hash0] =
|
34
|
+
hashTable[hash0] = curr;
|
35
35
|
if (dtlm == ZSTD_dtlm_fast) continue;
|
36
36
|
/* Only load extra positions for ZSTD_dtlm_full */
|
37
37
|
{ U32 p;
|
38
38
|
for (p = 1; p < fastHashFillStep; ++p) {
|
39
39
|
size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
|
40
40
|
if (hashTable[hash] == 0) { /* not yet filled */
|
41
|
-
hashTable[hash] =
|
41
|
+
hashTable[hash] = curr + p;
|
42
42
|
} } } }
|
43
43
|
}
|
44
44
|
|
45
45
|
|
46
|
+
/**
|
47
|
+
* If you squint hard enough (and ignore repcodes), the search operation at any
|
48
|
+
* given position is broken into 4 stages:
|
49
|
+
*
|
50
|
+
* 1. Hash (map position to hash value via input read)
|
51
|
+
* 2. Lookup (map hash val to index via hashtable read)
|
52
|
+
* 3. Load (map index to value at that position via input read)
|
53
|
+
* 4. Compare
|
54
|
+
*
|
55
|
+
* Each of these steps involves a memory read at an address which is computed
|
56
|
+
* from the previous step. This means these steps must be sequenced and their
|
57
|
+
* latencies are cumulative.
|
58
|
+
*
|
59
|
+
* Rather than do 1->2->3->4 sequentially for a single position before moving
|
60
|
+
* onto the next, this implementation interleaves these operations across the
|
61
|
+
* next few positions:
|
62
|
+
*
|
63
|
+
* R = Repcode Read & Compare
|
64
|
+
* H = Hash
|
65
|
+
* T = Table Lookup
|
66
|
+
* M = Match Read & Compare
|
67
|
+
*
|
68
|
+
* Pos | Time -->
|
69
|
+
* ----+-------------------
|
70
|
+
* N | ... M
|
71
|
+
* N+1 | ... TM
|
72
|
+
* N+2 | R H T M
|
73
|
+
* N+3 | H TM
|
74
|
+
* N+4 | R H T M
|
75
|
+
* N+5 | H ...
|
76
|
+
* N+6 | R ...
|
77
|
+
*
|
78
|
+
* This is very much analogous to the pipelining of execution in a CPU. And just
|
79
|
+
* like a CPU, we have to dump the pipeline when we find a match (i.e., take a
|
80
|
+
* branch).
|
81
|
+
*
|
82
|
+
* When this happens, we throw away our current state, and do the following prep
|
83
|
+
* to re-enter the loop:
|
84
|
+
*
|
85
|
+
* Pos | Time -->
|
86
|
+
* ----+-------------------
|
87
|
+
* N | H T
|
88
|
+
* N+1 | H
|
89
|
+
*
|
90
|
+
* This is also the work we do at the beginning to enter the loop initially.
|
91
|
+
*/
|
46
92
|
FORCE_INLINE_TEMPLATE size_t
|
47
|
-
|
93
|
+
ZSTD_compressBlock_fast_noDict_generic(
|
48
94
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
49
95
|
void const* src, size_t srcSize,
|
50
|
-
U32 const mls)
|
96
|
+
U32 const mls, U32 const hasStep)
|
51
97
|
{
|
52
98
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
53
99
|
U32* const hashTable = ms->hashTable;
|
54
100
|
U32 const hlog = cParams->hashLog;
|
55
101
|
/* support stepSize of 0 */
|
56
|
-
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
|
102
|
+
size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
|
57
103
|
const BYTE* const base = ms->window.base;
|
58
104
|
const BYTE* const istart = (const BYTE*)src;
|
59
|
-
/* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
|
60
|
-
const BYTE* ip0 = istart;
|
61
|
-
const BYTE* ip1;
|
62
|
-
const BYTE* anchor = istart;
|
63
105
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
64
|
-
const U32
|
65
|
-
const U32 validStartIndex = ms->window.dictLimit;
|
66
|
-
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
|
106
|
+
const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
67
107
|
const BYTE* const prefixStart = base + prefixStartIndex;
|
68
108
|
const BYTE* const iend = istart + srcSize;
|
69
109
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
70
|
-
|
110
|
+
|
111
|
+
const BYTE* anchor = istart;
|
112
|
+
const BYTE* ip0 = istart;
|
113
|
+
const BYTE* ip1;
|
114
|
+
const BYTE* ip2;
|
115
|
+
const BYTE* ip3;
|
116
|
+
U32 current0;
|
117
|
+
|
118
|
+
U32 rep_offset1 = rep[0];
|
119
|
+
U32 rep_offset2 = rep[1];
|
71
120
|
U32 offsetSaved = 0;
|
72
121
|
|
73
|
-
/*
|
122
|
+
size_t hash0; /* hash for ip0 */
|
123
|
+
size_t hash1; /* hash for ip1 */
|
124
|
+
U32 idx; /* match idx for ip0 */
|
125
|
+
U32 mval; /* src value at match idx */
|
126
|
+
|
127
|
+
U32 offcode;
|
128
|
+
const BYTE* match0;
|
129
|
+
size_t mLength;
|
130
|
+
|
131
|
+
/* ip0 and ip1 are always adjacent. The targetLength skipping and
|
132
|
+
* uncompressibility acceleration is applied to every other position,
|
133
|
+
* matching the behavior of #1562. step therefore represents the gap
|
134
|
+
* between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
|
135
|
+
size_t step;
|
136
|
+
const BYTE* nextStep;
|
137
|
+
const size_t kStepIncr = (1 << (kSearchStrength - 1));
|
138
|
+
|
74
139
|
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
|
75
140
|
ip0 += (ip0 == prefixStart);
|
141
|
+
{ U32 const curr = (U32)(ip0 - base);
|
142
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
143
|
+
U32 const maxRep = curr - windowLow;
|
144
|
+
if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
|
145
|
+
if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
|
146
|
+
}
|
147
|
+
|
148
|
+
/* start each op */
|
149
|
+
_start: /* Requires: ip0 */
|
150
|
+
|
151
|
+
step = stepSize;
|
152
|
+
nextStep = ip0 + kStepIncr;
|
153
|
+
|
154
|
+
/* calculate positions, ip0 - anchor == 0, so we skip step calc */
|
76
155
|
ip1 = ip0 + 1;
|
77
|
-
|
78
|
-
|
79
|
-
|
156
|
+
ip2 = ip0 + step;
|
157
|
+
ip3 = ip2 + 1;
|
158
|
+
|
159
|
+
if (ip3 >= ilimit) {
|
160
|
+
goto _cleanup;
|
80
161
|
}
|
81
162
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
U32
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
|
105
|
-
ip0 = ip2 - mLength;
|
106
|
-
match0 = repMatch - mLength;
|
107
|
-
offcode = 0;
|
163
|
+
hash0 = ZSTD_hashPtr(ip0, hlog, mls);
|
164
|
+
hash1 = ZSTD_hashPtr(ip1, hlog, mls);
|
165
|
+
|
166
|
+
idx = hashTable[hash0];
|
167
|
+
|
168
|
+
do {
|
169
|
+
/* load repcode match for ip[2]*/
|
170
|
+
const U32 rval = MEM_read32(ip2 - rep_offset1);
|
171
|
+
|
172
|
+
/* write back hash table entry */
|
173
|
+
current0 = (U32)(ip0 - base);
|
174
|
+
hashTable[hash0] = current0;
|
175
|
+
|
176
|
+
/* check repcode at ip[2] */
|
177
|
+
if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
|
178
|
+
ip0 = ip2;
|
179
|
+
match0 = ip0 - rep_offset1;
|
180
|
+
mLength = ip0[-1] == match0[-1];
|
181
|
+
ip0 -= mLength;
|
182
|
+
match0 -= mLength;
|
183
|
+
offcode = STORE_REPCODE_1;
|
184
|
+
mLength += 4;
|
108
185
|
goto _match;
|
109
186
|
}
|
110
|
-
|
111
|
-
|
187
|
+
|
188
|
+
/* load match for ip[0] */
|
189
|
+
if (idx >= prefixStartIndex) {
|
190
|
+
mval = MEM_read32(base + idx);
|
191
|
+
} else {
|
192
|
+
mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
|
193
|
+
}
|
194
|
+
|
195
|
+
/* check match at ip[0] */
|
196
|
+
if (MEM_read32(ip0) == mval) {
|
197
|
+
/* found a match! */
|
112
198
|
goto _offset;
|
113
199
|
}
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
200
|
+
|
201
|
+
/* lookup ip[1] */
|
202
|
+
idx = hashTable[hash1];
|
203
|
+
|
204
|
+
/* hash ip[2] */
|
205
|
+
hash0 = hash1;
|
206
|
+
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
|
207
|
+
|
208
|
+
/* advance to next positions */
|
209
|
+
ip0 = ip1;
|
210
|
+
ip1 = ip2;
|
211
|
+
ip2 = ip3;
|
212
|
+
|
213
|
+
/* write back hash table entry */
|
214
|
+
current0 = (U32)(ip0 - base);
|
215
|
+
hashTable[hash0] = current0;
|
216
|
+
|
217
|
+
/* load match for ip[0] */
|
218
|
+
if (idx >= prefixStartIndex) {
|
219
|
+
mval = MEM_read32(base + idx);
|
220
|
+
} else {
|
221
|
+
mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
|
222
|
+
}
|
223
|
+
|
224
|
+
/* check match at ip[0] */
|
225
|
+
if (MEM_read32(ip0) == mval) {
|
226
|
+
/* found a match! */
|
118
227
|
goto _offset;
|
119
228
|
}
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
229
|
+
|
230
|
+
/* lookup ip[1] */
|
231
|
+
idx = hashTable[hash1];
|
232
|
+
|
233
|
+
/* hash ip[2] */
|
234
|
+
hash0 = hash1;
|
235
|
+
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
|
236
|
+
|
237
|
+
/* advance to next positions */
|
238
|
+
ip0 = ip1;
|
239
|
+
ip1 = ip2;
|
240
|
+
ip2 = ip0 + step;
|
241
|
+
ip3 = ip1 + step;
|
242
|
+
|
243
|
+
/* calculate step */
|
244
|
+
if (ip2 >= nextStep) {
|
245
|
+
step++;
|
246
|
+
PREFETCH_L1(ip1 + 64);
|
247
|
+
PREFETCH_L1(ip1 + 128);
|
248
|
+
nextStep += kStepIncr;
|
125
249
|
}
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
250
|
+
} while (ip3 < ilimit);
|
251
|
+
|
252
|
+
_cleanup:
|
253
|
+
/* Note that there are probably still a couple positions we could search.
|
254
|
+
* However, it seems to be a meaningful performance hit to try to search
|
255
|
+
* them. So let's not. */
|
256
|
+
|
257
|
+
/* save reps for next block */
|
258
|
+
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
|
259
|
+
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
|
260
|
+
|
261
|
+
/* Return the last literals size */
|
262
|
+
return (size_t)(iend - anchor);
|
263
|
+
|
264
|
+
_offset: /* Requires: ip0, idx */
|
265
|
+
|
266
|
+
/* Compute the offset code. */
|
267
|
+
match0 = base + idx;
|
268
|
+
rep_offset2 = rep_offset1;
|
269
|
+
rep_offset1 = (U32)(ip0-match0);
|
270
|
+
offcode = STORE_OFFSET(rep_offset1);
|
271
|
+
mLength = 4;
|
272
|
+
|
273
|
+
/* Count the backwards match length. */
|
274
|
+
while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
|
275
|
+
ip0--;
|
276
|
+
match0--;
|
277
|
+
mLength++;
|
278
|
+
}
|
135
279
|
|
136
280
|
_match: /* Requires: ip0, match0, offcode */
|
137
|
-
/* Count the forward length */
|
138
|
-
mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
|
139
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
|
140
|
-
/* match found */
|
141
|
-
ip0 += mLength;
|
142
|
-
anchor = ip0;
|
143
|
-
ip1 = ip0 + 1;
|
144
281
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
282
|
+
/* Count the forward length. */
|
283
|
+
mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
|
284
|
+
|
285
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
|
286
|
+
|
287
|
+
ip0 += mLength;
|
288
|
+
anchor = ip0;
|
289
|
+
|
290
|
+
/* write next hash table entry */
|
291
|
+
if (ip1 < ip0) {
|
292
|
+
hashTable[hash1] = (U32)(ip1 - base);
|
293
|
+
}
|
150
294
|
|
151
|
-
|
152
|
-
|
295
|
+
/* Fill table and check for immediate repcode. */
|
296
|
+
if (ip0 <= ilimit) {
|
297
|
+
/* Fill Table */
|
298
|
+
assert(base+current0+2 > istart); /* check base overflow */
|
299
|
+
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
|
300
|
+
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
301
|
+
|
302
|
+
if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
|
303
|
+
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
|
153
304
|
/* store sequence */
|
154
|
-
size_t const rLength = ZSTD_count(ip0+4, ip0+4-
|
155
|
-
{ U32 const tmpOff =
|
305
|
+
size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
|
306
|
+
{ U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
|
156
307
|
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
157
308
|
ip0 += rLength;
|
158
|
-
|
159
|
-
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
|
309
|
+
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
|
160
310
|
anchor = ip0;
|
161
311
|
continue; /* faster when present (confirmed on gcc-8) ... (?) */
|
162
|
-
|
163
|
-
}
|
164
|
-
}
|
165
|
-
|
166
|
-
/* save reps for next block */
|
167
|
-
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
168
|
-
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
312
|
+
} } }
|
169
313
|
|
170
|
-
|
171
|
-
return (size_t)(iend - anchor);
|
314
|
+
goto _start;
|
172
315
|
}
|
173
316
|
|
317
|
+
#define ZSTD_GEN_FAST_FN(dictMode, mls, step) \
|
318
|
+
static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \
|
319
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
|
320
|
+
void const* src, size_t srcSize) \
|
321
|
+
{ \
|
322
|
+
return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
|
323
|
+
}
|
324
|
+
|
325
|
+
ZSTD_GEN_FAST_FN(noDict, 4, 1)
|
326
|
+
ZSTD_GEN_FAST_FN(noDict, 5, 1)
|
327
|
+
ZSTD_GEN_FAST_FN(noDict, 6, 1)
|
328
|
+
ZSTD_GEN_FAST_FN(noDict, 7, 1)
|
329
|
+
|
330
|
+
ZSTD_GEN_FAST_FN(noDict, 4, 0)
|
331
|
+
ZSTD_GEN_FAST_FN(noDict, 5, 0)
|
332
|
+
ZSTD_GEN_FAST_FN(noDict, 6, 0)
|
333
|
+
ZSTD_GEN_FAST_FN(noDict, 7, 0)
|
174
334
|
|
175
335
|
size_t ZSTD_compressBlock_fast(
|
176
336
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
@@ -178,24 +338,40 @@ size_t ZSTD_compressBlock_fast(
|
|
178
338
|
{
|
179
339
|
U32 const mls = ms->cParams.minMatch;
|
180
340
|
assert(ms->dictMatchState == NULL);
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
341
|
+
if (ms->cParams.targetLength > 1) {
|
342
|
+
switch(mls)
|
343
|
+
{
|
344
|
+
default: /* includes case 3 */
|
345
|
+
case 4 :
|
346
|
+
return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
|
347
|
+
case 5 :
|
348
|
+
return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
|
349
|
+
case 6 :
|
350
|
+
return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
|
351
|
+
case 7 :
|
352
|
+
return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
|
353
|
+
}
|
354
|
+
} else {
|
355
|
+
switch(mls)
|
356
|
+
{
|
357
|
+
default: /* includes case 3 */
|
358
|
+
case 4 :
|
359
|
+
return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
|
360
|
+
case 5 :
|
361
|
+
return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
|
362
|
+
case 6 :
|
363
|
+
return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
|
364
|
+
case 7 :
|
365
|
+
return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
|
366
|
+
}
|
367
|
+
|
192
368
|
}
|
193
369
|
}
|
194
370
|
|
195
371
|
FORCE_INLINE_TEMPLATE
|
196
372
|
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
197
373
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
198
|
-
void const* src, size_t srcSize, U32 const mls)
|
374
|
+
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
|
199
375
|
{
|
200
376
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
201
377
|
U32* const hashTable = ms->hashTable;
|
@@ -231,7 +407,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
231
407
|
assert(endIndex - prefixStartIndex <= maxDistance);
|
232
408
|
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
|
233
409
|
|
234
|
-
/*
|
410
|
+
(void)hasStep; /* not currently specialized on whether it's accelerated */
|
411
|
+
|
412
|
+
/* ensure there will be no underflow
|
235
413
|
* when translating a dict index into a local index */
|
236
414
|
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
|
237
415
|
|
@@ -247,21 +425,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
247
425
|
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
248
426
|
size_t mLength;
|
249
427
|
size_t const h = ZSTD_hashPtr(ip, hlog, mls);
|
250
|
-
U32 const
|
428
|
+
U32 const curr = (U32)(ip-base);
|
251
429
|
U32 const matchIndex = hashTable[h];
|
252
430
|
const BYTE* match = base + matchIndex;
|
253
|
-
const U32 repIndex =
|
431
|
+
const U32 repIndex = curr + 1 - offset_1;
|
254
432
|
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
|
255
433
|
dictBase + (repIndex - dictIndexDelta) :
|
256
434
|
base + repIndex;
|
257
|
-
hashTable[h] =
|
435
|
+
hashTable[h] = curr; /* update hash table */
|
258
436
|
|
259
437
|
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
|
260
438
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
261
439
|
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
262
440
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
263
441
|
ip++;
|
264
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
442
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
|
265
443
|
} else if ( (matchIndex <= prefixStartIndex) ) {
|
266
444
|
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
|
267
445
|
U32 const dictMatchIndex = dictHashTable[dictHash];
|
@@ -273,7 +451,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
273
451
|
continue;
|
274
452
|
} else {
|
275
453
|
/* found a dict match */
|
276
|
-
U32 const offset = (U32)(
|
454
|
+
U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
|
277
455
|
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
|
278
456
|
while (((ip>anchor) & (dictMatch>dictStart))
|
279
457
|
&& (ip[-1] == dictMatch[-1])) {
|
@@ -281,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
281
459
|
} /* catch up */
|
282
460
|
offset_2 = offset_1;
|
283
461
|
offset_1 = offset;
|
284
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset
|
462
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
|
285
463
|
}
|
286
464
|
} else if (MEM_read32(match) != MEM_read32(ip)) {
|
287
465
|
/* it's not a match, and we're not going to check the dictionary */
|
@@ -296,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
296
474
|
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
297
475
|
offset_2 = offset_1;
|
298
476
|
offset_1 = offset;
|
299
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset
|
477
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
|
300
478
|
}
|
301
479
|
|
302
480
|
/* match found */
|
@@ -305,8 +483,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
305
483
|
|
306
484
|
if (ip <= ilimit) {
|
307
485
|
/* Fill Table */
|
308
|
-
assert(base+
|
309
|
-
hashTable[ZSTD_hashPtr(base+
|
486
|
+
assert(base+curr+2 > istart); /* check base overflow */
|
487
|
+
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
|
310
488
|
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
311
489
|
|
312
490
|
/* check immediate repcode */
|
@@ -321,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
321
499
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
322
500
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
323
501
|
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
324
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
502
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
|
325
503
|
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
326
504
|
ip += repLength2;
|
327
505
|
anchor = ip;
|
@@ -340,6 +518,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
340
518
|
return (size_t)(iend - anchor);
|
341
519
|
}
|
342
520
|
|
521
|
+
|
522
|
+
ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
|
523
|
+
ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
|
524
|
+
ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
|
525
|
+
ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
|
526
|
+
|
343
527
|
size_t ZSTD_compressBlock_fast_dictMatchState(
|
344
528
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
345
529
|
void const* src, size_t srcSize)
|
@@ -350,20 +534,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
|
|
350
534
|
{
|
351
535
|
default: /* includes case 3 */
|
352
536
|
case 4 :
|
353
|
-
return
|
537
|
+
return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
|
354
538
|
case 5 :
|
355
|
-
return
|
539
|
+
return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
|
356
540
|
case 6 :
|
357
|
-
return
|
541
|
+
return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
|
358
542
|
case 7 :
|
359
|
-
return
|
543
|
+
return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
|
360
544
|
}
|
361
545
|
}
|
362
546
|
|
363
547
|
|
364
548
|
static size_t ZSTD_compressBlock_fast_extDict_generic(
|
365
549
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
366
|
-
void const* src, size_t srcSize, U32 const mls)
|
550
|
+
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
|
367
551
|
{
|
368
552
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
369
553
|
U32* const hashTable = ms->hashTable;
|
@@ -387,11 +571,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
387
571
|
const BYTE* const ilimit = iend - 8;
|
388
572
|
U32 offset_1=rep[0], offset_2=rep[1];
|
389
573
|
|
390
|
-
|
574
|
+
(void)hasStep; /* not currently specialized on whether it's accelerated */
|
575
|
+
|
576
|
+
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
|
391
577
|
|
392
578
|
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
|
393
579
|
if (prefixStartIndex == dictStartIndex)
|
394
|
-
return
|
580
|
+
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
|
395
581
|
|
396
582
|
/* Search Loop */
|
397
583
|
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
@@ -399,19 +585,20 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
399
585
|
const U32 matchIndex = hashTable[h];
|
400
586
|
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
|
401
587
|
const BYTE* match = matchBase + matchIndex;
|
402
|
-
const U32
|
403
|
-
const U32 repIndex =
|
588
|
+
const U32 curr = (U32)(ip-base);
|
589
|
+
const U32 repIndex = curr + 1 - offset_1;
|
404
590
|
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
405
591
|
const BYTE* const repMatch = repBase + repIndex;
|
406
|
-
hashTable[h] =
|
407
|
-
|
592
|
+
hashTable[h] = curr; /* update hash table */
|
593
|
+
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
|
408
594
|
|
409
|
-
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
|
595
|
+
if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
|
596
|
+
& (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
|
410
597
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
411
598
|
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
412
599
|
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
|
413
600
|
ip++;
|
414
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,
|
601
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
|
415
602
|
ip += rLength;
|
416
603
|
anchor = ip;
|
417
604
|
} else {
|
@@ -423,30 +610,30 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
423
610
|
}
|
424
611
|
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
|
425
612
|
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
|
426
|
-
U32 const offset =
|
613
|
+
U32 const offset = curr - matchIndex;
|
427
614
|
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
428
615
|
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
429
616
|
offset_2 = offset_1; offset_1 = offset; /* update offset history */
|
430
|
-
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset
|
617
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
|
431
618
|
ip += mLength;
|
432
619
|
anchor = ip;
|
433
620
|
} }
|
434
621
|
|
435
622
|
if (ip <= ilimit) {
|
436
623
|
/* Fill Table */
|
437
|
-
hashTable[ZSTD_hashPtr(base+
|
624
|
+
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
|
438
625
|
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
439
626
|
/* check immediate repcode */
|
440
627
|
while (ip <= ilimit) {
|
441
628
|
U32 const current2 = (U32)(ip-base);
|
442
629
|
U32 const repIndex2 = current2 - offset_2;
|
443
630
|
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
444
|
-
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (
|
631
|
+
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */
|
445
632
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
446
633
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
447
634
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
448
635
|
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
|
449
|
-
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend,
|
636
|
+
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
|
450
637
|
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
451
638
|
ip += repLength2;
|
452
639
|
anchor = ip;
|
@@ -463,6 +650,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
463
650
|
return (size_t)(iend - anchor);
|
464
651
|
}
|
465
652
|
|
653
|
+
ZSTD_GEN_FAST_FN(extDict, 4, 0)
|
654
|
+
ZSTD_GEN_FAST_FN(extDict, 5, 0)
|
655
|
+
ZSTD_GEN_FAST_FN(extDict, 6, 0)
|
656
|
+
ZSTD_GEN_FAST_FN(extDict, 7, 0)
|
466
657
|
|
467
658
|
size_t ZSTD_compressBlock_fast_extDict(
|
468
659
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
@@ -473,12 +664,12 @@ size_t ZSTD_compressBlock_fast_extDict(
|
|
473
664
|
{
|
474
665
|
default: /* includes case 3 */
|
475
666
|
case 4 :
|
476
|
-
return
|
667
|
+
return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
|
477
668
|
case 5 :
|
478
|
-
return
|
669
|
+
return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
|
479
670
|
case 6 :
|
480
|
-
return
|
671
|
+
return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
|
481
672
|
case 7 :
|
482
|
-
return
|
673
|
+
return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
|
483
674
|
}
|
484
675
|
}
|