zstd-ruby 1.4.4.0 → 1.4.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +123 -58
- data/ext/zstdruby/libzstd/README.md +34 -14
- data/ext/zstdruby/libzstd/common/bitstream.h +31 -37
- data/ext/zstdruby/libzstd/common/compiler.h +19 -3
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +11 -31
- data/ext/zstdruby/libzstd/common/entropy_common.c +13 -33
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/error_private.h +6 -2
- data/ext/zstdruby/libzstd/common/fse.h +11 -31
- data/ext/zstdruby/libzstd/common/fse_decompress.c +12 -37
- data/ext/zstdruby/libzstd/common/huf.h +15 -33
- data/ext/zstdruby/libzstd/common/mem.h +1 -1
- data/ext/zstdruby/libzstd/common/pool.c +1 -1
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/threading.c +4 -3
- data/ext/zstdruby/libzstd/common/threading.h +4 -3
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -33
- data/ext/zstdruby/libzstd/common/xxhash.h +11 -31
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +112 -15
- data/ext/zstdruby/libzstd/compress/fse_compress.c +17 -40
- data/ext/zstdruby/libzstd/compress/hist.c +15 -35
- data/ext/zstdruby/libzstd/compress/hist.h +12 -32
- data/ext/zstdruby/libzstd/compress/huf_compress.c +92 -92
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +450 -275
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +136 -14
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +10 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +24 -20
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +845 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -13
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +11 -8
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +36 -24
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +34 -11
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +27 -5
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +7 -2
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +38 -84
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +48 -21
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +2 -2
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +76 -62
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -8
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +264 -148
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +312 -203
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +18 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +5 -5
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +14 -4
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +14 -4
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +33 -9
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +51 -28
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +18 -12
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +10 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +10 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +13 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +17 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +17 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +22 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -2
- data/ext/zstdruby/libzstd/zstd.h +62 -21
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -5
@@ -0,0 +1,32 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef ZSTD_COMPRESS_ADVANCED_H
|
12
|
+
#define ZSTD_COMPRESS_ADVANCED_H
|
13
|
+
|
14
|
+
/*-*************************************
|
15
|
+
* Dependencies
|
16
|
+
***************************************/
|
17
|
+
|
18
|
+
#include "../zstd.h" /* ZSTD_CCtx */
|
19
|
+
|
20
|
+
/*-*************************************
|
21
|
+
* Target Compressed Block Size
|
22
|
+
***************************************/
|
23
|
+
|
24
|
+
/* ZSTD_compressSuperBlock() :
|
25
|
+
* Used to compress a super block when targetCBlockSize is being used.
|
26
|
+
* The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
|
27
|
+
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
28
|
+
void* dst, size_t dstCapacity,
|
29
|
+
void const* src, size_t srcSize,
|
30
|
+
unsigned lastBlock);
|
31
|
+
|
32
|
+
#endif /* ZSTD_COMPRESS_ADVANCED_H */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -14,7 +14,7 @@
|
|
14
14
|
/*-*************************************
|
15
15
|
* Dependencies
|
16
16
|
***************************************/
|
17
|
-
#include "zstd_internal.h"
|
17
|
+
#include "../common/zstd_internal.h"
|
18
18
|
|
19
19
|
#if defined (__cplusplus)
|
20
20
|
extern "C" {
|
@@ -24,16 +24,6 @@ extern "C" {
|
|
24
24
|
* Constants
|
25
25
|
***************************************/
|
26
26
|
|
27
|
-
/* define "workspace is too large" as this number of times larger than needed */
|
28
|
-
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
|
29
|
-
|
30
|
-
/* when workspace is continuously too large
|
31
|
-
* during at least this number of times,
|
32
|
-
* context's memory usage is considered wasteful,
|
33
|
-
* because it's sized to handle a worst case scenario which rarely happens.
|
34
|
-
* In which case, resize it down to free some memory */
|
35
|
-
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
|
36
|
-
|
37
27
|
/* Since the workspace is effectively its own little malloc implementation /
|
38
28
|
* arena, when we run under ASAN, we should similarly insert redzones between
|
39
29
|
* each internal element of the workspace, so ASAN will catch overruns that
|
@@ -468,7 +458,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
|
|
468
458
|
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
|
469
459
|
void* workspace = ZSTD_malloc(size, customMem);
|
470
460
|
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
|
471
|
-
RETURN_ERROR_IF(workspace == NULL, memory_allocation);
|
461
|
+
RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
|
472
462
|
ZSTD_cwksp_init(ws, workspace, size);
|
473
463
|
return 0;
|
474
464
|
}
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -63,10 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
63
63
|
const BYTE* ip = istart;
|
64
64
|
const BYTE* anchor = istart;
|
65
65
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
66
|
-
const U32 lowestValid = ms->window.dictLimit;
|
67
|
-
const U32 maxDistance = 1U << cParams->windowLog;
|
68
66
|
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
69
|
-
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
|
67
|
+
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
70
68
|
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
71
69
|
const BYTE* const iend = istart + srcSize;
|
72
70
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
@@ -96,7 +94,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
96
94
|
dictCParams->hashLog : hBitsL;
|
97
95
|
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
98
96
|
dictCParams->chainLog : hBitsS;
|
99
|
-
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
|
97
|
+
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
100
98
|
|
101
99
|
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
|
102
100
|
|
@@ -104,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
104
102
|
|
105
103
|
/* if a dictionary is attached, it must be within window range */
|
106
104
|
if (dictMode == ZSTD_dictMatchState) {
|
107
|
-
assert(lowestValid + maxDistance >= endIndex);
|
105
|
+
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
108
106
|
}
|
109
107
|
|
110
108
|
/* init */
|
111
109
|
ip += (dictAndPrefixLength == 0);
|
112
110
|
if (dictMode == ZSTD_noDict) {
|
113
|
-
U32 const
|
111
|
+
U32 const current = (U32)(ip - base);
|
112
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
113
|
+
U32 const maxRep = current - windowLow;
|
114
114
|
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
115
115
|
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
116
116
|
}
|
@@ -198,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|
198
198
|
} }
|
199
199
|
|
200
200
|
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
201
|
+
#if defined(__aarch64__)
|
202
|
+
PREFETCH_L1(ip+256);
|
203
|
+
#endif
|
201
204
|
continue;
|
202
205
|
|
203
206
|
_search_next_long:
|
@@ -271,7 +274,7 @@ _match_stored:
|
|
271
274
|
U32 const repIndex2 = current2 - offset_2;
|
272
275
|
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
|
273
276
|
&& repIndex2 < prefixLowestIndex ?
|
274
|
-
dictBase - dictIndexDelta + repIndex2 :
|
277
|
+
dictBase + repIndex2 - dictIndexDelta :
|
275
278
|
base + repIndex2;
|
276
279
|
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
277
280
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,7 +15,7 @@
|
|
15
15
|
extern "C" {
|
16
16
|
#endif
|
17
17
|
|
18
|
-
#include "mem.h" /* U32 */
|
18
|
+
#include "../common/mem.h" /* U32 */
|
19
19
|
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
|
20
20
|
|
21
21
|
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -61,9 +61,7 @@ ZSTD_compressBlock_fast_generic(
|
|
61
61
|
const BYTE* ip1;
|
62
62
|
const BYTE* anchor = istart;
|
63
63
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
64
|
-
const U32 maxDistance = 1U << cParams->windowLog;
|
65
|
-
const U32 validStartIndex = ms->window.dictLimit;
|
66
|
-
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
|
64
|
+
const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
67
65
|
const BYTE* const prefixStart = base + prefixStartIndex;
|
68
66
|
const BYTE* const iend = istart + srcSize;
|
69
67
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
@@ -74,12 +72,21 @@ ZSTD_compressBlock_fast_generic(
|
|
74
72
|
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
|
75
73
|
ip0 += (ip0 == prefixStart);
|
76
74
|
ip1 = ip0 + 1;
|
77
|
-
{ U32 const
|
75
|
+
{ U32 const current = (U32)(ip0 - base);
|
76
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
77
|
+
U32 const maxRep = current - windowLow;
|
78
78
|
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
79
79
|
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
80
80
|
}
|
81
81
|
|
82
82
|
/* Main Search Loop */
|
83
|
+
#ifdef __INTEL_COMPILER
|
84
|
+
/* From intel 'The vector pragma indicates that the loop should be
|
85
|
+
* vectorized if it is legal to do so'. Can be used together with
|
86
|
+
* #pragma ivdep (but have opted to exclude that because intel
|
87
|
+
* warns against using it).*/
|
88
|
+
#pragma vector always
|
89
|
+
#endif
|
83
90
|
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
|
84
91
|
size_t mLength;
|
85
92
|
BYTE const* ip2 = ip0 + 2;
|
@@ -91,19 +98,25 @@ ZSTD_compressBlock_fast_generic(
|
|
91
98
|
U32 const current1 = (U32)(ip1-base);
|
92
99
|
U32 const matchIndex0 = hashTable[h0];
|
93
100
|
U32 const matchIndex1 = hashTable[h1];
|
94
|
-
BYTE const* repMatch = ip2-offset_1;
|
101
|
+
BYTE const* repMatch = ip2 - offset_1;
|
95
102
|
const BYTE* match0 = base + matchIndex0;
|
96
103
|
const BYTE* match1 = base + matchIndex1;
|
97
104
|
U32 offcode;
|
105
|
+
|
106
|
+
#if defined(__aarch64__)
|
107
|
+
PREFETCH_L1(ip0+256);
|
108
|
+
#endif
|
109
|
+
|
98
110
|
hashTable[h0] = current0; /* update hash table */
|
99
111
|
hashTable[h1] = current1; /* update hash table */
|
100
112
|
|
101
113
|
assert(ip0 + 1 == ip1);
|
102
114
|
|
103
115
|
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
|
104
|
-
mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
|
116
|
+
mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
|
105
117
|
ip0 = ip2 - mLength;
|
106
118
|
match0 = repMatch - mLength;
|
119
|
+
mLength += 4;
|
107
120
|
offcode = 0;
|
108
121
|
goto _match;
|
109
122
|
}
|
@@ -128,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
|
|
128
141
|
offset_2 = offset_1;
|
129
142
|
offset_1 = (U32)(ip0-match0);
|
130
143
|
offcode = offset_1 + ZSTD_REP_MOVE;
|
131
|
-
mLength =
|
144
|
+
mLength = 4;
|
132
145
|
/* Count the backwards match length */
|
133
146
|
while (((ip0>anchor) & (match0>prefixStart))
|
134
147
|
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
|
135
148
|
|
136
149
|
_match: /* Requires: ip0, match0, offcode */
|
137
150
|
/* Count the forward length */
|
138
|
-
mLength += ZSTD_count(ip0+mLength
|
151
|
+
mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
|
139
152
|
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
|
140
153
|
/* match found */
|
141
154
|
ip0 += mLength;
|
142
155
|
anchor = ip0;
|
143
|
-
ip1 = ip0 + 1;
|
144
156
|
|
145
157
|
if (ip0 <= ilimit) {
|
146
158
|
/* Fill Table */
|
@@ -148,19 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
|
|
148
160
|
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
|
149
161
|
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
150
162
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
}
|
163
|
+
if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
|
164
|
+
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
|
165
|
+
/* store sequence */
|
166
|
+
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
|
167
|
+
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
|
168
|
+
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
169
|
+
ip0 += rLength;
|
170
|
+
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
|
171
|
+
anchor = ip0;
|
172
|
+
continue; /* faster when present (confirmed on gcc-8) ... (?) */
|
173
|
+
} } }
|
174
|
+
ip1 = ip0 + 1;
|
164
175
|
}
|
165
176
|
|
166
177
|
/* save reps for next block */
|
@@ -387,7 +398,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
387
398
|
const BYTE* const ilimit = iend - 8;
|
388
399
|
U32 offset_1=rep[0], offset_2=rep[1];
|
389
400
|
|
390
|
-
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
|
401
|
+
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
|
391
402
|
|
392
403
|
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
|
393
404
|
if (prefixStartIndex == dictStartIndex)
|
@@ -404,6 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
404
415
|
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
405
416
|
const BYTE* const repMatch = repBase + repIndex;
|
406
417
|
hashTable[h] = current; /* update hash table */
|
418
|
+
DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
|
407
419
|
assert(offset_1 <= current +1); /* check repIndex */
|
408
420
|
|
409
421
|
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,7 +15,7 @@
|
|
15
15
|
extern "C" {
|
16
16
|
#endif
|
17
17
|
|
18
|
-
#include "mem.h" /* U32 */
|
18
|
+
#include "../common/mem.h" /* U32 */
|
19
19
|
#include "zstd_compress_internal.h"
|
20
20
|
|
21
21
|
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -660,12 +660,16 @@ ZSTD_compressBlock_lazy_generic(
|
|
660
660
|
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
661
661
|
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
662
662
|
0;
|
663
|
-
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
|
663
|
+
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
|
664
|
+
|
665
|
+
DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
|
664
666
|
|
665
667
|
/* init */
|
666
668
|
ip += (dictAndPrefixLength == 0);
|
667
669
|
if (dictMode == ZSTD_noDict) {
|
668
|
-
U32 const
|
670
|
+
U32 const current = (U32)(ip - base);
|
671
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
|
672
|
+
U32 const maxRep = current - windowLow;
|
669
673
|
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
|
670
674
|
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
|
671
675
|
}
|
@@ -677,6 +681,12 @@ ZSTD_compressBlock_lazy_generic(
|
|
677
681
|
}
|
678
682
|
|
679
683
|
/* Match Loop */
|
684
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
685
|
+
/* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
|
686
|
+
* code alignment is perturbed. To fix the instability align the loop on 32-bytes.
|
687
|
+
*/
|
688
|
+
__asm__(".p2align 5");
|
689
|
+
#endif
|
680
690
|
while (ip < ilimit) {
|
681
691
|
size_t matchLength=0;
|
682
692
|
size_t offset=0;
|
@@ -929,11 +939,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
929
939
|
const BYTE* const ilimit = iend - 8;
|
930
940
|
const BYTE* const base = ms->window.base;
|
931
941
|
const U32 dictLimit = ms->window.dictLimit;
|
932
|
-
const U32 lowestIndex = ms->window.lowLimit;
|
933
942
|
const BYTE* const prefixStart = base + dictLimit;
|
934
943
|
const BYTE* const dictBase = ms->window.dictBase;
|
935
944
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
936
|
-
const BYTE* const dictStart = dictBase + lowestIndex;
|
945
|
+
const BYTE* const dictStart = dictBase + ms->window.lowLimit;
|
946
|
+
const U32 windowLog = ms->cParams.windowLog;
|
937
947
|
|
938
948
|
typedef size_t (*searchMax_f)(
|
939
949
|
ZSTD_matchState_t* ms,
|
@@ -942,10 +952,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
942
952
|
|
943
953
|
U32 offset_1 = rep[0], offset_2 = rep[1];
|
944
954
|
|
955
|
+
DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
|
956
|
+
|
945
957
|
/* init */
|
946
958
|
ip += (ip == prefixStart);
|
947
959
|
|
948
960
|
/* Match Loop */
|
961
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
962
|
+
/* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
|
963
|
+
* code alignment is perturbed. To fix the instability align the loop on 32-bytes.
|
964
|
+
*/
|
965
|
+
__asm__(".p2align 5");
|
966
|
+
#endif
|
949
967
|
while (ip < ilimit) {
|
950
968
|
size_t matchLength=0;
|
951
969
|
size_t offset=0;
|
@@ -953,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
953
971
|
U32 current = (U32)(ip-base);
|
954
972
|
|
955
973
|
/* check repCode */
|
956
|
-
{ const U32 repIndex = (U32)(current+1 - offset_1);
|
974
|
+
{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
|
975
|
+
const U32 repIndex = (U32)(current+1 - offset_1);
|
957
976
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
958
977
|
const BYTE* const repMatch = repBase + repIndex;
|
959
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
|
978
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
960
979
|
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
|
961
980
|
/* repcode detected we should take it */
|
962
981
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
@@ -983,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
983
1002
|
current++;
|
984
1003
|
/* check repCode */
|
985
1004
|
if (offset) {
|
1005
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
|
986
1006
|
const U32 repIndex = (U32)(current - offset_1);
|
987
1007
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
988
1008
|
const BYTE* const repMatch = repBase + repIndex;
|
989
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
|
1009
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
990
1010
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
991
1011
|
/* repcode detected */
|
992
1012
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
@@ -1013,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
1013
1033
|
current++;
|
1014
1034
|
/* check repCode */
|
1015
1035
|
if (offset) {
|
1036
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
|
1016
1037
|
const U32 repIndex = (U32)(current - offset_1);
|
1017
1038
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
1018
1039
|
const BYTE* const repMatch = repBase + repIndex;
|
1019
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
|
1040
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
1020
1041
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
1021
1042
|
/* repcode detected */
|
1022
1043
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
@@ -1057,10 +1078,12 @@ _storeSequence:
|
|
1057
1078
|
|
1058
1079
|
/* check immediate repcode */
|
1059
1080
|
while (ip <= ilimit) {
|
1060
|
-
const U32
|
1081
|
+
const U32 repCurrent = (U32)(ip-base);
|
1082
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
|
1083
|
+
const U32 repIndex = repCurrent - offset_2;
|
1061
1084
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
1062
1085
|
const BYTE* const repMatch = repBase + repIndex;
|
1063
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
|
1086
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
1064
1087
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
1065
1088
|
/* repcode detected we should take it */
|
1066
1089
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
@@ -1,15 +1,16 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
6
6
|
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
7
|
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
8
9
|
*/
|
9
10
|
|
10
11
|
#include "zstd_ldm.h"
|
11
12
|
|
12
|
-
#include "debug.h"
|
13
|
+
#include "../common/debug.h"
|
13
14
|
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
|
14
15
|
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
|
15
16
|
|
@@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
|
|
223
224
|
return rollingHash;
|
224
225
|
}
|
225
226
|
|
227
|
+
void ZSTD_ldm_fillHashTable(
|
228
|
+
ldmState_t* state, const BYTE* ip,
|
229
|
+
const BYTE* iend, ldmParams_t const* params)
|
230
|
+
{
|
231
|
+
DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
|
232
|
+
if ((size_t)(iend - ip) >= params->minMatchLength) {
|
233
|
+
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
|
234
|
+
ZSTD_ldm_fillLdmHashTable(
|
235
|
+
state, startingHash, ip, iend - params->minMatchLength, state->window.base,
|
236
|
+
params->hashLog - params->bucketSizeLog,
|
237
|
+
*params);
|
238
|
+
}
|
239
|
+
}
|
240
|
+
|
226
241
|
|
227
242
|
/** ZSTD_ldm_limitTableUpdate() :
|
228
243
|
*
|
@@ -449,6 +464,8 @@ size_t ZSTD_ldm_generateSequences(
|
|
449
464
|
U32 const correction = ZSTD_window_correctOverflow(
|
450
465
|
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
|
451
466
|
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
|
467
|
+
/* invalidate dictionaries on overflow correction */
|
468
|
+
ldmState->loadedDictEnd = 0;
|
452
469
|
}
|
453
470
|
/* 2. We enforce the maximum offset allowed.
|
454
471
|
*
|
@@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences(
|
|
457
474
|
* TODO: * Test the chunk size.
|
458
475
|
* * Try invalidation after the sequence generation and test the
|
459
476
|
* offset against maxDist directly.
|
477
|
+
*
|
478
|
+
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
|
479
|
+
* that any offset used is valid at the END of the sequence, since it may
|
480
|
+
* be split into two sequences. This condition holds when using
|
481
|
+
* ZSTD_window_enforceMaxDist(), but if we move to checking offsets
|
482
|
+
* against maxDist directly, we'll have to carefully handle that case.
|
460
483
|
*/
|
461
|
-
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist,
|
484
|
+
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
|
462
485
|
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
|
463
486
|
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
|
464
487
|
ldmState, sequences, params, chunkStart, chunkSize);
|
@@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|
566
589
|
if (sequence.offset == 0)
|
567
590
|
break;
|
568
591
|
|
569
|
-
assert(sequence.offset <= (1U << cParams->windowLog));
|
570
592
|
assert(ip + sequence.litLength + sequence.matchLength <= iend);
|
571
593
|
|
572
594
|
/* Fill tables for block compressor */
|
573
595
|
ZSTD_ldm_limitTableUpdate(ms, ip);
|
574
596
|
ZSTD_ldm_fillFastTables(ms, ip);
|
575
597
|
/* Run the block compressor */
|
576
|
-
DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
|
598
|
+
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
|
577
599
|
{
|
578
600
|
size_t const newLitLength =
|
579
601
|
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
|