zstdlib 0.3.0-x64-mingw32 → 0.8.0-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +30 -1
- data/README.md +2 -2
- data/Rakefile +1 -1
- data/ext/zstdlib/extconf.rb +3 -3
- data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
- data/ext/zstdlib/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/bitstream.h +59 -51
- data/ext/zstdlib/zstd-1.5.0/lib/common/compiler.h +289 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/cpu.h +1 -3
- data/ext/zstdlib/zstd-1.5.0/lib/common/debug.c +24 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/debug.h +22 -49
- data/ext/zstdlib/zstd-1.5.0/lib/common/entropy_common.c +362 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.c +3 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.h +8 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse.h +50 -42
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse_decompress.c +149 -55
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/huf.h +43 -39
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/mem.h +69 -25
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.c +30 -20
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.h +3 -3
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.c +51 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.h +36 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.c +40 -92
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.h +12 -32
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/zstd_common.c +10 -10
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_deps.h +111 -0
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_internal.h +490 -0
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_trace.h +154 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/fse_compress.c +47 -63
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.c +41 -63
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.h +13 -33
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/huf_compress.c +332 -193
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress.c +6393 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_internal.h +522 -86
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.c +25 -16
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.h +2 -2
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.c +50 -24
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.h +11 -4
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.c +572 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_cwksp.h +662 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.c +43 -41
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.c +85 -80
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.c +2184 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.h +125 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.c +333 -208
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.h +15 -3
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.c +228 -129
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstdmt_compress.c +151 -440
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstdmt_compress.h +110 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/huf_decompress.c +395 -276
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.c +20 -16
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.h +3 -3
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress.c +628 -231
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.c +606 -380
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_internal.h +39 -9
- data/ext/zstdlib/zstd-1.5.0/lib/zdict.h +452 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/zstd.h +740 -153
- data/ext/zstdlib/{zstd-1.4.2/lib/common → zstd-1.5.0/lib}/zstd_errors.h +3 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzlib.c +9 -9
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzread.c +16 -8
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzwrite.c +8 -8
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.c +131 -45
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
- metadata +76 -67
- data/ext/zstdlib/zstd-1.4.2/lib/common/compiler.h +0 -147
- data/ext/zstdlib/zstd-1.4.2/lib/common/debug.c +0 -44
- data/ext/zstdlib/zstd-1.4.2/lib/common/entropy_common.c +0 -236
- data/ext/zstdlib/zstd-1.4.2/lib/common/zstd_internal.h +0 -371
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress.c +0 -3904
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.c +0 -1111
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.h +0 -67
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstdmt_compress.h +0 -192
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -18,7 +18,8 @@
|
|
18
18
|
/*-*************************************
|
19
19
|
* Dependencies
|
20
20
|
***************************************/
|
21
|
-
#include "zstd_internal.h"
|
21
|
+
#include "../common/zstd_internal.h"
|
22
|
+
#include "zstd_cwksp.h"
|
22
23
|
#ifdef ZSTD_MULTITHREAD
|
23
24
|
# include "zstdmt_compress.h"
|
24
25
|
#endif
|
@@ -27,7 +28,6 @@
|
|
27
28
|
extern "C" {
|
28
29
|
#endif
|
29
30
|
|
30
|
-
|
31
31
|
/*-*************************************
|
32
32
|
* Constants
|
33
33
|
***************************************/
|
@@ -63,7 +63,7 @@ typedef struct {
|
|
63
63
|
} ZSTD_localDict;
|
64
64
|
|
65
65
|
typedef struct {
|
66
|
-
|
66
|
+
HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
|
67
67
|
HUF_repeat repeatMode;
|
68
68
|
} ZSTD_hufCTables_t;
|
69
69
|
|
@@ -81,11 +81,75 @@ typedef struct {
|
|
81
81
|
ZSTD_fseCTables_t fse;
|
82
82
|
} ZSTD_entropyCTables_t;
|
83
83
|
|
84
|
+
/***********************************************
|
85
|
+
* Entropy buffer statistics structs and funcs *
|
86
|
+
***********************************************/
|
87
|
+
/** ZSTD_hufCTablesMetadata_t :
|
88
|
+
* Stores Literals Block Type for a super-block in hType, and
|
89
|
+
* huffman tree description in hufDesBuffer.
|
90
|
+
* hufDesSize refers to the size of huffman tree description in bytes.
|
91
|
+
* This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
|
84
92
|
typedef struct {
|
85
|
-
|
86
|
-
|
93
|
+
symbolEncodingType_e hType;
|
94
|
+
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
|
95
|
+
size_t hufDesSize;
|
96
|
+
} ZSTD_hufCTablesMetadata_t;
|
97
|
+
|
98
|
+
/** ZSTD_fseCTablesMetadata_t :
|
99
|
+
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
|
100
|
+
* fse tables in fseTablesBuffer.
|
101
|
+
* fseTablesSize refers to the size of fse tables in bytes.
|
102
|
+
* This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
|
103
|
+
typedef struct {
|
104
|
+
symbolEncodingType_e llType;
|
105
|
+
symbolEncodingType_e ofType;
|
106
|
+
symbolEncodingType_e mlType;
|
107
|
+
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
|
108
|
+
size_t fseTablesSize;
|
109
|
+
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
|
110
|
+
} ZSTD_fseCTablesMetadata_t;
|
111
|
+
|
112
|
+
typedef struct {
|
113
|
+
ZSTD_hufCTablesMetadata_t hufMetadata;
|
114
|
+
ZSTD_fseCTablesMetadata_t fseMetadata;
|
115
|
+
} ZSTD_entropyCTablesMetadata_t;
|
116
|
+
|
117
|
+
/** ZSTD_buildBlockEntropyStats() :
|
118
|
+
* Builds entropy for the block.
|
119
|
+
* @return : 0 on success or error code */
|
120
|
+
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
|
121
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
122
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
123
|
+
const ZSTD_CCtx_params* cctxParams,
|
124
|
+
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
125
|
+
void* workspace, size_t wkspSize);
|
126
|
+
|
127
|
+
/*********************************
|
128
|
+
* Compression internals structs *
|
129
|
+
*********************************/
|
130
|
+
|
131
|
+
typedef struct {
|
132
|
+
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
|
133
|
+
U32 len; /* Raw length of match */
|
87
134
|
} ZSTD_match_t;
|
88
135
|
|
136
|
+
typedef struct {
|
137
|
+
U32 offset; /* Offset of sequence */
|
138
|
+
U32 litLength; /* Length of literals prior to match */
|
139
|
+
U32 matchLength; /* Raw length of match */
|
140
|
+
} rawSeq;
|
141
|
+
|
142
|
+
typedef struct {
|
143
|
+
rawSeq* seq; /* The start of the sequences */
|
144
|
+
size_t pos; /* The index in seq where reading stopped. pos <= size. */
|
145
|
+
size_t posInSequence; /* The position within the sequence at seq[pos] where reading
|
146
|
+
stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
|
147
|
+
size_t size; /* The number of sequences. <= capacity. */
|
148
|
+
size_t capacity; /* The capacity starting from `seq` pointer */
|
149
|
+
} rawSeqStore_t;
|
150
|
+
|
151
|
+
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
|
152
|
+
|
89
153
|
typedef struct {
|
90
154
|
int price;
|
91
155
|
U32 off;
|
@@ -124,25 +188,50 @@ typedef struct {
|
|
124
188
|
} ZSTD_compressedBlockState_t;
|
125
189
|
|
126
190
|
typedef struct {
|
127
|
-
BYTE const* nextSrc;
|
128
|
-
BYTE const* base;
|
129
|
-
BYTE const* dictBase;
|
130
|
-
U32 dictLimit;
|
131
|
-
U32 lowLimit;
|
191
|
+
BYTE const* nextSrc; /* next block here to continue on current prefix */
|
192
|
+
BYTE const* base; /* All regular indexes relative to this position */
|
193
|
+
BYTE const* dictBase; /* extDict indexes relative to this position */
|
194
|
+
U32 dictLimit; /* below that point, need extDict */
|
195
|
+
U32 lowLimit; /* below that point, no more valid data */
|
196
|
+
U32 nbOverflowCorrections; /* Number of times overflow correction has run since
|
197
|
+
* ZSTD_window_init(). Useful for debugging coredumps
|
198
|
+
* and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
|
199
|
+
*/
|
132
200
|
} ZSTD_window_t;
|
133
201
|
|
134
202
|
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
|
203
|
+
|
204
|
+
#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
|
205
|
+
|
135
206
|
struct ZSTD_matchState_t {
|
136
207
|
ZSTD_window_t window; /* State for window round buffer management */
|
137
|
-
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
|
208
|
+
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
|
209
|
+
* When loadedDictEnd != 0, a dictionary is in use, and still valid.
|
210
|
+
* This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
|
211
|
+
* Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
|
212
|
+
* When dict referential is copied into active context (i.e. not attached),
|
213
|
+
* loadedDictEnd == dictSize, since referential starts from zero.
|
214
|
+
*/
|
138
215
|
U32 nextToUpdate; /* index from which to continue table update */
|
139
|
-
U32 hashLog3; /* dispatch table : larger == faster, more memory */
|
216
|
+
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
|
217
|
+
|
218
|
+
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
|
219
|
+
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
|
220
|
+
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
|
221
|
+
|
140
222
|
U32* hashTable;
|
141
223
|
U32* hashTable3;
|
142
224
|
U32* chainTable;
|
225
|
+
|
226
|
+
U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
|
227
|
+
|
228
|
+
int dedicatedDictSearch; /* Indicates whether this matchState is using the
|
229
|
+
* dedicated dictionary search structure.
|
230
|
+
*/
|
143
231
|
optState_t opt; /* optimal parser state */
|
144
232
|
const ZSTD_matchState_t* dictMatchState;
|
145
233
|
ZSTD_compressionParameters cParams;
|
234
|
+
const rawSeqStore_t* ldmSeqStore;
|
146
235
|
};
|
147
236
|
|
148
237
|
typedef struct {
|
@@ -156,12 +245,22 @@ typedef struct {
|
|
156
245
|
U32 checksum;
|
157
246
|
} ldmEntry_t;
|
158
247
|
|
248
|
+
typedef struct {
|
249
|
+
BYTE const* split;
|
250
|
+
U32 hash;
|
251
|
+
U32 checksum;
|
252
|
+
ldmEntry_t* bucket;
|
253
|
+
} ldmMatchCandidate_t;
|
254
|
+
|
255
|
+
#define LDM_BATCH_SIZE 64
|
256
|
+
|
159
257
|
typedef struct {
|
160
258
|
ZSTD_window_t window; /* State for the window round buffer management */
|
161
259
|
ldmEntry_t* hashTable;
|
260
|
+
U32 loadedDictEnd;
|
162
261
|
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
|
163
|
-
|
164
|
-
|
262
|
+
size_t splitIndices[LDM_BATCH_SIZE];
|
263
|
+
ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
|
165
264
|
} ldmState_t;
|
166
265
|
|
167
266
|
typedef struct {
|
@@ -174,17 +273,11 @@ typedef struct {
|
|
174
273
|
} ldmParams_t;
|
175
274
|
|
176
275
|
typedef struct {
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
typedef struct {
|
183
|
-
rawSeq* seq; /* The start of the sequences */
|
184
|
-
size_t pos; /* The position where reading stopped. <= size. */
|
185
|
-
size_t size; /* The number of sequences. <= capacity. */
|
186
|
-
size_t capacity; /* The capacity starting from `seq` pointer */
|
187
|
-
} rawSeqStore_t;
|
276
|
+
int collectSequences;
|
277
|
+
ZSTD_Sequence* seqStart;
|
278
|
+
size_t seqIndex;
|
279
|
+
size_t maxSequences;
|
280
|
+
} SeqCollector;
|
188
281
|
|
189
282
|
struct ZSTD_CCtx_params_s {
|
190
283
|
ZSTD_format_e format;
|
@@ -197,6 +290,9 @@ struct ZSTD_CCtx_params_s {
|
|
197
290
|
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
|
198
291
|
* No target when targetCBlockSize == 0.
|
199
292
|
* There is no guarantee on compressed block size */
|
293
|
+
int srcSizeHint; /* User's best guess of source size.
|
294
|
+
* Hint is not valid when srcSizeHint == 0.
|
295
|
+
* There is no guarantee that hint is close to actual source size */
|
200
296
|
|
201
297
|
ZSTD_dictAttachPref_e attachDictPref;
|
202
298
|
ZSTD_literalCompressionMode_e literalCompressionMode;
|
@@ -210,28 +306,65 @@ struct ZSTD_CCtx_params_s {
|
|
210
306
|
/* Long distance matching parameters */
|
211
307
|
ldmParams_t ldmParams;
|
212
308
|
|
309
|
+
/* Dedicated dict search algorithm trigger */
|
310
|
+
int enableDedicatedDictSearch;
|
311
|
+
|
312
|
+
/* Input/output buffer modes */
|
313
|
+
ZSTD_bufferMode_e inBufferMode;
|
314
|
+
ZSTD_bufferMode_e outBufferMode;
|
315
|
+
|
316
|
+
/* Sequence compression API */
|
317
|
+
ZSTD_sequenceFormat_e blockDelimiters;
|
318
|
+
int validateSequences;
|
319
|
+
|
320
|
+
/* Block splitting */
|
321
|
+
int splitBlocks;
|
322
|
+
|
323
|
+
/* Param for deciding whether to use row-based matchfinder */
|
324
|
+
ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
|
325
|
+
|
326
|
+
/* Always load a dictionary in ext-dict mode (not prefix mode)? */
|
327
|
+
int deterministicRefPrefix;
|
328
|
+
|
213
329
|
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
|
214
330
|
ZSTD_customMem customMem;
|
215
331
|
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
216
332
|
|
333
|
+
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
|
334
|
+
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
|
335
|
+
|
336
|
+
/**
|
337
|
+
* Indicates whether this compression proceeds directly from user-provided
|
338
|
+
* source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
|
339
|
+
* whether the context needs to buffer the input/output (ZSTDb_buffered).
|
340
|
+
*/
|
341
|
+
typedef enum {
|
342
|
+
ZSTDb_not_buffered,
|
343
|
+
ZSTDb_buffered
|
344
|
+
} ZSTD_buffered_policy_e;
|
345
|
+
|
217
346
|
struct ZSTD_CCtx_s {
|
218
347
|
ZSTD_compressionStage_e stage;
|
219
348
|
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
|
220
349
|
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
221
350
|
ZSTD_CCtx_params requestedParams;
|
222
351
|
ZSTD_CCtx_params appliedParams;
|
352
|
+
ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
|
223
353
|
U32 dictID;
|
354
|
+
size_t dictContentSize;
|
224
355
|
|
225
|
-
|
226
|
-
void* workSpace;
|
227
|
-
size_t workSpaceSize;
|
356
|
+
ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
|
228
357
|
size_t blockSize;
|
229
358
|
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
|
230
359
|
unsigned long long consumedSrcSize;
|
231
360
|
unsigned long long producedCSize;
|
232
361
|
XXH64_state_t xxhState;
|
233
362
|
ZSTD_customMem customMem;
|
363
|
+
ZSTD_threadPool* pool;
|
234
364
|
size_t staticSize;
|
365
|
+
SeqCollector seqCollector;
|
366
|
+
int isFirstBlock;
|
367
|
+
int initialized;
|
235
368
|
|
236
369
|
seqStore_t seqStore; /* sequences storage ptrs */
|
237
370
|
ldmState_t ldmState; /* long distance matching state */
|
@@ -239,7 +372,10 @@ struct ZSTD_CCtx_s {
|
|
239
372
|
size_t maxNbLdmSequences;
|
240
373
|
rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
|
241
374
|
ZSTD_blockState_t blockState;
|
242
|
-
U32* entropyWorkspace; /* entropy workspace of
|
375
|
+
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
|
376
|
+
|
377
|
+
/* Whether we are streaming or not */
|
378
|
+
ZSTD_buffered_policy_e bufferedPolicy;
|
243
379
|
|
244
380
|
/* streaming */
|
245
381
|
char* inBuff;
|
@@ -254,6 +390,10 @@ struct ZSTD_CCtx_s {
|
|
254
390
|
ZSTD_cStreamStage streamStage;
|
255
391
|
U32 frameEnded;
|
256
392
|
|
393
|
+
/* Stable in/out buffer verification */
|
394
|
+
ZSTD_inBuffer expectedInBuffer;
|
395
|
+
size_t expectedOutBufferSize;
|
396
|
+
|
257
397
|
/* Dictionary */
|
258
398
|
ZSTD_localDict localDict;
|
259
399
|
const ZSTD_CDict* cdict;
|
@@ -263,17 +403,46 @@ struct ZSTD_CCtx_s {
|
|
263
403
|
#ifdef ZSTD_MULTITHREAD
|
264
404
|
ZSTDMT_CCtx* mtctx;
|
265
405
|
#endif
|
406
|
+
|
407
|
+
/* Tracing */
|
408
|
+
#if ZSTD_TRACE
|
409
|
+
ZSTD_TraceCtx traceCtx;
|
410
|
+
#endif
|
266
411
|
};
|
267
412
|
|
268
413
|
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
269
414
|
|
270
|
-
typedef enum {
|
271
|
-
|
415
|
+
typedef enum {
|
416
|
+
ZSTD_noDict = 0,
|
417
|
+
ZSTD_extDict = 1,
|
418
|
+
ZSTD_dictMatchState = 2,
|
419
|
+
ZSTD_dedicatedDictSearch = 3
|
420
|
+
} ZSTD_dictMode_e;
|
421
|
+
|
422
|
+
typedef enum {
|
423
|
+
ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict.
|
424
|
+
* In this mode we use both the srcSize and the dictSize
|
425
|
+
* when selecting and adjusting parameters.
|
426
|
+
*/
|
427
|
+
ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
|
428
|
+
* In this mode we only take the srcSize into account when selecting
|
429
|
+
* and adjusting parameters.
|
430
|
+
*/
|
431
|
+
ZSTD_cpm_createCDict = 2, /* Creating a CDict.
|
432
|
+
* In this mode we take both the source size and the dictionary size
|
433
|
+
* into account when selecting and adjusting the parameters.
|
434
|
+
*/
|
435
|
+
ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
|
436
|
+
* We don't know what these parameters are for. We default to the legacy
|
437
|
+
* behavior of taking both the source size and the dict size into account
|
438
|
+
* when selecting and adjusting parameters.
|
439
|
+
*/
|
440
|
+
} ZSTD_cParamMode_e;
|
272
441
|
|
273
442
|
typedef size_t (*ZSTD_blockCompressor) (
|
274
443
|
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
275
444
|
void const* src, size_t srcSize);
|
276
|
-
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
|
445
|
+
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
|
277
446
|
|
278
447
|
|
279
448
|
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
|
@@ -307,6 +476,31 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
|
|
307
476
|
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
|
308
477
|
}
|
309
478
|
|
479
|
+
typedef struct repcodes_s {
|
480
|
+
U32 rep[3];
|
481
|
+
} repcodes_t;
|
482
|
+
|
483
|
+
MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
|
484
|
+
{
|
485
|
+
repcodes_t newReps;
|
486
|
+
if (offset >= ZSTD_REP_NUM) { /* full offset */
|
487
|
+
newReps.rep[2] = rep[1];
|
488
|
+
newReps.rep[1] = rep[0];
|
489
|
+
newReps.rep[0] = offset - ZSTD_REP_MOVE;
|
490
|
+
} else { /* repcode */
|
491
|
+
U32 const repCode = offset + ll0;
|
492
|
+
if (repCode > 0) { /* note : if repCode==0, no change */
|
493
|
+
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
494
|
+
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
|
495
|
+
newReps.rep[1] = rep[0];
|
496
|
+
newReps.rep[0] = currentOffset;
|
497
|
+
} else { /* repCode == 0 */
|
498
|
+
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
|
499
|
+
}
|
500
|
+
}
|
501
|
+
return newReps;
|
502
|
+
}
|
503
|
+
|
310
504
|
/* ZSTD_cParam_withinBounds:
|
311
505
|
* @return 1 if value is within cParam bounds,
|
312
506
|
* 0 otherwise */
|
@@ -319,6 +513,30 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
|
|
319
513
|
return 1;
|
320
514
|
}
|
321
515
|
|
516
|
+
/* ZSTD_noCompressBlock() :
|
517
|
+
* Writes uncompressed block to dst buffer from given src.
|
518
|
+
* Returns the size of the block */
|
519
|
+
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
|
520
|
+
{
|
521
|
+
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
|
522
|
+
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
|
523
|
+
dstSize_tooSmall, "dst buf too small for uncompressed block");
|
524
|
+
MEM_writeLE24(dst, cBlockHeader24);
|
525
|
+
ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
|
526
|
+
return ZSTD_blockHeaderSize + srcSize;
|
527
|
+
}
|
528
|
+
|
529
|
+
MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
|
530
|
+
{
|
531
|
+
BYTE* const op = (BYTE*)dst;
|
532
|
+
U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
|
533
|
+
RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
|
534
|
+
MEM_writeLE24(op, cBlockHeader);
|
535
|
+
op[3] = src;
|
536
|
+
return 4;
|
537
|
+
}
|
538
|
+
|
539
|
+
|
322
540
|
/* ZSTD_minGain() :
|
323
541
|
* minimum compression required
|
324
542
|
* to generate a compress block or a compressed literals section.
|
@@ -331,43 +549,89 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
|
|
331
549
|
return (srcSize >> minlog) + 2;
|
332
550
|
}
|
333
551
|
|
552
|
+
MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
|
553
|
+
{
|
554
|
+
switch (cctxParams->literalCompressionMode) {
|
555
|
+
case ZSTD_lcm_huffman:
|
556
|
+
return 0;
|
557
|
+
case ZSTD_lcm_uncompressed:
|
558
|
+
return 1;
|
559
|
+
default:
|
560
|
+
assert(0 /* impossible: pre-validated */);
|
561
|
+
/* fall-through */
|
562
|
+
case ZSTD_lcm_auto:
|
563
|
+
return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
|
564
|
+
}
|
565
|
+
}
|
566
|
+
|
567
|
+
/*! ZSTD_safecopyLiterals() :
|
568
|
+
* memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
|
569
|
+
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
|
570
|
+
* large copies.
|
571
|
+
*/
|
572
|
+
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
|
573
|
+
assert(iend > ilimit_w);
|
574
|
+
if (ip <= ilimit_w) {
|
575
|
+
ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
|
576
|
+
op += ilimit_w - ip;
|
577
|
+
ip = ilimit_w;
|
578
|
+
}
|
579
|
+
while (ip < iend) *op++ = *ip++;
|
580
|
+
}
|
581
|
+
|
334
582
|
/*! ZSTD_storeSeq() :
|
335
|
-
* Store a sequence (
|
336
|
-
* `
|
583
|
+
* Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
|
584
|
+
* `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
|
337
585
|
* `mlBase` : matchLength - MINMATCH
|
586
|
+
* Allowed to overread literals up to litLimit.
|
338
587
|
*/
|
339
|
-
|
588
|
+
HINT_INLINE UNUSED_ATTR
|
589
|
+
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
|
340
590
|
{
|
591
|
+
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
|
592
|
+
BYTE const* const litEnd = literals + litLength;
|
341
593
|
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
|
342
594
|
static const BYTE* g_start = NULL;
|
343
595
|
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
|
344
596
|
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
|
345
597
|
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
|
346
|
-
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)
|
598
|
+
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
|
347
599
|
}
|
348
600
|
#endif
|
349
601
|
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
|
350
602
|
/* copy Literals */
|
351
603
|
assert(seqStorePtr->maxNbLit <= 128 KB);
|
352
604
|
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
|
353
|
-
|
605
|
+
assert(literals + litLength <= litLimit);
|
606
|
+
if (litEnd <= litLimit_w) {
|
607
|
+
/* Common case we can use wildcopy.
|
608
|
+
* First copy 16 bytes, because literals are likely short.
|
609
|
+
*/
|
610
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
611
|
+
ZSTD_copy16(seqStorePtr->lit, literals);
|
612
|
+
if (litLength > 16) {
|
613
|
+
ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
|
614
|
+
}
|
615
|
+
} else {
|
616
|
+
ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
|
617
|
+
}
|
354
618
|
seqStorePtr->lit += litLength;
|
355
619
|
|
356
620
|
/* literal Length */
|
357
621
|
if (litLength>0xFFFF) {
|
358
|
-
assert(seqStorePtr->
|
359
|
-
seqStorePtr->
|
622
|
+
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
|
623
|
+
seqStorePtr->longLengthType = ZSTD_llt_literalLength;
|
360
624
|
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
361
625
|
}
|
362
626
|
seqStorePtr->sequences[0].litLength = (U16)litLength;
|
363
627
|
|
364
628
|
/* match offset */
|
365
|
-
seqStorePtr->sequences[0].offset =
|
629
|
+
seqStorePtr->sequences[0].offset = offCode + 1;
|
366
630
|
|
367
631
|
/* match Length */
|
368
632
|
if (mlBase>0xFFFF) {
|
369
|
-
assert(seqStorePtr->
|
370
|
-
seqStorePtr->
|
633
|
+
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
|
634
|
+
seqStorePtr->longLengthType = ZSTD_llt_matchLength;
|
371
635
|
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
372
636
|
}
|
373
637
|
seqStorePtr->sequences[0].matchLength = (U16)mlBase;
|
@@ -384,9 +648,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
384
648
|
if (MEM_isLittleEndian()) {
|
385
649
|
if (MEM_64bits()) {
|
386
650
|
# if defined(_MSC_VER) && defined(_WIN64)
|
387
|
-
|
388
|
-
|
389
|
-
|
651
|
+
# if STATIC_BMI2
|
652
|
+
return _tzcnt_u64(val) >> 3;
|
653
|
+
# else
|
654
|
+
unsigned long r = 0;
|
655
|
+
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
|
656
|
+
# endif
|
390
657
|
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
391
658
|
return (__builtin_ctzll((U64)val) >> 3);
|
392
659
|
# else
|
@@ -403,8 +670,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
403
670
|
} else { /* 32 bits */
|
404
671
|
# if defined(_MSC_VER)
|
405
672
|
unsigned long r=0;
|
406
|
-
_BitScanForward( &r, (U32)val );
|
407
|
-
return (unsigned)(r>>3);
|
673
|
+
return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
|
408
674
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
409
675
|
return (__builtin_ctz((U32)val) >> 3);
|
410
676
|
# else
|
@@ -418,9 +684,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
418
684
|
} else { /* Big Endian CPU */
|
419
685
|
if (MEM_64bits()) {
|
420
686
|
# if defined(_MSC_VER) && defined(_WIN64)
|
421
|
-
|
422
|
-
|
423
|
-
|
687
|
+
# if STATIC_BMI2
|
688
|
+
return _lzcnt_u64(val) >> 3;
|
689
|
+
# else
|
690
|
+
unsigned long r = 0;
|
691
|
+
return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
|
692
|
+
# endif
|
424
693
|
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
425
694
|
return (__builtin_clzll(val) >> 3);
|
426
695
|
# else
|
@@ -434,8 +703,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
434
703
|
} else { /* 32 bits */
|
435
704
|
# if defined(_MSC_VER)
|
436
705
|
unsigned long r = 0;
|
437
|
-
_BitScanReverse( &r, (unsigned long)val );
|
438
|
-
return (unsigned)(r>>3);
|
706
|
+
return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
|
439
707
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
440
708
|
return (__builtin_clz((U32)val) >> 3);
|
441
709
|
# else
|
@@ -516,7 +784,8 @@ static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
|
516
784
|
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
517
785
|
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
518
786
|
|
519
|
-
MEM_STATIC
|
787
|
+
MEM_STATIC FORCE_INLINE_ATTR
|
788
|
+
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
|
520
789
|
{
|
521
790
|
switch(mls)
|
522
791
|
{
|
@@ -613,6 +882,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
|
|
613
882
|
window->dictLimit = end;
|
614
883
|
}
|
615
884
|
|
885
|
+
MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
|
886
|
+
{
|
887
|
+
return window.dictLimit == 1 &&
|
888
|
+
window.lowLimit == 1 &&
|
889
|
+
(window.nextSrc - window.base) == 1;
|
890
|
+
}
|
891
|
+
|
616
892
|
/**
|
617
893
|
* ZSTD_window_hasExtDict():
|
618
894
|
* Returns non-zero if the window has a non-empty extDict.
|
@@ -632,20 +908,74 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
|
|
632
908
|
return ZSTD_window_hasExtDict(ms->window) ?
|
633
909
|
ZSTD_extDict :
|
634
910
|
ms->dictMatchState != NULL ?
|
635
|
-
ZSTD_dictMatchState :
|
911
|
+
(ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
|
636
912
|
ZSTD_noDict;
|
637
913
|
}
|
638
914
|
|
915
|
+
/* Defining this macro to non-zero tells zstd to run the overflow correction
|
916
|
+
* code much more frequently. This is very inefficient, and should only be
|
917
|
+
* used for tests and fuzzers.
|
918
|
+
*/
|
919
|
+
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
|
920
|
+
# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
921
|
+
# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
|
922
|
+
# else
|
923
|
+
# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
|
924
|
+
# endif
|
925
|
+
#endif
|
926
|
+
|
927
|
+
/**
|
928
|
+
* ZSTD_window_canOverflowCorrect():
|
929
|
+
* Returns non-zero if the indices are large enough for overflow correction
|
930
|
+
* to work correctly without impacting compression ratio.
|
931
|
+
*/
|
932
|
+
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
|
933
|
+
U32 cycleLog,
|
934
|
+
U32 maxDist,
|
935
|
+
U32 loadedDictEnd,
|
936
|
+
void const* src)
|
937
|
+
{
|
938
|
+
U32 const cycleSize = 1u << cycleLog;
|
939
|
+
U32 const curr = (U32)((BYTE const*)src - window.base);
|
940
|
+
U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);
|
941
|
+
|
942
|
+
/* Adjust the min index to backoff the overflow correction frequency,
|
943
|
+
* so we don't waste too much CPU in overflow correction. If this
|
944
|
+
* computation overflows we don't really care, we just need to make
|
945
|
+
* sure it is at least minIndexToOverflowCorrect.
|
946
|
+
*/
|
947
|
+
U32 const adjustment = window.nbOverflowCorrections + 1;
|
948
|
+
U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
|
949
|
+
minIndexToOverflowCorrect);
|
950
|
+
U32 const indexLargeEnough = curr > adjustedIndex;
|
951
|
+
|
952
|
+
/* Only overflow correct early if the dictionary is invalidated already,
|
953
|
+
* so we don't hurt compression ratio.
|
954
|
+
*/
|
955
|
+
U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
|
956
|
+
|
957
|
+
return indexLargeEnough && dictionaryInvalidated;
|
958
|
+
}
|
959
|
+
|
639
960
|
/**
|
640
961
|
* ZSTD_window_needOverflowCorrection():
|
641
962
|
* Returns non-zero if the indices are getting too large and need overflow
|
642
963
|
* protection.
|
643
964
|
*/
|
644
965
|
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
|
966
|
+
U32 cycleLog,
|
967
|
+
U32 maxDist,
|
968
|
+
U32 loadedDictEnd,
|
969
|
+
void const* src,
|
645
970
|
void const* srcEnd)
|
646
971
|
{
|
647
|
-
U32 const
|
648
|
-
|
972
|
+
U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
|
973
|
+
if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
|
974
|
+
if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
|
975
|
+
return 1;
|
976
|
+
}
|
977
|
+
}
|
978
|
+
return curr > ZSTD_CURRENT_MAX;
|
649
979
|
}
|
650
980
|
|
651
981
|
/**
|
@@ -656,7 +986,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
|
|
656
986
|
*
|
657
987
|
* The least significant cycleLog bits of the indices must remain the same,
|
658
988
|
* which may be 0. Every index up to maxDist in the past must be valid.
|
659
|
-
* NOTE: (maxDist & cycleMask) must be zero.
|
660
989
|
*/
|
661
990
|
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
662
991
|
U32 maxDist, void const* src)
|
@@ -680,19 +1009,41 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
|
680
1009
|
* 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
|
681
1010
|
* windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
|
682
1011
|
*/
|
683
|
-
U32 const
|
684
|
-
U32 const
|
685
|
-
U32 const
|
686
|
-
U32 const
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
1012
|
+
U32 const cycleSize = 1u << cycleLog;
|
1013
|
+
U32 const cycleMask = cycleSize - 1;
|
1014
|
+
U32 const curr = (U32)((BYTE const*)src - window->base);
|
1015
|
+
U32 const currentCycle0 = curr & cycleMask;
|
1016
|
+
/* Exclude zero so that newCurrent - maxDist >= 1. */
|
1017
|
+
U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
|
1018
|
+
U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
|
1019
|
+
U32 const correction = curr - newCurrent;
|
1020
|
+
/* maxDist must be a power of two so that:
|
1021
|
+
* (newCurrent & cycleMask) == (curr & cycleMask)
|
1022
|
+
* This is required to not corrupt the chains / binary tree.
|
1023
|
+
*/
|
1024
|
+
assert((maxDist & (maxDist - 1)) == 0);
|
1025
|
+
assert((curr & cycleMask) == (newCurrent & cycleMask));
|
1026
|
+
assert(curr > newCurrent);
|
1027
|
+
if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
|
1028
|
+
/* Loose bound, should be around 1<<29 (see above) */
|
1029
|
+
assert(correction > 1<<28);
|
1030
|
+
}
|
691
1031
|
|
692
1032
|
window->base += correction;
|
693
1033
|
window->dictBase += correction;
|
694
|
-
window->lowLimit
|
695
|
-
window->
|
1034
|
+
if (window->lowLimit <= correction) window->lowLimit = 1;
|
1035
|
+
else window->lowLimit -= correction;
|
1036
|
+
if (window->dictLimit <= correction) window->dictLimit = 1;
|
1037
|
+
else window->dictLimit -= correction;
|
1038
|
+
|
1039
|
+
/* Ensure we can still reference the full window. */
|
1040
|
+
assert(newCurrent >= maxDist);
|
1041
|
+
assert(newCurrent - maxDist >= 1);
|
1042
|
+
/* Ensure that lowLimit and dictLimit didn't underflow. */
|
1043
|
+
assert(window->lowLimit <= newCurrent);
|
1044
|
+
assert(window->dictLimit <= newCurrent);
|
1045
|
+
|
1046
|
+
++window->nbOverflowCorrections;
|
696
1047
|
|
697
1048
|
DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
|
698
1049
|
window->lowLimit);
|
@@ -763,24 +1114,47 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
|
763
1114
|
|
764
1115
|
/* Similar to ZSTD_window_enforceMaxDist(),
|
765
1116
|
* but only invalidates dictionary
|
766
|
-
* when input progresses beyond window size.
|
1117
|
+
* when input progresses beyond window size.
|
1118
|
+
* assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
|
1119
|
+
* loadedDictEnd uses same referential as window->base
|
1120
|
+
* maxDist is the window size */
|
767
1121
|
MEM_STATIC void
|
768
|
-
ZSTD_checkDictValidity(ZSTD_window_t* window,
|
1122
|
+
ZSTD_checkDictValidity(const ZSTD_window_t* window,
|
769
1123
|
const void* blockEnd,
|
770
1124
|
U32 maxDist,
|
771
1125
|
U32* loadedDictEndPtr,
|
772
1126
|
const ZSTD_matchState_t** dictMatchStatePtr)
|
773
1127
|
{
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
1128
|
+
assert(loadedDictEndPtr != NULL);
|
1129
|
+
assert(dictMatchStatePtr != NULL);
|
1130
|
+
{ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
|
1131
|
+
U32 const loadedDictEnd = *loadedDictEndPtr;
|
1132
|
+
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
|
1133
|
+
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
|
1134
|
+
assert(blockEndIdx >= loadedDictEnd);
|
1135
|
+
|
1136
|
+
if (blockEndIdx > loadedDictEnd + maxDist) {
|
1137
|
+
/* On reaching window size, dictionaries are invalidated.
|
1138
|
+
* For simplification, if window size is reached anywhere within next block,
|
1139
|
+
* the dictionary is invalidated for the full block.
|
1140
|
+
*/
|
1141
|
+
DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
|
1142
|
+
*loadedDictEndPtr = 0;
|
1143
|
+
*dictMatchStatePtr = NULL;
|
1144
|
+
} else {
|
1145
|
+
if (*loadedDictEndPtr != 0) {
|
1146
|
+
DEBUGLOG(6, "dictionary considered valid for current block");
|
1147
|
+
} } }
|
1148
|
+
}
|
778
1149
|
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
1150
|
+
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
|
1151
|
+
ZSTD_memset(window, 0, sizeof(*window));
|
1152
|
+
window->base = (BYTE const*)"";
|
1153
|
+
window->dictBase = (BYTE const*)"";
|
1154
|
+
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
|
1155
|
+
window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
|
1156
|
+
window->nextSrc = window->base + 1; /* see issue #1241 */
|
1157
|
+
window->nbOverflowCorrections = 0;
|
784
1158
|
}
|
785
1159
|
|
786
1160
|
/**
|
@@ -791,13 +1165,18 @@ ZSTD_checkDictValidity(ZSTD_window_t* window,
|
|
791
1165
|
* Returns non-zero if the segment is contiguous.
|
792
1166
|
*/
|
793
1167
|
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
794
|
-
void const* src, size_t srcSize
|
1168
|
+
void const* src, size_t srcSize,
|
1169
|
+
int forceNonContiguous)
|
795
1170
|
{
|
796
1171
|
BYTE const* const ip = (BYTE const*)src;
|
797
1172
|
U32 contiguous = 1;
|
798
1173
|
DEBUGLOG(5, "ZSTD_window_update");
|
1174
|
+
if (srcSize == 0)
|
1175
|
+
return contiguous;
|
1176
|
+
assert(window->base != NULL);
|
1177
|
+
assert(window->dictBase != NULL);
|
799
1178
|
/* Check if blocks follow each other */
|
800
|
-
if (src != window->nextSrc) {
|
1179
|
+
if (src != window->nextSrc || forceNonContiguous) {
|
801
1180
|
/* not contiguous */
|
802
1181
|
size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
|
803
1182
|
DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
|
@@ -806,7 +1185,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
|
806
1185
|
window->dictLimit = (U32)distanceFromBase;
|
807
1186
|
window->dictBase = window->base;
|
808
1187
|
window->base = ip - distanceFromBase;
|
809
|
-
|
1188
|
+
/* ms->nextToUpdate = window->dictLimit; */
|
810
1189
|
if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
|
811
1190
|
contiguous = 0;
|
812
1191
|
}
|
@@ -822,6 +1201,40 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
|
822
1201
|
return contiguous;
|
823
1202
|
}
|
824
1203
|
|
1204
|
+
/**
|
1205
|
+
* Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
|
1206
|
+
*/
|
1207
|
+
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
|
1208
|
+
{
|
1209
|
+
U32 const maxDistance = 1U << windowLog;
|
1210
|
+
U32 const lowestValid = ms->window.lowLimit;
|
1211
|
+
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
1212
|
+
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
1213
|
+
/* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
|
1214
|
+
* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
|
1215
|
+
* valid for the entire block. So this check is sufficient to find the lowest valid match index.
|
1216
|
+
*/
|
1217
|
+
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
1218
|
+
return matchLowest;
|
1219
|
+
}
|
1220
|
+
|
1221
|
+
/**
|
1222
|
+
* Returns the lowest allowed match index in the prefix.
|
1223
|
+
*/
|
1224
|
+
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
|
1225
|
+
{
|
1226
|
+
U32 const maxDistance = 1U << windowLog;
|
1227
|
+
U32 const lowestValid = ms->window.dictLimit;
|
1228
|
+
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
1229
|
+
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
1230
|
+
/* When computing the lowest prefix index we need to take the dictionary into account to handle
|
1231
|
+
* the edge case where the dictionary and the source are contiguous in memory.
|
1232
|
+
*/
|
1233
|
+
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
1234
|
+
return matchLowest;
|
1235
|
+
}
|
1236
|
+
|
1237
|
+
|
825
1238
|
|
826
1239
|
/* debug functions */
|
827
1240
|
#if (DEBUGLEVEL>=2)
|
@@ -859,6 +1272,20 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
|
859
1272
|
}
|
860
1273
|
#endif
|
861
1274
|
|
1275
|
+
/* ===============================================================
|
1276
|
+
* Shared internal declarations
|
1277
|
+
* These prototypes may be called from sources not in lib/compress
|
1278
|
+
* =============================================================== */
|
1279
|
+
|
1280
|
+
/* ZSTD_loadCEntropy() :
|
1281
|
+
* dict : must point at beginning of a valid zstd dictionary.
|
1282
|
+
* return : size of dictionary header (size of magic number + dict ID + entropy tables)
|
1283
|
+
* assumptions : magic number supposed already checked
|
1284
|
+
* and dictSize >= 8 */
|
1285
|
+
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
1286
|
+
const void* const dict, size_t dictSize);
|
1287
|
+
|
1288
|
+
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
|
862
1289
|
|
863
1290
|
/* ==============================================================
|
864
1291
|
* Private declarations
|
@@ -868,9 +1295,10 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
|
868
1295
|
/* ZSTD_getCParamsFromCCtxParams() :
|
869
1296
|
* cParams are built depending on compressionLevel, src size hints,
|
870
1297
|
* LDM and manually set compression parameters.
|
1298
|
+
* Note: srcSizeHint == 0 means 0!
|
871
1299
|
*/
|
872
1300
|
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
873
|
-
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
|
1301
|
+
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
|
874
1302
|
|
875
1303
|
/*! ZSTD_initCStream_internal() :
|
876
1304
|
* Private use only. Init streaming operation.
|
@@ -880,7 +1308,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
|
880
1308
|
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
|
881
1309
|
const void* dict, size_t dictSize,
|
882
1310
|
const ZSTD_CDict* cdict,
|
883
|
-
ZSTD_CCtx_params
|
1311
|
+
const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
|
884
1312
|
|
885
1313
|
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
|
886
1314
|
|
@@ -895,7 +1323,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
|
|
895
1323
|
ZSTD_dictContentType_e dictContentType,
|
896
1324
|
ZSTD_dictTableLoadMethod_e dtlm,
|
897
1325
|
const ZSTD_CDict* cdict,
|
898
|
-
ZSTD_CCtx_params params,
|
1326
|
+
const ZSTD_CCtx_params* params,
|
899
1327
|
unsigned long long pledgedSrcSize);
|
900
1328
|
|
901
1329
|
/* ZSTD_compress_advanced_internal() :
|
@@ -904,7 +1332,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
|
|
904
1332
|
void* dst, size_t dstCapacity,
|
905
1333
|
const void* src, size_t srcSize,
|
906
1334
|
const void* dict,size_t dictSize,
|
907
|
-
ZSTD_CCtx_params params);
|
1335
|
+
const ZSTD_CCtx_params* params);
|
908
1336
|
|
909
1337
|
|
910
1338
|
/* ZSTD_writeLastEmptyBlock() :
|
@@ -927,5 +1355,13 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
|
|
927
1355
|
*/
|
928
1356
|
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
|
929
1357
|
|
1358
|
+
/** ZSTD_cycleLog() :
|
1359
|
+
* condition for correct operation : hashLog > 1 */
|
1360
|
+
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
|
1361
|
+
|
1362
|
+
/** ZSTD_CCtx_trace() :
|
1363
|
+
* Trace the end of a compression call.
|
1364
|
+
*/
|
1365
|
+
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
|
930
1366
|
|
931
1367
|
#endif /* ZSTD_COMPRESS_H */
|