zstd-ruby 1.4.0.0 → 1.4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/Makefile +274 -107
- data/ext/zstdruby/libzstd/README.md +75 -16
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +154 -5
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +7 -3
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
- data/ext/zstdruby/libzstd/common/huf.h +41 -38
- data/ext/zstdruby/libzstd/common/mem.h +68 -22
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
- data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
- data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/zstd.h +655 -118
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +20 -10
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -18,7 +18,9 @@
|
|
|
18
18
|
/*-*************************************
|
|
19
19
|
* Dependencies
|
|
20
20
|
***************************************/
|
|
21
|
-
#include "zstd_internal.h"
|
|
21
|
+
#include "../common/zstd_internal.h"
|
|
22
|
+
#include "../common/zstd_trace.h" /* ZSTD_TraceCtx */
|
|
23
|
+
#include "zstd_cwksp.h"
|
|
22
24
|
#ifdef ZSTD_MULTITHREAD
|
|
23
25
|
# include "zstdmt_compress.h"
|
|
24
26
|
#endif
|
|
@@ -27,19 +29,18 @@
|
|
|
27
29
|
extern "C" {
|
|
28
30
|
#endif
|
|
29
31
|
|
|
30
|
-
|
|
31
32
|
/*-*************************************
|
|
32
33
|
* Constants
|
|
33
34
|
***************************************/
|
|
34
35
|
#define kSearchStrength 8
|
|
35
36
|
#define HASH_READ_SIZE 8
|
|
36
|
-
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1
|
|
37
|
+
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
|
|
37
38
|
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
|
|
38
39
|
It's not a big deal though : candidate will just be sorted again.
|
|
39
40
|
Additionally, candidate position 1 will be lost.
|
|
40
41
|
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
|
|
41
|
-
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
|
|
42
|
-
|
|
42
|
+
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
|
|
43
|
+
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
/*-*************************************
|
|
@@ -63,7 +64,7 @@ typedef struct {
|
|
|
63
64
|
} ZSTD_localDict;
|
|
64
65
|
|
|
65
66
|
typedef struct {
|
|
66
|
-
|
|
67
|
+
HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
|
|
67
68
|
HUF_repeat repeatMode;
|
|
68
69
|
} ZSTD_hufCTables_t;
|
|
69
70
|
|
|
@@ -82,10 +83,27 @@ typedef struct {
|
|
|
82
83
|
} ZSTD_entropyCTables_t;
|
|
83
84
|
|
|
84
85
|
typedef struct {
|
|
85
|
-
U32 off;
|
|
86
|
-
U32 len;
|
|
86
|
+
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
|
|
87
|
+
U32 len; /* Raw length of match */
|
|
87
88
|
} ZSTD_match_t;
|
|
88
89
|
|
|
90
|
+
typedef struct {
|
|
91
|
+
U32 offset; /* Offset of sequence */
|
|
92
|
+
U32 litLength; /* Length of literals prior to match */
|
|
93
|
+
U32 matchLength; /* Raw length of match */
|
|
94
|
+
} rawSeq;
|
|
95
|
+
|
|
96
|
+
typedef struct {
|
|
97
|
+
rawSeq* seq; /* The start of the sequences */
|
|
98
|
+
size_t pos; /* The index in seq where reading stopped. pos <= size. */
|
|
99
|
+
size_t posInSequence; /* The position within the sequence at seq[pos] where reading
|
|
100
|
+
stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
|
|
101
|
+
size_t size; /* The number of sequences. <= capacity. */
|
|
102
|
+
size_t capacity; /* The capacity starting from `seq` pointer */
|
|
103
|
+
} rawSeqStore_t;
|
|
104
|
+
|
|
105
|
+
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
|
|
106
|
+
|
|
89
107
|
typedef struct {
|
|
90
108
|
int price;
|
|
91
109
|
U32 off;
|
|
@@ -128,22 +146,31 @@ typedef struct {
|
|
|
128
146
|
BYTE const* base; /* All regular indexes relative to this position */
|
|
129
147
|
BYTE const* dictBase; /* extDict indexes relative to this position */
|
|
130
148
|
U32 dictLimit; /* below that point, need extDict */
|
|
131
|
-
U32 lowLimit; /* below that point, no more data */
|
|
149
|
+
U32 lowLimit; /* below that point, no more valid data */
|
|
132
150
|
} ZSTD_window_t;
|
|
133
151
|
|
|
134
152
|
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
|
|
135
153
|
struct ZSTD_matchState_t {
|
|
136
154
|
ZSTD_window_t window; /* State for window round buffer management */
|
|
137
|
-
U32 loadedDictEnd; /* index of end of dictionary
|
|
155
|
+
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
|
|
156
|
+
* When loadedDictEnd != 0, a dictionary is in use, and still valid.
|
|
157
|
+
* This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
|
|
158
|
+
* Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
|
|
159
|
+
* When dict referential is copied into active context (i.e. not attached),
|
|
160
|
+
* loadedDictEnd == dictSize, since referential starts from zero.
|
|
161
|
+
*/
|
|
138
162
|
U32 nextToUpdate; /* index from which to continue table update */
|
|
139
|
-
U32
|
|
140
|
-
U32 hashLog3; /* dispatch table : larger == faster, more memory */
|
|
163
|
+
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
|
|
141
164
|
U32* hashTable;
|
|
142
165
|
U32* hashTable3;
|
|
143
166
|
U32* chainTable;
|
|
167
|
+
int dedicatedDictSearch; /* Indicates whether this matchState is using the
|
|
168
|
+
* dedicated dictionary search structure.
|
|
169
|
+
*/
|
|
144
170
|
optState_t opt; /* optimal parser state */
|
|
145
|
-
const ZSTD_matchState_t
|
|
171
|
+
const ZSTD_matchState_t* dictMatchState;
|
|
146
172
|
ZSTD_compressionParameters cParams;
|
|
173
|
+
const rawSeqStore_t* ldmSeqStore;
|
|
147
174
|
};
|
|
148
175
|
|
|
149
176
|
typedef struct {
|
|
@@ -157,12 +184,22 @@ typedef struct {
|
|
|
157
184
|
U32 checksum;
|
|
158
185
|
} ldmEntry_t;
|
|
159
186
|
|
|
187
|
+
typedef struct {
|
|
188
|
+
BYTE const* split;
|
|
189
|
+
U32 hash;
|
|
190
|
+
U32 checksum;
|
|
191
|
+
ldmEntry_t* bucket;
|
|
192
|
+
} ldmMatchCandidate_t;
|
|
193
|
+
|
|
194
|
+
#define LDM_BATCH_SIZE 64
|
|
195
|
+
|
|
160
196
|
typedef struct {
|
|
161
197
|
ZSTD_window_t window; /* State for the window round buffer management */
|
|
162
198
|
ldmEntry_t* hashTable;
|
|
199
|
+
U32 loadedDictEnd;
|
|
163
200
|
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
|
|
164
|
-
|
|
165
|
-
|
|
201
|
+
size_t splitIndices[LDM_BATCH_SIZE];
|
|
202
|
+
ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
|
|
166
203
|
} ldmState_t;
|
|
167
204
|
|
|
168
205
|
typedef struct {
|
|
@@ -175,17 +212,11 @@ typedef struct {
|
|
|
175
212
|
} ldmParams_t;
|
|
176
213
|
|
|
177
214
|
typedef struct {
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
typedef struct {
|
|
184
|
-
rawSeq* seq; /* The start of the sequences */
|
|
185
|
-
size_t pos; /* The position where reading stopped. <= size. */
|
|
186
|
-
size_t size; /* The number of sequences. <= capacity. */
|
|
187
|
-
size_t capacity; /* The capacity starting from `seq` pointer */
|
|
188
|
-
} rawSeqStore_t;
|
|
215
|
+
int collectSequences;
|
|
216
|
+
ZSTD_Sequence* seqStart;
|
|
217
|
+
size_t seqIndex;
|
|
218
|
+
size_t maxSequences;
|
|
219
|
+
} SeqCollector;
|
|
189
220
|
|
|
190
221
|
struct ZSTD_CCtx_params_s {
|
|
191
222
|
ZSTD_format_e format;
|
|
@@ -195,6 +226,12 @@ struct ZSTD_CCtx_params_s {
|
|
|
195
226
|
int compressionLevel;
|
|
196
227
|
int forceWindow; /* force back-references to respect limit of
|
|
197
228
|
* 1<<wLog, even for dictionary */
|
|
229
|
+
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
|
|
230
|
+
* No target when targetCBlockSize == 0.
|
|
231
|
+
* There is no guarantee on compressed block size */
|
|
232
|
+
int srcSizeHint; /* User's best guess of source size.
|
|
233
|
+
* Hint is not valid when srcSizeHint == 0.
|
|
234
|
+
* There is no guarantee that hint is close to actual source size */
|
|
198
235
|
|
|
199
236
|
ZSTD_dictAttachPref_e attachDictPref;
|
|
200
237
|
ZSTD_literalCompressionMode_e literalCompressionMode;
|
|
@@ -208,10 +245,34 @@ struct ZSTD_CCtx_params_s {
|
|
|
208
245
|
/* Long distance matching parameters */
|
|
209
246
|
ldmParams_t ldmParams;
|
|
210
247
|
|
|
248
|
+
/* Dedicated dict search algorithm trigger */
|
|
249
|
+
int enableDedicatedDictSearch;
|
|
250
|
+
|
|
251
|
+
/* Input/output buffer modes */
|
|
252
|
+
ZSTD_bufferMode_e inBufferMode;
|
|
253
|
+
ZSTD_bufferMode_e outBufferMode;
|
|
254
|
+
|
|
255
|
+
/* Sequence compression API */
|
|
256
|
+
ZSTD_sequenceFormat_e blockDelimiters;
|
|
257
|
+
int validateSequences;
|
|
258
|
+
|
|
211
259
|
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
|
|
212
260
|
ZSTD_customMem customMem;
|
|
213
261
|
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
|
214
262
|
|
|
263
|
+
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
|
|
264
|
+
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
|
|
265
|
+
|
|
266
|
+
/**
|
|
267
|
+
* Indicates whether this compression proceeds directly from user-provided
|
|
268
|
+
* source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
|
|
269
|
+
* whether the context needs to buffer the input/output (ZSTDb_buffered).
|
|
270
|
+
*/
|
|
271
|
+
typedef enum {
|
|
272
|
+
ZSTDb_not_buffered,
|
|
273
|
+
ZSTDb_buffered
|
|
274
|
+
} ZSTD_buffered_policy_e;
|
|
275
|
+
|
|
215
276
|
struct ZSTD_CCtx_s {
|
|
216
277
|
ZSTD_compressionStage_e stage;
|
|
217
278
|
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
|
|
@@ -219,17 +280,20 @@ struct ZSTD_CCtx_s {
|
|
|
219
280
|
ZSTD_CCtx_params requestedParams;
|
|
220
281
|
ZSTD_CCtx_params appliedParams;
|
|
221
282
|
U32 dictID;
|
|
283
|
+
size_t dictContentSize;
|
|
222
284
|
|
|
223
|
-
|
|
224
|
-
void* workSpace;
|
|
225
|
-
size_t workSpaceSize;
|
|
285
|
+
ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
|
|
226
286
|
size_t blockSize;
|
|
227
287
|
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
|
|
228
288
|
unsigned long long consumedSrcSize;
|
|
229
289
|
unsigned long long producedCSize;
|
|
230
290
|
XXH64_state_t xxhState;
|
|
231
291
|
ZSTD_customMem customMem;
|
|
292
|
+
ZSTD_threadPool* pool;
|
|
232
293
|
size_t staticSize;
|
|
294
|
+
SeqCollector seqCollector;
|
|
295
|
+
int isFirstBlock;
|
|
296
|
+
int initialized;
|
|
233
297
|
|
|
234
298
|
seqStore_t seqStore; /* sequences storage ptrs */
|
|
235
299
|
ldmState_t ldmState; /* long distance matching state */
|
|
@@ -237,7 +301,10 @@ struct ZSTD_CCtx_s {
|
|
|
237
301
|
size_t maxNbLdmSequences;
|
|
238
302
|
rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
|
|
239
303
|
ZSTD_blockState_t blockState;
|
|
240
|
-
U32* entropyWorkspace; /* entropy workspace of
|
|
304
|
+
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
|
|
305
|
+
|
|
306
|
+
/* Whether we are streaming or not */
|
|
307
|
+
ZSTD_buffered_policy_e bufferedPolicy;
|
|
241
308
|
|
|
242
309
|
/* streaming */
|
|
243
310
|
char* inBuff;
|
|
@@ -252,6 +319,10 @@ struct ZSTD_CCtx_s {
|
|
|
252
319
|
ZSTD_cStreamStage streamStage;
|
|
253
320
|
U32 frameEnded;
|
|
254
321
|
|
|
322
|
+
/* Stable in/out buffer verification */
|
|
323
|
+
ZSTD_inBuffer expectedInBuffer;
|
|
324
|
+
size_t expectedOutBufferSize;
|
|
325
|
+
|
|
255
326
|
/* Dictionary */
|
|
256
327
|
ZSTD_localDict localDict;
|
|
257
328
|
const ZSTD_CDict* cdict;
|
|
@@ -261,12 +332,41 @@ struct ZSTD_CCtx_s {
|
|
|
261
332
|
#ifdef ZSTD_MULTITHREAD
|
|
262
333
|
ZSTDMT_CCtx* mtctx;
|
|
263
334
|
#endif
|
|
335
|
+
|
|
336
|
+
/* Tracing */
|
|
337
|
+
#if ZSTD_TRACE
|
|
338
|
+
ZSTD_TraceCtx traceCtx;
|
|
339
|
+
#endif
|
|
264
340
|
};
|
|
265
341
|
|
|
266
342
|
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
|
267
343
|
|
|
268
|
-
typedef enum {
|
|
269
|
-
|
|
344
|
+
typedef enum {
|
|
345
|
+
ZSTD_noDict = 0,
|
|
346
|
+
ZSTD_extDict = 1,
|
|
347
|
+
ZSTD_dictMatchState = 2,
|
|
348
|
+
ZSTD_dedicatedDictSearch = 3
|
|
349
|
+
} ZSTD_dictMode_e;
|
|
350
|
+
|
|
351
|
+
typedef enum {
|
|
352
|
+
ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict.
|
|
353
|
+
* In this mode we use both the srcSize and the dictSize
|
|
354
|
+
* when selecting and adjusting parameters.
|
|
355
|
+
*/
|
|
356
|
+
ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
|
|
357
|
+
* In this mode we only take the srcSize into account when selecting
|
|
358
|
+
* and adjusting parameters.
|
|
359
|
+
*/
|
|
360
|
+
ZSTD_cpm_createCDict = 2, /* Creating a CDict.
|
|
361
|
+
* In this mode we take both the source size and the dictionary size
|
|
362
|
+
* into account when selecting and adjusting the parameters.
|
|
363
|
+
*/
|
|
364
|
+
ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
|
|
365
|
+
* We don't know what these parameters are for. We default to the legacy
|
|
366
|
+
* behavior of taking both the source size and the dict size into account
|
|
367
|
+
* when selecting and adjusting parameters.
|
|
368
|
+
*/
|
|
369
|
+
} ZSTD_cParamMode_e;
|
|
270
370
|
|
|
271
371
|
typedef size_t (*ZSTD_blockCompressor) (
|
|
272
372
|
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
@@ -305,26 +405,145 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
|
|
|
305
405
|
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
|
|
306
406
|
}
|
|
307
407
|
|
|
408
|
+
typedef struct repcodes_s {
|
|
409
|
+
U32 rep[3];
|
|
410
|
+
} repcodes_t;
|
|
411
|
+
|
|
412
|
+
MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
|
|
413
|
+
{
|
|
414
|
+
repcodes_t newReps;
|
|
415
|
+
if (offset >= ZSTD_REP_NUM) { /* full offset */
|
|
416
|
+
newReps.rep[2] = rep[1];
|
|
417
|
+
newReps.rep[1] = rep[0];
|
|
418
|
+
newReps.rep[0] = offset - ZSTD_REP_MOVE;
|
|
419
|
+
} else { /* repcode */
|
|
420
|
+
U32 const repCode = offset + ll0;
|
|
421
|
+
if (repCode > 0) { /* note : if repCode==0, no change */
|
|
422
|
+
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
|
423
|
+
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
|
|
424
|
+
newReps.rep[1] = rep[0];
|
|
425
|
+
newReps.rep[0] = currentOffset;
|
|
426
|
+
} else { /* repCode == 0 */
|
|
427
|
+
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
return newReps;
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
/* ZSTD_cParam_withinBounds:
|
|
434
|
+
* @return 1 if value is within cParam bounds,
|
|
435
|
+
* 0 otherwise */
|
|
436
|
+
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
|
|
437
|
+
{
|
|
438
|
+
ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
|
|
439
|
+
if (ZSTD_isError(bounds.error)) return 0;
|
|
440
|
+
if (value < bounds.lowerBound) return 0;
|
|
441
|
+
if (value > bounds.upperBound) return 0;
|
|
442
|
+
return 1;
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
/* ZSTD_noCompressBlock() :
|
|
446
|
+
* Writes uncompressed block to dst buffer from given src.
|
|
447
|
+
* Returns the size of the block */
|
|
448
|
+
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
|
|
449
|
+
{
|
|
450
|
+
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
|
|
451
|
+
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
|
|
452
|
+
dstSize_tooSmall, "dst buf too small for uncompressed block");
|
|
453
|
+
MEM_writeLE24(dst, cBlockHeader24);
|
|
454
|
+
ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
|
|
455
|
+
return ZSTD_blockHeaderSize + srcSize;
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
|
|
459
|
+
{
|
|
460
|
+
BYTE* const op = (BYTE*)dst;
|
|
461
|
+
U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
|
|
462
|
+
RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
|
|
463
|
+
MEM_writeLE24(op, cBlockHeader);
|
|
464
|
+
op[3] = src;
|
|
465
|
+
return 4;
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
/* ZSTD_minGain() :
|
|
470
|
+
* minimum compression required
|
|
471
|
+
* to generate a compressed block or a compressed literals section.
|
|
472
|
+
* note : use same formula for both situations */
|
|
473
|
+
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
|
|
474
|
+
{
|
|
475
|
+
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
|
|
476
|
+
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
|
|
477
|
+
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
|
|
478
|
+
return (srcSize >> minlog) + 2;
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
|
|
482
|
+
{
|
|
483
|
+
switch (cctxParams->literalCompressionMode) {
|
|
484
|
+
case ZSTD_lcm_huffman:
|
|
485
|
+
return 0;
|
|
486
|
+
case ZSTD_lcm_uncompressed:
|
|
487
|
+
return 1;
|
|
488
|
+
default:
|
|
489
|
+
assert(0 /* impossible: pre-validated */);
|
|
490
|
+
/* fall-through */
|
|
491
|
+
case ZSTD_lcm_auto:
|
|
492
|
+
return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
/*! ZSTD_safecopyLiterals() :
|
|
497
|
+
* memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
|
|
498
|
+
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
|
|
499
|
+
* large copies.
|
|
500
|
+
*/
|
|
501
|
+
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
|
|
502
|
+
assert(iend > ilimit_w);
|
|
503
|
+
if (ip <= ilimit_w) {
|
|
504
|
+
ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
|
|
505
|
+
op += ilimit_w - ip;
|
|
506
|
+
ip = ilimit_w;
|
|
507
|
+
}
|
|
508
|
+
while (ip < iend) *op++ = *ip++;
|
|
509
|
+
}
|
|
510
|
+
|
|
308
511
|
/*! ZSTD_storeSeq() :
|
|
309
|
-
* Store a sequence (
|
|
310
|
-
* `
|
|
512
|
+
* Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
|
|
513
|
+
* `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
|
|
311
514
|
* `mlBase` : matchLength - MINMATCH
|
|
515
|
+
* Allowed to overread literals up to litLimit.
|
|
312
516
|
*/
|
|
313
|
-
|
|
517
|
+
HINT_INLINE UNUSED_ATTR
|
|
518
|
+
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
|
|
314
519
|
{
|
|
520
|
+
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
|
|
521
|
+
BYTE const* const litEnd = literals + litLength;
|
|
315
522
|
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
|
|
316
523
|
static const BYTE* g_start = NULL;
|
|
317
524
|
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
|
|
318
525
|
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
|
|
319
526
|
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
|
|
320
|
-
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)
|
|
527
|
+
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
|
|
321
528
|
}
|
|
322
529
|
#endif
|
|
323
530
|
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
|
|
324
531
|
/* copy Literals */
|
|
325
532
|
assert(seqStorePtr->maxNbLit <= 128 KB);
|
|
326
533
|
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
|
|
327
|
-
|
|
534
|
+
assert(literals + litLength <= litLimit);
|
|
535
|
+
if (litEnd <= litLimit_w) {
|
|
536
|
+
/* Common case we can use wildcopy.
|
|
537
|
+
* First copy 16 bytes, because literals are likely short.
|
|
538
|
+
*/
|
|
539
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
|
540
|
+
ZSTD_copy16(seqStorePtr->lit, literals);
|
|
541
|
+
if (litLength > 16) {
|
|
542
|
+
ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
|
|
543
|
+
}
|
|
544
|
+
} else {
|
|
545
|
+
ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
|
|
546
|
+
}
|
|
328
547
|
seqStorePtr->lit += litLength;
|
|
329
548
|
|
|
330
549
|
/* literal Length */
|
|
@@ -336,7 +555,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
|
|
|
336
555
|
seqStorePtr->sequences[0].litLength = (U16)litLength;
|
|
337
556
|
|
|
338
557
|
/* match offset */
|
|
339
|
-
seqStorePtr->sequences[0].offset =
|
|
558
|
+
seqStorePtr->sequences[0].offset = offCode + 1;
|
|
340
559
|
|
|
341
560
|
/* match Length */
|
|
342
561
|
if (mlBase>0xFFFF) {
|
|
@@ -358,9 +577,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
|
358
577
|
if (MEM_isLittleEndian()) {
|
|
359
578
|
if (MEM_64bits()) {
|
|
360
579
|
# if defined(_MSC_VER) && defined(_WIN64)
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
580
|
+
# if STATIC_BMI2
|
|
581
|
+
return _tzcnt_u64(val) >> 3;
|
|
582
|
+
# else
|
|
583
|
+
unsigned long r = 0;
|
|
584
|
+
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
|
|
585
|
+
# endif
|
|
364
586
|
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
|
365
587
|
return (__builtin_ctzll((U64)val) >> 3);
|
|
366
588
|
# else
|
|
@@ -377,8 +599,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
|
377
599
|
} else { /* 32 bits */
|
|
378
600
|
# if defined(_MSC_VER)
|
|
379
601
|
unsigned long r=0;
|
|
380
|
-
_BitScanForward( &r, (U32)val );
|
|
381
|
-
return (unsigned)(r>>3);
|
|
602
|
+
return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
|
|
382
603
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
383
604
|
return (__builtin_ctz((U32)val) >> 3);
|
|
384
605
|
# else
|
|
@@ -392,9 +613,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
|
392
613
|
} else { /* Big Endian CPU */
|
|
393
614
|
if (MEM_64bits()) {
|
|
394
615
|
# if defined(_MSC_VER) && defined(_WIN64)
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
616
|
+
# if STATIC_BMI2
|
|
617
|
+
return _lzcnt_u64(val) >> 3;
|
|
618
|
+
# else
|
|
619
|
+
unsigned long r = 0;
|
|
620
|
+
return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
|
|
621
|
+
# endif
|
|
398
622
|
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
|
399
623
|
return (__builtin_clzll(val) >> 3);
|
|
400
624
|
# else
|
|
@@ -408,8 +632,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
|
408
632
|
} else { /* 32 bits */
|
|
409
633
|
# if defined(_MSC_VER)
|
|
410
634
|
unsigned long r = 0;
|
|
411
|
-
_BitScanReverse( &r, (unsigned long)val );
|
|
412
|
-
return (unsigned)(r>>3);
|
|
635
|
+
return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
|
|
413
636
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
414
637
|
return (__builtin_clz((U32)val) >> 3);
|
|
415
638
|
# else
|
|
@@ -490,7 +713,8 @@ static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
|
|
490
713
|
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
|
491
714
|
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
|
492
715
|
|
|
493
|
-
MEM_STATIC
|
|
716
|
+
MEM_STATIC FORCE_INLINE_ATTR
|
|
717
|
+
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
|
|
494
718
|
{
|
|
495
719
|
switch(mls)
|
|
496
720
|
{
|
|
@@ -564,6 +788,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
|
|
|
564
788
|
/*-*************************************
|
|
565
789
|
* Round buffer management
|
|
566
790
|
***************************************/
|
|
791
|
+
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
|
|
792
|
+
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
|
|
793
|
+
#endif
|
|
567
794
|
/* Max current allowed */
|
|
568
795
|
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
|
|
569
796
|
/* Maximum chunk size before overflow correction needs to be called again */
|
|
@@ -603,7 +830,7 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
|
|
|
603
830
|
return ZSTD_window_hasExtDict(ms->window) ?
|
|
604
831
|
ZSTD_extDict :
|
|
605
832
|
ms->dictMatchState != NULL ?
|
|
606
|
-
ZSTD_dictMatchState :
|
|
833
|
+
(ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
|
|
607
834
|
ZSTD_noDict;
|
|
608
835
|
}
|
|
609
836
|
|
|
@@ -615,8 +842,8 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
|
|
|
615
842
|
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
|
|
616
843
|
void const* srcEnd)
|
|
617
844
|
{
|
|
618
|
-
U32 const
|
|
619
|
-
return
|
|
845
|
+
U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
|
|
846
|
+
return curr > ZSTD_CURRENT_MAX;
|
|
620
847
|
}
|
|
621
848
|
|
|
622
849
|
/**
|
|
@@ -652,18 +879,30 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
|
|
652
879
|
* windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
|
|
653
880
|
*/
|
|
654
881
|
U32 const cycleMask = (1U << cycleLog) - 1;
|
|
655
|
-
U32 const
|
|
656
|
-
U32 const
|
|
657
|
-
|
|
882
|
+
U32 const curr = (U32)((BYTE const*)src - window->base);
|
|
883
|
+
U32 const currentCycle0 = curr & cycleMask;
|
|
884
|
+
/* Exclude zero so that newCurrent - maxDist >= 1. */
|
|
885
|
+
U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
|
|
886
|
+
U32 const newCurrent = currentCycle1 + maxDist;
|
|
887
|
+
U32 const correction = curr - newCurrent;
|
|
658
888
|
assert((maxDist & cycleMask) == 0);
|
|
659
|
-
assert(
|
|
889
|
+
assert(curr > newCurrent);
|
|
660
890
|
/* Loose bound, should be around 1<<29 (see above) */
|
|
661
891
|
assert(correction > 1<<28);
|
|
662
892
|
|
|
663
893
|
window->base += correction;
|
|
664
894
|
window->dictBase += correction;
|
|
665
|
-
window->lowLimit
|
|
666
|
-
window->
|
|
895
|
+
if (window->lowLimit <= correction) window->lowLimit = 1;
|
|
896
|
+
else window->lowLimit -= correction;
|
|
897
|
+
if (window->dictLimit <= correction) window->dictLimit = 1;
|
|
898
|
+
else window->dictLimit -= correction;
|
|
899
|
+
|
|
900
|
+
/* Ensure we can still reference the full window. */
|
|
901
|
+
assert(newCurrent >= maxDist);
|
|
902
|
+
assert(newCurrent - maxDist >= 1);
|
|
903
|
+
/* Ensure that lowLimit and dictLimit didn't underflow. */
|
|
904
|
+
assert(window->lowLimit <= newCurrent);
|
|
905
|
+
assert(window->dictLimit <= newCurrent);
|
|
667
906
|
|
|
668
907
|
DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
|
|
669
908
|
window->lowLimit);
|
|
@@ -675,31 +914,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
|
|
675
914
|
* Updates lowLimit so that:
|
|
676
915
|
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
|
|
677
916
|
*
|
|
678
|
-
*
|
|
679
|
-
* This must be called before a block compression call
|
|
680
|
-
*
|
|
917
|
+
* It ensures index is valid as long as index >= lowLimit.
|
|
918
|
+
* This must be called before a block compression call.
|
|
919
|
+
*
|
|
920
|
+
* loadedDictEnd is only defined if a dictionary is in use for current compression.
|
|
921
|
+
* As the name implies, loadedDictEnd represents the index at end of dictionary.
|
|
922
|
+
* The value lies within context's referential, it can be directly compared to blockEndIdx.
|
|
681
923
|
*
|
|
682
|
-
* If loadedDictEndPtr is
|
|
683
|
-
*
|
|
684
|
-
*
|
|
685
|
-
*
|
|
686
|
-
*
|
|
924
|
+
* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
|
|
925
|
+
* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
|
|
926
|
+
* This is because dictionaries are allowed to be referenced fully
|
|
927
|
+
* as long as the last byte of the dictionary is in the window.
|
|
928
|
+
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
|
|
687
929
|
*
|
|
688
|
-
* In normal dict mode, the
|
|
689
|
-
* dictMatchState mode, lowLimit and dictLimit are the same,
|
|
690
|
-
* is below them.
|
|
930
|
+
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
|
|
931
|
+
* In dictMatchState mode, lowLimit and dictLimit are the same,
|
|
932
|
+
* and the dictionary is below them.
|
|
933
|
+
* forceWindow and dictMatchState are therefore incompatible.
|
|
691
934
|
*/
|
|
692
935
|
MEM_STATIC void
|
|
693
936
|
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
|
694
|
-
|
|
695
|
-
U32
|
|
696
|
-
U32*
|
|
937
|
+
const void* blockEnd,
|
|
938
|
+
U32 maxDist,
|
|
939
|
+
U32* loadedDictEndPtr,
|
|
697
940
|
const ZSTD_matchState_t** dictMatchStatePtr)
|
|
698
941
|
{
|
|
699
|
-
U32 const blockEndIdx = (U32)((BYTE const*)
|
|
700
|
-
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
|
|
701
|
-
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
|
|
702
|
-
(unsigned)blockEndIdx, (unsigned)maxDist);
|
|
942
|
+
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
|
|
943
|
+
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
|
|
944
|
+
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
|
|
945
|
+
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
|
|
946
|
+
|
|
947
|
+
/* - When there is no dictionary : loadedDictEnd == 0.
|
|
948
|
+
In which case, the test (blockEndIdx > maxDist) is merely to avoid
|
|
949
|
+
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
|
|
950
|
+
- When there is a standard dictionary :
|
|
951
|
+
Index referential is copied from the dictionary,
|
|
952
|
+
which means it starts from 0.
|
|
953
|
+
In which case, loadedDictEnd == dictSize,
|
|
954
|
+
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
|
|
955
|
+
since `blockEndIdx` also starts from zero.
|
|
956
|
+
- When there is an attached dictionary :
|
|
957
|
+
loadedDictEnd is expressed within the referential of the context,
|
|
958
|
+
so it can be directly compared against blockEndIdx.
|
|
959
|
+
*/
|
|
703
960
|
if (blockEndIdx > maxDist + loadedDictEnd) {
|
|
704
961
|
U32 const newLowLimit = blockEndIdx - maxDist;
|
|
705
962
|
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
|
|
@@ -708,11 +965,54 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
|
|
708
965
|
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
|
|
709
966
|
window->dictLimit = window->lowLimit;
|
|
710
967
|
}
|
|
711
|
-
|
|
968
|
+
/* On reaching window size, dictionaries are invalidated */
|
|
969
|
+
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
|
|
970
|
+
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
|
|
971
|
+
}
|
|
972
|
+
}
|
|
973
|
+
|
|
974
|
+
/* Similar to ZSTD_window_enforceMaxDist(),
|
|
975
|
+
* but only invalidates dictionary
|
|
976
|
+
* when input progresses beyond window size.
|
|
977
|
+
* assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
|
|
978
|
+
* loadedDictEnd uses same referential as window->base
|
|
979
|
+
* maxDist is the window size */
|
|
980
|
+
MEM_STATIC void
|
|
981
|
+
ZSTD_checkDictValidity(const ZSTD_window_t* window,
|
|
982
|
+
const void* blockEnd,
|
|
983
|
+
U32 maxDist,
|
|
984
|
+
U32* loadedDictEndPtr,
|
|
985
|
+
const ZSTD_matchState_t** dictMatchStatePtr)
|
|
986
|
+
{
|
|
987
|
+
assert(loadedDictEndPtr != NULL);
|
|
988
|
+
assert(dictMatchStatePtr != NULL);
|
|
989
|
+
{ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
|
|
990
|
+
U32 const loadedDictEnd = *loadedDictEndPtr;
|
|
991
|
+
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
|
|
992
|
+
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
|
|
993
|
+
assert(blockEndIdx >= loadedDictEnd);
|
|
994
|
+
|
|
995
|
+
if (blockEndIdx > loadedDictEnd + maxDist) {
|
|
996
|
+
/* On reaching window size, dictionaries are invalidated.
|
|
997
|
+
* For simplification, if window size is reached anywhere within next block,
|
|
998
|
+
* the dictionary is invalidated for the full block.
|
|
999
|
+
*/
|
|
1000
|
+
DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
|
|
712
1001
|
*loadedDictEndPtr = 0;
|
|
713
|
-
if (dictMatchStatePtr)
|
|
714
1002
|
*dictMatchStatePtr = NULL;
|
|
715
|
-
|
|
1003
|
+
} else {
|
|
1004
|
+
if (*loadedDictEndPtr != 0) {
|
|
1005
|
+
DEBUGLOG(6, "dictionary considered valid for current block");
|
|
1006
|
+
} } }
|
|
1007
|
+
}
|
|
1008
|
+
|
|
1009
|
+
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
|
|
1010
|
+
ZSTD_memset(window, 0, sizeof(*window));
|
|
1011
|
+
window->base = (BYTE const*)"";
|
|
1012
|
+
window->dictBase = (BYTE const*)"";
|
|
1013
|
+
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
|
|
1014
|
+
window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
|
|
1015
|
+
window->nextSrc = window->base + 1; /* see issue #1241 */
|
|
716
1016
|
}
|
|
717
1017
|
|
|
718
1018
|
/**
|
|
@@ -728,6 +1028,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
|
|
728
1028
|
BYTE const* const ip = (BYTE const*)src;
|
|
729
1029
|
U32 contiguous = 1;
|
|
730
1030
|
DEBUGLOG(5, "ZSTD_window_update");
|
|
1031
|
+
if (srcSize == 0)
|
|
1032
|
+
return contiguous;
|
|
1033
|
+
assert(window->base != NULL);
|
|
1034
|
+
assert(window->dictBase != NULL);
|
|
731
1035
|
/* Check if blocks follow each other */
|
|
732
1036
|
if (src != window->nextSrc) {
|
|
733
1037
|
/* not contiguous */
|
|
@@ -738,7 +1042,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
|
|
738
1042
|
window->dictLimit = (U32)distanceFromBase;
|
|
739
1043
|
window->dictBase = window->base;
|
|
740
1044
|
window->base = ip - distanceFromBase;
|
|
741
|
-
|
|
1045
|
+
/* ms->nextToUpdate = window->dictLimit; */
|
|
742
1046
|
if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
|
|
743
1047
|
contiguous = 0;
|
|
744
1048
|
}
|
|
@@ -754,6 +1058,40 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
|
|
754
1058
|
return contiguous;
|
|
755
1059
|
}
|
|
756
1060
|
|
|
1061
|
+
/**
|
|
1062
|
+
* Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
|
|
1063
|
+
*/
|
|
1064
|
+
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
|
|
1065
|
+
{
|
|
1066
|
+
U32 const maxDistance = 1U << windowLog;
|
|
1067
|
+
U32 const lowestValid = ms->window.lowLimit;
|
|
1068
|
+
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
|
1069
|
+
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
|
1070
|
+
/* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
|
|
1071
|
+
* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
|
|
1072
|
+
* valid for the entire block. So this check is sufficient to find the lowest valid match index.
|
|
1073
|
+
*/
|
|
1074
|
+
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
|
1075
|
+
return matchLowest;
|
|
1076
|
+
}
|
|
1077
|
+
|
|
1078
|
+
/**
|
|
1079
|
+
* Returns the lowest allowed match index in the prefix.
|
|
1080
|
+
*/
|
|
1081
|
+
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
|
|
1082
|
+
{
|
|
1083
|
+
U32 const maxDistance = 1U << windowLog;
|
|
1084
|
+
U32 const lowestValid = ms->window.dictLimit;
|
|
1085
|
+
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
|
1086
|
+
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
|
1087
|
+
/* When computing the lowest prefix index we need to take the dictionary into account to handle
|
|
1088
|
+
* the edge case where the dictionary and the source are contiguous in memory.
|
|
1089
|
+
*/
|
|
1090
|
+
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
|
1091
|
+
return matchLowest;
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
|
|
757
1095
|
|
|
758
1096
|
/* debug functions */
|
|
759
1097
|
#if (DEBUGLEVEL>=2)
|
|
@@ -791,6 +1129,20 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
|
|
791
1129
|
}
|
|
792
1130
|
#endif
|
|
793
1131
|
|
|
1132
|
+
/* ===============================================================
|
|
1133
|
+
* Shared internal declarations
|
|
1134
|
+
* These prototypes may be called from sources not in lib/compress
|
|
1135
|
+
* =============================================================== */
|
|
1136
|
+
|
|
1137
|
+
/* ZSTD_loadCEntropy() :
|
|
1138
|
+
* dict : must point at beginning of a valid zstd dictionary.
|
|
1139
|
+
* return : size of dictionary header (size of magic number + dict ID + entropy tables)
|
|
1140
|
+
* assumptions : magic number supposed already checked
|
|
1141
|
+
* and dictSize >= 8 */
|
|
1142
|
+
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
1143
|
+
const void* const dict, size_t dictSize);
|
|
1144
|
+
|
|
1145
|
+
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
|
|
794
1146
|
|
|
795
1147
|
/* ==============================================================
|
|
796
1148
|
* Private declarations
|
|
@@ -800,9 +1152,10 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
|
|
800
1152
|
/* ZSTD_getCParamsFromCCtxParams() :
|
|
801
1153
|
* cParams are built depending on compressionLevel, src size hints,
|
|
802
1154
|
* LDM and manually set compression parameters.
|
|
1155
|
+
* Note: srcSizeHint == 0 means 0!
|
|
803
1156
|
*/
|
|
804
1157
|
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
|
805
|
-
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
|
|
1158
|
+
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
|
|
806
1159
|
|
|
807
1160
|
/*! ZSTD_initCStream_internal() :
|
|
808
1161
|
* Private use only. Init streaming operation.
|
|
@@ -812,7 +1165,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
|
|
812
1165
|
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
|
|
813
1166
|
const void* dict, size_t dictSize,
|
|
814
1167
|
const ZSTD_CDict* cdict,
|
|
815
|
-
ZSTD_CCtx_params
|
|
1168
|
+
const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
|
|
816
1169
|
|
|
817
1170
|
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
|
|
818
1171
|
|
|
@@ -827,7 +1180,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
|
|
|
827
1180
|
ZSTD_dictContentType_e dictContentType,
|
|
828
1181
|
ZSTD_dictTableLoadMethod_e dtlm,
|
|
829
1182
|
const ZSTD_CDict* cdict,
|
|
830
|
-
ZSTD_CCtx_params params,
|
|
1183
|
+
const ZSTD_CCtx_params* params,
|
|
831
1184
|
unsigned long long pledgedSrcSize);
|
|
832
1185
|
|
|
833
1186
|
/* ZSTD_compress_advanced_internal() :
|
|
@@ -836,7 +1189,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
|
|
|
836
1189
|
void* dst, size_t dstCapacity,
|
|
837
1190
|
const void* src, size_t srcSize,
|
|
838
1191
|
const void* dict,size_t dictSize,
|
|
839
|
-
ZSTD_CCtx_params params);
|
|
1192
|
+
const ZSTD_CCtx_params* params);
|
|
840
1193
|
|
|
841
1194
|
|
|
842
1195
|
/* ZSTD_writeLastEmptyBlock() :
|
|
@@ -859,5 +1212,13 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
|
|
|
859
1212
|
*/
|
|
860
1213
|
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
|
|
861
1214
|
|
|
1215
|
+
/** ZSTD_cycleLog() :
|
|
1216
|
+
* condition for correct operation : hashLog > 1 */
|
|
1217
|
+
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
|
|
1218
|
+
|
|
1219
|
+
/** ZSTD_CCtx_trace() :
|
|
1220
|
+
* Trace the end of a compression call.
|
|
1221
|
+
*/
|
|
1222
|
+
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
|
|
862
1223
|
|
|
863
1224
|
#endif /* ZSTD_COMPRESS_H */
|