extzstd 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +28 -14
- data/contrib/zstd/CHANGELOG +114 -56
- data/contrib/zstd/CONTRIBUTING.md +14 -0
- data/contrib/zstd/Makefile +37 -31
- data/contrib/zstd/README.md +6 -0
- data/contrib/zstd/appveyor.yml +4 -1
- data/contrib/zstd/lib/Makefile +231 -134
- data/contrib/zstd/lib/README.md +28 -0
- data/contrib/zstd/lib/common/bitstream.h +24 -15
- data/contrib/zstd/lib/common/compiler.h +116 -3
- data/contrib/zstd/lib/common/cpu.h +0 -2
- data/contrib/zstd/lib/common/debug.h +11 -18
- data/contrib/zstd/lib/common/entropy_common.c +188 -42
- data/contrib/zstd/lib/common/error_private.c +1 -0
- data/contrib/zstd/lib/common/error_private.h +1 -1
- data/contrib/zstd/lib/common/fse.h +38 -11
- data/contrib/zstd/lib/common/fse_decompress.c +123 -16
- data/contrib/zstd/lib/common/huf.h +26 -5
- data/contrib/zstd/lib/common/mem.h +66 -93
- data/contrib/zstd/lib/common/pool.c +22 -16
- data/contrib/zstd/lib/common/pool.h +1 -1
- data/contrib/zstd/lib/common/threading.c +6 -5
- data/contrib/zstd/lib/common/xxhash.c +18 -56
- data/contrib/zstd/lib/common/xxhash.h +1 -1
- data/contrib/zstd/lib/common/zstd_common.c +9 -9
- data/contrib/zstd/lib/common/zstd_deps.h +111 -0
- data/contrib/zstd/lib/common/zstd_errors.h +1 -0
- data/contrib/zstd/lib/common/zstd_internal.h +89 -58
- data/contrib/zstd/lib/compress/fse_compress.c +30 -23
- data/contrib/zstd/lib/compress/hist.c +26 -28
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +210 -95
- data/contrib/zstd/lib/compress/zstd_compress.c +1339 -409
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +119 -41
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +4 -4
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +17 -3
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +23 -19
- data/contrib/zstd/lib/compress/zstd_cwksp.h +60 -24
- data/contrib/zstd/lib/compress/zstd_double_fast.c +22 -22
- data/contrib/zstd/lib/compress/zstd_fast.c +19 -19
- data/contrib/zstd/lib/compress/zstd_lazy.c +351 -77
- data/contrib/zstd/lib/compress/zstd_lazy.h +20 -0
- data/contrib/zstd/lib/compress/zstd_ldm.c +59 -18
- data/contrib/zstd/lib/compress/zstd_ldm.h +6 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +190 -45
- data/contrib/zstd/lib/compress/zstdmt_compress.c +74 -406
- data/contrib/zstd/lib/compress/zstdmt_compress.h +26 -108
- data/contrib/zstd/lib/decompress/huf_decompress.c +302 -200
- data/contrib/zstd/lib/decompress/zstd_ddict.c +8 -8
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +125 -80
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +145 -37
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +5 -2
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +11 -10
- data/contrib/zstd/lib/dictBuilder/cover.c +29 -20
- data/contrib/zstd/lib/dictBuilder/cover.h +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +20 -19
- data/contrib/zstd/lib/dictBuilder/zdict.c +15 -16
- data/contrib/zstd/lib/dictBuilder/zdict.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +5 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +5 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +5 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +6 -2
- data/contrib/zstd/lib/legacy/zstd_v05.c +5 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +5 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +5 -1
- data/contrib/zstd/lib/libzstd.pc.in +3 -3
- data/contrib/zstd/lib/zstd.h +348 -47
- data/ext/extzstd.c +6 -0
- data/ext/extzstd.h +6 -0
- data/gemstub.rb +3 -21
- data/lib/extzstd.rb +0 -2
- data/lib/extzstd/version.rb +6 -1
- data/test/test_basic.rb +0 -5
- metadata +5 -4
@@ -11,8 +11,7 @@
|
|
11
11
|
/*-*************************************
|
12
12
|
* Dependencies
|
13
13
|
***************************************/
|
14
|
-
#include
|
15
|
-
#include <string.h> /* memset */
|
14
|
+
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
|
16
15
|
#include "../common/cpu.h"
|
17
16
|
#include "../common/mem.h"
|
18
17
|
#include "hist.h" /* HIST_countFast_wksp */
|
@@ -30,6 +29,19 @@
|
|
30
29
|
#include "zstd_ldm.h"
|
31
30
|
#include "zstd_compress_superblock.h"
|
32
31
|
|
32
|
+
/* ***************************************************************
|
33
|
+
* Tuning parameters
|
34
|
+
*****************************************************************/
|
35
|
+
/*!
|
36
|
+
* COMPRESS_HEAPMODE :
|
37
|
+
* Select how default compression function ZSTD_compress() allocates its context,
|
38
|
+
* on stack (0, default), or into heap (1).
|
39
|
+
* Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
|
40
|
+
*/
|
41
|
+
#ifndef ZSTD_COMPRESS_HEAPMODE
|
42
|
+
# define ZSTD_COMPRESS_HEAPMODE 0
|
43
|
+
#endif
|
44
|
+
|
33
45
|
|
34
46
|
/*-*************************************
|
35
47
|
* Helper functions
|
@@ -52,6 +64,7 @@ size_t ZSTD_compressBound(size_t srcSize) {
|
|
52
64
|
struct ZSTD_CDict_s {
|
53
65
|
const void* dictContent;
|
54
66
|
size_t dictContentSize;
|
67
|
+
ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
|
55
68
|
U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
|
56
69
|
ZSTD_cwksp workspace;
|
57
70
|
ZSTD_matchState_t matchState;
|
@@ -69,7 +82,7 @@ ZSTD_CCtx* ZSTD_createCCtx(void)
|
|
69
82
|
static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
|
70
83
|
{
|
71
84
|
assert(cctx != NULL);
|
72
|
-
|
85
|
+
ZSTD_memset(cctx, 0, sizeof(*cctx));
|
73
86
|
cctx->customMem = memManager;
|
74
87
|
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
|
75
88
|
{ size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
|
@@ -82,8 +95,8 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
|
|
82
95
|
{
|
83
96
|
ZSTD_STATIC_ASSERT(zcss_init==0);
|
84
97
|
ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
|
85
|
-
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
86
|
-
{ ZSTD_CCtx* const cctx = (ZSTD_CCtx*)
|
98
|
+
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
|
99
|
+
{ ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
|
87
100
|
if (!cctx) return NULL;
|
88
101
|
ZSTD_initCCtx(cctx, customMem);
|
89
102
|
return cctx;
|
@@ -96,20 +109,20 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
|
|
96
109
|
ZSTD_CCtx* cctx;
|
97
110
|
if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
|
98
111
|
if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
|
99
|
-
ZSTD_cwksp_init(&ws, workspace, workspaceSize);
|
112
|
+
ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
|
100
113
|
|
101
114
|
cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
|
102
115
|
if (cctx == NULL) return NULL;
|
103
116
|
|
104
|
-
|
117
|
+
ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
|
105
118
|
ZSTD_cwksp_move(&cctx->workspace, &ws);
|
106
119
|
cctx->staticSize = workspaceSize;
|
107
120
|
|
108
121
|
/* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
|
109
|
-
if (!ZSTD_cwksp_check_available(&cctx->workspace,
|
122
|
+
if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
|
110
123
|
cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
|
111
124
|
cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
|
112
|
-
cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace,
|
125
|
+
cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
|
113
126
|
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
|
114
127
|
return cctx;
|
115
128
|
}
|
@@ -119,10 +132,10 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
|
|
119
132
|
*/
|
120
133
|
static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
|
121
134
|
{
|
122
|
-
|
135
|
+
ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
|
123
136
|
ZSTD_freeCDict(cctx->localDict.cdict);
|
124
|
-
|
125
|
-
|
137
|
+
ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
|
138
|
+
ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
|
126
139
|
cctx->cdict = NULL;
|
127
140
|
}
|
128
141
|
|
@@ -153,7 +166,7 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
|
|
153
166
|
int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
|
154
167
|
ZSTD_freeCCtxContent(cctx);
|
155
168
|
if (!cctxInWorkspace) {
|
156
|
-
|
169
|
+
ZSTD_customFree(cctx, cctx->customMem);
|
157
170
|
}
|
158
171
|
}
|
159
172
|
return 0;
|
@@ -189,15 +202,32 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
|
|
189
202
|
/* private API call, for dictBuilder only */
|
190
203
|
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
|
191
204
|
|
205
|
+
/* Returns 1 if compression parameters are such that we should
|
206
|
+
* enable long distance matching (wlog >= 27, strategy >= btopt).
|
207
|
+
* Returns 0 otherwise.
|
208
|
+
*/
|
209
|
+
static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) {
|
210
|
+
return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
|
211
|
+
}
|
212
|
+
|
192
213
|
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
193
214
|
ZSTD_compressionParameters cParams)
|
194
215
|
{
|
195
216
|
ZSTD_CCtx_params cctxParams;
|
196
|
-
|
217
|
+
/* should not matter, as all cParams are presumed properly defined */
|
218
|
+
ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
|
197
219
|
cctxParams.cParams = cParams;
|
198
|
-
|
220
|
+
|
221
|
+
if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
|
222
|
+
DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
|
223
|
+
cctxParams.ldmParams.enableLdm = 1;
|
224
|
+
/* LDM is enabled by default for optimal parser and window size >= 128MB */
|
225
|
+
ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
|
226
|
+
assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
|
227
|
+
assert(cctxParams.ldmParams.hashRateLog < 32);
|
228
|
+
}
|
229
|
+
|
199
230
|
assert(!ZSTD_checkCParams(cParams));
|
200
|
-
cctxParams.fParams.contentSizeFlag = 1;
|
201
231
|
return cctxParams;
|
202
232
|
}
|
203
233
|
|
@@ -205,13 +235,12 @@ static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
|
|
205
235
|
ZSTD_customMem customMem)
|
206
236
|
{
|
207
237
|
ZSTD_CCtx_params* params;
|
208
|
-
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
209
|
-
params = (ZSTD_CCtx_params*)
|
238
|
+
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
|
239
|
+
params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
|
210
240
|
sizeof(ZSTD_CCtx_params), customMem);
|
211
241
|
if (!params) { return NULL; }
|
242
|
+
ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
|
212
243
|
params->customMem = customMem;
|
213
|
-
params->compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
214
|
-
params->fParams.contentSizeFlag = 1;
|
215
244
|
return params;
|
216
245
|
}
|
217
246
|
|
@@ -223,7 +252,7 @@ ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
|
|
223
252
|
size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
|
224
253
|
{
|
225
254
|
if (params == NULL) { return 0; }
|
226
|
-
|
255
|
+
ZSTD_customFree(params, params->customMem);
|
227
256
|
return 0;
|
228
257
|
}
|
229
258
|
|
@@ -234,7 +263,7 @@ size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
|
|
234
263
|
|
235
264
|
size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
|
236
265
|
RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
|
237
|
-
|
266
|
+
ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
|
238
267
|
cctxParams->compressionLevel = compressionLevel;
|
239
268
|
cctxParams->fParams.contentSizeFlag = 1;
|
240
269
|
return 0;
|
@@ -244,7 +273,7 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete
|
|
244
273
|
{
|
245
274
|
RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
|
246
275
|
FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
|
247
|
-
|
276
|
+
ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
|
248
277
|
assert(!ZSTD_checkCParams(params.cParams));
|
249
278
|
cctxParams->cParams = params.cParams;
|
250
279
|
cctxParams->fParams = params.fParams;
|
@@ -354,6 +383,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
354
383
|
#endif
|
355
384
|
return bounds;
|
356
385
|
|
386
|
+
case ZSTD_c_enableDedicatedDictSearch:
|
387
|
+
bounds.lowerBound = 0;
|
388
|
+
bounds.upperBound = 1;
|
389
|
+
return bounds;
|
390
|
+
|
357
391
|
case ZSTD_c_enableLongDistanceMatching:
|
358
392
|
bounds.lowerBound = 0;
|
359
393
|
bounds.upperBound = 1;
|
@@ -397,7 +431,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
397
431
|
return bounds;
|
398
432
|
|
399
433
|
case ZSTD_c_forceAttachDict:
|
400
|
-
ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach <
|
434
|
+
ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
|
401
435
|
bounds.lowerBound = ZSTD_dictDefaultAttach;
|
402
436
|
bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
|
403
437
|
return bounds;
|
@@ -418,6 +452,22 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
418
452
|
bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
|
419
453
|
return bounds;
|
420
454
|
|
455
|
+
case ZSTD_c_stableInBuffer:
|
456
|
+
case ZSTD_c_stableOutBuffer:
|
457
|
+
bounds.lowerBound = (int)ZSTD_bm_buffered;
|
458
|
+
bounds.upperBound = (int)ZSTD_bm_stable;
|
459
|
+
return bounds;
|
460
|
+
|
461
|
+
case ZSTD_c_blockDelimiters:
|
462
|
+
bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
|
463
|
+
bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
|
464
|
+
return bounds;
|
465
|
+
|
466
|
+
case ZSTD_c_validateSequences:
|
467
|
+
bounds.lowerBound = 0;
|
468
|
+
bounds.upperBound = 1;
|
469
|
+
return bounds;
|
470
|
+
|
421
471
|
default:
|
422
472
|
bounds.error = ERROR(parameter_unsupported);
|
423
473
|
return bounds;
|
@@ -465,6 +515,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
|
465
515
|
case ZSTD_c_jobSize:
|
466
516
|
case ZSTD_c_overlapLog:
|
467
517
|
case ZSTD_c_rsyncable:
|
518
|
+
case ZSTD_c_enableDedicatedDictSearch:
|
468
519
|
case ZSTD_c_enableLongDistanceMatching:
|
469
520
|
case ZSTD_c_ldmHashLog:
|
470
521
|
case ZSTD_c_ldmMinMatch:
|
@@ -474,6 +525,10 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
|
474
525
|
case ZSTD_c_literalCompressionMode:
|
475
526
|
case ZSTD_c_targetCBlockSize:
|
476
527
|
case ZSTD_c_srcSizeHint:
|
528
|
+
case ZSTD_c_stableInBuffer:
|
529
|
+
case ZSTD_c_stableOutBuffer:
|
530
|
+
case ZSTD_c_blockDelimiters:
|
531
|
+
case ZSTD_c_validateSequences:
|
477
532
|
default:
|
478
533
|
return 0;
|
479
534
|
}
|
@@ -515,12 +570,17 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
|
515
570
|
case ZSTD_c_jobSize:
|
516
571
|
case ZSTD_c_overlapLog:
|
517
572
|
case ZSTD_c_rsyncable:
|
573
|
+
case ZSTD_c_enableDedicatedDictSearch:
|
518
574
|
case ZSTD_c_enableLongDistanceMatching:
|
519
575
|
case ZSTD_c_ldmHashLog:
|
520
576
|
case ZSTD_c_ldmMinMatch:
|
521
577
|
case ZSTD_c_ldmBucketSizeLog:
|
522
578
|
case ZSTD_c_targetCBlockSize:
|
523
579
|
case ZSTD_c_srcSizeHint:
|
580
|
+
case ZSTD_c_stableInBuffer:
|
581
|
+
case ZSTD_c_stableOutBuffer:
|
582
|
+
case ZSTD_c_blockDelimiters:
|
583
|
+
case ZSTD_c_validateSequences:
|
524
584
|
break;
|
525
585
|
|
526
586
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
@@ -541,9 +601,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
541
601
|
|
542
602
|
case ZSTD_c_compressionLevel : {
|
543
603
|
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
|
544
|
-
if (value
|
604
|
+
if (value == 0)
|
605
|
+
CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
|
606
|
+
else
|
545
607
|
CCtxParams->compressionLevel = value;
|
546
|
-
}
|
547
608
|
if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
|
548
609
|
return 0; /* return type (size_t) cannot represent negative values */
|
549
610
|
}
|
@@ -667,6 +728,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
667
728
|
return CCtxParams->rsyncable;
|
668
729
|
#endif
|
669
730
|
|
731
|
+
case ZSTD_c_enableDedicatedDictSearch :
|
732
|
+
CCtxParams->enableDedicatedDictSearch = (value!=0);
|
733
|
+
return CCtxParams->enableDedicatedDictSearch;
|
734
|
+
|
670
735
|
case ZSTD_c_enableLongDistanceMatching :
|
671
736
|
CCtxParams->ldmParams.enableLdm = (value!=0);
|
672
737
|
return CCtxParams->ldmParams.enableLdm;
|
@@ -707,6 +772,26 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
707
772
|
CCtxParams->srcSizeHint = value;
|
708
773
|
return CCtxParams->srcSizeHint;
|
709
774
|
|
775
|
+
case ZSTD_c_stableInBuffer:
|
776
|
+
BOUNDCHECK(ZSTD_c_stableInBuffer, value);
|
777
|
+
CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
|
778
|
+
return CCtxParams->inBufferMode;
|
779
|
+
|
780
|
+
case ZSTD_c_stableOutBuffer:
|
781
|
+
BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
|
782
|
+
CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
|
783
|
+
return CCtxParams->outBufferMode;
|
784
|
+
|
785
|
+
case ZSTD_c_blockDelimiters:
|
786
|
+
BOUNDCHECK(ZSTD_c_blockDelimiters, value);
|
787
|
+
CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
|
788
|
+
return CCtxParams->blockDelimiters;
|
789
|
+
|
790
|
+
case ZSTD_c_validateSequences:
|
791
|
+
BOUNDCHECK(ZSTD_c_validateSequences, value);
|
792
|
+
CCtxParams->validateSequences = value;
|
793
|
+
return CCtxParams->validateSequences;
|
794
|
+
|
710
795
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
711
796
|
}
|
712
797
|
}
|
@@ -794,6 +879,9 @@ size_t ZSTD_CCtxParams_getParameter(
|
|
794
879
|
*value = CCtxParams->rsyncable;
|
795
880
|
break;
|
796
881
|
#endif
|
882
|
+
case ZSTD_c_enableDedicatedDictSearch :
|
883
|
+
*value = CCtxParams->enableDedicatedDictSearch;
|
884
|
+
break;
|
797
885
|
case ZSTD_c_enableLongDistanceMatching :
|
798
886
|
*value = CCtxParams->ldmParams.enableLdm;
|
799
887
|
break;
|
@@ -815,6 +903,18 @@ size_t ZSTD_CCtxParams_getParameter(
|
|
815
903
|
case ZSTD_c_srcSizeHint :
|
816
904
|
*value = (int)CCtxParams->srcSizeHint;
|
817
905
|
break;
|
906
|
+
case ZSTD_c_stableInBuffer :
|
907
|
+
*value = (int)CCtxParams->inBufferMode;
|
908
|
+
break;
|
909
|
+
case ZSTD_c_stableOutBuffer :
|
910
|
+
*value = (int)CCtxParams->outBufferMode;
|
911
|
+
break;
|
912
|
+
case ZSTD_c_blockDelimiters :
|
913
|
+
*value = (int)CCtxParams->blockDelimiters;
|
914
|
+
break;
|
915
|
+
case ZSTD_c_validateSequences :
|
916
|
+
*value = (int)CCtxParams->validateSequences;
|
917
|
+
break;
|
818
918
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
819
919
|
}
|
820
920
|
return 0;
|
@@ -850,6 +950,14 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo
|
|
850
950
|
return 0;
|
851
951
|
}
|
852
952
|
|
953
|
+
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
|
954
|
+
int const compressionLevel,
|
955
|
+
size_t const dictSize);
|
956
|
+
static int ZSTD_dedicatedDictSearch_isSupported(
|
957
|
+
const ZSTD_compressionParameters* cParams);
|
958
|
+
static void ZSTD_dedicatedDictSearch_revertCParams(
|
959
|
+
ZSTD_compressionParameters* cParams);
|
960
|
+
|
853
961
|
/**
|
854
962
|
* Initializes the local dict using the requested parameters.
|
855
963
|
* NOTE: This does not use the pledged src size, because it may be used for more
|
@@ -858,8 +966,6 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo
|
|
858
966
|
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
859
967
|
{
|
860
968
|
ZSTD_localDict* const dl = &cctx->localDict;
|
861
|
-
ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
|
862
|
-
&cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize);
|
863
969
|
if (dl->dict == NULL) {
|
864
970
|
/* No local dictionary. */
|
865
971
|
assert(dl->dictBuffer == NULL);
|
@@ -876,12 +982,12 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
|
876
982
|
assert(cctx->cdict == NULL);
|
877
983
|
assert(cctx->prefixDict.dict == NULL);
|
878
984
|
|
879
|
-
dl->cdict =
|
985
|
+
dl->cdict = ZSTD_createCDict_advanced2(
|
880
986
|
dl->dict,
|
881
987
|
dl->dictSize,
|
882
988
|
ZSTD_dlm_byRef,
|
883
989
|
dl->dictContentType,
|
884
|
-
|
990
|
+
&cctx->requestedParams,
|
885
991
|
cctx->customMem);
|
886
992
|
RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
|
887
993
|
cctx->cdict = dl->cdict;
|
@@ -894,8 +1000,6 @@ size_t ZSTD_CCtx_loadDictionary_advanced(
|
|
894
1000
|
{
|
895
1001
|
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
896
1002
|
"Can't load a dictionary when ctx is not in init stage.");
|
897
|
-
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
|
898
|
-
"no malloc for static CCtx");
|
899
1003
|
DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
|
900
1004
|
ZSTD_clearAllDicts(cctx); /* in case one already exists */
|
901
1005
|
if (dict == NULL || dictSize == 0) /* no dictionary mode */
|
@@ -903,9 +1007,12 @@ size_t ZSTD_CCtx_loadDictionary_advanced(
|
|
903
1007
|
if (dictLoadMethod == ZSTD_dlm_byRef) {
|
904
1008
|
cctx->localDict.dict = dict;
|
905
1009
|
} else {
|
906
|
-
void* dictBuffer
|
1010
|
+
void* dictBuffer;
|
1011
|
+
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
|
1012
|
+
"no malloc for static CCtx");
|
1013
|
+
dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
|
907
1014
|
RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
|
908
|
-
|
1015
|
+
ZSTD_memcpy(dictBuffer, dict, dictSize);
|
909
1016
|
cctx->localDict.dictBuffer = dictBuffer;
|
910
1017
|
cctx->localDict.dict = dictBuffer;
|
911
1018
|
}
|
@@ -938,6 +1045,14 @@ size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
|
|
938
1045
|
return 0;
|
939
1046
|
}
|
940
1047
|
|
1048
|
+
size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
|
1049
|
+
{
|
1050
|
+
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
1051
|
+
"Can't ref a pool when ctx not in init stage.");
|
1052
|
+
cctx->pool = pool;
|
1053
|
+
return 0;
|
1054
|
+
}
|
1055
|
+
|
941
1056
|
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
|
942
1057
|
{
|
943
1058
|
return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
|
@@ -1022,24 +1137,73 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
|
|
1022
1137
|
return hashLog - btScale;
|
1023
1138
|
}
|
1024
1139
|
|
1140
|
+
/** ZSTD_dictAndWindowLog() :
|
1141
|
+
* Returns an adjusted window log that is large enough to fit the source and the dictionary.
|
1142
|
+
* The zstd format says that the entire dictionary is valid if one byte of the dictionary
|
1143
|
+
* is within the window. So the hashLog and chainLog should be large enough to reference both
|
1144
|
+
* the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
|
1145
|
+
* the hashLog and chainLog.
|
1146
|
+
* NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
|
1147
|
+
*/
|
1148
|
+
static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
|
1149
|
+
{
|
1150
|
+
const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
|
1151
|
+
/* No dictionary ==> No change */
|
1152
|
+
if (dictSize == 0) {
|
1153
|
+
return windowLog;
|
1154
|
+
}
|
1155
|
+
assert(windowLog <= ZSTD_WINDOWLOG_MAX);
|
1156
|
+
assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
|
1157
|
+
{
|
1158
|
+
U64 const windowSize = 1ULL << windowLog;
|
1159
|
+
U64 const dictAndWindowSize = dictSize + windowSize;
|
1160
|
+
/* If the window size is already large enough to fit both the source and the dictionary
|
1161
|
+
* then just use the window size. Otherwise adjust so that it fits the dictionary and
|
1162
|
+
* the window.
|
1163
|
+
*/
|
1164
|
+
if (windowSize >= dictSize + srcSize) {
|
1165
|
+
return windowLog; /* Window size large enough already */
|
1166
|
+
} else if (dictAndWindowSize >= maxWindowSize) {
|
1167
|
+
return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
|
1168
|
+
} else {
|
1169
|
+
return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
|
1170
|
+
}
|
1171
|
+
}
|
1172
|
+
}
|
1173
|
+
|
1025
1174
|
/** ZSTD_adjustCParams_internal() :
|
1026
1175
|
* optimize `cPar` for a specified input (`srcSize` and `dictSize`).
|
1027
1176
|
* mostly downsize to reduce memory consumption and initialization latency.
|
1028
1177
|
* `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
|
1178
|
+
* `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
|
1029
1179
|
* note : `srcSize==0` means 0!
|
1030
1180
|
* condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
|
1031
1181
|
static ZSTD_compressionParameters
|
1032
1182
|
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
1033
1183
|
unsigned long long srcSize,
|
1034
|
-
size_t dictSize
|
1184
|
+
size_t dictSize,
|
1185
|
+
ZSTD_cParamMode_e mode)
|
1035
1186
|
{
|
1036
|
-
|
1037
|
-
|
1187
|
+
const U64 minSrcSize = 513; /* (1<<9) + 1 */
|
1188
|
+
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
|
1038
1189
|
assert(ZSTD_checkCParams(cPar)==0);
|
1039
1190
|
|
1040
1191
|
if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
|
1041
1192
|
srcSize = minSrcSize;
|
1042
1193
|
|
1194
|
+
switch (mode) {
|
1195
|
+
case ZSTD_cpm_noAttachDict:
|
1196
|
+
case ZSTD_cpm_unknown:
|
1197
|
+
case ZSTD_cpm_createCDict:
|
1198
|
+
break;
|
1199
|
+
case ZSTD_cpm_attachDict:
|
1200
|
+
dictSize = 0;
|
1201
|
+
break;
|
1202
|
+
default:
|
1203
|
+
assert(0);
|
1204
|
+
break;
|
1205
|
+
}
|
1206
|
+
|
1043
1207
|
/* resize windowLog if input is small enough, to use less memory */
|
1044
1208
|
if ( (srcSize < maxWindowResize)
|
1045
1209
|
&& (dictSize < maxWindowResize) ) {
|
@@ -1049,10 +1213,11 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
1049
1213
|
ZSTD_highbit32(tSize-1) + 1;
|
1050
1214
|
if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
|
1051
1215
|
}
|
1052
|
-
|
1053
|
-
|
1054
|
-
if (
|
1055
|
-
|
1216
|
+
{ U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
|
1217
|
+
U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
|
1218
|
+
if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
|
1219
|
+
if (cycleLog > dictAndWindowLog)
|
1220
|
+
cPar.chainLog -= (cycleLog - dictAndWindowLog);
|
1056
1221
|
}
|
1057
1222
|
|
1058
1223
|
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
|
@@ -1068,31 +1233,38 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
|
|
1068
1233
|
{
|
1069
1234
|
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
|
1070
1235
|
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
1071
|
-
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
|
1236
|
+
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
|
1072
1237
|
}
|
1073
1238
|
|
1074
|
-
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
|
1075
|
-
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
|
1239
|
+
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
|
1240
|
+
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
|
1241
|
+
|
1242
|
+
static void ZSTD_overrideCParams(
|
1243
|
+
ZSTD_compressionParameters* cParams,
|
1244
|
+
const ZSTD_compressionParameters* overrides)
|
1245
|
+
{
|
1246
|
+
if (overrides->windowLog) cParams->windowLog = overrides->windowLog;
|
1247
|
+
if (overrides->hashLog) cParams->hashLog = overrides->hashLog;
|
1248
|
+
if (overrides->chainLog) cParams->chainLog = overrides->chainLog;
|
1249
|
+
if (overrides->searchLog) cParams->searchLog = overrides->searchLog;
|
1250
|
+
if (overrides->minMatch) cParams->minMatch = overrides->minMatch;
|
1251
|
+
if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
|
1252
|
+
if (overrides->strategy) cParams->strategy = overrides->strategy;
|
1253
|
+
}
|
1076
1254
|
|
1077
1255
|
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
1078
|
-
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
|
1256
|
+
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
|
1079
1257
|
{
|
1080
1258
|
ZSTD_compressionParameters cParams;
|
1081
1259
|
if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
|
1082
1260
|
srcSizeHint = CCtxParams->srcSizeHint;
|
1083
1261
|
}
|
1084
|
-
cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize);
|
1262
|
+
cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
|
1085
1263
|
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
|
1086
|
-
|
1087
|
-
if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
|
1088
|
-
if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
|
1089
|
-
if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
|
1090
|
-
if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch;
|
1091
|
-
if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
|
1092
|
-
if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
|
1264
|
+
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
|
1093
1265
|
assert(!ZSTD_checkCParams(cParams));
|
1094
1266
|
/* srcSizeHint == 0 means 0 */
|
1095
|
-
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
|
1267
|
+
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
|
1096
1268
|
}
|
1097
1269
|
|
1098
1270
|
static size_t
|
@@ -1123,45 +1295,61 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
|
|
1123
1295
|
return tableSpace + optSpace;
|
1124
1296
|
}
|
1125
1297
|
|
1126
|
-
size_t
|
1298
|
+
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
1299
|
+
const ZSTD_compressionParameters* cParams,
|
1300
|
+
const ldmParams_t* ldmParams,
|
1301
|
+
const int isStatic,
|
1302
|
+
const size_t buffInSize,
|
1303
|
+
const size_t buffOutSize,
|
1304
|
+
const U64 pledgedSrcSize)
|
1127
1305
|
{
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
|
1139
|
-
size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
|
1306
|
+
size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize));
|
1307
|
+
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
|
1308
|
+
U32 const divider = (cParams->minMatch==3) ? 3 : 4;
|
1309
|
+
size_t const maxNbSeq = blockSize / divider;
|
1310
|
+
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
|
1311
|
+
+ ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
|
1312
|
+
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
|
1313
|
+
size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
|
1314
|
+
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
|
1315
|
+
size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
|
1140
1316
|
|
1141
|
-
|
1142
|
-
|
1317
|
+
size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
|
1318
|
+
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
|
1319
|
+
size_t const ldmSeqSpace = ldmParams->enableLdm ?
|
1320
|
+
ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
|
1143
1321
|
|
1144
|
-
/* estimateCCtxSize is for one-shot compression. So no buffers should
|
1145
|
-
* be needed. However, we still allocate two 0-sized buffers, which can
|
1146
|
-
* take space under ASAN. */
|
1147
|
-
size_t const bufferSpace = ZSTD_cwksp_alloc_size(0)
|
1148
|
-
+ ZSTD_cwksp_alloc_size(0);
|
1149
1322
|
|
1150
|
-
|
1323
|
+
size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
|
1324
|
+
+ ZSTD_cwksp_alloc_size(buffOutSize);
|
1151
1325
|
|
1152
|
-
|
1153
|
-
cctxSpace +
|
1154
|
-
entropySpace +
|
1155
|
-
blockStateSpace +
|
1156
|
-
ldmSpace +
|
1157
|
-
ldmSeqSpace +
|
1158
|
-
matchStateSize +
|
1159
|
-
tokenSpace +
|
1160
|
-
bufferSpace;
|
1326
|
+
size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
|
1161
1327
|
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1328
|
+
size_t const neededSpace =
|
1329
|
+
cctxSpace +
|
1330
|
+
entropySpace +
|
1331
|
+
blockStateSpace +
|
1332
|
+
ldmSpace +
|
1333
|
+
ldmSeqSpace +
|
1334
|
+
matchStateSize +
|
1335
|
+
tokenSpace +
|
1336
|
+
bufferSpace;
|
1337
|
+
|
1338
|
+
DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
|
1339
|
+
return neededSpace;
|
1340
|
+
}
|
1341
|
+
|
1342
|
+
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
1343
|
+
{
|
1344
|
+
ZSTD_compressionParameters const cParams =
|
1345
|
+
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
|
1346
|
+
|
1347
|
+
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
|
1348
|
+
/* estimateCCtxSize is for one-shot compression. So no buffers should
|
1349
|
+
* be needed. However, we still allocate two 0-sized buffers, which can
|
1350
|
+
* take space under ASAN. */
|
1351
|
+
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
1352
|
+
&cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
|
1165
1353
|
}
|
1166
1354
|
|
1167
1355
|
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
|
@@ -1172,7 +1360,7 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
|
|
1172
1360
|
|
1173
1361
|
static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
|
1174
1362
|
{
|
1175
|
-
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
1363
|
+
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
|
1176
1364
|
return ZSTD_estimateCCtxSize_usingCParams(cParams);
|
1177
1365
|
}
|
1178
1366
|
|
@@ -1191,15 +1379,18 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
1191
1379
|
{
|
1192
1380
|
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
|
1193
1381
|
{ ZSTD_compressionParameters const cParams =
|
1194
|
-
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
1195
|
-
size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
|
1382
|
+
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
|
1196
1383
|
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
|
1197
|
-
size_t const inBuffSize = (
|
1198
|
-
|
1199
|
-
|
1200
|
-
|
1201
|
-
|
1202
|
-
|
1384
|
+
size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
|
1385
|
+
? ((size_t)1 << cParams.windowLog) + blockSize
|
1386
|
+
: 0;
|
1387
|
+
size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
|
1388
|
+
? ZSTD_compressBound(blockSize) + 1
|
1389
|
+
: 0;
|
1390
|
+
|
1391
|
+
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
1392
|
+
&cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
|
1393
|
+
ZSTD_CONTENTSIZE_UNKNOWN);
|
1203
1394
|
}
|
1204
1395
|
}
|
1205
1396
|
|
@@ -1211,7 +1402,7 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
|
|
1211
1402
|
|
1212
1403
|
static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
|
1213
1404
|
{
|
1214
|
-
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
1405
|
+
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
|
1215
1406
|
return ZSTD_estimateCStreamSize_usingCParams(cParams);
|
1216
1407
|
}
|
1217
1408
|
|
@@ -1304,16 +1495,6 @@ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
|
|
1304
1495
|
ms->dictMatchState = NULL;
|
1305
1496
|
}
|
1306
1497
|
|
1307
|
-
/**
|
1308
|
-
* Indicates whether this compression proceeds directly from user-provided
|
1309
|
-
* source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
|
1310
|
-
* whether the context needs to buffer the input/output (ZSTDb_buffered).
|
1311
|
-
*/
|
1312
|
-
typedef enum {
|
1313
|
-
ZSTDb_not_buffered,
|
1314
|
-
ZSTDb_buffered
|
1315
|
-
} ZSTD_buffered_policy_e;
|
1316
|
-
|
1317
1498
|
/**
|
1318
1499
|
* Controls, for this matchState reset, whether the tables need to be cleared /
|
1319
1500
|
* prepared for the coming compression (ZSTDcrp_makeClean), or whether the
|
@@ -1441,45 +1622,32 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1441
1622
|
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
|
1442
1623
|
U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
|
1443
1624
|
size_t const maxNbSeq = blockSize / divider;
|
1444
|
-
size_t const
|
1445
|
-
|
1446
|
-
|
1447
|
-
size_t const
|
1448
|
-
|
1449
|
-
|
1625
|
+
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
|
1626
|
+
? ZSTD_compressBound(blockSize) + 1
|
1627
|
+
: 0;
|
1628
|
+
size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
|
1629
|
+
? windowSize + blockSize
|
1630
|
+
: 0;
|
1450
1631
|
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
|
1451
1632
|
|
1452
|
-
|
1633
|
+
int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
|
1634
|
+
ZSTD_indexResetPolicy_e needsIndexReset =
|
1635
|
+
(!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
|
1453
1636
|
|
1454
|
-
|
1455
|
-
|
1456
|
-
|
1637
|
+
size_t const neededSpace =
|
1638
|
+
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
1639
|
+
&params.cParams, &params.ldmParams, zc->staticSize != 0,
|
1640
|
+
buffInSize, buffOutSize, pledgedSrcSize);
|
1641
|
+
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
|
1457
1642
|
|
1458
1643
|
if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
|
1459
1644
|
|
1460
1645
|
/* Check if workspace is large enough, alloc a new one if needed */
|
1461
|
-
{
|
1462
|
-
size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
|
1463
|
-
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
|
1464
|
-
size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize);
|
1465
|
-
size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
|
1466
|
-
size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq));
|
1467
|
-
|
1468
|
-
size_t const neededSpace =
|
1469
|
-
cctxSpace +
|
1470
|
-
entropySpace +
|
1471
|
-
blockStateSpace +
|
1472
|
-
ldmSpace +
|
1473
|
-
ldmSeqSpace +
|
1474
|
-
matchStateSize +
|
1475
|
-
tokenSpace +
|
1476
|
-
bufferSpace;
|
1477
|
-
|
1646
|
+
{
|
1478
1647
|
int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
|
1479
1648
|
int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
|
1480
1649
|
|
1481
|
-
DEBUGLOG(4, "Need %
|
1482
|
-
neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
|
1650
|
+
DEBUGLOG(4, "Need %zu B workspace", neededSpace);
|
1483
1651
|
DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
|
1484
1652
|
|
1485
1653
|
if (workspaceTooSmall || workspaceWasteful) {
|
@@ -1503,7 +1671,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1503
1671
|
RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
|
1504
1672
|
zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
|
1505
1673
|
RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
|
1506
|
-
zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws,
|
1674
|
+
zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
|
1507
1675
|
RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
|
1508
1676
|
} }
|
1509
1677
|
|
@@ -1534,6 +1702,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1534
1702
|
zc->seqStore.maxNbLit = blockSize;
|
1535
1703
|
|
1536
1704
|
/* buffers */
|
1705
|
+
zc->bufferedPolicy = zbuff;
|
1537
1706
|
zc->inBuffSize = buffInSize;
|
1538
1707
|
zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
|
1539
1708
|
zc->outBuffSize = buffOutSize;
|
@@ -1546,7 +1715,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1546
1715
|
((size_t)1) << (params.ldmParams.hashLog -
|
1547
1716
|
params.ldmParams.bucketSizeLog);
|
1548
1717
|
zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
|
1549
|
-
|
1718
|
+
ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
|
1550
1719
|
}
|
1551
1720
|
|
1552
1721
|
/* sequences storage */
|
@@ -1570,7 +1739,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1570
1739
|
/* TODO: avoid memset? */
|
1571
1740
|
size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
|
1572
1741
|
zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
|
1573
|
-
|
1742
|
+
ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
|
1574
1743
|
zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
|
1575
1744
|
zc->maxNbLdmSequences = maxNbLdmSeq;
|
1576
1745
|
|
@@ -1579,6 +1748,12 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1579
1748
|
zc->ldmState.loadedDictEnd = 0;
|
1580
1749
|
}
|
1581
1750
|
|
1751
|
+
/* Due to alignment, when reusing a workspace, we can actually consume
|
1752
|
+
* up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
|
1753
|
+
*/
|
1754
|
+
assert(ZSTD_cwksp_used(ws) >= neededSpace &&
|
1755
|
+
ZSTD_cwksp_used(ws) <= neededSpace + 3);
|
1756
|
+
|
1582
1757
|
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
|
1583
1758
|
zc->initialized = 1;
|
1584
1759
|
|
@@ -1618,12 +1793,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
|
|
1618
1793
|
U64 pledgedSrcSize)
|
1619
1794
|
{
|
1620
1795
|
size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
|
1621
|
-
|
1622
|
-
|
1623
|
-
|
1624
|
-
|
1625
|
-
|
1626
|
-
|
1796
|
+
int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
|
1797
|
+
return dedicatedDictSearch
|
1798
|
+
|| ( ( pledgedSrcSize <= cutoff
|
1799
|
+
|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
|
1800
|
+
|| params->attachDictPref == ZSTD_dictForceAttach )
|
1801
|
+
&& params->attachDictPref != ZSTD_dictForceCopy
|
1802
|
+
&& !params->forceWindow ); /* dictMatchState isn't correctly
|
1803
|
+
* handled in _enforceMaxDist */
|
1627
1804
|
}
|
1628
1805
|
|
1629
1806
|
static size_t
|
@@ -1633,17 +1810,24 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
|
1633
1810
|
U64 pledgedSrcSize,
|
1634
1811
|
ZSTD_buffered_policy_e zbuff)
|
1635
1812
|
{
|
1636
|
-
{
|
1813
|
+
{
|
1814
|
+
ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
|
1637
1815
|
unsigned const windowLog = params.cParams.windowLog;
|
1638
1816
|
assert(windowLog != 0);
|
1639
1817
|
/* Resize working context table params for input only, since the dict
|
1640
1818
|
* has its own tables. */
|
1641
|
-
/*
|
1642
|
-
|
1819
|
+
/* pledgedSrcSize == 0 means 0! */
|
1820
|
+
|
1821
|
+
if (cdict->matchState.dedicatedDictSearch) {
|
1822
|
+
ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
|
1823
|
+
}
|
1824
|
+
|
1825
|
+
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
|
1826
|
+
cdict->dictContentSize, ZSTD_cpm_attachDict);
|
1643
1827
|
params.cParams.windowLog = windowLog;
|
1644
1828
|
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
|
1645
1829
|
ZSTDcrp_makeClean, zbuff), "");
|
1646
|
-
assert(cctx->appliedParams.cParams.strategy ==
|
1830
|
+
assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
|
1647
1831
|
}
|
1648
1832
|
|
1649
1833
|
{ const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
|
@@ -1670,7 +1854,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
|
1670
1854
|
cctx->dictID = cdict->dictID;
|
1671
1855
|
|
1672
1856
|
/* copy block state */
|
1673
|
-
|
1857
|
+
ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
|
1674
1858
|
|
1675
1859
|
return 0;
|
1676
1860
|
}
|
@@ -1683,6 +1867,8 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
1683
1867
|
{
|
1684
1868
|
const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
|
1685
1869
|
|
1870
|
+
assert(!cdict->matchState.dedicatedDictSearch);
|
1871
|
+
|
1686
1872
|
DEBUGLOG(4, "copying dictionary into context");
|
1687
1873
|
|
1688
1874
|
{ unsigned const windowLog = params.cParams.windowLog;
|
@@ -1703,10 +1889,10 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
1703
1889
|
{ size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
|
1704
1890
|
size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
|
1705
1891
|
|
1706
|
-
|
1892
|
+
ZSTD_memcpy(cctx->blockState.matchState.hashTable,
|
1707
1893
|
cdict->matchState.hashTable,
|
1708
1894
|
hSize * sizeof(U32));
|
1709
|
-
|
1895
|
+
ZSTD_memcpy(cctx->blockState.matchState.chainTable,
|
1710
1896
|
cdict->matchState.chainTable,
|
1711
1897
|
chainSize * sizeof(U32));
|
1712
1898
|
}
|
@@ -1715,7 +1901,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
1715
1901
|
{ int const h3log = cctx->blockState.matchState.hashLog3;
|
1716
1902
|
size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
|
1717
1903
|
assert(cdict->matchState.hashLog3 == 0);
|
1718
|
-
|
1904
|
+
ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
|
1719
1905
|
}
|
1720
1906
|
|
1721
1907
|
ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
|
@@ -1731,7 +1917,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
1731
1917
|
cctx->dictID = cdict->dictID;
|
1732
1918
|
|
1733
1919
|
/* copy block state */
|
1734
|
-
|
1920
|
+
ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
|
1735
1921
|
|
1736
1922
|
return 0;
|
1737
1923
|
}
|
@@ -1775,7 +1961,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
|
|
1775
1961
|
RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
|
1776
1962
|
"Can't copy a ctx that's not in init stage.");
|
1777
1963
|
|
1778
|
-
|
1964
|
+
ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
|
1779
1965
|
{ ZSTD_CCtx_params params = dstCCtx->requestedParams;
|
1780
1966
|
/* Copy only compression parameters related to tables. */
|
1781
1967
|
params.cParams = srcCCtx->appliedParams.cParams;
|
@@ -1797,13 +1983,13 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
|
|
1797
1983
|
int const h3log = srcCCtx->blockState.matchState.hashLog3;
|
1798
1984
|
size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
|
1799
1985
|
|
1800
|
-
|
1986
|
+
ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
|
1801
1987
|
srcCCtx->blockState.matchState.hashTable,
|
1802
1988
|
hSize * sizeof(U32));
|
1803
|
-
|
1989
|
+
ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
|
1804
1990
|
srcCCtx->blockState.matchState.chainTable,
|
1805
1991
|
chainSize * sizeof(U32));
|
1806
|
-
|
1992
|
+
ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
|
1807
1993
|
srcCCtx->blockState.matchState.hashTable3,
|
1808
1994
|
h3Size * sizeof(U32));
|
1809
1995
|
}
|
@@ -1821,7 +2007,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
|
|
1821
2007
|
dstCCtx->dictID = srcCCtx->dictID;
|
1822
2008
|
|
1823
2009
|
/* copy block state */
|
1824
|
-
|
2010
|
+
ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
|
1825
2011
|
|
1826
2012
|
return 0;
|
1827
2013
|
}
|
@@ -1834,7 +2020,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
|
|
1834
2020
|
size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
|
1835
2021
|
{
|
1836
2022
|
ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
|
1837
|
-
ZSTD_buffered_policy_e const zbuff =
|
2023
|
+
ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
|
1838
2024
|
ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
|
1839
2025
|
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
1840
2026
|
fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
|
@@ -1861,7 +2047,7 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
|
|
1861
2047
|
assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
|
1862
2048
|
assert(size < (1U<<31)); /* can be casted to int */
|
1863
2049
|
|
1864
|
-
#if
|
2050
|
+
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
|
1865
2051
|
/* To validate that the table re-use logic is sound, and that we don't
|
1866
2052
|
* access table space that we haven't cleaned, we re-"poison" the table
|
1867
2053
|
* space every time we mark it dirty.
|
@@ -1958,10 +2144,10 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
|
|
1958
2144
|
return (cctxParams->targetCBlockSize != 0);
|
1959
2145
|
}
|
1960
2146
|
|
1961
|
-
/*
|
2147
|
+
/* ZSTD_entropyCompressSequences_internal():
|
1962
2148
|
* actually compresses both literals and sequences */
|
1963
2149
|
MEM_STATIC size_t
|
1964
|
-
|
2150
|
+
ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
|
1965
2151
|
const ZSTD_entropyCTables_t* prevEntropy,
|
1966
2152
|
ZSTD_entropyCTables_t* nextEntropy,
|
1967
2153
|
const ZSTD_CCtx_params* cctxParams,
|
@@ -1971,7 +2157,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|
1971
2157
|
{
|
1972
2158
|
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
1973
2159
|
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
1974
|
-
unsigned count
|
2160
|
+
unsigned* count = (unsigned*)entropyWorkspace;
|
1975
2161
|
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
|
1976
2162
|
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
|
1977
2163
|
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
|
@@ -1987,8 +2173,12 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|
1987
2173
|
BYTE* seqHead;
|
1988
2174
|
BYTE* lastNCount = NULL;
|
1989
2175
|
|
1990
|
-
|
2176
|
+
entropyWorkspace = count + (MaxSeq + 1);
|
2177
|
+
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
|
2178
|
+
|
2179
|
+
DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
|
1991
2180
|
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
2181
|
+
assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
|
1992
2182
|
|
1993
2183
|
/* Compress literals */
|
1994
2184
|
{ const BYTE* const literals = seqStorePtr->litStart;
|
@@ -2023,7 +2213,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|
2023
2213
|
assert(op <= oend);
|
2024
2214
|
if (nbSeq==0) {
|
2025
2215
|
/* Copy the old tables over as if we repeated them */
|
2026
|
-
|
2216
|
+
ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
|
2027
2217
|
return (size_t)(op - ostart);
|
2028
2218
|
}
|
2029
2219
|
|
@@ -2148,7 +2338,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|
2148
2338
|
}
|
2149
2339
|
|
2150
2340
|
MEM_STATIC size_t
|
2151
|
-
|
2341
|
+
ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
|
2152
2342
|
const ZSTD_entropyCTables_t* prevEntropy,
|
2153
2343
|
ZSTD_entropyCTables_t* nextEntropy,
|
2154
2344
|
const ZSTD_CCtx_params* cctxParams,
|
@@ -2157,7 +2347,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
|
|
2157
2347
|
void* entropyWorkspace, size_t entropyWkspSize,
|
2158
2348
|
int bmi2)
|
2159
2349
|
{
|
2160
|
-
size_t const cSize =
|
2350
|
+
size_t const cSize = ZSTD_entropyCompressSequences_internal(
|
2161
2351
|
seqStorePtr, prevEntropy, nextEntropy, cctxParams,
|
2162
2352
|
dst, dstCapacity,
|
2163
2353
|
entropyWorkspace, entropyWkspSize, bmi2);
|
@@ -2167,13 +2357,13 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
|
|
2167
2357
|
*/
|
2168
2358
|
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
|
2169
2359
|
return 0; /* block not compressed */
|
2170
|
-
FORWARD_IF_ERROR(cSize, "
|
2360
|
+
FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
|
2171
2361
|
|
2172
2362
|
/* Check compressibility */
|
2173
2363
|
{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
|
2174
2364
|
if (cSize >= maxCSize) return 0; /* block not compressed */
|
2175
2365
|
}
|
2176
|
-
|
2366
|
+
DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
|
2177
2367
|
return cSize;
|
2178
2368
|
}
|
2179
2369
|
|
@@ -2182,7 +2372,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
|
|
2182
2372
|
* assumption : strat is a valid strategy */
|
2183
2373
|
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
|
2184
2374
|
{
|
2185
|
-
static const ZSTD_blockCompressor blockCompressor[
|
2375
|
+
static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
|
2186
2376
|
{ ZSTD_compressBlock_fast /* default for 0 */,
|
2187
2377
|
ZSTD_compressBlock_fast,
|
2188
2378
|
ZSTD_compressBlock_doubleFast,
|
@@ -2212,7 +2402,17 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
|
|
2212
2402
|
ZSTD_compressBlock_btlazy2_dictMatchState,
|
2213
2403
|
ZSTD_compressBlock_btopt_dictMatchState,
|
2214
2404
|
ZSTD_compressBlock_btultra_dictMatchState,
|
2215
|
-
ZSTD_compressBlock_btultra_dictMatchState }
|
2405
|
+
ZSTD_compressBlock_btultra_dictMatchState },
|
2406
|
+
{ NULL /* default for 0 */,
|
2407
|
+
NULL,
|
2408
|
+
NULL,
|
2409
|
+
ZSTD_compressBlock_greedy_dedicatedDictSearch,
|
2410
|
+
ZSTD_compressBlock_lazy_dedicatedDictSearch,
|
2411
|
+
ZSTD_compressBlock_lazy2_dedicatedDictSearch,
|
2412
|
+
NULL,
|
2413
|
+
NULL,
|
2414
|
+
NULL,
|
2415
|
+
NULL }
|
2216
2416
|
};
|
2217
2417
|
ZSTD_blockCompressor selectedCompressor;
|
2218
2418
|
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
|
@@ -2226,7 +2426,7 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
|
|
2226
2426
|
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
|
2227
2427
|
const BYTE* anchor, size_t lastLLSize)
|
2228
2428
|
{
|
2229
|
-
|
2429
|
+
ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);
|
2230
2430
|
seqStorePtr->lit += lastLLSize;
|
2231
2431
|
}
|
2232
2432
|
|
@@ -2247,7 +2447,11 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2247
2447
|
/* Assert that we have correctly flushed the ctx params into the ms's copy */
|
2248
2448
|
ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
|
2249
2449
|
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
|
2250
|
-
|
2450
|
+
if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
|
2451
|
+
ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
|
2452
|
+
} else {
|
2453
|
+
ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
|
2454
|
+
}
|
2251
2455
|
return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
|
2252
2456
|
}
|
2253
2457
|
ZSTD_resetSeqStore(&(zc->seqStore));
|
@@ -2263,10 +2467,10 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2263
2467
|
/* limited update after a very long match */
|
2264
2468
|
{ const BYTE* const base = ms->window.base;
|
2265
2469
|
const BYTE* const istart = (const BYTE*)src;
|
2266
|
-
const U32
|
2470
|
+
const U32 curr = (U32)(istart-base);
|
2267
2471
|
if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */
|
2268
|
-
if (
|
2269
|
-
ms->nextToUpdate =
|
2472
|
+
if (curr > ms->nextToUpdate + 384)
|
2473
|
+
ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
|
2270
2474
|
}
|
2271
2475
|
|
2272
2476
|
/* select and store sequences */
|
@@ -2286,7 +2490,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2286
2490
|
src, srcSize);
|
2287
2491
|
assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
|
2288
2492
|
} else if (zc->appliedParams.ldmParams.enableLdm) {
|
2289
|
-
rawSeqStore_t ldmSeqStore =
|
2493
|
+
rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
|
2290
2494
|
|
2291
2495
|
ldmSeqStore.seq = zc->ldmSequences;
|
2292
2496
|
ldmSeqStore.capacity = zc->maxNbLdmSequences;
|
@@ -2303,6 +2507,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2303
2507
|
assert(ldmSeqStore.pos == ldmSeqStore.size);
|
2304
2508
|
} else { /* not long range mode */
|
2305
2509
|
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
|
2510
|
+
ms->ldmSeqStore = NULL;
|
2306
2511
|
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
|
2307
2512
|
}
|
2308
2513
|
{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
|
@@ -2314,17 +2519,25 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2314
2519
|
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
2315
2520
|
{
|
2316
2521
|
const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
|
2317
|
-
const seqDef*
|
2318
|
-
size_t
|
2522
|
+
const seqDef* seqStoreSeqs = seqStore->sequencesStart;
|
2523
|
+
size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
|
2524
|
+
size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
|
2525
|
+
size_t literalsRead = 0;
|
2526
|
+
size_t lastLLSize;
|
2319
2527
|
|
2320
2528
|
ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
|
2321
|
-
size_t i;
|
2529
|
+
size_t i;
|
2530
|
+
repcodes_t updatedRepcodes;
|
2322
2531
|
|
2323
2532
|
assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
|
2324
|
-
|
2325
|
-
|
2326
|
-
|
2327
|
-
|
2533
|
+
/* Ensure we have enough space for last literals "sequence" */
|
2534
|
+
assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
|
2535
|
+
ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
2536
|
+
for (i = 0; i < seqStoreSeqSize; ++i) {
|
2537
|
+
U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
|
2538
|
+
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
|
2539
|
+
outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
|
2540
|
+
outSeqs[i].rep = 0;
|
2328
2541
|
|
2329
2542
|
if (i == seqStore->longLengthPos) {
|
2330
2543
|
if (seqStore->longLengthID == 1) {
|
@@ -2334,39 +2547,44 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
2334
2547
|
}
|
2335
2548
|
}
|
2336
2549
|
|
2337
|
-
if (
|
2338
|
-
|
2339
|
-
|
2340
|
-
|
2341
|
-
|
2342
|
-
|
2343
|
-
|
2550
|
+
if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
|
2551
|
+
/* Derive the correct offset corresponding to a repcode */
|
2552
|
+
outSeqs[i].rep = seqStoreSeqs[i].offset;
|
2553
|
+
if (outSeqs[i].litLength != 0) {
|
2554
|
+
rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
|
2555
|
+
} else {
|
2556
|
+
if (outSeqs[i].rep == 3) {
|
2557
|
+
rawOffset = updatedRepcodes.rep[0] - 1;
|
2344
2558
|
} else {
|
2345
|
-
|
2559
|
+
rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
|
2346
2560
|
}
|
2347
|
-
++outSeqs[i].rep;
|
2348
|
-
}
|
2349
|
-
assert(repIdx >= -3);
|
2350
|
-
outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
|
2351
|
-
if (outSeqs[i].rep == 4) {
|
2352
|
-
--outSeqs[i].offset;
|
2353
2561
|
}
|
2354
|
-
} else {
|
2355
|
-
outSeqs[i].offset -= ZSTD_REP_NUM;
|
2356
2562
|
}
|
2357
|
-
|
2358
|
-
|
2359
|
-
|
2360
|
-
|
2563
|
+
outSeqs[i].offset = rawOffset;
|
2564
|
+
/* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
|
2565
|
+
so we provide seqStoreSeqs[i].offset - 1 */
|
2566
|
+
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
|
2567
|
+
seqStoreSeqs[i].offset - 1,
|
2568
|
+
seqStoreSeqs[i].litLength == 0);
|
2569
|
+
literalsRead += outSeqs[i].litLength;
|
2361
2570
|
}
|
2362
|
-
|
2571
|
+
/* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
|
2572
|
+
* If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
|
2573
|
+
* for the block boundary, according to the API.
|
2574
|
+
*/
|
2575
|
+
assert(seqStoreLiteralsSize >= literalsRead);
|
2576
|
+
lastLLSize = seqStoreLiteralsSize - literalsRead;
|
2577
|
+
outSeqs[i].litLength = (U32)lastLLSize;
|
2578
|
+
outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
|
2579
|
+
seqStoreSeqSize++;
|
2580
|
+
zc->seqCollector.seqIndex += seqStoreSeqSize;
|
2363
2581
|
}
|
2364
2582
|
|
2365
|
-
size_t
|
2366
|
-
|
2583
|
+
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
2584
|
+
size_t outSeqsSize, const void* src, size_t srcSize)
|
2367
2585
|
{
|
2368
2586
|
const size_t dstCapacity = ZSTD_compressBound(srcSize);
|
2369
|
-
void* dst =
|
2587
|
+
void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
|
2370
2588
|
SeqCollector seqCollector;
|
2371
2589
|
|
2372
2590
|
RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
|
@@ -2378,16 +2596,47 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
|
2378
2596
|
zc->seqCollector = seqCollector;
|
2379
2597
|
|
2380
2598
|
ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
|
2381
|
-
|
2599
|
+
ZSTD_customFree(dst, ZSTD_defaultCMem);
|
2382
2600
|
return zc->seqCollector.seqIndex;
|
2383
2601
|
}
|
2384
2602
|
|
2385
|
-
|
2386
|
-
|
2603
|
+
size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
|
2604
|
+
size_t in = 0;
|
2605
|
+
size_t out = 0;
|
2606
|
+
for (; in < seqsSize; ++in) {
|
2607
|
+
if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
|
2608
|
+
if (in != seqsSize - 1) {
|
2609
|
+
sequences[in+1].litLength += sequences[in].litLength;
|
2610
|
+
}
|
2611
|
+
} else {
|
2612
|
+
sequences[out] = sequences[in];
|
2613
|
+
++out;
|
2614
|
+
}
|
2615
|
+
}
|
2616
|
+
return out;
|
2617
|
+
}
|
2618
|
+
|
2619
|
+
/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */
|
2620
|
+
static int ZSTD_isRLE(const BYTE* src, size_t length) {
|
2621
|
+
const BYTE* ip = src;
|
2622
|
+
const BYTE value = ip[0];
|
2623
|
+
const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
|
2624
|
+
const size_t unrollSize = sizeof(size_t) * 4;
|
2625
|
+
const size_t unrollMask = unrollSize - 1;
|
2626
|
+
const size_t prefixLength = length & unrollMask;
|
2387
2627
|
size_t i;
|
2388
|
-
|
2389
|
-
|
2390
|
-
|
2628
|
+
size_t u;
|
2629
|
+
if (length == 1) return 1;
|
2630
|
+
/* Check if prefix is RLE first before using unrolled loop */
|
2631
|
+
if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
|
2632
|
+
return 0;
|
2633
|
+
}
|
2634
|
+
for (i = prefixLength; i != length; i += unrollSize) {
|
2635
|
+
for (u = 0; u < unrollSize; u += sizeof(size_t)) {
|
2636
|
+
if (MEM_readST(ip + i + u) != valueST) {
|
2637
|
+
return 0;
|
2638
|
+
}
|
2639
|
+
}
|
2391
2640
|
}
|
2392
2641
|
return 1;
|
2393
2642
|
}
|
@@ -2434,18 +2683,25 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|
2434
2683
|
|
2435
2684
|
if (zc->seqCollector.collectSequences) {
|
2436
2685
|
ZSTD_copyBlockSequences(zc);
|
2686
|
+
ZSTD_confirmRepcodesAndEntropyTables(zc);
|
2437
2687
|
return 0;
|
2438
2688
|
}
|
2439
2689
|
|
2440
2690
|
/* encode sequences and literals */
|
2441
|
-
cSize =
|
2691
|
+
cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
|
2442
2692
|
&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
|
2443
2693
|
&zc->appliedParams,
|
2444
2694
|
dst, dstCapacity,
|
2445
2695
|
srcSize,
|
2446
|
-
zc->entropyWorkspace,
|
2696
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
2447
2697
|
zc->bmi2);
|
2448
2698
|
|
2699
|
+
if (zc->seqCollector.collectSequences) {
|
2700
|
+
ZSTD_copyBlockSequences(zc);
|
2701
|
+
return 0;
|
2702
|
+
}
|
2703
|
+
|
2704
|
+
|
2449
2705
|
if (frame &&
|
2450
2706
|
/* We don't want to emit our first block as a RLE even if it qualifies because
|
2451
2707
|
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
|
@@ -2593,7 +2849,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
|
2593
2849
|
|
2594
2850
|
assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
|
2595
2851
|
|
2596
|
-
DEBUGLOG(
|
2852
|
+
DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
|
2597
2853
|
if (cctx->appliedParams.fParams.checksumFlag && srcSize)
|
2598
2854
|
XXH64_update(&cctx->xxhState, src, srcSize);
|
2599
2855
|
|
@@ -2673,7 +2929,6 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
|
|
2673
2929
|
"dst buf is too small to fit worst-case frame header size.");
|
2674
2930
|
DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
|
2675
2931
|
!params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
|
2676
|
-
|
2677
2932
|
if (params->format == ZSTD_f_zstd1) {
|
2678
2933
|
MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
|
2679
2934
|
pos = 4;
|
@@ -2725,6 +2980,7 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe
|
|
2725
2980
|
cctx->externSeqStore.size = nbSeq;
|
2726
2981
|
cctx->externSeqStore.capacity = nbSeq;
|
2727
2982
|
cctx->externSeqStore.pos = 0;
|
2983
|
+
cctx->externSeqStore.posInSequence = 0;
|
2728
2984
|
return 0;
|
2729
2985
|
}
|
2730
2986
|
|
@@ -2862,8 +3118,12 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
2862
3118
|
case ZSTD_greedy:
|
2863
3119
|
case ZSTD_lazy:
|
2864
3120
|
case ZSTD_lazy2:
|
2865
|
-
if (chunk >= HASH_READ_SIZE)
|
3121
|
+
if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
|
3122
|
+
assert(chunk == remaining); /* must load everything in one go */
|
3123
|
+
ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
|
3124
|
+
} else if (chunk >= HASH_READ_SIZE) {
|
2866
3125
|
ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
|
3126
|
+
}
|
2867
3127
|
break;
|
2868
3128
|
|
2869
3129
|
case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
|
@@ -2887,22 +3147,28 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
2887
3147
|
|
2888
3148
|
|
2889
3149
|
/* Dictionaries that assign zero probability to symbols that show up causes problems
|
2890
|
-
|
2891
|
-
|
2892
|
-
|
2893
|
-
static
|
3150
|
+
* when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
|
3151
|
+
* and only dictionaries with 100% valid symbols can be assumed valid.
|
3152
|
+
*/
|
3153
|
+
static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
|
3154
|
+
{
|
2894
3155
|
U32 s;
|
2895
|
-
|
3156
|
+
if (dictMaxSymbolValue < maxSymbolValue) {
|
3157
|
+
return FSE_repeat_check;
|
3158
|
+
}
|
2896
3159
|
for (s = 0; s <= maxSymbolValue; ++s) {
|
2897
|
-
|
3160
|
+
if (normalizedCounter[s] == 0) {
|
3161
|
+
return FSE_repeat_check;
|
3162
|
+
}
|
2898
3163
|
}
|
2899
|
-
return
|
3164
|
+
return FSE_repeat_valid;
|
2900
3165
|
}
|
2901
3166
|
|
2902
3167
|
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
2903
|
-
short* offcodeNCount, unsigned* offcodeMaxValue,
|
2904
3168
|
const void* const dict, size_t dictSize)
|
2905
3169
|
{
|
3170
|
+
short offcodeNCount[MaxOff+1];
|
3171
|
+
unsigned offcodeMaxValue = MaxOff;
|
2906
3172
|
const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */
|
2907
3173
|
const BYTE* const dictEnd = dictPtr + dictSize;
|
2908
3174
|
dictPtr += 8;
|
@@ -2924,16 +3190,16 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
2924
3190
|
}
|
2925
3191
|
|
2926
3192
|
{ unsigned offcodeLog;
|
2927
|
-
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
3193
|
+
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
2928
3194
|
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
|
2929
3195
|
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
|
2930
|
-
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
2931
3196
|
/* fill all offset symbols to avoid garbage at end of table */
|
2932
3197
|
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
|
2933
3198
|
bs->entropy.fse.offcodeCTable,
|
2934
3199
|
offcodeNCount, MaxOff, offcodeLog,
|
2935
3200
|
workspace, HUF_WORKSPACE_SIZE)),
|
2936
3201
|
dictionary_corrupted, "");
|
3202
|
+
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
2937
3203
|
dictPtr += offcodeHeaderSize;
|
2938
3204
|
}
|
2939
3205
|
|
@@ -2942,13 +3208,12 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
2942
3208
|
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
|
2943
3209
|
RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
|
2944
3210
|
RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
|
2945
|
-
/* Every match length code must have non-zero probability */
|
2946
|
-
FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), "");
|
2947
3211
|
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
|
2948
3212
|
bs->entropy.fse.matchlengthCTable,
|
2949
3213
|
matchlengthNCount, matchlengthMaxValue, matchlengthLog,
|
2950
3214
|
workspace, HUF_WORKSPACE_SIZE)),
|
2951
3215
|
dictionary_corrupted, "");
|
3216
|
+
bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
|
2952
3217
|
dictPtr += matchlengthHeaderSize;
|
2953
3218
|
}
|
2954
3219
|
|
@@ -2957,13 +3222,12 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
2957
3222
|
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
|
2958
3223
|
RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
|
2959
3224
|
RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
|
2960
|
-
/* Every literal length code must have non-zero probability */
|
2961
|
-
FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), "");
|
2962
3225
|
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
|
2963
3226
|
bs->entropy.fse.litlengthCTable,
|
2964
3227
|
litlengthNCount, litlengthMaxValue, litlengthLog,
|
2965
3228
|
workspace, HUF_WORKSPACE_SIZE)),
|
2966
3229
|
dictionary_corrupted, "");
|
3230
|
+
bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
|
2967
3231
|
dictPtr += litlengthHeaderSize;
|
2968
3232
|
}
|
2969
3233
|
|
@@ -2973,6 +3237,22 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
2973
3237
|
bs->rep[2] = MEM_readLE32(dictPtr+8);
|
2974
3238
|
dictPtr += 12;
|
2975
3239
|
|
3240
|
+
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
3241
|
+
U32 offcodeMax = MaxOff;
|
3242
|
+
if (dictContentSize <= ((U32)-1) - 128 KB) {
|
3243
|
+
U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
|
3244
|
+
offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
|
3245
|
+
}
|
3246
|
+
/* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
|
3247
|
+
bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));
|
3248
|
+
|
3249
|
+
/* All repCodes must be <= dictContentSize and != 0 */
|
3250
|
+
{ U32 u;
|
3251
|
+
for (u=0; u<3; u++) {
|
3252
|
+
RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
|
3253
|
+
RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
|
3254
|
+
} } }
|
3255
|
+
|
2976
3256
|
return dictPtr - (const BYTE*)dict;
|
2977
3257
|
}
|
2978
3258
|
|
@@ -2995,8 +3275,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
2995
3275
|
{
|
2996
3276
|
const BYTE* dictPtr = (const BYTE*)dict;
|
2997
3277
|
const BYTE* const dictEnd = dictPtr + dictSize;
|
2998
|
-
short offcodeNCount[MaxOff+1];
|
2999
|
-
unsigned offcodeMaxValue = MaxOff;
|
3000
3278
|
size_t dictID;
|
3001
3279
|
size_t eSize;
|
3002
3280
|
|
@@ -3005,32 +3283,16 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
3005
3283
|
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
|
3006
3284
|
|
3007
3285
|
dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
|
3008
|
-
eSize = ZSTD_loadCEntropy(bs, workspace,
|
3286
|
+
eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
|
3009
3287
|
FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
|
3010
3288
|
dictPtr += eSize;
|
3011
3289
|
|
3012
|
-
{
|
3013
|
-
|
3014
|
-
if (dictContentSize <= ((U32)-1) - 128 KB) {
|
3015
|
-
U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
|
3016
|
-
offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
|
3017
|
-
}
|
3018
|
-
/* All offset values <= dictContentSize + 128 KB must be representable */
|
3019
|
-
FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), "");
|
3020
|
-
/* All repCodes must be <= dictContentSize and != 0*/
|
3021
|
-
{ U32 u;
|
3022
|
-
for (u=0; u<3; u++) {
|
3023
|
-
RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
|
3024
|
-
RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
|
3025
|
-
} }
|
3026
|
-
|
3027
|
-
bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
|
3028
|
-
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
|
3029
|
-
bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
|
3290
|
+
{
|
3291
|
+
size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
3030
3292
|
FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
|
3031
3293
|
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
|
3032
|
-
return dictID;
|
3033
3294
|
}
|
3295
|
+
return dictID;
|
3034
3296
|
}
|
3035
3297
|
|
3036
3298
|
/** ZSTD_compress_insertDictionary() :
|
@@ -3074,7 +3336,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
3074
3336
|
}
|
3075
3337
|
|
3076
3338
|
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
|
3077
|
-
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (
|
3339
|
+
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
|
3078
3340
|
|
3079
3341
|
/*! ZSTD_compressBegin_internal() :
|
3080
3342
|
* @return : 0, or an error code */
|
@@ -3106,7 +3368,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
|
3106
3368
|
ZSTD_compress_insertDictionary(
|
3107
3369
|
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
3108
3370
|
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
|
3109
|
-
cdict->dictContentSize, dictContentType, dtlm,
|
3371
|
+
cdict->dictContentSize, cdict->dictContentType, dtlm,
|
3110
3372
|
cctx->entropyWorkspace)
|
3111
3373
|
: ZSTD_compress_insertDictionary(
|
3112
3374
|
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
@@ -3153,7 +3415,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
|
|
3153
3415
|
|
3154
3416
|
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
|
3155
3417
|
{
|
3156
|
-
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
3418
|
+
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
|
3157
3419
|
ZSTD_CCtx_params const cctxParams =
|
3158
3420
|
ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms);
|
3159
3421
|
DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
|
@@ -3234,7 +3496,6 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
|
|
3234
3496
|
return cSize + endResult;
|
3235
3497
|
}
|
3236
3498
|
|
3237
|
-
|
3238
3499
|
static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
|
3239
3500
|
void* dst, size_t dstCapacity,
|
3240
3501
|
const void* src, size_t srcSize,
|
@@ -3287,7 +3548,7 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
|
|
3287
3548
|
const void* dict, size_t dictSize,
|
3288
3549
|
int compressionLevel)
|
3289
3550
|
{
|
3290
|
-
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0);
|
3551
|
+
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
|
3291
3552
|
ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms);
|
3292
3553
|
DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
|
3293
3554
|
assert(params.fParams.contentSizeFlag == 1);
|
@@ -3309,10 +3570,17 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity,
|
|
3309
3570
|
int compressionLevel)
|
3310
3571
|
{
|
3311
3572
|
size_t result;
|
3573
|
+
#if ZSTD_COMPRESS_HEAPMODE
|
3574
|
+
ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
3575
|
+
RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
|
3576
|
+
result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
|
3577
|
+
ZSTD_freeCCtx(cctx);
|
3578
|
+
#else
|
3312
3579
|
ZSTD_CCtx ctxBody;
|
3313
3580
|
ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
|
3314
3581
|
result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
|
3315
3582
|
ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */
|
3583
|
+
#endif
|
3316
3584
|
return result;
|
3317
3585
|
}
|
3318
3586
|
|
@@ -3335,7 +3603,7 @@ size_t ZSTD_estimateCDictSize_advanced(
|
|
3335
3603
|
|
3336
3604
|
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
|
3337
3605
|
{
|
3338
|
-
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
3606
|
+
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
|
3339
3607
|
return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
|
3340
3608
|
}
|
3341
3609
|
|
@@ -3353,20 +3621,25 @@ static size_t ZSTD_initCDict_internal(
|
|
3353
3621
|
const void* dictBuffer, size_t dictSize,
|
3354
3622
|
ZSTD_dictLoadMethod_e dictLoadMethod,
|
3355
3623
|
ZSTD_dictContentType_e dictContentType,
|
3356
|
-
|
3624
|
+
ZSTD_CCtx_params params)
|
3357
3625
|
{
|
3358
3626
|
DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
|
3359
|
-
assert(!ZSTD_checkCParams(cParams));
|
3360
|
-
cdict->matchState.cParams = cParams;
|
3627
|
+
assert(!ZSTD_checkCParams(params.cParams));
|
3628
|
+
cdict->matchState.cParams = params.cParams;
|
3629
|
+
cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
|
3630
|
+
if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
|
3631
|
+
cdict->matchState.dedicatedDictSearch = 0;
|
3632
|
+
}
|
3361
3633
|
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
|
3362
3634
|
cdict->dictContent = dictBuffer;
|
3363
3635
|
} else {
|
3364
3636
|
void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
|
3365
3637
|
RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
|
3366
3638
|
cdict->dictContent = internalBuffer;
|
3367
|
-
|
3639
|
+
ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
|
3368
3640
|
}
|
3369
3641
|
cdict->dictContentSize = dictSize;
|
3642
|
+
cdict->dictContentType = dictContentType;
|
3370
3643
|
|
3371
3644
|
cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
|
3372
3645
|
|
@@ -3376,18 +3649,15 @@ static size_t ZSTD_initCDict_internal(
|
|
3376
3649
|
FORWARD_IF_ERROR(ZSTD_reset_matchState(
|
3377
3650
|
&cdict->matchState,
|
3378
3651
|
&cdict->workspace,
|
3379
|
-
&cParams,
|
3652
|
+
¶ms.cParams,
|
3380
3653
|
ZSTDcrp_makeClean,
|
3381
3654
|
ZSTDirp_reset,
|
3382
3655
|
ZSTD_resetTarget_CDict), "");
|
3383
3656
|
/* (Maybe) load the dictionary
|
3384
3657
|
* Skips loading the dictionary if it is < 8 bytes.
|
3385
3658
|
*/
|
3386
|
-
{
|
3387
|
-
memset(¶ms, 0, sizeof(params));
|
3388
|
-
params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
3659
|
+
{ params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
3389
3660
|
params.fParams.contentSizeFlag = 1;
|
3390
|
-
params.cParams = cParams;
|
3391
3661
|
{ size_t const dictID = ZSTD_compress_insertDictionary(
|
3392
3662
|
&cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
|
3393
3663
|
¶ms, cdict->dictContent, cdict->dictContentSize,
|
@@ -3401,13 +3671,11 @@ static size_t ZSTD_initCDict_internal(
|
|
3401
3671
|
return 0;
|
3402
3672
|
}
|
3403
3673
|
|
3404
|
-
ZSTD_CDict*
|
3674
|
+
static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
|
3405
3675
|
ZSTD_dictLoadMethod_e dictLoadMethod,
|
3406
|
-
ZSTD_dictContentType_e dictContentType,
|
3407
3676
|
ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
|
3408
3677
|
{
|
3409
|
-
|
3410
|
-
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
3678
|
+
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
|
3411
3679
|
|
3412
3680
|
{ size_t const workspaceSize =
|
3413
3681
|
ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
|
@@ -3415,16 +3683,16 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
|
3415
3683
|
ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
|
3416
3684
|
(dictLoadMethod == ZSTD_dlm_byRef ? 0
|
3417
3685
|
: ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
|
3418
|
-
void* const workspace =
|
3686
|
+
void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
|
3419
3687
|
ZSTD_cwksp ws;
|
3420
3688
|
ZSTD_CDict* cdict;
|
3421
3689
|
|
3422
3690
|
if (!workspace) {
|
3423
|
-
|
3691
|
+
ZSTD_customFree(workspace, customMem);
|
3424
3692
|
return NULL;
|
3425
3693
|
}
|
3426
3694
|
|
3427
|
-
ZSTD_cwksp_init(&ws, workspace, workspaceSize);
|
3695
|
+
ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);
|
3428
3696
|
|
3429
3697
|
cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
|
3430
3698
|
assert(cdict != NULL);
|
@@ -3432,35 +3700,94 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
|
3432
3700
|
cdict->customMem = customMem;
|
3433
3701
|
cdict->compressionLevel = 0; /* signals advanced API usage */
|
3434
3702
|
|
3435
|
-
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
|
3436
|
-
dictBuffer, dictSize,
|
3437
|
-
dictLoadMethod, dictContentType,
|
3438
|
-
cParams) )) {
|
3439
|
-
ZSTD_freeCDict(cdict);
|
3440
|
-
return NULL;
|
3441
|
-
}
|
3442
|
-
|
3443
3703
|
return cdict;
|
3444
3704
|
}
|
3445
3705
|
}
|
3446
3706
|
|
3707
|
+
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
3708
|
+
ZSTD_dictLoadMethod_e dictLoadMethod,
|
3709
|
+
ZSTD_dictContentType_e dictContentType,
|
3710
|
+
ZSTD_compressionParameters cParams,
|
3711
|
+
ZSTD_customMem customMem)
|
3712
|
+
{
|
3713
|
+
ZSTD_CCtx_params cctxParams;
|
3714
|
+
ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
|
3715
|
+
ZSTD_CCtxParams_init(&cctxParams, 0);
|
3716
|
+
cctxParams.cParams = cParams;
|
3717
|
+
cctxParams.customMem = customMem;
|
3718
|
+
return ZSTD_createCDict_advanced2(
|
3719
|
+
dictBuffer, dictSize,
|
3720
|
+
dictLoadMethod, dictContentType,
|
3721
|
+
&cctxParams, customMem);
|
3722
|
+
}
|
3723
|
+
|
3724
|
+
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
|
3725
|
+
const void* dict, size_t dictSize,
|
3726
|
+
ZSTD_dictLoadMethod_e dictLoadMethod,
|
3727
|
+
ZSTD_dictContentType_e dictContentType,
|
3728
|
+
const ZSTD_CCtx_params* originalCctxParams,
|
3729
|
+
ZSTD_customMem customMem)
|
3730
|
+
{
|
3731
|
+
ZSTD_CCtx_params cctxParams = *originalCctxParams;
|
3732
|
+
ZSTD_compressionParameters cParams;
|
3733
|
+
ZSTD_CDict* cdict;
|
3734
|
+
|
3735
|
+
DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
|
3736
|
+
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
3737
|
+
|
3738
|
+
if (cctxParams.enableDedicatedDictSearch) {
|
3739
|
+
cParams = ZSTD_dedicatedDictSearch_getCParams(
|
3740
|
+
cctxParams.compressionLevel, dictSize);
|
3741
|
+
ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
|
3742
|
+
} else {
|
3743
|
+
cParams = ZSTD_getCParamsFromCCtxParams(
|
3744
|
+
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
|
3745
|
+
}
|
3746
|
+
|
3747
|
+
if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
|
3748
|
+
/* Fall back to non-DDSS params */
|
3749
|
+
cctxParams.enableDedicatedDictSearch = 0;
|
3750
|
+
cParams = ZSTD_getCParamsFromCCtxParams(
|
3751
|
+
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
|
3752
|
+
}
|
3753
|
+
|
3754
|
+
cctxParams.cParams = cParams;
|
3755
|
+
|
3756
|
+
cdict = ZSTD_createCDict_advanced_internal(dictSize,
|
3757
|
+
dictLoadMethod, cctxParams.cParams,
|
3758
|
+
customMem);
|
3759
|
+
|
3760
|
+
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
|
3761
|
+
dict, dictSize,
|
3762
|
+
dictLoadMethod, dictContentType,
|
3763
|
+
cctxParams) )) {
|
3764
|
+
ZSTD_freeCDict(cdict);
|
3765
|
+
return NULL;
|
3766
|
+
}
|
3767
|
+
|
3768
|
+
return cdict;
|
3769
|
+
}
|
3770
|
+
|
3447
3771
|
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
|
3448
3772
|
{
|
3449
|
-
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
3450
|
-
ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
|
3773
|
+
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
|
3774
|
+
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
|
3451
3775
|
ZSTD_dlm_byCopy, ZSTD_dct_auto,
|
3452
3776
|
cParams, ZSTD_defaultCMem);
|
3453
3777
|
if (cdict)
|
3454
|
-
cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
|
3778
|
+
cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
|
3455
3779
|
return cdict;
|
3456
3780
|
}
|
3457
3781
|
|
3458
3782
|
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
|
3459
3783
|
{
|
3460
|
-
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
3461
|
-
|
3784
|
+
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
|
3785
|
+
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
|
3462
3786
|
ZSTD_dlm_byRef, ZSTD_dct_auto,
|
3463
3787
|
cParams, ZSTD_defaultCMem);
|
3788
|
+
if (cdict)
|
3789
|
+
cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
|
3790
|
+
return cdict;
|
3464
3791
|
}
|
3465
3792
|
|
3466
3793
|
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
|
@@ -3470,7 +3797,7 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
|
|
3470
3797
|
int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
|
3471
3798
|
ZSTD_cwksp_free(&cdict->workspace, cMem);
|
3472
3799
|
if (!cdictInWorkspace) {
|
3473
|
-
|
3800
|
+
ZSTD_customFree(cdict, cMem);
|
3474
3801
|
}
|
3475
3802
|
return 0;
|
3476
3803
|
}
|
@@ -3503,12 +3830,13 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
|
|
3503
3830
|
+ ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
|
3504
3831
|
+ matchStateSize;
|
3505
3832
|
ZSTD_CDict* cdict;
|
3833
|
+
ZSTD_CCtx_params params;
|
3506
3834
|
|
3507
3835
|
if ((size_t)workspace & 7) return NULL; /* 8-aligned */
|
3508
3836
|
|
3509
3837
|
{
|
3510
3838
|
ZSTD_cwksp ws;
|
3511
|
-
ZSTD_cwksp_init(&ws, workspace, workspaceSize);
|
3839
|
+
ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
|
3512
3840
|
cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
|
3513
3841
|
if (cdict == NULL) return NULL;
|
3514
3842
|
ZSTD_cwksp_move(&cdict->workspace, &ws);
|
@@ -3518,10 +3846,13 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
|
|
3518
3846
|
(unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
|
3519
3847
|
if (workspaceSize < neededSize) return NULL;
|
3520
3848
|
|
3849
|
+
ZSTD_CCtxParams_init(¶ms, 0);
|
3850
|
+
params.cParams = cParams;
|
3851
|
+
|
3521
3852
|
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
|
3522
3853
|
dict, dictSize,
|
3523
3854
|
dictLoadMethod, dictContentType,
|
3524
|
-
|
3855
|
+
params) ))
|
3525
3856
|
return NULL;
|
3526
3857
|
|
3527
3858
|
return cdict;
|
@@ -3533,6 +3864,17 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
|
|
3533
3864
|
return cdict->matchState.cParams;
|
3534
3865
|
}
|
3535
3866
|
|
3867
|
+
/*! ZSTD_getDictID_fromCDict() :
|
3868
|
+
* Provides the dictID of the dictionary loaded into `cdict`.
|
3869
|
+
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
|
3870
|
+
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
|
3871
|
+
unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
|
3872
|
+
{
|
3873
|
+
if (cdict==NULL) return 0;
|
3874
|
+
return cdict->dictID;
|
3875
|
+
}
|
3876
|
+
|
3877
|
+
|
3536
3878
|
/* ZSTD_compressBegin_usingCDict_advanced() :
|
3537
3879
|
* cdict must be != NULL */
|
3538
3880
|
size_t ZSTD_compressBegin_usingCDict_advanced(
|
@@ -3640,32 +3982,12 @@ size_t ZSTD_CStreamOutSize(void)
|
|
3640
3982
|
return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
|
3641
3983
|
}
|
3642
3984
|
|
3643
|
-
static
|
3644
|
-
const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
|
3645
|
-
const ZSTD_CDict* const cdict,
|
3646
|
-
ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
|
3985
|
+
static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
|
3647
3986
|
{
|
3648
|
-
|
3649
|
-
|
3650
|
-
|
3651
|
-
|
3652
|
-
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
3653
|
-
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
|
3654
|
-
|
3655
|
-
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
|
3656
|
-
dict, dictSize, dictContentType, ZSTD_dtlm_fast,
|
3657
|
-
cdict,
|
3658
|
-
¶ms, pledgedSrcSize,
|
3659
|
-
ZSTDb_buffered) , "");
|
3660
|
-
|
3661
|
-
cctx->inToCompress = 0;
|
3662
|
-
cctx->inBuffPos = 0;
|
3663
|
-
cctx->inBuffTarget = cctx->blockSize
|
3664
|
-
+ (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
|
3665
|
-
cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
|
3666
|
-
cctx->streamStage = zcss_load;
|
3667
|
-
cctx->frameEnded = 0;
|
3668
|
-
return 0; /* ready to go */
|
3987
|
+
if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
|
3988
|
+
return ZSTD_cpm_attachDict;
|
3989
|
+
else
|
3990
|
+
return ZSTD_cpm_noAttachDict;
|
3669
3991
|
}
|
3670
3992
|
|
3671
3993
|
/* ZSTD_resetCStream():
|
@@ -3815,12 +4137,17 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
3815
4137
|
|
3816
4138
|
/* check expectations */
|
3817
4139
|
DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
|
3818
|
-
|
3819
|
-
|
3820
|
-
|
3821
|
-
|
4140
|
+
if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
|
4141
|
+
assert(zcs->inBuff != NULL);
|
4142
|
+
assert(zcs->inBuffSize > 0);
|
4143
|
+
}
|
4144
|
+
if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
|
4145
|
+
assert(zcs->outBuff != NULL);
|
4146
|
+
assert(zcs->outBuffSize > 0);
|
4147
|
+
}
|
3822
4148
|
assert(output->pos <= output->size);
|
3823
4149
|
assert(input->pos <= input->size);
|
4150
|
+
assert((U32)flushMode <= (U32)ZSTD_e_end);
|
3824
4151
|
|
3825
4152
|
while (someMoreWork) {
|
3826
4153
|
switch(zcs->streamStage)
|
@@ -3830,7 +4157,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
3830
4157
|
|
3831
4158
|
case zcss_load:
|
3832
4159
|
if ( (flushMode == ZSTD_e_end)
|
3833
|
-
&& ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)
|
4160
|
+
&& ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */
|
4161
|
+
|| zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */
|
3834
4162
|
&& (zcs->inBuffPos == 0) ) {
|
3835
4163
|
/* shortcut to compression pass directly into output buffer */
|
3836
4164
|
size_t const cSize = ZSTD_compressEnd(zcs,
|
@@ -3843,8 +4171,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
3843
4171
|
ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
|
3844
4172
|
someMoreWork = 0; break;
|
3845
4173
|
}
|
3846
|
-
/* complete loading into inBuffer */
|
3847
|
-
|
4174
|
+
/* complete loading into inBuffer in buffered mode */
|
4175
|
+
if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
|
4176
|
+
size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
|
3848
4177
|
size_t const loaded = ZSTD_limitCopy(
|
3849
4178
|
zcs->inBuff + zcs->inBuffPos, toLoad,
|
3850
4179
|
ip, iend-ip);
|
@@ -3864,31 +4193,49 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
3864
4193
|
}
|
3865
4194
|
/* compress current block (note : this stage cannot be stopped in the middle) */
|
3866
4195
|
DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
|
3867
|
-
{
|
4196
|
+
{ int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
|
4197
|
+
void* cDst;
|
3868
4198
|
size_t cSize;
|
3869
|
-
size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
|
3870
4199
|
size_t oSize = oend-op;
|
3871
|
-
|
3872
|
-
|
4200
|
+
size_t const iSize = inputBuffered
|
4201
|
+
? zcs->inBuffPos - zcs->inToCompress
|
4202
|
+
: MIN((size_t)(iend - ip), zcs->blockSize);
|
4203
|
+
if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
|
3873
4204
|
cDst = op; /* compress into output buffer, to skip flush stage */
|
3874
4205
|
else
|
3875
4206
|
cDst = zcs->outBuff, oSize = zcs->outBuffSize;
|
3876
|
-
|
3877
|
-
|
3878
|
-
|
3879
|
-
|
3880
|
-
|
3881
|
-
|
3882
|
-
|
3883
|
-
|
3884
|
-
|
3885
|
-
|
3886
|
-
zcs->
|
3887
|
-
|
3888
|
-
|
3889
|
-
|
3890
|
-
|
3891
|
-
|
4207
|
+
if (inputBuffered) {
|
4208
|
+
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
|
4209
|
+
cSize = lastBlock ?
|
4210
|
+
ZSTD_compressEnd(zcs, cDst, oSize,
|
4211
|
+
zcs->inBuff + zcs->inToCompress, iSize) :
|
4212
|
+
ZSTD_compressContinue(zcs, cDst, oSize,
|
4213
|
+
zcs->inBuff + zcs->inToCompress, iSize);
|
4214
|
+
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
|
4215
|
+
zcs->frameEnded = lastBlock;
|
4216
|
+
/* prepare next block */
|
4217
|
+
zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
|
4218
|
+
if (zcs->inBuffTarget > zcs->inBuffSize)
|
4219
|
+
zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
|
4220
|
+
DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
|
4221
|
+
(unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
|
4222
|
+
if (!lastBlock)
|
4223
|
+
assert(zcs->inBuffTarget <= zcs->inBuffSize);
|
4224
|
+
zcs->inToCompress = zcs->inBuffPos;
|
4225
|
+
} else {
|
4226
|
+
unsigned const lastBlock = (ip + iSize == iend);
|
4227
|
+
assert(flushMode == ZSTD_e_end /* Already validated */);
|
4228
|
+
cSize = lastBlock ?
|
4229
|
+
ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
|
4230
|
+
ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
|
4231
|
+
/* Consume the input prior to error checking to mirror buffered mode. */
|
4232
|
+
if (iSize > 0)
|
4233
|
+
ip += iSize;
|
4234
|
+
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
|
4235
|
+
zcs->frameEnded = lastBlock;
|
4236
|
+
if (lastBlock)
|
4237
|
+
assert(ip == iend);
|
4238
|
+
}
|
3892
4239
|
if (cDst == op) { /* no need to flush */
|
3893
4240
|
op += cSize;
|
3894
4241
|
if (zcs->frameEnded) {
|
@@ -3905,6 +4252,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
3905
4252
|
/* fall-through */
|
3906
4253
|
case zcss_flush:
|
3907
4254
|
DEBUGLOG(5, "flush stage");
|
4255
|
+
assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
|
3908
4256
|
{ size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
|
3909
4257
|
size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
|
3910
4258
|
zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
|
@@ -3959,6 +4307,116 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf
|
|
3959
4307
|
return ZSTD_nextInputSizeHint_MTorST(zcs);
|
3960
4308
|
}
|
3961
4309
|
|
4310
|
+
/* After a compression call set the expected input/output buffer.
|
4311
|
+
* This is validated at the start of the next compression call.
|
4312
|
+
*/
|
4313
|
+
static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
|
4314
|
+
{
|
4315
|
+
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
4316
|
+
cctx->expectedInBuffer = *input;
|
4317
|
+
}
|
4318
|
+
if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
|
4319
|
+
cctx->expectedOutBufferSize = output->size - output->pos;
|
4320
|
+
}
|
4321
|
+
}
|
4322
|
+
|
4323
|
+
/* Validate that the input/output buffers match the expectations set by
|
4324
|
+
* ZSTD_setBufferExpectations.
|
4325
|
+
*/
|
4326
|
+
static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
|
4327
|
+
ZSTD_outBuffer const* output,
|
4328
|
+
ZSTD_inBuffer const* input,
|
4329
|
+
ZSTD_EndDirective endOp)
|
4330
|
+
{
|
4331
|
+
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
4332
|
+
ZSTD_inBuffer const expect = cctx->expectedInBuffer;
|
4333
|
+
if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
|
4334
|
+
RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
|
4335
|
+
if (endOp != ZSTD_e_end)
|
4336
|
+
RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
|
4337
|
+
}
|
4338
|
+
if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
|
4339
|
+
size_t const outBufferSize = output->size - output->pos;
|
4340
|
+
if (cctx->expectedOutBufferSize != outBufferSize)
|
4341
|
+
RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
|
4342
|
+
}
|
4343
|
+
return 0;
|
4344
|
+
}
|
4345
|
+
|
4346
|
+
static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
4347
|
+
ZSTD_EndDirective endOp,
|
4348
|
+
size_t inSize) {
|
4349
|
+
ZSTD_CCtx_params params = cctx->requestedParams;
|
4350
|
+
ZSTD_prefixDict const prefixDict = cctx->prefixDict;
|
4351
|
+
FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
|
4352
|
+
ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
|
4353
|
+
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
|
4354
|
+
if (cctx->cdict)
|
4355
|
+
params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
|
4356
|
+
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
|
4357
|
+
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */
|
4358
|
+
{
|
4359
|
+
size_t const dictSize = prefixDict.dict
|
4360
|
+
? prefixDict.dictSize
|
4361
|
+
: (cctx->cdict ? cctx->cdict->dictContentSize : 0);
|
4362
|
+
ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
|
4363
|
+
params.cParams = ZSTD_getCParamsFromCCtxParams(
|
4364
|
+
&params, cctx->pledgedSrcSizePlusOne-1,
|
4365
|
+
dictSize, mode);
|
4366
|
+
}
|
4367
|
+
|
4368
|
+
if (ZSTD_CParams_shouldEnableLdm(&params.cParams)) {
|
4369
|
+
/* Enable LDM by default for optimal parser and window size >= 128MB */
|
4370
|
+
DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
|
4371
|
+
params.ldmParams.enableLdm = 1;
|
4372
|
+
}
|
4373
|
+
|
4374
|
+
#ifdef ZSTD_MULTITHREAD
|
4375
|
+
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
|
4376
|
+
params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
|
4377
|
+
}
|
4378
|
+
if (params.nbWorkers > 0) {
|
4379
|
+
/* mt context creation */
|
4380
|
+
if (cctx->mtctx == NULL) {
|
4381
|
+
DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
|
4382
|
+
params.nbWorkers);
|
4383
|
+
cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool);
|
4384
|
+
RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
|
4385
|
+
}
|
4386
|
+
/* mt compression */
|
4387
|
+
DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
|
4388
|
+
FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
|
4389
|
+
cctx->mtctx,
|
4390
|
+
prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
|
4391
|
+
cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
|
4392
|
+
cctx->streamStage = zcss_load;
|
4393
|
+
cctx->appliedParams = params;
|
4394
|
+
} else
|
4395
|
+
#endif
|
4396
|
+
{ U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
|
4397
|
+
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
4398
|
+
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
|
4399
|
+
prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
|
4400
|
+
cctx->cdict,
|
4401
|
+
&params, pledgedSrcSize,
|
4402
|
+
ZSTDb_buffered) , "");
|
4403
|
+
assert(cctx->appliedParams.nbWorkers == 0);
|
4404
|
+
cctx->inToCompress = 0;
|
4405
|
+
cctx->inBuffPos = 0;
|
4406
|
+
if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
|
4407
|
+
/* for small input: avoid automatic flush on reaching end of block, since
|
4408
|
+
* it would require to add a 3-bytes null block to end frame
|
4409
|
+
*/
|
4410
|
+
cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
|
4411
|
+
} else {
|
4412
|
+
cctx->inBuffTarget = 0;
|
4413
|
+
}
|
4414
|
+
cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
|
4415
|
+
cctx->streamStage = zcss_load;
|
4416
|
+
cctx->frameEnded = 0;
|
4417
|
+
}
|
4418
|
+
return 0;
|
4419
|
+
}
|
3962
4420
|
|
3963
4421
|
size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
3964
4422
|
ZSTD_outBuffer* output,
|
@@ -3967,82 +4425,65 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
3967
4425
|
{
|
3968
4426
|
DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
|
3969
4427
|
/* check conditions */
|
3970
|
-
RETURN_ERROR_IF(output->pos > output->size,
|
3971
|
-
RETURN_ERROR_IF(input->pos > input->size,
|
3972
|
-
|
4428
|
+
RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
|
4429
|
+
RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer");
|
4430
|
+
RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
|
4431
|
+
assert(cctx != NULL);
|
3973
4432
|
|
3974
4433
|
/* transparent initialization stage */
|
3975
4434
|
if (cctx->streamStage == zcss_init) {
|
3976
|
-
|
3977
|
-
|
3978
|
-
|
3979
|
-
memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
|
3980
|
-
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
|
3981
|
-
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
|
3982
|
-
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */
|
3983
|
-
params.cParams = ZSTD_getCParamsFromCCtxParams(
|
3984
|
-
&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
|
3985
|
-
|
3986
|
-
|
3987
|
-
#ifdef ZSTD_MULTITHREAD
|
3988
|
-
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
|
3989
|
-
params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
|
3990
|
-
}
|
3991
|
-
if (params.nbWorkers > 0) {
|
3992
|
-
/* mt context creation */
|
3993
|
-
if (cctx->mtctx == NULL) {
|
3994
|
-
DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
|
3995
|
-
params.nbWorkers);
|
3996
|
-
cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem);
|
3997
|
-
RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
|
3998
|
-
}
|
3999
|
-
/* mt compression */
|
4000
|
-
DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
|
4001
|
-
FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
|
4002
|
-
cctx->mtctx,
|
4003
|
-
prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
|
4004
|
-
cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
|
4005
|
-
cctx->streamStage = zcss_load;
|
4006
|
-
cctx->appliedParams.nbWorkers = params.nbWorkers;
|
4007
|
-
} else
|
4008
|
-
#endif
|
4009
|
-
{ FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx,
|
4010
|
-
prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
|
4011
|
-
cctx->cdict,
|
4012
|
-
params, cctx->pledgedSrcSizePlusOne-1) , "");
|
4013
|
-
assert(cctx->streamStage == zcss_load);
|
4014
|
-
assert(cctx->appliedParams.nbWorkers == 0);
|
4015
|
-
} }
|
4435
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
|
4436
|
+
ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */
|
4437
|
+
}
|
4016
4438
|
/* end of transparent initialization stage */
|
4017
4439
|
|
4440
|
+
FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
|
4018
4441
|
/* compression stage */
|
4019
4442
|
#ifdef ZSTD_MULTITHREAD
|
4020
4443
|
if (cctx->appliedParams.nbWorkers > 0) {
|
4021
|
-
int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
|
4022
4444
|
size_t flushMin;
|
4023
|
-
assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */);
|
4024
4445
|
if (cctx->cParamsChanged) {
|
4025
4446
|
ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
|
4026
4447
|
cctx->cParamsChanged = 0;
|
4027
4448
|
}
|
4028
|
-
|
4449
|
+
for (;;) {
|
4450
|
+
size_t const ipos = input->pos;
|
4451
|
+
size_t const opos = output->pos;
|
4029
4452
|
flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
|
4030
4453
|
if ( ZSTD_isError(flushMin)
|
4031
4454
|
|| (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
|
4032
4455
|
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
|
4033
4456
|
}
|
4034
4457
|
FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");
|
4035
|
-
|
4458
|
+
|
4459
|
+
if (endOp == ZSTD_e_continue) {
|
4460
|
+
/* We only require some progress with ZSTD_e_continue, not maximal progress.
|
4461
|
+
* We're done if we've consumed or produced any bytes, or either buffer is
|
4462
|
+
* full.
|
4463
|
+
*/
|
4464
|
+
if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size)
|
4465
|
+
break;
|
4466
|
+
} else {
|
4467
|
+
assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
|
4468
|
+
/* We require maximal progress. We're done when the flush is complete or the
|
4469
|
+
* output buffer is full.
|
4470
|
+
*/
|
4471
|
+
if (flushMin == 0 || output->pos == output->size)
|
4472
|
+
break;
|
4473
|
+
}
|
4474
|
+
}
|
4036
4475
|
DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
|
4037
4476
|
/* Either we don't require maximum forward progress, we've finished the
|
4038
4477
|
* flush, or we are out of output space.
|
4039
4478
|
*/
|
4040
|
-
assert(
|
4479
|
+
assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size);
|
4480
|
+
ZSTD_setBufferExpectations(cctx, output, input);
|
4041
4481
|
return flushMin;
|
4042
4482
|
}
|
4043
4483
|
#endif
|
4044
4484
|
FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
|
4045
4485
|
DEBUGLOG(5, "completed ZSTD_compressStream2");
|
4486
|
+
ZSTD_setBufferExpectations(cctx, output, input);
|
4046
4487
|
return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
|
4047
4488
|
}
|
4048
4489
|
|
@@ -4065,14 +4506,22 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
4065
4506
|
void* dst, size_t dstCapacity,
|
4066
4507
|
const void* src, size_t srcSize)
|
4067
4508
|
{
|
4509
|
+
ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;
|
4510
|
+
ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;
|
4068
4511
|
DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
|
4069
4512
|
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
|
4513
|
+
/* Enable stable input/output buffers. */
|
4514
|
+
cctx->requestedParams.inBufferMode = ZSTD_bm_stable;
|
4515
|
+
cctx->requestedParams.outBufferMode = ZSTD_bm_stable;
|
4070
4516
|
{ size_t oPos = 0;
|
4071
4517
|
size_t iPos = 0;
|
4072
4518
|
size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
|
4073
4519
|
dst, dstCapacity, &oPos,
|
4074
4520
|
src, srcSize, &iPos,
|
4075
4521
|
ZSTD_e_end);
|
4522
|
+
/* Reset to the original values. */
|
4523
|
+
cctx->requestedParams.inBufferMode = originalInBufferMode;
|
4524
|
+
cctx->requestedParams.outBufferMode = originalOutBufferMode;
|
4076
4525
|
FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
|
4077
4526
|
if (result != 0) { /* compression not completed, due to lack of output space */
|
4078
4527
|
assert(oPos == dstCapacity);
|
@@ -4083,6 +4532,409 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
4083
4532
|
}
|
4084
4533
|
}
|
4085
4534
|
|
4535
|
+
typedef struct {
|
4536
|
+
U32 idx; /* Index in array of ZSTD_Sequence */
|
4537
|
+
U32 posInSequence; /* Position within sequence at idx */
|
4538
|
+
size_t posInSrc; /* Number of bytes given by sequences provided so far */
|
4539
|
+
} ZSTD_sequencePosition;
|
4540
|
+
|
4541
|
+
/* Returns a ZSTD error code if sequence is not valid */
|
4542
|
+
static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
|
4543
|
+
size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
|
4544
|
+
size_t offsetBound;
|
4545
|
+
U32 windowSize = 1 << windowLog;
|
4546
|
+
/* posInSrc represents the amount of data the the decoder would decode up to this point.
|
4547
|
+
* As long as the amount of data decoded is less than or equal to window size, offsets may be
|
4548
|
+
* larger than the total length of output decoded in order to reference the dict, even larger than
|
4549
|
+
* window size. After output surpasses windowSize, we're limited to windowSize offsets again.
|
4550
|
+
*/
|
4551
|
+
offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
|
4552
|
+
RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
|
4553
|
+
RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
|
4554
|
+
return 0;
|
4555
|
+
}
|
4556
|
+
|
4557
|
+
/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
|
4558
|
+
static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) {
|
4559
|
+
U32 offCode = rawOffset + ZSTD_REP_MOVE;
|
4560
|
+
U32 repCode = 0;
|
4561
|
+
|
4562
|
+
if (!ll0 && rawOffset == rep[0]) {
|
4563
|
+
repCode = 1;
|
4564
|
+
} else if (rawOffset == rep[1]) {
|
4565
|
+
repCode = 2 - ll0;
|
4566
|
+
} else if (rawOffset == rep[2]) {
|
4567
|
+
repCode = 3 - ll0;
|
4568
|
+
} else if (ll0 && rawOffset == rep[0] - 1) {
|
4569
|
+
repCode = 3;
|
4570
|
+
}
|
4571
|
+
if (repCode) {
|
4572
|
+
/* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
|
4573
|
+
offCode = repCode - 1;
|
4574
|
+
}
|
4575
|
+
return offCode;
|
4576
|
+
}
|
4577
|
+
|
4578
|
+
/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
|
4579
|
+
* ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
|
4580
|
+
*/
|
4581
|
+
static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
4582
|
+
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
4583
|
+
const void* src, size_t blockSize) {
|
4584
|
+
U32 idx = seqPos->idx;
|
4585
|
+
BYTE const* ip = (BYTE const*)(src);
|
4586
|
+
const BYTE* const iend = ip + blockSize;
|
4587
|
+
repcodes_t updatedRepcodes;
|
4588
|
+
U32 dictSize;
|
4589
|
+
U32 litLength;
|
4590
|
+
U32 matchLength;
|
4591
|
+
U32 ll0;
|
4592
|
+
U32 offCode;
|
4593
|
+
|
4594
|
+
if (cctx->cdict) {
|
4595
|
+
dictSize = (U32)cctx->cdict->dictContentSize;
|
4596
|
+
} else if (cctx->prefixDict.dict) {
|
4597
|
+
dictSize = (U32)cctx->prefixDict.dictSize;
|
4598
|
+
} else {
|
4599
|
+
dictSize = 0;
|
4600
|
+
}
|
4601
|
+
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
4602
|
+
for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
|
4603
|
+
litLength = inSeqs[idx].litLength;
|
4604
|
+
matchLength = inSeqs[idx].matchLength;
|
4605
|
+
ll0 = litLength == 0;
|
4606
|
+
offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
|
4607
|
+
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
|
4608
|
+
|
4609
|
+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
|
4610
|
+
if (cctx->appliedParams.validateSequences) {
|
4611
|
+
seqPos->posInSrc += litLength + matchLength;
|
4612
|
+
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
4613
|
+
cctx->appliedParams.cParams.windowLog, dictSize,
|
4614
|
+
cctx->appliedParams.cParams.minMatch),
|
4615
|
+
"Sequence validation failed");
|
4616
|
+
}
|
4617
|
+
RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
|
4618
|
+
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
4619
|
+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
|
4620
|
+
ip += matchLength + litLength;
|
4621
|
+
}
|
4622
|
+
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
|
4623
|
+
|
4624
|
+
if (inSeqs[idx].litLength) {
|
4625
|
+
DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
|
4626
|
+
ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
|
4627
|
+
ip += inSeqs[idx].litLength;
|
4628
|
+
seqPos->posInSrc += inSeqs[idx].litLength;
|
4629
|
+
}
|
4630
|
+
RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
|
4631
|
+
seqPos->idx = idx+1;
|
4632
|
+
return 0;
|
4633
|
+
}
|
4634
|
+
|
4635
|
+
/* Returns the number of bytes to move the current read position back by. Only non-zero
|
4636
|
+
* if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
|
4637
|
+
* went wrong.
|
4638
|
+
*
|
4639
|
+
* This function will attempt to scan through blockSize bytes represented by the sequences
|
4640
|
+
* in inSeqs, storing any (partial) sequences.
|
4641
|
+
*
|
4642
|
+
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
|
4643
|
+
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
|
4644
|
+
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
|
4645
|
+
*/
|
4646
|
+
static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
4647
|
+
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
4648
|
+
const void* src, size_t blockSize) {
|
4649
|
+
U32 idx = seqPos->idx;
|
4650
|
+
U32 startPosInSequence = seqPos->posInSequence;
|
4651
|
+
U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
|
4652
|
+
size_t dictSize;
|
4653
|
+
BYTE const* ip = (BYTE const*)(src);
|
4654
|
+
BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */
|
4655
|
+
repcodes_t updatedRepcodes;
|
4656
|
+
U32 bytesAdjustment = 0;
|
4657
|
+
U32 finalMatchSplit = 0;
|
4658
|
+
U32 litLength;
|
4659
|
+
U32 matchLength;
|
4660
|
+
U32 rawOffset;
|
4661
|
+
U32 offCode;
|
4662
|
+
|
4663
|
+
if (cctx->cdict) {
|
4664
|
+
dictSize = cctx->cdict->dictContentSize;
|
4665
|
+
} else if (cctx->prefixDict.dict) {
|
4666
|
+
dictSize = cctx->prefixDict.dictSize;
|
4667
|
+
} else {
|
4668
|
+
dictSize = 0;
|
4669
|
+
}
|
4670
|
+
DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
|
4671
|
+
DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
|
4672
|
+
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
4673
|
+
while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
|
4674
|
+
const ZSTD_Sequence currSeq = inSeqs[idx];
|
4675
|
+
litLength = currSeq.litLength;
|
4676
|
+
matchLength = currSeq.matchLength;
|
4677
|
+
rawOffset = currSeq.offset;
|
4678
|
+
|
4679
|
+
/* Modify the sequence depending on where endPosInSequence lies */
|
4680
|
+
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
|
4681
|
+
if (startPosInSequence >= litLength) {
|
4682
|
+
startPosInSequence -= litLength;
|
4683
|
+
litLength = 0;
|
4684
|
+
matchLength -= startPosInSequence;
|
4685
|
+
} else {
|
4686
|
+
litLength -= startPosInSequence;
|
4687
|
+
}
|
4688
|
+
/* Move to the next sequence */
|
4689
|
+
endPosInSequence -= currSeq.litLength + currSeq.matchLength;
|
4690
|
+
startPosInSequence = 0;
|
4691
|
+
idx++;
|
4692
|
+
} else {
|
4693
|
+
/* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
|
4694
|
+
does not reach the end of the match. So, we have to split the sequence */
|
4695
|
+
DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
|
4696
|
+
currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
|
4697
|
+
if (endPosInSequence > litLength) {
|
4698
|
+
U32 firstHalfMatchLength;
|
4699
|
+
litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
|
4700
|
+
firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
|
4701
|
+
if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
|
4702
|
+
/* Only ever split the match if it is larger than the block size */
|
4703
|
+
U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
|
4704
|
+
if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
|
4705
|
+
/* Move the endPosInSequence backward so that it creates match of minMatch length */
|
4706
|
+
endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
|
4707
|
+
bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
|
4708
|
+
firstHalfMatchLength -= bytesAdjustment;
|
4709
|
+
}
|
4710
|
+
matchLength = firstHalfMatchLength;
|
4711
|
+
/* Flag that we split the last match - after storing the sequence, exit the loop,
|
4712
|
+
but keep the value of endPosInSequence */
|
4713
|
+
finalMatchSplit = 1;
|
4714
|
+
} else {
|
4715
|
+
/* Move the position in sequence backwards so that we don't split match, and break to store
|
4716
|
+
* the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
|
4717
|
+
* should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
|
4718
|
+
* would cause the first half of the match to be too small
|
4719
|
+
*/
|
4720
|
+
bytesAdjustment = endPosInSequence - currSeq.litLength;
|
4721
|
+
endPosInSequence = currSeq.litLength;
|
4722
|
+
break;
|
4723
|
+
}
|
4724
|
+
} else {
|
4725
|
+
/* This sequence ends inside the literals, break to store the last literals */
|
4726
|
+
break;
|
4727
|
+
}
|
4728
|
+
}
|
4729
|
+
/* Check if this offset can be represented with a repcode */
|
4730
|
+
{ U32 ll0 = (litLength == 0);
|
4731
|
+
offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
|
4732
|
+
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
|
4733
|
+
}
|
4734
|
+
|
4735
|
+
if (cctx->appliedParams.validateSequences) {
|
4736
|
+
seqPos->posInSrc += litLength + matchLength;
|
4737
|
+
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
4738
|
+
cctx->appliedParams.cParams.windowLog, dictSize,
|
4739
|
+
cctx->appliedParams.cParams.minMatch),
|
4740
|
+
"Sequence validation failed");
|
4741
|
+
}
|
4742
|
+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
|
4743
|
+
RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
|
4744
|
+
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
4745
|
+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
|
4746
|
+
ip += matchLength + litLength;
|
4747
|
+
}
|
4748
|
+
DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
|
4749
|
+
assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
|
4750
|
+
seqPos->idx = idx;
|
4751
|
+
seqPos->posInSequence = endPosInSequence;
|
4752
|
+
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
|
4753
|
+
|
4754
|
+
iend -= bytesAdjustment;
|
4755
|
+
if (ip != iend) {
|
4756
|
+
/* Store any last literals */
|
4757
|
+
U32 lastLLSize = (U32)(iend - ip);
|
4758
|
+
assert(ip <= iend);
|
4759
|
+
DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
|
4760
|
+
ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
|
4761
|
+
seqPos->posInSrc += lastLLSize;
|
4762
|
+
}
|
4763
|
+
|
4764
|
+
return bytesAdjustment;
|
4765
|
+
}
|
4766
|
+
|
4767
|
+
typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
4768
|
+
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
4769
|
+
const void* src, size_t blockSize);
|
4770
|
+
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
|
4771
|
+
ZSTD_sequenceCopier sequenceCopier = NULL;
|
4772
|
+
assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
|
4773
|
+
if (mode == ZSTD_sf_explicitBlockDelimiters) {
|
4774
|
+
return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
|
4775
|
+
} else if (mode == ZSTD_sf_noBlockDelimiters) {
|
4776
|
+
return ZSTD_copySequencesToSeqStoreNoBlockDelim;
|
4777
|
+
}
|
4778
|
+
assert(sequenceCopier != NULL);
|
4779
|
+
return sequenceCopier;
|
4780
|
+
}
|
4781
|
+
|
4782
|
+
/* Compress, block-by-block, all of the sequences given.
|
4783
|
+
*
|
4784
|
+
* Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
|
4785
|
+
*/
|
4786
|
+
static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
4787
|
+
void* dst, size_t dstCapacity,
|
4788
|
+
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
4789
|
+
const void* src, size_t srcSize) {
|
4790
|
+
size_t cSize = 0;
|
4791
|
+
U32 lastBlock;
|
4792
|
+
size_t blockSize;
|
4793
|
+
size_t compressedSeqsSize;
|
4794
|
+
size_t remaining = srcSize;
|
4795
|
+
ZSTD_sequencePosition seqPos = {0, 0, 0};
|
4796
|
+
|
4797
|
+
BYTE const* ip = (BYTE const*)src;
|
4798
|
+
BYTE* op = (BYTE*)dst;
|
4799
|
+
ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
|
4800
|
+
|
4801
|
+
DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
|
4802
|
+
/* Special case: empty frame */
|
4803
|
+
if (remaining == 0) {
|
4804
|
+
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
|
4805
|
+
RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
|
4806
|
+
MEM_writeLE32(op, cBlockHeader24);
|
4807
|
+
op += ZSTD_blockHeaderSize;
|
4808
|
+
dstCapacity -= ZSTD_blockHeaderSize;
|
4809
|
+
cSize += ZSTD_blockHeaderSize;
|
4810
|
+
}
|
4811
|
+
|
4812
|
+
while (remaining) {
|
4813
|
+
size_t cBlockSize;
|
4814
|
+
size_t additionalByteAdjustment;
|
4815
|
+
lastBlock = remaining <= cctx->blockSize;
|
4816
|
+
blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
|
4817
|
+
ZSTD_resetSeqStore(&cctx->seqStore);
|
4818
|
+
DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
|
4819
|
+
|
4820
|
+
additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
|
4821
|
+
FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
|
4822
|
+
blockSize -= additionalByteAdjustment;
|
4823
|
+
|
4824
|
+
/* If blocks are too small, emit as a nocompress block */
|
4825
|
+
if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
|
4826
|
+
cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
4827
|
+
FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
|
4828
|
+
DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
|
4829
|
+
cSize += cBlockSize;
|
4830
|
+
ip += blockSize;
|
4831
|
+
op += cBlockSize;
|
4832
|
+
remaining -= blockSize;
|
4833
|
+
dstCapacity -= cBlockSize;
|
4834
|
+
continue;
|
4835
|
+
}
|
4836
|
+
|
4837
|
+
compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
|
4838
|
+
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
|
4839
|
+
&cctx->appliedParams,
|
4840
|
+
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
|
4841
|
+
blockSize,
|
4842
|
+
cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
4843
|
+
cctx->bmi2);
|
4844
|
+
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
|
4845
|
+
DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
|
4846
|
+
|
4847
|
+
if (!cctx->isFirstBlock &&
|
4848
|
+
ZSTD_maybeRLE(&cctx->seqStore) &&
|
4849
|
+
ZSTD_isRLE((BYTE const*)src, srcSize)) {
|
4850
|
+
/* We don't want to emit our first block as a RLE even if it qualifies because
|
4851
|
+
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
|
4852
|
+
* This is only an issue for zstd <= v1.4.3
|
4853
|
+
*/
|
4854
|
+
compressedSeqsSize = 1;
|
4855
|
+
}
|
4856
|
+
|
4857
|
+
if (compressedSeqsSize == 0) {
|
4858
|
+
/* ZSTD_noCompressBlock writes the block header as well */
|
4859
|
+
cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
4860
|
+
FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
|
4861
|
+
DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
|
4862
|
+
} else if (compressedSeqsSize == 1) {
|
4863
|
+
cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
|
4864
|
+
FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
|
4865
|
+
DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
|
4866
|
+
} else {
|
4867
|
+
U32 cBlockHeader;
|
4868
|
+
/* Error checking and repcodes update */
|
4869
|
+
ZSTD_confirmRepcodesAndEntropyTables(cctx);
|
4870
|
+
if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
4871
|
+
cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
4872
|
+
|
4873
|
+
/* Write block header into beginning of block*/
|
4874
|
+
cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
|
4875
|
+
MEM_writeLE24(op, cBlockHeader);
|
4876
|
+
cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
|
4877
|
+
DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
|
4878
|
+
}
|
4879
|
+
|
4880
|
+
cSize += cBlockSize;
|
4881
|
+
DEBUGLOG(4, "cSize running total: %zu", cSize);
|
4882
|
+
|
4883
|
+
if (lastBlock) {
|
4884
|
+
break;
|
4885
|
+
} else {
|
4886
|
+
ip += blockSize;
|
4887
|
+
op += cBlockSize;
|
4888
|
+
remaining -= blockSize;
|
4889
|
+
dstCapacity -= cBlockSize;
|
4890
|
+
cctx->isFirstBlock = 0;
|
4891
|
+
}
|
4892
|
+
}
|
4893
|
+
|
4894
|
+
return cSize;
|
4895
|
+
}
|
4896
|
+
|
4897
|
+
size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
|
4898
|
+
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
4899
|
+
const void* src, size_t srcSize) {
|
4900
|
+
BYTE* op = (BYTE*)dst;
|
4901
|
+
size_t cSize = 0;
|
4902
|
+
size_t compressedBlocksSize = 0;
|
4903
|
+
size_t frameHeaderSize = 0;
|
4904
|
+
|
4905
|
+
/* Transparent initialization stage, same as compressStream2() */
|
4906
|
+
DEBUGLOG(3, "ZSTD_compressSequences()");
|
4907
|
+
assert(cctx != NULL);
|
4908
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
|
4909
|
+
/* Begin writing output, starting with frame header */
|
4910
|
+
frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
|
4911
|
+
op += frameHeaderSize;
|
4912
|
+
dstCapacity -= frameHeaderSize;
|
4913
|
+
cSize += frameHeaderSize;
|
4914
|
+
if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
|
4915
|
+
XXH64_update(&cctx->xxhState, src, srcSize);
|
4916
|
+
}
|
4917
|
+
/* cSize includes block header size and compressed sequences size */
|
4918
|
+
compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
|
4919
|
+
op, dstCapacity,
|
4920
|
+
inSeqs, inSeqsSize,
|
4921
|
+
src, srcSize);
|
4922
|
+
FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
|
4923
|
+
cSize += compressedBlocksSize;
|
4924
|
+
dstCapacity -= compressedBlocksSize;
|
4925
|
+
|
4926
|
+
if (cctx->appliedParams.fParams.checksumFlag) {
|
4927
|
+
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
|
4928
|
+
RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
|
4929
|
+
DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
|
4930
|
+
MEM_writeLE32((char*)dst + cSize, checksum);
|
4931
|
+
cSize += 4;
|
4932
|
+
}
|
4933
|
+
|
4934
|
+
DEBUGLOG(3, "Final compressed size: %zu", cSize);
|
4935
|
+
return cSize;
|
4936
|
+
}
|
4937
|
+
|
4086
4938
|
/*====== Finalize ======*/
|
4087
4939
|
|
4088
4940
|
/*! ZSTD_flushStream() :
|
@@ -4223,25 +5075,103 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
|
|
4223
5075
|
},
|
4224
5076
|
};
|
4225
5077
|
|
5078
|
+
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
|
5079
|
+
{
|
5080
|
+
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
|
5081
|
+
switch (cParams.strategy) {
|
5082
|
+
case ZSTD_fast:
|
5083
|
+
case ZSTD_dfast:
|
5084
|
+
break;
|
5085
|
+
case ZSTD_greedy:
|
5086
|
+
case ZSTD_lazy:
|
5087
|
+
case ZSTD_lazy2:
|
5088
|
+
cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
|
5089
|
+
break;
|
5090
|
+
case ZSTD_btlazy2:
|
5091
|
+
case ZSTD_btopt:
|
5092
|
+
case ZSTD_btultra:
|
5093
|
+
case ZSTD_btultra2:
|
5094
|
+
break;
|
5095
|
+
}
|
5096
|
+
return cParams;
|
5097
|
+
}
|
5098
|
+
|
5099
|
+
static int ZSTD_dedicatedDictSearch_isSupported(
|
5100
|
+
ZSTD_compressionParameters const* cParams)
|
5101
|
+
{
|
5102
|
+
return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2);
|
5103
|
+
}
|
5104
|
+
|
5105
|
+
/**
|
5106
|
+
* Reverses the adjustment applied to cparams when enabling dedicated dict
|
5107
|
+
* search. This is used to recover the params set to be used in the working
|
5108
|
+
* context. (Otherwise, those tables would also grow.)
|
5109
|
+
*/
|
5110
|
+
static void ZSTD_dedicatedDictSearch_revertCParams(
|
5111
|
+
ZSTD_compressionParameters* cParams) {
|
5112
|
+
switch (cParams->strategy) {
|
5113
|
+
case ZSTD_fast:
|
5114
|
+
case ZSTD_dfast:
|
5115
|
+
break;
|
5116
|
+
case ZSTD_greedy:
|
5117
|
+
case ZSTD_lazy:
|
5118
|
+
case ZSTD_lazy2:
|
5119
|
+
cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
|
5120
|
+
break;
|
5121
|
+
case ZSTD_btlazy2:
|
5122
|
+
case ZSTD_btopt:
|
5123
|
+
case ZSTD_btultra:
|
5124
|
+
case ZSTD_btultra2:
|
5125
|
+
break;
|
5126
|
+
}
|
5127
|
+
}
|
5128
|
+
|
5129
|
+
static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
|
5130
|
+
{
|
5131
|
+
switch (mode) {
|
5132
|
+
case ZSTD_cpm_unknown:
|
5133
|
+
case ZSTD_cpm_noAttachDict:
|
5134
|
+
case ZSTD_cpm_createCDict:
|
5135
|
+
break;
|
5136
|
+
case ZSTD_cpm_attachDict:
|
5137
|
+
dictSize = 0;
|
5138
|
+
break;
|
5139
|
+
default:
|
5140
|
+
assert(0);
|
5141
|
+
break;
|
5142
|
+
}
|
5143
|
+
{ int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
|
5144
|
+
size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
|
5145
|
+
return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
|
5146
|
+
}
|
5147
|
+
}
|
5148
|
+
|
4226
5149
|
/*! ZSTD_getCParams_internal() :
|
4227
5150
|
* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
|
4228
5151
|
* Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
|
4229
|
-
* Use dictSize == 0 for unknown or unused.
|
4230
|
-
|
5152
|
+
* Use dictSize == 0 for unknown or unused.
|
5153
|
+
* Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
|
5154
|
+
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
|
4231
5155
|
{
|
4232
|
-
int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
4233
|
-
size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
|
4234
|
-
U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
|
5156
|
+
U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
|
4235
5157
|
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
|
4236
|
-
int row = compressionLevel;
|
5158
|
+
int row;
|
4237
5159
|
DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
|
5160
|
+
|
5161
|
+
/* row */
|
4238
5162
|
if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
|
4239
|
-
if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
|
4240
|
-
if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
|
5163
|
+
else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
|
5164
|
+
else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
|
5165
|
+
else row = compressionLevel;
|
5166
|
+
|
4241
5167
|
{ ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
|
4242
|
-
|
5168
|
+
/* acceleration factor */
|
5169
|
+
if (compressionLevel < 0) {
|
5170
|
+
int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
|
5171
|
+
cp.targetLength = (unsigned)(-clampedCompressionLevel);
|
5172
|
+
}
|
4243
5173
|
/* refine parameters based on srcSize & dictSize */
|
4244
|
-
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize);
|
5174
|
+
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
|
4245
5175
|
}
|
4246
5176
|
}
|
4247
5177
|
|
@@ -4251,18 +5181,18 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
|
|
4251
5181
|
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
|
4252
5182
|
{
|
4253
5183
|
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
|
4254
|
-
return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
|
5184
|
+
return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
|
4255
5185
|
}
|
4256
5186
|
|
4257
5187
|
/*! ZSTD_getParams() :
|
4258
5188
|
* same idea as ZSTD_getCParams()
|
4259
5189
|
* @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
|
4260
5190
|
* Fields of `ZSTD_frameParameters` are set to default values */
|
4261
|
-
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
|
5191
|
+
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
|
4262
5192
|
ZSTD_parameters params;
|
4263
|
-
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
|
5193
|
+
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
|
4264
5194
|
DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
|
4265
|
-
|
5195
|
+
ZSTD_memset(&params, 0, sizeof(params));
|
4266
5196
|
params.cParams = cParams;
|
4267
5197
|
params.fParams.contentSizeFlag = 1;
|
4268
5198
|
return params;
|
@@ -4274,5 +5204,5 @@ static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned lo
|
|
4274
5204
|
* Fields of `ZSTD_frameParameters` are set to default values */
|
4275
5205
|
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
|
4276
5206
|
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
|
4277
|
-
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize);
|
5207
|
+
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
|
4278
5208
|
}
|