zstd-ruby 1.5.1.1 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +1 -1
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +19 -60
- data/ext/zstdruby/libzstd/common/compiler.h +26 -3
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +1 -1
- data/ext/zstdruby/libzstd/common/entropy_common.c +12 -40
- data/ext/zstdruby/libzstd/common/error_private.c +9 -2
- data/ext/zstdruby/libzstd/common/error_private.h +1 -1
- data/ext/zstdruby/libzstd/common/fse.h +5 -83
- data/ext/zstdruby/libzstd/common/fse_decompress.c +7 -99
- data/ext/zstdruby/libzstd/common/huf.h +65 -156
- data/ext/zstdruby/libzstd/common/mem.h +39 -46
- data/ext/zstdruby/libzstd/common/pool.c +37 -16
- data/ext/zstdruby/libzstd/common/pool.h +9 -3
- data/ext/zstdruby/libzstd/common/portability_macros.h +28 -3
- data/ext/zstdruby/libzstd/common/threading.c +68 -14
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
- data/ext/zstdruby/libzstd/common/xxhash.h +8 -8
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -36
- data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +20 -122
- data/ext/zstdruby/libzstd/common/zstd_trace.h +3 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +1 -1
- data/ext/zstdruby/libzstd/compress/fse_compress.c +7 -124
- data/ext/zstdruby/libzstd/compress/hist.c +1 -1
- data/ext/zstdruby/libzstd/compress/hist.h +1 -1
- data/ext/zstdruby/libzstd/compress/huf_compress.c +234 -169
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1317 -594
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +272 -165
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +115 -39
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +13 -13
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +162 -82
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +95 -33
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +434 -149
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +405 -348
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -7
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +149 -100
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +32 -16
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -2
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +434 -441
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +42 -37
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +4 -4
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +205 -80
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +201 -81
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +19 -15
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +13 -91
- data/ext/zstdruby/libzstd/zdict.h +53 -31
- data/ext/zstdruby/libzstd/zstd.h +580 -135
- data/ext/zstdruby/libzstd/zstd_errors.h +27 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +113 -31
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +11 -37
- data/.github/dependabot.yml +0 -8
- data/.github/workflows/ruby.yml +0 -35
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -232
- data/ext/zstdruby/libzstd/Makefile +0 -357
- data/ext/zstdruby/libzstd/README.md +0 -217
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -167
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -63
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.mk +0 -185
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -16
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +0 -4
- data/ext/zstdruby/zstdruby.h +0 -6
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -11,12 +11,12 @@
|
|
|
11
11
|
/*-*************************************
|
|
12
12
|
* Dependencies
|
|
13
13
|
***************************************/
|
|
14
|
+
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
|
|
14
15
|
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
|
|
15
16
|
#include "../common/mem.h"
|
|
16
17
|
#include "hist.h" /* HIST_countFast_wksp */
|
|
17
18
|
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
|
|
18
19
|
#include "../common/fse.h"
|
|
19
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
20
20
|
#include "../common/huf.h"
|
|
21
21
|
#include "zstd_compress_internal.h"
|
|
22
22
|
#include "zstd_compress_sequences.h"
|
|
@@ -27,6 +27,7 @@
|
|
|
27
27
|
#include "zstd_opt.h"
|
|
28
28
|
#include "zstd_ldm.h"
|
|
29
29
|
#include "zstd_compress_superblock.h"
|
|
30
|
+
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */
|
|
30
31
|
|
|
31
32
|
/* ***************************************************************
|
|
32
33
|
* Tuning parameters
|
|
@@ -58,14 +59,17 @@
|
|
|
58
59
|
* Helper functions
|
|
59
60
|
***************************************/
|
|
60
61
|
/* ZSTD_compressBound()
|
|
61
|
-
* Note that the result from this function is only
|
|
62
|
-
*
|
|
63
|
-
* When
|
|
64
|
-
*
|
|
65
|
-
*
|
|
62
|
+
* Note that the result from this function is only valid for
|
|
63
|
+
* the one-pass compression functions.
|
|
64
|
+
* When employing the streaming mode,
|
|
65
|
+
* if flushes are frequently altering the size of blocks,
|
|
66
|
+
* the overhead from block headers can make the compressed data larger
|
|
67
|
+
* than the return value of ZSTD_compressBound().
|
|
66
68
|
*/
|
|
67
69
|
size_t ZSTD_compressBound(size_t srcSize) {
|
|
68
|
-
|
|
70
|
+
size_t const r = ZSTD_COMPRESSBOUND(srcSize);
|
|
71
|
+
if (r==0) return ERROR(srcSize_wrong);
|
|
72
|
+
return r;
|
|
69
73
|
}
|
|
70
74
|
|
|
71
75
|
|
|
@@ -177,12 +181,9 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
|
|
|
177
181
|
if (cctx==NULL) return 0; /* support free on NULL */
|
|
178
182
|
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
|
|
179
183
|
"not compatible with static CCtx");
|
|
180
|
-
{
|
|
181
|
-
int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
|
|
184
|
+
{ int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
|
|
182
185
|
ZSTD_freeCCtxContent(cctx);
|
|
183
|
-
if (!cctxInWorkspace)
|
|
184
|
-
ZSTD_customFree(cctx, cctx->customMem);
|
|
185
|
-
}
|
|
186
|
+
if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem);
|
|
186
187
|
}
|
|
187
188
|
return 0;
|
|
188
189
|
}
|
|
@@ -267,9 +268,9 @@ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
|
|
|
267
268
|
return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
|
|
268
269
|
}
|
|
269
270
|
|
|
270
|
-
/* Returns
|
|
271
|
+
/* Returns ZSTD_ps_enable if compression parameters are such that we should
|
|
271
272
|
* enable long distance matching (wlog >= 27, strategy >= btopt).
|
|
272
|
-
* Returns
|
|
273
|
+
* Returns ZSTD_ps_disable otherwise.
|
|
273
274
|
*/
|
|
274
275
|
static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
|
|
275
276
|
const ZSTD_compressionParameters* const cParams) {
|
|
@@ -277,6 +278,34 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
|
|
|
277
278
|
return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
|
|
278
279
|
}
|
|
279
280
|
|
|
281
|
+
static int ZSTD_resolveExternalSequenceValidation(int mode) {
|
|
282
|
+
return mode;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
/* Resolves maxBlockSize to the default if no value is present. */
|
|
286
|
+
static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) {
|
|
287
|
+
if (maxBlockSize == 0) {
|
|
288
|
+
return ZSTD_BLOCKSIZE_MAX;
|
|
289
|
+
} else {
|
|
290
|
+
return maxBlockSize;
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
static ZSTD_paramSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_paramSwitch_e value, int cLevel) {
|
|
295
|
+
if (value != ZSTD_ps_auto) return value;
|
|
296
|
+
if (cLevel < 10) {
|
|
297
|
+
return ZSTD_ps_disable;
|
|
298
|
+
} else {
|
|
299
|
+
return ZSTD_ps_enable;
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
|
|
304
|
+
* If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
|
|
305
|
+
static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
|
|
306
|
+
return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast;
|
|
307
|
+
}
|
|
308
|
+
|
|
280
309
|
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
|
281
310
|
ZSTD_compressionParameters cParams)
|
|
282
311
|
{
|
|
@@ -294,6 +323,10 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
|
|
294
323
|
}
|
|
295
324
|
cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
|
|
296
325
|
cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
|
|
326
|
+
cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
|
|
327
|
+
cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize);
|
|
328
|
+
cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes,
|
|
329
|
+
cctxParams.compressionLevel);
|
|
297
330
|
assert(!ZSTD_checkCParams(cParams));
|
|
298
331
|
return cctxParams;
|
|
299
332
|
}
|
|
@@ -339,10 +372,13 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel)
|
|
|
339
372
|
#define ZSTD_NO_CLEVEL 0
|
|
340
373
|
|
|
341
374
|
/**
|
|
342
|
-
* Initializes
|
|
375
|
+
* Initializes `cctxParams` from `params` and `compressionLevel`.
|
|
343
376
|
* @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
|
|
344
377
|
*/
|
|
345
|
-
static void
|
|
378
|
+
static void
|
|
379
|
+
ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,
|
|
380
|
+
const ZSTD_parameters* params,
|
|
381
|
+
int compressionLevel)
|
|
346
382
|
{
|
|
347
383
|
assert(!ZSTD_checkCParams(params->cParams));
|
|
348
384
|
ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
|
|
@@ -355,6 +391,9 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par
|
|
|
355
391
|
cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
|
|
356
392
|
cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
|
|
357
393
|
cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
|
|
394
|
+
cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
|
|
395
|
+
cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);
|
|
396
|
+
cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel);
|
|
358
397
|
DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
|
|
359
398
|
cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
|
|
360
399
|
}
|
|
@@ -369,7 +408,7 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete
|
|
|
369
408
|
|
|
370
409
|
/**
|
|
371
410
|
* Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
|
|
372
|
-
* @param
|
|
411
|
+
* @param params Validated zstd parameters.
|
|
373
412
|
*/
|
|
374
413
|
static void ZSTD_CCtxParams_setZstdParams(
|
|
375
414
|
ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
|
|
@@ -478,8 +517,8 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
|
478
517
|
return bounds;
|
|
479
518
|
|
|
480
519
|
case ZSTD_c_enableLongDistanceMatching:
|
|
481
|
-
bounds.lowerBound =
|
|
482
|
-
bounds.upperBound =
|
|
520
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
|
521
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
|
483
522
|
return bounds;
|
|
484
523
|
|
|
485
524
|
case ZSTD_c_ldmHashLog:
|
|
@@ -572,6 +611,26 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
|
572
611
|
bounds.upperBound = 1;
|
|
573
612
|
return bounds;
|
|
574
613
|
|
|
614
|
+
case ZSTD_c_prefetchCDictTables:
|
|
615
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
|
616
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
|
617
|
+
return bounds;
|
|
618
|
+
|
|
619
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
620
|
+
bounds.lowerBound = 0;
|
|
621
|
+
bounds.upperBound = 1;
|
|
622
|
+
return bounds;
|
|
623
|
+
|
|
624
|
+
case ZSTD_c_maxBlockSize:
|
|
625
|
+
bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
|
|
626
|
+
bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
|
|
627
|
+
return bounds;
|
|
628
|
+
|
|
629
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
630
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
|
631
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
|
632
|
+
return bounds;
|
|
633
|
+
|
|
575
634
|
default:
|
|
576
635
|
bounds.error = ERROR(parameter_unsupported);
|
|
577
636
|
return bounds;
|
|
@@ -636,6 +695,10 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
|
|
636
695
|
case ZSTD_c_useBlockSplitter:
|
|
637
696
|
case ZSTD_c_useRowMatchFinder:
|
|
638
697
|
case ZSTD_c_deterministicRefPrefix:
|
|
698
|
+
case ZSTD_c_prefetchCDictTables:
|
|
699
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
700
|
+
case ZSTD_c_maxBlockSize:
|
|
701
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
639
702
|
default:
|
|
640
703
|
return 0;
|
|
641
704
|
}
|
|
@@ -648,7 +711,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
|
|
648
711
|
if (ZSTD_isUpdateAuthorized(param)) {
|
|
649
712
|
cctx->cParamsChanged = 1;
|
|
650
713
|
} else {
|
|
651
|
-
RETURN_ERROR(stage_wrong, "can only set params in
|
|
714
|
+
RETURN_ERROR(stage_wrong, "can only set params in cctx init stage");
|
|
652
715
|
} }
|
|
653
716
|
|
|
654
717
|
switch(param)
|
|
@@ -691,6 +754,10 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
|
|
691
754
|
case ZSTD_c_useBlockSplitter:
|
|
692
755
|
case ZSTD_c_useRowMatchFinder:
|
|
693
756
|
case ZSTD_c_deterministicRefPrefix:
|
|
757
|
+
case ZSTD_c_prefetchCDictTables:
|
|
758
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
759
|
+
case ZSTD_c_maxBlockSize:
|
|
760
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
694
761
|
break;
|
|
695
762
|
|
|
696
763
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
|
@@ -746,12 +813,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
746
813
|
case ZSTD_c_minMatch :
|
|
747
814
|
if (value!=0) /* 0 => use default */
|
|
748
815
|
BOUNDCHECK(ZSTD_c_minMatch, value);
|
|
749
|
-
CCtxParams->cParams.minMatch = value;
|
|
816
|
+
CCtxParams->cParams.minMatch = (U32)value;
|
|
750
817
|
return CCtxParams->cParams.minMatch;
|
|
751
818
|
|
|
752
819
|
case ZSTD_c_targetLength :
|
|
753
820
|
BOUNDCHECK(ZSTD_c_targetLength, value);
|
|
754
|
-
CCtxParams->cParams.targetLength = value;
|
|
821
|
+
CCtxParams->cParams.targetLength = (U32)value;
|
|
755
822
|
return CCtxParams->cParams.targetLength;
|
|
756
823
|
|
|
757
824
|
case ZSTD_c_strategy :
|
|
@@ -764,12 +831,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
764
831
|
/* Content size written in frame header _when known_ (default:1) */
|
|
765
832
|
DEBUGLOG(4, "set content size flag = %u", (value!=0));
|
|
766
833
|
CCtxParams->fParams.contentSizeFlag = value != 0;
|
|
767
|
-
return CCtxParams->fParams.contentSizeFlag;
|
|
834
|
+
return (size_t)CCtxParams->fParams.contentSizeFlag;
|
|
768
835
|
|
|
769
836
|
case ZSTD_c_checksumFlag :
|
|
770
837
|
/* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
|
|
771
838
|
CCtxParams->fParams.checksumFlag = value != 0;
|
|
772
|
-
return CCtxParams->fParams.checksumFlag;
|
|
839
|
+
return (size_t)CCtxParams->fParams.checksumFlag;
|
|
773
840
|
|
|
774
841
|
case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
|
|
775
842
|
DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
|
|
@@ -778,18 +845,18 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
778
845
|
|
|
779
846
|
case ZSTD_c_forceMaxWindow :
|
|
780
847
|
CCtxParams->forceWindow = (value != 0);
|
|
781
|
-
return CCtxParams->forceWindow;
|
|
848
|
+
return (size_t)CCtxParams->forceWindow;
|
|
782
849
|
|
|
783
850
|
case ZSTD_c_forceAttachDict : {
|
|
784
851
|
const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
|
|
785
|
-
BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
|
|
852
|
+
BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref);
|
|
786
853
|
CCtxParams->attachDictPref = pref;
|
|
787
854
|
return CCtxParams->attachDictPref;
|
|
788
855
|
}
|
|
789
856
|
|
|
790
857
|
case ZSTD_c_literalCompressionMode : {
|
|
791
858
|
const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
|
|
792
|
-
BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
|
|
859
|
+
BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm);
|
|
793
860
|
CCtxParams->literalCompressionMode = lcm;
|
|
794
861
|
return CCtxParams->literalCompressionMode;
|
|
795
862
|
}
|
|
@@ -840,47 +907,48 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
840
907
|
|
|
841
908
|
case ZSTD_c_enableDedicatedDictSearch :
|
|
842
909
|
CCtxParams->enableDedicatedDictSearch = (value!=0);
|
|
843
|
-
return CCtxParams->enableDedicatedDictSearch;
|
|
910
|
+
return (size_t)CCtxParams->enableDedicatedDictSearch;
|
|
844
911
|
|
|
845
912
|
case ZSTD_c_enableLongDistanceMatching :
|
|
913
|
+
BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value);
|
|
846
914
|
CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
|
|
847
915
|
return CCtxParams->ldmParams.enableLdm;
|
|
848
916
|
|
|
849
917
|
case ZSTD_c_ldmHashLog :
|
|
850
918
|
if (value!=0) /* 0 ==> auto */
|
|
851
919
|
BOUNDCHECK(ZSTD_c_ldmHashLog, value);
|
|
852
|
-
CCtxParams->ldmParams.hashLog = value;
|
|
920
|
+
CCtxParams->ldmParams.hashLog = (U32)value;
|
|
853
921
|
return CCtxParams->ldmParams.hashLog;
|
|
854
922
|
|
|
855
923
|
case ZSTD_c_ldmMinMatch :
|
|
856
924
|
if (value!=0) /* 0 ==> default */
|
|
857
925
|
BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
|
|
858
|
-
CCtxParams->ldmParams.minMatchLength = value;
|
|
926
|
+
CCtxParams->ldmParams.minMatchLength = (U32)value;
|
|
859
927
|
return CCtxParams->ldmParams.minMatchLength;
|
|
860
928
|
|
|
861
929
|
case ZSTD_c_ldmBucketSizeLog :
|
|
862
930
|
if (value!=0) /* 0 ==> default */
|
|
863
931
|
BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
|
|
864
|
-
CCtxParams->ldmParams.bucketSizeLog = value;
|
|
932
|
+
CCtxParams->ldmParams.bucketSizeLog = (U32)value;
|
|
865
933
|
return CCtxParams->ldmParams.bucketSizeLog;
|
|
866
934
|
|
|
867
935
|
case ZSTD_c_ldmHashRateLog :
|
|
868
936
|
if (value!=0) /* 0 ==> default */
|
|
869
937
|
BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
|
|
870
|
-
CCtxParams->ldmParams.hashRateLog = value;
|
|
938
|
+
CCtxParams->ldmParams.hashRateLog = (U32)value;
|
|
871
939
|
return CCtxParams->ldmParams.hashRateLog;
|
|
872
940
|
|
|
873
941
|
case ZSTD_c_targetCBlockSize :
|
|
874
942
|
if (value!=0) /* 0 ==> default */
|
|
875
943
|
BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
|
|
876
|
-
CCtxParams->targetCBlockSize = value;
|
|
944
|
+
CCtxParams->targetCBlockSize = (U32)value;
|
|
877
945
|
return CCtxParams->targetCBlockSize;
|
|
878
946
|
|
|
879
947
|
case ZSTD_c_srcSizeHint :
|
|
880
948
|
if (value!=0) /* 0 ==> default */
|
|
881
949
|
BOUNDCHECK(ZSTD_c_srcSizeHint, value);
|
|
882
950
|
CCtxParams->srcSizeHint = value;
|
|
883
|
-
return CCtxParams->srcSizeHint;
|
|
951
|
+
return (size_t)CCtxParams->srcSizeHint;
|
|
884
952
|
|
|
885
953
|
case ZSTD_c_stableInBuffer:
|
|
886
954
|
BOUNDCHECK(ZSTD_c_stableInBuffer, value);
|
|
@@ -917,6 +985,27 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
917
985
|
CCtxParams->deterministicRefPrefix = !!value;
|
|
918
986
|
return CCtxParams->deterministicRefPrefix;
|
|
919
987
|
|
|
988
|
+
case ZSTD_c_prefetchCDictTables:
|
|
989
|
+
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
|
|
990
|
+
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
|
|
991
|
+
return CCtxParams->prefetchCDictTables;
|
|
992
|
+
|
|
993
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
994
|
+
BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
|
|
995
|
+
CCtxParams->enableMatchFinderFallback = value;
|
|
996
|
+
return CCtxParams->enableMatchFinderFallback;
|
|
997
|
+
|
|
998
|
+
case ZSTD_c_maxBlockSize:
|
|
999
|
+
if (value!=0) /* 0 ==> default */
|
|
1000
|
+
BOUNDCHECK(ZSTD_c_maxBlockSize, value);
|
|
1001
|
+
CCtxParams->maxBlockSize = value;
|
|
1002
|
+
return CCtxParams->maxBlockSize;
|
|
1003
|
+
|
|
1004
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
1005
|
+
BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value);
|
|
1006
|
+
CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value;
|
|
1007
|
+
return CCtxParams->searchForExternalRepcodes;
|
|
1008
|
+
|
|
920
1009
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
|
921
1010
|
}
|
|
922
1011
|
}
|
|
@@ -1049,6 +1138,18 @@ size_t ZSTD_CCtxParams_getParameter(
|
|
|
1049
1138
|
case ZSTD_c_deterministicRefPrefix:
|
|
1050
1139
|
*value = (int)CCtxParams->deterministicRefPrefix;
|
|
1051
1140
|
break;
|
|
1141
|
+
case ZSTD_c_prefetchCDictTables:
|
|
1142
|
+
*value = (int)CCtxParams->prefetchCDictTables;
|
|
1143
|
+
break;
|
|
1144
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
1145
|
+
*value = CCtxParams->enableMatchFinderFallback;
|
|
1146
|
+
break;
|
|
1147
|
+
case ZSTD_c_maxBlockSize:
|
|
1148
|
+
*value = (int)CCtxParams->maxBlockSize;
|
|
1149
|
+
break;
|
|
1150
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
1151
|
+
*value = (int)CCtxParams->searchForExternalRepcodes;
|
|
1152
|
+
break;
|
|
1052
1153
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
|
1053
1154
|
}
|
|
1054
1155
|
return 0;
|
|
@@ -1075,9 +1176,47 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
|
|
|
1075
1176
|
return 0;
|
|
1076
1177
|
}
|
|
1077
1178
|
|
|
1179
|
+
size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams)
|
|
1180
|
+
{
|
|
1181
|
+
ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */);
|
|
1182
|
+
DEBUGLOG(4, "ZSTD_CCtx_setCParams");
|
|
1183
|
+
/* only update if all parameters are valid */
|
|
1184
|
+
FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), "");
|
|
1185
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, cparams.windowLog), "");
|
|
1186
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, cparams.chainLog), "");
|
|
1187
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, cparams.hashLog), "");
|
|
1188
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, cparams.searchLog), "");
|
|
1189
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, cparams.minMatch), "");
|
|
1190
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, cparams.targetLength), "");
|
|
1191
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy), "");
|
|
1192
|
+
return 0;
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams)
|
|
1196
|
+
{
|
|
1197
|
+
ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */);
|
|
1198
|
+
DEBUGLOG(4, "ZSTD_CCtx_setFParams");
|
|
1199
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), "");
|
|
1200
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), "");
|
|
1201
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), "");
|
|
1202
|
+
return 0;
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params)
|
|
1206
|
+
{
|
|
1207
|
+
DEBUGLOG(4, "ZSTD_CCtx_setParams");
|
|
1208
|
+
/* First check cParams, because we want to update all or none. */
|
|
1209
|
+
FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
|
|
1210
|
+
/* Next set fParams, because this could fail if the cctx isn't in init stage. */
|
|
1211
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), "");
|
|
1212
|
+
/* Finally set cParams, which should succeed. */
|
|
1213
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), "");
|
|
1214
|
+
return 0;
|
|
1215
|
+
}
|
|
1216
|
+
|
|
1078
1217
|
size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
|
|
1079
1218
|
{
|
|
1080
|
-
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %
|
|
1219
|
+
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize);
|
|
1081
1220
|
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
|
1082
1221
|
"Can't set pledgedSrcSize when not in init stage.");
|
|
1083
1222
|
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
|
@@ -1093,9 +1232,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
|
|
|
1093
1232
|
ZSTD_compressionParameters* cParams);
|
|
1094
1233
|
|
|
1095
1234
|
/**
|
|
1096
|
-
* Initializes the local
|
|
1097
|
-
* NOTE:
|
|
1098
|
-
*
|
|
1235
|
+
* Initializes the local dictionary using requested parameters.
|
|
1236
|
+
* NOTE: Initialization does not employ the pledged src size,
|
|
1237
|
+
* because the dictionary may be used for multiple compressions.
|
|
1099
1238
|
*/
|
|
1100
1239
|
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
|
1101
1240
|
{
|
|
@@ -1108,8 +1247,8 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
|
|
1108
1247
|
return 0;
|
|
1109
1248
|
}
|
|
1110
1249
|
if (dl->cdict != NULL) {
|
|
1111
|
-
assert(cctx->cdict == dl->cdict);
|
|
1112
1250
|
/* Local dictionary already initialized. */
|
|
1251
|
+
assert(cctx->cdict == dl->cdict);
|
|
1113
1252
|
return 0;
|
|
1114
1253
|
}
|
|
1115
1254
|
assert(dl->dictSize > 0);
|
|
@@ -1129,26 +1268,30 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
|
|
1129
1268
|
}
|
|
1130
1269
|
|
|
1131
1270
|
size_t ZSTD_CCtx_loadDictionary_advanced(
|
|
1132
|
-
ZSTD_CCtx* cctx,
|
|
1133
|
-
|
|
1271
|
+
ZSTD_CCtx* cctx,
|
|
1272
|
+
const void* dict, size_t dictSize,
|
|
1273
|
+
ZSTD_dictLoadMethod_e dictLoadMethod,
|
|
1274
|
+
ZSTD_dictContentType_e dictContentType)
|
|
1134
1275
|
{
|
|
1135
|
-
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
|
1136
|
-
"Can't load a dictionary when ctx is not in init stage.");
|
|
1137
1276
|
DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
|
|
1138
|
-
|
|
1139
|
-
|
|
1277
|
+
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
|
1278
|
+
"Can't load a dictionary when cctx is not in init stage.");
|
|
1279
|
+
ZSTD_clearAllDicts(cctx); /* erase any previously set dictionary */
|
|
1280
|
+
if (dict == NULL || dictSize == 0) /* no dictionary */
|
|
1140
1281
|
return 0;
|
|
1141
1282
|
if (dictLoadMethod == ZSTD_dlm_byRef) {
|
|
1142
1283
|
cctx->localDict.dict = dict;
|
|
1143
1284
|
} else {
|
|
1285
|
+
/* copy dictionary content inside CCtx to own its lifetime */
|
|
1144
1286
|
void* dictBuffer;
|
|
1145
1287
|
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
|
|
1146
|
-
"
|
|
1288
|
+
"static CCtx can't allocate for an internal copy of dictionary");
|
|
1147
1289
|
dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
|
|
1148
|
-
RETURN_ERROR_IF(
|
|
1290
|
+
RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation,
|
|
1291
|
+
"allocation failed for dictionary content");
|
|
1149
1292
|
ZSTD_memcpy(dictBuffer, dict, dictSize);
|
|
1150
|
-
cctx->localDict.dictBuffer = dictBuffer;
|
|
1151
|
-
cctx->localDict.dict = dictBuffer;
|
|
1293
|
+
cctx->localDict.dictBuffer = dictBuffer; /* owned ptr to free */
|
|
1294
|
+
cctx->localDict.dict = dictBuffer; /* read-only reference */
|
|
1152
1295
|
}
|
|
1153
1296
|
cctx->localDict.dictSize = dictSize;
|
|
1154
1297
|
cctx->localDict.dictContentType = dictContentType;
|
|
@@ -1218,8 +1361,9 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
|
|
|
1218
1361
|
if ( (reset == ZSTD_reset_parameters)
|
|
1219
1362
|
|| (reset == ZSTD_reset_session_and_parameters) ) {
|
|
1220
1363
|
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
|
1221
|
-
"
|
|
1364
|
+
"Reset parameters is only possible during init stage.");
|
|
1222
1365
|
ZSTD_clearAllDicts(cctx);
|
|
1366
|
+
ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
|
|
1223
1367
|
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
|
|
1224
1368
|
}
|
|
1225
1369
|
return 0;
|
|
@@ -1316,7 +1460,8 @@ static ZSTD_compressionParameters
|
|
|
1316
1460
|
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
1317
1461
|
unsigned long long srcSize,
|
|
1318
1462
|
size_t dictSize,
|
|
1319
|
-
ZSTD_cParamMode_e mode
|
|
1463
|
+
ZSTD_cParamMode_e mode,
|
|
1464
|
+
ZSTD_paramSwitch_e useRowMatchFinder)
|
|
1320
1465
|
{
|
|
1321
1466
|
const U64 minSrcSize = 513; /* (1<<9) + 1 */
|
|
1322
1467
|
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
|
|
@@ -1350,8 +1495,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
|
1350
1495
|
}
|
|
1351
1496
|
|
|
1352
1497
|
/* resize windowLog if input is small enough, to use less memory */
|
|
1353
|
-
if ( (srcSize
|
|
1354
|
-
&& (dictSize
|
|
1498
|
+
if ( (srcSize <= maxWindowResize)
|
|
1499
|
+
&& (dictSize <= maxWindowResize) ) {
|
|
1355
1500
|
U32 const tSize = (U32)(srcSize + dictSize);
|
|
1356
1501
|
static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
|
|
1357
1502
|
U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
|
|
@@ -1369,6 +1514,42 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
|
1369
1514
|
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
|
|
1370
1515
|
cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */
|
|
1371
1516
|
|
|
1517
|
+
/* We can't use more than 32 bits of hash in total, so that means that we require:
|
|
1518
|
+
* (hashLog + 8) <= 32 && (chainLog + 8) <= 32
|
|
1519
|
+
*/
|
|
1520
|
+
if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
|
|
1521
|
+
U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
|
|
1522
|
+
if (cPar.hashLog > maxShortCacheHashLog) {
|
|
1523
|
+
cPar.hashLog = maxShortCacheHashLog;
|
|
1524
|
+
}
|
|
1525
|
+
if (cPar.chainLog > maxShortCacheHashLog) {
|
|
1526
|
+
cPar.chainLog = maxShortCacheHashLog;
|
|
1527
|
+
}
|
|
1528
|
+
}
|
|
1529
|
+
|
|
1530
|
+
|
|
1531
|
+
/* At this point, we aren't 100% sure if we are using the row match finder.
|
|
1532
|
+
* Unless it is explicitly disabled, conservatively assume that it is enabled.
|
|
1533
|
+
* In this case it will only be disabled for small sources, so shrinking the
|
|
1534
|
+
* hash log a little bit shouldn't result in any ratio loss.
|
|
1535
|
+
*/
|
|
1536
|
+
if (useRowMatchFinder == ZSTD_ps_auto)
|
|
1537
|
+
useRowMatchFinder = ZSTD_ps_enable;
|
|
1538
|
+
|
|
1539
|
+
/* We can't hash more than 32-bits in total. So that means that we require:
|
|
1540
|
+
* (hashLog - rowLog + 8) <= 32
|
|
1541
|
+
*/
|
|
1542
|
+
if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {
|
|
1543
|
+
/* Switch to 32-entry rows if searchLog is 5 (or more) */
|
|
1544
|
+
U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);
|
|
1545
|
+
U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;
|
|
1546
|
+
U32 const maxHashLog = maxRowHashLog + rowLog;
|
|
1547
|
+
assert(cPar.hashLog >= rowLog);
|
|
1548
|
+
if (cPar.hashLog > maxHashLog) {
|
|
1549
|
+
cPar.hashLog = maxHashLog;
|
|
1550
|
+
}
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1372
1553
|
return cPar;
|
|
1373
1554
|
}
|
|
1374
1555
|
|
|
@@ -1379,7 +1560,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
|
|
|
1379
1560
|
{
|
|
1380
1561
|
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
|
|
1381
1562
|
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
|
1382
|
-
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
|
|
1563
|
+
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);
|
|
1383
1564
|
}
|
|
1384
1565
|
|
|
1385
1566
|
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
|
|
@@ -1410,7 +1591,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
|
|
1410
1591
|
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
|
|
1411
1592
|
assert(!ZSTD_checkCParams(cParams));
|
|
1412
1593
|
/* srcSizeHint == 0 means 0 */
|
|
1413
|
-
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
|
|
1594
|
+
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
|
|
1414
1595
|
}
|
|
1415
1596
|
|
|
1416
1597
|
static size_t
|
|
@@ -1439,7 +1620,7 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
|
|
|
1439
1620
|
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
|
|
1440
1621
|
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
|
|
1441
1622
|
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
|
|
1442
|
-
? ZSTD_cwksp_aligned_alloc_size(hSize
|
|
1623
|
+
? ZSTD_cwksp_aligned_alloc_size(hSize)
|
|
1443
1624
|
: 0;
|
|
1444
1625
|
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
|
|
1445
1626
|
? optPotentialSpace
|
|
@@ -1455,6 +1636,13 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
|
|
|
1455
1636
|
return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
|
|
1456
1637
|
}
|
|
1457
1638
|
|
|
1639
|
+
/* Helper function for calculating memory requirements.
|
|
1640
|
+
* Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */
|
|
1641
|
+
static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) {
|
|
1642
|
+
U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4;
|
|
1643
|
+
return blockSize / divider;
|
|
1644
|
+
}
|
|
1645
|
+
|
|
1458
1646
|
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1459
1647
|
const ZSTD_compressionParameters* cParams,
|
|
1460
1648
|
const ldmParams_t* ldmParams,
|
|
@@ -1462,12 +1650,13 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
|
1462
1650
|
const ZSTD_paramSwitch_e useRowMatchFinder,
|
|
1463
1651
|
const size_t buffInSize,
|
|
1464
1652
|
const size_t buffOutSize,
|
|
1465
|
-
const U64 pledgedSrcSize
|
|
1653
|
+
const U64 pledgedSrcSize,
|
|
1654
|
+
int useSequenceProducer,
|
|
1655
|
+
size_t maxBlockSize)
|
|
1466
1656
|
{
|
|
1467
1657
|
size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
|
|
1468
|
-
size_t const blockSize = MIN(
|
|
1469
|
-
|
|
1470
|
-
size_t const maxNbSeq = blockSize / divider;
|
|
1658
|
+
size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
|
|
1659
|
+
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
|
|
1471
1660
|
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
|
|
1472
1661
|
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
|
|
1473
1662
|
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
|
|
@@ -1486,6 +1675,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
|
1486
1675
|
|
|
1487
1676
|
size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
|
|
1488
1677
|
|
|
1678
|
+
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
|
|
1679
|
+
size_t const externalSeqSpace = useSequenceProducer
|
|
1680
|
+
? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
|
|
1681
|
+
: 0;
|
|
1682
|
+
|
|
1489
1683
|
size_t const neededSpace =
|
|
1490
1684
|
cctxSpace +
|
|
1491
1685
|
entropySpace +
|
|
@@ -1494,7 +1688,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
|
1494
1688
|
ldmSeqSpace +
|
|
1495
1689
|
matchStateSize +
|
|
1496
1690
|
tokenSpace +
|
|
1497
|
-
bufferSpace
|
|
1691
|
+
bufferSpace +
|
|
1692
|
+
externalSeqSpace;
|
|
1498
1693
|
|
|
1499
1694
|
DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
|
|
1500
1695
|
return neededSpace;
|
|
@@ -1512,7 +1707,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
|
1512
1707
|
* be needed. However, we still allocate two 0-sized buffers, which can
|
|
1513
1708
|
* take space under ASAN. */
|
|
1514
1709
|
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1515
|
-
&cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
|
|
1710
|
+
&cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
|
|
1516
1711
|
}
|
|
1517
1712
|
|
|
1518
1713
|
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
|
|
@@ -1562,7 +1757,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
|
1562
1757
|
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
|
|
1563
1758
|
{ ZSTD_compressionParameters const cParams =
|
|
1564
1759
|
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
|
|
1565
|
-
size_t const blockSize = MIN(
|
|
1760
|
+
size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog);
|
|
1566
1761
|
size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
|
|
1567
1762
|
? ((size_t)1 << cParams.windowLog) + blockSize
|
|
1568
1763
|
: 0;
|
|
@@ -1573,7 +1768,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
|
1573
1768
|
|
|
1574
1769
|
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1575
1770
|
&cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
|
|
1576
|
-
ZSTD_CONTENTSIZE_UNKNOWN);
|
|
1771
|
+
ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
|
|
1577
1772
|
}
|
|
1578
1773
|
}
|
|
1579
1774
|
|
|
@@ -1716,6 +1911,19 @@ typedef enum {
|
|
|
1716
1911
|
ZSTD_resetTarget_CCtx
|
|
1717
1912
|
} ZSTD_resetTarget_e;
|
|
1718
1913
|
|
|
1914
|
+
/* Mixes the bits of a 64-bit value, based on XXH3_rrmxmx */
|
|
1915
|
+
static U64 ZSTD_bitmix(U64 val, U64 len) {
|
|
1916
|
+
val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24);
|
|
1917
|
+
val *= 0x9FB21C651E98DF25ULL;
|
|
1918
|
+
val ^= (val >> 35) + len ;
|
|
1919
|
+
val *= 0x9FB21C651E98DF25ULL;
|
|
1920
|
+
return val ^ (val >> 28);
|
|
1921
|
+
}
|
|
1922
|
+
|
|
1923
|
+
/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */
|
|
1924
|
+
static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) {
|
|
1925
|
+
ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4);
|
|
1926
|
+
}
|
|
1719
1927
|
|
|
1720
1928
|
static size_t
|
|
1721
1929
|
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|
@@ -1743,6 +1951,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|
|
1743
1951
|
}
|
|
1744
1952
|
|
|
1745
1953
|
ms->hashLog3 = hashLog3;
|
|
1954
|
+
ms->lazySkipping = 0;
|
|
1746
1955
|
|
|
1747
1956
|
ZSTD_invalidateMatchState(ms);
|
|
1748
1957
|
|
|
@@ -1764,6 +1973,27 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|
|
1764
1973
|
ZSTD_cwksp_clean_tables(ws);
|
|
1765
1974
|
}
|
|
1766
1975
|
|
|
1976
|
+
if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
|
|
1977
|
+
/* Row match finder needs an additional table of hashes ("tags") */
|
|
1978
|
+
size_t const tagTableSize = hSize;
|
|
1979
|
+
/* We want to generate a new salt in case we reset a Cctx, but we always want to use
|
|
1980
|
+
* 0 when we reset a Cdict */
|
|
1981
|
+
if(forWho == ZSTD_resetTarget_CCtx) {
|
|
1982
|
+
ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize);
|
|
1983
|
+
ZSTD_advanceHashSalt(ms);
|
|
1984
|
+
} else {
|
|
1985
|
+
/* When we are not salting we want to always memset the memory */
|
|
1986
|
+
ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
|
|
1987
|
+
ZSTD_memset(ms->tagTable, 0, tagTableSize);
|
|
1988
|
+
ms->hashSalt = 0;
|
|
1989
|
+
}
|
|
1990
|
+
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
|
|
1991
|
+
U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
|
|
1992
|
+
assert(cParams->hashLog >= rowLog);
|
|
1993
|
+
ms->rowHashLog = cParams->hashLog - rowLog;
|
|
1994
|
+
}
|
|
1995
|
+
}
|
|
1996
|
+
|
|
1767
1997
|
/* opt parser space */
|
|
1768
1998
|
if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
|
|
1769
1999
|
DEBUGLOG(4, "reserving optimal parser space");
|
|
@@ -1775,19 +2005,6 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|
|
1775
2005
|
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
|
|
1776
2006
|
}
|
|
1777
2007
|
|
|
1778
|
-
if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
|
|
1779
|
-
{ /* Row match finder needs an additional table of hashes ("tags") */
|
|
1780
|
-
size_t const tagTableSize = hSize*sizeof(U16);
|
|
1781
|
-
ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
|
|
1782
|
-
if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
|
|
1783
|
-
}
|
|
1784
|
-
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
|
|
1785
|
-
U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
|
|
1786
|
-
assert(cParams->hashLog >= rowLog);
|
|
1787
|
-
ms->rowHashLog = cParams->hashLog - rowLog;
|
|
1788
|
-
}
|
|
1789
|
-
}
|
|
1790
|
-
|
|
1791
2008
|
ms->cParams = *cParams;
|
|
1792
2009
|
|
|
1793
2010
|
RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
|
|
@@ -1847,6 +2064,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1847
2064
|
assert(params->useRowMatchFinder != ZSTD_ps_auto);
|
|
1848
2065
|
assert(params->useBlockSplitter != ZSTD_ps_auto);
|
|
1849
2066
|
assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
|
|
2067
|
+
assert(params->maxBlockSize != 0);
|
|
1850
2068
|
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
1851
2069
|
/* Adjust long distance matching parameters */
|
|
1852
2070
|
ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
|
|
@@ -1855,9 +2073,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1855
2073
|
}
|
|
1856
2074
|
|
|
1857
2075
|
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
|
|
1858
|
-
size_t const blockSize = MIN(
|
|
1859
|
-
|
|
1860
|
-
size_t const maxNbSeq = blockSize / divider;
|
|
2076
|
+
size_t const blockSize = MIN(params->maxBlockSize, windowSize);
|
|
2077
|
+
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
|
|
1861
2078
|
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
|
|
1862
2079
|
? ZSTD_compressBound(blockSize) + 1
|
|
1863
2080
|
: 0;
|
|
@@ -1874,7 +2091,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1874
2091
|
size_t const neededSpace =
|
|
1875
2092
|
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1876
2093
|
&params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
|
|
1877
|
-
buffInSize, buffOutSize, pledgedSrcSize);
|
|
2094
|
+
buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
|
|
1878
2095
|
int resizeWorkspace;
|
|
1879
2096
|
|
|
1880
2097
|
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
|
|
@@ -1917,6 +2134,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1917
2134
|
|
|
1918
2135
|
/* init params */
|
|
1919
2136
|
zc->blockState.matchState.cParams = params->cParams;
|
|
2137
|
+
zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable;
|
|
1920
2138
|
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
|
1921
2139
|
zc->consumedSrcSize = 0;
|
|
1922
2140
|
zc->producedCSize = 0;
|
|
@@ -1933,13 +2151,46 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1933
2151
|
|
|
1934
2152
|
ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
|
|
1935
2153
|
|
|
2154
|
+
FORWARD_IF_ERROR(ZSTD_reset_matchState(
|
|
2155
|
+
&zc->blockState.matchState,
|
|
2156
|
+
ws,
|
|
2157
|
+
&params->cParams,
|
|
2158
|
+
params->useRowMatchFinder,
|
|
2159
|
+
crp,
|
|
2160
|
+
needsIndexReset,
|
|
2161
|
+
ZSTD_resetTarget_CCtx), "");
|
|
2162
|
+
|
|
2163
|
+
zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
|
|
2164
|
+
|
|
2165
|
+
/* ldm hash table */
|
|
2166
|
+
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
2167
|
+
/* TODO: avoid memset? */
|
|
2168
|
+
size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
|
|
2169
|
+
zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
|
|
2170
|
+
ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
|
|
2171
|
+
zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
|
|
2172
|
+
zc->maxNbLdmSequences = maxNbLdmSeq;
|
|
2173
|
+
|
|
2174
|
+
ZSTD_window_init(&zc->ldmState.window);
|
|
2175
|
+
zc->ldmState.loadedDictEnd = 0;
|
|
2176
|
+
}
|
|
2177
|
+
|
|
2178
|
+
/* reserve space for block-level external sequences */
|
|
2179
|
+
if (params->useSequenceProducer) {
|
|
2180
|
+
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
|
|
2181
|
+
zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
|
|
2182
|
+
zc->externalMatchCtx.seqBuffer =
|
|
2183
|
+
(ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
|
|
2184
|
+
}
|
|
2185
|
+
|
|
2186
|
+
/* buffers */
|
|
2187
|
+
|
|
1936
2188
|
/* ZSTD_wildcopy() is used to copy into the literals buffer,
|
|
1937
2189
|
* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
|
|
1938
2190
|
*/
|
|
1939
2191
|
zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
|
|
1940
2192
|
zc->seqStore.maxNbLit = blockSize;
|
|
1941
2193
|
|
|
1942
|
-
/* buffers */
|
|
1943
2194
|
zc->bufferedPolicy = zbuff;
|
|
1944
2195
|
zc->inBuffSize = buffInSize;
|
|
1945
2196
|
zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
|
|
@@ -1962,32 +2213,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1962
2213
|
zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
|
1963
2214
|
zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
|
1964
2215
|
zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
|
1965
|
-
zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
|
|
1966
|
-
|
|
1967
|
-
FORWARD_IF_ERROR(ZSTD_reset_matchState(
|
|
1968
|
-
&zc->blockState.matchState,
|
|
1969
|
-
ws,
|
|
1970
|
-
&params->cParams,
|
|
1971
|
-
params->useRowMatchFinder,
|
|
1972
|
-
crp,
|
|
1973
|
-
needsIndexReset,
|
|
1974
|
-
ZSTD_resetTarget_CCtx), "");
|
|
1975
|
-
|
|
1976
|
-
/* ldm hash table */
|
|
1977
|
-
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
1978
|
-
/* TODO: avoid memset? */
|
|
1979
|
-
size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
|
|
1980
|
-
zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
|
|
1981
|
-
ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
|
|
1982
|
-
zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
|
|
1983
|
-
zc->maxNbLdmSequences = maxNbLdmSeq;
|
|
1984
|
-
|
|
1985
|
-
ZSTD_window_init(&zc->ldmState.window);
|
|
1986
|
-
zc->ldmState.loadedDictEnd = 0;
|
|
1987
|
-
}
|
|
1988
2216
|
|
|
1989
2217
|
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
|
|
1990
|
-
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace
|
|
2218
|
+
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace));
|
|
1991
2219
|
|
|
1992
2220
|
zc->initialized = 1;
|
|
1993
2221
|
|
|
@@ -2059,7 +2287,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
|
|
2059
2287
|
}
|
|
2060
2288
|
|
|
2061
2289
|
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
|
|
2062
|
-
cdict->dictContentSize, ZSTD_cpm_attachDict
|
|
2290
|
+
cdict->dictContentSize, ZSTD_cpm_attachDict,
|
|
2291
|
+
params.useRowMatchFinder);
|
|
2063
2292
|
params.cParams.windowLog = windowLog;
|
|
2064
2293
|
params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
|
|
2065
2294
|
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
|
|
@@ -2098,6 +2327,22 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
|
|
2098
2327
|
return 0;
|
|
2099
2328
|
}
|
|
2100
2329
|
|
|
2330
|
+
static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize,
|
|
2331
|
+
ZSTD_compressionParameters const* cParams) {
|
|
2332
|
+
if (ZSTD_CDictIndicesAreTagged(cParams)){
|
|
2333
|
+
/* Remove tags from the CDict table if they are present.
|
|
2334
|
+
* See docs on "short cache" in zstd_compress_internal.h for context. */
|
|
2335
|
+
size_t i;
|
|
2336
|
+
for (i = 0; i < tableSize; i++) {
|
|
2337
|
+
U32 const taggedIndex = src[i];
|
|
2338
|
+
U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS;
|
|
2339
|
+
dst[i] = index;
|
|
2340
|
+
}
|
|
2341
|
+
} else {
|
|
2342
|
+
ZSTD_memcpy(dst, src, tableSize * sizeof(U32));
|
|
2343
|
+
}
|
|
2344
|
+
}
|
|
2345
|
+
|
|
2101
2346
|
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
2102
2347
|
const ZSTD_CDict* cdict,
|
|
2103
2348
|
ZSTD_CCtx_params params,
|
|
@@ -2133,21 +2378,23 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
|
2133
2378
|
: 0;
|
|
2134
2379
|
size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
|
|
2135
2380
|
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2381
|
+
ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable,
|
|
2382
|
+
cdict->matchState.hashTable,
|
|
2383
|
+
hSize, cdict_cParams);
|
|
2384
|
+
|
|
2139
2385
|
/* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
|
|
2140
2386
|
if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2387
|
+
ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable,
|
|
2388
|
+
cdict->matchState.chainTable,
|
|
2389
|
+
chainSize, cdict_cParams);
|
|
2144
2390
|
}
|
|
2145
2391
|
/* copy tag table */
|
|
2146
2392
|
if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
|
|
2147
|
-
size_t const tagTableSize = hSize
|
|
2393
|
+
size_t const tagTableSize = hSize;
|
|
2148
2394
|
ZSTD_memcpy(cctx->blockState.matchState.tagTable,
|
|
2149
|
-
|
|
2150
|
-
|
|
2395
|
+
cdict->matchState.tagTable,
|
|
2396
|
+
tagTableSize);
|
|
2397
|
+
cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt;
|
|
2151
2398
|
}
|
|
2152
2399
|
}
|
|
2153
2400
|
|
|
@@ -2226,6 +2473,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
|
|
|
2226
2473
|
params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
|
|
2227
2474
|
params.ldmParams = srcCCtx->appliedParams.ldmParams;
|
|
2228
2475
|
params.fParams = fParams;
|
|
2476
|
+
params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize;
|
|
2229
2477
|
ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
|
|
2230
2478
|
/* loadedDictSize */ 0,
|
|
2231
2479
|
ZSTDcrp_leaveDirty, zbuff);
|
|
@@ -2385,7 +2633,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
|
|
|
2385
2633
|
|
|
2386
2634
|
/* See doc/zstd_compression_format.md for detailed format description */
|
|
2387
2635
|
|
|
2388
|
-
|
|
2636
|
+
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|
2389
2637
|
{
|
|
2390
2638
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
|
2391
2639
|
BYTE* const llCodeTable = seqStorePtr->llCode;
|
|
@@ -2393,18 +2641,24 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|
|
2393
2641
|
BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
|
2394
2642
|
U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
|
2395
2643
|
U32 u;
|
|
2644
|
+
int longOffsets = 0;
|
|
2396
2645
|
assert(nbSeq <= seqStorePtr->maxNbSeq);
|
|
2397
2646
|
for (u=0; u<nbSeq; u++) {
|
|
2398
2647
|
U32 const llv = sequences[u].litLength;
|
|
2399
|
-
U32 const
|
|
2648
|
+
U32 const ofCode = ZSTD_highbit32(sequences[u].offBase);
|
|
2649
|
+
U32 const mlv = sequences[u].mlBase;
|
|
2400
2650
|
llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
|
|
2401
|
-
ofCodeTable[u] = (BYTE)
|
|
2651
|
+
ofCodeTable[u] = (BYTE)ofCode;
|
|
2402
2652
|
mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
|
|
2653
|
+
assert(!(MEM_64bits() && ofCode >= STREAM_ACCUMULATOR_MIN));
|
|
2654
|
+
if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN)
|
|
2655
|
+
longOffsets = 1;
|
|
2403
2656
|
}
|
|
2404
2657
|
if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
|
|
2405
2658
|
llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
|
|
2406
2659
|
if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
|
|
2407
2660
|
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
|
2661
|
+
return longOffsets;
|
|
2408
2662
|
}
|
|
2409
2663
|
|
|
2410
2664
|
/* ZSTD_useTargetCBlockSize():
|
|
@@ -2438,6 +2692,7 @@ typedef struct {
|
|
|
2438
2692
|
U32 MLtype;
|
|
2439
2693
|
size_t size;
|
|
2440
2694
|
size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
|
|
2695
|
+
int longOffsets;
|
|
2441
2696
|
} ZSTD_symbolEncodingTypeStats_t;
|
|
2442
2697
|
|
|
2443
2698
|
/* ZSTD_buildSequencesStatistics():
|
|
@@ -2448,11 +2703,13 @@ typedef struct {
|
|
|
2448
2703
|
* entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
|
|
2449
2704
|
*/
|
|
2450
2705
|
static ZSTD_symbolEncodingTypeStats_t
|
|
2451
|
-
ZSTD_buildSequencesStatistics(
|
|
2452
|
-
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2706
|
+
ZSTD_buildSequencesStatistics(
|
|
2707
|
+
const seqStore_t* seqStorePtr, size_t nbSeq,
|
|
2708
|
+
const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
|
|
2709
|
+
BYTE* dst, const BYTE* const dstEnd,
|
|
2710
|
+
ZSTD_strategy strategy, unsigned* countWorkspace,
|
|
2711
|
+
void* entropyWorkspace, size_t entropyWkspSize)
|
|
2712
|
+
{
|
|
2456
2713
|
BYTE* const ostart = dst;
|
|
2457
2714
|
const BYTE* const oend = dstEnd;
|
|
2458
2715
|
BYTE* op = ostart;
|
|
@@ -2466,7 +2723,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
|
|
|
2466
2723
|
|
|
2467
2724
|
stats.lastCountSize = 0;
|
|
2468
2725
|
/* convert length/distances into codes */
|
|
2469
|
-
ZSTD_seqToCodes(seqStorePtr);
|
|
2726
|
+
stats.longOffsets = ZSTD_seqToCodes(seqStorePtr);
|
|
2470
2727
|
assert(op <= oend);
|
|
2471
2728
|
assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
|
|
2472
2729
|
/* build CTable for Literal Lengths */
|
|
@@ -2571,22 +2828,22 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
|
|
|
2571
2828
|
*/
|
|
2572
2829
|
#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
|
|
2573
2830
|
MEM_STATIC size_t
|
|
2574
|
-
ZSTD_entropyCompressSeqStore_internal(
|
|
2575
|
-
|
|
2576
|
-
|
|
2577
|
-
|
|
2578
|
-
|
|
2579
|
-
|
|
2580
|
-
|
|
2831
|
+
ZSTD_entropyCompressSeqStore_internal(
|
|
2832
|
+
const seqStore_t* seqStorePtr,
|
|
2833
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
|
2834
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
|
2835
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
2836
|
+
void* dst, size_t dstCapacity,
|
|
2837
|
+
void* entropyWorkspace, size_t entropyWkspSize,
|
|
2838
|
+
const int bmi2)
|
|
2581
2839
|
{
|
|
2582
|
-
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
|
2583
2840
|
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
|
2584
2841
|
unsigned* count = (unsigned*)entropyWorkspace;
|
|
2585
2842
|
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
|
|
2586
2843
|
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
|
|
2587
2844
|
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
|
|
2588
2845
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
|
2589
|
-
const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
|
2846
|
+
const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
|
2590
2847
|
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
|
2591
2848
|
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
|
2592
2849
|
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
|
@@ -2594,29 +2851,31 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
|
2594
2851
|
BYTE* const oend = ostart + dstCapacity;
|
|
2595
2852
|
BYTE* op = ostart;
|
|
2596
2853
|
size_t lastCountSize;
|
|
2854
|
+
int longOffsets = 0;
|
|
2597
2855
|
|
|
2598
2856
|
entropyWorkspace = count + (MaxSeq + 1);
|
|
2599
2857
|
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
|
|
2600
2858
|
|
|
2601
|
-
DEBUGLOG(
|
|
2859
|
+
DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity);
|
|
2602
2860
|
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
|
2603
2861
|
assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
|
|
2604
2862
|
|
|
2605
2863
|
/* Compress literals */
|
|
2606
2864
|
{ const BYTE* const literals = seqStorePtr->litStart;
|
|
2607
|
-
size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
|
2608
|
-
size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
|
|
2865
|
+
size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
|
2866
|
+
size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
|
|
2609
2867
|
/* Base suspicion of uncompressibility on ratio of literals to sequences */
|
|
2610
2868
|
unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
|
|
2611
2869
|
size_t const litSize = (size_t)(seqStorePtr->lit - literals);
|
|
2870
|
+
|
|
2612
2871
|
size_t const cSize = ZSTD_compressLiterals(
|
|
2613
|
-
&prevEntropy->huf, &nextEntropy->huf,
|
|
2614
|
-
cctxParams->cParams.strategy,
|
|
2615
|
-
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
|
2616
2872
|
op, dstCapacity,
|
|
2617
2873
|
literals, litSize,
|
|
2618
2874
|
entropyWorkspace, entropyWkspSize,
|
|
2619
|
-
|
|
2875
|
+
&prevEntropy->huf, &nextEntropy->huf,
|
|
2876
|
+
cctxParams->cParams.strategy,
|
|
2877
|
+
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
|
2878
|
+
suspectUncompressible, bmi2);
|
|
2620
2879
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
|
|
2621
2880
|
assert(cSize <= dstCapacity);
|
|
2622
2881
|
op += cSize;
|
|
@@ -2642,11 +2901,10 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
|
2642
2901
|
ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
|
|
2643
2902
|
return (size_t)(op - ostart);
|
|
2644
2903
|
}
|
|
2645
|
-
{
|
|
2646
|
-
ZSTD_symbolEncodingTypeStats_t stats;
|
|
2647
|
-
BYTE* seqHead = op++;
|
|
2904
|
+
{ BYTE* const seqHead = op++;
|
|
2648
2905
|
/* build stats for sequences */
|
|
2649
|
-
stats =
|
|
2906
|
+
const ZSTD_symbolEncodingTypeStats_t stats =
|
|
2907
|
+
ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
|
|
2650
2908
|
&prevEntropy->fse, &nextEntropy->fse,
|
|
2651
2909
|
op, oend,
|
|
2652
2910
|
strategy, count,
|
|
@@ -2655,6 +2913,7 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
|
2655
2913
|
*seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
|
|
2656
2914
|
lastCountSize = stats.lastCountSize;
|
|
2657
2915
|
op += stats.size;
|
|
2916
|
+
longOffsets = stats.longOffsets;
|
|
2658
2917
|
}
|
|
2659
2918
|
|
|
2660
2919
|
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
|
@@ -2689,14 +2948,15 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
|
2689
2948
|
}
|
|
2690
2949
|
|
|
2691
2950
|
MEM_STATIC size_t
|
|
2692
|
-
ZSTD_entropyCompressSeqStore(
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2951
|
+
ZSTD_entropyCompressSeqStore(
|
|
2952
|
+
const seqStore_t* seqStorePtr,
|
|
2953
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
|
2954
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
|
2955
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
2956
|
+
void* dst, size_t dstCapacity,
|
|
2957
|
+
size_t srcSize,
|
|
2958
|
+
void* entropyWorkspace, size_t entropyWkspSize,
|
|
2959
|
+
int bmi2)
|
|
2700
2960
|
{
|
|
2701
2961
|
size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
|
|
2702
2962
|
seqStorePtr, prevEntropy, nextEntropy, cctxParams,
|
|
@@ -2706,15 +2966,21 @@ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
|
|
|
2706
2966
|
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
|
|
2707
2967
|
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
|
|
2708
2968
|
*/
|
|
2709
|
-
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
|
|
2969
|
+
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) {
|
|
2970
|
+
DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity);
|
|
2710
2971
|
return 0; /* block not compressed */
|
|
2972
|
+
}
|
|
2711
2973
|
FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
|
|
2712
2974
|
|
|
2713
2975
|
/* Check compressibility */
|
|
2714
2976
|
{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
|
|
2715
2977
|
if (cSize >= maxCSize) return 0; /* block not compressed */
|
|
2716
2978
|
}
|
|
2717
|
-
DEBUGLOG(
|
|
2979
|
+
DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
|
|
2980
|
+
/* libzstd decoder before > v1.5.4 is not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly.
|
|
2981
|
+
* This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above.
|
|
2982
|
+
*/
|
|
2983
|
+
assert(cSize < ZSTD_BLOCKSIZE_MAX);
|
|
2718
2984
|
return cSize;
|
|
2719
2985
|
}
|
|
2720
2986
|
|
|
@@ -2809,6 +3075,72 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
|
|
|
2809
3075
|
ssPtr->longLengthType = ZSTD_llt_none;
|
|
2810
3076
|
}
|
|
2811
3077
|
|
|
3078
|
+
/* ZSTD_postProcessSequenceProducerResult() :
|
|
3079
|
+
* Validates and post-processes sequences obtained through the external matchfinder API:
|
|
3080
|
+
* - Checks whether nbExternalSeqs represents an error condition.
|
|
3081
|
+
* - Appends a block delimiter to outSeqs if one is not already present.
|
|
3082
|
+
* See zstd.h for context regarding block delimiters.
|
|
3083
|
+
* Returns the number of sequences after post-processing, or an error code. */
|
|
3084
|
+
static size_t ZSTD_postProcessSequenceProducerResult(
|
|
3085
|
+
ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize
|
|
3086
|
+
) {
|
|
3087
|
+
RETURN_ERROR_IF(
|
|
3088
|
+
nbExternalSeqs > outSeqsCapacity,
|
|
3089
|
+
sequenceProducer_failed,
|
|
3090
|
+
"External sequence producer returned error code %lu",
|
|
3091
|
+
(unsigned long)nbExternalSeqs
|
|
3092
|
+
);
|
|
3093
|
+
|
|
3094
|
+
RETURN_ERROR_IF(
|
|
3095
|
+
nbExternalSeqs == 0 && srcSize > 0,
|
|
3096
|
+
sequenceProducer_failed,
|
|
3097
|
+
"Got zero sequences from external sequence producer for a non-empty src buffer!"
|
|
3098
|
+
);
|
|
3099
|
+
|
|
3100
|
+
if (srcSize == 0) {
|
|
3101
|
+
ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence));
|
|
3102
|
+
return 1;
|
|
3103
|
+
}
|
|
3104
|
+
|
|
3105
|
+
{
|
|
3106
|
+
ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1];
|
|
3107
|
+
|
|
3108
|
+
/* We can return early if lastSeq is already a block delimiter. */
|
|
3109
|
+
if (lastSeq.offset == 0 && lastSeq.matchLength == 0) {
|
|
3110
|
+
return nbExternalSeqs;
|
|
3111
|
+
}
|
|
3112
|
+
|
|
3113
|
+
/* This error condition is only possible if the external matchfinder
|
|
3114
|
+
* produced an invalid parse, by definition of ZSTD_sequenceBound(). */
|
|
3115
|
+
RETURN_ERROR_IF(
|
|
3116
|
+
nbExternalSeqs == outSeqsCapacity,
|
|
3117
|
+
sequenceProducer_failed,
|
|
3118
|
+
"nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
|
|
3119
|
+
);
|
|
3120
|
+
|
|
3121
|
+
/* lastSeq is not a block delimiter, so we need to append one. */
|
|
3122
|
+
ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence));
|
|
3123
|
+
return nbExternalSeqs + 1;
|
|
3124
|
+
}
|
|
3125
|
+
}
|
|
3126
|
+
|
|
3127
|
+
/* ZSTD_fastSequenceLengthSum() :
|
|
3128
|
+
* Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*.
|
|
3129
|
+
* Similar to another function in zstd_compress.c (determine_blockSize),
|
|
3130
|
+
* except it doesn't check for a block delimiter to end summation.
|
|
3131
|
+
* Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P).
|
|
3132
|
+
* This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */
|
|
3133
|
+
static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) {
|
|
3134
|
+
size_t matchLenSum, litLenSum, i;
|
|
3135
|
+
matchLenSum = 0;
|
|
3136
|
+
litLenSum = 0;
|
|
3137
|
+
for (i = 0; i < seqBufSize; i++) {
|
|
3138
|
+
litLenSum += seqBuf[i].litLength;
|
|
3139
|
+
matchLenSum += seqBuf[i].matchLength;
|
|
3140
|
+
}
|
|
3141
|
+
return litLenSum + matchLenSum;
|
|
3142
|
+
}
|
|
3143
|
+
|
|
2812
3144
|
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
|
|
2813
3145
|
|
|
2814
3146
|
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
@@ -2818,7 +3150,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
|
2818
3150
|
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
|
|
2819
3151
|
/* Assert that we have correctly flushed the ctx params into the ms's copy */
|
|
2820
3152
|
ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
|
|
2821
|
-
|
|
3153
|
+
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
|
|
3154
|
+
* additional 1. We need to revisit and change this logic to be more consistent */
|
|
3155
|
+
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
|
|
2822
3156
|
if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
|
|
2823
3157
|
ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
|
|
2824
3158
|
} else {
|
|
@@ -2854,6 +3188,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
|
2854
3188
|
}
|
|
2855
3189
|
if (zc->externSeqStore.pos < zc->externSeqStore.size) {
|
|
2856
3190
|
assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
|
|
3191
|
+
|
|
3192
|
+
/* External matchfinder + LDM is technically possible, just not implemented yet.
|
|
3193
|
+
* We need to revisit soon and implement it. */
|
|
3194
|
+
RETURN_ERROR_IF(
|
|
3195
|
+
zc->appliedParams.useSequenceProducer,
|
|
3196
|
+
parameter_combination_unsupported,
|
|
3197
|
+
"Long-distance matching with external sequence producer enabled is not currently supported."
|
|
3198
|
+
);
|
|
3199
|
+
|
|
2857
3200
|
/* Updates ldmSeqStore.pos */
|
|
2858
3201
|
lastLLSize =
|
|
2859
3202
|
ZSTD_ldm_blockCompress(&zc->externSeqStore,
|
|
@@ -2865,6 +3208,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
|
2865
3208
|
} else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
2866
3209
|
rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
|
|
2867
3210
|
|
|
3211
|
+
/* External matchfinder + LDM is technically possible, just not implemented yet.
|
|
3212
|
+
* We need to revisit soon and implement it. */
|
|
3213
|
+
RETURN_ERROR_IF(
|
|
3214
|
+
zc->appliedParams.useSequenceProducer,
|
|
3215
|
+
parameter_combination_unsupported,
|
|
3216
|
+
"Long-distance matching with external sequence producer enabled is not currently supported."
|
|
3217
|
+
);
|
|
3218
|
+
|
|
2868
3219
|
ldmSeqStore.seq = zc->ldmSequences;
|
|
2869
3220
|
ldmSeqStore.capacity = zc->maxNbLdmSequences;
|
|
2870
3221
|
/* Updates ldmSeqStore.size */
|
|
@@ -2879,7 +3230,68 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
|
2879
3230
|
zc->appliedParams.useRowMatchFinder,
|
|
2880
3231
|
src, srcSize);
|
|
2881
3232
|
assert(ldmSeqStore.pos == ldmSeqStore.size);
|
|
2882
|
-
} else
|
|
3233
|
+
} else if (zc->appliedParams.useSequenceProducer) {
|
|
3234
|
+
assert(
|
|
3235
|
+
zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
|
|
3236
|
+
);
|
|
3237
|
+
assert(zc->externalMatchCtx.mFinder != NULL);
|
|
3238
|
+
|
|
3239
|
+
{ U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
|
|
3240
|
+
|
|
3241
|
+
size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(
|
|
3242
|
+
zc->externalMatchCtx.mState,
|
|
3243
|
+
zc->externalMatchCtx.seqBuffer,
|
|
3244
|
+
zc->externalMatchCtx.seqBufferCapacity,
|
|
3245
|
+
src, srcSize,
|
|
3246
|
+
NULL, 0, /* dict and dictSize, currently not supported */
|
|
3247
|
+
zc->appliedParams.compressionLevel,
|
|
3248
|
+
windowSize
|
|
3249
|
+
);
|
|
3250
|
+
|
|
3251
|
+
size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
|
|
3252
|
+
zc->externalMatchCtx.seqBuffer,
|
|
3253
|
+
nbExternalSeqs,
|
|
3254
|
+
zc->externalMatchCtx.seqBufferCapacity,
|
|
3255
|
+
srcSize
|
|
3256
|
+
);
|
|
3257
|
+
|
|
3258
|
+
/* Return early if there is no error, since we don't need to worry about last literals */
|
|
3259
|
+
if (!ZSTD_isError(nbPostProcessedSeqs)) {
|
|
3260
|
+
ZSTD_sequencePosition seqPos = {0,0,0};
|
|
3261
|
+
size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);
|
|
3262
|
+
RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
|
|
3263
|
+
FORWARD_IF_ERROR(
|
|
3264
|
+
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
|
|
3265
|
+
zc, &seqPos,
|
|
3266
|
+
zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
|
|
3267
|
+
src, srcSize,
|
|
3268
|
+
zc->appliedParams.searchForExternalRepcodes
|
|
3269
|
+
),
|
|
3270
|
+
"Failed to copy external sequences to seqStore!"
|
|
3271
|
+
);
|
|
3272
|
+
ms->ldmSeqStore = NULL;
|
|
3273
|
+
DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs);
|
|
3274
|
+
return ZSTDbss_compress;
|
|
3275
|
+
}
|
|
3276
|
+
|
|
3277
|
+
/* Propagate the error if fallback is disabled */
|
|
3278
|
+
if (!zc->appliedParams.enableMatchFinderFallback) {
|
|
3279
|
+
return nbPostProcessedSeqs;
|
|
3280
|
+
}
|
|
3281
|
+
|
|
3282
|
+
/* Fallback to software matchfinder */
|
|
3283
|
+
{ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
|
|
3284
|
+
zc->appliedParams.useRowMatchFinder,
|
|
3285
|
+
dictMode);
|
|
3286
|
+
ms->ldmSeqStore = NULL;
|
|
3287
|
+
DEBUGLOG(
|
|
3288
|
+
5,
|
|
3289
|
+
"External sequence producer returned error code %lu. Falling back to internal parser.",
|
|
3290
|
+
(unsigned long)nbExternalSeqs
|
|
3291
|
+
);
|
|
3292
|
+
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
|
|
3293
|
+
} }
|
|
3294
|
+
} else { /* not long range mode and no external matchfinder */
|
|
2883
3295
|
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
|
|
2884
3296
|
zc->appliedParams.useRowMatchFinder,
|
|
2885
3297
|
dictMode);
|
|
@@ -2910,9 +3322,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
|
2910
3322
|
assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
|
|
2911
3323
|
ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
|
2912
3324
|
for (i = 0; i < seqStoreSeqSize; ++i) {
|
|
2913
|
-
U32 rawOffset = seqStoreSeqs[i].
|
|
3325
|
+
U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
|
|
2914
3326
|
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
|
|
2915
|
-
outSeqs[i].matchLength = seqStoreSeqs[i].
|
|
3327
|
+
outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
|
|
2916
3328
|
outSeqs[i].rep = 0;
|
|
2917
3329
|
|
|
2918
3330
|
if (i == seqStore->longLengthPos) {
|
|
@@ -2923,9 +3335,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
|
2923
3335
|
}
|
|
2924
3336
|
}
|
|
2925
3337
|
|
|
2926
|
-
if (seqStoreSeqs[i].
|
|
3338
|
+
if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
|
|
2927
3339
|
/* Derive the correct offset corresponding to a repcode */
|
|
2928
|
-
outSeqs[i].rep = seqStoreSeqs[i].
|
|
3340
|
+
outSeqs[i].rep = seqStoreSeqs[i].offBase;
|
|
2929
3341
|
if (outSeqs[i].litLength != 0) {
|
|
2930
3342
|
rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
|
|
2931
3343
|
} else {
|
|
@@ -2939,9 +3351,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
|
2939
3351
|
outSeqs[i].offset = rawOffset;
|
|
2940
3352
|
/* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
|
|
2941
3353
|
so we provide seqStoreSeqs[i].offset - 1 */
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
|
|
3354
|
+
ZSTD_updateRep(updatedRepcodes.rep,
|
|
3355
|
+
seqStoreSeqs[i].offBase,
|
|
3356
|
+
seqStoreSeqs[i].litLength == 0);
|
|
2945
3357
|
literalsRead += outSeqs[i].litLength;
|
|
2946
3358
|
}
|
|
2947
3359
|
/* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
|
|
@@ -2956,6 +3368,10 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
|
2956
3368
|
zc->seqCollector.seqIndex += seqStoreSeqSize;
|
|
2957
3369
|
}
|
|
2958
3370
|
|
|
3371
|
+
size_t ZSTD_sequenceBound(size_t srcSize) {
|
|
3372
|
+
return (srcSize / ZSTD_MINMATCH_MIN) + 1;
|
|
3373
|
+
}
|
|
3374
|
+
|
|
2959
3375
|
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
|
2960
3376
|
size_t outSeqsSize, const void* src, size_t srcSize)
|
|
2961
3377
|
{
|
|
@@ -3001,19 +3417,17 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) {
|
|
|
3001
3417
|
const size_t unrollMask = unrollSize - 1;
|
|
3002
3418
|
const size_t prefixLength = length & unrollMask;
|
|
3003
3419
|
size_t i;
|
|
3004
|
-
size_t u;
|
|
3005
3420
|
if (length == 1) return 1;
|
|
3006
3421
|
/* Check if prefix is RLE first before using unrolled loop */
|
|
3007
3422
|
if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
|
|
3008
3423
|
return 0;
|
|
3009
3424
|
}
|
|
3010
3425
|
for (i = prefixLength; i != length; i += unrollSize) {
|
|
3426
|
+
size_t u;
|
|
3011
3427
|
for (u = 0; u < unrollSize; u += sizeof(size_t)) {
|
|
3012
3428
|
if (MEM_readST(ip + i + u) != valueST) {
|
|
3013
3429
|
return 0;
|
|
3014
|
-
|
|
3015
|
-
}
|
|
3016
|
-
}
|
|
3430
|
+
} } }
|
|
3017
3431
|
return 1;
|
|
3018
3432
|
}
|
|
3019
3433
|
|
|
@@ -3029,7 +3443,8 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore)
|
|
|
3029
3443
|
return nbSeqs < 4 && nbLits < 10;
|
|
3030
3444
|
}
|
|
3031
3445
|
|
|
3032
|
-
static void
|
|
3446
|
+
static void
|
|
3447
|
+
ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
|
|
3033
3448
|
{
|
|
3034
3449
|
ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
|
|
3035
3450
|
bs->prevCBlock = bs->nextCBlock;
|
|
@@ -3037,7 +3452,9 @@ static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* c
|
|
|
3037
3452
|
}
|
|
3038
3453
|
|
|
3039
3454
|
/* Writes the block header */
|
|
3040
|
-
static void
|
|
3455
|
+
static void
|
|
3456
|
+
writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock)
|
|
3457
|
+
{
|
|
3041
3458
|
U32 const cBlockHeader = cSize == 1 ?
|
|
3042
3459
|
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
|
|
3043
3460
|
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
|
@@ -3050,13 +3467,16 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB
|
|
|
3050
3467
|
* Stores literals block type (raw, rle, compressed, repeat) and
|
|
3051
3468
|
* huffman description table to hufMetadata.
|
|
3052
3469
|
* Requires ENTROPY_WORKSPACE_SIZE workspace
|
|
3053
|
-
*
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
|
|
3470
|
+
* @return : size of huffman description table, or an error code
|
|
3471
|
+
*/
|
|
3472
|
+
static size_t
|
|
3473
|
+
ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
|
|
3474
|
+
const ZSTD_hufCTables_t* prevHuf,
|
|
3475
|
+
ZSTD_hufCTables_t* nextHuf,
|
|
3476
|
+
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
|
3477
|
+
const int literalsCompressionIsDisabled,
|
|
3478
|
+
void* workspace, size_t wkspSize,
|
|
3479
|
+
int hufFlags)
|
|
3060
3480
|
{
|
|
3061
3481
|
BYTE* const wkspStart = (BYTE*)workspace;
|
|
3062
3482
|
BYTE* const wkspEnd = wkspStart + wkspSize;
|
|
@@ -3064,9 +3484,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
|
3064
3484
|
unsigned* const countWksp = (unsigned*)workspace;
|
|
3065
3485
|
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
|
3066
3486
|
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
|
3067
|
-
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
|
3487
|
+
const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp);
|
|
3068
3488
|
unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
|
|
3069
|
-
unsigned huffLog =
|
|
3489
|
+
unsigned huffLog = LitHufLog;
|
|
3070
3490
|
HUF_repeat repeat = prevHuf->repeatMode;
|
|
3071
3491
|
DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
|
|
3072
3492
|
|
|
@@ -3081,73 +3501,77 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
|
3081
3501
|
|
|
3082
3502
|
/* small ? don't even attempt compression (speed opt) */
|
|
3083
3503
|
#ifndef COMPRESS_LITERALS_SIZE_MIN
|
|
3084
|
-
#define COMPRESS_LITERALS_SIZE_MIN 63
|
|
3504
|
+
# define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */
|
|
3085
3505
|
#endif
|
|
3086
3506
|
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
|
3087
3507
|
if (srcSize <= minLitSize) {
|
|
3088
3508
|
DEBUGLOG(5, "set_basic - too small");
|
|
3089
3509
|
hufMetadata->hType = set_basic;
|
|
3090
3510
|
return 0;
|
|
3091
|
-
|
|
3092
|
-
}
|
|
3511
|
+
} }
|
|
3093
3512
|
|
|
3094
3513
|
/* Scan input and build symbol stats */
|
|
3095
|
-
{ size_t const largest =
|
|
3514
|
+
{ size_t const largest =
|
|
3515
|
+
HIST_count_wksp (countWksp, &maxSymbolValue,
|
|
3516
|
+
(const BYTE*)src, srcSize,
|
|
3517
|
+
workspace, wkspSize);
|
|
3096
3518
|
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
|
|
3097
3519
|
if (largest == srcSize) {
|
|
3520
|
+
/* only one literal symbol */
|
|
3098
3521
|
DEBUGLOG(5, "set_rle");
|
|
3099
3522
|
hufMetadata->hType = set_rle;
|
|
3100
3523
|
return 0;
|
|
3101
3524
|
}
|
|
3102
3525
|
if (largest <= (srcSize >> 7)+4) {
|
|
3526
|
+
/* heuristic: likely not compressible */
|
|
3103
3527
|
DEBUGLOG(5, "set_basic - no gain");
|
|
3104
3528
|
hufMetadata->hType = set_basic;
|
|
3105
3529
|
return 0;
|
|
3106
|
-
|
|
3107
|
-
}
|
|
3530
|
+
} }
|
|
3108
3531
|
|
|
3109
3532
|
/* Validate the previous Huffman table */
|
|
3110
|
-
if (repeat == HUF_repeat_check
|
|
3533
|
+
if (repeat == HUF_repeat_check
|
|
3534
|
+
&& !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
|
|
3111
3535
|
repeat = HUF_repeat_none;
|
|
3112
3536
|
}
|
|
3113
3537
|
|
|
3114
3538
|
/* Build Huffman Tree */
|
|
3115
3539
|
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
|
3116
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
|
3540
|
+
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags);
|
|
3541
|
+
assert(huffLog <= LitHufLog);
|
|
3117
3542
|
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
|
3118
3543
|
maxSymbolValue, huffLog,
|
|
3119
3544
|
nodeWksp, nodeWkspSize);
|
|
3120
3545
|
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
|
|
3121
3546
|
huffLog = (U32)maxBits;
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
|
|
3127
|
-
|
|
3128
|
-
|
|
3129
|
-
|
|
3130
|
-
|
|
3131
|
-
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
|
|
3136
|
-
hufMetadata->hType = set_repeat;
|
|
3137
|
-
return 0;
|
|
3138
|
-
}
|
|
3139
|
-
}
|
|
3140
|
-
if (newCSize + hSize >= srcSize) {
|
|
3141
|
-
DEBUGLOG(5, "set_basic - no gains");
|
|
3547
|
+
}
|
|
3548
|
+
{ /* Build and write the CTable */
|
|
3549
|
+
size_t const newCSize = HUF_estimateCompressedSize(
|
|
3550
|
+
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
|
|
3551
|
+
size_t const hSize = HUF_writeCTable_wksp(
|
|
3552
|
+
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
|
|
3553
|
+
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
|
|
3554
|
+
nodeWksp, nodeWkspSize);
|
|
3555
|
+
/* Check against repeating the previous CTable */
|
|
3556
|
+
if (repeat != HUF_repeat_none) {
|
|
3557
|
+
size_t const oldCSize = HUF_estimateCompressedSize(
|
|
3558
|
+
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
|
|
3559
|
+
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
|
|
3560
|
+
DEBUGLOG(5, "set_repeat - smaller");
|
|
3142
3561
|
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
3143
|
-
hufMetadata->hType =
|
|
3562
|
+
hufMetadata->hType = set_repeat;
|
|
3144
3563
|
return 0;
|
|
3145
|
-
|
|
3146
|
-
|
|
3147
|
-
|
|
3148
|
-
nextHuf
|
|
3149
|
-
|
|
3564
|
+
} }
|
|
3565
|
+
if (newCSize + hSize >= srcSize) {
|
|
3566
|
+
DEBUGLOG(5, "set_basic - no gains");
|
|
3567
|
+
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
3568
|
+
hufMetadata->hType = set_basic;
|
|
3569
|
+
return 0;
|
|
3150
3570
|
}
|
|
3571
|
+
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
|
|
3572
|
+
hufMetadata->hType = set_compressed;
|
|
3573
|
+
nextHuf->repeatMode = HUF_repeat_check;
|
|
3574
|
+
return hSize;
|
|
3151
3575
|
}
|
|
3152
3576
|
}
|
|
3153
3577
|
|
|
@@ -3157,8 +3581,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
|
3157
3581
|
* and updates nextEntropy to the appropriate repeatMode.
|
|
3158
3582
|
*/
|
|
3159
3583
|
static ZSTD_symbolEncodingTypeStats_t
|
|
3160
|
-
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
|
|
3161
|
-
|
|
3584
|
+
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
|
|
3585
|
+
{
|
|
3586
|
+
ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0};
|
|
3162
3587
|
nextEntropy->litlength_repeatMode = FSE_repeat_none;
|
|
3163
3588
|
nextEntropy->offcode_repeatMode = FSE_repeat_none;
|
|
3164
3589
|
nextEntropy->matchlength_repeatMode = FSE_repeat_none;
|
|
@@ -3169,16 +3594,18 @@ ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
|
|
|
3169
3594
|
* Builds entropy for the sequences.
|
|
3170
3595
|
* Stores symbol compression modes and fse table to fseMetadata.
|
|
3171
3596
|
* Requires ENTROPY_WORKSPACE_SIZE wksp.
|
|
3172
|
-
*
|
|
3173
|
-
static size_t
|
|
3174
|
-
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
|
|
3597
|
+
* @return : size of fse tables or error code */
|
|
3598
|
+
static size_t
|
|
3599
|
+
ZSTD_buildBlockEntropyStats_sequences(
|
|
3600
|
+
const seqStore_t* seqStorePtr,
|
|
3601
|
+
const ZSTD_fseCTables_t* prevEntropy,
|
|
3602
|
+
ZSTD_fseCTables_t* nextEntropy,
|
|
3603
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
3604
|
+
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
|
3605
|
+
void* workspace, size_t wkspSize)
|
|
3179
3606
|
{
|
|
3180
3607
|
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
|
3181
|
-
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
|
3608
|
+
size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
|
3182
3609
|
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
|
3183
3610
|
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
|
3184
3611
|
BYTE* op = ostart;
|
|
@@ -3205,23 +3632,28 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
|
|
|
3205
3632
|
/** ZSTD_buildBlockEntropyStats() :
|
|
3206
3633
|
* Builds entropy for the block.
|
|
3207
3634
|
* Requires workspace size ENTROPY_WORKSPACE_SIZE
|
|
3208
|
-
*
|
|
3209
|
-
*
|
|
3635
|
+
* @return : 0 on success, or an error code
|
|
3636
|
+
* Note : also employed in superblock
|
|
3210
3637
|
*/
|
|
3211
|
-
size_t ZSTD_buildBlockEntropyStats(
|
|
3212
|
-
|
|
3213
|
-
|
|
3214
|
-
|
|
3215
|
-
|
|
3216
|
-
|
|
3217
|
-
|
|
3218
|
-
|
|
3638
|
+
size_t ZSTD_buildBlockEntropyStats(
|
|
3639
|
+
const seqStore_t* seqStorePtr,
|
|
3640
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
|
3641
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
|
3642
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
3643
|
+
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
|
3644
|
+
void* workspace, size_t wkspSize)
|
|
3645
|
+
{
|
|
3646
|
+
size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
|
|
3647
|
+
int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD);
|
|
3648
|
+
int const hufFlags = huf_useOptDepth ? HUF_flags_optimalDepth : 0;
|
|
3649
|
+
|
|
3219
3650
|
entropyMetadata->hufMetadata.hufDesSize =
|
|
3220
3651
|
ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
|
|
3221
3652
|
&prevEntropy->huf, &nextEntropy->huf,
|
|
3222
3653
|
&entropyMetadata->hufMetadata,
|
|
3223
3654
|
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
|
3224
|
-
workspace, wkspSize);
|
|
3655
|
+
workspace, wkspSize, hufFlags);
|
|
3656
|
+
|
|
3225
3657
|
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
|
|
3226
3658
|
entropyMetadata->fseMetadata.fseTablesSize =
|
|
3227
3659
|
ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
|
|
@@ -3234,11 +3666,12 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
|
|
|
3234
3666
|
}
|
|
3235
3667
|
|
|
3236
3668
|
/* Returns the size estimate for the literals section (header + content) of a block */
|
|
3237
|
-
static size_t
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
|
|
3669
|
+
static size_t
|
|
3670
|
+
ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
|
|
3671
|
+
const ZSTD_hufCTables_t* huf,
|
|
3672
|
+
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
|
3673
|
+
void* workspace, size_t wkspSize,
|
|
3674
|
+
int writeEntropy)
|
|
3242
3675
|
{
|
|
3243
3676
|
unsigned* const countWksp = (unsigned*)workspace;
|
|
3244
3677
|
unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
|
|
@@ -3260,12 +3693,13 @@ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSiz
|
|
|
3260
3693
|
}
|
|
3261
3694
|
|
|
3262
3695
|
/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
|
|
3263
|
-
static size_t
|
|
3264
|
-
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
|
|
3268
|
-
|
|
3696
|
+
static size_t
|
|
3697
|
+
ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
|
|
3698
|
+
const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
|
|
3699
|
+
const FSE_CTable* fseCTable,
|
|
3700
|
+
const U8* additionalBits,
|
|
3701
|
+
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
|
3702
|
+
void* workspace, size_t wkspSize)
|
|
3269
3703
|
{
|
|
3270
3704
|
unsigned* const countWksp = (unsigned*)workspace;
|
|
3271
3705
|
const BYTE* ctp = codeTable;
|
|
@@ -3297,99 +3731,107 @@ static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
|
|
|
3297
3731
|
}
|
|
3298
3732
|
|
|
3299
3733
|
/* Returns the size estimate for the sequences section (header + content) of a block */
|
|
3300
|
-
static size_t
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
3307
|
-
|
|
3734
|
+
static size_t
|
|
3735
|
+
ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
|
|
3736
|
+
const BYTE* llCodeTable,
|
|
3737
|
+
const BYTE* mlCodeTable,
|
|
3738
|
+
size_t nbSeq,
|
|
3739
|
+
const ZSTD_fseCTables_t* fseTables,
|
|
3740
|
+
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
|
3741
|
+
void* workspace, size_t wkspSize,
|
|
3742
|
+
int writeEntropy)
|
|
3308
3743
|
{
|
|
3309
3744
|
size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
|
|
3310
3745
|
size_t cSeqSizeEstimate = 0;
|
|
3311
3746
|
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
|
|
3312
|
-
|
|
3313
|
-
|
|
3314
|
-
|
|
3747
|
+
fseTables->offcodeCTable, NULL,
|
|
3748
|
+
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
|
3749
|
+
workspace, wkspSize);
|
|
3315
3750
|
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
|
|
3316
|
-
|
|
3317
|
-
|
|
3318
|
-
|
|
3751
|
+
fseTables->litlengthCTable, LL_bits,
|
|
3752
|
+
LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
|
3753
|
+
workspace, wkspSize);
|
|
3319
3754
|
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
|
|
3755
|
+
fseTables->matchlengthCTable, ML_bits,
|
|
3756
|
+
ML_defaultNorm, ML_defaultNormLog, MaxML,
|
|
3757
|
+
workspace, wkspSize);
|
|
3323
3758
|
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
|
|
3324
3759
|
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
|
3325
3760
|
}
|
|
3326
3761
|
|
|
3327
3762
|
/* Returns the size estimate for a given stream of literals, of, ll, ml */
|
|
3328
|
-
static size_t
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3763
|
+
static size_t
|
|
3764
|
+
ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
|
|
3765
|
+
const BYTE* ofCodeTable,
|
|
3766
|
+
const BYTE* llCodeTable,
|
|
3767
|
+
const BYTE* mlCodeTable,
|
|
3768
|
+
size_t nbSeq,
|
|
3769
|
+
const ZSTD_entropyCTables_t* entropy,
|
|
3770
|
+
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
|
3771
|
+
void* workspace, size_t wkspSize,
|
|
3772
|
+
int writeLitEntropy, int writeSeqEntropy)
|
|
3773
|
+
{
|
|
3337
3774
|
size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
|
|
3338
|
-
|
|
3339
|
-
|
|
3775
|
+
&entropy->huf, &entropyMetadata->hufMetadata,
|
|
3776
|
+
workspace, wkspSize, writeLitEntropy);
|
|
3340
3777
|
size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
|
|
3341
|
-
|
|
3342
|
-
|
|
3778
|
+
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
|
|
3779
|
+
workspace, wkspSize, writeSeqEntropy);
|
|
3343
3780
|
return seqSize + literalsSize + ZSTD_blockHeaderSize;
|
|
3344
3781
|
}
|
|
3345
3782
|
|
|
3346
3783
|
/* Builds entropy statistics and uses them for blocksize estimation.
|
|
3347
3784
|
*
|
|
3348
|
-
*
|
|
3785
|
+
* @return: estimated compressed size of the seqStore, or a zstd error.
|
|
3349
3786
|
*/
|
|
3350
|
-
static size_t
|
|
3351
|
-
|
|
3787
|
+
static size_t
|
|
3788
|
+
ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc)
|
|
3789
|
+
{
|
|
3790
|
+
ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
|
|
3352
3791
|
DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
|
|
3353
3792
|
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
|
|
3354
3793
|
&zc->blockState.prevCBlock->entropy,
|
|
3355
3794
|
&zc->blockState.nextCBlock->entropy,
|
|
3356
3795
|
&zc->appliedParams,
|
|
3357
3796
|
entropyMetadata,
|
|
3358
|
-
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE
|
|
3359
|
-
return ZSTD_estimateBlockSize(
|
|
3797
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");
|
|
3798
|
+
return ZSTD_estimateBlockSize(
|
|
3799
|
+
seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
|
|
3360
3800
|
seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
|
|
3361
3801
|
(size_t)(seqStore->sequences - seqStore->sequencesStart),
|
|
3362
|
-
&zc->blockState.nextCBlock->entropy,
|
|
3802
|
+
&zc->blockState.nextCBlock->entropy,
|
|
3803
|
+
entropyMetadata,
|
|
3804
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
|
|
3363
3805
|
(int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
|
|
3364
3806
|
}
|
|
3365
3807
|
|
|
3366
3808
|
/* Returns literals bytes represented in a seqStore */
|
|
3367
|
-
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
|
|
3809
|
+
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
|
|
3810
|
+
{
|
|
3368
3811
|
size_t literalsBytes = 0;
|
|
3369
|
-
size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
|
|
3812
|
+
size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
|
|
3370
3813
|
size_t i;
|
|
3371
3814
|
for (i = 0; i < nbSeqs; ++i) {
|
|
3372
|
-
seqDef seq = seqStore->sequencesStart[i];
|
|
3815
|
+
seqDef const seq = seqStore->sequencesStart[i];
|
|
3373
3816
|
literalsBytes += seq.litLength;
|
|
3374
3817
|
if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
|
|
3375
3818
|
literalsBytes += 0x10000;
|
|
3376
|
-
|
|
3377
|
-
}
|
|
3819
|
+
} }
|
|
3378
3820
|
return literalsBytes;
|
|
3379
3821
|
}
|
|
3380
3822
|
|
|
3381
3823
|
/* Returns match bytes represented in a seqStore */
|
|
3382
|
-
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
|
|
3824
|
+
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
|
|
3825
|
+
{
|
|
3383
3826
|
size_t matchBytes = 0;
|
|
3384
|
-
size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
|
|
3827
|
+
size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
|
|
3385
3828
|
size_t i;
|
|
3386
3829
|
for (i = 0; i < nbSeqs; ++i) {
|
|
3387
3830
|
seqDef seq = seqStore->sequencesStart[i];
|
|
3388
|
-
matchBytes += seq.
|
|
3831
|
+
matchBytes += seq.mlBase + MINMATCH;
|
|
3389
3832
|
if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
|
|
3390
3833
|
matchBytes += 0x10000;
|
|
3391
|
-
|
|
3392
|
-
}
|
|
3834
|
+
} }
|
|
3393
3835
|
return matchBytes;
|
|
3394
3836
|
}
|
|
3395
3837
|
|
|
@@ -3398,15 +3840,12 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
|
|
|
3398
3840
|
*/
|
|
3399
3841
|
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
|
|
3400
3842
|
const seqStore_t* originalSeqStore,
|
|
3401
|
-
size_t startIdx, size_t endIdx)
|
|
3402
|
-
|
|
3403
|
-
size_t literalsBytes;
|
|
3404
|
-
size_t literalsBytesPreceding = 0;
|
|
3405
|
-
|
|
3843
|
+
size_t startIdx, size_t endIdx)
|
|
3844
|
+
{
|
|
3406
3845
|
*resultSeqStore = *originalSeqStore;
|
|
3407
3846
|
if (startIdx > 0) {
|
|
3408
3847
|
resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
|
|
3409
|
-
|
|
3848
|
+
resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
|
3410
3849
|
}
|
|
3411
3850
|
|
|
3412
3851
|
/* Move longLengthPos into the correct position if necessary */
|
|
@@ -3419,13 +3858,12 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
|
|
|
3419
3858
|
}
|
|
3420
3859
|
resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
|
|
3421
3860
|
resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
|
|
3422
|
-
literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
|
3423
|
-
resultSeqStore->litStart += literalsBytesPreceding;
|
|
3424
3861
|
if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
|
|
3425
3862
|
/* This accounts for possible last literals if the derived chunk reaches the end of the block */
|
|
3426
|
-
resultSeqStore->lit
|
|
3863
|
+
assert(resultSeqStore->lit == originalSeqStore->lit);
|
|
3427
3864
|
} else {
|
|
3428
|
-
|
|
3865
|
+
size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
|
3866
|
+
resultSeqStore->lit = resultSeqStore->litStart + literalsBytes;
|
|
3429
3867
|
}
|
|
3430
3868
|
resultSeqStore->llCode += startIdx;
|
|
3431
3869
|
resultSeqStore->mlCode += startIdx;
|
|
@@ -3433,52 +3871,68 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
|
|
|
3433
3871
|
}
|
|
3434
3872
|
|
|
3435
3873
|
/**
|
|
3436
|
-
* Returns the raw offset represented by the combination of
|
|
3437
|
-
*
|
|
3874
|
+
* Returns the raw offset represented by the combination of offBase, ll0, and repcode history.
|
|
3875
|
+
* offBase must represent a repcode in the numeric representation of ZSTD_storeSeq().
|
|
3438
3876
|
*/
|
|
3439
|
-
static U32
|
|
3440
|
-
|
|
3441
|
-
|
|
3442
|
-
|
|
3443
|
-
|
|
3444
|
-
|
|
3877
|
+
static U32
|
|
3878
|
+
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0)
|
|
3879
|
+
{
|
|
3880
|
+
U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */
|
|
3881
|
+
assert(OFFBASE_IS_REPCODE(offBase));
|
|
3882
|
+
if (adjustedRepCode == ZSTD_REP_NUM) {
|
|
3883
|
+
assert(ll0);
|
|
3884
|
+
/* litlength == 0 and offCode == 2 implies selection of first repcode - 1
|
|
3885
|
+
* This is only valid if it results in a valid offset value, aka > 0.
|
|
3886
|
+
* Note : it may happen that `rep[0]==1` in exceptional circumstances.
|
|
3887
|
+
* In which case this function will return 0, which is an invalid offset.
|
|
3888
|
+
* It's not an issue though, since this value will be
|
|
3889
|
+
* compared and discarded within ZSTD_seqStore_resolveOffCodes().
|
|
3890
|
+
*/
|
|
3445
3891
|
return rep[0] - 1;
|
|
3446
3892
|
}
|
|
3447
|
-
return rep[
|
|
3893
|
+
return rep[adjustedRepCode];
|
|
3448
3894
|
}
|
|
3449
3895
|
|
|
3450
3896
|
/**
|
|
3451
3897
|
* ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
|
|
3452
|
-
* due to emission of RLE/raw blocks that disturb the offset history,
|
|
3453
|
-
* the seqStore that may be invalid.
|
|
3898
|
+
* due to emission of RLE/raw blocks that disturb the offset history,
|
|
3899
|
+
* and replaces any repcodes within the seqStore that may be invalid.
|
|
3454
3900
|
*
|
|
3455
|
-
* dRepcodes are updated as would be on the decompression side.
|
|
3456
|
-
* accordance with the seqStore.
|
|
3901
|
+
* dRepcodes are updated as would be on the decompression side.
|
|
3902
|
+
* cRepcodes are updated exactly in accordance with the seqStore.
|
|
3903
|
+
*
|
|
3904
|
+
* Note : this function assumes seq->offBase respects the following numbering scheme :
|
|
3905
|
+
* 0 : invalid
|
|
3906
|
+
* 1-3 : repcode 1-3
|
|
3907
|
+
* 4+ : real_offset+3
|
|
3457
3908
|
*/
|
|
3458
|
-
static void
|
|
3459
|
-
|
|
3909
|
+
static void
|
|
3910
|
+
ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
|
|
3911
|
+
const seqStore_t* const seqStore, U32 const nbSeq)
|
|
3912
|
+
{
|
|
3460
3913
|
U32 idx = 0;
|
|
3914
|
+
U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq;
|
|
3461
3915
|
for (; idx < nbSeq; ++idx) {
|
|
3462
3916
|
seqDef* const seq = seqStore->sequencesStart + idx;
|
|
3463
|
-
U32 const ll0 = (seq->litLength == 0);
|
|
3464
|
-
U32
|
|
3465
|
-
assert(
|
|
3466
|
-
if (
|
|
3467
|
-
U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep,
|
|
3468
|
-
U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep,
|
|
3917
|
+
U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx);
|
|
3918
|
+
U32 const offBase = seq->offBase;
|
|
3919
|
+
assert(offBase > 0);
|
|
3920
|
+
if (OFFBASE_IS_REPCODE(offBase)) {
|
|
3921
|
+
U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0);
|
|
3922
|
+
U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0);
|
|
3469
3923
|
/* Adjust simulated decompression repcode history if we come across a mismatch. Replace
|
|
3470
3924
|
* the repcode with the offset it actually references, determined by the compression
|
|
3471
3925
|
* repcode history.
|
|
3472
3926
|
*/
|
|
3473
3927
|
if (dRawOffset != cRawOffset) {
|
|
3474
|
-
seq->
|
|
3928
|
+
seq->offBase = OFFSET_TO_OFFBASE(cRawOffset);
|
|
3475
3929
|
}
|
|
3476
3930
|
}
|
|
3477
3931
|
/* Compression repcode history is always updated with values directly from the unmodified seqStore.
|
|
3478
3932
|
* Decompression repcode history may use modified seq->offset value taken from compression repcode history.
|
|
3479
3933
|
*/
|
|
3480
|
-
|
|
3481
|
-
|
|
3934
|
+
ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0);
|
|
3935
|
+
ZSTD_updateRep(cRepcodes->rep, offBase, ll0);
|
|
3482
3936
|
}
|
|
3483
3937
|
}
|
|
3484
3938
|
|
|
@@ -3487,11 +3941,14 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
|
|
|
3487
3941
|
*
|
|
3488
3942
|
* Returns the total size of that block (including header) or a ZSTD error code.
|
|
3489
3943
|
*/
|
|
3490
|
-
static size_t
|
|
3491
|
-
|
|
3492
|
-
|
|
3493
|
-
|
|
3494
|
-
|
|
3944
|
+
static size_t
|
|
3945
|
+
ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
|
|
3946
|
+
const seqStore_t* const seqStore,
|
|
3947
|
+
repcodes_t* const dRep, repcodes_t* const cRep,
|
|
3948
|
+
void* dst, size_t dstCapacity,
|
|
3949
|
+
const void* src, size_t srcSize,
|
|
3950
|
+
U32 lastBlock, U32 isPartition)
|
|
3951
|
+
{
|
|
3495
3952
|
const U32 rleMaxLength = 25;
|
|
3496
3953
|
BYTE* op = (BYTE*)dst;
|
|
3497
3954
|
const BYTE* ip = (const BYTE*)src;
|
|
@@ -3500,6 +3957,7 @@ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const
|
|
|
3500
3957
|
|
|
3501
3958
|
/* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
|
|
3502
3959
|
repcodes_t const dRepOriginal = *dRep;
|
|
3960
|
+
DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
|
|
3503
3961
|
if (isPartition)
|
|
3504
3962
|
ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
|
|
3505
3963
|
|
|
@@ -3562,43 +4020,49 @@ typedef struct {
|
|
|
3562
4020
|
|
|
3563
4021
|
/* Helper function to perform the recursive search for block splits.
|
|
3564
4022
|
* Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
|
|
3565
|
-
* If advantageous to split, then we recurse down the two sub-blocks.
|
|
3566
|
-
* we do not recurse.
|
|
4023
|
+
* If advantageous to split, then we recurse down the two sub-blocks.
|
|
4024
|
+
* If not, or if an error occurred in estimation, then we do not recurse.
|
|
3567
4025
|
*
|
|
3568
|
-
* Note: The recursion depth is capped by a heuristic minimum number of sequences,
|
|
4026
|
+
* Note: The recursion depth is capped by a heuristic minimum number of sequences,
|
|
4027
|
+
* defined by MIN_SEQUENCES_BLOCK_SPLITTING.
|
|
3569
4028
|
* In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
|
|
3570
4029
|
* In practice, recursion depth usually doesn't go beyond 4.
|
|
3571
4030
|
*
|
|
3572
|
-
* Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
|
|
4031
|
+
* Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
|
|
4032
|
+
* At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
|
|
3573
4033
|
* maximum of 128 KB, this value is actually impossible to reach.
|
|
3574
4034
|
*/
|
|
3575
|
-
static void
|
|
3576
|
-
|
|
3577
|
-
|
|
3578
|
-
|
|
3579
|
-
seqStore_t*
|
|
4035
|
+
static void
|
|
4036
|
+
ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
|
|
4037
|
+
ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
|
|
4038
|
+
{
|
|
4039
|
+
seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
|
|
4040
|
+
seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
|
|
4041
|
+
seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
|
|
3580
4042
|
size_t estimatedOriginalSize;
|
|
3581
4043
|
size_t estimatedFirstHalfSize;
|
|
3582
4044
|
size_t estimatedSecondHalfSize;
|
|
3583
4045
|
size_t midIdx = (startIdx + endIdx)/2;
|
|
3584
4046
|
|
|
4047
|
+
DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
|
|
4048
|
+
assert(endIdx >= startIdx);
|
|
3585
4049
|
if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
|
|
3586
|
-
DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
|
|
4050
|
+
DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx);
|
|
3587
4051
|
return;
|
|
3588
4052
|
}
|
|
3589
|
-
DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
|
|
3590
4053
|
ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
|
|
3591
4054
|
ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
|
|
3592
4055
|
ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
|
|
3593
4056
|
estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
|
|
3594
4057
|
estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
|
|
3595
4058
|
estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
|
|
3596
|
-
DEBUGLOG(
|
|
4059
|
+
DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
|
|
3597
4060
|
estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
|
|
3598
4061
|
if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
|
|
3599
4062
|
return;
|
|
3600
4063
|
}
|
|
3601
4064
|
if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
|
|
4065
|
+
DEBUGLOG(5, "split decided at seqNb:%zu", midIdx);
|
|
3602
4066
|
ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
|
|
3603
4067
|
splits->splitLocations[splits->idx] = (U32)midIdx;
|
|
3604
4068
|
splits->idx++;
|
|
@@ -3606,14 +4070,18 @@ static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx
|
|
|
3606
4070
|
}
|
|
3607
4071
|
}
|
|
3608
4072
|
|
|
3609
|
-
/* Base recursive function.
|
|
4073
|
+
/* Base recursive function.
|
|
4074
|
+
* Populates a table with intra-block partition indices that can improve compression ratio.
|
|
3610
4075
|
*
|
|
3611
|
-
*
|
|
4076
|
+
* @return: number of splits made (which equals the size of the partition table - 1).
|
|
3612
4077
|
*/
|
|
3613
|
-
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
|
|
3614
|
-
|
|
4078
|
+
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
|
|
4079
|
+
{
|
|
4080
|
+
seqStoreSplits splits;
|
|
4081
|
+
splits.splitLocations = partitions;
|
|
4082
|
+
splits.idx = 0;
|
|
3615
4083
|
if (nbSeq <= 4) {
|
|
3616
|
-
DEBUGLOG(
|
|
4084
|
+
DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq);
|
|
3617
4085
|
/* Refuse to try and split anything with less than 4 sequences */
|
|
3618
4086
|
return 0;
|
|
3619
4087
|
}
|
|
@@ -3628,17 +4096,21 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
|
|
|
3628
4096
|
*
|
|
3629
4097
|
* Returns combined size of all blocks (which includes headers), or a ZSTD error code.
|
|
3630
4098
|
*/
|
|
3631
|
-
static size_t
|
|
3632
|
-
|
|
4099
|
+
static size_t
|
|
4100
|
+
ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,
|
|
4101
|
+
void* dst, size_t dstCapacity,
|
|
4102
|
+
const void* src, size_t blockSize,
|
|
4103
|
+
U32 lastBlock, U32 nbSeq)
|
|
4104
|
+
{
|
|
3633
4105
|
size_t cSize = 0;
|
|
3634
4106
|
const BYTE* ip = (const BYTE*)src;
|
|
3635
4107
|
BYTE* op = (BYTE*)dst;
|
|
3636
4108
|
size_t i = 0;
|
|
3637
4109
|
size_t srcBytesTotal = 0;
|
|
3638
|
-
U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
|
|
3639
|
-
seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
|
|
3640
|
-
seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
|
|
3641
|
-
size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
|
|
4110
|
+
U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
|
|
4111
|
+
seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
|
|
4112
|
+
seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore;
|
|
4113
|
+
size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
|
|
3642
4114
|
|
|
3643
4115
|
/* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
|
|
3644
4116
|
* may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
|
|
@@ -3660,30 +4132,31 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
|
|
|
3660
4132
|
ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
|
3661
4133
|
ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
|
|
3662
4134
|
|
|
3663
|
-
DEBUGLOG(
|
|
4135
|
+
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
|
3664
4136
|
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
|
|
3665
4137
|
(unsigned)zc->blockState.matchState.nextToUpdate);
|
|
3666
4138
|
|
|
3667
4139
|
if (numSplits == 0) {
|
|
3668
|
-
size_t cSizeSingleBlock =
|
|
3669
|
-
|
|
3670
|
-
|
|
3671
|
-
|
|
3672
|
-
|
|
4140
|
+
size_t cSizeSingleBlock =
|
|
4141
|
+
ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
|
|
4142
|
+
&dRep, &cRep,
|
|
4143
|
+
op, dstCapacity,
|
|
4144
|
+
ip, blockSize,
|
|
4145
|
+
lastBlock, 0 /* isPartition */);
|
|
3673
4146
|
FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
|
|
3674
4147
|
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
|
|
3675
|
-
assert(
|
|
4148
|
+
assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX);
|
|
4149
|
+
assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize);
|
|
3676
4150
|
return cSizeSingleBlock;
|
|
3677
4151
|
}
|
|
3678
4152
|
|
|
3679
4153
|
ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
|
|
3680
4154
|
for (i = 0; i <= numSplits; ++i) {
|
|
3681
|
-
size_t srcBytes;
|
|
3682
4155
|
size_t cSizeChunk;
|
|
3683
4156
|
U32 const lastPartition = (i == numSplits);
|
|
3684
4157
|
U32 lastBlockEntireSrc = 0;
|
|
3685
4158
|
|
|
3686
|
-
srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
|
|
4159
|
+
size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
|
|
3687
4160
|
srcBytesTotal += srcBytes;
|
|
3688
4161
|
if (lastPartition) {
|
|
3689
4162
|
/* This is the final partition, need to account for possible last literals */
|
|
@@ -3698,7 +4171,8 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
|
|
|
3698
4171
|
op, dstCapacity,
|
|
3699
4172
|
ip, srcBytes,
|
|
3700
4173
|
lastBlockEntireSrc, 1 /* isPartition */);
|
|
3701
|
-
DEBUGLOG(5, "Estimated size: %zu
|
|
4174
|
+
DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size",
|
|
4175
|
+
ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
|
|
3702
4176
|
FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
|
|
3703
4177
|
|
|
3704
4178
|
ip += srcBytes;
|
|
@@ -3706,20 +4180,20 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
|
|
|
3706
4180
|
dstCapacity -= cSizeChunk;
|
|
3707
4181
|
cSize += cSizeChunk;
|
|
3708
4182
|
*currSeqStore = *nextSeqStore;
|
|
3709
|
-
assert(cSizeChunk <=
|
|
4183
|
+
assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize);
|
|
3710
4184
|
}
|
|
3711
|
-
/* cRep and dRep may have diverged during the compression.
|
|
3712
|
-
* for the next block.
|
|
4185
|
+
/* cRep and dRep may have diverged during the compression.
|
|
4186
|
+
* If so, we use the dRep repcodes for the next block.
|
|
3713
4187
|
*/
|
|
3714
4188
|
ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
|
|
3715
4189
|
return cSize;
|
|
3716
4190
|
}
|
|
3717
4191
|
|
|
3718
|
-
static size_t
|
|
3719
|
-
|
|
3720
|
-
|
|
3721
|
-
|
|
3722
|
-
|
|
4192
|
+
static size_t
|
|
4193
|
+
ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
|
4194
|
+
void* dst, size_t dstCapacity,
|
|
4195
|
+
const void* src, size_t srcSize, U32 lastBlock)
|
|
4196
|
+
{
|
|
3723
4197
|
U32 nbSeq;
|
|
3724
4198
|
size_t cSize;
|
|
3725
4199
|
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
|
|
@@ -3730,7 +4204,7 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
|
|
3730
4204
|
if (bss == ZSTDbss_noCompress) {
|
|
3731
4205
|
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
|
3732
4206
|
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
|
3733
|
-
cSize = ZSTD_noCompressBlock(
|
|
4207
|
+
cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
|
|
3734
4208
|
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
|
3735
4209
|
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
|
|
3736
4210
|
return cSize;
|
|
@@ -3743,13 +4217,14 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
|
|
3743
4217
|
return cSize;
|
|
3744
4218
|
}
|
|
3745
4219
|
|
|
3746
|
-
static size_t
|
|
3747
|
-
|
|
3748
|
-
|
|
4220
|
+
static size_t
|
|
4221
|
+
ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|
4222
|
+
void* dst, size_t dstCapacity,
|
|
4223
|
+
const void* src, size_t srcSize, U32 frame)
|
|
3749
4224
|
{
|
|
3750
|
-
/* This
|
|
3751
|
-
* This isn't the actual upper bound.
|
|
3752
|
-
* needs further investigation.
|
|
4225
|
+
/* This is an estimated upper bound for the length of an rle block.
|
|
4226
|
+
* This isn't the actual upper bound.
|
|
4227
|
+
* Finding the real threshold needs further investigation.
|
|
3753
4228
|
*/
|
|
3754
4229
|
const U32 rleMaxLength = 25;
|
|
3755
4230
|
size_t cSize;
|
|
@@ -3841,10 +4316,11 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
|
|
|
3841
4316
|
* * cSize >= blockBound(srcSize): We have expanded the block too much so
|
|
3842
4317
|
* emit an uncompressed block.
|
|
3843
4318
|
*/
|
|
3844
|
-
{
|
|
3845
|
-
|
|
4319
|
+
{ size_t const cSize =
|
|
4320
|
+
ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
|
|
3846
4321
|
if (cSize != ERROR(dstSize_tooSmall)) {
|
|
3847
|
-
size_t const maxCSize =
|
|
4322
|
+
size_t const maxCSize =
|
|
4323
|
+
srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
|
|
3848
4324
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
|
|
3849
4325
|
if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
|
|
3850
4326
|
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
|
|
@@ -3852,7 +4328,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
|
|
|
3852
4328
|
}
|
|
3853
4329
|
}
|
|
3854
4330
|
}
|
|
3855
|
-
}
|
|
4331
|
+
} /* if (bss == ZSTDbss_compress)*/
|
|
3856
4332
|
|
|
3857
4333
|
DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
|
|
3858
4334
|
/* Superblock compression failed, attempt to emit a single no compress block.
|
|
@@ -3910,7 +4386,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
|
|
|
3910
4386
|
* All blocks will be terminated, all input will be consumed.
|
|
3911
4387
|
* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
|
|
3912
4388
|
* Frame is supposed already started (header already produced)
|
|
3913
|
-
*
|
|
4389
|
+
* @return : compressed size, or an error code
|
|
3914
4390
|
*/
|
|
3915
4391
|
static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
|
3916
4392
|
void* dst, size_t dstCapacity,
|
|
@@ -3934,7 +4410,9 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
|
|
3934
4410
|
ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
|
|
3935
4411
|
U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
|
|
3936
4412
|
|
|
3937
|
-
|
|
4413
|
+
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
|
|
4414
|
+
* additional 1. We need to revisit and change this logic to be more consistent */
|
|
4415
|
+
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1,
|
|
3938
4416
|
dstSize_tooSmall,
|
|
3939
4417
|
"not enough space to store compressed block");
|
|
3940
4418
|
if (remaining < blockSize) blockSize = remaining;
|
|
@@ -3973,7 +4451,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
|
|
3973
4451
|
MEM_writeLE24(op, cBlockHeader);
|
|
3974
4452
|
cSize += ZSTD_blockHeaderSize;
|
|
3975
4453
|
}
|
|
3976
|
-
}
|
|
4454
|
+
} /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/
|
|
3977
4455
|
|
|
3978
4456
|
|
|
3979
4457
|
ip += blockSize;
|
|
@@ -4152,31 +4630,51 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
|
|
|
4152
4630
|
}
|
|
4153
4631
|
}
|
|
4154
4632
|
|
|
4155
|
-
size_t
|
|
4156
|
-
|
|
4157
|
-
|
|
4633
|
+
size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
|
|
4634
|
+
void* dst, size_t dstCapacity,
|
|
4635
|
+
const void* src, size_t srcSize)
|
|
4158
4636
|
{
|
|
4159
4637
|
DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
|
|
4160
4638
|
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
|
|
4161
4639
|
}
|
|
4162
4640
|
|
|
4641
|
+
/* NOTE: Must just wrap ZSTD_compressContinue_public() */
|
|
4642
|
+
size_t ZSTD_compressContinue(ZSTD_CCtx* cctx,
|
|
4643
|
+
void* dst, size_t dstCapacity,
|
|
4644
|
+
const void* src, size_t srcSize)
|
|
4645
|
+
{
|
|
4646
|
+
return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize);
|
|
4647
|
+
}
|
|
4163
4648
|
|
|
4164
|
-
size_t
|
|
4649
|
+
static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx)
|
|
4165
4650
|
{
|
|
4166
4651
|
ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
|
|
4167
4652
|
assert(!ZSTD_checkCParams(cParams));
|
|
4168
|
-
return MIN
|
|
4653
|
+
return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);
|
|
4169
4654
|
}
|
|
4170
4655
|
|
|
4171
|
-
|
|
4656
|
+
/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */
|
|
4657
|
+
size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
|
|
4658
|
+
{
|
|
4659
|
+
return ZSTD_getBlockSize_deprecated(cctx);
|
|
4660
|
+
}
|
|
4661
|
+
|
|
4662
|
+
/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
|
|
4663
|
+
size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
4172
4664
|
{
|
|
4173
4665
|
DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
|
|
4174
|
-
{ size_t const blockSizeMax =
|
|
4666
|
+
{ size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx);
|
|
4175
4667
|
RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
|
|
4176
4668
|
|
|
4177
4669
|
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
|
|
4178
4670
|
}
|
|
4179
4671
|
|
|
4672
|
+
/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
|
|
4673
|
+
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
4674
|
+
{
|
|
4675
|
+
return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize);
|
|
4676
|
+
}
|
|
4677
|
+
|
|
4180
4678
|
/*! ZSTD_loadDictionaryContent() :
|
|
4181
4679
|
* @return : 0, or an error code
|
|
4182
4680
|
*/
|
|
@@ -4185,25 +4683,36 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
|
4185
4683
|
ZSTD_cwksp* ws,
|
|
4186
4684
|
ZSTD_CCtx_params const* params,
|
|
4187
4685
|
const void* src, size_t srcSize,
|
|
4188
|
-
ZSTD_dictTableLoadMethod_e dtlm
|
|
4686
|
+
ZSTD_dictTableLoadMethod_e dtlm,
|
|
4687
|
+
ZSTD_tableFillPurpose_e tfp)
|
|
4189
4688
|
{
|
|
4190
4689
|
const BYTE* ip = (const BYTE*) src;
|
|
4191
4690
|
const BYTE* const iend = ip + srcSize;
|
|
4192
4691
|
int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
|
|
4193
4692
|
|
|
4194
|
-
/* Assert that
|
|
4693
|
+
/* Assert that the ms params match the params we're being given */
|
|
4195
4694
|
ZSTD_assertEqualCParams(params->cParams, ms->cParams);
|
|
4196
4695
|
|
|
4197
|
-
|
|
4696
|
+
{ /* Ensure large dictionaries can't cause index overflow */
|
|
4697
|
+
|
|
4198
4698
|
/* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
|
|
4199
4699
|
* Dictionaries right at the edge will immediately trigger overflow
|
|
4200
4700
|
* correction, but I don't want to insert extra constraints here.
|
|
4201
4701
|
*/
|
|
4202
|
-
U32
|
|
4203
|
-
|
|
4204
|
-
|
|
4205
|
-
if (
|
|
4206
|
-
|
|
4702
|
+
U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX;
|
|
4703
|
+
|
|
4704
|
+
int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(&params->cParams);
|
|
4705
|
+
if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) {
|
|
4706
|
+
/* Some dictionary matchfinders in zstd use "short cache",
|
|
4707
|
+
* which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each
|
|
4708
|
+
* CDict hashtable entry as a tag rather than as part of an index.
|
|
4709
|
+
* When short cache is used, we need to truncate the dictionary
|
|
4710
|
+
* so that its indices don't overlap with the tag. */
|
|
4711
|
+
U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX;
|
|
4712
|
+
maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize);
|
|
4713
|
+
assert(!loadLdmDict);
|
|
4714
|
+
}
|
|
4715
|
+
|
|
4207
4716
|
/* If the dictionary is too large, only load the suffix of the dictionary. */
|
|
4208
4717
|
if (srcSize > maxDictSize) {
|
|
4209
4718
|
ip = iend - maxDictSize;
|
|
@@ -4212,30 +4721,46 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
|
4212
4721
|
}
|
|
4213
4722
|
}
|
|
4214
4723
|
|
|
4215
|
-
|
|
4724
|
+
if (srcSize > ZSTD_CHUNKSIZE_MAX) {
|
|
4725
|
+
/* We must have cleared our windows when our source is this large. */
|
|
4726
|
+
assert(ZSTD_window_isEmpty(ms->window));
|
|
4727
|
+
if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window));
|
|
4728
|
+
}
|
|
4216
4729
|
ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
|
|
4217
|
-
ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
|
|
4218
|
-
ms->forceNonContiguous = params->deterministicRefPrefix;
|
|
4219
4730
|
|
|
4220
|
-
|
|
4731
|
+
DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
|
|
4732
|
+
|
|
4733
|
+
if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */
|
|
4221
4734
|
ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
|
|
4222
4735
|
ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
|
|
4736
|
+
ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
|
|
4737
|
+
}
|
|
4738
|
+
|
|
4739
|
+
/* If the dict is larger than we can reasonably index in our tables, only load the suffix. */
|
|
4740
|
+
if (params->cParams.strategy < ZSTD_btultra) {
|
|
4741
|
+
U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28);
|
|
4742
|
+
if (srcSize > maxDictSize) {
|
|
4743
|
+
ip = iend - maxDictSize;
|
|
4744
|
+
src = ip;
|
|
4745
|
+
srcSize = maxDictSize;
|
|
4746
|
+
}
|
|
4223
4747
|
}
|
|
4224
4748
|
|
|
4749
|
+
ms->nextToUpdate = (U32)(ip - ms->window.base);
|
|
4750
|
+
ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
|
|
4751
|
+
ms->forceNonContiguous = params->deterministicRefPrefix;
|
|
4752
|
+
|
|
4225
4753
|
if (srcSize <= HASH_READ_SIZE) return 0;
|
|
4226
4754
|
|
|
4227
4755
|
ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
|
|
4228
4756
|
|
|
4229
|
-
if (loadLdmDict)
|
|
4230
|
-
ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
|
|
4231
|
-
|
|
4232
4757
|
switch(params->cParams.strategy)
|
|
4233
4758
|
{
|
|
4234
4759
|
case ZSTD_fast:
|
|
4235
|
-
ZSTD_fillHashTable(ms, iend, dtlm);
|
|
4760
|
+
ZSTD_fillHashTable(ms, iend, dtlm, tfp);
|
|
4236
4761
|
break;
|
|
4237
4762
|
case ZSTD_dfast:
|
|
4238
|
-
ZSTD_fillDoubleHashTable(ms, iend, dtlm);
|
|
4763
|
+
ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
|
|
4239
4764
|
break;
|
|
4240
4765
|
|
|
4241
4766
|
case ZSTD_greedy:
|
|
@@ -4248,7 +4773,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
|
4248
4773
|
} else {
|
|
4249
4774
|
assert(params->useRowMatchFinder != ZSTD_ps_auto);
|
|
4250
4775
|
if (params->useRowMatchFinder == ZSTD_ps_enable) {
|
|
4251
|
-
size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog)
|
|
4776
|
+
size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);
|
|
4252
4777
|
ZSTD_memset(ms->tagTable, 0, tagTableSize);
|
|
4253
4778
|
ZSTD_row_update(ms, iend-HASH_READ_SIZE);
|
|
4254
4779
|
DEBUGLOG(4, "Using row-based hash table for lazy dict");
|
|
@@ -4401,6 +4926,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4401
4926
|
ZSTD_CCtx_params const* params,
|
|
4402
4927
|
const void* dict, size_t dictSize,
|
|
4403
4928
|
ZSTD_dictTableLoadMethod_e dtlm,
|
|
4929
|
+
ZSTD_tableFillPurpose_e tfp,
|
|
4404
4930
|
void* workspace)
|
|
4405
4931
|
{
|
|
4406
4932
|
const BYTE* dictPtr = (const BYTE*)dict;
|
|
@@ -4419,7 +4945,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4419
4945
|
{
|
|
4420
4946
|
size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
|
4421
4947
|
FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
|
|
4422
|
-
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
|
|
4948
|
+
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), "");
|
|
4423
4949
|
}
|
|
4424
4950
|
return dictID;
|
|
4425
4951
|
}
|
|
@@ -4435,6 +4961,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4435
4961
|
const void* dict, size_t dictSize,
|
|
4436
4962
|
ZSTD_dictContentType_e dictContentType,
|
|
4437
4963
|
ZSTD_dictTableLoadMethod_e dtlm,
|
|
4964
|
+
ZSTD_tableFillPurpose_e tfp,
|
|
4438
4965
|
void* workspace)
|
|
4439
4966
|
{
|
|
4440
4967
|
DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
|
|
@@ -4447,13 +4974,13 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4447
4974
|
|
|
4448
4975
|
/* dict restricted modes */
|
|
4449
4976
|
if (dictContentType == ZSTD_dct_rawContent)
|
|
4450
|
-
return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
|
|
4977
|
+
return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp);
|
|
4451
4978
|
|
|
4452
4979
|
if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
|
|
4453
4980
|
if (dictContentType == ZSTD_dct_auto) {
|
|
4454
4981
|
DEBUGLOG(4, "raw content dictionary detected");
|
|
4455
4982
|
return ZSTD_loadDictionaryContent(
|
|
4456
|
-
ms, ls, ws, params, dict, dictSize, dtlm);
|
|
4983
|
+
ms, ls, ws, params, dict, dictSize, dtlm, tfp);
|
|
4457
4984
|
}
|
|
4458
4985
|
RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
|
|
4459
4986
|
assert(0); /* impossible */
|
|
@@ -4461,13 +4988,14 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4461
4988
|
|
|
4462
4989
|
/* dict as full zstd dictionary */
|
|
4463
4990
|
return ZSTD_loadZstdDictionary(
|
|
4464
|
-
bs, ms, ws, params, dict, dictSize, dtlm, workspace);
|
|
4991
|
+
bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace);
|
|
4465
4992
|
}
|
|
4466
4993
|
|
|
4467
4994
|
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
|
|
4468
4995
|
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
|
|
4469
4996
|
|
|
4470
4997
|
/*! ZSTD_compressBegin_internal() :
|
|
4998
|
+
* Assumption : either @dict OR @cdict (or none) is non-NULL, never both
|
|
4471
4999
|
* @return : 0, or an error code */
|
|
4472
5000
|
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
|
4473
5001
|
const void* dict, size_t dictSize,
|
|
@@ -4503,11 +5031,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
|
|
4503
5031
|
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
|
4504
5032
|
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
|
|
4505
5033
|
cdict->dictContentSize, cdict->dictContentType, dtlm,
|
|
4506
|
-
cctx->entropyWorkspace)
|
|
5034
|
+
ZSTD_tfp_forCCtx, cctx->entropyWorkspace)
|
|
4507
5035
|
: ZSTD_compress_insertDictionary(
|
|
4508
5036
|
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
|
4509
5037
|
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
|
|
4510
|
-
dictContentType, dtlm, cctx->entropyWorkspace);
|
|
5038
|
+
dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace);
|
|
4511
5039
|
FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
|
|
4512
5040
|
assert(dictID <= UINT_MAX);
|
|
4513
5041
|
cctx->dictID = (U32)dictID;
|
|
@@ -4548,11 +5076,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
|
|
|
4548
5076
|
&cctxParams, pledgedSrcSize);
|
|
4549
5077
|
}
|
|
4550
5078
|
|
|
4551
|
-
|
|
5079
|
+
static size_t
|
|
5080
|
+
ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
|
|
4552
5081
|
{
|
|
4553
5082
|
ZSTD_CCtx_params cctxParams;
|
|
4554
|
-
{
|
|
4555
|
-
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
|
|
5083
|
+
{ ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
|
|
4556
5084
|
ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
|
|
4557
5085
|
}
|
|
4558
5086
|
DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
|
|
@@ -4560,9 +5088,15 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di
|
|
|
4560
5088
|
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
|
|
4561
5089
|
}
|
|
4562
5090
|
|
|
5091
|
+
size_t
|
|
5092
|
+
ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
|
|
5093
|
+
{
|
|
5094
|
+
return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel);
|
|
5095
|
+
}
|
|
5096
|
+
|
|
4563
5097
|
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
|
|
4564
5098
|
{
|
|
4565
|
-
return
|
|
5099
|
+
return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel);
|
|
4566
5100
|
}
|
|
4567
5101
|
|
|
4568
5102
|
|
|
@@ -4632,9 +5166,9 @@ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
|
|
|
4632
5166
|
#endif
|
|
4633
5167
|
}
|
|
4634
5168
|
|
|
4635
|
-
size_t
|
|
4636
|
-
|
|
4637
|
-
|
|
5169
|
+
size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
|
|
5170
|
+
void* dst, size_t dstCapacity,
|
|
5171
|
+
const void* src, size_t srcSize)
|
|
4638
5172
|
{
|
|
4639
5173
|
size_t endResult;
|
|
4640
5174
|
size_t const cSize = ZSTD_compressContinue_internal(cctx,
|
|
@@ -4658,6 +5192,14 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
|
|
|
4658
5192
|
return cSize + endResult;
|
|
4659
5193
|
}
|
|
4660
5194
|
|
|
5195
|
+
/* NOTE: Must just wrap ZSTD_compressEnd_public() */
|
|
5196
|
+
size_t ZSTD_compressEnd(ZSTD_CCtx* cctx,
|
|
5197
|
+
void* dst, size_t dstCapacity,
|
|
5198
|
+
const void* src, size_t srcSize)
|
|
5199
|
+
{
|
|
5200
|
+
return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
|
|
5201
|
+
}
|
|
5202
|
+
|
|
4661
5203
|
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
|
|
4662
5204
|
void* dst, size_t dstCapacity,
|
|
4663
5205
|
const void* src, size_t srcSize,
|
|
@@ -4686,7 +5228,7 @@ size_t ZSTD_compress_advanced_internal(
|
|
|
4686
5228
|
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
|
|
4687
5229
|
dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
|
|
4688
5230
|
params, srcSize, ZSTDb_not_buffered) , "");
|
|
4689
|
-
return
|
|
5231
|
+
return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
|
|
4690
5232
|
}
|
|
4691
5233
|
|
|
4692
5234
|
size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
|
|
@@ -4811,7 +5353,7 @@ static size_t ZSTD_initCDict_internal(
|
|
|
4811
5353
|
{ size_t const dictID = ZSTD_compress_insertDictionary(
|
|
4812
5354
|
&cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
|
|
4813
5355
|
&params, cdict->dictContent, cdict->dictContentSize,
|
|
4814
|
-
dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
|
|
5356
|
+
dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace);
|
|
4815
5357
|
FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
|
|
4816
5358
|
assert(dictID <= (size_t)(U32)-1);
|
|
4817
5359
|
cdict->dictID = (U32)dictID;
|
|
@@ -5008,6 +5550,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
|
|
|
5008
5550
|
params.cParams = cParams;
|
|
5009
5551
|
params.useRowMatchFinder = useRowMatchFinder;
|
|
5010
5552
|
cdict->useRowMatchFinder = useRowMatchFinder;
|
|
5553
|
+
cdict->compressionLevel = ZSTD_NO_CLEVEL;
|
|
5011
5554
|
|
|
5012
5555
|
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
|
|
5013
5556
|
dict, dictSize,
|
|
@@ -5087,12 +5630,17 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
|
|
|
5087
5630
|
|
|
5088
5631
|
/* ZSTD_compressBegin_usingCDict() :
|
|
5089
5632
|
* cdict must be != NULL */
|
|
5090
|
-
size_t
|
|
5633
|
+
size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
|
|
5091
5634
|
{
|
|
5092
5635
|
ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
|
|
5093
5636
|
return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
|
|
5094
5637
|
}
|
|
5095
5638
|
|
|
5639
|
+
size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
|
|
5640
|
+
{
|
|
5641
|
+
return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict);
|
|
5642
|
+
}
|
|
5643
|
+
|
|
5096
5644
|
/*! ZSTD_compress_usingCDict_internal():
|
|
5097
5645
|
* Implementation of various ZSTD_compress_usingCDict* functions.
|
|
5098
5646
|
*/
|
|
@@ -5102,7 +5650,7 @@ static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
|
|
|
5102
5650
|
const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
|
|
5103
5651
|
{
|
|
5104
5652
|
FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
|
|
5105
|
-
return
|
|
5653
|
+
return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
|
|
5106
5654
|
}
|
|
5107
5655
|
|
|
5108
5656
|
/*! ZSTD_compress_usingCDict_advanced():
|
|
@@ -5299,30 +5847,41 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
|
|
|
5299
5847
|
|
|
5300
5848
|
static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
|
|
5301
5849
|
{
|
|
5302
|
-
|
|
5303
|
-
|
|
5304
|
-
|
|
5850
|
+
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
|
5851
|
+
return cctx->blockSize - cctx->stableIn_notConsumed;
|
|
5852
|
+
}
|
|
5853
|
+
assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered);
|
|
5854
|
+
{ size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
|
|
5855
|
+
if (hintInSize==0) hintInSize = cctx->blockSize;
|
|
5856
|
+
return hintInSize;
|
|
5857
|
+
}
|
|
5305
5858
|
}
|
|
5306
5859
|
|
|
5307
5860
|
/** ZSTD_compressStream_generic():
|
|
5308
5861
|
* internal function for all *compressStream*() variants
|
|
5309
|
-
*
|
|
5310
|
-
* @return : hint size for next input */
|
|
5862
|
+
* @return : hint size for next input to complete ongoing block */
|
|
5311
5863
|
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5312
5864
|
ZSTD_outBuffer* output,
|
|
5313
5865
|
ZSTD_inBuffer* input,
|
|
5314
5866
|
ZSTD_EndDirective const flushMode)
|
|
5315
5867
|
{
|
|
5316
|
-
const char* const istart = (const char*)input->src;
|
|
5317
|
-
const char* const iend =
|
|
5318
|
-
const char* ip =
|
|
5319
|
-
char* const ostart = (char*)output->dst;
|
|
5320
|
-
char* const oend =
|
|
5321
|
-
char* op =
|
|
5868
|
+
const char* const istart = (assert(input != NULL), (const char*)input->src);
|
|
5869
|
+
const char* const iend = (istart != NULL) ? istart + input->size : istart;
|
|
5870
|
+
const char* ip = (istart != NULL) ? istart + input->pos : istart;
|
|
5871
|
+
char* const ostart = (assert(output != NULL), (char*)output->dst);
|
|
5872
|
+
char* const oend = (ostart != NULL) ? ostart + output->size : ostart;
|
|
5873
|
+
char* op = (ostart != NULL) ? ostart + output->pos : ostart;
|
|
5322
5874
|
U32 someMoreWork = 1;
|
|
5323
5875
|
|
|
5324
5876
|
/* check expectations */
|
|
5325
|
-
DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%
|
|
5877
|
+
DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos);
|
|
5878
|
+
assert(zcs != NULL);
|
|
5879
|
+
if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
|
5880
|
+
assert(input->pos >= zcs->stableIn_notConsumed);
|
|
5881
|
+
input->pos -= zcs->stableIn_notConsumed;
|
|
5882
|
+
ip -= zcs->stableIn_notConsumed;
|
|
5883
|
+
zcs->stableIn_notConsumed = 0;
|
|
5884
|
+
}
|
|
5326
5885
|
if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
|
|
5327
5886
|
assert(zcs->inBuff != NULL);
|
|
5328
5887
|
assert(zcs->inBuffSize > 0);
|
|
@@ -5331,8 +5890,10 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5331
5890
|
assert(zcs->outBuff != NULL);
|
|
5332
5891
|
assert(zcs->outBuffSize > 0);
|
|
5333
5892
|
}
|
|
5334
|
-
|
|
5893
|
+
if (input->src == NULL) assert(input->size == 0);
|
|
5335
5894
|
assert(input->pos <= input->size);
|
|
5895
|
+
if (output->dst == NULL) assert(output->size == 0);
|
|
5896
|
+
assert(output->pos <= output->size);
|
|
5336
5897
|
assert((U32)flushMode <= (U32)ZSTD_e_end);
|
|
5337
5898
|
|
|
5338
5899
|
while (someMoreWork) {
|
|
@@ -5347,7 +5908,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5347
5908
|
|| zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */
|
|
5348
5909
|
&& (zcs->inBuffPos == 0) ) {
|
|
5349
5910
|
/* shortcut to compression pass directly into output buffer */
|
|
5350
|
-
size_t const cSize =
|
|
5911
|
+
size_t const cSize = ZSTD_compressEnd_public(zcs,
|
|
5351
5912
|
op, oend-op, ip, iend-ip);
|
|
5352
5913
|
DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
|
|
5353
5914
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
|
|
@@ -5364,8 +5925,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5364
5925
|
zcs->inBuff + zcs->inBuffPos, toLoad,
|
|
5365
5926
|
ip, iend-ip);
|
|
5366
5927
|
zcs->inBuffPos += loaded;
|
|
5367
|
-
if (
|
|
5368
|
-
ip += loaded;
|
|
5928
|
+
if (ip) ip += loaded;
|
|
5369
5929
|
if ( (flushMode == ZSTD_e_continue)
|
|
5370
5930
|
&& (zcs->inBuffPos < zcs->inBuffTarget) ) {
|
|
5371
5931
|
/* not enough input to fill full block : stop here */
|
|
@@ -5376,6 +5936,20 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5376
5936
|
/* empty */
|
|
5377
5937
|
someMoreWork = 0; break;
|
|
5378
5938
|
}
|
|
5939
|
+
} else {
|
|
5940
|
+
assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
|
|
5941
|
+
if ( (flushMode == ZSTD_e_continue)
|
|
5942
|
+
&& ( (size_t)(iend - ip) < zcs->blockSize) ) {
|
|
5943
|
+
/* can't compress a full block : stop here */
|
|
5944
|
+
zcs->stableIn_notConsumed = (size_t)(iend - ip);
|
|
5945
|
+
ip = iend; /* pretend to have consumed input */
|
|
5946
|
+
someMoreWork = 0; break;
|
|
5947
|
+
}
|
|
5948
|
+
if ( (flushMode == ZSTD_e_flush)
|
|
5949
|
+
&& (ip == iend) ) {
|
|
5950
|
+
/* empty */
|
|
5951
|
+
someMoreWork = 0; break;
|
|
5952
|
+
}
|
|
5379
5953
|
}
|
|
5380
5954
|
/* compress current block (note : this stage cannot be stopped in the middle) */
|
|
5381
5955
|
DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
|
|
@@ -5383,9 +5957,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5383
5957
|
void* cDst;
|
|
5384
5958
|
size_t cSize;
|
|
5385
5959
|
size_t oSize = oend-op;
|
|
5386
|
-
size_t const iSize = inputBuffered
|
|
5387
|
-
|
|
5388
|
-
: MIN((size_t)(iend - ip), zcs->blockSize);
|
|
5960
|
+
size_t const iSize = inputBuffered ? zcs->inBuffPos - zcs->inToCompress
|
|
5961
|
+
: MIN((size_t)(iend - ip), zcs->blockSize);
|
|
5389
5962
|
if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
|
|
5390
5963
|
cDst = op; /* compress into output buffer, to skip flush stage */
|
|
5391
5964
|
else
|
|
@@ -5393,9 +5966,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5393
5966
|
if (inputBuffered) {
|
|
5394
5967
|
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
|
|
5395
5968
|
cSize = lastBlock ?
|
|
5396
|
-
|
|
5969
|
+
ZSTD_compressEnd_public(zcs, cDst, oSize,
|
|
5397
5970
|
zcs->inBuff + zcs->inToCompress, iSize) :
|
|
5398
|
-
|
|
5971
|
+
ZSTD_compressContinue_public(zcs, cDst, oSize,
|
|
5399
5972
|
zcs->inBuff + zcs->inToCompress, iSize);
|
|
5400
5973
|
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
|
|
5401
5974
|
zcs->frameEnded = lastBlock;
|
|
@@ -5408,19 +5981,16 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5408
5981
|
if (!lastBlock)
|
|
5409
5982
|
assert(zcs->inBuffTarget <= zcs->inBuffSize);
|
|
5410
5983
|
zcs->inToCompress = zcs->inBuffPos;
|
|
5411
|
-
} else {
|
|
5412
|
-
unsigned const lastBlock = (ip + iSize == iend);
|
|
5413
|
-
assert(flushMode == ZSTD_e_end /* Already validated */);
|
|
5984
|
+
} else { /* !inputBuffered, hence ZSTD_bm_stable */
|
|
5985
|
+
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend);
|
|
5414
5986
|
cSize = lastBlock ?
|
|
5415
|
-
|
|
5416
|
-
|
|
5987
|
+
ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) :
|
|
5988
|
+
ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize);
|
|
5417
5989
|
/* Consume the input prior to error checking to mirror buffered mode. */
|
|
5418
|
-
if (
|
|
5419
|
-
ip += iSize;
|
|
5990
|
+
if (ip) ip += iSize;
|
|
5420
5991
|
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
|
|
5421
5992
|
zcs->frameEnded = lastBlock;
|
|
5422
|
-
if (lastBlock)
|
|
5423
|
-
assert(ip == iend);
|
|
5993
|
+
if (lastBlock) assert(ip == iend);
|
|
5424
5994
|
}
|
|
5425
5995
|
if (cDst == op) { /* no need to flush */
|
|
5426
5996
|
op += cSize;
|
|
@@ -5496,8 +6066,10 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf
|
|
|
5496
6066
|
/* After a compression call set the expected input/output buffer.
|
|
5497
6067
|
* This is validated at the start of the next compression call.
|
|
5498
6068
|
*/
|
|
5499
|
-
static void
|
|
6069
|
+
static void
|
|
6070
|
+
ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input)
|
|
5500
6071
|
{
|
|
6072
|
+
DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)");
|
|
5501
6073
|
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
|
5502
6074
|
cctx->expectedInBuffer = *input;
|
|
5503
6075
|
}
|
|
@@ -5516,22 +6088,22 @@ static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
|
|
|
5516
6088
|
{
|
|
5517
6089
|
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
|
5518
6090
|
ZSTD_inBuffer const expect = cctx->expectedInBuffer;
|
|
5519
|
-
if (expect.src != input->src || expect.pos != input->pos
|
|
5520
|
-
RETURN_ERROR(
|
|
5521
|
-
if (endOp != ZSTD_e_end)
|
|
5522
|
-
RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
|
|
6091
|
+
if (expect.src != input->src || expect.pos != input->pos)
|
|
6092
|
+
RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!");
|
|
5523
6093
|
}
|
|
6094
|
+
(void)endOp;
|
|
5524
6095
|
if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
|
|
5525
6096
|
size_t const outBufferSize = output->size - output->pos;
|
|
5526
6097
|
if (cctx->expectedOutBufferSize != outBufferSize)
|
|
5527
|
-
RETURN_ERROR(
|
|
6098
|
+
RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!");
|
|
5528
6099
|
}
|
|
5529
6100
|
return 0;
|
|
5530
6101
|
}
|
|
5531
6102
|
|
|
5532
6103
|
static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
5533
6104
|
ZSTD_EndDirective endOp,
|
|
5534
|
-
size_t inSize)
|
|
6105
|
+
size_t inSize)
|
|
6106
|
+
{
|
|
5535
6107
|
ZSTD_CCtx_params params = cctx->requestedParams;
|
|
5536
6108
|
ZSTD_prefixDict const prefixDict = cctx->prefixDict;
|
|
5537
6109
|
FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
|
|
@@ -5545,9 +6117,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
|
5545
6117
|
params.compressionLevel = cctx->cdict->compressionLevel;
|
|
5546
6118
|
}
|
|
5547
6119
|
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
|
|
5548
|
-
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-
|
|
5549
|
-
|
|
5550
|
-
|
|
6120
|
+
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-determine pledgedSrcSize */
|
|
6121
|
+
|
|
6122
|
+
{ size_t const dictSize = prefixDict.dict
|
|
5551
6123
|
? prefixDict.dictSize
|
|
5552
6124
|
: (cctx->cdict ? cctx->cdict->dictContentSize : 0);
|
|
5553
6125
|
ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
|
|
@@ -5559,8 +6131,18 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
|
5559
6131
|
params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
|
|
5560
6132
|
params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
|
|
5561
6133
|
params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
|
|
6134
|
+
params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
|
|
6135
|
+
params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);
|
|
6136
|
+
params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel);
|
|
5562
6137
|
|
|
5563
6138
|
#ifdef ZSTD_MULTITHREAD
|
|
6139
|
+
/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
|
|
6140
|
+
RETURN_ERROR_IF(
|
|
6141
|
+
params.useSequenceProducer == 1 && params.nbWorkers >= 1,
|
|
6142
|
+
parameter_combination_unsupported,
|
|
6143
|
+
"External sequence producer isn't supported with nbWorkers >= 1"
|
|
6144
|
+
);
|
|
6145
|
+
|
|
5564
6146
|
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
|
|
5565
6147
|
params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
|
|
5566
6148
|
}
|
|
@@ -5588,7 +6170,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
|
5588
6170
|
cctx->streamStage = zcss_load;
|
|
5589
6171
|
cctx->appliedParams = params;
|
|
5590
6172
|
} else
|
|
5591
|
-
#endif
|
|
6173
|
+
#endif /* ZSTD_MULTITHREAD */
|
|
5592
6174
|
{ U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
|
|
5593
6175
|
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
|
5594
6176
|
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
|
|
@@ -5614,6 +6196,8 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
|
5614
6196
|
return 0;
|
|
5615
6197
|
}
|
|
5616
6198
|
|
|
6199
|
+
/* @return provides a minimum amount of data remaining to be flushed from internal buffers
|
|
6200
|
+
*/
|
|
5617
6201
|
size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
5618
6202
|
ZSTD_outBuffer* output,
|
|
5619
6203
|
ZSTD_inBuffer* input,
|
|
@@ -5628,8 +6212,27 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
|
5628
6212
|
|
|
5629
6213
|
/* transparent initialization stage */
|
|
5630
6214
|
if (cctx->streamStage == zcss_init) {
|
|
5631
|
-
|
|
5632
|
-
|
|
6215
|
+
size_t const inputSize = input->size - input->pos; /* no obligation to start from pos==0 */
|
|
6216
|
+
size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed;
|
|
6217
|
+
if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */
|
|
6218
|
+
&& (endOp == ZSTD_e_continue) /* no flush requested, more input to come */
|
|
6219
|
+
&& (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) { /* not even reached one block yet */
|
|
6220
|
+
if (cctx->stableIn_notConsumed) { /* not the first time */
|
|
6221
|
+
/* check stable source guarantees */
|
|
6222
|
+
RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer");
|
|
6223
|
+
RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos");
|
|
6224
|
+
}
|
|
6225
|
+
/* pretend input was consumed, to give a sense forward progress */
|
|
6226
|
+
input->pos = input->size;
|
|
6227
|
+
/* save stable inBuffer, for later control, and flush/end */
|
|
6228
|
+
cctx->expectedInBuffer = *input;
|
|
6229
|
+
/* but actually input wasn't consumed, so keep track of position from where compression shall resume */
|
|
6230
|
+
cctx->stableIn_notConsumed += inputSize;
|
|
6231
|
+
/* don't initialize yet, wait for the first block of flush() order, for better parameters adaptation */
|
|
6232
|
+
return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format); /* at least some header to produce */
|
|
6233
|
+
}
|
|
6234
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed");
|
|
6235
|
+
ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */
|
|
5633
6236
|
}
|
|
5634
6237
|
/* end of transparent initialization stage */
|
|
5635
6238
|
|
|
@@ -5642,6 +6245,13 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
|
5642
6245
|
ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
|
|
5643
6246
|
cctx->cParamsChanged = 0;
|
|
5644
6247
|
}
|
|
6248
|
+
if (cctx->stableIn_notConsumed) {
|
|
6249
|
+
assert(cctx->appliedParams.inBufferMode == ZSTD_bm_stable);
|
|
6250
|
+
/* some early data was skipped - make it available for consumption */
|
|
6251
|
+
assert(input->pos >= cctx->stableIn_notConsumed);
|
|
6252
|
+
input->pos -= cctx->stableIn_notConsumed;
|
|
6253
|
+
cctx->stableIn_notConsumed = 0;
|
|
6254
|
+
}
|
|
5645
6255
|
for (;;) {
|
|
5646
6256
|
size_t const ipos = input->pos;
|
|
5647
6257
|
size_t const opos = output->pos;
|
|
@@ -5680,7 +6290,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
|
5680
6290
|
ZSTD_setBufferExpectations(cctx, output, input);
|
|
5681
6291
|
return flushMin;
|
|
5682
6292
|
}
|
|
5683
|
-
#endif
|
|
6293
|
+
#endif /* ZSTD_MULTITHREAD */
|
|
5684
6294
|
FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
|
|
5685
6295
|
DEBUGLOG(5, "completed ZSTD_compressStream2");
|
|
5686
6296
|
ZSTD_setBufferExpectations(cctx, output, input);
|
|
@@ -5693,13 +6303,20 @@ size_t ZSTD_compressStream2_simpleArgs (
|
|
|
5693
6303
|
const void* src, size_t srcSize, size_t* srcPos,
|
|
5694
6304
|
ZSTD_EndDirective endOp)
|
|
5695
6305
|
{
|
|
5696
|
-
ZSTD_outBuffer output
|
|
5697
|
-
ZSTD_inBuffer input
|
|
6306
|
+
ZSTD_outBuffer output;
|
|
6307
|
+
ZSTD_inBuffer input;
|
|
6308
|
+
output.dst = dst;
|
|
6309
|
+
output.size = dstCapacity;
|
|
6310
|
+
output.pos = *dstPos;
|
|
6311
|
+
input.src = src;
|
|
6312
|
+
input.size = srcSize;
|
|
6313
|
+
input.pos = *srcPos;
|
|
5698
6314
|
/* ZSTD_compressStream2() will check validity of dstPos and srcPos */
|
|
5699
|
-
size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
|
|
5700
|
-
|
|
5701
|
-
|
|
5702
|
-
|
|
6315
|
+
{ size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
|
|
6316
|
+
*dstPos = output.pos;
|
|
6317
|
+
*srcPos = input.pos;
|
|
6318
|
+
return cErr;
|
|
6319
|
+
}
|
|
5703
6320
|
}
|
|
5704
6321
|
|
|
5705
6322
|
size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
@@ -5722,6 +6339,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
|
5722
6339
|
/* Reset to the original values. */
|
|
5723
6340
|
cctx->requestedParams.inBufferMode = originalInBufferMode;
|
|
5724
6341
|
cctx->requestedParams.outBufferMode = originalOutBufferMode;
|
|
6342
|
+
|
|
5725
6343
|
FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
|
|
5726
6344
|
if (result != 0) { /* compression not completed, due to lack of output space */
|
|
5727
6345
|
assert(oPos == dstCapacity);
|
|
@@ -5732,64 +6350,60 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
|
5732
6350
|
}
|
|
5733
6351
|
}
|
|
5734
6352
|
|
|
5735
|
-
|
|
5736
|
-
|
|
5737
|
-
|
|
5738
|
-
|
|
5739
|
-
|
|
5740
|
-
|
|
5741
|
-
|
|
5742
|
-
|
|
5743
|
-
|
|
5744
|
-
|
|
5745
|
-
U32 windowSize = 1 << windowLog;
|
|
5746
|
-
/* posInSrc represents the amount of data the the decoder would decode up to this point.
|
|
6353
|
+
/* ZSTD_validateSequence() :
|
|
6354
|
+
* @offCode : is presumed to follow format required by ZSTD_storeSeq()
|
|
6355
|
+
* @returns a ZSTD error code if sequence is not valid
|
|
6356
|
+
*/
|
|
6357
|
+
static size_t
|
|
6358
|
+
ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,
|
|
6359
|
+
size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer)
|
|
6360
|
+
{
|
|
6361
|
+
U32 const windowSize = 1u << windowLog;
|
|
6362
|
+
/* posInSrc represents the amount of data the decoder would decode up to this point.
|
|
5747
6363
|
* As long as the amount of data decoded is less than or equal to window size, offsets may be
|
|
5748
6364
|
* larger than the total length of output decoded in order to reference the dict, even larger than
|
|
5749
6365
|
* window size. After output surpasses windowSize, we're limited to windowSize offsets again.
|
|
5750
6366
|
*/
|
|
5751
|
-
offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
|
|
5752
|
-
|
|
5753
|
-
RETURN_ERROR_IF(
|
|
6367
|
+
size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
|
|
6368
|
+
size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4;
|
|
6369
|
+
RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!");
|
|
6370
|
+
/* Validate maxNbSeq is large enough for the given matchLength and minMatch */
|
|
6371
|
+
RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch");
|
|
5754
6372
|
return 0;
|
|
5755
6373
|
}
|
|
5756
6374
|
|
|
5757
6375
|
/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
|
|
5758
|
-
static U32
|
|
5759
|
-
|
|
5760
|
-
U32
|
|
6376
|
+
static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
|
|
6377
|
+
{
|
|
6378
|
+
U32 offBase = OFFSET_TO_OFFBASE(rawOffset);
|
|
5761
6379
|
|
|
5762
6380
|
if (!ll0 && rawOffset == rep[0]) {
|
|
5763
|
-
|
|
6381
|
+
offBase = REPCODE1_TO_OFFBASE;
|
|
5764
6382
|
} else if (rawOffset == rep[1]) {
|
|
5765
|
-
|
|
6383
|
+
offBase = REPCODE_TO_OFFBASE(2 - ll0);
|
|
5766
6384
|
} else if (rawOffset == rep[2]) {
|
|
5767
|
-
|
|
6385
|
+
offBase = REPCODE_TO_OFFBASE(3 - ll0);
|
|
5768
6386
|
} else if (ll0 && rawOffset == rep[0] - 1) {
|
|
5769
|
-
|
|
6387
|
+
offBase = REPCODE3_TO_OFFBASE;
|
|
5770
6388
|
}
|
|
5771
|
-
|
|
5772
|
-
/* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
|
|
5773
|
-
offCode = repCode - 1;
|
|
5774
|
-
}
|
|
5775
|
-
return offCode;
|
|
6389
|
+
return offBase;
|
|
5776
6390
|
}
|
|
5777
6391
|
|
|
5778
|
-
|
|
5779
|
-
|
|
5780
|
-
|
|
5781
|
-
|
|
5782
|
-
|
|
5783
|
-
|
|
6392
|
+
size_t
|
|
6393
|
+
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
|
|
6394
|
+
ZSTD_sequencePosition* seqPos,
|
|
6395
|
+
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
|
6396
|
+
const void* src, size_t blockSize,
|
|
6397
|
+
ZSTD_paramSwitch_e externalRepSearch)
|
|
6398
|
+
{
|
|
5784
6399
|
U32 idx = seqPos->idx;
|
|
6400
|
+
U32 const startIdx = idx;
|
|
5785
6401
|
BYTE const* ip = (BYTE const*)(src);
|
|
5786
6402
|
const BYTE* const iend = ip + blockSize;
|
|
5787
6403
|
repcodes_t updatedRepcodes;
|
|
5788
6404
|
U32 dictSize;
|
|
5789
|
-
|
|
5790
|
-
|
|
5791
|
-
U32 ll0;
|
|
5792
|
-
U32 offCode;
|
|
6405
|
+
|
|
6406
|
+
DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize);
|
|
5793
6407
|
|
|
5794
6408
|
if (cctx->cdict) {
|
|
5795
6409
|
dictSize = (U32)cctx->cdict->dictContentSize;
|
|
@@ -5799,26 +6413,55 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
|
|
|
5799
6413
|
dictSize = 0;
|
|
5800
6414
|
}
|
|
5801
6415
|
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
|
5802
|
-
for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0)
|
|
5803
|
-
litLength = inSeqs[idx].litLength;
|
|
5804
|
-
matchLength = inSeqs[idx].matchLength;
|
|
5805
|
-
|
|
5806
|
-
|
|
5807
|
-
|
|
5808
|
-
|
|
5809
|
-
|
|
6416
|
+
for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
|
|
6417
|
+
U32 const litLength = inSeqs[idx].litLength;
|
|
6418
|
+
U32 const matchLength = inSeqs[idx].matchLength;
|
|
6419
|
+
U32 offBase;
|
|
6420
|
+
|
|
6421
|
+
if (externalRepSearch == ZSTD_ps_disable) {
|
|
6422
|
+
offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset);
|
|
6423
|
+
} else {
|
|
6424
|
+
U32 const ll0 = (litLength == 0);
|
|
6425
|
+
offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
|
|
6426
|
+
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
|
|
6427
|
+
}
|
|
6428
|
+
|
|
6429
|
+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
|
|
5810
6430
|
if (cctx->appliedParams.validateSequences) {
|
|
5811
6431
|
seqPos->posInSrc += litLength + matchLength;
|
|
5812
|
-
FORWARD_IF_ERROR(ZSTD_validateSequence(
|
|
5813
|
-
cctx->appliedParams.cParams.windowLog, dictSize,
|
|
5814
|
-
cctx->appliedParams.cParams.minMatch),
|
|
6432
|
+
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
|
|
6433
|
+
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
|
|
5815
6434
|
"Sequence validation failed");
|
|
5816
6435
|
}
|
|
5817
|
-
RETURN_ERROR_IF(idx - seqPos->idx
|
|
6436
|
+
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
|
5818
6437
|
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
|
5819
|
-
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend,
|
|
6438
|
+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
|
|
5820
6439
|
ip += matchLength + litLength;
|
|
5821
6440
|
}
|
|
6441
|
+
|
|
6442
|
+
/* If we skipped repcode search while parsing, we need to update repcodes now */
|
|
6443
|
+
assert(externalRepSearch != ZSTD_ps_auto);
|
|
6444
|
+
assert(idx >= startIdx);
|
|
6445
|
+
if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) {
|
|
6446
|
+
U32* const rep = updatedRepcodes.rep;
|
|
6447
|
+
U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */
|
|
6448
|
+
|
|
6449
|
+
if (lastSeqIdx >= startIdx + 2) {
|
|
6450
|
+
rep[2] = inSeqs[lastSeqIdx - 2].offset;
|
|
6451
|
+
rep[1] = inSeqs[lastSeqIdx - 1].offset;
|
|
6452
|
+
rep[0] = inSeqs[lastSeqIdx].offset;
|
|
6453
|
+
} else if (lastSeqIdx == startIdx + 1) {
|
|
6454
|
+
rep[2] = rep[0];
|
|
6455
|
+
rep[1] = inSeqs[lastSeqIdx - 1].offset;
|
|
6456
|
+
rep[0] = inSeqs[lastSeqIdx].offset;
|
|
6457
|
+
} else {
|
|
6458
|
+
assert(lastSeqIdx == startIdx);
|
|
6459
|
+
rep[2] = rep[1];
|
|
6460
|
+
rep[1] = rep[0];
|
|
6461
|
+
rep[0] = inSeqs[lastSeqIdx].offset;
|
|
6462
|
+
}
|
|
6463
|
+
}
|
|
6464
|
+
|
|
5822
6465
|
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
|
|
5823
6466
|
|
|
5824
6467
|
if (inSeqs[idx].litLength) {
|
|
@@ -5827,25 +6470,16 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
|
|
|
5827
6470
|
ip += inSeqs[idx].litLength;
|
|
5828
6471
|
seqPos->posInSrc += inSeqs[idx].litLength;
|
|
5829
6472
|
}
|
|
5830
|
-
RETURN_ERROR_IF(ip != iend,
|
|
6473
|
+
RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");
|
|
5831
6474
|
seqPos->idx = idx+1;
|
|
5832
6475
|
return 0;
|
|
5833
6476
|
}
|
|
5834
6477
|
|
|
5835
|
-
|
|
5836
|
-
|
|
5837
|
-
*
|
|
5838
|
-
*
|
|
5839
|
-
|
|
5840
|
-
* in inSeqs, storing any (partial) sequences.
|
|
5841
|
-
*
|
|
5842
|
-
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
|
|
5843
|
-
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
|
|
5844
|
-
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
|
|
5845
|
-
*/
|
|
5846
|
-
static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
|
5847
|
-
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
|
5848
|
-
const void* src, size_t blockSize) {
|
|
6478
|
+
size_t
|
|
6479
|
+
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
|
6480
|
+
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
|
6481
|
+
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch)
|
|
6482
|
+
{
|
|
5849
6483
|
U32 idx = seqPos->idx;
|
|
5850
6484
|
U32 startPosInSequence = seqPos->posInSequence;
|
|
5851
6485
|
U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
|
|
@@ -5855,10 +6489,9 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
|
5855
6489
|
repcodes_t updatedRepcodes;
|
|
5856
6490
|
U32 bytesAdjustment = 0;
|
|
5857
6491
|
U32 finalMatchSplit = 0;
|
|
5858
|
-
|
|
5859
|
-
|
|
5860
|
-
|
|
5861
|
-
U32 offCode;
|
|
6492
|
+
|
|
6493
|
+
/* TODO(embg) support fast parsing mode in noBlockDelim mode */
|
|
6494
|
+
(void)externalRepSearch;
|
|
5862
6495
|
|
|
5863
6496
|
if (cctx->cdict) {
|
|
5864
6497
|
dictSize = cctx->cdict->dictContentSize;
|
|
@@ -5867,14 +6500,15 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
|
5867
6500
|
} else {
|
|
5868
6501
|
dictSize = 0;
|
|
5869
6502
|
}
|
|
5870
|
-
DEBUGLOG(5, "
|
|
6503
|
+
DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
|
|
5871
6504
|
DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
|
|
5872
6505
|
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
|
5873
6506
|
while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
|
|
5874
6507
|
const ZSTD_Sequence currSeq = inSeqs[idx];
|
|
5875
|
-
litLength = currSeq.litLength;
|
|
5876
|
-
matchLength = currSeq.matchLength;
|
|
5877
|
-
rawOffset = currSeq.offset;
|
|
6508
|
+
U32 litLength = currSeq.litLength;
|
|
6509
|
+
U32 matchLength = currSeq.matchLength;
|
|
6510
|
+
U32 const rawOffset = currSeq.offset;
|
|
6511
|
+
U32 offBase;
|
|
5878
6512
|
|
|
5879
6513
|
/* Modify the sequence depending on where endPosInSequence lies */
|
|
5880
6514
|
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
|
|
@@ -5888,7 +6522,6 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
|
5888
6522
|
/* Move to the next sequence */
|
|
5889
6523
|
endPosInSequence -= currSeq.litLength + currSeq.matchLength;
|
|
5890
6524
|
startPosInSequence = 0;
|
|
5891
|
-
idx++;
|
|
5892
6525
|
} else {
|
|
5893
6526
|
/* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
|
|
5894
6527
|
does not reach the end of the match. So, we have to split the sequence */
|
|
@@ -5927,23 +6560,24 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
|
5927
6560
|
}
|
|
5928
6561
|
}
|
|
5929
6562
|
/* Check if this offset can be represented with a repcode */
|
|
5930
|
-
{ U32 ll0 = (litLength == 0);
|
|
5931
|
-
|
|
5932
|
-
|
|
6563
|
+
{ U32 const ll0 = (litLength == 0);
|
|
6564
|
+
offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0);
|
|
6565
|
+
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
|
|
5933
6566
|
}
|
|
5934
6567
|
|
|
5935
6568
|
if (cctx->appliedParams.validateSequences) {
|
|
5936
6569
|
seqPos->posInSrc += litLength + matchLength;
|
|
5937
|
-
FORWARD_IF_ERROR(ZSTD_validateSequence(
|
|
5938
|
-
cctx->appliedParams.cParams.windowLog, dictSize,
|
|
5939
|
-
cctx->appliedParams.cParams.minMatch),
|
|
6570
|
+
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
|
|
6571
|
+
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
|
|
5940
6572
|
"Sequence validation failed");
|
|
5941
6573
|
}
|
|
5942
|
-
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)",
|
|
5943
|
-
RETURN_ERROR_IF(idx - seqPos->idx
|
|
6574
|
+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
|
|
6575
|
+
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
|
5944
6576
|
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
|
5945
|
-
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend,
|
|
6577
|
+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
|
|
5946
6578
|
ip += matchLength + litLength;
|
|
6579
|
+
if (!finalMatchSplit)
|
|
6580
|
+
idx++; /* Next Sequence */
|
|
5947
6581
|
}
|
|
5948
6582
|
DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
|
|
5949
6583
|
assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
|
|
@@ -5966,8 +6600,9 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
|
5966
6600
|
|
|
5967
6601
|
typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
|
5968
6602
|
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
|
5969
|
-
const void* src, size_t blockSize);
|
|
5970
|
-
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
|
|
6603
|
+
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
|
|
6604
|
+
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
|
|
6605
|
+
{
|
|
5971
6606
|
ZSTD_sequenceCopier sequenceCopier = NULL;
|
|
5972
6607
|
assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
|
|
5973
6608
|
if (mode == ZSTD_sf_explicitBlockDelimiters) {
|
|
@@ -5979,24 +6614,75 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
|
|
|
5979
6614
|
return sequenceCopier;
|
|
5980
6615
|
}
|
|
5981
6616
|
|
|
6617
|
+
/* Discover the size of next block by searching for the delimiter.
|
|
6618
|
+
* Note that a block delimiter **must** exist in this mode,
|
|
6619
|
+
* otherwise it's an input error.
|
|
6620
|
+
* The block size retrieved will be later compared to ensure it remains within bounds */
|
|
6621
|
+
static size_t
|
|
6622
|
+
blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
|
|
6623
|
+
{
|
|
6624
|
+
int end = 0;
|
|
6625
|
+
size_t blockSize = 0;
|
|
6626
|
+
size_t spos = seqPos.idx;
|
|
6627
|
+
DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize);
|
|
6628
|
+
assert(spos <= inSeqsSize);
|
|
6629
|
+
while (spos < inSeqsSize) {
|
|
6630
|
+
end = (inSeqs[spos].offset == 0);
|
|
6631
|
+
blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength;
|
|
6632
|
+
if (end) {
|
|
6633
|
+
if (inSeqs[spos].matchLength != 0)
|
|
6634
|
+
RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0");
|
|
6635
|
+
break;
|
|
6636
|
+
}
|
|
6637
|
+
spos++;
|
|
6638
|
+
}
|
|
6639
|
+
if (!end)
|
|
6640
|
+
RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter");
|
|
6641
|
+
return blockSize;
|
|
6642
|
+
}
|
|
6643
|
+
|
|
6644
|
+
/* More a "target" block size */
|
|
6645
|
+
static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining)
|
|
6646
|
+
{
|
|
6647
|
+
int const lastBlock = (remaining <= blockSize);
|
|
6648
|
+
return lastBlock ? remaining : blockSize;
|
|
6649
|
+
}
|
|
6650
|
+
|
|
6651
|
+
static size_t determine_blockSize(ZSTD_sequenceFormat_e mode,
|
|
6652
|
+
size_t blockSize, size_t remaining,
|
|
6653
|
+
const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
|
|
6654
|
+
{
|
|
6655
|
+
DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining);
|
|
6656
|
+
if (mode == ZSTD_sf_noBlockDelimiters)
|
|
6657
|
+
return blockSize_noDelimiter(blockSize, remaining);
|
|
6658
|
+
{ size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos);
|
|
6659
|
+
FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters");
|
|
6660
|
+
if (explicitBlockSize > blockSize)
|
|
6661
|
+
RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block");
|
|
6662
|
+
if (explicitBlockSize > remaining)
|
|
6663
|
+
RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source");
|
|
6664
|
+
return explicitBlockSize;
|
|
6665
|
+
}
|
|
6666
|
+
}
|
|
6667
|
+
|
|
5982
6668
|
/* Compress, block-by-block, all of the sequences given.
|
|
5983
6669
|
*
|
|
5984
|
-
* Returns the cumulative size of all compressed blocks (including their headers),
|
|
6670
|
+
* Returns the cumulative size of all compressed blocks (including their headers),
|
|
6671
|
+
* otherwise a ZSTD error.
|
|
5985
6672
|
*/
|
|
5986
|
-
static size_t
|
|
5987
|
-
|
|
5988
|
-
|
|
5989
|
-
|
|
6673
|
+
static size_t
|
|
6674
|
+
ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
6675
|
+
void* dst, size_t dstCapacity,
|
|
6676
|
+
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
|
6677
|
+
const void* src, size_t srcSize)
|
|
6678
|
+
{
|
|
5990
6679
|
size_t cSize = 0;
|
|
5991
|
-
U32 lastBlock;
|
|
5992
|
-
size_t blockSize;
|
|
5993
|
-
size_t compressedSeqsSize;
|
|
5994
6680
|
size_t remaining = srcSize;
|
|
5995
6681
|
ZSTD_sequencePosition seqPos = {0, 0, 0};
|
|
5996
6682
|
|
|
5997
6683
|
BYTE const* ip = (BYTE const*)src;
|
|
5998
6684
|
BYTE* op = (BYTE*)dst;
|
|
5999
|
-
ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
|
|
6685
|
+
ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
|
|
6000
6686
|
|
|
6001
6687
|
DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
|
|
6002
6688
|
/* Special case: empty frame */
|
|
@@ -6010,22 +6696,29 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6010
6696
|
}
|
|
6011
6697
|
|
|
6012
6698
|
while (remaining) {
|
|
6699
|
+
size_t compressedSeqsSize;
|
|
6013
6700
|
size_t cBlockSize;
|
|
6014
6701
|
size_t additionalByteAdjustment;
|
|
6015
|
-
|
|
6016
|
-
|
|
6702
|
+
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
|
|
6703
|
+
cctx->blockSize, remaining,
|
|
6704
|
+
inSeqs, inSeqsSize, seqPos);
|
|
6705
|
+
U32 const lastBlock = (blockSize == remaining);
|
|
6706
|
+
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
|
|
6707
|
+
assert(blockSize <= remaining);
|
|
6017
6708
|
ZSTD_resetSeqStore(&cctx->seqStore);
|
|
6018
|
-
DEBUGLOG(
|
|
6709
|
+
DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize);
|
|
6019
6710
|
|
|
6020
|
-
additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
|
|
6711
|
+
additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes);
|
|
6021
6712
|
FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
|
|
6022
6713
|
blockSize -= additionalByteAdjustment;
|
|
6023
6714
|
|
|
6024
6715
|
/* If blocks are too small, emit as a nocompress block */
|
|
6025
|
-
|
|
6716
|
+
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
|
|
6717
|
+
* additional 1. We need to revisit and change this logic to be more consistent */
|
|
6718
|
+
if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
|
|
6026
6719
|
cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
|
6027
6720
|
FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
|
|
6028
|
-
DEBUGLOG(
|
|
6721
|
+
DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
|
|
6029
6722
|
cSize += cBlockSize;
|
|
6030
6723
|
ip += blockSize;
|
|
6031
6724
|
op += cBlockSize;
|
|
@@ -6034,6 +6727,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6034
6727
|
continue;
|
|
6035
6728
|
}
|
|
6036
6729
|
|
|
6730
|
+
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
|
|
6037
6731
|
compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
|
|
6038
6732
|
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
|
|
6039
6733
|
&cctx->appliedParams,
|
|
@@ -6042,11 +6736,11 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6042
6736
|
cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
|
6043
6737
|
cctx->bmi2);
|
|
6044
6738
|
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
|
|
6045
|
-
DEBUGLOG(
|
|
6739
|
+
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
|
|
6046
6740
|
|
|
6047
6741
|
if (!cctx->isFirstBlock &&
|
|
6048
6742
|
ZSTD_maybeRLE(&cctx->seqStore) &&
|
|
6049
|
-
ZSTD_isRLE(
|
|
6743
|
+
ZSTD_isRLE(ip, blockSize)) {
|
|
6050
6744
|
/* We don't want to emit our first block as a RLE even if it qualifies because
|
|
6051
6745
|
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
|
|
6052
6746
|
* This is only an issue for zstd <= v1.4.3
|
|
@@ -6057,12 +6751,12 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6057
6751
|
if (compressedSeqsSize == 0) {
|
|
6058
6752
|
/* ZSTD_noCompressBlock writes the block header as well */
|
|
6059
6753
|
cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
|
6060
|
-
FORWARD_IF_ERROR(cBlockSize, "
|
|
6061
|
-
DEBUGLOG(
|
|
6754
|
+
FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed");
|
|
6755
|
+
DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize);
|
|
6062
6756
|
} else if (compressedSeqsSize == 1) {
|
|
6063
6757
|
cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
|
|
6064
|
-
FORWARD_IF_ERROR(cBlockSize, "
|
|
6065
|
-
DEBUGLOG(
|
|
6758
|
+
FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed");
|
|
6759
|
+
DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize);
|
|
6066
6760
|
} else {
|
|
6067
6761
|
U32 cBlockHeader;
|
|
6068
6762
|
/* Error checking and repcodes update */
|
|
@@ -6074,11 +6768,10 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6074
6768
|
cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
|
|
6075
6769
|
MEM_writeLE24(op, cBlockHeader);
|
|
6076
6770
|
cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
|
|
6077
|
-
DEBUGLOG(
|
|
6771
|
+
DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize);
|
|
6078
6772
|
}
|
|
6079
6773
|
|
|
6080
6774
|
cSize += cBlockSize;
|
|
6081
|
-
DEBUGLOG(4, "cSize running total: %zu", cSize);
|
|
6082
6775
|
|
|
6083
6776
|
if (lastBlock) {
|
|
6084
6777
|
break;
|
|
@@ -6089,21 +6782,25 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6089
6782
|
dstCapacity -= cBlockSize;
|
|
6090
6783
|
cctx->isFirstBlock = 0;
|
|
6091
6784
|
}
|
|
6785
|
+
DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
|
|
6092
6786
|
}
|
|
6093
6787
|
|
|
6788
|
+
DEBUGLOG(4, "cSize final total: %zu", cSize);
|
|
6094
6789
|
return cSize;
|
|
6095
6790
|
}
|
|
6096
6791
|
|
|
6097
|
-
size_t ZSTD_compressSequences(ZSTD_CCtx*
|
|
6792
|
+
size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
|
6793
|
+
void* dst, size_t dstCapacity,
|
|
6098
6794
|
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
|
6099
|
-
const void* src, size_t srcSize)
|
|
6795
|
+
const void* src, size_t srcSize)
|
|
6796
|
+
{
|
|
6100
6797
|
BYTE* op = (BYTE*)dst;
|
|
6101
6798
|
size_t cSize = 0;
|
|
6102
6799
|
size_t compressedBlocksSize = 0;
|
|
6103
6800
|
size_t frameHeaderSize = 0;
|
|
6104
6801
|
|
|
6105
6802
|
/* Transparent initialization stage, same as compressStream2() */
|
|
6106
|
-
DEBUGLOG(
|
|
6803
|
+
DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity);
|
|
6107
6804
|
assert(cctx != NULL);
|
|
6108
6805
|
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
|
|
6109
6806
|
/* Begin writing output, starting with frame header */
|
|
@@ -6131,26 +6828,34 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
|
|
|
6131
6828
|
cSize += 4;
|
|
6132
6829
|
}
|
|
6133
6830
|
|
|
6134
|
-
DEBUGLOG(
|
|
6831
|
+
DEBUGLOG(4, "Final compressed size: %zu", cSize);
|
|
6135
6832
|
return cSize;
|
|
6136
6833
|
}
|
|
6137
6834
|
|
|
6138
6835
|
/*====== Finalize ======*/
|
|
6139
6836
|
|
|
6837
|
+
static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs)
|
|
6838
|
+
{
|
|
6839
|
+
const ZSTD_inBuffer nullInput = { NULL, 0, 0 };
|
|
6840
|
+
const int stableInput = (zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
|
|
6841
|
+
return stableInput ? zcs->expectedInBuffer : nullInput;
|
|
6842
|
+
}
|
|
6843
|
+
|
|
6140
6844
|
/*! ZSTD_flushStream() :
|
|
6141
6845
|
* @return : amount of data remaining to flush */
|
|
6142
6846
|
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
|
|
6143
6847
|
{
|
|
6144
|
-
ZSTD_inBuffer input =
|
|
6848
|
+
ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
|
|
6849
|
+
input.size = input.pos; /* do not ingest more input during flush */
|
|
6145
6850
|
return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
|
|
6146
6851
|
}
|
|
6147
6852
|
|
|
6148
6853
|
|
|
6149
6854
|
size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
|
|
6150
6855
|
{
|
|
6151
|
-
ZSTD_inBuffer input =
|
|
6856
|
+
ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
|
|
6152
6857
|
size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
|
|
6153
|
-
FORWARD_IF_ERROR(
|
|
6858
|
+
FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed");
|
|
6154
6859
|
if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
|
|
6155
6860
|
/* single thread mode : attempt to calculate remaining to flush more precisely */
|
|
6156
6861
|
{ size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
|
|
@@ -6272,7 +6977,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
|
|
|
6272
6977
|
cp.targetLength = (unsigned)(-clampedCompressionLevel);
|
|
6273
6978
|
}
|
|
6274
6979
|
/* refine parameters based on srcSize & dictSize */
|
|
6275
|
-
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
|
|
6980
|
+
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
|
|
6276
6981
|
}
|
|
6277
6982
|
}
|
|
6278
6983
|
|
|
@@ -6307,3 +7012,21 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
|
|
|
6307
7012
|
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
|
|
6308
7013
|
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
|
|
6309
7014
|
}
|
|
7015
|
+
|
|
7016
|
+
void ZSTD_registerSequenceProducer(
|
|
7017
|
+
ZSTD_CCtx* zc, void* mState,
|
|
7018
|
+
ZSTD_sequenceProducer_F* mFinder
|
|
7019
|
+
) {
|
|
7020
|
+
if (mFinder != NULL) {
|
|
7021
|
+
ZSTD_externalMatchCtx emctx;
|
|
7022
|
+
emctx.mState = mState;
|
|
7023
|
+
emctx.mFinder = mFinder;
|
|
7024
|
+
emctx.seqBuffer = NULL;
|
|
7025
|
+
emctx.seqBufferCapacity = 0;
|
|
7026
|
+
zc->externalMatchCtx = emctx;
|
|
7027
|
+
zc->requestedParams.useSequenceProducer = 1;
|
|
7028
|
+
} else {
|
|
7029
|
+
ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
|
|
7030
|
+
zc->requestedParams.useSequenceProducer = 0;
|
|
7031
|
+
}
|
|
7032
|
+
}
|