zstd-ruby 1.5.1.1 → 1.5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +1 -1
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +19 -60
- data/ext/zstdruby/libzstd/common/compiler.h +26 -3
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +1 -1
- data/ext/zstdruby/libzstd/common/entropy_common.c +12 -40
- data/ext/zstdruby/libzstd/common/error_private.c +9 -2
- data/ext/zstdruby/libzstd/common/error_private.h +1 -1
- data/ext/zstdruby/libzstd/common/fse.h +5 -83
- data/ext/zstdruby/libzstd/common/fse_decompress.c +7 -99
- data/ext/zstdruby/libzstd/common/huf.h +65 -156
- data/ext/zstdruby/libzstd/common/mem.h +39 -46
- data/ext/zstdruby/libzstd/common/pool.c +37 -16
- data/ext/zstdruby/libzstd/common/pool.h +9 -3
- data/ext/zstdruby/libzstd/common/portability_macros.h +28 -3
- data/ext/zstdruby/libzstd/common/threading.c +68 -14
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
- data/ext/zstdruby/libzstd/common/xxhash.h +8 -8
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -36
- data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +20 -122
- data/ext/zstdruby/libzstd/common/zstd_trace.h +3 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +1 -1
- data/ext/zstdruby/libzstd/compress/fse_compress.c +7 -124
- data/ext/zstdruby/libzstd/compress/hist.c +1 -1
- data/ext/zstdruby/libzstd/compress/hist.h +1 -1
- data/ext/zstdruby/libzstd/compress/huf_compress.c +234 -169
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1317 -594
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +272 -165
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +115 -39
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +13 -13
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +162 -82
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +95 -33
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +434 -149
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +405 -348
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -7
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +149 -100
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +32 -16
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -2
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +434 -441
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +42 -37
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +4 -4
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +205 -80
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +201 -81
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +19 -15
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +13 -91
- data/ext/zstdruby/libzstd/zdict.h +53 -31
- data/ext/zstdruby/libzstd/zstd.h +580 -135
- data/ext/zstdruby/libzstd/zstd_errors.h +27 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +113 -31
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +11 -37
- data/.github/dependabot.yml +0 -8
- data/.github/workflows/ruby.yml +0 -35
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -232
- data/ext/zstdruby/libzstd/Makefile +0 -357
- data/ext/zstdruby/libzstd/README.md +0 -217
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -167
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -63
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.mk +0 -185
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -16
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +0 -4
- data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -11,12 +11,12 @@
|
|
11
11
|
/*-*************************************
|
12
12
|
* Dependencies
|
13
13
|
***************************************/
|
14
|
+
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
|
14
15
|
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
|
15
16
|
#include "../common/mem.h"
|
16
17
|
#include "hist.h" /* HIST_countFast_wksp */
|
17
18
|
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
|
18
19
|
#include "../common/fse.h"
|
19
|
-
#define HUF_STATIC_LINKING_ONLY
|
20
20
|
#include "../common/huf.h"
|
21
21
|
#include "zstd_compress_internal.h"
|
22
22
|
#include "zstd_compress_sequences.h"
|
@@ -27,6 +27,7 @@
|
|
27
27
|
#include "zstd_opt.h"
|
28
28
|
#include "zstd_ldm.h"
|
29
29
|
#include "zstd_compress_superblock.h"
|
30
|
+
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */
|
30
31
|
|
31
32
|
/* ***************************************************************
|
32
33
|
* Tuning parameters
|
@@ -58,14 +59,17 @@
|
|
58
59
|
* Helper functions
|
59
60
|
***************************************/
|
60
61
|
/* ZSTD_compressBound()
|
61
|
-
* Note that the result from this function is only
|
62
|
-
*
|
63
|
-
* When
|
64
|
-
*
|
65
|
-
*
|
62
|
+
* Note that the result from this function is only valid for
|
63
|
+
* the one-pass compression functions.
|
64
|
+
* When employing the streaming mode,
|
65
|
+
* if flushes are frequently altering the size of blocks,
|
66
|
+
* the overhead from block headers can make the compressed data larger
|
67
|
+
* than the return value of ZSTD_compressBound().
|
66
68
|
*/
|
67
69
|
size_t ZSTD_compressBound(size_t srcSize) {
|
68
|
-
|
70
|
+
size_t const r = ZSTD_COMPRESSBOUND(srcSize);
|
71
|
+
if (r==0) return ERROR(srcSize_wrong);
|
72
|
+
return r;
|
69
73
|
}
|
70
74
|
|
71
75
|
|
@@ -177,12 +181,9 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
|
|
177
181
|
if (cctx==NULL) return 0; /* support free on NULL */
|
178
182
|
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
|
179
183
|
"not compatible with static CCtx");
|
180
|
-
{
|
181
|
-
int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
|
184
|
+
{ int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
|
182
185
|
ZSTD_freeCCtxContent(cctx);
|
183
|
-
if (!cctxInWorkspace)
|
184
|
-
ZSTD_customFree(cctx, cctx->customMem);
|
185
|
-
}
|
186
|
+
if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem);
|
186
187
|
}
|
187
188
|
return 0;
|
188
189
|
}
|
@@ -267,9 +268,9 @@ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
|
|
267
268
|
return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
|
268
269
|
}
|
269
270
|
|
270
|
-
/* Returns
|
271
|
+
/* Returns ZSTD_ps_enable if compression parameters are such that we should
|
271
272
|
* enable long distance matching (wlog >= 27, strategy >= btopt).
|
272
|
-
* Returns
|
273
|
+
* Returns ZSTD_ps_disable otherwise.
|
273
274
|
*/
|
274
275
|
static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
|
275
276
|
const ZSTD_compressionParameters* const cParams) {
|
@@ -277,6 +278,34 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
|
|
277
278
|
return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
|
278
279
|
}
|
279
280
|
|
281
|
+
static int ZSTD_resolveExternalSequenceValidation(int mode) {
|
282
|
+
return mode;
|
283
|
+
}
|
284
|
+
|
285
|
+
/* Resolves maxBlockSize to the default if no value is present. */
|
286
|
+
static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) {
|
287
|
+
if (maxBlockSize == 0) {
|
288
|
+
return ZSTD_BLOCKSIZE_MAX;
|
289
|
+
} else {
|
290
|
+
return maxBlockSize;
|
291
|
+
}
|
292
|
+
}
|
293
|
+
|
294
|
+
static ZSTD_paramSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_paramSwitch_e value, int cLevel) {
|
295
|
+
if (value != ZSTD_ps_auto) return value;
|
296
|
+
if (cLevel < 10) {
|
297
|
+
return ZSTD_ps_disable;
|
298
|
+
} else {
|
299
|
+
return ZSTD_ps_enable;
|
300
|
+
}
|
301
|
+
}
|
302
|
+
|
303
|
+
/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
|
304
|
+
* If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
|
305
|
+
static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
|
306
|
+
return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast;
|
307
|
+
}
|
308
|
+
|
280
309
|
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
281
310
|
ZSTD_compressionParameters cParams)
|
282
311
|
{
|
@@ -294,6 +323,10 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
|
294
323
|
}
|
295
324
|
cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
|
296
325
|
cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
|
326
|
+
cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
|
327
|
+
cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize);
|
328
|
+
cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes,
|
329
|
+
cctxParams.compressionLevel);
|
297
330
|
assert(!ZSTD_checkCParams(cParams));
|
298
331
|
return cctxParams;
|
299
332
|
}
|
@@ -339,10 +372,13 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel)
|
|
339
372
|
#define ZSTD_NO_CLEVEL 0
|
340
373
|
|
341
374
|
/**
|
342
|
-
* Initializes
|
375
|
+
* Initializes `cctxParams` from `params` and `compressionLevel`.
|
343
376
|
* @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
|
344
377
|
*/
|
345
|
-
static void
|
378
|
+
static void
|
379
|
+
ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,
|
380
|
+
const ZSTD_parameters* params,
|
381
|
+
int compressionLevel)
|
346
382
|
{
|
347
383
|
assert(!ZSTD_checkCParams(params->cParams));
|
348
384
|
ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
|
@@ -355,6 +391,9 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par
|
|
355
391
|
cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
|
356
392
|
cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
|
357
393
|
cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
|
394
|
+
cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
|
395
|
+
cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);
|
396
|
+
cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel);
|
358
397
|
DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
|
359
398
|
cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
|
360
399
|
}
|
@@ -369,7 +408,7 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete
|
|
369
408
|
|
370
409
|
/**
|
371
410
|
* Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
|
372
|
-
* @param
|
411
|
+
* @param params Validated zstd parameters.
|
373
412
|
*/
|
374
413
|
static void ZSTD_CCtxParams_setZstdParams(
|
375
414
|
ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
|
@@ -478,8 +517,8 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
478
517
|
return bounds;
|
479
518
|
|
480
519
|
case ZSTD_c_enableLongDistanceMatching:
|
481
|
-
bounds.lowerBound =
|
482
|
-
bounds.upperBound =
|
520
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
521
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
483
522
|
return bounds;
|
484
523
|
|
485
524
|
case ZSTD_c_ldmHashLog:
|
@@ -572,6 +611,26 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
572
611
|
bounds.upperBound = 1;
|
573
612
|
return bounds;
|
574
613
|
|
614
|
+
case ZSTD_c_prefetchCDictTables:
|
615
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
616
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
617
|
+
return bounds;
|
618
|
+
|
619
|
+
case ZSTD_c_enableSeqProducerFallback:
|
620
|
+
bounds.lowerBound = 0;
|
621
|
+
bounds.upperBound = 1;
|
622
|
+
return bounds;
|
623
|
+
|
624
|
+
case ZSTD_c_maxBlockSize:
|
625
|
+
bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
|
626
|
+
bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
|
627
|
+
return bounds;
|
628
|
+
|
629
|
+
case ZSTD_c_searchForExternalRepcodes:
|
630
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
631
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
632
|
+
return bounds;
|
633
|
+
|
575
634
|
default:
|
576
635
|
bounds.error = ERROR(parameter_unsupported);
|
577
636
|
return bounds;
|
@@ -636,6 +695,10 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
|
636
695
|
case ZSTD_c_useBlockSplitter:
|
637
696
|
case ZSTD_c_useRowMatchFinder:
|
638
697
|
case ZSTD_c_deterministicRefPrefix:
|
698
|
+
case ZSTD_c_prefetchCDictTables:
|
699
|
+
case ZSTD_c_enableSeqProducerFallback:
|
700
|
+
case ZSTD_c_maxBlockSize:
|
701
|
+
case ZSTD_c_searchForExternalRepcodes:
|
639
702
|
default:
|
640
703
|
return 0;
|
641
704
|
}
|
@@ -648,7 +711,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
|
648
711
|
if (ZSTD_isUpdateAuthorized(param)) {
|
649
712
|
cctx->cParamsChanged = 1;
|
650
713
|
} else {
|
651
|
-
RETURN_ERROR(stage_wrong, "can only set params in
|
714
|
+
RETURN_ERROR(stage_wrong, "can only set params in cctx init stage");
|
652
715
|
} }
|
653
716
|
|
654
717
|
switch(param)
|
@@ -691,6 +754,10 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
|
691
754
|
case ZSTD_c_useBlockSplitter:
|
692
755
|
case ZSTD_c_useRowMatchFinder:
|
693
756
|
case ZSTD_c_deterministicRefPrefix:
|
757
|
+
case ZSTD_c_prefetchCDictTables:
|
758
|
+
case ZSTD_c_enableSeqProducerFallback:
|
759
|
+
case ZSTD_c_maxBlockSize:
|
760
|
+
case ZSTD_c_searchForExternalRepcodes:
|
694
761
|
break;
|
695
762
|
|
696
763
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
@@ -746,12 +813,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
746
813
|
case ZSTD_c_minMatch :
|
747
814
|
if (value!=0) /* 0 => use default */
|
748
815
|
BOUNDCHECK(ZSTD_c_minMatch, value);
|
749
|
-
CCtxParams->cParams.minMatch = value;
|
816
|
+
CCtxParams->cParams.minMatch = (U32)value;
|
750
817
|
return CCtxParams->cParams.minMatch;
|
751
818
|
|
752
819
|
case ZSTD_c_targetLength :
|
753
820
|
BOUNDCHECK(ZSTD_c_targetLength, value);
|
754
|
-
CCtxParams->cParams.targetLength = value;
|
821
|
+
CCtxParams->cParams.targetLength = (U32)value;
|
755
822
|
return CCtxParams->cParams.targetLength;
|
756
823
|
|
757
824
|
case ZSTD_c_strategy :
|
@@ -764,12 +831,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
764
831
|
/* Content size written in frame header _when known_ (default:1) */
|
765
832
|
DEBUGLOG(4, "set content size flag = %u", (value!=0));
|
766
833
|
CCtxParams->fParams.contentSizeFlag = value != 0;
|
767
|
-
return CCtxParams->fParams.contentSizeFlag;
|
834
|
+
return (size_t)CCtxParams->fParams.contentSizeFlag;
|
768
835
|
|
769
836
|
case ZSTD_c_checksumFlag :
|
770
837
|
/* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
|
771
838
|
CCtxParams->fParams.checksumFlag = value != 0;
|
772
|
-
return CCtxParams->fParams.checksumFlag;
|
839
|
+
return (size_t)CCtxParams->fParams.checksumFlag;
|
773
840
|
|
774
841
|
case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
|
775
842
|
DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
|
@@ -778,18 +845,18 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
778
845
|
|
779
846
|
case ZSTD_c_forceMaxWindow :
|
780
847
|
CCtxParams->forceWindow = (value != 0);
|
781
|
-
return CCtxParams->forceWindow;
|
848
|
+
return (size_t)CCtxParams->forceWindow;
|
782
849
|
|
783
850
|
case ZSTD_c_forceAttachDict : {
|
784
851
|
const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
|
785
|
-
BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
|
852
|
+
BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref);
|
786
853
|
CCtxParams->attachDictPref = pref;
|
787
854
|
return CCtxParams->attachDictPref;
|
788
855
|
}
|
789
856
|
|
790
857
|
case ZSTD_c_literalCompressionMode : {
|
791
858
|
const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
|
792
|
-
BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
|
859
|
+
BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm);
|
793
860
|
CCtxParams->literalCompressionMode = lcm;
|
794
861
|
return CCtxParams->literalCompressionMode;
|
795
862
|
}
|
@@ -840,47 +907,48 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
840
907
|
|
841
908
|
case ZSTD_c_enableDedicatedDictSearch :
|
842
909
|
CCtxParams->enableDedicatedDictSearch = (value!=0);
|
843
|
-
return CCtxParams->enableDedicatedDictSearch;
|
910
|
+
return (size_t)CCtxParams->enableDedicatedDictSearch;
|
844
911
|
|
845
912
|
case ZSTD_c_enableLongDistanceMatching :
|
913
|
+
BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value);
|
846
914
|
CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
|
847
915
|
return CCtxParams->ldmParams.enableLdm;
|
848
916
|
|
849
917
|
case ZSTD_c_ldmHashLog :
|
850
918
|
if (value!=0) /* 0 ==> auto */
|
851
919
|
BOUNDCHECK(ZSTD_c_ldmHashLog, value);
|
852
|
-
CCtxParams->ldmParams.hashLog = value;
|
920
|
+
CCtxParams->ldmParams.hashLog = (U32)value;
|
853
921
|
return CCtxParams->ldmParams.hashLog;
|
854
922
|
|
855
923
|
case ZSTD_c_ldmMinMatch :
|
856
924
|
if (value!=0) /* 0 ==> default */
|
857
925
|
BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
|
858
|
-
CCtxParams->ldmParams.minMatchLength = value;
|
926
|
+
CCtxParams->ldmParams.minMatchLength = (U32)value;
|
859
927
|
return CCtxParams->ldmParams.minMatchLength;
|
860
928
|
|
861
929
|
case ZSTD_c_ldmBucketSizeLog :
|
862
930
|
if (value!=0) /* 0 ==> default */
|
863
931
|
BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
|
864
|
-
CCtxParams->ldmParams.bucketSizeLog = value;
|
932
|
+
CCtxParams->ldmParams.bucketSizeLog = (U32)value;
|
865
933
|
return CCtxParams->ldmParams.bucketSizeLog;
|
866
934
|
|
867
935
|
case ZSTD_c_ldmHashRateLog :
|
868
936
|
if (value!=0) /* 0 ==> default */
|
869
937
|
BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
|
870
|
-
CCtxParams->ldmParams.hashRateLog = value;
|
938
|
+
CCtxParams->ldmParams.hashRateLog = (U32)value;
|
871
939
|
return CCtxParams->ldmParams.hashRateLog;
|
872
940
|
|
873
941
|
case ZSTD_c_targetCBlockSize :
|
874
942
|
if (value!=0) /* 0 ==> default */
|
875
943
|
BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
|
876
|
-
CCtxParams->targetCBlockSize = value;
|
944
|
+
CCtxParams->targetCBlockSize = (U32)value;
|
877
945
|
return CCtxParams->targetCBlockSize;
|
878
946
|
|
879
947
|
case ZSTD_c_srcSizeHint :
|
880
948
|
if (value!=0) /* 0 ==> default */
|
881
949
|
BOUNDCHECK(ZSTD_c_srcSizeHint, value);
|
882
950
|
CCtxParams->srcSizeHint = value;
|
883
|
-
return CCtxParams->srcSizeHint;
|
951
|
+
return (size_t)CCtxParams->srcSizeHint;
|
884
952
|
|
885
953
|
case ZSTD_c_stableInBuffer:
|
886
954
|
BOUNDCHECK(ZSTD_c_stableInBuffer, value);
|
@@ -917,6 +985,27 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
917
985
|
CCtxParams->deterministicRefPrefix = !!value;
|
918
986
|
return CCtxParams->deterministicRefPrefix;
|
919
987
|
|
988
|
+
case ZSTD_c_prefetchCDictTables:
|
989
|
+
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
|
990
|
+
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
|
991
|
+
return CCtxParams->prefetchCDictTables;
|
992
|
+
|
993
|
+
case ZSTD_c_enableSeqProducerFallback:
|
994
|
+
BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
|
995
|
+
CCtxParams->enableMatchFinderFallback = value;
|
996
|
+
return CCtxParams->enableMatchFinderFallback;
|
997
|
+
|
998
|
+
case ZSTD_c_maxBlockSize:
|
999
|
+
if (value!=0) /* 0 ==> default */
|
1000
|
+
BOUNDCHECK(ZSTD_c_maxBlockSize, value);
|
1001
|
+
CCtxParams->maxBlockSize = value;
|
1002
|
+
return CCtxParams->maxBlockSize;
|
1003
|
+
|
1004
|
+
case ZSTD_c_searchForExternalRepcodes:
|
1005
|
+
BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value);
|
1006
|
+
CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value;
|
1007
|
+
return CCtxParams->searchForExternalRepcodes;
|
1008
|
+
|
920
1009
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
921
1010
|
}
|
922
1011
|
}
|
@@ -1049,6 +1138,18 @@ size_t ZSTD_CCtxParams_getParameter(
|
|
1049
1138
|
case ZSTD_c_deterministicRefPrefix:
|
1050
1139
|
*value = (int)CCtxParams->deterministicRefPrefix;
|
1051
1140
|
break;
|
1141
|
+
case ZSTD_c_prefetchCDictTables:
|
1142
|
+
*value = (int)CCtxParams->prefetchCDictTables;
|
1143
|
+
break;
|
1144
|
+
case ZSTD_c_enableSeqProducerFallback:
|
1145
|
+
*value = CCtxParams->enableMatchFinderFallback;
|
1146
|
+
break;
|
1147
|
+
case ZSTD_c_maxBlockSize:
|
1148
|
+
*value = (int)CCtxParams->maxBlockSize;
|
1149
|
+
break;
|
1150
|
+
case ZSTD_c_searchForExternalRepcodes:
|
1151
|
+
*value = (int)CCtxParams->searchForExternalRepcodes;
|
1152
|
+
break;
|
1052
1153
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
1053
1154
|
}
|
1054
1155
|
return 0;
|
@@ -1075,9 +1176,47 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
|
|
1075
1176
|
return 0;
|
1076
1177
|
}
|
1077
1178
|
|
1179
|
+
size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams)
|
1180
|
+
{
|
1181
|
+
ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */);
|
1182
|
+
DEBUGLOG(4, "ZSTD_CCtx_setCParams");
|
1183
|
+
/* only update if all parameters are valid */
|
1184
|
+
FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), "");
|
1185
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, cparams.windowLog), "");
|
1186
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, cparams.chainLog), "");
|
1187
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, cparams.hashLog), "");
|
1188
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, cparams.searchLog), "");
|
1189
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, cparams.minMatch), "");
|
1190
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, cparams.targetLength), "");
|
1191
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy), "");
|
1192
|
+
return 0;
|
1193
|
+
}
|
1194
|
+
|
1195
|
+
size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams)
|
1196
|
+
{
|
1197
|
+
ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */);
|
1198
|
+
DEBUGLOG(4, "ZSTD_CCtx_setFParams");
|
1199
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), "");
|
1200
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), "");
|
1201
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), "");
|
1202
|
+
return 0;
|
1203
|
+
}
|
1204
|
+
|
1205
|
+
size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params)
|
1206
|
+
{
|
1207
|
+
DEBUGLOG(4, "ZSTD_CCtx_setParams");
|
1208
|
+
/* First check cParams, because we want to update all or none. */
|
1209
|
+
FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
|
1210
|
+
/* Next set fParams, because this could fail if the cctx isn't in init stage. */
|
1211
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), "");
|
1212
|
+
/* Finally set cParams, which should succeed. */
|
1213
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), "");
|
1214
|
+
return 0;
|
1215
|
+
}
|
1216
|
+
|
1078
1217
|
size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
|
1079
1218
|
{
|
1080
|
-
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %
|
1219
|
+
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize);
|
1081
1220
|
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
1082
1221
|
"Can't set pledgedSrcSize when not in init stage.");
|
1083
1222
|
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
@@ -1093,9 +1232,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
|
|
1093
1232
|
ZSTD_compressionParameters* cParams);
|
1094
1233
|
|
1095
1234
|
/**
|
1096
|
-
* Initializes the local
|
1097
|
-
* NOTE:
|
1098
|
-
*
|
1235
|
+
* Initializes the local dictionary using requested parameters.
|
1236
|
+
* NOTE: Initialization does not employ the pledged src size,
|
1237
|
+
* because the dictionary may be used for multiple compressions.
|
1099
1238
|
*/
|
1100
1239
|
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
1101
1240
|
{
|
@@ -1108,8 +1247,8 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
|
1108
1247
|
return 0;
|
1109
1248
|
}
|
1110
1249
|
if (dl->cdict != NULL) {
|
1111
|
-
assert(cctx->cdict == dl->cdict);
|
1112
1250
|
/* Local dictionary already initialized. */
|
1251
|
+
assert(cctx->cdict == dl->cdict);
|
1113
1252
|
return 0;
|
1114
1253
|
}
|
1115
1254
|
assert(dl->dictSize > 0);
|
@@ -1129,26 +1268,30 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
|
1129
1268
|
}
|
1130
1269
|
|
1131
1270
|
size_t ZSTD_CCtx_loadDictionary_advanced(
|
1132
|
-
ZSTD_CCtx* cctx,
|
1133
|
-
|
1271
|
+
ZSTD_CCtx* cctx,
|
1272
|
+
const void* dict, size_t dictSize,
|
1273
|
+
ZSTD_dictLoadMethod_e dictLoadMethod,
|
1274
|
+
ZSTD_dictContentType_e dictContentType)
|
1134
1275
|
{
|
1135
|
-
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
1136
|
-
"Can't load a dictionary when ctx is not in init stage.");
|
1137
1276
|
DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
|
1138
|
-
|
1139
|
-
|
1277
|
+
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
1278
|
+
"Can't load a dictionary when cctx is not in init stage.");
|
1279
|
+
ZSTD_clearAllDicts(cctx); /* erase any previously set dictionary */
|
1280
|
+
if (dict == NULL || dictSize == 0) /* no dictionary */
|
1140
1281
|
return 0;
|
1141
1282
|
if (dictLoadMethod == ZSTD_dlm_byRef) {
|
1142
1283
|
cctx->localDict.dict = dict;
|
1143
1284
|
} else {
|
1285
|
+
/* copy dictionary content inside CCtx to own its lifetime */
|
1144
1286
|
void* dictBuffer;
|
1145
1287
|
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
|
1146
|
-
"
|
1288
|
+
"static CCtx can't allocate for an internal copy of dictionary");
|
1147
1289
|
dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
|
1148
|
-
RETURN_ERROR_IF(
|
1290
|
+
RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation,
|
1291
|
+
"allocation failed for dictionary content");
|
1149
1292
|
ZSTD_memcpy(dictBuffer, dict, dictSize);
|
1150
|
-
cctx->localDict.dictBuffer = dictBuffer;
|
1151
|
-
cctx->localDict.dict = dictBuffer;
|
1293
|
+
cctx->localDict.dictBuffer = dictBuffer; /* owned ptr to free */
|
1294
|
+
cctx->localDict.dict = dictBuffer; /* read-only reference */
|
1152
1295
|
}
|
1153
1296
|
cctx->localDict.dictSize = dictSize;
|
1154
1297
|
cctx->localDict.dictContentType = dictContentType;
|
@@ -1218,8 +1361,9 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
|
|
1218
1361
|
if ( (reset == ZSTD_reset_parameters)
|
1219
1362
|
|| (reset == ZSTD_reset_session_and_parameters) ) {
|
1220
1363
|
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
1221
|
-
"
|
1364
|
+
"Reset parameters is only possible during init stage.");
|
1222
1365
|
ZSTD_clearAllDicts(cctx);
|
1366
|
+
ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
|
1223
1367
|
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
|
1224
1368
|
}
|
1225
1369
|
return 0;
|
@@ -1316,7 +1460,8 @@ static ZSTD_compressionParameters
|
|
1316
1460
|
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
1317
1461
|
unsigned long long srcSize,
|
1318
1462
|
size_t dictSize,
|
1319
|
-
ZSTD_cParamMode_e mode
|
1463
|
+
ZSTD_cParamMode_e mode,
|
1464
|
+
ZSTD_paramSwitch_e useRowMatchFinder)
|
1320
1465
|
{
|
1321
1466
|
const U64 minSrcSize = 513; /* (1<<9) + 1 */
|
1322
1467
|
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
|
@@ -1350,8 +1495,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
1350
1495
|
}
|
1351
1496
|
|
1352
1497
|
/* resize windowLog if input is small enough, to use less memory */
|
1353
|
-
if ( (srcSize
|
1354
|
-
&& (dictSize
|
1498
|
+
if ( (srcSize <= maxWindowResize)
|
1499
|
+
&& (dictSize <= maxWindowResize) ) {
|
1355
1500
|
U32 const tSize = (U32)(srcSize + dictSize);
|
1356
1501
|
static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
|
1357
1502
|
U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
|
@@ -1369,6 +1514,42 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
1369
1514
|
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
|
1370
1515
|
cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */
|
1371
1516
|
|
1517
|
+
/* We can't use more than 32 bits of hash in total, so that means that we require:
|
1518
|
+
* (hashLog + 8) <= 32 && (chainLog + 8) <= 32
|
1519
|
+
*/
|
1520
|
+
if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
|
1521
|
+
U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
|
1522
|
+
if (cPar.hashLog > maxShortCacheHashLog) {
|
1523
|
+
cPar.hashLog = maxShortCacheHashLog;
|
1524
|
+
}
|
1525
|
+
if (cPar.chainLog > maxShortCacheHashLog) {
|
1526
|
+
cPar.chainLog = maxShortCacheHashLog;
|
1527
|
+
}
|
1528
|
+
}
|
1529
|
+
|
1530
|
+
|
1531
|
+
/* At this point, we aren't 100% sure if we are using the row match finder.
|
1532
|
+
* Unless it is explicitly disabled, conservatively assume that it is enabled.
|
1533
|
+
* In this case it will only be disabled for small sources, so shrinking the
|
1534
|
+
* hash log a little bit shouldn't result in any ratio loss.
|
1535
|
+
*/
|
1536
|
+
if (useRowMatchFinder == ZSTD_ps_auto)
|
1537
|
+
useRowMatchFinder = ZSTD_ps_enable;
|
1538
|
+
|
1539
|
+
/* We can't hash more than 32-bits in total. So that means that we require:
|
1540
|
+
* (hashLog - rowLog + 8) <= 32
|
1541
|
+
*/
|
1542
|
+
if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {
|
1543
|
+
/* Switch to 32-entry rows if searchLog is 5 (or more) */
|
1544
|
+
U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);
|
1545
|
+
U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;
|
1546
|
+
U32 const maxHashLog = maxRowHashLog + rowLog;
|
1547
|
+
assert(cPar.hashLog >= rowLog);
|
1548
|
+
if (cPar.hashLog > maxHashLog) {
|
1549
|
+
cPar.hashLog = maxHashLog;
|
1550
|
+
}
|
1551
|
+
}
|
1552
|
+
|
1372
1553
|
return cPar;
|
1373
1554
|
}
|
1374
1555
|
|
@@ -1379,7 +1560,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
|
|
1379
1560
|
{
|
1380
1561
|
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
|
1381
1562
|
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
1382
|
-
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
|
1563
|
+
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);
|
1383
1564
|
}
|
1384
1565
|
|
1385
1566
|
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
|
@@ -1410,7 +1591,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
|
1410
1591
|
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
|
1411
1592
|
assert(!ZSTD_checkCParams(cParams));
|
1412
1593
|
/* srcSizeHint == 0 means 0 */
|
1413
|
-
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
|
1594
|
+
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
|
1414
1595
|
}
|
1415
1596
|
|
1416
1597
|
static size_t
|
@@ -1439,7 +1620,7 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
|
|
1439
1620
|
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
|
1440
1621
|
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
|
1441
1622
|
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
|
1442
|
-
? ZSTD_cwksp_aligned_alloc_size(hSize
|
1623
|
+
? ZSTD_cwksp_aligned_alloc_size(hSize)
|
1443
1624
|
: 0;
|
1444
1625
|
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
|
1445
1626
|
? optPotentialSpace
|
@@ -1455,6 +1636,13 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
|
|
1455
1636
|
return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
|
1456
1637
|
}
|
1457
1638
|
|
1639
|
+
/* ZSTD_maxNbSeq() :
 * Memory-sizing helper : upper bound on the number of sequences that a block
 * of `blockSize` bytes can hold.
 * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account :
 * the block size is divided by 3 when minMatch == 3 or when a sequence producer
 * is in use (useSequenceProducer != 0), and by 4 in every other case. */
static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) {
    if (minMatch == 3 || useSequenceProducer) {
        return blockSize / 3;
    }
    return blockSize / 4;
}
|
1645
|
+
|
1458
1646
|
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
1459
1647
|
const ZSTD_compressionParameters* cParams,
|
1460
1648
|
const ldmParams_t* ldmParams,
|
@@ -1462,12 +1650,13 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1462
1650
|
const ZSTD_paramSwitch_e useRowMatchFinder,
|
1463
1651
|
const size_t buffInSize,
|
1464
1652
|
const size_t buffOutSize,
|
1465
|
-
const U64 pledgedSrcSize
|
1653
|
+
const U64 pledgedSrcSize,
|
1654
|
+
int useSequenceProducer,
|
1655
|
+
size_t maxBlockSize)
|
1466
1656
|
{
|
1467
1657
|
size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
|
1468
|
-
size_t const blockSize = MIN(
|
1469
|
-
|
1470
|
-
size_t const maxNbSeq = blockSize / divider;
|
1658
|
+
size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
|
1659
|
+
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
|
1471
1660
|
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
|
1472
1661
|
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
|
1473
1662
|
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
|
@@ -1486,6 +1675,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1486
1675
|
|
1487
1676
|
size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
|
1488
1677
|
|
1678
|
+
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
|
1679
|
+
size_t const externalSeqSpace = useSequenceProducer
|
1680
|
+
? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
|
1681
|
+
: 0;
|
1682
|
+
|
1489
1683
|
size_t const neededSpace =
|
1490
1684
|
cctxSpace +
|
1491
1685
|
entropySpace +
|
@@ -1494,7 +1688,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1494
1688
|
ldmSeqSpace +
|
1495
1689
|
matchStateSize +
|
1496
1690
|
tokenSpace +
|
1497
|
-
bufferSpace
|
1691
|
+
bufferSpace +
|
1692
|
+
externalSeqSpace;
|
1498
1693
|
|
1499
1694
|
DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
|
1500
1695
|
return neededSpace;
|
@@ -1512,7 +1707,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
1512
1707
|
* be needed. However, we still allocate two 0-sized buffers, which can
|
1513
1708
|
* take space under ASAN. */
|
1514
1709
|
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
1515
|
-
&cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
|
1710
|
+
&cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
|
1516
1711
|
}
|
1517
1712
|
|
1518
1713
|
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
|
@@ -1562,7 +1757,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
1562
1757
|
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
|
1563
1758
|
{ ZSTD_compressionParameters const cParams =
|
1564
1759
|
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
|
1565
|
-
size_t const blockSize = MIN(
|
1760
|
+
size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog);
|
1566
1761
|
size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
|
1567
1762
|
? ((size_t)1 << cParams.windowLog) + blockSize
|
1568
1763
|
: 0;
|
@@ -1573,7 +1768,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
1573
1768
|
|
1574
1769
|
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
1575
1770
|
&cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
|
1576
|
-
ZSTD_CONTENTSIZE_UNKNOWN);
|
1771
|
+
ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
|
1577
1772
|
}
|
1578
1773
|
}
|
1579
1774
|
|
@@ -1716,6 +1911,19 @@ typedef enum {
|
|
1716
1911
|
ZSTD_resetTarget_CCtx
|
1717
1912
|
} ZSTD_resetTarget_e;
|
1718
1913
|
|
1914
|
+
/* Mixes bits in a 64 bits in a value, based on XXH3_rrmxmx */
|
1915
|
+
static U64 ZSTD_bitmix(U64 val, U64 len) {
|
1916
|
+
val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24);
|
1917
|
+
val *= 0x9FB21C651E98DF25ULL;
|
1918
|
+
val ^= (val >> 35) + len ;
|
1919
|
+
val *= 0x9FB21C651E98DF25ULL;
|
1920
|
+
return val ^ (val >> 28);
|
1921
|
+
}
|
1922
|
+
|
1923
|
+
/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */
|
1924
|
+
static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) {
|
1925
|
+
ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4);
|
1926
|
+
}
|
1719
1927
|
|
1720
1928
|
static size_t
|
1721
1929
|
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
@@ -1743,6 +1951,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|
1743
1951
|
}
|
1744
1952
|
|
1745
1953
|
ms->hashLog3 = hashLog3;
|
1954
|
+
ms->lazySkipping = 0;
|
1746
1955
|
|
1747
1956
|
ZSTD_invalidateMatchState(ms);
|
1748
1957
|
|
@@ -1764,6 +1973,27 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|
1764
1973
|
ZSTD_cwksp_clean_tables(ws);
|
1765
1974
|
}
|
1766
1975
|
|
1976
|
+
if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
|
1977
|
+
/* Row match finder needs an additional table of hashes ("tags") */
|
1978
|
+
size_t const tagTableSize = hSize;
|
1979
|
+
/* We want to generate a new salt in case we reset a Cctx, but we always want to use
|
1980
|
+
* 0 when we reset a Cdict */
|
1981
|
+
if(forWho == ZSTD_resetTarget_CCtx) {
|
1982
|
+
ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize);
|
1983
|
+
ZSTD_advanceHashSalt(ms);
|
1984
|
+
} else {
|
1985
|
+
/* When we are not salting we want to always memset the memory */
|
1986
|
+
ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
|
1987
|
+
ZSTD_memset(ms->tagTable, 0, tagTableSize);
|
1988
|
+
ms->hashSalt = 0;
|
1989
|
+
}
|
1990
|
+
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
|
1991
|
+
U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
|
1992
|
+
assert(cParams->hashLog >= rowLog);
|
1993
|
+
ms->rowHashLog = cParams->hashLog - rowLog;
|
1994
|
+
}
|
1995
|
+
}
|
1996
|
+
|
1767
1997
|
/* opt parser space */
|
1768
1998
|
if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
|
1769
1999
|
DEBUGLOG(4, "reserving optimal parser space");
|
@@ -1775,19 +2005,6 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|
1775
2005
|
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
|
1776
2006
|
}
|
1777
2007
|
|
1778
|
-
if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
|
1779
|
-
{ /* Row match finder needs an additional table of hashes ("tags") */
|
1780
|
-
size_t const tagTableSize = hSize*sizeof(U16);
|
1781
|
-
ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
|
1782
|
-
if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
|
1783
|
-
}
|
1784
|
-
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
|
1785
|
-
U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
|
1786
|
-
assert(cParams->hashLog >= rowLog);
|
1787
|
-
ms->rowHashLog = cParams->hashLog - rowLog;
|
1788
|
-
}
|
1789
|
-
}
|
1790
|
-
|
1791
2008
|
ms->cParams = *cParams;
|
1792
2009
|
|
1793
2010
|
RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
|
@@ -1847,6 +2064,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1847
2064
|
assert(params->useRowMatchFinder != ZSTD_ps_auto);
|
1848
2065
|
assert(params->useBlockSplitter != ZSTD_ps_auto);
|
1849
2066
|
assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
|
2067
|
+
assert(params->maxBlockSize != 0);
|
1850
2068
|
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
1851
2069
|
/* Adjust long distance matching parameters */
|
1852
2070
|
ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, ¶ms->cParams);
|
@@ -1855,9 +2073,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1855
2073
|
}
|
1856
2074
|
|
1857
2075
|
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
|
1858
|
-
size_t const blockSize = MIN(
|
1859
|
-
|
1860
|
-
size_t const maxNbSeq = blockSize / divider;
|
2076
|
+
size_t const blockSize = MIN(params->maxBlockSize, windowSize);
|
2077
|
+
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
|
1861
2078
|
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
|
1862
2079
|
? ZSTD_compressBound(blockSize) + 1
|
1863
2080
|
: 0;
|
@@ -1874,7 +2091,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1874
2091
|
size_t const neededSpace =
|
1875
2092
|
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
1876
2093
|
¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
|
1877
|
-
buffInSize, buffOutSize, pledgedSrcSize);
|
2094
|
+
buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
|
1878
2095
|
int resizeWorkspace;
|
1879
2096
|
|
1880
2097
|
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
|
@@ -1917,6 +2134,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1917
2134
|
|
1918
2135
|
/* init params */
|
1919
2136
|
zc->blockState.matchState.cParams = params->cParams;
|
2137
|
+
zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable;
|
1920
2138
|
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
1921
2139
|
zc->consumedSrcSize = 0;
|
1922
2140
|
zc->producedCSize = 0;
|
@@ -1933,13 +2151,46 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1933
2151
|
|
1934
2152
|
ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
|
1935
2153
|
|
2154
|
+
FORWARD_IF_ERROR(ZSTD_reset_matchState(
|
2155
|
+
&zc->blockState.matchState,
|
2156
|
+
ws,
|
2157
|
+
¶ms->cParams,
|
2158
|
+
params->useRowMatchFinder,
|
2159
|
+
crp,
|
2160
|
+
needsIndexReset,
|
2161
|
+
ZSTD_resetTarget_CCtx), "");
|
2162
|
+
|
2163
|
+
zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
|
2164
|
+
|
2165
|
+
/* ldm hash table */
|
2166
|
+
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
2167
|
+
/* TODO: avoid memset? */
|
2168
|
+
size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
|
2169
|
+
zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
|
2170
|
+
ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
|
2171
|
+
zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
|
2172
|
+
zc->maxNbLdmSequences = maxNbLdmSeq;
|
2173
|
+
|
2174
|
+
ZSTD_window_init(&zc->ldmState.window);
|
2175
|
+
zc->ldmState.loadedDictEnd = 0;
|
2176
|
+
}
|
2177
|
+
|
2178
|
+
/* reserve space for block-level external sequences */
|
2179
|
+
if (params->useSequenceProducer) {
|
2180
|
+
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
|
2181
|
+
zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
|
2182
|
+
zc->externalMatchCtx.seqBuffer =
|
2183
|
+
(ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
|
2184
|
+
}
|
2185
|
+
|
2186
|
+
/* buffers */
|
2187
|
+
|
1936
2188
|
/* ZSTD_wildcopy() is used to copy into the literals buffer,
|
1937
2189
|
* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
|
1938
2190
|
*/
|
1939
2191
|
zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
|
1940
2192
|
zc->seqStore.maxNbLit = blockSize;
|
1941
2193
|
|
1942
|
-
/* buffers */
|
1943
2194
|
zc->bufferedPolicy = zbuff;
|
1944
2195
|
zc->inBuffSize = buffInSize;
|
1945
2196
|
zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
|
@@ -1962,32 +2213,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1962
2213
|
zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
1963
2214
|
zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
1964
2215
|
zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
1965
|
-
zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
|
1966
|
-
|
1967
|
-
FORWARD_IF_ERROR(ZSTD_reset_matchState(
|
1968
|
-
&zc->blockState.matchState,
|
1969
|
-
ws,
|
1970
|
-
¶ms->cParams,
|
1971
|
-
params->useRowMatchFinder,
|
1972
|
-
crp,
|
1973
|
-
needsIndexReset,
|
1974
|
-
ZSTD_resetTarget_CCtx), "");
|
1975
|
-
|
1976
|
-
/* ldm hash table */
|
1977
|
-
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
1978
|
-
/* TODO: avoid memset? */
|
1979
|
-
size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
|
1980
|
-
zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
|
1981
|
-
ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
|
1982
|
-
zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
|
1983
|
-
zc->maxNbLdmSequences = maxNbLdmSeq;
|
1984
|
-
|
1985
|
-
ZSTD_window_init(&zc->ldmState.window);
|
1986
|
-
zc->ldmState.loadedDictEnd = 0;
|
1987
|
-
}
|
1988
2216
|
|
1989
2217
|
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
|
1990
|
-
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace
|
2218
|
+
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace));
|
1991
2219
|
|
1992
2220
|
zc->initialized = 1;
|
1993
2221
|
|
@@ -2059,7 +2287,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
|
2059
2287
|
}
|
2060
2288
|
|
2061
2289
|
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
|
2062
|
-
cdict->dictContentSize, ZSTD_cpm_attachDict
|
2290
|
+
cdict->dictContentSize, ZSTD_cpm_attachDict,
|
2291
|
+
params.useRowMatchFinder);
|
2063
2292
|
params.cParams.windowLog = windowLog;
|
2064
2293
|
params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
|
2065
2294
|
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize,
|
@@ -2098,6 +2327,22 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
|
2098
2327
|
return 0;
|
2099
2328
|
}
|
2100
2329
|
|
2330
|
+
static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize,
|
2331
|
+
ZSTD_compressionParameters const* cParams) {
|
2332
|
+
if (ZSTD_CDictIndicesAreTagged(cParams)){
|
2333
|
+
/* Remove tags from the CDict table if they are present.
|
2334
|
+
* See docs on "short cache" in zstd_compress_internal.h for context. */
|
2335
|
+
size_t i;
|
2336
|
+
for (i = 0; i < tableSize; i++) {
|
2337
|
+
U32 const taggedIndex = src[i];
|
2338
|
+
U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS;
|
2339
|
+
dst[i] = index;
|
2340
|
+
}
|
2341
|
+
} else {
|
2342
|
+
ZSTD_memcpy(dst, src, tableSize * sizeof(U32));
|
2343
|
+
}
|
2344
|
+
}
|
2345
|
+
|
2101
2346
|
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
2102
2347
|
const ZSTD_CDict* cdict,
|
2103
2348
|
ZSTD_CCtx_params params,
|
@@ -2133,21 +2378,23 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
2133
2378
|
: 0;
|
2134
2379
|
size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
|
2135
2380
|
|
2136
|
-
|
2137
|
-
|
2138
|
-
|
2381
|
+
ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable,
|
2382
|
+
cdict->matchState.hashTable,
|
2383
|
+
hSize, cdict_cParams);
|
2384
|
+
|
2139
2385
|
/* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
|
2140
2386
|
if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
|
2141
|
-
|
2142
|
-
|
2143
|
-
|
2387
|
+
ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable,
|
2388
|
+
cdict->matchState.chainTable,
|
2389
|
+
chainSize, cdict_cParams);
|
2144
2390
|
}
|
2145
2391
|
/* copy tag table */
|
2146
2392
|
if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
|
2147
|
-
size_t const tagTableSize = hSize
|
2393
|
+
size_t const tagTableSize = hSize;
|
2148
2394
|
ZSTD_memcpy(cctx->blockState.matchState.tagTable,
|
2149
|
-
|
2150
|
-
|
2395
|
+
cdict->matchState.tagTable,
|
2396
|
+
tagTableSize);
|
2397
|
+
cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt;
|
2151
2398
|
}
|
2152
2399
|
}
|
2153
2400
|
|
@@ -2226,6 +2473,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
|
|
2226
2473
|
params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
|
2227
2474
|
params.ldmParams = srcCCtx->appliedParams.ldmParams;
|
2228
2475
|
params.fParams = fParams;
|
2476
|
+
params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize;
|
2229
2477
|
ZSTD_resetCCtx_internal(dstCCtx, ¶ms, pledgedSrcSize,
|
2230
2478
|
/* loadedDictSize */ 0,
|
2231
2479
|
ZSTDcrp_leaveDirty, zbuff);
|
@@ -2385,7 +2633,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
|
|
2385
2633
|
|
2386
2634
|
/* See doc/zstd_compression_format.md for detailed format description */
|
2387
2635
|
|
2388
|
-
|
2636
|
+
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
2389
2637
|
{
|
2390
2638
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
2391
2639
|
BYTE* const llCodeTable = seqStorePtr->llCode;
|
@@ -2393,18 +2641,24 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|
2393
2641
|
BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
2394
2642
|
U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
2395
2643
|
U32 u;
|
2644
|
+
int longOffsets = 0;
|
2396
2645
|
assert(nbSeq <= seqStorePtr->maxNbSeq);
|
2397
2646
|
for (u=0; u<nbSeq; u++) {
|
2398
2647
|
U32 const llv = sequences[u].litLength;
|
2399
|
-
U32 const
|
2648
|
+
U32 const ofCode = ZSTD_highbit32(sequences[u].offBase);
|
2649
|
+
U32 const mlv = sequences[u].mlBase;
|
2400
2650
|
llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
|
2401
|
-
ofCodeTable[u] = (BYTE)
|
2651
|
+
ofCodeTable[u] = (BYTE)ofCode;
|
2402
2652
|
mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
|
2653
|
+
assert(!(MEM_64bits() && ofCode >= STREAM_ACCUMULATOR_MIN));
|
2654
|
+
if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN)
|
2655
|
+
longOffsets = 1;
|
2403
2656
|
}
|
2404
2657
|
if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
|
2405
2658
|
llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
|
2406
2659
|
if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
|
2407
2660
|
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
2661
|
+
return longOffsets;
|
2408
2662
|
}
|
2409
2663
|
|
2410
2664
|
/* ZSTD_useTargetCBlockSize():
|
@@ -2438,6 +2692,7 @@ typedef struct {
|
|
2438
2692
|
U32 MLtype;
|
2439
2693
|
size_t size;
|
2440
2694
|
size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
|
2695
|
+
int longOffsets;
|
2441
2696
|
} ZSTD_symbolEncodingTypeStats_t;
|
2442
2697
|
|
2443
2698
|
/* ZSTD_buildSequencesStatistics():
|
@@ -2448,11 +2703,13 @@ typedef struct {
|
|
2448
2703
|
* entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
|
2449
2704
|
*/
|
2450
2705
|
static ZSTD_symbolEncodingTypeStats_t
|
2451
|
-
ZSTD_buildSequencesStatistics(
|
2452
|
-
|
2453
|
-
|
2454
|
-
|
2455
|
-
|
2706
|
+
ZSTD_buildSequencesStatistics(
|
2707
|
+
const seqStore_t* seqStorePtr, size_t nbSeq,
|
2708
|
+
const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
|
2709
|
+
BYTE* dst, const BYTE* const dstEnd,
|
2710
|
+
ZSTD_strategy strategy, unsigned* countWorkspace,
|
2711
|
+
void* entropyWorkspace, size_t entropyWkspSize)
|
2712
|
+
{
|
2456
2713
|
BYTE* const ostart = dst;
|
2457
2714
|
const BYTE* const oend = dstEnd;
|
2458
2715
|
BYTE* op = ostart;
|
@@ -2466,7 +2723,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
|
|
2466
2723
|
|
2467
2724
|
stats.lastCountSize = 0;
|
2468
2725
|
/* convert length/distances into codes */
|
2469
|
-
ZSTD_seqToCodes(seqStorePtr);
|
2726
|
+
stats.longOffsets = ZSTD_seqToCodes(seqStorePtr);
|
2470
2727
|
assert(op <= oend);
|
2471
2728
|
assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
|
2472
2729
|
/* build CTable for Literal Lengths */
|
@@ -2571,22 +2828,22 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
|
|
2571
2828
|
*/
|
2572
2829
|
#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
|
2573
2830
|
MEM_STATIC size_t
|
2574
|
-
ZSTD_entropyCompressSeqStore_internal(
|
2575
|
-
|
2576
|
-
|
2577
|
-
|
2578
|
-
|
2579
|
-
|
2580
|
-
|
2831
|
+
ZSTD_entropyCompressSeqStore_internal(
|
2832
|
+
const seqStore_t* seqStorePtr,
|
2833
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
2834
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
2835
|
+
const ZSTD_CCtx_params* cctxParams,
|
2836
|
+
void* dst, size_t dstCapacity,
|
2837
|
+
void* entropyWorkspace, size_t entropyWkspSize,
|
2838
|
+
const int bmi2)
|
2581
2839
|
{
|
2582
|
-
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
2583
2840
|
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
2584
2841
|
unsigned* count = (unsigned*)entropyWorkspace;
|
2585
2842
|
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
|
2586
2843
|
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
|
2587
2844
|
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
|
2588
2845
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
2589
|
-
const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
2846
|
+
const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
2590
2847
|
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
2591
2848
|
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
2592
2849
|
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
@@ -2594,29 +2851,31 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
2594
2851
|
BYTE* const oend = ostart + dstCapacity;
|
2595
2852
|
BYTE* op = ostart;
|
2596
2853
|
size_t lastCountSize;
|
2854
|
+
int longOffsets = 0;
|
2597
2855
|
|
2598
2856
|
entropyWorkspace = count + (MaxSeq + 1);
|
2599
2857
|
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
|
2600
2858
|
|
2601
|
-
DEBUGLOG(
|
2859
|
+
DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity);
|
2602
2860
|
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
2603
2861
|
assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
|
2604
2862
|
|
2605
2863
|
/* Compress literals */
|
2606
2864
|
{ const BYTE* const literals = seqStorePtr->litStart;
|
2607
|
-
size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
2608
|
-
size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
|
2865
|
+
size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
2866
|
+
size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
|
2609
2867
|
/* Base suspicion of uncompressibility on ratio of literals to sequences */
|
2610
2868
|
unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
|
2611
2869
|
size_t const litSize = (size_t)(seqStorePtr->lit - literals);
|
2870
|
+
|
2612
2871
|
size_t const cSize = ZSTD_compressLiterals(
|
2613
|
-
&prevEntropy->huf, &nextEntropy->huf,
|
2614
|
-
cctxParams->cParams.strategy,
|
2615
|
-
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
2616
2872
|
op, dstCapacity,
|
2617
2873
|
literals, litSize,
|
2618
2874
|
entropyWorkspace, entropyWkspSize,
|
2619
|
-
|
2875
|
+
&prevEntropy->huf, &nextEntropy->huf,
|
2876
|
+
cctxParams->cParams.strategy,
|
2877
|
+
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
2878
|
+
suspectUncompressible, bmi2);
|
2620
2879
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
|
2621
2880
|
assert(cSize <= dstCapacity);
|
2622
2881
|
op += cSize;
|
@@ -2642,11 +2901,10 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
2642
2901
|
ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
|
2643
2902
|
return (size_t)(op - ostart);
|
2644
2903
|
}
|
2645
|
-
{
|
2646
|
-
ZSTD_symbolEncodingTypeStats_t stats;
|
2647
|
-
BYTE* seqHead = op++;
|
2904
|
+
{ BYTE* const seqHead = op++;
|
2648
2905
|
/* build stats for sequences */
|
2649
|
-
stats =
|
2906
|
+
const ZSTD_symbolEncodingTypeStats_t stats =
|
2907
|
+
ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
|
2650
2908
|
&prevEntropy->fse, &nextEntropy->fse,
|
2651
2909
|
op, oend,
|
2652
2910
|
strategy, count,
|
@@ -2655,6 +2913,7 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
2655
2913
|
*seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
|
2656
2914
|
lastCountSize = stats.lastCountSize;
|
2657
2915
|
op += stats.size;
|
2916
|
+
longOffsets = stats.longOffsets;
|
2658
2917
|
}
|
2659
2918
|
|
2660
2919
|
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
@@ -2689,14 +2948,15 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
2689
2948
|
}
|
2690
2949
|
|
2691
2950
|
MEM_STATIC size_t
|
2692
|
-
ZSTD_entropyCompressSeqStore(
|
2693
|
-
|
2694
|
-
|
2695
|
-
|
2696
|
-
|
2697
|
-
|
2698
|
-
|
2699
|
-
|
2951
|
+
ZSTD_entropyCompressSeqStore(
|
2952
|
+
const seqStore_t* seqStorePtr,
|
2953
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
2954
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
2955
|
+
const ZSTD_CCtx_params* cctxParams,
|
2956
|
+
void* dst, size_t dstCapacity,
|
2957
|
+
size_t srcSize,
|
2958
|
+
void* entropyWorkspace, size_t entropyWkspSize,
|
2959
|
+
int bmi2)
|
2700
2960
|
{
|
2701
2961
|
size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
|
2702
2962
|
seqStorePtr, prevEntropy, nextEntropy, cctxParams,
|
@@ -2706,15 +2966,21 @@ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
|
|
2706
2966
|
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
|
2707
2967
|
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
|
2708
2968
|
*/
|
2709
|
-
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
|
2969
|
+
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) {
|
2970
|
+
DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity);
|
2710
2971
|
return 0; /* block not compressed */
|
2972
|
+
}
|
2711
2973
|
FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
|
2712
2974
|
|
2713
2975
|
/* Check compressibility */
|
2714
2976
|
{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
|
2715
2977
|
if (cSize >= maxCSize) return 0; /* block not compressed */
|
2716
2978
|
}
|
2717
|
-
DEBUGLOG(
|
2979
|
+
DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
|
2980
|
+
/* libzstd decoder before > v1.5.4 is not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly.
|
2981
|
+
* This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above.
|
2982
|
+
*/
|
2983
|
+
assert(cSize < ZSTD_BLOCKSIZE_MAX);
|
2718
2984
|
return cSize;
|
2719
2985
|
}
|
2720
2986
|
|
@@ -2809,6 +3075,72 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
|
|
2809
3075
|
ssPtr->longLengthType = ZSTD_llt_none;
|
2810
3076
|
}
|
2811
3077
|
|
3078
|
+
/* ZSTD_postProcessSequenceProducerResult() :
|
3079
|
+
* Validates and post-processes sequences obtained through the external matchfinder API:
|
3080
|
+
* - Checks whether nbExternalSeqs represents an error condition.
|
3081
|
+
* - Appends a block delimiter to outSeqs if one is not already present.
|
3082
|
+
* See zstd.h for context regarding block delimiters.
|
3083
|
+
* Returns the number of sequences after post-processing, or an error code. */
|
3084
|
+
static size_t ZSTD_postProcessSequenceProducerResult(
|
3085
|
+
ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize
|
3086
|
+
) {
|
3087
|
+
RETURN_ERROR_IF(
|
3088
|
+
nbExternalSeqs > outSeqsCapacity,
|
3089
|
+
sequenceProducer_failed,
|
3090
|
+
"External sequence producer returned error code %lu",
|
3091
|
+
(unsigned long)nbExternalSeqs
|
3092
|
+
);
|
3093
|
+
|
3094
|
+
RETURN_ERROR_IF(
|
3095
|
+
nbExternalSeqs == 0 && srcSize > 0,
|
3096
|
+
sequenceProducer_failed,
|
3097
|
+
"Got zero sequences from external sequence producer for a non-empty src buffer!"
|
3098
|
+
);
|
3099
|
+
|
3100
|
+
if (srcSize == 0) {
|
3101
|
+
ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence));
|
3102
|
+
return 1;
|
3103
|
+
}
|
3104
|
+
|
3105
|
+
{
|
3106
|
+
ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1];
|
3107
|
+
|
3108
|
+
/* We can return early if lastSeq is already a block delimiter. */
|
3109
|
+
if (lastSeq.offset == 0 && lastSeq.matchLength == 0) {
|
3110
|
+
return nbExternalSeqs;
|
3111
|
+
}
|
3112
|
+
|
3113
|
+
/* This error condition is only possible if the external matchfinder
|
3114
|
+
* produced an invalid parse, by definition of ZSTD_sequenceBound(). */
|
3115
|
+
RETURN_ERROR_IF(
|
3116
|
+
nbExternalSeqs == outSeqsCapacity,
|
3117
|
+
sequenceProducer_failed,
|
3118
|
+
"nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
|
3119
|
+
);
|
3120
|
+
|
3121
|
+
/* lastSeq is not a block delimiter, so we need to append one. */
|
3122
|
+
ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence));
|
3123
|
+
return nbExternalSeqs + 1;
|
3124
|
+
}
|
3125
|
+
}
|
3126
|
+
|
3127
|
+
/* ZSTD_fastSequenceLengthSum() :
|
3128
|
+
* Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*.
|
3129
|
+
* Similar to another function in zstd_compress.c (determine_blockSize),
|
3130
|
+
* except it doesn't check for a block delimiter to end summation.
|
3131
|
+
* Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P).
|
3132
|
+
* This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */
|
3133
|
+
static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) {
|
3134
|
+
size_t matchLenSum, litLenSum, i;
|
3135
|
+
matchLenSum = 0;
|
3136
|
+
litLenSum = 0;
|
3137
|
+
for (i = 0; i < seqBufSize; i++) {
|
3138
|
+
litLenSum += seqBuf[i].litLength;
|
3139
|
+
matchLenSum += seqBuf[i].matchLength;
|
3140
|
+
}
|
3141
|
+
return litLenSum + matchLenSum;
|
3142
|
+
}
|
3143
|
+
|
2812
3144
|
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
|
2813
3145
|
|
2814
3146
|
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
@@ -2818,7 +3150,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2818
3150
|
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
|
2819
3151
|
/* Assert that we have correctly flushed the ctx params into the ms's copy */
|
2820
3152
|
ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
|
2821
|
-
|
3153
|
+
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2, so to compensate we are adding
|
3154
|
+
* an additional 1. We need to revisit and change this logic to be more consistent. */
|
3155
|
+
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
|
2822
3156
|
if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
|
2823
3157
|
ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
|
2824
3158
|
} else {
|
@@ -2854,6 +3188,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2854
3188
|
}
|
2855
3189
|
if (zc->externSeqStore.pos < zc->externSeqStore.size) {
|
2856
3190
|
assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
|
3191
|
+
|
3192
|
+
/* External matchfinder + LDM is technically possible, just not implemented yet.
|
3193
|
+
* We need to revisit soon and implement it. */
|
3194
|
+
RETURN_ERROR_IF(
|
3195
|
+
zc->appliedParams.useSequenceProducer,
|
3196
|
+
parameter_combination_unsupported,
|
3197
|
+
"Long-distance matching with external sequence producer enabled is not currently supported."
|
3198
|
+
);
|
3199
|
+
|
2857
3200
|
/* Updates ldmSeqStore.pos */
|
2858
3201
|
lastLLSize =
|
2859
3202
|
ZSTD_ldm_blockCompress(&zc->externSeqStore,
|
@@ -2865,6 +3208,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2865
3208
|
} else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
|
2866
3209
|
rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
|
2867
3210
|
|
3211
|
+
/* External matchfinder + LDM is technically possible, just not implemented yet.
|
3212
|
+
* We need to revisit soon and implement it. */
|
3213
|
+
RETURN_ERROR_IF(
|
3214
|
+
zc->appliedParams.useSequenceProducer,
|
3215
|
+
parameter_combination_unsupported,
|
3216
|
+
"Long-distance matching with external sequence producer enabled is not currently supported."
|
3217
|
+
);
|
3218
|
+
|
2868
3219
|
ldmSeqStore.seq = zc->ldmSequences;
|
2869
3220
|
ldmSeqStore.capacity = zc->maxNbLdmSequences;
|
2870
3221
|
/* Updates ldmSeqStore.size */
|
@@ -2879,7 +3230,68 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2879
3230
|
zc->appliedParams.useRowMatchFinder,
|
2880
3231
|
src, srcSize);
|
2881
3232
|
assert(ldmSeqStore.pos == ldmSeqStore.size);
|
2882
|
-
} else
|
3233
|
+
} else if (zc->appliedParams.useSequenceProducer) {
|
3234
|
+
assert(
|
3235
|
+
zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
|
3236
|
+
);
|
3237
|
+
assert(zc->externalMatchCtx.mFinder != NULL);
|
3238
|
+
|
3239
|
+
{ U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
|
3240
|
+
|
3241
|
+
size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(
|
3242
|
+
zc->externalMatchCtx.mState,
|
3243
|
+
zc->externalMatchCtx.seqBuffer,
|
3244
|
+
zc->externalMatchCtx.seqBufferCapacity,
|
3245
|
+
src, srcSize,
|
3246
|
+
NULL, 0, /* dict and dictSize, currently not supported */
|
3247
|
+
zc->appliedParams.compressionLevel,
|
3248
|
+
windowSize
|
3249
|
+
);
|
3250
|
+
|
3251
|
+
size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
|
3252
|
+
zc->externalMatchCtx.seqBuffer,
|
3253
|
+
nbExternalSeqs,
|
3254
|
+
zc->externalMatchCtx.seqBufferCapacity,
|
3255
|
+
srcSize
|
3256
|
+
);
|
3257
|
+
|
3258
|
+
/* Return early if there is no error, since we don't need to worry about last literals */
|
3259
|
+
if (!ZSTD_isError(nbPostProcessedSeqs)) {
|
3260
|
+
ZSTD_sequencePosition seqPos = {0,0,0};
|
3261
|
+
size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);
|
3262
|
+
RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
|
3263
|
+
FORWARD_IF_ERROR(
|
3264
|
+
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
|
3265
|
+
zc, &seqPos,
|
3266
|
+
zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
|
3267
|
+
src, srcSize,
|
3268
|
+
zc->appliedParams.searchForExternalRepcodes
|
3269
|
+
),
|
3270
|
+
"Failed to copy external sequences to seqStore!"
|
3271
|
+
);
|
3272
|
+
ms->ldmSeqStore = NULL;
|
3273
|
+
DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs);
|
3274
|
+
return ZSTDbss_compress;
|
3275
|
+
}
|
3276
|
+
|
3277
|
+
/* Propagate the error if fallback is disabled */
|
3278
|
+
if (!zc->appliedParams.enableMatchFinderFallback) {
|
3279
|
+
return nbPostProcessedSeqs;
|
3280
|
+
}
|
3281
|
+
|
3282
|
+
/* Fallback to software matchfinder */
|
3283
|
+
{ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
|
3284
|
+
zc->appliedParams.useRowMatchFinder,
|
3285
|
+
dictMode);
|
3286
|
+
ms->ldmSeqStore = NULL;
|
3287
|
+
DEBUGLOG(
|
3288
|
+
5,
|
3289
|
+
"External sequence producer returned error code %lu. Falling back to internal parser.",
|
3290
|
+
(unsigned long)nbExternalSeqs
|
3291
|
+
);
|
3292
|
+
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
|
3293
|
+
} }
|
3294
|
+
} else { /* not long range mode and no external matchfinder */
|
2883
3295
|
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
|
2884
3296
|
zc->appliedParams.useRowMatchFinder,
|
2885
3297
|
dictMode);
|
@@ -2910,9 +3322,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
2910
3322
|
assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
|
2911
3323
|
ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
2912
3324
|
for (i = 0; i < seqStoreSeqSize; ++i) {
|
2913
|
-
U32 rawOffset = seqStoreSeqs[i].
|
3325
|
+
U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
|
2914
3326
|
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
|
2915
|
-
outSeqs[i].matchLength = seqStoreSeqs[i].
|
3327
|
+
outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
|
2916
3328
|
outSeqs[i].rep = 0;
|
2917
3329
|
|
2918
3330
|
if (i == seqStore->longLengthPos) {
|
@@ -2923,9 +3335,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
2923
3335
|
}
|
2924
3336
|
}
|
2925
3337
|
|
2926
|
-
if (seqStoreSeqs[i].
|
3338
|
+
if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
|
2927
3339
|
/* Derive the correct offset corresponding to a repcode */
|
2928
|
-
outSeqs[i].rep = seqStoreSeqs[i].
|
3340
|
+
outSeqs[i].rep = seqStoreSeqs[i].offBase;
|
2929
3341
|
if (outSeqs[i].litLength != 0) {
|
2930
3342
|
rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
|
2931
3343
|
} else {
|
@@ -2939,9 +3351,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
2939
3351
|
outSeqs[i].offset = rawOffset;
|
2940
3352
|
/* ZSTD_updateRep() is given the sequence's offBase value directly,
|
2941
3353
|
so seqStoreSeqs[i].offBase is passed through unmodified */
|
2942
|
-
|
2943
|
-
|
2944
|
-
|
3354
|
+
ZSTD_updateRep(updatedRepcodes.rep,
|
3355
|
+
seqStoreSeqs[i].offBase,
|
3356
|
+
seqStoreSeqs[i].litLength == 0);
|
2945
3357
|
literalsRead += outSeqs[i].litLength;
|
2946
3358
|
}
|
2947
3359
|
/* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
|
@@ -2956,6 +3368,10 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
2956
3368
|
zc->seqCollector.seqIndex += seqStoreSeqSize;
|
2957
3369
|
}
|
2958
3370
|
|
3371
|
+
size_t ZSTD_sequenceBound(size_t srcSize) {
|
3372
|
+
return (srcSize / ZSTD_MINMATCH_MIN) + 1;
|
3373
|
+
}
|
3374
|
+
|
2959
3375
|
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
2960
3376
|
size_t outSeqsSize, const void* src, size_t srcSize)
|
2961
3377
|
{
|
@@ -3001,19 +3417,17 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) {
|
|
3001
3417
|
const size_t unrollMask = unrollSize - 1;
|
3002
3418
|
const size_t prefixLength = length & unrollMask;
|
3003
3419
|
size_t i;
|
3004
|
-
size_t u;
|
3005
3420
|
if (length == 1) return 1;
|
3006
3421
|
/* Check if prefix is RLE first before using unrolled loop */
|
3007
3422
|
if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
|
3008
3423
|
return 0;
|
3009
3424
|
}
|
3010
3425
|
for (i = prefixLength; i != length; i += unrollSize) {
|
3426
|
+
size_t u;
|
3011
3427
|
for (u = 0; u < unrollSize; u += sizeof(size_t)) {
|
3012
3428
|
if (MEM_readST(ip + i + u) != valueST) {
|
3013
3429
|
return 0;
|
3014
|
-
|
3015
|
-
}
|
3016
|
-
}
|
3430
|
+
} } }
|
3017
3431
|
return 1;
|
3018
3432
|
}
|
3019
3433
|
|
@@ -3029,7 +3443,8 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore)
|
|
3029
3443
|
return nbSeqs < 4 && nbLits < 10;
|
3030
3444
|
}
|
3031
3445
|
|
3032
|
-
static void
|
3446
|
+
static void
|
3447
|
+
ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
|
3033
3448
|
{
|
3034
3449
|
ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
|
3035
3450
|
bs->prevCBlock = bs->nextCBlock;
|
@@ -3037,7 +3452,9 @@ static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* c
|
|
3037
3452
|
}
|
3038
3453
|
|
3039
3454
|
/* Writes the block header */
|
3040
|
-
static void
|
3455
|
+
static void
|
3456
|
+
writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock)
|
3457
|
+
{
|
3041
3458
|
U32 const cBlockHeader = cSize == 1 ?
|
3042
3459
|
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
|
3043
3460
|
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
@@ -3050,13 +3467,16 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB
|
|
3050
3467
|
* Stores literals block type (raw, rle, compressed, repeat) and
|
3051
3468
|
* huffman description table to hufMetadata.
|
3052
3469
|
* Requires ENTROPY_WORKSPACE_SIZE workspace
|
3053
|
-
*
|
3054
|
-
|
3055
|
-
|
3056
|
-
|
3057
|
-
|
3058
|
-
|
3059
|
-
|
3470
|
+
* @return : size of huffman description table, or an error code
|
3471
|
+
*/
|
3472
|
+
static size_t
|
3473
|
+
ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
|
3474
|
+
const ZSTD_hufCTables_t* prevHuf,
|
3475
|
+
ZSTD_hufCTables_t* nextHuf,
|
3476
|
+
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
3477
|
+
const int literalsCompressionIsDisabled,
|
3478
|
+
void* workspace, size_t wkspSize,
|
3479
|
+
int hufFlags)
|
3060
3480
|
{
|
3061
3481
|
BYTE* const wkspStart = (BYTE*)workspace;
|
3062
3482
|
BYTE* const wkspEnd = wkspStart + wkspSize;
|
@@ -3064,9 +3484,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
3064
3484
|
unsigned* const countWksp = (unsigned*)workspace;
|
3065
3485
|
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
3066
3486
|
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
3067
|
-
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
3487
|
+
const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp);
|
3068
3488
|
unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
|
3069
|
-
unsigned huffLog =
|
3489
|
+
unsigned huffLog = LitHufLog;
|
3070
3490
|
HUF_repeat repeat = prevHuf->repeatMode;
|
3071
3491
|
DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
|
3072
3492
|
|
@@ -3081,73 +3501,77 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
3081
3501
|
|
3082
3502
|
/* small ? don't even attempt compression (speed opt) */
|
3083
3503
|
#ifndef COMPRESS_LITERALS_SIZE_MIN
|
3084
|
-
#define COMPRESS_LITERALS_SIZE_MIN 63
|
3504
|
+
# define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */
|
3085
3505
|
#endif
|
3086
3506
|
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
3087
3507
|
if (srcSize <= minLitSize) {
|
3088
3508
|
DEBUGLOG(5, "set_basic - too small");
|
3089
3509
|
hufMetadata->hType = set_basic;
|
3090
3510
|
return 0;
|
3091
|
-
|
3092
|
-
}
|
3511
|
+
} }
|
3093
3512
|
|
3094
3513
|
/* Scan input and build symbol stats */
|
3095
|
-
{ size_t const largest =
|
3514
|
+
{ size_t const largest =
|
3515
|
+
HIST_count_wksp (countWksp, &maxSymbolValue,
|
3516
|
+
(const BYTE*)src, srcSize,
|
3517
|
+
workspace, wkspSize);
|
3096
3518
|
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
|
3097
3519
|
if (largest == srcSize) {
|
3520
|
+
/* only one literal symbol */
|
3098
3521
|
DEBUGLOG(5, "set_rle");
|
3099
3522
|
hufMetadata->hType = set_rle;
|
3100
3523
|
return 0;
|
3101
3524
|
}
|
3102
3525
|
if (largest <= (srcSize >> 7)+4) {
|
3526
|
+
/* heuristic: likely not compressible */
|
3103
3527
|
DEBUGLOG(5, "set_basic - no gain");
|
3104
3528
|
hufMetadata->hType = set_basic;
|
3105
3529
|
return 0;
|
3106
|
-
|
3107
|
-
}
|
3530
|
+
} }
|
3108
3531
|
|
3109
3532
|
/* Validate the previous Huffman table */
|
3110
|
-
if (repeat == HUF_repeat_check
|
3533
|
+
if (repeat == HUF_repeat_check
|
3534
|
+
&& !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
|
3111
3535
|
repeat = HUF_repeat_none;
|
3112
3536
|
}
|
3113
3537
|
|
3114
3538
|
/* Build Huffman Tree */
|
3115
3539
|
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
3116
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
3540
|
+
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags);
|
3541
|
+
assert(huffLog <= LitHufLog);
|
3117
3542
|
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
3118
3543
|
maxSymbolValue, huffLog,
|
3119
3544
|
nodeWksp, nodeWkspSize);
|
3120
3545
|
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
|
3121
3546
|
huffLog = (U32)maxBits;
|
3122
|
-
|
3123
|
-
|
3124
|
-
|
3125
|
-
|
3126
|
-
|
3127
|
-
|
3128
|
-
|
3129
|
-
|
3130
|
-
|
3131
|
-
|
3132
|
-
|
3133
|
-
|
3134
|
-
|
3135
|
-
|
3136
|
-
hufMetadata->hType = set_repeat;
|
3137
|
-
return 0;
|
3138
|
-
}
|
3139
|
-
}
|
3140
|
-
if (newCSize + hSize >= srcSize) {
|
3141
|
-
DEBUGLOG(5, "set_basic - no gains");
|
3547
|
+
}
|
3548
|
+
{ /* Build and write the CTable */
|
3549
|
+
size_t const newCSize = HUF_estimateCompressedSize(
|
3550
|
+
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
|
3551
|
+
size_t const hSize = HUF_writeCTable_wksp(
|
3552
|
+
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
|
3553
|
+
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
|
3554
|
+
nodeWksp, nodeWkspSize);
|
3555
|
+
/* Check against repeating the previous CTable */
|
3556
|
+
if (repeat != HUF_repeat_none) {
|
3557
|
+
size_t const oldCSize = HUF_estimateCompressedSize(
|
3558
|
+
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
|
3559
|
+
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
|
3560
|
+
DEBUGLOG(5, "set_repeat - smaller");
|
3142
3561
|
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
3143
|
-
hufMetadata->hType =
|
3562
|
+
hufMetadata->hType = set_repeat;
|
3144
3563
|
return 0;
|
3145
|
-
|
3146
|
-
|
3147
|
-
|
3148
|
-
nextHuf
|
3149
|
-
|
3564
|
+
} }
|
3565
|
+
if (newCSize + hSize >= srcSize) {
|
3566
|
+
DEBUGLOG(5, "set_basic - no gains");
|
3567
|
+
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
3568
|
+
hufMetadata->hType = set_basic;
|
3569
|
+
return 0;
|
3150
3570
|
}
|
3571
|
+
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
|
3572
|
+
hufMetadata->hType = set_compressed;
|
3573
|
+
nextHuf->repeatMode = HUF_repeat_check;
|
3574
|
+
return hSize;
|
3151
3575
|
}
|
3152
3576
|
}
|
3153
3577
|
|
@@ -3157,8 +3581,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
3157
3581
|
* and updates nextEntropy to the appropriate repeatMode.
|
3158
3582
|
*/
|
3159
3583
|
static ZSTD_symbolEncodingTypeStats_t
|
3160
|
-
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
|
3161
|
-
|
3584
|
+
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
|
3585
|
+
{
|
3586
|
+
ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0};
|
3162
3587
|
nextEntropy->litlength_repeatMode = FSE_repeat_none;
|
3163
3588
|
nextEntropy->offcode_repeatMode = FSE_repeat_none;
|
3164
3589
|
nextEntropy->matchlength_repeatMode = FSE_repeat_none;
|
@@ -3169,16 +3594,18 @@ ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
|
|
3169
3594
|
* Builds entropy for the sequences.
|
3170
3595
|
* Stores symbol compression modes and fse table to fseMetadata.
|
3171
3596
|
* Requires ENTROPY_WORKSPACE_SIZE wksp.
|
3172
|
-
*
|
3173
|
-
static size_t
|
3174
|
-
|
3175
|
-
|
3176
|
-
|
3177
|
-
|
3178
|
-
|
3597
|
+
* @return : size of fse tables or error code */
|
3598
|
+
static size_t
|
3599
|
+
ZSTD_buildBlockEntropyStats_sequences(
|
3600
|
+
const seqStore_t* seqStorePtr,
|
3601
|
+
const ZSTD_fseCTables_t* prevEntropy,
|
3602
|
+
ZSTD_fseCTables_t* nextEntropy,
|
3603
|
+
const ZSTD_CCtx_params* cctxParams,
|
3604
|
+
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
3605
|
+
void* workspace, size_t wkspSize)
|
3179
3606
|
{
|
3180
3607
|
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
3181
|
-
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
3608
|
+
size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
3182
3609
|
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
3183
3610
|
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
3184
3611
|
BYTE* op = ostart;
|
@@ -3205,23 +3632,28 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
|
|
3205
3632
|
/** ZSTD_buildBlockEntropyStats() :
|
3206
3633
|
* Builds entropy for the block.
|
3207
3634
|
* Requires workspace size ENTROPY_WORKSPACE_SIZE
|
3208
|
-
*
|
3209
|
-
*
|
3635
|
+
* @return : 0 on success, or an error code
|
3636
|
+
* Note : also employed in superblock
|
3210
3637
|
*/
|
3211
|
-
size_t ZSTD_buildBlockEntropyStats(
|
3212
|
-
|
3213
|
-
|
3214
|
-
|
3215
|
-
|
3216
|
-
|
3217
|
-
|
3218
|
-
|
3638
|
+
size_t ZSTD_buildBlockEntropyStats(
|
3639
|
+
const seqStore_t* seqStorePtr,
|
3640
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
3641
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
3642
|
+
const ZSTD_CCtx_params* cctxParams,
|
3643
|
+
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
3644
|
+
void* workspace, size_t wkspSize)
|
3645
|
+
{
|
3646
|
+
size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
|
3647
|
+
int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD);
|
3648
|
+
int const hufFlags = huf_useOptDepth ? HUF_flags_optimalDepth : 0;
|
3649
|
+
|
3219
3650
|
entropyMetadata->hufMetadata.hufDesSize =
|
3220
3651
|
ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
|
3221
3652
|
&prevEntropy->huf, &nextEntropy->huf,
|
3222
3653
|
&entropyMetadata->hufMetadata,
|
3223
3654
|
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
3224
|
-
workspace, wkspSize);
|
3655
|
+
workspace, wkspSize, hufFlags);
|
3656
|
+
|
3225
3657
|
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
|
3226
3658
|
entropyMetadata->fseMetadata.fseTablesSize =
|
3227
3659
|
ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
|
@@ -3234,11 +3666,12 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
|
|
3234
3666
|
}
|
3235
3667
|
|
3236
3668
|
/* Returns the size estimate for the literals section (header + content) of a block */
|
3237
|
-
static size_t
|
3238
|
-
|
3239
|
-
|
3240
|
-
|
3241
|
-
|
3669
|
+
static size_t
|
3670
|
+
ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
|
3671
|
+
const ZSTD_hufCTables_t* huf,
|
3672
|
+
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
3673
|
+
void* workspace, size_t wkspSize,
|
3674
|
+
int writeEntropy)
|
3242
3675
|
{
|
3243
3676
|
unsigned* const countWksp = (unsigned*)workspace;
|
3244
3677
|
unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
|
@@ -3260,12 +3693,13 @@ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSiz
|
|
3260
3693
|
}
|
3261
3694
|
|
3262
3695
|
/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
|
3263
|
-
static size_t
|
3264
|
-
|
3265
|
-
|
3266
|
-
|
3267
|
-
|
3268
|
-
|
3696
|
+
static size_t
|
3697
|
+
ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
|
3698
|
+
const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
|
3699
|
+
const FSE_CTable* fseCTable,
|
3700
|
+
const U8* additionalBits,
|
3701
|
+
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
3702
|
+
void* workspace, size_t wkspSize)
|
3269
3703
|
{
|
3270
3704
|
unsigned* const countWksp = (unsigned*)workspace;
|
3271
3705
|
const BYTE* ctp = codeTable;
|
@@ -3297,99 +3731,107 @@ static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
|
|
3297
3731
|
}
|
3298
3732
|
|
3299
3733
|
/* Returns the size estimate for the sequences section (header + content) of a block */
|
3300
|
-
static size_t
|
3301
|
-
|
3302
|
-
|
3303
|
-
|
3304
|
-
|
3305
|
-
|
3306
|
-
|
3307
|
-
|
3734
|
+
static size_t
|
3735
|
+
ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
|
3736
|
+
const BYTE* llCodeTable,
|
3737
|
+
const BYTE* mlCodeTable,
|
3738
|
+
size_t nbSeq,
|
3739
|
+
const ZSTD_fseCTables_t* fseTables,
|
3740
|
+
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
3741
|
+
void* workspace, size_t wkspSize,
|
3742
|
+
int writeEntropy)
|
3308
3743
|
{
|
3309
3744
|
size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
|
3310
3745
|
size_t cSeqSizeEstimate = 0;
|
3311
3746
|
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
|
3312
|
-
|
3313
|
-
|
3314
|
-
|
3747
|
+
fseTables->offcodeCTable, NULL,
|
3748
|
+
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
3749
|
+
workspace, wkspSize);
|
3315
3750
|
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
|
3316
|
-
|
3317
|
-
|
3318
|
-
|
3751
|
+
fseTables->litlengthCTable, LL_bits,
|
3752
|
+
LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
3753
|
+
workspace, wkspSize);
|
3319
3754
|
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
|
3320
|
-
|
3321
|
-
|
3322
|
-
|
3755
|
+
fseTables->matchlengthCTable, ML_bits,
|
3756
|
+
ML_defaultNorm, ML_defaultNormLog, MaxML,
|
3757
|
+
workspace, wkspSize);
|
3323
3758
|
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
|
3324
3759
|
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
3325
3760
|
}
|
3326
3761
|
|
3327
3762
|
/* Returns the size estimate for a given stream of literals, of, ll, ml */
|
3328
|
-
static size_t
|
3329
|
-
|
3330
|
-
|
3331
|
-
|
3332
|
-
|
3333
|
-
|
3334
|
-
|
3335
|
-
|
3336
|
-
|
3763
|
+
static size_t
|
3764
|
+
ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
|
3765
|
+
const BYTE* ofCodeTable,
|
3766
|
+
const BYTE* llCodeTable,
|
3767
|
+
const BYTE* mlCodeTable,
|
3768
|
+
size_t nbSeq,
|
3769
|
+
const ZSTD_entropyCTables_t* entropy,
|
3770
|
+
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
3771
|
+
void* workspace, size_t wkspSize,
|
3772
|
+
int writeLitEntropy, int writeSeqEntropy)
|
3773
|
+
{
|
3337
3774
|
size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
|
3338
|
-
|
3339
|
-
|
3775
|
+
&entropy->huf, &entropyMetadata->hufMetadata,
|
3776
|
+
workspace, wkspSize, writeLitEntropy);
|
3340
3777
|
size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
|
3341
|
-
|
3342
|
-
|
3778
|
+
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
|
3779
|
+
workspace, wkspSize, writeSeqEntropy);
|
3343
3780
|
return seqSize + literalsSize + ZSTD_blockHeaderSize;
|
3344
3781
|
}
|
3345
3782
|
|
3346
3783
|
/* Builds entropy statistics and uses them for blocksize estimation.
|
3347
3784
|
*
|
3348
|
-
*
|
3785
|
+
* @return: estimated compressed size of the seqStore, or a zstd error.
|
3349
3786
|
*/
|
3350
|
-
static size_t
|
3351
|
-
|
3787
|
+
static size_t
|
3788
|
+
ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc)
|
3789
|
+
{
|
3790
|
+
ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
|
3352
3791
|
DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
|
3353
3792
|
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
|
3354
3793
|
&zc->blockState.prevCBlock->entropy,
|
3355
3794
|
&zc->blockState.nextCBlock->entropy,
|
3356
3795
|
&zc->appliedParams,
|
3357
3796
|
entropyMetadata,
|
3358
|
-
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE
|
3359
|
-
return ZSTD_estimateBlockSize(
|
3797
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");
|
3798
|
+
return ZSTD_estimateBlockSize(
|
3799
|
+
seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
|
3360
3800
|
seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
|
3361
3801
|
(size_t)(seqStore->sequences - seqStore->sequencesStart),
|
3362
|
-
&zc->blockState.nextCBlock->entropy,
|
3802
|
+
&zc->blockState.nextCBlock->entropy,
|
3803
|
+
entropyMetadata,
|
3804
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
|
3363
3805
|
(int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
|
3364
3806
|
}
|
3365
3807
|
|
3366
3808
|
/* Returns literals bytes represented in a seqStore */
|
3367
|
-
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
|
3809
|
+
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
|
3810
|
+
{
|
3368
3811
|
size_t literalsBytes = 0;
|
3369
|
-
size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
|
3812
|
+
size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
|
3370
3813
|
size_t i;
|
3371
3814
|
for (i = 0; i < nbSeqs; ++i) {
|
3372
|
-
seqDef seq = seqStore->sequencesStart[i];
|
3815
|
+
seqDef const seq = seqStore->sequencesStart[i];
|
3373
3816
|
literalsBytes += seq.litLength;
|
3374
3817
|
if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
|
3375
3818
|
literalsBytes += 0x10000;
|
3376
|
-
|
3377
|
-
}
|
3819
|
+
} }
|
3378
3820
|
return literalsBytes;
|
3379
3821
|
}
|
3380
3822
|
|
3381
3823
|
/* Returns match bytes represented in a seqStore */
|
3382
|
-
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
|
3824
|
+
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
|
3825
|
+
{
|
3383
3826
|
size_t matchBytes = 0;
|
3384
|
-
size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
|
3827
|
+
size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
|
3385
3828
|
size_t i;
|
3386
3829
|
for (i = 0; i < nbSeqs; ++i) {
|
3387
3830
|
seqDef seq = seqStore->sequencesStart[i];
|
3388
|
-
matchBytes += seq.
|
3831
|
+
matchBytes += seq.mlBase + MINMATCH;
|
3389
3832
|
if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
|
3390
3833
|
matchBytes += 0x10000;
|
3391
|
-
|
3392
|
-
}
|
3834
|
+
} }
|
3393
3835
|
return matchBytes;
|
3394
3836
|
}
|
3395
3837
|
|
@@ -3398,15 +3840,12 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
|
|
3398
3840
|
*/
|
3399
3841
|
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
|
3400
3842
|
const seqStore_t* originalSeqStore,
|
3401
|
-
size_t startIdx, size_t endIdx)
|
3402
|
-
|
3403
|
-
size_t literalsBytes;
|
3404
|
-
size_t literalsBytesPreceding = 0;
|
3405
|
-
|
3843
|
+
size_t startIdx, size_t endIdx)
|
3844
|
+
{
|
3406
3845
|
*resultSeqStore = *originalSeqStore;
|
3407
3846
|
if (startIdx > 0) {
|
3408
3847
|
resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
|
3409
|
-
|
3848
|
+
resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
3410
3849
|
}
|
3411
3850
|
|
3412
3851
|
/* Move longLengthPos into the correct position if necessary */
|
@@ -3419,13 +3858,12 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
|
|
3419
3858
|
}
|
3420
3859
|
resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
|
3421
3860
|
resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
|
3422
|
-
literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
3423
|
-
resultSeqStore->litStart += literalsBytesPreceding;
|
3424
3861
|
if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
|
3425
3862
|
/* This accounts for possible last literals if the derived chunk reaches the end of the block */
|
3426
|
-
resultSeqStore->lit
|
3863
|
+
assert(resultSeqStore->lit == originalSeqStore->lit);
|
3427
3864
|
} else {
|
3428
|
-
|
3865
|
+
size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
3866
|
+
resultSeqStore->lit = resultSeqStore->litStart + literalsBytes;
|
3429
3867
|
}
|
3430
3868
|
resultSeqStore->llCode += startIdx;
|
3431
3869
|
resultSeqStore->mlCode += startIdx;
|
@@ -3433,52 +3871,68 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
|
|
3433
3871
|
}
|
3434
3872
|
|
3435
3873
|
/**
|
3436
|
-
* Returns the raw offset represented by the combination of
|
3437
|
-
*
|
3874
|
+
* Returns the raw offset represented by the combination of offBase, ll0, and repcode history.
|
3875
|
+
* offBase must represent a repcode in the numeric representation of ZSTD_storeSeq().
|
3438
3876
|
*/
|
3439
|
-
static U32
|
3440
|
-
|
3441
|
-
|
3442
|
-
|
3443
|
-
|
3444
|
-
|
3877
|
+
static U32
|
3878
|
+
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0)
|
3879
|
+
{
|
3880
|
+
U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */
|
3881
|
+
assert(OFFBASE_IS_REPCODE(offBase));
|
3882
|
+
if (adjustedRepCode == ZSTD_REP_NUM) {
|
3883
|
+
assert(ll0);
|
3884
|
+
/* litlength == 0 and offCode == 2 implies selection of first repcode - 1
|
3885
|
+
* This is only valid if it results in a valid offset value, aka > 0.
|
3886
|
+
* Note : it may happen that `rep[0]==1` in exceptional circumstances.
|
3887
|
+
* In which case this function will return 0, which is an invalid offset.
|
3888
|
+
* It's not an issue though, since this value will be
|
3889
|
+
* compared and discarded within ZSTD_seqStore_resolveOffCodes().
|
3890
|
+
*/
|
3445
3891
|
return rep[0] - 1;
|
3446
3892
|
}
|
3447
|
-
return rep[
|
3893
|
+
return rep[adjustedRepCode];
|
3448
3894
|
}
|
3449
3895
|
|
3450
3896
|
/**
|
3451
3897
|
* ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
|
3452
|
-
* due to emission of RLE/raw blocks that disturb the offset history,
|
3453
|
-
* the seqStore that may be invalid.
|
3898
|
+
* due to emission of RLE/raw blocks that disturb the offset history,
|
3899
|
+
* and replaces any repcodes within the seqStore that may be invalid.
|
3454
3900
|
*
|
3455
|
-
* dRepcodes are updated as would be on the decompression side.
|
3456
|
-
* accordance with the seqStore.
|
3901
|
+
* dRepcodes are updated as would be on the decompression side.
|
3902
|
+
* cRepcodes are updated exactly in accordance with the seqStore.
|
3903
|
+
*
|
3904
|
+
* Note : this function assumes seq->offBase respects the following numbering scheme :
|
3905
|
+
* 0 : invalid
|
3906
|
+
* 1-3 : repcode 1-3
|
3907
|
+
* 4+ : real_offset+3
|
3457
3908
|
*/
|
3458
|
-
static void
|
3459
|
-
|
3909
|
+
static void
|
3910
|
+
ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
|
3911
|
+
const seqStore_t* const seqStore, U32 const nbSeq)
|
3912
|
+
{
|
3460
3913
|
U32 idx = 0;
|
3914
|
+
U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq;
|
3461
3915
|
for (; idx < nbSeq; ++idx) {
|
3462
3916
|
seqDef* const seq = seqStore->sequencesStart + idx;
|
3463
|
-
U32 const ll0 = (seq->litLength == 0);
|
3464
|
-
U32
|
3465
|
-
assert(
|
3466
|
-
if (
|
3467
|
-
U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep,
|
3468
|
-
U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep,
|
3917
|
+
U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx);
|
3918
|
+
U32 const offBase = seq->offBase;
|
3919
|
+
assert(offBase > 0);
|
3920
|
+
if (OFFBASE_IS_REPCODE(offBase)) {
|
3921
|
+
U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0);
|
3922
|
+
U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0);
|
3469
3923
|
/* Adjust simulated decompression repcode history if we come across a mismatch. Replace
|
3470
3924
|
* the repcode with the offset it actually references, determined by the compression
|
3471
3925
|
* repcode history.
|
3472
3926
|
*/
|
3473
3927
|
if (dRawOffset != cRawOffset) {
|
3474
|
-
seq->
|
3928
|
+
seq->offBase = OFFSET_TO_OFFBASE(cRawOffset);
|
3475
3929
|
}
|
3476
3930
|
}
|
3477
3931
|
/* Compression repcode history is always updated with values directly from the unmodified seqStore.
|
3478
3932
|
* Decompression repcode history may use modified seq->offset value taken from compression repcode history.
|
3479
3933
|
*/
|
3480
|
-
|
3481
|
-
|
3934
|
+
ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0);
|
3935
|
+
ZSTD_updateRep(cRepcodes->rep, offBase, ll0);
|
3482
3936
|
}
|
3483
3937
|
}
|
3484
3938
|
|
@@ -3487,11 +3941,14 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
|
|
3487
3941
|
*
|
3488
3942
|
* Returns the total size of that block (including header) or a ZSTD error code.
|
3489
3943
|
*/
|
3490
|
-
static size_t
|
3491
|
-
|
3492
|
-
|
3493
|
-
|
3494
|
-
|
3944
|
+
static size_t
|
3945
|
+
ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
|
3946
|
+
const seqStore_t* const seqStore,
|
3947
|
+
repcodes_t* const dRep, repcodes_t* const cRep,
|
3948
|
+
void* dst, size_t dstCapacity,
|
3949
|
+
const void* src, size_t srcSize,
|
3950
|
+
U32 lastBlock, U32 isPartition)
|
3951
|
+
{
|
3495
3952
|
const U32 rleMaxLength = 25;
|
3496
3953
|
BYTE* op = (BYTE*)dst;
|
3497
3954
|
const BYTE* ip = (const BYTE*)src;
|
@@ -3500,6 +3957,7 @@ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const
|
|
3500
3957
|
|
3501
3958
|
/* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
|
3502
3959
|
repcodes_t const dRepOriginal = *dRep;
|
3960
|
+
DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
|
3503
3961
|
if (isPartition)
|
3504
3962
|
ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
|
3505
3963
|
|
@@ -3562,43 +4020,49 @@ typedef struct {
|
|
3562
4020
|
|
3563
4021
|
/* Helper function to perform the recursive search for block splits.
|
3564
4022
|
* Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
|
3565
|
-
* If advantageous to split, then we recurse down the two sub-blocks.
|
3566
|
-
* we do not recurse.
|
4023
|
+
* If advantageous to split, then we recurse down the two sub-blocks.
|
4024
|
+
* If not, or if an error occurred in estimation, then we do not recurse.
|
3567
4025
|
*
|
3568
|
-
* Note: The recursion depth is capped by a heuristic minimum number of sequences,
|
4026
|
+
* Note: The recursion depth is capped by a heuristic minimum number of sequences,
|
4027
|
+
* defined by MIN_SEQUENCES_BLOCK_SPLITTING.
|
3569
4028
|
* In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
|
3570
4029
|
* In practice, recursion depth usually doesn't go beyond 4.
|
3571
4030
|
*
|
3572
|
-
* Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
|
4031
|
+
* Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
|
4032
|
+
* At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
|
3573
4033
|
* maximum of 128 KB, this value is actually impossible to reach.
|
3574
4034
|
*/
|
3575
|
-
static void
|
3576
|
-
|
3577
|
-
|
3578
|
-
|
3579
|
-
seqStore_t*
|
4035
|
+
static void
|
4036
|
+
ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
|
4037
|
+
ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
|
4038
|
+
{
|
4039
|
+
seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
|
4040
|
+
seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
|
4041
|
+
seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
|
3580
4042
|
size_t estimatedOriginalSize;
|
3581
4043
|
size_t estimatedFirstHalfSize;
|
3582
4044
|
size_t estimatedSecondHalfSize;
|
3583
4045
|
size_t midIdx = (startIdx + endIdx)/2;
|
3584
4046
|
|
4047
|
+
DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
|
4048
|
+
assert(endIdx >= startIdx);
|
3585
4049
|
if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
|
3586
|
-
DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
|
4050
|
+
DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx);
|
3587
4051
|
return;
|
3588
4052
|
}
|
3589
|
-
DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
|
3590
4053
|
ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
|
3591
4054
|
ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
|
3592
4055
|
ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
|
3593
4056
|
estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
|
3594
4057
|
estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
|
3595
4058
|
estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
|
3596
|
-
DEBUGLOG(
|
4059
|
+
DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
|
3597
4060
|
estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
|
3598
4061
|
if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
|
3599
4062
|
return;
|
3600
4063
|
}
|
3601
4064
|
if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
|
4065
|
+
DEBUGLOG(5, "split decided at seqNb:%zu", midIdx);
|
3602
4066
|
ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
|
3603
4067
|
splits->splitLocations[splits->idx] = (U32)midIdx;
|
3604
4068
|
splits->idx++;
|
@@ -3606,14 +4070,18 @@ static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx
|
|
3606
4070
|
}
|
3607
4071
|
}
|
3608
4072
|
|
3609
|
-
/* Base recursive function.
|
4073
|
+
/* Base recursive function.
|
4074
|
+
* Populates a table with intra-block partition indices that can improve compression ratio.
|
3610
4075
|
*
|
3611
|
-
*
|
4076
|
+
* @return: number of splits made (which equals the size of the partition table - 1).
|
3612
4077
|
*/
|
3613
|
-
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
|
3614
|
-
|
4078
|
+
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
|
4079
|
+
{
|
4080
|
+
seqStoreSplits splits;
|
4081
|
+
splits.splitLocations = partitions;
|
4082
|
+
splits.idx = 0;
|
3615
4083
|
if (nbSeq <= 4) {
|
3616
|
-
DEBUGLOG(
|
4084
|
+
DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq);
|
3617
4085
|
/* Refuse to try and split anything with less than 4 sequences */
|
3618
4086
|
return 0;
|
3619
4087
|
}
|
@@ -3628,17 +4096,21 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
|
|
3628
4096
|
*
|
3629
4097
|
* Returns combined size of all blocks (which includes headers), or a ZSTD error code.
|
3630
4098
|
*/
|
3631
|
-
static size_t
|
3632
|
-
|
4099
|
+
static size_t
|
4100
|
+
ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,
|
4101
|
+
void* dst, size_t dstCapacity,
|
4102
|
+
const void* src, size_t blockSize,
|
4103
|
+
U32 lastBlock, U32 nbSeq)
|
4104
|
+
{
|
3633
4105
|
size_t cSize = 0;
|
3634
4106
|
const BYTE* ip = (const BYTE*)src;
|
3635
4107
|
BYTE* op = (BYTE*)dst;
|
3636
4108
|
size_t i = 0;
|
3637
4109
|
size_t srcBytesTotal = 0;
|
3638
|
-
U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
|
3639
|
-
seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
|
3640
|
-
seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
|
3641
|
-
size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
|
4110
|
+
U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
|
4111
|
+
seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
|
4112
|
+
seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore;
|
4113
|
+
size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
|
3642
4114
|
|
3643
4115
|
/* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
|
3644
4116
|
* may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
|
@@ -3660,30 +4132,31 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
|
|
3660
4132
|
ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
3661
4133
|
ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
|
3662
4134
|
|
3663
|
-
DEBUGLOG(
|
4135
|
+
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
3664
4136
|
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
|
3665
4137
|
(unsigned)zc->blockState.matchState.nextToUpdate);
|
3666
4138
|
|
3667
4139
|
if (numSplits == 0) {
|
3668
|
-
size_t cSizeSingleBlock =
|
3669
|
-
|
3670
|
-
|
3671
|
-
|
3672
|
-
|
4140
|
+
size_t cSizeSingleBlock =
|
4141
|
+
ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
|
4142
|
+
&dRep, &cRep,
|
4143
|
+
op, dstCapacity,
|
4144
|
+
ip, blockSize,
|
4145
|
+
lastBlock, 0 /* isPartition */);
|
3673
4146
|
FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
|
3674
4147
|
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
|
3675
|
-
assert(
|
4148
|
+
assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX);
|
4149
|
+
assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize);
|
3676
4150
|
return cSizeSingleBlock;
|
3677
4151
|
}
|
3678
4152
|
|
3679
4153
|
ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
|
3680
4154
|
for (i = 0; i <= numSplits; ++i) {
|
3681
|
-
size_t srcBytes;
|
3682
4155
|
size_t cSizeChunk;
|
3683
4156
|
U32 const lastPartition = (i == numSplits);
|
3684
4157
|
U32 lastBlockEntireSrc = 0;
|
3685
4158
|
|
3686
|
-
srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
|
4159
|
+
size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
|
3687
4160
|
srcBytesTotal += srcBytes;
|
3688
4161
|
if (lastPartition) {
|
3689
4162
|
/* This is the final partition, need to account for possible last literals */
|
@@ -3698,7 +4171,8 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
|
|
3698
4171
|
op, dstCapacity,
|
3699
4172
|
ip, srcBytes,
|
3700
4173
|
lastBlockEntireSrc, 1 /* isPartition */);
|
3701
|
-
DEBUGLOG(5, "Estimated size: %zu
|
4174
|
+
DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size",
|
4175
|
+
ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
|
3702
4176
|
FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
|
3703
4177
|
|
3704
4178
|
ip += srcBytes;
|
@@ -3706,20 +4180,20 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
|
|
3706
4180
|
dstCapacity -= cSizeChunk;
|
3707
4181
|
cSize += cSizeChunk;
|
3708
4182
|
*currSeqStore = *nextSeqStore;
|
3709
|
-
assert(cSizeChunk <=
|
4183
|
+
assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize);
|
3710
4184
|
}
|
3711
|
-
/* cRep and dRep may have diverged during the compression.
|
3712
|
-
* for the next block.
|
4185
|
+
/* cRep and dRep may have diverged during the compression.
|
4186
|
+
* If so, we use the dRep repcodes for the next block.
|
3713
4187
|
*/
|
3714
4188
|
ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
|
3715
4189
|
return cSize;
|
3716
4190
|
}
|
3717
4191
|
|
3718
|
-
static size_t
|
3719
|
-
|
3720
|
-
|
3721
|
-
|
3722
|
-
|
4192
|
+
static size_t
|
4193
|
+
ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
4194
|
+
void* dst, size_t dstCapacity,
|
4195
|
+
const void* src, size_t srcSize, U32 lastBlock)
|
4196
|
+
{
|
3723
4197
|
U32 nbSeq;
|
3724
4198
|
size_t cSize;
|
3725
4199
|
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
|
@@ -3730,7 +4204,7 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
|
3730
4204
|
if (bss == ZSTDbss_noCompress) {
|
3731
4205
|
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
3732
4206
|
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
3733
|
-
cSize = ZSTD_noCompressBlock(
|
4207
|
+
cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
|
3734
4208
|
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
3735
4209
|
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
|
3736
4210
|
return cSize;
|
@@ -3743,13 +4217,14 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
|
3743
4217
|
return cSize;
|
3744
4218
|
}
|
3745
4219
|
|
3746
|
-
static size_t
|
3747
|
-
|
3748
|
-
|
4220
|
+
static size_t
|
4221
|
+
ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
4222
|
+
void* dst, size_t dstCapacity,
|
4223
|
+
const void* src, size_t srcSize, U32 frame)
|
3749
4224
|
{
|
3750
|
-
/* This
|
3751
|
-
* This isn't the actual upper bound.
|
3752
|
-
* needs further investigation.
|
4225
|
+
/* This is an estimated upper bound for the length of an rle block.
|
4226
|
+
* This isn't the actual upper bound.
|
4227
|
+
* Finding the real threshold needs further investigation.
|
3753
4228
|
*/
|
3754
4229
|
const U32 rleMaxLength = 25;
|
3755
4230
|
size_t cSize;
|
@@ -3841,10 +4316,11 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
|
|
3841
4316
|
* * cSize >= blockBound(srcSize): We have expanded the block too much so
|
3842
4317
|
* emit an uncompressed block.
|
3843
4318
|
*/
|
3844
|
-
{
|
3845
|
-
|
4319
|
+
{ size_t const cSize =
|
4320
|
+
ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
|
3846
4321
|
if (cSize != ERROR(dstSize_tooSmall)) {
|
3847
|
-
size_t const maxCSize =
|
4322
|
+
size_t const maxCSize =
|
4323
|
+
srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
|
3848
4324
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
|
3849
4325
|
if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
|
3850
4326
|
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
|
@@ -3852,7 +4328,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
|
|
3852
4328
|
}
|
3853
4329
|
}
|
3854
4330
|
}
|
3855
|
-
}
|
4331
|
+
} /* if (bss == ZSTDbss_compress)*/
|
3856
4332
|
|
3857
4333
|
DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
|
3858
4334
|
/* Superblock compression failed, attempt to emit a single no compress block.
|
@@ -3910,7 +4386,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
|
|
3910
4386
|
* All blocks will be terminated, all input will be consumed.
|
3911
4387
|
* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
|
3912
4388
|
* Frame is supposed already started (header already produced)
|
3913
|
-
*
|
4389
|
+
* @return : compressed size, or an error code
|
3914
4390
|
*/
|
3915
4391
|
static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
3916
4392
|
void* dst, size_t dstCapacity,
|
@@ -3934,7 +4410,9 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
|
3934
4410
|
ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
|
3935
4411
|
U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
|
3936
4412
|
|
3937
|
-
|
4413
|
+
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
|
4414
|
+
* additional 1. We need to revisit and change this logic to be more consistent */
|
4415
|
+
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1,
|
3938
4416
|
dstSize_tooSmall,
|
3939
4417
|
"not enough space to store compressed block");
|
3940
4418
|
if (remaining < blockSize) blockSize = remaining;
|
@@ -3973,7 +4451,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
|
3973
4451
|
MEM_writeLE24(op, cBlockHeader);
|
3974
4452
|
cSize += ZSTD_blockHeaderSize;
|
3975
4453
|
}
|
3976
|
-
}
|
4454
|
+
} /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/
|
3977
4455
|
|
3978
4456
|
|
3979
4457
|
ip += blockSize;
|
@@ -4152,31 +4630,51 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
|
|
4152
4630
|
}
|
4153
4631
|
}
|
4154
4632
|
|
4155
|
-
size_t
|
4156
|
-
|
4157
|
-
|
4633
|
+
size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
|
4634
|
+
void* dst, size_t dstCapacity,
|
4635
|
+
const void* src, size_t srcSize)
|
4158
4636
|
{
|
4159
4637
|
DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
|
4160
4638
|
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
|
4161
4639
|
}
|
4162
4640
|
|
4641
|
+
/* NOTE: Must just wrap ZSTD_compressContinue_public() */
|
4642
|
+
size_t ZSTD_compressContinue(ZSTD_CCtx* cctx,
|
4643
|
+
void* dst, size_t dstCapacity,
|
4644
|
+
const void* src, size_t srcSize)
|
4645
|
+
{
|
4646
|
+
return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize);
|
4647
|
+
}
|
4163
4648
|
|
4164
|
-
size_t
|
4649
|
+
static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx)
|
4165
4650
|
{
|
4166
4651
|
ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
|
4167
4652
|
assert(!ZSTD_checkCParams(cParams));
|
4168
|
-
return MIN
|
4653
|
+
return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);
|
4169
4654
|
}
|
4170
4655
|
|
4171
|
-
|
4656
|
+
/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */
|
4657
|
+
size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
|
4658
|
+
{
|
4659
|
+
return ZSTD_getBlockSize_deprecated(cctx);
|
4660
|
+
}
|
4661
|
+
|
4662
|
+
/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
|
4663
|
+
size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
4172
4664
|
{
|
4173
4665
|
DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
|
4174
|
-
{ size_t const blockSizeMax =
|
4666
|
+
{ size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx);
|
4175
4667
|
RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
|
4176
4668
|
|
4177
4669
|
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
|
4178
4670
|
}
|
4179
4671
|
|
4672
|
+
/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
|
4673
|
+
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
4674
|
+
{
|
4675
|
+
return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize);
|
4676
|
+
}
|
4677
|
+
|
4180
4678
|
/*! ZSTD_loadDictionaryContent() :
|
4181
4679
|
* @return : 0, or an error code
|
4182
4680
|
*/
|
@@ -4185,25 +4683,36 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
4185
4683
|
ZSTD_cwksp* ws,
|
4186
4684
|
ZSTD_CCtx_params const* params,
|
4187
4685
|
const void* src, size_t srcSize,
|
4188
|
-
ZSTD_dictTableLoadMethod_e dtlm
|
4686
|
+
ZSTD_dictTableLoadMethod_e dtlm,
|
4687
|
+
ZSTD_tableFillPurpose_e tfp)
|
4189
4688
|
{
|
4190
4689
|
const BYTE* ip = (const BYTE*) src;
|
4191
4690
|
const BYTE* const iend = ip + srcSize;
|
4192
4691
|
int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
|
4193
4692
|
|
4194
|
-
/* Assert that
|
4693
|
+
/* Assert that the ms params match the params we're being given */
|
4195
4694
|
ZSTD_assertEqualCParams(params->cParams, ms->cParams);
|
4196
4695
|
|
4197
|
-
|
4696
|
+
{ /* Ensure large dictionaries can't cause index overflow */
|
4697
|
+
|
4198
4698
|
/* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
|
4199
4699
|
* Dictionaries right at the edge will immediately trigger overflow
|
4200
4700
|
* correction, but I don't want to insert extra constraints here.
|
4201
4701
|
*/
|
4202
|
-
U32
|
4203
|
-
|
4204
|
-
|
4205
|
-
if (
|
4206
|
-
|
4702
|
+
U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX;
|
4703
|
+
|
4704
|
+
int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(&params->cParams);
|
4705
|
+
if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) {
|
4706
|
+
/* Some dictionary matchfinders in zstd use "short cache",
|
4707
|
+
* which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each
|
4708
|
+
* CDict hashtable entry as a tag rather than as part of an index.
|
4709
|
+
* When short cache is used, we need to truncate the dictionary
|
4710
|
+
* so that its indices don't overlap with the tag. */
|
4711
|
+
U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX;
|
4712
|
+
maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize);
|
4713
|
+
assert(!loadLdmDict);
|
4714
|
+
}
|
4715
|
+
|
4207
4716
|
/* If the dictionary is too large, only load the suffix of the dictionary. */
|
4208
4717
|
if (srcSize > maxDictSize) {
|
4209
4718
|
ip = iend - maxDictSize;
|
@@ -4212,30 +4721,46 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
4212
4721
|
}
|
4213
4722
|
}
|
4214
4723
|
|
4215
|
-
|
4724
|
+
if (srcSize > ZSTD_CHUNKSIZE_MAX) {
|
4725
|
+
/* We must have cleared our windows when our source is this large. */
|
4726
|
+
assert(ZSTD_window_isEmpty(ms->window));
|
4727
|
+
if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window));
|
4728
|
+
}
|
4216
4729
|
ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
|
4217
|
-
ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
|
4218
|
-
ms->forceNonContiguous = params->deterministicRefPrefix;
|
4219
4730
|
|
4220
|
-
|
4731
|
+
DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
|
4732
|
+
|
4733
|
+
if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */
|
4221
4734
|
ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
|
4222
4735
|
ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
|
4736
|
+
ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
|
4737
|
+
}
|
4738
|
+
|
4739
|
+
/* If the dict is larger than we can reasonably index in our tables, only load the suffix. */
|
4740
|
+
if (params->cParams.strategy < ZSTD_btultra) {
|
4741
|
+
U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28);
|
4742
|
+
if (srcSize > maxDictSize) {
|
4743
|
+
ip = iend - maxDictSize;
|
4744
|
+
src = ip;
|
4745
|
+
srcSize = maxDictSize;
|
4746
|
+
}
|
4223
4747
|
}
|
4224
4748
|
|
4749
|
+
ms->nextToUpdate = (U32)(ip - ms->window.base);
|
4750
|
+
ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
|
4751
|
+
ms->forceNonContiguous = params->deterministicRefPrefix;
|
4752
|
+
|
4225
4753
|
if (srcSize <= HASH_READ_SIZE) return 0;
|
4226
4754
|
|
4227
4755
|
ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
|
4228
4756
|
|
4229
|
-
if (loadLdmDict)
|
4230
|
-
ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
|
4231
|
-
|
4232
4757
|
switch(params->cParams.strategy)
|
4233
4758
|
{
|
4234
4759
|
case ZSTD_fast:
|
4235
|
-
ZSTD_fillHashTable(ms, iend, dtlm);
|
4760
|
+
ZSTD_fillHashTable(ms, iend, dtlm, tfp);
|
4236
4761
|
break;
|
4237
4762
|
case ZSTD_dfast:
|
4238
|
-
ZSTD_fillDoubleHashTable(ms, iend, dtlm);
|
4763
|
+
ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
|
4239
4764
|
break;
|
4240
4765
|
|
4241
4766
|
case ZSTD_greedy:
|
@@ -4248,7 +4773,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
4248
4773
|
} else {
|
4249
4774
|
assert(params->useRowMatchFinder != ZSTD_ps_auto);
|
4250
4775
|
if (params->useRowMatchFinder == ZSTD_ps_enable) {
|
4251
|
-
size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog)
|
4776
|
+
size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);
|
4252
4777
|
ZSTD_memset(ms->tagTable, 0, tagTableSize);
|
4253
4778
|
ZSTD_row_update(ms, iend-HASH_READ_SIZE);
|
4254
4779
|
DEBUGLOG(4, "Using row-based hash table for lazy dict");
|
@@ -4401,6 +4926,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
4401
4926
|
ZSTD_CCtx_params const* params,
|
4402
4927
|
const void* dict, size_t dictSize,
|
4403
4928
|
ZSTD_dictTableLoadMethod_e dtlm,
|
4929
|
+
ZSTD_tableFillPurpose_e tfp,
|
4404
4930
|
void* workspace)
|
4405
4931
|
{
|
4406
4932
|
const BYTE* dictPtr = (const BYTE*)dict;
|
@@ -4419,7 +4945,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
4419
4945
|
{
|
4420
4946
|
size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
4421
4947
|
FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
|
4422
|
-
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
|
4948
|
+
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), "");
|
4423
4949
|
}
|
4424
4950
|
return dictID;
|
4425
4951
|
}
|
@@ -4435,6 +4961,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
4435
4961
|
const void* dict, size_t dictSize,
|
4436
4962
|
ZSTD_dictContentType_e dictContentType,
|
4437
4963
|
ZSTD_dictTableLoadMethod_e dtlm,
|
4964
|
+
ZSTD_tableFillPurpose_e tfp,
|
4438
4965
|
void* workspace)
|
4439
4966
|
{
|
4440
4967
|
DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
|
@@ -4447,13 +4974,13 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
4447
4974
|
|
4448
4975
|
/* dict restricted modes */
|
4449
4976
|
if (dictContentType == ZSTD_dct_rawContent)
|
4450
|
-
return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
|
4977
|
+
return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp);
|
4451
4978
|
|
4452
4979
|
if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
|
4453
4980
|
if (dictContentType == ZSTD_dct_auto) {
|
4454
4981
|
DEBUGLOG(4, "raw content dictionary detected");
|
4455
4982
|
return ZSTD_loadDictionaryContent(
|
4456
|
-
ms, ls, ws, params, dict, dictSize, dtlm);
|
4983
|
+
ms, ls, ws, params, dict, dictSize, dtlm, tfp);
|
4457
4984
|
}
|
4458
4985
|
RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
|
4459
4986
|
assert(0); /* impossible */
|
@@ -4461,13 +4988,14 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
4461
4988
|
|
4462
4989
|
/* dict as full zstd dictionary */
|
4463
4990
|
return ZSTD_loadZstdDictionary(
|
4464
|
-
bs, ms, ws, params, dict, dictSize, dtlm, workspace);
|
4991
|
+
bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace);
|
4465
4992
|
}
|
4466
4993
|
|
4467
4994
|
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
|
4468
4995
|
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
|
4469
4996
|
|
4470
4997
|
/*! ZSTD_compressBegin_internal() :
|
4998
|
+
* Assumption : either @dict OR @cdict (or none) is non-NULL, never both
|
4471
4999
|
* @return : 0, or an error code */
|
4472
5000
|
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
4473
5001
|
const void* dict, size_t dictSize,
|
@@ -4503,11 +5031,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
|
4503
5031
|
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
4504
5032
|
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
|
4505
5033
|
cdict->dictContentSize, cdict->dictContentType, dtlm,
|
4506
|
-
cctx->entropyWorkspace)
|
5034
|
+
ZSTD_tfp_forCCtx, cctx->entropyWorkspace)
|
4507
5035
|
: ZSTD_compress_insertDictionary(
|
4508
5036
|
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
4509
5037
|
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
|
4510
|
-
dictContentType, dtlm, cctx->entropyWorkspace);
|
5038
|
+
dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace);
|
4511
5039
|
FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
|
4512
5040
|
assert(dictID <= UINT_MAX);
|
4513
5041
|
cctx->dictID = (U32)dictID;
|
@@ -4548,11 +5076,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
|
|
4548
5076
|
&cctxParams, pledgedSrcSize);
|
4549
5077
|
}
|
4550
5078
|
|
4551
|
-
|
5079
|
+
static size_t
|
5080
|
+
ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
|
4552
5081
|
{
|
4553
5082
|
ZSTD_CCtx_params cctxParams;
|
4554
|
-
{
|
4555
|
-
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
|
5083
|
+
{ ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
|
4556
5084
|
ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
|
4557
5085
|
}
|
4558
5086
|
DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
|
@@ -4560,9 +5088,15 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di
|
|
4560
5088
|
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
|
4561
5089
|
}
|
4562
5090
|
|
5091
|
+
size_t
|
5092
|
+
ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
|
5093
|
+
{
|
5094
|
+
return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel);
|
5095
|
+
}
|
5096
|
+
|
4563
5097
|
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
|
4564
5098
|
{
|
4565
|
-
return
|
5099
|
+
return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel);
|
4566
5100
|
}
|
4567
5101
|
|
4568
5102
|
|
@@ -4632,9 +5166,9 @@ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
|
|
4632
5166
|
#endif
|
4633
5167
|
}
|
4634
5168
|
|
4635
|
-
size_t
|
4636
|
-
|
4637
|
-
|
5169
|
+
size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
|
5170
|
+
void* dst, size_t dstCapacity,
|
5171
|
+
const void* src, size_t srcSize)
|
4638
5172
|
{
|
4639
5173
|
size_t endResult;
|
4640
5174
|
size_t const cSize = ZSTD_compressContinue_internal(cctx,
|
@@ -4658,6 +5192,14 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
|
|
4658
5192
|
return cSize + endResult;
|
4659
5193
|
}
|
4660
5194
|
|
5195
|
+
/* NOTE: Must just wrap ZSTD_compressEnd_public() */
|
5196
|
+
size_t ZSTD_compressEnd(ZSTD_CCtx* cctx,
|
5197
|
+
void* dst, size_t dstCapacity,
|
5198
|
+
const void* src, size_t srcSize)
|
5199
|
+
{
|
5200
|
+
return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
|
5201
|
+
}
|
5202
|
+
|
4661
5203
|
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
|
4662
5204
|
void* dst, size_t dstCapacity,
|
4663
5205
|
const void* src, size_t srcSize,
|
@@ -4686,7 +5228,7 @@ size_t ZSTD_compress_advanced_internal(
|
|
4686
5228
|
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
|
4687
5229
|
dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
|
4688
5230
|
params, srcSize, ZSTDb_not_buffered) , "");
|
4689
|
-
return
|
5231
|
+
return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
|
4690
5232
|
}
|
4691
5233
|
|
4692
5234
|
size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
|
@@ -4811,7 +5353,7 @@ static size_t ZSTD_initCDict_internal(
|
|
4811
5353
|
{ size_t const dictID = ZSTD_compress_insertDictionary(
|
4812
5354
|
&cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
|
4813
5355
|
¶ms, cdict->dictContent, cdict->dictContentSize,
|
4814
|
-
dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
|
5356
|
+
dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace);
|
4815
5357
|
FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
|
4816
5358
|
assert(dictID <= (size_t)(U32)-1);
|
4817
5359
|
cdict->dictID = (U32)dictID;
|
@@ -5008,6 +5550,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
|
|
5008
5550
|
params.cParams = cParams;
|
5009
5551
|
params.useRowMatchFinder = useRowMatchFinder;
|
5010
5552
|
cdict->useRowMatchFinder = useRowMatchFinder;
|
5553
|
+
cdict->compressionLevel = ZSTD_NO_CLEVEL;
|
5011
5554
|
|
5012
5555
|
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
|
5013
5556
|
dict, dictSize,
|
@@ -5087,12 +5630,17 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
|
|
5087
5630
|
|
5088
5631
|
/* ZSTD_compressBegin_usingCDict() :
|
5089
5632
|
* cdict must be != NULL */
|
5090
|
-
size_t
|
5633
|
+
size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
|
5091
5634
|
{
|
5092
5635
|
ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
|
5093
5636
|
return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
|
5094
5637
|
}
|
5095
5638
|
|
5639
|
+
size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
|
5640
|
+
{
|
5641
|
+
return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict);
|
5642
|
+
}
|
5643
|
+
|
5096
5644
|
/*! ZSTD_compress_usingCDict_internal():
|
5097
5645
|
* Implementation of various ZSTD_compress_usingCDict* functions.
|
5098
5646
|
*/
|
@@ -5102,7 +5650,7 @@ static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
|
|
5102
5650
|
const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
|
5103
5651
|
{
|
5104
5652
|
FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
|
5105
|
-
return
|
5653
|
+
return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
|
5106
5654
|
}
|
5107
5655
|
|
5108
5656
|
/*! ZSTD_compress_usingCDict_advanced():
|
@@ -5299,30 +5847,41 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
|
|
5299
5847
|
|
5300
5848
|
static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
|
5301
5849
|
{
|
5302
|
-
|
5303
|
-
|
5304
|
-
|
5850
|
+
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
5851
|
+
return cctx->blockSize - cctx->stableIn_notConsumed;
|
5852
|
+
}
|
5853
|
+
assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered);
|
5854
|
+
{ size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
|
5855
|
+
if (hintInSize==0) hintInSize = cctx->blockSize;
|
5856
|
+
return hintInSize;
|
5857
|
+
}
|
5305
5858
|
}
|
5306
5859
|
|
5307
5860
|
/** ZSTD_compressStream_generic():
|
5308
5861
|
* internal function for all *compressStream*() variants
|
5309
|
-
*
|
5310
|
-
* @return : hint size for next input */
|
5862
|
+
* @return : hint size for next input to complete ongoing block */
|
5311
5863
|
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
5312
5864
|
ZSTD_outBuffer* output,
|
5313
5865
|
ZSTD_inBuffer* input,
|
5314
5866
|
ZSTD_EndDirective const flushMode)
|
5315
5867
|
{
|
5316
|
-
const char* const istart = (const char*)input->src;
|
5317
|
-
const char* const iend =
|
5318
|
-
const char* ip =
|
5319
|
-
char* const ostart = (char*)output->dst;
|
5320
|
-
char* const oend =
|
5321
|
-
char* op =
|
5868
|
+
const char* const istart = (assert(input != NULL), (const char*)input->src);
|
5869
|
+
const char* const iend = (istart != NULL) ? istart + input->size : istart;
|
5870
|
+
const char* ip = (istart != NULL) ? istart + input->pos : istart;
|
5871
|
+
char* const ostart = (assert(output != NULL), (char*)output->dst);
|
5872
|
+
char* const oend = (ostart != NULL) ? ostart + output->size : ostart;
|
5873
|
+
char* op = (ostart != NULL) ? ostart + output->pos : ostart;
|
5322
5874
|
U32 someMoreWork = 1;
|
5323
5875
|
|
5324
5876
|
/* check expectations */
|
5325
|
-
DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%
|
5877
|
+
DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos);
|
5878
|
+
assert(zcs != NULL);
|
5879
|
+
if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
5880
|
+
assert(input->pos >= zcs->stableIn_notConsumed);
|
5881
|
+
input->pos -= zcs->stableIn_notConsumed;
|
5882
|
+
ip -= zcs->stableIn_notConsumed;
|
5883
|
+
zcs->stableIn_notConsumed = 0;
|
5884
|
+
}
|
5326
5885
|
if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
|
5327
5886
|
assert(zcs->inBuff != NULL);
|
5328
5887
|
assert(zcs->inBuffSize > 0);
|
@@ -5331,8 +5890,10 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5331
5890
|
assert(zcs->outBuff != NULL);
|
5332
5891
|
assert(zcs->outBuffSize > 0);
|
5333
5892
|
}
|
5334
|
-
|
5893
|
+
if (input->src == NULL) assert(input->size == 0);
|
5335
5894
|
assert(input->pos <= input->size);
|
5895
|
+
if (output->dst == NULL) assert(output->size == 0);
|
5896
|
+
assert(output->pos <= output->size);
|
5336
5897
|
assert((U32)flushMode <= (U32)ZSTD_e_end);
|
5337
5898
|
|
5338
5899
|
while (someMoreWork) {
|
@@ -5347,7 +5908,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5347
5908
|
|| zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */
|
5348
5909
|
&& (zcs->inBuffPos == 0) ) {
|
5349
5910
|
/* shortcut to compression pass directly into output buffer */
|
5350
|
-
size_t const cSize =
|
5911
|
+
size_t const cSize = ZSTD_compressEnd_public(zcs,
|
5351
5912
|
op, oend-op, ip, iend-ip);
|
5352
5913
|
DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
|
5353
5914
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
|
@@ -5364,8 +5925,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5364
5925
|
zcs->inBuff + zcs->inBuffPos, toLoad,
|
5365
5926
|
ip, iend-ip);
|
5366
5927
|
zcs->inBuffPos += loaded;
|
5367
|
-
if (
|
5368
|
-
ip += loaded;
|
5928
|
+
if (ip) ip += loaded;
|
5369
5929
|
if ( (flushMode == ZSTD_e_continue)
|
5370
5930
|
&& (zcs->inBuffPos < zcs->inBuffTarget) ) {
|
5371
5931
|
/* not enough input to fill full block : stop here */
|
@@ -5376,6 +5936,20 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5376
5936
|
/* empty */
|
5377
5937
|
someMoreWork = 0; break;
|
5378
5938
|
}
|
5939
|
+
} else {
|
5940
|
+
assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
|
5941
|
+
if ( (flushMode == ZSTD_e_continue)
|
5942
|
+
&& ( (size_t)(iend - ip) < zcs->blockSize) ) {
|
5943
|
+
/* can't compress a full block : stop here */
|
5944
|
+
zcs->stableIn_notConsumed = (size_t)(iend - ip);
|
5945
|
+
ip = iend; /* pretend to have consumed input */
|
5946
|
+
someMoreWork = 0; break;
|
5947
|
+
}
|
5948
|
+
if ( (flushMode == ZSTD_e_flush)
|
5949
|
+
&& (ip == iend) ) {
|
5950
|
+
/* empty */
|
5951
|
+
someMoreWork = 0; break;
|
5952
|
+
}
|
5379
5953
|
}
|
5380
5954
|
/* compress current block (note : this stage cannot be stopped in the middle) */
|
5381
5955
|
DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
|
@@ -5383,9 +5957,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5383
5957
|
void* cDst;
|
5384
5958
|
size_t cSize;
|
5385
5959
|
size_t oSize = oend-op;
|
5386
|
-
size_t const iSize = inputBuffered
|
5387
|
-
|
5388
|
-
: MIN((size_t)(iend - ip), zcs->blockSize);
|
5960
|
+
size_t const iSize = inputBuffered ? zcs->inBuffPos - zcs->inToCompress
|
5961
|
+
: MIN((size_t)(iend - ip), zcs->blockSize);
|
5389
5962
|
if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
|
5390
5963
|
cDst = op; /* compress into output buffer, to skip flush stage */
|
5391
5964
|
else
|
@@ -5393,9 +5966,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5393
5966
|
if (inputBuffered) {
|
5394
5967
|
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
|
5395
5968
|
cSize = lastBlock ?
|
5396
|
-
|
5969
|
+
ZSTD_compressEnd_public(zcs, cDst, oSize,
|
5397
5970
|
zcs->inBuff + zcs->inToCompress, iSize) :
|
5398
|
-
|
5971
|
+
ZSTD_compressContinue_public(zcs, cDst, oSize,
|
5399
5972
|
zcs->inBuff + zcs->inToCompress, iSize);
|
5400
5973
|
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
|
5401
5974
|
zcs->frameEnded = lastBlock;
|
@@ -5408,19 +5981,16 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5408
5981
|
if (!lastBlock)
|
5409
5982
|
assert(zcs->inBuffTarget <= zcs->inBuffSize);
|
5410
5983
|
zcs->inToCompress = zcs->inBuffPos;
|
5411
|
-
} else {
|
5412
|
-
unsigned const lastBlock = (ip + iSize == iend);
|
5413
|
-
assert(flushMode == ZSTD_e_end /* Already validated */);
|
5984
|
+
} else { /* !inputBuffered, hence ZSTD_bm_stable */
|
5985
|
+
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend);
|
5414
5986
|
cSize = lastBlock ?
|
5415
|
-
|
5416
|
-
|
5987
|
+
ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) :
|
5988
|
+
ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize);
|
5417
5989
|
/* Consume the input prior to error checking to mirror buffered mode. */
|
5418
|
-
if (
|
5419
|
-
ip += iSize;
|
5990
|
+
if (ip) ip += iSize;
|
5420
5991
|
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
|
5421
5992
|
zcs->frameEnded = lastBlock;
|
5422
|
-
if (lastBlock)
|
5423
|
-
assert(ip == iend);
|
5993
|
+
if (lastBlock) assert(ip == iend);
|
5424
5994
|
}
|
5425
5995
|
if (cDst == op) { /* no need to flush */
|
5426
5996
|
op += cSize;
|
@@ -5496,8 +6066,10 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf
|
|
5496
6066
|
/* After a compression call set the expected input/output buffer.
|
5497
6067
|
* This is validated at the start of the next compression call.
|
5498
6068
|
*/
|
5499
|
-
static void
|
6069
|
+
static void
|
6070
|
+
ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input)
|
5500
6071
|
{
|
6072
|
+
DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)");
|
5501
6073
|
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
5502
6074
|
cctx->expectedInBuffer = *input;
|
5503
6075
|
}
|
@@ -5516,22 +6088,22 @@ static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
|
|
5516
6088
|
{
|
5517
6089
|
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
5518
6090
|
ZSTD_inBuffer const expect = cctx->expectedInBuffer;
|
5519
|
-
if (expect.src != input->src || expect.pos != input->pos
|
5520
|
-
RETURN_ERROR(
|
5521
|
-
if (endOp != ZSTD_e_end)
|
5522
|
-
RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
|
6091
|
+
if (expect.src != input->src || expect.pos != input->pos)
|
6092
|
+
RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!");
|
5523
6093
|
}
|
6094
|
+
(void)endOp;
|
5524
6095
|
if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
|
5525
6096
|
size_t const outBufferSize = output->size - output->pos;
|
5526
6097
|
if (cctx->expectedOutBufferSize != outBufferSize)
|
5527
|
-
RETURN_ERROR(
|
6098
|
+
RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!");
|
5528
6099
|
}
|
5529
6100
|
return 0;
|
5530
6101
|
}
|
5531
6102
|
|
5532
6103
|
static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
5533
6104
|
ZSTD_EndDirective endOp,
|
5534
|
-
size_t inSize)
|
6105
|
+
size_t inSize)
|
6106
|
+
{
|
5535
6107
|
ZSTD_CCtx_params params = cctx->requestedParams;
|
5536
6108
|
ZSTD_prefixDict const prefixDict = cctx->prefixDict;
|
5537
6109
|
FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
|
@@ -5545,9 +6117,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
5545
6117
|
params.compressionLevel = cctx->cdict->compressionLevel;
|
5546
6118
|
}
|
5547
6119
|
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
|
5548
|
-
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-
|
5549
|
-
|
5550
|
-
|
6120
|
+
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-determine pledgedSrcSize */
|
6121
|
+
|
6122
|
+
{ size_t const dictSize = prefixDict.dict
|
5551
6123
|
? prefixDict.dictSize
|
5552
6124
|
: (cctx->cdict ? cctx->cdict->dictContentSize : 0);
|
5553
6125
|
ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1);
|
@@ -5559,8 +6131,18 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
5559
6131
|
params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams);
|
5560
6132
|
params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams);
|
5561
6133
|
params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams);
|
6134
|
+
params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
|
6135
|
+
params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);
|
6136
|
+
params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel);
|
5562
6137
|
|
5563
6138
|
#ifdef ZSTD_MULTITHREAD
|
6139
|
+
/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
|
6140
|
+
RETURN_ERROR_IF(
|
6141
|
+
params.useSequenceProducer == 1 && params.nbWorkers >= 1,
|
6142
|
+
parameter_combination_unsupported,
|
6143
|
+
"External sequence producer isn't supported with nbWorkers >= 1"
|
6144
|
+
);
|
6145
|
+
|
5564
6146
|
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
|
5565
6147
|
params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
|
5566
6148
|
}
|
@@ -5588,7 +6170,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
5588
6170
|
cctx->streamStage = zcss_load;
|
5589
6171
|
cctx->appliedParams = params;
|
5590
6172
|
} else
|
5591
|
-
#endif
|
6173
|
+
#endif /* ZSTD_MULTITHREAD */
|
5592
6174
|
{ U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
|
5593
6175
|
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
5594
6176
|
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
|
@@ -5614,6 +6196,8 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
5614
6196
|
return 0;
|
5615
6197
|
}
|
5616
6198
|
|
6199
|
+
/* @return provides a minimum amount of data remaining to be flushed from internal buffers
|
6200
|
+
*/
|
5617
6201
|
size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
5618
6202
|
ZSTD_outBuffer* output,
|
5619
6203
|
ZSTD_inBuffer* input,
|
@@ -5628,8 +6212,27 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
5628
6212
|
|
5629
6213
|
/* transparent initialization stage */
|
5630
6214
|
if (cctx->streamStage == zcss_init) {
|
5631
|
-
|
5632
|
-
|
6215
|
+
size_t const inputSize = input->size - input->pos; /* no obligation to start from pos==0 */
|
6216
|
+
size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed;
|
6217
|
+
if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */
|
6218
|
+
&& (endOp == ZSTD_e_continue) /* no flush requested, more input to come */
|
6219
|
+
&& (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) { /* not even reached one block yet */
|
6220
|
+
if (cctx->stableIn_notConsumed) { /* not the first time */
|
6221
|
+
/* check stable source guarantees */
|
6222
|
+
RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer");
|
6223
|
+
RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos");
|
6224
|
+
}
|
6225
|
+
/* pretend input was consumed, to give a sense forward progress */
|
6226
|
+
input->pos = input->size;
|
6227
|
+
/* save stable inBuffer, for later control, and flush/end */
|
6228
|
+
cctx->expectedInBuffer = *input;
|
6229
|
+
/* but actually input wasn't consumed, so keep track of position from where compression shall resume */
|
6230
|
+
cctx->stableIn_notConsumed += inputSize;
|
6231
|
+
/* don't initialize yet, wait for the first block of flush() order, for better parameters adaptation */
|
6232
|
+
return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format); /* at least some header to produce */
|
6233
|
+
}
|
6234
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed");
|
6235
|
+
ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */
|
5633
6236
|
}
|
5634
6237
|
/* end of transparent initialization stage */
|
5635
6238
|
|
@@ -5642,6 +6245,13 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
5642
6245
|
ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
|
5643
6246
|
cctx->cParamsChanged = 0;
|
5644
6247
|
}
|
6248
|
+
if (cctx->stableIn_notConsumed) {
|
6249
|
+
assert(cctx->appliedParams.inBufferMode == ZSTD_bm_stable);
|
6250
|
+
/* some early data was skipped - make it available for consumption */
|
6251
|
+
assert(input->pos >= cctx->stableIn_notConsumed);
|
6252
|
+
input->pos -= cctx->stableIn_notConsumed;
|
6253
|
+
cctx->stableIn_notConsumed = 0;
|
6254
|
+
}
|
5645
6255
|
for (;;) {
|
5646
6256
|
size_t const ipos = input->pos;
|
5647
6257
|
size_t const opos = output->pos;
|
@@ -5680,7 +6290,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
5680
6290
|
ZSTD_setBufferExpectations(cctx, output, input);
|
5681
6291
|
return flushMin;
|
5682
6292
|
}
|
5683
|
-
#endif
|
6293
|
+
#endif /* ZSTD_MULTITHREAD */
|
5684
6294
|
FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
|
5685
6295
|
DEBUGLOG(5, "completed ZSTD_compressStream2");
|
5686
6296
|
ZSTD_setBufferExpectations(cctx, output, input);
|
@@ -5693,13 +6303,20 @@ size_t ZSTD_compressStream2_simpleArgs (
|
|
5693
6303
|
const void* src, size_t srcSize, size_t* srcPos,
|
5694
6304
|
ZSTD_EndDirective endOp)
|
5695
6305
|
{
|
5696
|
-
ZSTD_outBuffer output
|
5697
|
-
ZSTD_inBuffer input
|
6306
|
+
ZSTD_outBuffer output;
|
6307
|
+
ZSTD_inBuffer input;
|
6308
|
+
output.dst = dst;
|
6309
|
+
output.size = dstCapacity;
|
6310
|
+
output.pos = *dstPos;
|
6311
|
+
input.src = src;
|
6312
|
+
input.size = srcSize;
|
6313
|
+
input.pos = *srcPos;
|
5698
6314
|
/* ZSTD_compressStream2() will check validity of dstPos and srcPos */
|
5699
|
-
size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
|
5700
|
-
|
5701
|
-
|
5702
|
-
|
6315
|
+
{ size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
|
6316
|
+
*dstPos = output.pos;
|
6317
|
+
*srcPos = input.pos;
|
6318
|
+
return cErr;
|
6319
|
+
}
|
5703
6320
|
}
|
5704
6321
|
|
5705
6322
|
size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
@@ -5722,6 +6339,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
5722
6339
|
/* Reset to the original values. */
|
5723
6340
|
cctx->requestedParams.inBufferMode = originalInBufferMode;
|
5724
6341
|
cctx->requestedParams.outBufferMode = originalOutBufferMode;
|
6342
|
+
|
5725
6343
|
FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
|
5726
6344
|
if (result != 0) { /* compression not completed, due to lack of output space */
|
5727
6345
|
assert(oPos == dstCapacity);
|
@@ -5732,64 +6350,60 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
5732
6350
|
}
|
5733
6351
|
}
|
5734
6352
|
|
5735
|
-
|
5736
|
-
|
5737
|
-
|
5738
|
-
|
5739
|
-
|
5740
|
-
|
5741
|
-
|
5742
|
-
|
5743
|
-
|
5744
|
-
|
5745
|
-
U32 windowSize = 1 << windowLog;
|
5746
|
-
/* posInSrc represents the amount of data the the decoder would decode up to this point.
|
6353
|
+
/* ZSTD_validateSequence() :
|
6354
|
+
* @offCode : is presumed to follow format required by ZSTD_storeSeq()
|
6355
|
+
* @returns a ZSTD error code if sequence is not valid
|
6356
|
+
*/
|
6357
|
+
static size_t
|
6358
|
+
ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,
|
6359
|
+
size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer)
|
6360
|
+
{
|
6361
|
+
U32 const windowSize = 1u << windowLog;
|
6362
|
+
/* posInSrc represents the amount of data the decoder would decode up to this point.
|
5747
6363
|
* As long as the amount of data decoded is less than or equal to window size, offsets may be
|
5748
6364
|
* larger than the total length of output decoded in order to reference the dict, even larger than
|
5749
6365
|
* window size. After output surpasses windowSize, we're limited to windowSize offsets again.
|
5750
6366
|
*/
|
5751
|
-
offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
|
5752
|
-
|
5753
|
-
RETURN_ERROR_IF(
|
6367
|
+
size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
|
6368
|
+
size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4;
|
6369
|
+
RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!");
|
6370
|
+
/* Validate maxNbSeq is large enough for the given matchLength and minMatch */
|
6371
|
+
RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch");
|
5754
6372
|
return 0;
|
5755
6373
|
}
|
5756
6374
|
|
5757
6375
|
/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
|
5758
|
-
static U32
|
5759
|
-
|
5760
|
-
U32
|
6376
|
+
static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
|
6377
|
+
{
|
6378
|
+
U32 offBase = OFFSET_TO_OFFBASE(rawOffset);
|
5761
6379
|
|
5762
6380
|
if (!ll0 && rawOffset == rep[0]) {
|
5763
|
-
|
6381
|
+
offBase = REPCODE1_TO_OFFBASE;
|
5764
6382
|
} else if (rawOffset == rep[1]) {
|
5765
|
-
|
6383
|
+
offBase = REPCODE_TO_OFFBASE(2 - ll0);
|
5766
6384
|
} else if (rawOffset == rep[2]) {
|
5767
|
-
|
6385
|
+
offBase = REPCODE_TO_OFFBASE(3 - ll0);
|
5768
6386
|
} else if (ll0 && rawOffset == rep[0] - 1) {
|
5769
|
-
|
6387
|
+
offBase = REPCODE3_TO_OFFBASE;
|
5770
6388
|
}
|
5771
|
-
|
5772
|
-
/* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
|
5773
|
-
offCode = repCode - 1;
|
5774
|
-
}
|
5775
|
-
return offCode;
|
6389
|
+
return offBase;
|
5776
6390
|
}
|
5777
6391
|
|
5778
|
-
|
5779
|
-
|
5780
|
-
|
5781
|
-
|
5782
|
-
|
5783
|
-
|
6392
|
+
size_t
|
6393
|
+
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
|
6394
|
+
ZSTD_sequencePosition* seqPos,
|
6395
|
+
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
6396
|
+
const void* src, size_t blockSize,
|
6397
|
+
ZSTD_paramSwitch_e externalRepSearch)
|
6398
|
+
{
|
5784
6399
|
U32 idx = seqPos->idx;
|
6400
|
+
U32 const startIdx = idx;
|
5785
6401
|
BYTE const* ip = (BYTE const*)(src);
|
5786
6402
|
const BYTE* const iend = ip + blockSize;
|
5787
6403
|
repcodes_t updatedRepcodes;
|
5788
6404
|
U32 dictSize;
|
5789
|
-
|
5790
|
-
|
5791
|
-
U32 ll0;
|
5792
|
-
U32 offCode;
|
6405
|
+
|
6406
|
+
DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize);
|
5793
6407
|
|
5794
6408
|
if (cctx->cdict) {
|
5795
6409
|
dictSize = (U32)cctx->cdict->dictContentSize;
|
@@ -5799,26 +6413,55 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
|
|
5799
6413
|
dictSize = 0;
|
5800
6414
|
}
|
5801
6415
|
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
5802
|
-
for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0)
|
5803
|
-
litLength = inSeqs[idx].litLength;
|
5804
|
-
matchLength = inSeqs[idx].matchLength;
|
5805
|
-
|
5806
|
-
|
5807
|
-
|
5808
|
-
|
5809
|
-
|
6416
|
+
for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
|
6417
|
+
U32 const litLength = inSeqs[idx].litLength;
|
6418
|
+
U32 const matchLength = inSeqs[idx].matchLength;
|
6419
|
+
U32 offBase;
|
6420
|
+
|
6421
|
+
if (externalRepSearch == ZSTD_ps_disable) {
|
6422
|
+
offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset);
|
6423
|
+
} else {
|
6424
|
+
U32 const ll0 = (litLength == 0);
|
6425
|
+
offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
|
6426
|
+
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
|
6427
|
+
}
|
6428
|
+
|
6429
|
+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
|
5810
6430
|
if (cctx->appliedParams.validateSequences) {
|
5811
6431
|
seqPos->posInSrc += litLength + matchLength;
|
5812
|
-
FORWARD_IF_ERROR(ZSTD_validateSequence(
|
5813
|
-
cctx->appliedParams.cParams.windowLog, dictSize,
|
5814
|
-
cctx->appliedParams.cParams.minMatch),
|
6432
|
+
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
|
6433
|
+
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
|
5815
6434
|
"Sequence validation failed");
|
5816
6435
|
}
|
5817
|
-
RETURN_ERROR_IF(idx - seqPos->idx
|
6436
|
+
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
5818
6437
|
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
5819
|
-
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend,
|
6438
|
+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
|
5820
6439
|
ip += matchLength + litLength;
|
5821
6440
|
}
|
6441
|
+
|
6442
|
+
/* If we skipped repcode search while parsing, we need to update repcodes now */
|
6443
|
+
assert(externalRepSearch != ZSTD_ps_auto);
|
6444
|
+
assert(idx >= startIdx);
|
6445
|
+
if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) {
|
6446
|
+
U32* const rep = updatedRepcodes.rep;
|
6447
|
+
U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */
|
6448
|
+
|
6449
|
+
if (lastSeqIdx >= startIdx + 2) {
|
6450
|
+
rep[2] = inSeqs[lastSeqIdx - 2].offset;
|
6451
|
+
rep[1] = inSeqs[lastSeqIdx - 1].offset;
|
6452
|
+
rep[0] = inSeqs[lastSeqIdx].offset;
|
6453
|
+
} else if (lastSeqIdx == startIdx + 1) {
|
6454
|
+
rep[2] = rep[0];
|
6455
|
+
rep[1] = inSeqs[lastSeqIdx - 1].offset;
|
6456
|
+
rep[0] = inSeqs[lastSeqIdx].offset;
|
6457
|
+
} else {
|
6458
|
+
assert(lastSeqIdx == startIdx);
|
6459
|
+
rep[2] = rep[1];
|
6460
|
+
rep[1] = rep[0];
|
6461
|
+
rep[0] = inSeqs[lastSeqIdx].offset;
|
6462
|
+
}
|
6463
|
+
}
|
6464
|
+
|
5822
6465
|
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
|
5823
6466
|
|
5824
6467
|
if (inSeqs[idx].litLength) {
|
@@ -5827,25 +6470,16 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
|
|
5827
6470
|
ip += inSeqs[idx].litLength;
|
5828
6471
|
seqPos->posInSrc += inSeqs[idx].litLength;
|
5829
6472
|
}
|
5830
|
-
RETURN_ERROR_IF(ip != iend,
|
6473
|
+
RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");
|
5831
6474
|
seqPos->idx = idx+1;
|
5832
6475
|
return 0;
|
5833
6476
|
}
|
5834
6477
|
|
5835
|
-
|
5836
|
-
|
5837
|
-
*
|
5838
|
-
*
|
5839
|
-
|
5840
|
-
* in inSeqs, storing any (partial) sequences.
|
5841
|
-
*
|
5842
|
-
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
|
5843
|
-
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
|
5844
|
-
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
|
5845
|
-
*/
|
5846
|
-
static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
5847
|
-
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
5848
|
-
const void* src, size_t blockSize) {
|
6478
|
+
size_t
|
6479
|
+
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
6480
|
+
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
6481
|
+
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch)
|
6482
|
+
{
|
5849
6483
|
U32 idx = seqPos->idx;
|
5850
6484
|
U32 startPosInSequence = seqPos->posInSequence;
|
5851
6485
|
U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
|
@@ -5855,10 +6489,9 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
5855
6489
|
repcodes_t updatedRepcodes;
|
5856
6490
|
U32 bytesAdjustment = 0;
|
5857
6491
|
U32 finalMatchSplit = 0;
|
5858
|
-
|
5859
|
-
|
5860
|
-
|
5861
|
-
U32 offCode;
|
6492
|
+
|
6493
|
+
/* TODO(embg) support fast parsing mode in noBlockDelim mode */
|
6494
|
+
(void)externalRepSearch;
|
5862
6495
|
|
5863
6496
|
if (cctx->cdict) {
|
5864
6497
|
dictSize = cctx->cdict->dictContentSize;
|
@@ -5867,14 +6500,15 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
5867
6500
|
} else {
|
5868
6501
|
dictSize = 0;
|
5869
6502
|
}
|
5870
|
-
DEBUGLOG(5, "
|
6503
|
+
DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
|
5871
6504
|
DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
|
5872
6505
|
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
5873
6506
|
while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
|
5874
6507
|
const ZSTD_Sequence currSeq = inSeqs[idx];
|
5875
|
-
litLength = currSeq.litLength;
|
5876
|
-
matchLength = currSeq.matchLength;
|
5877
|
-
rawOffset = currSeq.offset;
|
6508
|
+
U32 litLength = currSeq.litLength;
|
6509
|
+
U32 matchLength = currSeq.matchLength;
|
6510
|
+
U32 const rawOffset = currSeq.offset;
|
6511
|
+
U32 offBase;
|
5878
6512
|
|
5879
6513
|
/* Modify the sequence depending on where endPosInSequence lies */
|
5880
6514
|
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
|
@@ -5888,7 +6522,6 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
5888
6522
|
/* Move to the next sequence */
|
5889
6523
|
endPosInSequence -= currSeq.litLength + currSeq.matchLength;
|
5890
6524
|
startPosInSequence = 0;
|
5891
|
-
idx++;
|
5892
6525
|
} else {
|
5893
6526
|
/* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
|
5894
6527
|
does not reach the end of the match. So, we have to split the sequence */
|
@@ -5927,23 +6560,24 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
5927
6560
|
}
|
5928
6561
|
}
|
5929
6562
|
/* Check if this offset can be represented with a repcode */
|
5930
|
-
{ U32 ll0 = (litLength == 0);
|
5931
|
-
|
5932
|
-
|
6563
|
+
{ U32 const ll0 = (litLength == 0);
|
6564
|
+
offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0);
|
6565
|
+
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
|
5933
6566
|
}
|
5934
6567
|
|
5935
6568
|
if (cctx->appliedParams.validateSequences) {
|
5936
6569
|
seqPos->posInSrc += litLength + matchLength;
|
5937
|
-
FORWARD_IF_ERROR(ZSTD_validateSequence(
|
5938
|
-
cctx->appliedParams.cParams.windowLog, dictSize,
|
5939
|
-
cctx->appliedParams.cParams.minMatch),
|
6570
|
+
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
|
6571
|
+
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
|
5940
6572
|
"Sequence validation failed");
|
5941
6573
|
}
|
5942
|
-
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)",
|
5943
|
-
RETURN_ERROR_IF(idx - seqPos->idx
|
6574
|
+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
|
6575
|
+
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
5944
6576
|
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
5945
|
-
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend,
|
6577
|
+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
|
5946
6578
|
ip += matchLength + litLength;
|
6579
|
+
if (!finalMatchSplit)
|
6580
|
+
idx++; /* Next Sequence */
|
5947
6581
|
}
|
5948
6582
|
DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
|
5949
6583
|
assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
|
@@ -5966,8 +6600,9 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|
5966
6600
|
|
5967
6601
|
typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
5968
6602
|
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
5969
|
-
const void* src, size_t blockSize);
|
5970
|
-
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
|
6603
|
+
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
|
6604
|
+
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
|
6605
|
+
{
|
5971
6606
|
ZSTD_sequenceCopier sequenceCopier = NULL;
|
5972
6607
|
assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
|
5973
6608
|
if (mode == ZSTD_sf_explicitBlockDelimiters) {
|
@@ -5979,24 +6614,75 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
|
|
5979
6614
|
return sequenceCopier;
|
5980
6615
|
}
|
5981
6616
|
|
6617
|
+
/* Discover the size of next block by searching for the delimiter.
|
6618
|
+
* Note that a block delimiter **must** exist in this mode,
|
6619
|
+
* otherwise it's an input error.
|
6620
|
+
* The block size retrieved will be later compared to ensure it remains within bounds */
|
6621
|
+
static size_t
|
6622
|
+
blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
|
6623
|
+
{
|
6624
|
+
int end = 0;
|
6625
|
+
size_t blockSize = 0;
|
6626
|
+
size_t spos = seqPos.idx;
|
6627
|
+
DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize);
|
6628
|
+
assert(spos <= inSeqsSize);
|
6629
|
+
while (spos < inSeqsSize) {
|
6630
|
+
end = (inSeqs[spos].offset == 0);
|
6631
|
+
blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength;
|
6632
|
+
if (end) {
|
6633
|
+
if (inSeqs[spos].matchLength != 0)
|
6634
|
+
RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0");
|
6635
|
+
break;
|
6636
|
+
}
|
6637
|
+
spos++;
|
6638
|
+
}
|
6639
|
+
if (!end)
|
6640
|
+
RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter");
|
6641
|
+
return blockSize;
|
6642
|
+
}
|
6643
|
+
|
6644
|
+
/* More a "target" block size */
|
6645
|
+
static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining)
|
6646
|
+
{
|
6647
|
+
int const lastBlock = (remaining <= blockSize);
|
6648
|
+
return lastBlock ? remaining : blockSize;
|
6649
|
+
}
|
6650
|
+
|
6651
|
+
static size_t determine_blockSize(ZSTD_sequenceFormat_e mode,
|
6652
|
+
size_t blockSize, size_t remaining,
|
6653
|
+
const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
|
6654
|
+
{
|
6655
|
+
DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining);
|
6656
|
+
if (mode == ZSTD_sf_noBlockDelimiters)
|
6657
|
+
return blockSize_noDelimiter(blockSize, remaining);
|
6658
|
+
{ size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos);
|
6659
|
+
FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters");
|
6660
|
+
if (explicitBlockSize > blockSize)
|
6661
|
+
RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block");
|
6662
|
+
if (explicitBlockSize > remaining)
|
6663
|
+
RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source");
|
6664
|
+
return explicitBlockSize;
|
6665
|
+
}
|
6666
|
+
}
|
6667
|
+
|
5982
6668
|
/* Compress, block-by-block, all of the sequences given.
|
5983
6669
|
*
|
5984
|
-
* Returns the cumulative size of all compressed blocks (including their headers),
|
6670
|
+
* Returns the cumulative size of all compressed blocks (including their headers),
|
6671
|
+
* otherwise a ZSTD error.
|
5985
6672
|
*/
|
5986
|
-
static size_t
|
5987
|
-
|
5988
|
-
|
5989
|
-
|
6673
|
+
static size_t
|
6674
|
+
ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
6675
|
+
void* dst, size_t dstCapacity,
|
6676
|
+
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
6677
|
+
const void* src, size_t srcSize)
|
6678
|
+
{
|
5990
6679
|
size_t cSize = 0;
|
5991
|
-
U32 lastBlock;
|
5992
|
-
size_t blockSize;
|
5993
|
-
size_t compressedSeqsSize;
|
5994
6680
|
size_t remaining = srcSize;
|
5995
6681
|
ZSTD_sequencePosition seqPos = {0, 0, 0};
|
5996
6682
|
|
5997
6683
|
BYTE const* ip = (BYTE const*)src;
|
5998
6684
|
BYTE* op = (BYTE*)dst;
|
5999
|
-
ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
|
6685
|
+
ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
|
6000
6686
|
|
6001
6687
|
DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
|
6002
6688
|
/* Special case: empty frame */
|
@@ -6010,22 +6696,29 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
6010
6696
|
}
|
6011
6697
|
|
6012
6698
|
while (remaining) {
|
6699
|
+
size_t compressedSeqsSize;
|
6013
6700
|
size_t cBlockSize;
|
6014
6701
|
size_t additionalByteAdjustment;
|
6015
|
-
|
6016
|
-
|
6702
|
+
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
|
6703
|
+
cctx->blockSize, remaining,
|
6704
|
+
inSeqs, inSeqsSize, seqPos);
|
6705
|
+
U32 const lastBlock = (blockSize == remaining);
|
6706
|
+
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
|
6707
|
+
assert(blockSize <= remaining);
|
6017
6708
|
ZSTD_resetSeqStore(&cctx->seqStore);
|
6018
|
-
DEBUGLOG(
|
6709
|
+
DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize);
|
6019
6710
|
|
6020
|
-
additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
|
6711
|
+
additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes);
|
6021
6712
|
FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
|
6022
6713
|
blockSize -= additionalByteAdjustment;
|
6023
6714
|
|
6024
6715
|
/* If blocks are too small, emit as a nocompress block */
|
6025
|
-
|
6716
|
+
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
|
6717
|
+
* additional 1. We need to revisit and change this logic to be more consistent */
|
6718
|
+
if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
|
6026
6719
|
cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
6027
6720
|
FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
|
6028
|
-
DEBUGLOG(
|
6721
|
+
DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
|
6029
6722
|
cSize += cBlockSize;
|
6030
6723
|
ip += blockSize;
|
6031
6724
|
op += cBlockSize;
|
@@ -6034,6 +6727,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
6034
6727
|
continue;
|
6035
6728
|
}
|
6036
6729
|
|
6730
|
+
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
|
6037
6731
|
compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
|
6038
6732
|
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
|
6039
6733
|
&cctx->appliedParams,
|
@@ -6042,11 +6736,11 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
6042
6736
|
cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
6043
6737
|
cctx->bmi2);
|
6044
6738
|
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
|
6045
|
-
DEBUGLOG(
|
6739
|
+
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
|
6046
6740
|
|
6047
6741
|
if (!cctx->isFirstBlock &&
|
6048
6742
|
ZSTD_maybeRLE(&cctx->seqStore) &&
|
6049
|
-
ZSTD_isRLE(
|
6743
|
+
ZSTD_isRLE(ip, blockSize)) {
|
6050
6744
|
/* We don't want to emit our first block as a RLE even if it qualifies because
|
6051
6745
|
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
|
6052
6746
|
* This is only an issue for zstd <= v1.4.3
|
@@ -6057,12 +6751,12 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
6057
6751
|
if (compressedSeqsSize == 0) {
|
6058
6752
|
/* ZSTD_noCompressBlock writes the block header as well */
|
6059
6753
|
cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
6060
|
-
FORWARD_IF_ERROR(cBlockSize, "
|
6061
|
-
DEBUGLOG(
|
6754
|
+
FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed");
|
6755
|
+
DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize);
|
6062
6756
|
} else if (compressedSeqsSize == 1) {
|
6063
6757
|
cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
|
6064
|
-
FORWARD_IF_ERROR(cBlockSize, "
|
6065
|
-
DEBUGLOG(
|
6758
|
+
FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed");
|
6759
|
+
DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize);
|
6066
6760
|
} else {
|
6067
6761
|
U32 cBlockHeader;
|
6068
6762
|
/* Error checking and repcodes update */
|
@@ -6074,11 +6768,10 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
6074
6768
|
cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
|
6075
6769
|
MEM_writeLE24(op, cBlockHeader);
|
6076
6770
|
cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
|
6077
|
-
DEBUGLOG(
|
6771
|
+
DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize);
|
6078
6772
|
}
|
6079
6773
|
|
6080
6774
|
cSize += cBlockSize;
|
6081
|
-
DEBUGLOG(4, "cSize running total: %zu", cSize);
|
6082
6775
|
|
6083
6776
|
if (lastBlock) {
|
6084
6777
|
break;
|
@@ -6089,21 +6782,25 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
6089
6782
|
dstCapacity -= cBlockSize;
|
6090
6783
|
cctx->isFirstBlock = 0;
|
6091
6784
|
}
|
6785
|
+
DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
|
6092
6786
|
}
|
6093
6787
|
|
6788
|
+
DEBUGLOG(4, "cSize final total: %zu", cSize);
|
6094
6789
|
return cSize;
|
6095
6790
|
}
|
6096
6791
|
|
6097
|
-
size_t ZSTD_compressSequences(ZSTD_CCtx*
|
6792
|
+
size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
6793
|
+
void* dst, size_t dstCapacity,
|
6098
6794
|
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
6099
|
-
const void* src, size_t srcSize)
|
6795
|
+
const void* src, size_t srcSize)
|
6796
|
+
{
|
6100
6797
|
BYTE* op = (BYTE*)dst;
|
6101
6798
|
size_t cSize = 0;
|
6102
6799
|
size_t compressedBlocksSize = 0;
|
6103
6800
|
size_t frameHeaderSize = 0;
|
6104
6801
|
|
6105
6802
|
/* Transparent initialization stage, same as compressStream2() */
|
6106
|
-
DEBUGLOG(
|
6803
|
+
DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity);
|
6107
6804
|
assert(cctx != NULL);
|
6108
6805
|
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
|
6109
6806
|
/* Begin writing output, starting with frame header */
|
@@ -6131,26 +6828,34 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
|
|
6131
6828
|
cSize += 4;
|
6132
6829
|
}
|
6133
6830
|
|
6134
|
-
DEBUGLOG(
|
6831
|
+
DEBUGLOG(4, "Final compressed size: %zu", cSize);
|
6135
6832
|
return cSize;
|
6136
6833
|
}
|
6137
6834
|
|
6138
6835
|
/*====== Finalize ======*/
|
6139
6836
|
|
6837
|
+
static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs)
|
6838
|
+
{
|
6839
|
+
const ZSTD_inBuffer nullInput = { NULL, 0, 0 };
|
6840
|
+
const int stableInput = (zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
|
6841
|
+
return stableInput ? zcs->expectedInBuffer : nullInput;
|
6842
|
+
}
|
6843
|
+
|
6140
6844
|
/*! ZSTD_flushStream() :
|
6141
6845
|
* @return : amount of data remaining to flush */
|
6142
6846
|
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
|
6143
6847
|
{
|
6144
|
-
ZSTD_inBuffer input =
|
6848
|
+
ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
|
6849
|
+
input.size = input.pos; /* do not ingest more input during flush */
|
6145
6850
|
return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
|
6146
6851
|
}
|
6147
6852
|
|
6148
6853
|
|
6149
6854
|
size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
|
6150
6855
|
{
|
6151
|
-
ZSTD_inBuffer input =
|
6856
|
+
ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
|
6152
6857
|
size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
|
6153
|
-
FORWARD_IF_ERROR(
|
6858
|
+
FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed");
|
6154
6859
|
if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
|
6155
6860
|
/* single thread mode : attempt to calculate remaining to flush more precisely */
|
6156
6861
|
{ size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
|
@@ -6272,7 +6977,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
|
|
6272
6977
|
cp.targetLength = (unsigned)(-clampedCompressionLevel);
|
6273
6978
|
}
|
6274
6979
|
/* refine parameters based on srcSize & dictSize */
|
6275
|
-
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
|
6980
|
+
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
|
6276
6981
|
}
|
6277
6982
|
}
|
6278
6983
|
|
@@ -6307,3 +7012,21 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
|
|
6307
7012
|
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
|
6308
7013
|
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
|
6309
7014
|
}
|
7015
|
+
|
7016
|
+
void ZSTD_registerSequenceProducer(
|
7017
|
+
ZSTD_CCtx* zc, void* mState,
|
7018
|
+
ZSTD_sequenceProducer_F* mFinder
|
7019
|
+
) {
|
7020
|
+
if (mFinder != NULL) {
|
7021
|
+
ZSTD_externalMatchCtx emctx;
|
7022
|
+
emctx.mState = mState;
|
7023
|
+
emctx.mFinder = mFinder;
|
7024
|
+
emctx.seqBuffer = NULL;
|
7025
|
+
emctx.seqBufferCapacity = 0;
|
7026
|
+
zc->externalMatchCtx = emctx;
|
7027
|
+
zc->requestedParams.useSequenceProducer = 1;
|
7028
|
+
} else {
|
7029
|
+
ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
|
7030
|
+
zc->requestedParams.useSequenceProducer = 0;
|
7031
|
+
}
|
7032
|
+
}
|